diff --git a/trains_agent/backend_api/session/session.py b/trains_agent/backend_api/session/session.py index f747fa7..3a169cb 100644 --- a/trains_agent/backend_api/session/session.py +++ b/trains_agent/backend_api/session/session.py @@ -40,6 +40,7 @@ class Session(TokenManager): _session_requests = 0 _session_initial_timeout = (3.0, 10.) _session_timeout = (10.0, 30.) + _session_initial_connect_retry = 4 _write_session_data_size = 15000 _write_session_timeout = (30.0, 30.) @@ -96,7 +97,7 @@ class Session(TokenManager): else: self.config = load() if initialize_logging: - self.config.initialize_logging() + self.config.initialize_logging(debug=kwargs.get('debug', False)) token_expiration_threshold_sec = self.config.get( "auth.token_expiration_threshold_sec", 60 @@ -145,7 +146,7 @@ class Session(TokenManager): # limit the reconnect retries, so we get an error if we are starting the session http_no_retries_config = dict(**http_retries_config) - http_no_retries_config['connect'] = 3 + http_no_retries_config['connect'] = self._session_initial_connect_retry self.__http_session = get_http_session_with_retry(**http_no_retries_config) # try to connect with the server self.refresh_token() diff --git a/trains_agent/backend_config/config.py b/trains_agent/backend_config/config.py index 40cfb69..8a63f5c 100644 --- a/trains_agent/backend_config/config.py +++ b/trains_agent/backend_config/config.py @@ -190,7 +190,7 @@ class Config(object): def reload(self): self.replace(self._reload()) - def initialize_logging(self): + def initialize_logging(self, debug=False): logging_config = self._config.get("logging", None) if not logging_config: return False @@ -217,6 +217,8 @@ class Config(object): ) for logger in loggers: handlers = logger.get("handlers", None) + if debug: + logger['level'] = 'DEBUG' if not handlers: continue logger["handlers"] = [h for h in handlers if h not in deleted] diff --git a/trains_agent/session.py b/trains_agent/session.py index f1122a8..4ec08c3 100644 --- a/trains_agent/session.py +++ b/trains_agent/session.py @@ -73,9 +73,11 @@ class Session(_Session): os.environ[LOCAL_CONFIG_FILE_OVERRIDE_VAR] = config_file if not Path(config_file).is_file(): raise ValueError("Could not open configuration file: {}".format(config_file)) + cpu_only = kwargs.get('cpu_only') if cpu_only: os.environ['CUDA_VISIBLE_DEVICES'] = os.environ['NVIDIA_VISIBLE_DEVICES'] = 'none' + if kwargs.get('gpus') and not os.environ.get('KUBERNETES_SERVICE_HOST') \ and not os.environ.get('KUBERNETES_PORT'): # CUDA_VISIBLE_DEVICES does not support 'all' @@ -84,6 +86,7 @@ class Session(_Session): os.environ['NVIDIA_VISIBLE_DEVICES'] = kwargs.get('gpus') else: os.environ['CUDA_VISIBLE_DEVICES'] = os.environ['NVIDIA_VISIBLE_DEVICES'] = kwargs.get('gpus') + if kwargs.get('only_load_config'): from trains_agent.backend_api.config import load self.config = load()