Mirror of https://github.com/clearml/clearml (synced 2025-05-06 13:54:26 +00:00)

commit 79d9607638: Merge branch 'master' of https://github.com/allegroai/clearml
@@ -212,7 +212,7 @@ class PipelineController(object):
 docker=None, # type: Optional[str]
 docker_args=None, # type: Optional[str]
 docker_bash_setup_script=None, # type: Optional[str]
-packages=None, # type: Optional[Union[str, Sequence[str]]]
+packages=None, # type: Optional[Union[bool, str, Sequence[str]]]
 repo=None, # type: Optional[str]
 repo_branch=None, # type: Optional[str]
 repo_commit=None, # type: Optional[str]

@@ -273,6 +273,7 @@ class PipelineController(object):
 :param packages: Manually specify a list of required packages or a local requirements.txt file.
 Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt"
 If not provided, packages are automatically added.
+Use `False` to install requirements from "requirements.txt" inside your git repository
 :param repo: Optional, specify a repository to attach to the pipeline controller, when remotely executing.
 Allow users to execute the controller inside the specified repository, enabling them to load modules/script
 from the repository. Notice the execution work directory will be the repository root folder.
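The added `bool` option means `packages=False` can now be passed to skip automatic package detection and rely on the repository's own requirements.txt. A minimal sketch assuming the 1.16.3 signature shown above; project, pipeline and repository names are illustrative:

```python
from clearml import PipelineController

# packages=False: do not auto-detect packages; the agent installs from the
# requirements.txt committed in the attached repository instead.
pipe = PipelineController(
    name="example-pipeline",   # illustrative
    project="examples",        # illustrative
    version="1.0.0",
    packages=False,            # new Optional[Union[bool, str, Sequence[str]]] value
    repo="https://github.com/your-org/your-repo.git",  # hypothetical repo URL
)
```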
@@ -711,7 +712,7 @@ class PipelineController(object):
 task_type=None, # type: Optional[str]
 auto_connect_frameworks=None, # type: Optional[dict]
 auto_connect_arg_parser=None, # type: Optional[dict]
-packages=None, # type: Optional[Union[str, Sequence[str]]]
+packages=None, # type: Optional[Union[bool, str, Sequence[str]]]
 repo=None, # type: Optional[str]
 repo_branch=None, # type: Optional[str]
 repo_commit=None, # type: Optional[str]

@@ -786,6 +787,7 @@ class PipelineController(object):
 :param packages: Manually specify a list of required packages or a local requirements.txt file.
 Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt"
 If not provided, packages are automatically added based on the imports used in the function.
+Use `False` to install requirements from "requirements.txt" inside your git repository
 :param repo: Optional, specify a repository to attach to the function, when remotely executing.
 Allow users to execute the function inside the specified repository, enabling to load modules/script
 from a repository Notice the execution work directory will be the repository root folder.

@@ -2064,7 +2066,7 @@ class PipelineController(object):
 task_type=None, # type: Optional[str]
 auto_connect_frameworks=None, # type: Optional[dict]
 auto_connect_arg_parser=None, # type: Optional[dict]
-packages=None, # type: Optional[Union[str, Sequence[str]]]
+packages=None, # type: Optional[Union[bool, str, Sequence[str]]]
 repo=None, # type: Optional[str]
 repo_branch=None, # type: Optional[str]
 repo_commit=None, # type: Optional[str]

@@ -2139,6 +2141,7 @@ class PipelineController(object):
 :param packages: Manually specify a list of required packages or a local requirements.txt file.
 Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt"
 If not provided, packages are automatically added based on the imports used in the function.
+Use `False` to install requirements from "requirements.txt" inside your git repository
 :param repo: Optional, specify a repository to attach to the function, when remotely executing.
 Allow users to execute the function inside the specified repository, enabling to load modules/script
 from a repository Notice the execution work directory will be the repository root folder.
@@ -3485,7 +3488,7 @@ class PipelineDecorator(PipelineController):
 docker=None, # type: Optional[str]
 docker_args=None, # type: Optional[str]
 docker_bash_setup_script=None, # type: Optional[str]
-packages=None, # type: Optional[Union[str, Sequence[str]]]
+packages=None, # type: Optional[Union[bool, str, Sequence[str]]]
 repo=None, # type: Optional[str]
 repo_branch=None, # type: Optional[str]
 repo_commit=None, # type: Optional[str]

@@ -3541,6 +3544,7 @@ class PipelineDecorator(PipelineController):
 :param packages: Manually specify a list of required packages or a local requirements.txt file.
 Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt"
 If not provided, packages are automatically added.
+Use `False` to install requirements from "requirements.txt" inside your git repository
 :param repo: Optional, specify a repository to attach to the pipeline controller, when remotely executing.
 Allow users to execute the controller inside the specified repository, enabling them to load modules/script
 from the repository. Notice the execution work directory will be the repository root folder.

@@ -3950,7 +3954,7 @@ class PipelineDecorator(PipelineController):
 return_values=('return_object', ), # type: Union[str, Sequence[str]]
 name=None, # type: Optional[str]
 cache=False, # type: bool
-packages=None, # type: Optional[Union[str, Sequence[str]]]
+packages=None, # type: Optional[Union[bool, str, Sequence[str]]]
 parents=None, # type: Optional[List[str]]
 execution_queue=None, # type: Optional[str]
 continue_on_fail=False, # type: bool

@@ -3992,6 +3996,7 @@ class PipelineDecorator(PipelineController):
 :param packages: Manually specify a list of required packages or a local requirements.txt file.
 Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt"
 If not provided, packages are automatically added based on the imports used inside the wrapped function.
+Use `False` to install requirements from "requirements.txt" inside your git repository
 :param parents: Optional list of parent nodes in the DAG.
 The current step in the pipeline will be sent for execution only after all the parent nodes
 have been executed successfully.

@@ -4415,7 +4420,7 @@ class PipelineDecorator(PipelineController):
 docker=None, # type: Optional[str]
 docker_args=None, # type: Optional[str]
 docker_bash_setup_script=None, # type: Optional[str]
-packages=None, # type: Optional[Union[str, Sequence[str]]]
+packages=None, # type: Optional[Union[bool, str, Sequence[str]]]
 repo=None, # type: Optional[str]
 repo_branch=None, # type: Optional[str]
 repo_commit=None, # type: Optional[str]

@@ -4502,6 +4507,7 @@ class PipelineDecorator(PipelineController):
 :param packages: Manually specify a list of required packages or a local requirements.txt file.
 Example: ["tqdm>=2.1", "scikit-learn"] or "./requirements.txt"
 If not provided, packages are automatically added based on the imports used in the function.
+Use `False` to install requirements from "requirements.txt" inside your git repository
 :param repo: Optional, specify a repository to attach to the function, when remotely executing.
 Allow users to execute the function inside the specified repository, enabling them to load modules/script
 from the repository. Notice the execution work directory will be the repository root folder.

@@ -4805,7 +4811,7 @@ class PipelineDecorator(PipelineController):
 if n not in _node.parents:
 _node.parents.append(n)
 break
-if kwargs:
+
 leaves = cls._singleton._find_executed_node_leaves()
 _node.parents = (_node.parents or []) + [
 x for x in cls._evaluated_return_values.get(tid, []) if x in leaves
@@ -7,7 +7,7 @@ try:
 # Since `referencing`` only supports Python >= 3.8, this try-except blocks maintain support
 # for earlier python versions.
 from referencing.exceptions import Unresolvable
-except ImportError:
+except (ImportError, TypeError):
 from jsonschema.exceptions import RefResolutionError as Unresolvable

 from .apimodel import ApiModel
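For reference, the hardened import fallback in this hunk amounts to the following; catching `TypeError` as well is the new part, so a failed `referencing` import on older environments still falls back to jsonschema's exception:

```python
try:
    # Preferred path: `referencing` (Python >= 3.8)
    from referencing.exceptions import Unresolvable
except (ImportError, TypeError):
    # Fallback: expose jsonschema's exception under the same name
    from jsonschema.exceptions import RefResolutionError as Unresolvable
```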
@@ -422,8 +422,9 @@ class CreateAndPopulate(object):
 "diff --git a{script_entry} b{script_entry}\n" \
 "--- a{script_entry}\n" \
 "+++ b{script_entry}\n" \
-"@@ -{idx_a},0 +{idx_b},3 @@\n" \
+"@@ -{idx_a},0 +{idx_b},4 @@\n" \
-"+from clearml import Task\n" \
+"+try: from allegroai import Task\n" \
+"+except ImportError: from clearml import Task\n" \
 "+(__name__ != \"__main__\") or Task.init()\n" \
 "+\n".format(
 script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1)

@@ -432,7 +433,11 @@ class CreateAndPopulate(object):
 pass
 elif local_entry_file and lines:
 # if we are here it means we do not have a git diff, but a single script file
-init_lines = ["from clearml import Task\n", "(__name__ != \"__main__\") or Task.init()\n\n"]
+init_lines = [
+"try: from allegroai import Task\n",
+"except ImportError: from clearml import Task\n",
+'(__name__ != "__main__") or Task.init()\n\n',
+]
 task_state['script']['diff'] = ''.join(lines[:idx_a] + init_lines + lines[idx_a:])
 # no need to add anything, we patched it.
 task_init_patch = ""

@@ -440,7 +445,8 @@ class CreateAndPopulate(object):
 # Add Task.init call
 # if we are here it means we do not have a git diff, but a single script file
 task_init_patch += \
-"from clearml import Task\n" \
+"try: from allegroai import Task\n" \
+"except ImportError: from clearml import Task\n" \
 "(__name__ != \"__main__\") or Task.init()\n\n"
 task_state['script']['diff'] = task_init_patch + task_state['script'].get('diff', '')
 task_init_patch = ""
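Both variants above inject the same bootstrap at the top of the user's script; written out as plain Python, the generated patch is equivalent to:

```python
# Prepended to the entry script by CreateAndPopulate:
try:
    from allegroai import Task   # prefer the enterprise package when installed
except ImportError:
    from clearml import Task     # fall back to the open-source SDK
(__name__ != "__main__") or Task.init()  # initialize only when run as the entry point
```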
@@ -94,7 +94,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
 return str(self) == str(other)

 def __repr__(self):
-return f"TaskTypes.{self.value}"
+return "TaskTypes.{}".format(self.value)

 training = 'training'
 testing = 'testing'

@@ -116,7 +116,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
 return str(self) == str(other)

 def __repr__(self):
-return f"TaskTypes.{self.value}"
+return "TaskTypes.{}".format(self.value)

 created = "created"
 queued = "queued"
@@ -6,7 +6,7 @@ except ImportError:
 fire = None

 import inspect
-from .frameworks import _patched_call # noqa
+from .frameworks import _patched_call_no_recursion_guard # noqa
 from ..config import get_remote_task_id, running_remotely
 from ..utilities.dicts import cast_str_to_bool

@@ -57,9 +57,9 @@ class PatchFire:
 if not cls.__patched:
 cls.__patched = True
 if running_remotely():
-fire.core._Fire = _patched_call(fire.core._Fire, PatchFire.__Fire)
+fire.core._Fire = _patched_call_no_recursion_guard(fire.core._Fire, PatchFire.__Fire)
 else:
-fire.core._CallAndUpdateTrace = _patched_call(
+fire.core._CallAndUpdateTrace = _patched_call_no_recursion_guard(
 fire.core._CallAndUpdateTrace, PatchFire.__CallAndUpdateTrace
 )

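`_patched_call`-style helpers wrap a target callable so the patch function sees every invocation and can delegate to the original. The sketch below is a generic illustration of that pattern without a recursion guard, as the new helper's name suggests; it is not the actual clearml implementation:

```python
def patched_call_no_recursion_guard(original_fn, patch_fn):
    # Route every call through patch_fn, handing it the original callable first
    # so it can inspect arguments and then delegate. Re-entrant calls are not
    # short-circuited, unlike a guarded variant.
    def _wrapper(*args, **kwargs):
        return patch_fn(original_fn, *args, **kwargs)
    return _wrapper
```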
@@ -15,13 +15,15 @@ from clearml.backend_config.defs import LOCAL_CONFIG_FILES, LOCAL_CONFIG_FILE_OV
 from clearml.config import config_obj
 from clearml.utilities.pyhocon import ConfigFactory, ConfigMissingException

-description = "\n" \
-"Please create new clearml credentials through the settings page in " \
-"your `clearml-server` web app (e.g. http://localhost:8080//settings/workspace-configuration) \n"\
-"Or create a free account at https://app.clear.ml/settings/workspace-configuration\n\n" \
-"In settings page, press \"Create new credentials\", then press \"Copy to clipboard\".\n" \
-"\n" \
+description = (
+"\n"
+"Please create new clearml credentials through the settings page in "
+"your `clearml-server` web app (e.g. http://localhost:8080//settings/workspace-configuration) \n"
+"Or create a free account at https://app.clear.ml/settings/workspace-configuration\n\n"
+'In settings page, press "Create new credentials", then press "Copy to clipboard".\n'
+"\n"
 "Paste copied configuration here:\n"
+)

 host_description = """
 Editing configuration file: {CONFIG_FILE}

@@ -30,9 +32,9 @@ Enter the url of the clearml-server's Web service, for example: {HOST}

 # noinspection PyBroadException
 try:
-def_host = ENV_HOST.get(default=config_obj.get("api.web_server")) or 'http://localhost:8080'
+def_host = ENV_HOST.get(default=config_obj.get("api.web_server")) or "http://localhost:8080"
 except Exception:
-def_host = 'http://localhost:8080'
+def_host = "http://localhost:8080"


 def validate_file(string):

@@ -51,35 +53,38 @@ def main():

 p = argparse.ArgumentParser(description=__doc__)
 p.add_argument(
-"--file", "-F", help="Target configuration file path (default is %(default)s)",
+"--file",
+"-F",
+help="Target configuration file path (default is %(default)s)",
 default=default_config_file,
-type=validate_file
+type=validate_file,
 )

 args = p.parse_args()

-print('ClearML SDK setup process')
+print("ClearML SDK setup process")

 conf_file = Path(os.path.expanduser(args.file)).absolute()
 if conf_file.exists() and conf_file.is_file() and conf_file.stat().st_size > 0:
-print('Configuration file already exists: {}'.format(str(conf_file)))
-print('Leaving setup, feel free to edit the configuration file.')
+print("Configuration file already exists: {}".format(str(conf_file)))
+print("Leaving setup, feel free to edit the configuration file.")
 return
-print(description, end='')
-sentinel = ''
-parse_input = ''
+print(description, end="")
+sentinel = ""
+parse_input = ""

 if os.environ.get("JPY_PARENT_PID"):
 # When running from a colab instance and calling clearml-init
 # colab will squish the api credentials into a single line
 # The regex splits this single line based on 2 spaces or more
 import re

 api_input = input()
-parse_input = '\n'.join(re.split(r" {2,}", api_input))
+parse_input = "\n".join(re.split(r" {2,}", api_input))
 else:
 for line in iter(input, sentinel):
-parse_input += line+'\n'
-if line.rstrip() == '}':
+parse_input += line + "\n"
+if line.rstrip() == "}":
 break

 credentials = None

@@ -102,11 +107,14 @@ def main():
 files_server = files_server or None

 while not credentials or set(credentials) != {"access_key", "secret_key"}:
-print('Could not parse credentials, please try entering them manually.')
+print("Could not parse credentials, please try entering them manually.")
 credentials = read_manual_credentials()

-print('Detected credentials key=\"{}\" secret=\"{}\"'.format(credentials['access_key'],
-credentials['secret_key'][0:4] + "***"))
+print(
+'Detected credentials key="{}" secret="{}"'.format(
+credentials["access_key"], credentials["secret_key"][0:4] + "***"
+)
+)
 web_input = True
 if web_server:
 host = web_server

@@ -114,47 +122,43 @@ def main():
 web_input = False
 host = api_server
 else:
-print(host_description.format(CONFIG_FILE=args.file, HOST=def_host,))
-host = input_url('WEB Host', '')
+print(
+host_description.format(
+CONFIG_FILE=args.file,
+HOST=def_host,
+)
+)
+host = input_url("WEB Host", "")

 parsed_host = verify_url(host)
 api_host, files_host, web_host = parse_known_host(parsed_host)

-hosts_dict = {
-"API": api_server,
-"Files": files_server,
-"Web": web_server
-}
+hosts_dict = {"API": api_server, "Files": files_server, "Web": web_server}

-infered_hosts_dict = {
-"API": api_host,
-"Files": files_host,
-"Web": web_host
-}
+infered_hosts_dict = {"API": api_host, "Files": files_host, "Web": web_host}

 for host_type, url in six.iteritems(hosts_dict):
-if url is None or not (
-url.startswith('http://') or url.startswith('https://')
-):
+if url is None or not (url.startswith("http://") or url.startswith("https://")):
 infered_host_url = infered_hosts_dict[host_type]
 if infered_host_url != "":
 hosts_dict[host_type] = infered_host_url
 else:
 hosts_dict[host_type] = input_url(host_type)

-api_host, files_host, web_host = hosts_dict['API'], hosts_dict['Files'], hosts_dict['Web']
+api_host, files_host, web_host = hosts_dict["API"], hosts_dict["Files"], hosts_dict["Web"]

 # one of these two we configured
 if not web_input:
-web_host = input_url('Web Application Host', web_host)
+web_host = input_url("Web Application Host", web_host)
 else:
 if web_input is True and not web_host:
 web_host = host

-print('\nClearML Hosts configuration:\nWeb App: {}\nAPI: {}\nFile Store: {}\n'.format(
-web_host, api_host, files_host))
+print(
+"\nClearML Hosts configuration:\nWeb App: {}\nAPI: {}\nFile Store: {}\n".format(web_host, api_host, files_host)
+)

-if len(set([web_host, api_host, files_host])) != 3:
+if len({web_host, api_host, files_host}) != 3:
 raise ValueError("All three server URLs should be distinct")

 retry = 1

@@ -166,88 +170,94 @@ def main():
 if retry < max_retries + 1:
 credentials = read_manual_credentials()
 else:
-print('Exiting setup without creating configuration file')
+print("Exiting setup without creating configuration file")
 return

 # noinspection PyBroadException
 try:
-default_sdk_conf = Path(__file__).absolute().parents[2] / 'config/default/sdk.conf'
-with open(str(default_sdk_conf), 'rt') as f:
+default_sdk_conf = Path(__file__).absolute().parents[2] / "config/default/sdk.conf"
+with open(str(default_sdk_conf), "rt") as f:
 default_sdk = f.read()
 except Exception:
-print('Error! Could not read default configuration file')
+print("Error! Could not read default configuration file")
 return
 # noinspection PyBroadException
 try:
-with open(str(conf_file), 'wt') as f:
-header = '# ClearML SDK configuration file\n' \
-'api {\n' \
-' # Notice: \'host\' is the api server (default port 8008), not the web server.\n' \
-' api_server: %s\n' \
-' web_server: %s\n' \
-' files_server: %s\n' \
-' # Credentials are generated using the webapp, %s/settings\n' \
-' # Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY\n' \
-' credentials {"access_key": "%s", "secret_key": "%s"}\n' \
-'}\n' \
-'sdk ' % (api_host, web_host, files_host,
-web_host, credentials['access_key'], credentials['secret_key'])
+with open(str(conf_file), "wt") as f:
+header = (
+"# ClearML SDK configuration file\n"
+"api {\n"
+" # Notice: 'host' is the api server (default port 8008), not the web server.\n"
+" api_server: %s\n"
+" web_server: %s\n"
+" files_server: %s\n"
+" # Credentials are generated using the webapp, %s/settings\n"
+" # Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY\n"
+' credentials {"access_key": "%s", "secret_key": "%s"}\n'
+"}\n"
+"sdk "
+% (api_host, web_host, files_host, web_host, credentials["access_key"], credentials["secret_key"])
+)
 f.write(header)
 f.write(default_sdk)
 except Exception:
-print('Error! Could not write configuration file at: {}'.format(str(conf_file)))
+print("Error! Could not write configuration file at: {}".format(str(conf_file)))
 return

-print('\nNew configuration stored in {}'.format(str(conf_file)))
-print('ClearML setup completed successfully.')
+print("\nNew configuration stored in {}".format(str(conf_file)))
+print("ClearML setup completed successfully.")


 def parse_known_host(parsed_host):
-if parsed_host.netloc.startswith('demoapp.'):
+if parsed_host.netloc.startswith("demoapp."):
 # this is our demo server
-api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapp.', 'demoapi.', 1) + parsed_host.path
+api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("demoapp.", "demoapi.", 1) + parsed_host.path
 web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
-files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapp.', 'demofiles.',
-1) + parsed_host.path
-elif parsed_host.netloc.startswith('app.'):
+files_host = (
+parsed_host.scheme + "://" + parsed_host.netloc.replace("demoapp.", "demofiles.", 1) + parsed_host.path
+)
+elif parsed_host.netloc.startswith("app."):
 # this is our application server
-api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('app.', 'api.', 1) + parsed_host.path
+api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("app.", "api.", 1) + parsed_host.path
 web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
-files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('app.', 'files.', 1) + parsed_host.path
-elif parsed_host.netloc.startswith('demoapi.'):
-print('{} is the api server, we need the web server. Replacing \'demoapi.\' with \'demoapp.\''.format(
-parsed_host.netloc))
+files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("app.", "files.", 1) + parsed_host.path
+elif parsed_host.netloc.startswith("demoapi."):
+print(
+"{} is the api server, we need the web server. Replacing 'demoapi.' with 'demoapp.'".format(
+parsed_host.netloc
+)
+)
 api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
-web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapi.', 'demoapp.', 1) + parsed_host.path
-files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapi.', 'demofiles.',
-1) + parsed_host.path
-elif parsed_host.netloc.startswith('api.'):
-print('{} is the api server, we need the web server. Replacing \'api.\' with \'app.\''.format(
-parsed_host.netloc))
+web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("demoapi.", "demoapp.", 1) + parsed_host.path
+files_host = (
+parsed_host.scheme + "://" + parsed_host.netloc.replace("demoapi.", "demofiles.", 1) + parsed_host.path
+)
+elif parsed_host.netloc.startswith("api."):
+print("{} is the api server, we need the web server. Replacing 'api.' with 'app.'".format(parsed_host.netloc))
 api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
-web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('api.', 'app.', 1) + parsed_host.path
-files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('api.', 'files.', 1) + parsed_host.path
+web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("api.", "app.", 1) + parsed_host.path
+files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace("api.", "files.", 1) + parsed_host.path
 elif parsed_host.port == 8008:
-print('Port 8008 is the api port. Replacing 8008 with 8080 for Web application')
+print("Port 8008 is the api port. Replacing 8008 with 8080 for Web application")
 api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
-web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8008', ':8080', 1) + parsed_host.path
-files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8008', ':8081', 1) + parsed_host.path
+web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(":8008", ":8080", 1) + parsed_host.path
+files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(":8008", ":8081", 1) + parsed_host.path
 elif parsed_host.port == 8080:
-print('Port 8080 is the web port. Replacing 8080 with 8008 for API server')
-api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8080', ':8008', 1) + parsed_host.path
+print("Port 8080 is the web port. Replacing 8080 with 8008 for API server")
+api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(":8080", ":8008", 1) + parsed_host.path
 web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
-files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8080', ':8081', 1) + parsed_host.path
+files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(":8080", ":8081", 1) + parsed_host.path
 elif parsed_host.port is None:
-print('Web app hosted on standard port using ' + parsed_host.scheme + ' protocol.')
-print('Assuming files and api ports are unchanged and use the same (' + parsed_host.scheme + ') protocol')
-api_host = parsed_host.scheme + "://" + parsed_host.netloc + ':8008' + parsed_host.path
+print("Web app hosted on standard port using " + parsed_host.scheme + " protocol.")
+print("Assuming files and api ports are unchanged and use the same (" + parsed_host.scheme + ") protocol")
+api_host = parsed_host.scheme + "://" + parsed_host.netloc + ":8008" + parsed_host.path
 web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
-files_host = parsed_host.scheme + "://" + parsed_host.netloc + ':8081' + parsed_host.path
+files_host = parsed_host.scheme + "://" + parsed_host.netloc + ":8081" + parsed_host.path
 else:
 print("Warning! Could not parse host name")
-api_host = ''
-web_host = ''
-files_host = ''
+api_host = ""
+web_host = ""
+files_host = ""

 return api_host, files_host, web_host

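As the rewritten `parse_known_host` shows, clearml-init derives the API and file-server endpoints from the web URL by swapping the `app.` / `api.` / `files.` subdomains, or the 8080 / 8008 / 8081 ports for self-hosted servers. A standalone illustration of that convention (example URLs only):

```python
from urllib.parse import urlparse

web = urlparse("https://app.clear.ml")
api_host = web.scheme + "://" + web.netloc.replace("app.", "api.", 1)      # https://api.clear.ml
files_host = web.scheme + "://" + web.netloc.replace("app.", "files.", 1)  # https://files.clear.ml

local = urlparse("http://localhost:8080")  # self-hosted: web UI on 8080
api_local = local.scheme + "://" + local.netloc.replace(":8080", ":8008", 1)    # API on 8008
files_local = local.scheme + "://" + local.netloc.replace(":8080", ":8081", 1)  # files on 8081
```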
@@ -256,18 +266,25 @@ def verify_credentials(api_host, credentials):
 """check if the credentials are valid"""
 # noinspection PyBroadException
 try:
-print('Verifying credentials ...')
+print("Verifying credentials ...")
 if api_host:
-Session(api_key=credentials['access_key'], secret_key=credentials['secret_key'], host=api_host,
-http_retries_config={"total": 2})
-print('Credentials verified!')
+Session(
+api_key=credentials["access_key"],
+secret_key=credentials["secret_key"],
+host=api_host,
+http_retries_config={"total": 2},
+)
+print("Credentials verified!")
 return True
 else:
 print("Can't verify credentials")
 return False
 except Exception:
-print('Error: could not verify credentials: key={} secret={}'.format(
-credentials.get('access_key'), credentials.get('secret_key')))
+print(
+"Error: could not verify credentials: key={} secret={}".format(
+credentials.get("access_key"), credentials.get("secret_key")
+)
+)
 return False

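The reformatted call above is the whole credential check: constructing a `Session` against the API host either succeeds or raises. A minimal sketch of the same idea; the import path and placeholder keys are assumptions:

```python
from clearml.backend_api import Session  # assumed import path

def credentials_ok(api_host, access_key, secret_key):
    # Session authenticates on construction, so success means the keys are valid.
    try:
        Session(api_key=access_key, secret_key=secret_key, host=api_host,
                http_retries_config={"total": 2})
        return True
    except Exception:
        return False
```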
@@ -292,18 +309,18 @@ def get_parsed_field(parsed_config, fields):


 def read_manual_credentials():
-print('Enter user access key: ', end='')
+print("Enter user access key: ", end="")
 access_key = input()
-print('Enter user secret: ', end='')
+print("Enter user secret: ", end="")
 secret_key = input()
 return {"access_key": access_key, "secret_key": secret_key}


 def input_url(host_type, host=None):
 while True:
-print('{} configured to: {}'.format(host_type, '[{}] '.format(host) if host else ''), end='')
+print("{} configured to: {}".format(host_type, "[{}] ".format(host) if host else ""), end="")
 parse_input = input()
-if host and (not parse_input or parse_input.lower() == 'yes' or parse_input.lower() == 'y'):
+if host and (not parse_input or parse_input.lower() == "yes" or parse_input.lower() == "y"):
 break
 parsed_host = verify_url(parse_input) if parse_input else None
 if parse_input and parsed_host:

@@ -313,29 +330,34 @@ def input_url(host_type, host=None):


 def input_host_port(host_type, parsed_host):
-print('Enter port for {} host '.format(host_type), end='')
+print("Enter port for {} host ".format(host_type), end="")
 replace_port = input().lower()
-return parsed_host.scheme + "://" + parsed_host.netloc + (
-':{}'.format(replace_port) if replace_port else '') + parsed_host.path
+return (
+parsed_host.scheme
++ "://"
++ parsed_host.netloc
++ (":{}".format(replace_port) if replace_port else "")
++ parsed_host.path
+)


 def verify_url(parse_input):
 # noinspection PyBroadException
 try:
-if not parse_input.startswith('http://') and not parse_input.startswith('https://'):
+if not parse_input.startswith("http://") and not parse_input.startswith("https://"):
 # if we have a specific port, use http prefix, otherwise assume https
-if ':' in parse_input:
-parse_input = 'http://' + parse_input
+if ":" in parse_input:
+parse_input = "http://" + parse_input
 else:
-parse_input = 'https://' + parse_input
+parse_input = "https://" + parse_input
 parsed_host = urlparse(parse_input)
-if parsed_host.scheme not in ('http', 'https'):
+if parsed_host.scheme not in ("http", "https"):
 parsed_host = None
 except Exception:
 parsed_host = None
-print('Could not parse url {}\nEnter your clearml-server host: '.format(parse_input), end='')
+print("Could not parse url {}\nEnter your clearml-server host: ".format(parse_input), end="")
 return parsed_host


-if __name__ == '__main__':
+if __name__ == "__main__":
 main()
@@ -123,6 +123,7 @@ class Dataset(object):
 __hyperparams_section = "Datasets"
 __datasets_runtime_prop = "datasets"
 __orig_datasets_runtime_prop_prefix = "orig_datasets"
+__dataset_struct = "Dataset Struct"
 __preview_media_max_file_size = deferred_config("dataset.preview.media.max_file_size", 5 * 1024 * 1024, transform=int)
 __preview_tabular_table_count = deferred_config("dataset.preview.tabular.table_count", 10, transform=int)
 __preview_tabular_row_count = deferred_config("dataset.preview.tabular.row_count", 10, transform=int)

@@ -2081,13 +2082,35 @@ class Dataset(object):
 self.update_changed_files(num_files_added=count - modified_count, num_files_modified=modified_count)
 return count - modified_count, modified_count

+def _repair_dependency_graph(self):
+"""
+Repair dependency graph via the Dataset Struct configuration object.
+Might happen for datasets with external files in old clearml versions
+"""
+try:
+dataset_struct = self._task.get_configuration_object_as_dict(Dataset.__dataset_struct)
+new_dependency_graph = {}
+for dataset in dataset_struct.values():
+new_dependency_graph[dataset["job_id"]] = [dataset_struct[p]["job_id"] for p in dataset["parents"]]
+self._dependency_graph = new_dependency_graph
+except Exception as e:
+LoggerRoot.get_base_logger().warning("Could not repair dependency graph. Error is: {}".format(e))
+
 def _update_dependency_graph(self):
 """
-Update the dependency graph based on the current self._dataset_file_entries state
+Update the dependency graph based on the current self._dataset_file_entries
+and self._dataset_link_entries states
 :return:
 """
 # collect all dataset versions
-used_dataset_versions = set(f.parent_dataset_id for f in self._dataset_file_entries.values())
+used_dataset_versions = set(f.parent_dataset_id for f in self._dataset_file_entries.values()) | set(
+f.parent_dataset_id for f in self._dataset_link_entries.values()
+)
+for dataset_id in used_dataset_versions:
+if dataset_id not in self._dependency_graph and dataset_id != self._id:
+self._repair_dependency_graph()
+break
+
 used_dataset_versions.add(self._id)
 current_parents = self._dependency_graph.get(self._id) or []
 # remove parent versions we no longer need from the main version list
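The repair path reads the `Dataset Struct` configuration object attached to the dataset task. Judging from how it is consumed here (and written later in this diff), the payload maps node indices to entries with a `job_id` and parent indices; a hypothetical example of the shape and the rebuild step:

```python
# Hypothetical "Dataset Struct" payload: keys are node indices, parents refer to indices.
dataset_struct = {
    "0": {"job_id": "aaaa1111", "parents": []},
    "1": {"job_id": "bbbb2222", "parents": ["0"]},
}
# Rebuild the dependency graph keyed by dataset/task id, as _repair_dependency_graph does.
dependency_graph = {
    node["job_id"]: [dataset_struct[p]["job_id"] for p in node["parents"]]
    for node in dataset_struct.values()
}
# {'aaaa1111': [], 'bbbb2222': ['aaaa1111']}
```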
@@ -2296,29 +2319,8 @@ class Dataset(object):
 Notice you should unlock it manually, or wait for the process to finish for auto unlocking.
 :param max_workers: Number of threads to be spawned when getting dataset files. Defaults to no multi-threading.
 """
-target_folder = (
-Path(target_folder)
-if target_folder
-else self._create_ds_target_folder(
-lock_target_folder=lock_target_folder
-)[0]
-).as_posix()
-dependencies = self._get_dependencies_by_order(
-include_unused=False, include_current=True
-)
-links = {}
-for dependency in dependencies:
-ds = Dataset.get(dependency)
-links.update(ds._dataset_link_entries)
-links.update(self._dataset_link_entries)
-
 def _download_link(link, target_path):
 if os.path.exists(target_path):
-LoggerRoot.get_base_logger().info(
-"{} already exists. Skipping downloading {}".format(
-target_path, link
-)
-)
 return
 ok = False
 error = None

@@ -2341,28 +2343,41 @@ class Dataset(object):
 LoggerRoot.get_base_logger().info(log_string)
 else:
 link.size = Path(target_path).stat().st_size
-if not max_workers:
-for relative_path, link in links.items():
+def _get_target_path(relative_path, target_folder):
 if not is_path_traversal(target_folder, relative_path):
-target_path = os.path.join(target_folder, relative_path)
+return os.path.join(target_folder, relative_path)
 else:
 LoggerRoot.get_base_logger().warning(
 "Ignoring relative path `{}`: it must not traverse directories".format(relative_path)
 )
-target_path = os.path.join(target_folder, os.path.basename(relative_path))
+return os.path.join(target_folder, os.path.basename(relative_path))
+
+def _submit_download_link(relative_path, link, target_folder, pool=None):
+if link.parent_dataset_id != self.id and not link.parent_dataset_id.startswith("offline-"):
+return
+target_path = _get_target_path(relative_path, target_folder)
+if pool is None:
 _download_link(link, target_path)
 else:
-with ThreadPoolExecutor(max_workers=max_workers) as pool:
-for relative_path, link in links.items():
-if not is_path_traversal(target_folder, relative_path):
-target_path = os.path.join(target_folder, relative_path)
-else:
-LoggerRoot.get_base_logger().warning(
-"Ignoring relative path `{}`: it must not traverse directories".format(relative_path)
-)
-target_path = os.path.join(target_folder, os.path.basename(relative_path))
 pool.submit(_download_link, link, target_path)
+
+target_folder = (
+Path(target_folder)
+if target_folder
+else self._create_ds_target_folder(
+lock_target_folder=lock_target_folder
+)[0]
+).as_posix()
+
+if not max_workers:
+for relative_path, link in self._dataset_link_entries.items():
+_submit_download_link(relative_path, link, target_folder)
+else:
+with ThreadPoolExecutor(max_workers=max_workers) as pool:
+for relative_path, link in self._dataset_link_entries.items():
+_submit_download_link(relative_path, link, target_folder, pool=pool)

 def _extract_dataset_archive(
 self,
 force=False,
@@ -2586,6 +2601,7 @@ class Dataset(object):
 :param include_current: If True include the current dataset ID as the last ID in the list
 :return: list of str representing the datasets id
 """
+self._update_dependency_graph()
 roots = [self._id]
 dependencies = []
 # noinspection DuplicatedCode

@@ -2742,6 +2758,13 @@ class Dataset(object):
 for k, v in dataset._dependency_graph.items() # noqa
 }
 # noinspection PyProtectedMember
+for entry in dataset._dataset_file_entries.values():
+if entry.parent_dataset_id.startswith("offline-"):
+entry.parent_dataset_id = id
+for entry in dataset._dataset_link_entries.values():
+if entry.parent_dataset_id.startswith("offline-"):
+entry.parent_dataset_id = id
+# noinspection PyProtectedMember
 dataset._update_dependency_graph()
 # noinspection PyProtectedMember
 dataset._log_dataset_page()

@@ -3027,7 +3050,7 @@ class Dataset(object):
 # fetch the parents of this version (task) based on what we have on the Task itself.
 # noinspection PyBroadException
 try:
-dataset_version_node = task.get_configuration_object_as_dict("Dataset Struct")
+dataset_version_node = task.get_configuration_object_as_dict(Dataset.__dataset_struct)
 # fine the one that is us
 for node in dataset_version_node.values():
 if node["job_id"] != id_:

@@ -3056,7 +3079,7 @@ class Dataset(object):
 dataset_struct[indices[id_]]["parents"] = [indices[p] for p in parents]
 # noinspection PyProtectedMember
 self._task._set_configuration(
-name="Dataset Struct",
+name=Dataset.__dataset_struct,
 description="Structure of the dataset",
 config_type="json",
 config_text=json.dumps(dataset_struct, indent=2),

@@ -3234,7 +3257,8 @@ class Dataset(object):

 return None

-errors = pool.map(copy_file, self._dataset_file_entries.values())
+errors = list(pool.map(copy_file, self._dataset_file_entries.values()))
+errors.extend(list(pool.map(copy_file, self._dataset_link_entries.values())))

 CacheManager.get_cache_manager(cache_context=self.__cache_context).unlock_cache_folder(
 ds_base_folder.as_posix())
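Wrapping `pool.map` in `list()` matters because `concurrent.futures.Executor.map` hands back a lazy result iterator: materializing it collects the results (so a second batch can be appended with `extend`) and surfaces any exception raised by a worker before the cache folder is unlocked. A small self-contained illustration:

```python
from concurrent.futures import ThreadPoolExecutor

def copy_item(i):
    if i == 2:
        raise IOError("copy failed")

with ThreadPoolExecutor(max_workers=4) as pool:
    lazy = pool.map(copy_item, range(4))  # results are retrieved lazily
    try:
        results = list(lazy)              # forces retrieval; the IOError surfaces here
    except IOError as err:
        print("a worker failed:", err)
```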
clearml/external/kerastuner.py (vendored): 4 lines changed
@@ -1,12 +1,10 @@
 from typing import Optional
 from logging import getLogger
+from ..task import Task

 _logger = getLogger("clearml.external.kerastuner")


-from ..task import Task
-
-
 try:
 import pandas as pd
 except ImportError:
@@ -211,8 +211,8 @@ class StorageManager(object):
 return Session.get_files_server_host()

 @classmethod
-def upload_folder(cls, local_folder, remote_url, match_wildcard=None):
-# type: (str, str, Optional[str]) -> Optional[str]
+def upload_folder(cls, local_folder, remote_url, match_wildcard=None, retries=None):
+# type: (str, str, Optional[str], Optional[int]) -> Optional[str]
 """
 Upload local folder recursively to a remote storage, maintaining the sub folder structure
 in the remote storage.

@@ -231,6 +231,7 @@ class StorageManager(object):
 Example: `*.json`
 Notice: target file size/date are not checked. Default True, always upload.
 Notice if uploading to http, we will always overwrite the target.
+:param int retries: Number of retries before failing to upload a file in the folder.
 :return: Newly uploaded remote URL or None on error.
 """

@@ -250,6 +251,7 @@ class StorageManager(object):
 pool.apply_async(
 helper.upload,
 args=(str(path), str(path).replace(local_folder, remote_url)),
+kwds={"retries": retries if retries else cls._file_upload_retries}
 )
 )

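With the new parameter, callers can cap per-file upload retries explicitly; a short usage sketch with placeholder paths and bucket names:

```python
from clearml import StorageManager

# Upload a local folder to object storage, retrying each failed file up to 3 times.
remote = StorageManager.upload_folder(
    local_folder="/tmp/artifacts",          # placeholder local path
    remote_url="s3://my-bucket/artifacts",  # placeholder destination
    match_wildcard="*.json",                # optional: only upload matching files
    retries=3,
)
print(remote)  # newly uploaded remote URL, or None on error
```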
@@ -4148,6 +4148,9 @@ class Task(_Task):
 )
 )
 self.flush(wait_for_uploads=True)
+
+# if running remotely, we want the daemon to kill us
+if self.running_locally():
 self.stopped(status_reason='USER ABORTED')

 if self._dev_worker:
@@ -1 +1 @@
-__version__ = "1.16.2"
+__version__ = "1.16.3"
@@ -22,13 +22,13 @@ Using the **ClearML** [Logger](https://github.com/allegroai/clearml/blob/master/
 Additionally, the **ClearML** Logger module provides methods that allow you to do the following:
 
 * Get the [current logger]()
-* Overrride the ClearML configuration file with a [default upload destination]() for images and files
+* Override the ClearML configuration file with a [default upload destination]() for images and files
 
 ## Graphs and Images
 
 ### Scalar Metrics
 
-Use to report scalar metrics by iteration as a line plot.
+Report scalar metrics by iteration as a line plot.
 
 First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scalar_reporting.py)) with the following method.
 
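A short sketch of the scalar call documented in the following hunk, assuming a task has been initialized with `Task.init` (the project and task names are placeholders):

```python
from clearml import Task

task = Task.init(project_name="examples", task_name="scalar reporting")  # placeholder names
logger = task.get_logger()

for iteration in range(10):
    # One point per iteration on the "train" series of the "loss" plot
    logger.report_scalar(title="loss", series="train", value=1.0 / (iteration + 1), iteration=iteration)
```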
@@ -99,7 +99,7 @@ def report_scalar(self, title, series, value, iteration)
 
 ### Histograms
 
-Use to report any data by iteration as a histogram.
+Report any data by iteration as a histogram.
 
 First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scatter_hist_confusion_mat_reporting.py)) with the following method.
 
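A minimal histogram sketch using the arguments documented below; it assumes a task was already initialized so `Logger.current_logger()` returns a live logger:

```python
import numpy as np
from clearml import Logger

logger = Logger.current_logger()  # assumes Task.init was already called
values = np.random.randint(10, size=10)
logger.report_histogram(title="histogram example", series="random", values=values, iteration=0)
```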
@@ -197,7 +197,7 @@ def report_histogram(self, title, series, values, iteration, labels=None, xlabel
 
 ### Line Plots
 
-Use to report any data by iteration as a single or multiple line plot.
+Report any data by iteration as a single or multiple line plot.
 
 First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scatter_hist_confusion_mat_reporting.py)) with the following method.
 
@@ -323,7 +323,7 @@ def report_line_plot(self, title, series, iteration, xaxis, yaxis, mode='lines',
 
 ### 2D Scatter Diagrams
 
-Use to report any vector data as a 2D scatter diagram.
+Report any vector data as a 2D scatter diagram.
 
 First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scatter_hist_confusion_mat_reporting.py)) with the following method.
 
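A 2D scatter sketch under the same assumption of an already-initialized task; the data is random placeholder content:

```python
import numpy as np
from clearml import Logger

logger = Logger.current_logger()  # assumes Task.init was already called
points = np.random.rand(50, 2)  # N x 2 array of (x, y) pairs
logger.report_scatter2d(
    title="scatter example",
    series="random points",
    scatter=points,
    iteration=0,
    xaxis="x",
    yaxis="y",
    mode="markers",
)
```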
@@ -459,7 +459,7 @@ def report_scatter2d(self, title, series, scatter, iteration, xaxis=None, yaxis=
 
 ### 3D Scatter Diagrams
 
-Use to report any array data as a 3D scatter diagram.
+Report any array data as a 3D scatter diagram.
 
 First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/3d_plots_reporting.py)) with the following method.
 
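The 3D variant follows the same pattern with an N x 3 array; a minimal sketch with placeholder data:

```python
import numpy as np
from clearml import Logger

logger = Logger.current_logger()  # assumes Task.init was already called
cloud = np.random.randint(10, size=(100, 3))  # N x 3 array of (x, y, z) points
logger.report_scatter3d(title="3d scatter example", series="random cloud", scatter=cloud, iteration=0, mode="markers")
```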
@@ -584,7 +584,7 @@ def report_scatter3d(self, title, series, scatter, iteration, labels=None, mode=
 <li><code>lines+markers</code>
 </li>
 </ul>
-The default values is <code>lines</code>.
+The default value is <code>lines</code>.
 </td>
 <td>No
 </td>
@@ -595,7 +595,7 @@ def report_scatter3d(self, title, series, scatter, iteration, labels=None, mode=
 
 ### Confusion Matrices
 
-Use to report a heat-map matrix as a confusion matrix. You can also plot a heat-map as a [surface diagram](#surface-diagrams).
+Report a heat-map matrix as a confusion matrix. You can also plot a heat-map as a [surface diagram](#surface-diagrams).
 
 First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/scatter_hist_confusion_mat_reporting.py)) with the following method.
 
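A minimal confusion-matrix sketch; the matrix and the class labels are placeholders, and an initialized task is assumed:

```python
import numpy as np
from clearml import Logger

logger = Logger.current_logger()  # assumes Task.init was already called
confusion = np.random.randint(10, size=(5, 5))
logger.report_confusion_matrix(
    title="confusion matrix example",
    series="epoch 0",
    matrix=confusion,
    iteration=0,
    xlabels=["a", "b", "c", "d", "e"],
    ylabels=["a", "b", "c", "d", "e"],
)
```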
@@ -687,7 +687,7 @@ def report_confusion_matrix(self, title, series, matrix, iteration, xlabels=None
 
 ### Surface Diagrams
 
-Use to plot a heat-map matrix as a surface diagram. You can also plot a heat-map as a [confusion matrix](#confusion-matrices).
+Plot a heat-map matrix as a surface diagram. You can also plot a heat-map as a [confusion matrix](#confusion-matrices).
 
 First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/reporting/3d_plots_reporting.py)) with the following method.
 
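The surface call takes the same heat-map style matrix; a short sketch with random placeholder data, assuming an initialized task:

```python
import numpy as np
from clearml import Logger

logger = Logger.current_logger()  # assumes Task.init was already called
heat_map = np.random.rand(10, 10)
logger.report_surface(title="surface example", series="heat-map", matrix=heat_map, iteration=0)
```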
@@ -818,7 +818,7 @@ def report_surface(self, title, series, matrix, iteration, xlabels=None, ylabels
 
 ### Images
 
-Use to report an image and upload its contents to the bucket specified in the **ClearML** configuration file,
+Report an image and upload its contents to the bucket specified in the **ClearML** configuration file,
 or a [default upload destination](#set-default-upload-destination), if you set a default.
 
 First [get the current logger](#get-the-current-logger) and then use it (see an [example script](https://github.com/allegroai/clearml/blob/master/examples/manual_reporting.py)) with the following method.
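An image-reporting sketch using the `local_path` form documented in the parameter table below; the file path is a placeholder and an initialized task is assumed:

```python
from clearml import Logger

logger = Logger.current_logger()  # assumes Task.init was already called
# The file path is a placeholder; it should point at an existing image on disk
logger.report_image(title="debug samples", series="input", iteration=0, local_path="./sample.jpg")
```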
@@ -896,7 +896,7 @@ def report_image(self, title, series, iteration, local_path=None, matrix=None, m
 </td>
 <td>ndarray
 </td>
-<td>A 3D numpy.ndarray object containing image data (RGB). If <code>path</code> is not specified, then <code>matrix</code> is required. The default values is <code>None</code>.
+<td>A 3D numpy.ndarray object containing image data (RGB). If <code>path</code> is not specified, then <code>matrix</code> is required. The default value is <code>None</code>.
 </td>
 <td>No
 </td>
@@ -917,7 +917,7 @@ def report_image(self, title, series, iteration, local_path=None, matrix=None, m
 </td>
 <td>string
 </td>
-<td>The path of the image file. If <code>matrix</code> is not specified, then <code>path</code> is required. The default values is <code>None</code>.
+<td>The path of the image file. If <code>matrix</code> is not specified, then <code>path</code> is required. The default value is <code>None</code>.
 </td>
 <td>No
 </td>
@@ -948,13 +948,13 @@ By setting the `CLEARML_LOG_ENVIRONMENT` environment variable, make **ClearML**
 
 * All environment variables
 
-    export CLEARML_LOG_ENVIRONMENT="*"
+    export CLEARML_LOG_ENVIRONMENT=*
 
 * Specific environment variables
 
 For example, log `PWD` and `PYTHONPATH`
 
-    export CLEARML_LOG_ENVIRONMENT="PWD,PYTHONPATH"
+    export CLEARML_LOG_ENVIRONMENT=PWD,PYTHONPATH
 
 * No environment variables
 
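The same selection can be made from Python instead of the shell; a sketch under the assumption that clearml reads `CLEARML_LOG_ENVIRONMENT` when `Task.init()` runs in the same process (project and task names are placeholders):

```python
import os

# Set before the Task is created, on the assumption that clearml reads
# CLEARML_LOG_ENVIRONMENT when Task.init() runs in this process.
os.environ["CLEARML_LOG_ENVIRONMENT"] = "PWD,PYTHONPATH"

from clearml import Task  # noqa: E402

task = Task.init(project_name="examples", task_name="env logging")  # placeholder names
```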
@@ -1368,7 +1368,7 @@ None.
 
 ### Set Default Upload Destination
 
-Use to specify the default destination storage location used for uploading images.
+Specify the default destination storage location used for uploading images.
 Images are uploaded and a link to the image is reported.
 
 Credentials for the storage location are in the global configuration file (for example, on Linux, <code>~/clearml.conf</code>).
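A sketch of setting that default destination at runtime; the bucket URI is a placeholder and an initialized task is assumed:

```python
from clearml import Logger

logger = Logger.current_logger()  # assumes Task.init was already called
# The bucket URI is a placeholder; credentials come from ~/clearml.conf
logger.set_default_upload_destination("s3://my-bucket/debug-images/")
```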
@@ -6,7 +6,8 @@ def main():
 
     print("STEP1 : Downloading CSV dataset")
     csv_file_path = manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv")
+        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv"
+    )
 
     print("STEP2 : Creating a dataset")
     # By default, clearml data uploads to the clearml fileserver. Adding output_uri argument to the create() method
@@ -23,5 +24,5 @@ def main():
     print("We are done, have a great day :)")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -42,19 +42,13 @@ dataset_path = Dataset.get(
 # Dataset and Dataloader initializations
 transform = transforms.Compose([transforms.ToTensor()])
 
-trainset = datasets.CIFAR10(
-    root=dataset_path, train=True, download=False, transform=transform
-)
+trainset = datasets.CIFAR10(root=dataset_path, train=True, download=False, transform=transform)
 trainloader = torch.utils.data.DataLoader(
     trainset, batch_size=params.get("batch_size", 4), shuffle=True, num_workers=10
 )
 
-testset = datasets.CIFAR10(
-    root=dataset_path, train=False, download=False, transform=transform
-)
-testloader = torch.utils.data.DataLoader(
-    testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10
-)
+testset = datasets.CIFAR10(root=dataset_path, train=False, download=False, transform=transform)
+testloader = torch.utils.data.DataLoader(testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10)
 
 classes = (
     "plane",
@@ -87,14 +81,10 @@ def predictions_gt_images_handler(engine, logger, *args, **kwargs):
         ax = fig.add_subplot(num_x, num_y, idx + 1, xticks=[], yticks=[])
         ax.imshow(trans(x[idx]))
         ax.set_title(
-            "{0} {1:.1f}% (label: {2})".format(
-                classes[preds], probs * 100, classes[y[idx]]
-            ),
+            "{0} {1:.1f}% (label: {2})".format(classes[preds], probs * 100, classes[y[idx]]),
             color=("green" if preds == y[idx] else "red"),
         )
-    logger.writer.add_figure(
-        "predictions vs actuals", figure=fig, global_step=engine.state.epoch
-    )
+    logger.writer.add_figure("predictions vs actuals", figure=fig, global_step=engine.state.epoch)
 
 
 class Net(nn.Module):
@@ -3,13 +3,9 @@ from clearml import StorageManager, Dataset
 
 manager = StorageManager()
 
-dataset_path = manager.get_local_copy(
-    remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
-)
+dataset_path = manager.get_local_copy(remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
 
-dataset = Dataset.create(
-    dataset_name="cifar_dataset", dataset_project="dataset_examples"
-)
+dataset = Dataset.create(dataset_name="cifar_dataset", dataset_project="dataset_examples")
 
 # Prepare and clean data here before it is added to the dataset
 
@@ -8,8 +8,9 @@ from clearml import Dataset, StorageManager
 
 def download_mnist_dataset():
     manager = StorageManager()
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"
 
@@ -28,7 +29,8 @@ def main():
 
     print("STEP3 : Creating the dataset")
     mnist_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset (Syncing Example)")
+        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset (Syncing Example)"
+    )
 
     print("STEP4 : Syncing train dataset")
     shutil.copytree(mnist_dataset_train, mnist_train_path)  # Populating dataset folder with TRAIN images
@@ -46,5 +48,5 @@ def main():
     print("We are done, have a great day :)")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -7,29 +7,30 @@ def main():
     manager = StorageManager()
 
     print("STEP1 : Downloading mnist dataset")
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"
 
     print("STEP2 : Creating the training dataset")
-    train_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Training Dataset")
+    train_dataset = Dataset.create(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Training Dataset")
     train_dataset.add_files(path=mnist_dataset_train, dataset_path="TRAIN")
     train_dataset.upload()
    train_dataset.finalize()
 
     print("STEP3 : Creating the testing dataset")
-    test_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Testing Dataset")
+    test_dataset = Dataset.create(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Testing Dataset")
     test_dataset.add_files(path=mnist_dataset_test, dataset_path="TEST")
     test_dataset.upload()
     test_dataset.finalize()
 
     print("STEP4 : Create a child dataset with both mnist train and test data")
     child_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Complete Dataset",
-        parent_datasets=[train_dataset.id, test_dataset.id])
+        dataset_project="dataset_examples/MNIST",
+        dataset_name="MNIST Complete Dataset",
+        parent_datasets=[train_dataset.id, test_dataset.id],
+    )
     child_dataset.upload()
     child_dataset.finalize()
 
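Once the child dataset above is finalized, a consumer can retrieve it by project and name. A small usage sketch of the consuming side:

```python
from clearml import Dataset

# Retrieve the finalized child dataset by project/name and download a local copy
dataset = Dataset.get(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Complete Dataset")
local_path = dataset.get_local_copy()
print(local_path)  # folder containing both the TRAIN and TEST files
```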
@@ -7,21 +7,22 @@ def main():
     manager = StorageManager()
 
     print("STEP1 : Downloading mnist dataset")
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"
 
     print("STEP2 : Creating the training dataset")
-    mnist_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Training Dataset")
+    mnist_dataset = Dataset.create(dataset_project="dataset_examples", dataset_name="MNIST Training Dataset")
     mnist_dataset.add_files(path=mnist_dataset_train, dataset_path="TRAIN")
     mnist_dataset.upload()
     mnist_dataset.finalize()
 
     print("STEP3 : Create a child dataset of mnist dataset using TEST Dataset")
     child_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset", parent_datasets=[mnist_dataset.id])
+        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset", parent_datasets=[mnist_dataset.id]
+    )
     child_dataset.add_files(path=mnist_dataset_test, dataset_path="TEST")
     child_dataset.upload()
     child_dataset.finalize()
@@ -29,5 +30,5 @@ def main():
     print("We are done, have a great day :)")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
@@ -43,9 +43,7 @@ class PreProcessor:
         # Make sure all spectrograms are the same size
         fixed_length = 3 * (self.configuration["resample_freq"] // 200)
         if melspectogram_db.shape[2] < fixed_length:
-            melspectogram_db = torch.nn.functional.pad(
-                melspectogram_db, (0, fixed_length - melspectogram_db.shape[2])
-            )
+            melspectogram_db = torch.nn.functional.pad(melspectogram_db, (0, fixed_length - melspectogram_db.shape[2]))
         else:
             melspectogram_db = melspectogram_db[:, :, :fixed_length]
 
@@ -64,16 +62,10 @@ class DataSetBuilder:
             alias="Raw Dataset",
         )
         # This will return the pandas dataframe we added in the previous task
-        self.metadata = (
-            Task.get_task(task_id=self.original_dataset._task.id)
-            .artifacts["metadata"]
-            .get()
-        )
+        self.metadata = Task.get_task(task_id=self.original_dataset._task.id).artifacts["metadata"].get()
         # This will download the data and return a local path to the data
         self.original_dataset_path = Path(
-            self.original_dataset.get_mutable_local_copy(
-                self.configuration["dataset_path"], overwrite=True
-            )
+            self.original_dataset.get_mutable_local_copy(self.configuration["dataset_path"], overwrite=True)
         )
 
         # Prepare a preprocessor that will handle each sample one by one
@@ -114,33 +106,23 @@ class DataSetBuilder:
         # audio side by side in the debug sample UI)
         for i, (_, data) in tqdm(enumerate(self.metadata.iterrows())):
             _, audio_file_path, label = data.tolist()
-            sample, sample_freq = torchaudio.load(
-                self.original_dataset_path / audio_file_path, normalize=True
-            )
+            sample, sample_freq = torchaudio.load(self.original_dataset_path / audio_file_path, normalize=True)
             spectrogram = self.preprocessor.preprocess_sample(sample, sample_freq)
             # Get only the filename and replace the extension, we're saving an image here
             new_file_name = os.path.basename(audio_file_path).replace(".wav", ".npy")
             # Get the correct folder, basically the original dataset folder + the new filename
-            spectrogram_path = (
-                self.original_dataset_path
-                / os.path.dirname(audio_file_path)
-                / new_file_name
-            )
+            spectrogram_path = self.original_dataset_path / os.path.dirname(audio_file_path) / new_file_name
             # Save the numpy array to disk
             np.save(spectrogram_path, spectrogram)
 
             # Log every 10th sample as a debug sample to the UI, so we can manually check it
             if i % 10 == 0:
                 # Convert the numpy array to a viewable JPEG
-                rgb_image = mpl.colormaps["viridis"](
-                    spectrogram[0, :, :].detach().numpy() * 255
-                )[:, :, :3]
+                rgb_image = mpl.colormaps["viridis"](spectrogram[0, :, :].detach().numpy() * 255)[:, :, :3]
                 title = os.path.splitext(os.path.basename(audio_file_path))[0]
 
                 # Report the image and the original sound, so they can be viewed side by side
-                self.preprocessed_dataset.get_logger().report_image(
-                    title=title, series="spectrogram", image=rgb_image
-                )
+                self.preprocessed_dataset.get_logger().report_image(title=title, series="spectrogram", image=rgb_image)
                 self.preprocessed_dataset.get_logger().report_media(
                     title=title,
                     series="original_audio",
@@ -152,9 +134,7 @@ class DataSetBuilder:
         # Again add some visualizations to the task
         self.log_dataset_statistics()
         # We still want the metadata
-        self.preprocessed_dataset._task.upload_artifact(
-            name="metadata", artifact_object=self.metadata
-        )
+        self.preprocessed_dataset._task.upload_artifact(name="metadata", artifact_object=self.metadata)
         self.preprocessed_dataset.finalize(auto_upload=True)
 
 
@@ -28,9 +28,7 @@ def get_urbansound8k():
         "https://allegro-datasets.s3.amazonaws.com/clearml/UrbanSound8K.zip",
         extract_archive=True,
     )
-    path_to_urbansound8k_csv = (
-        Path(path_to_urbansound8k) / "UrbanSound8K" / "metadata" / "UrbanSound8K.csv"
-    )
+    path_to_urbansound8k_csv = Path(path_to_urbansound8k) / "UrbanSound8K" / "metadata" / "UrbanSound8K.csv"
     path_to_urbansound8k_audio = Path(path_to_urbansound8k) / "UrbanSound8K" / "audio"
 
     return path_to_urbansound8k_csv, path_to_urbansound8k_audio
@@ -38,9 +36,7 @@ def get_urbansound8k():
 
 def log_dataset_statistics(dataset, metadata):
     histogram_data = metadata["class"].value_counts()
-    dataset.get_logger().report_table(
-        title="Raw Dataset Metadata", series="Raw Dataset Metadata", table_plot=metadata
-    )
+    dataset.get_logger().report_table(title="Raw Dataset Metadata", series="Raw Dataset Metadata", table_plot=metadata)
     dataset.get_logger().report_histogram(
         title="Class distribution",
         series="Class distribution",
@@ -3,6 +3,7 @@ clearml>=1.14.4
 matplotlib >= 3.1.1 ; python_version >= '3.6'
 matplotlib >= 2.2.4 ; python_version < '3.6'
 numpy != 1.24.0  # https://github.com/numpy/numpy/issues/22826
+bokeh_sampledata==2024.2 ; python_version >= '3.10'
 pandas
 pillow>=4.0
 plotly