Add requirement parsing optimization using entry point by default

allegroai 2020-04-26 22:58:13 +03:00
parent 7e4fba9d10
commit 9726f782f2
4 changed files with 32 additions and 7 deletions

View File

@@ -26,7 +26,7 @@ class ScriptRequirements(object):
def __init__(self, root_folder):
self._root_folder = root_folder
def get_requirements(self):
def get_requirements(self, entry_point_filename=None):
try:
from ....utilities.pigar.reqs import get_installed_pkgs_detail
from ....utilities.pigar.__main__ import GenerateReqs
@@ -34,7 +34,8 @@ class ScriptRequirements(object):
gr = GenerateReqs(save_path='', project_path=self._root_folder, installed_pkgs=installed_pkgs,
ignores=['.git', '.hg', '.idea', '__pycache__', '.ipynb_checkpoints',
'site-packages', 'dist-packages'])
reqs, try_imports, guess, local_pks = gr.extract_reqs(module_callback=ScriptRequirements.add_trains_used_packages)
reqs, try_imports, guess, local_pks = gr.extract_reqs(
module_callback=ScriptRequirements.add_trains_used_packages, entry_point_filename=entry_point_filename)
return self.create_requirements_txt(reqs, local_pks)
except Exception:
return '', ''
@@ -493,7 +494,6 @@ class ScriptInfo(object):
else:
script_requirements = None
script_info = dict(
repository=furl(repo_info.url).remove(username=True, password=True).tostr(),
branch=repo_info.branch,
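The change above only adds an optional argument, so callers that omit it keep the previous behaviour. A minimal sketch of calling the new signature directly; the import path and file names are assumptions for illustration, not part of this commit:

# Sketch only: module path and project layout are hypothetical.
from trains.backend_interface.task.repo.scriptinfo import ScriptRequirements

script_requirements = ScriptRequirements(root_folder='/home/user/my_project')

# New behaviour: analyze only the entry point script first; extract_reqs() falls back
# to scanning the whole repository if the script imports other local modules.
requirements_txt, conda_txt = script_requirements.get_requirements(
    entry_point_filename='train.py')

# Omitting the argument keeps the previous behaviour: scan the entire repository.
requirements_txt, conda_txt = script_requirements.get_requirements()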

View File

@@ -231,7 +231,11 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
self.reload()
# if jupyter is present, requirements will be created in the background when saving a snapshot
if result.script and script_requirements:
requirements, conda_requirements = script_requirements.get_requirements()
entry_point_filename = None if config.get('development.force_analyze_entire_repo', False) else \
os.path.join(result.script['working_dir'], result.script['entry_point'])
requirements, conda_requirements = script_requirements.get_requirements(
entry_point_filename=entry_point_filename)
if requirements:
if not result.script['requirements']:
result.script['requirements'] = {}
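The decision made above can be read as a small standalone rule; a sketch with hypothetical values, where only the configuration key comes from this commit:

import os

# development.force_analyze_entire_repo, as read from the SDK configuration
force_entire_repo = False

# normally taken from result.script; hypothetical values here
working_dir, entry_point = '.', 'train.py'

# None tells get_requirements() to scan the whole repository; otherwise only the
# entry point script is analyzed first, with a fallback to a full scan when needed.
entry_point_filename = None if force_entire_repo else os.path.join(working_dir, entry_point)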

View File

@@ -141,6 +141,12 @@
# Default Task output_uri. If output_uri is not provided to Task.init, default_output_uri will be used instead.
default_output_uri: ""
# By default, the auto-generated requirements are optimized for a smaller requirements list
# If True, analyze the entire repository regardless of the entry point.
# If False, first analyze the entry point script; if it does not import other local files,
# do not analyze the entire repository.
force_analyze_entire_repo: false
# Development mode worker
worker {
# Status report period in seconds
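To restore the previous behaviour of always scanning the whole repository, the new key can be overridden in the configuration file. A sketch of the relevant section, assuming the usual sdk / development nesting used by this configuration file:

sdk {
    development {
        # always analyze the entire repository, ignoring the entry point optimization
        force_analyze_entire_repo: true
    }
}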

View File

@@ -23,14 +23,29 @@ class GenerateReqs(object):
self._local_mods = dict()
self._comparison_operator = comparison_operator
def extract_reqs(self, module_callback=None):
def extract_reqs(self, module_callback=None, entry_point_filename=None):
"""Extract requirements from project."""
reqs = ReqsModules()
guess = ReqsModules()
local = ReqsModules()
modules, try_imports, local_mods = project_import_modules(
self._project_path, self._ignores)
# make the entry point path absolute (resolve it relative to the project root path)
if entry_point_filename and not os.path.isabs(entry_point_filename):
entry_point_filename = os.path.join(self._project_path, entry_point_filename) \
if os.path.isdir(self._project_path) else None
# check if the entry point script is self-contained, i.e. does not use the rest of the project
if entry_point_filename and os.path.isfile(entry_point_filename):
modules, try_imports, local_mods = project_import_modules(entry_point_filename, self._ignores)
# if we have any module/package we cannot find, take no chances and scan the entire project
if try_imports or local_mods:
modules, try_imports, local_mods = project_import_modules(
self._project_path, self._ignores)
else:
modules, try_imports, local_mods = project_import_modules(
self._project_path, self._ignores)
if module_callback:
modules = module_callback(modules)