Add requirement_parser into trains-agent instead as a dependency. Fix requirement_parser to support 'package @ git+http' lines

This commit is contained in:
allegroai 2020-07-30 14:29:37 +03:00
parent 699d13bbb3
commit 52c1772b04
9 changed files with 388 additions and 7 deletions

View File

@ -13,7 +13,6 @@ pyjwt>=1.6.4
PyYAML>=3.12
requests-file>=1.4.2
requests>=2.20.0
requirements_parser>=0.2.0
six>=1.11.0
tqdm>=4.19.5
typing>=3.6.4

0
trains_agent/external/__init__.py vendored Normal file
View File

View File

@ -0,0 +1,22 @@
from .parser import parse # noqa
_MAJOR = 0
_MINOR = 2
_PATCH = 0
def version_tuple():
'''
Returns a 3-tuple of ints that represent the version
'''
return (_MAJOR, _MINOR, _PATCH)
def version():
'''
Returns a string representation of the version
'''
return '%d.%d.%d' % (version_tuple())
__version__ = version()

View File

@ -0,0 +1,44 @@
import re
# Copied from pip
# https://github.com/pypa/pip/blob/281eb61b09d87765d7c2b92f6982b3fe76ccb0af/pip/index.py#L947
HASH_ALGORITHMS = set(['sha1', 'sha224', 'sha384', 'sha256', 'sha512', 'md5'])
extras_require_search = re.compile(
r'(?P<name>.+)\[(?P<extras>[^\]]+)\]').search
def parse_fragment(fragment_string):
"""Takes a fragment string nd returns a dict of the components"""
fragment_string = fragment_string.lstrip('#')
try:
return dict(
key_value_string.split('=')
for key_value_string in fragment_string.split('&')
)
except ValueError:
raise ValueError(
'Invalid fragment string {fragment_string}'.format(
fragment_string=fragment_string
)
)
def get_hash_info(d):
"""Returns the first matching hashlib name and value from a dict"""
for key in d.keys():
if key.lower() in HASH_ALGORITHMS:
return key, d[key]
return None, None
def parse_extras_require(egg):
if egg is not None:
match = extras_require_search(egg)
if match is not None:
name = match.group('name')
extras = match.group('extras')
return name, [extra.strip() for extra in extras.split(',')]
return egg, []

View File

@ -0,0 +1,50 @@
import os
import warnings
from .requirement import Requirement
def parse(reqstr):
"""
Parse a requirements file into a list of Requirements
See: pip/req.py:parse_requirements()
:param reqstr: a string or file like object containing requirements
:returns: a *generator* of Requirement objects
"""
filename = getattr(reqstr, 'name', None)
try:
# Python 2.x compatibility
if not isinstance(reqstr, basestring):
reqstr = reqstr.read()
except NameError:
# Python 3.x only
if not isinstance(reqstr, str):
reqstr = reqstr.read()
for line in reqstr.splitlines():
line = line.strip()
if line == '':
continue
elif not line or line.startswith('#'):
# comments are lines that start with # only
continue
elif line.startswith('-r') or line.startswith('--requirement'):
_, new_filename = line.split()
new_file_path = os.path.join(os.path.dirname(filename or '.'),
new_filename)
with open(new_file_path) as f:
for requirement in parse(f):
yield requirement
elif line.startswith('-f') or line.startswith('--find-links') or \
line.startswith('-i') or line.startswith('--index-url') or \
line.startswith('--extra-index-url') or \
line.startswith('--no-index'):
warnings.warn('Private repos not supported. Skipping.')
continue
elif line.startswith('-Z') or line.startswith('--always-unzip'):
warnings.warn('Unused option --always-unzip. Skipping.')
continue
else:
yield Requirement.parse(line)

View File

@ -0,0 +1,236 @@
from __future__ import unicode_literals
import re
from pkg_resources import Requirement as Req
from .fragment import get_hash_info, parse_fragment, parse_extras_require
from .vcs import VCS, VCS_SCHEMES
URI_REGEX = re.compile(
r'^(?P<scheme>https?|file|ftps?)://(?P<path>[^#]+)'
r'(#(?P<fragment>\S+))?'
)
VCS_REGEX = re.compile(
r'^(?P<scheme>{0})://'.format(r'|'.join(
[scheme.replace('+', r'\+') for scheme in VCS_SCHEMES])) +
r'((?P<login>[^/@]+)@)?'
r'(?P<path>[^#@]+)'
r'(@(?P<revision>[^#]+))?'
r'(#(?P<fragment>\S+))?'
)
# This matches just about everyting
LOCAL_REGEX = re.compile(
r'^((?P<scheme>file)://)?'
r'(?P<path>[^#]+)' +
r'(#(?P<fragment>\S+))?'
)
class Requirement(object):
"""
Represents a single requirementfrom trains_agent.external.requirements_parser.requirement import Requirement
Typically instances of this class are created with ``Requirement.parse``.
For local file requirements, there's no verification that the file
exists. This class attempts to be *dict-like*.
See: http://www.pip-installer.org/en/latest/logic.html
**Members**:
* ``line`` - the actual requirement line being parsed
* ``editable`` - a boolean whether this requirement is "editable"
* ``local_file`` - a boolean whether this requirement is a local file/path
* ``specifier`` - a boolean whether this requirement used a requirement
specifier (eg. "django>=1.5" or "requirements")
* ``vcs`` - a string specifying the version control system
* ``revision`` - a version control system specifier
* ``name`` - the name of the requirement
* ``uri`` - the URI if this requirement was specified by URI
* ``subdirectory`` - the subdirectory fragment of the URI
* ``path`` - the local path to the requirement
* ``hash_name`` - the type of hashing algorithm indicated in the line
* ``hash`` - the hash value indicated by the requirement line
* ``extras`` - a list of extras for this requirement
(eg. "mymodule[extra1, extra2]")
* ``specs`` - a list of specs for this requirement
(eg. "mymodule>1.5,<1.6" => [('>', '1.5'), ('<', '1.6')])
"""
def __init__(self, line):
# Do not call this private method
self.line = line
self.editable = False
self.local_file = False
self.specifier = False
self.vcs = None
self.name = None
self.subdirectory = None
self.uri = None
self.path = None
self.revision = None
self.hash_name = None
self.hash = None
self.extras = []
self.specs = []
def __repr__(self):
return '<Requirement: "{0}">'.format(self.line)
def __getitem__(self, key):
return getattr(self, key)
def keys(self):
return self.__dict__.keys()
@classmethod
def parse_editable(cls, line):
"""
Parses a Requirement from an "editable" requirement which is either
a local project path or a VCS project URI.
See: pip/req.py:from_editable()
:param line: an "editable" requirement
:returns: a Requirement instance for the given line
:raises: ValueError on an invalid requirement
"""
req = cls('-e {0}'.format(line))
req.editable = True
vcs_match = VCS_REGEX.match(line)
local_match = LOCAL_REGEX.match(line)
if vcs_match is not None:
groups = vcs_match.groupdict()
if groups.get('login'):
req.uri = '{scheme}://{login}@{path}'.format(**groups)
else:
req.uri = '{scheme}://{path}'.format(**groups)
req.revision = groups['revision']
if groups['fragment']:
fragment = parse_fragment(groups['fragment'])
egg = fragment.get('egg')
req.name, req.extras = parse_extras_require(egg)
req.hash_name, req.hash = get_hash_info(fragment)
req.subdirectory = fragment.get('subdirectory')
for vcs in VCS:
if req.uri.startswith(vcs):
req.vcs = vcs
else:
assert local_match is not None, 'This should match everything'
groups = local_match.groupdict()
req.local_file = True
if groups['fragment']:
fragment = parse_fragment(groups['fragment'])
egg = fragment.get('egg')
req.name, req.extras = parse_extras_require(egg)
req.hash_name, req.hash = get_hash_info(fragment)
req.subdirectory = fragment.get('subdirectory')
req.path = groups['path']
return req
@classmethod
def parse_line(cls, line):
"""
Parses a Requirement from a non-editable requirement.
See: pip/req.py:from_line()
:param line: a "non-editable" requirement
:returns: a Requirement instance for the given line
:raises: ValueError on an invalid requirement
"""
req = cls(line)
vcs_match = VCS_REGEX.match(line)
uri_match = URI_REGEX.match(line)
local_match = LOCAL_REGEX.match(line)
if vcs_match is not None:
groups = vcs_match.groupdict()
if groups.get('login'):
req.uri = '{scheme}://{login}@{path}'.format(**groups)
else:
req.uri = '{scheme}://{path}'.format(**groups)
req.revision = groups['revision']
if groups['fragment']:
fragment = parse_fragment(groups['fragment'])
egg = fragment.get('egg')
req.name, req.extras = parse_extras_require(egg)
req.hash_name, req.hash = get_hash_info(fragment)
req.subdirectory = fragment.get('subdirectory')
for vcs in VCS:
if req.uri.startswith(vcs):
req.vcs = vcs
elif uri_match is not None:
groups = uri_match.groupdict()
req.uri = '{scheme}://{path}'.format(**groups)
if groups['fragment']:
fragment = parse_fragment(groups['fragment'])
egg = fragment.get('egg')
req.name, req.extras = parse_extras_require(egg)
req.hash_name, req.hash = get_hash_info(fragment)
req.subdirectory = fragment.get('subdirectory')
if groups['scheme'] == 'file':
req.local_file = True
elif '#egg=' in line:
# Assume a local file match
assert local_match is not None, 'This should match everything'
groups = local_match.groupdict()
req.local_file = True
if groups['fragment']:
fragment = parse_fragment(groups['fragment'])
egg = fragment.get('egg')
name, extras = parse_extras_require(egg)
req.name = fragment.get('egg')
req.hash_name, req.hash = get_hash_info(fragment)
req.subdirectory = fragment.get('subdirectory')
req.path = groups['path']
else:
# This is a requirement specifier.
# Delegate to pkg_resources and hope for the best
req.specifier = True
pkg_req = Req.parse(line)
req.name = pkg_req.unsafe_name
req.extras = list(pkg_req.extras)
req.specs = pkg_req.specs
return req
@classmethod
def parse(cls, line):
"""
Parses a Requirement from a line of a requirement file.
:param line: a line of a requirement file
:returns: a Requirement instance for the given line
:raises: ValueError on an invalid requirement
"""
line = line.lstrip()
if line.startswith('-e') or line.startswith('--editable'):
# Editable installs are either a local project path
# or a VCS project URI
return cls.parse_editable(
re.sub(r'^(-e|--editable=?)\s*', '', line))
elif '@' in line:
# Allegro bug fix: support 'name @ git+' entries
name, vcs = line.split('@', 1)
name = name.strip()
vcs = vcs.strip()
# noinspection PyBroadException
try:
# check if the name is valid & parsed
Req.parse(name)
# if we are here, name is a valid package name, check if the vcs part is valid
if VCS_REGEX.match(vcs):
req = cls.parse_line(vcs)
req.name = name
return req
except Exception:
pass
return cls.parse_line(line)

View File

@ -0,0 +1,30 @@
from __future__ import unicode_literals
VCS = [
'git',
'hg',
'svn',
'bzr',
]
VCS_SCHEMES = [
'git',
'git+https',
'git+ssh',
'git+git',
'hg+http',
'hg+https',
'hg+static-http',
'hg+ssh',
'svn',
'svn+svn',
'svn+http',
'svn+https',
'svn+ssh',
'bzr+http',
'bzr+https',
'bzr+ssh',
'bzr+sftp',
'bzr+ftp',
'bzr+lp',
]

View File

@ -14,8 +14,8 @@ import yaml
from time import time
from attr import attrs, attrib, Factory
from pathlib2 import Path
from requirements import parse
from requirements.requirement import Requirement
from trains_agent.external.requirements_parser import parse
from trains_agent.external.requirements_parser.requirement import Requirement
from trains_agent.errors import CommandFailedError
from trains_agent.helper.base import rm_tree, NonStrictAttrs, select_for_platform, is_windows_platform

View File

@ -12,15 +12,15 @@ from typing import Text, List, Type, Optional, Tuple, Dict
from pathlib2 import Path
from pyhocon import ConfigTree
from requirements import parse
# noinspection PyPackageRequirements
from requirements.requirement import Requirement
import six
from trains_agent.definitions import PIP_EXTRA_INDICES
from trains_agent.helper.base import warning, is_conda, which, join_lines, is_windows_platform
from trains_agent.helper.process import Argv, PathLike
from trains_agent.session import Session, normalize_cuda_version
from trains_agent.external.requirements_parser import parse
from trains_agent.external.requirements_parser.requirement import Requirement
from .translator import RequirementsTranslator
@ -57,7 +57,7 @@ class MarkerRequirement(object):
elif self.vcs:
# leave the line as is, let pip handle it
if self.line:
parts = [self.line]
return self.line
else:
# let's build the line manually
parts = [