Change HTTP driver timeout and retry codes (connection timeout will now trigger a retry)

This commit is contained in:
allegroai 2022-01-13 11:57:44 +02:00
parent be1478e67a
commit b3176a223b

View File

@ -24,6 +24,7 @@ from _socket import gethostname
from attr import attrs, attrib, asdict from attr import attrs, attrib, asdict
from furl import furl from furl import furl
from pathlib2 import Path from pathlib2 import Path
from requests import codes as requests_codes
from requests.exceptions import ConnectionError from requests.exceptions import ConnectionError
from six import binary_type, StringIO from six import binary_type, StringIO
from six.moves.queue import Queue, Empty from six.moves.queue import Queue, Empty
@ -1004,6 +1005,7 @@ class _HttpDriver(_Driver):
timeout_connection = deferred_config('http.timeout.connection', 30) timeout_connection = deferred_config('http.timeout.connection', 30)
timeout_total = deferred_config('http.timeout.total', 30) timeout_total = deferred_config('http.timeout.total', 30)
max_retries = deferred_config('http.download.max_retries', 15)
min_kbps_speed = 50 min_kbps_speed = 50
schemes = ('http', 'https') schemes = ('http', 'https')
@ -1014,7 +1016,22 @@ class _HttpDriver(_Driver):
def __init__(self, name, retries=5, **kwargs): def __init__(self, name, retries=5, **kwargs):
self.name = name self.name = name
self.session = get_http_session_with_retry(total=retries, connect=retries, read=retries, redirect=retries) self.session = get_http_session_with_retry(
total=retries,
connect=retries,
read=retries,
redirect=retries,
backoff_factor=0.5,
backoff_max=120,
status_forcelist=[
requests_codes.request_timeout,
requests_codes.timeout,
requests_codes.bad_gateway,
requests_codes.service_unavailable,
requests_codes.bandwidth_limit_exceeded,
requests_codes.too_many_requests,
]
)
def get_headers(self, url): def get_headers(self, url):
if not self._default_backend_session: if not self._default_backend_session:
@ -1032,8 +1049,8 @@ class _HttpDriver(_Driver):
self.url, self.is_stream, self.container_name, self.object_name = \ self.url, self.is_stream, self.container_name, self.object_name = \
url, is_stream, container_name, object_name url, is_stream, container_name, object_name
def __init__(self, retries=5): def __init__(self, retries=None):
self._retries = retries self._retries = retries or int(self.max_retries)
self._containers = {} self._containers = {}
def get_container(self, container_name, config=None, **kwargs): def get_container(self, container_name, config=None, **kwargs):