Add missing completed status

This commit is contained in:
allegroai 2019-07-08 23:59:54 +03:00
parent 30c8be79b5
commit 18392ad2fd
3 changed files with 77 additions and 14 deletions

View File

@ -2,24 +2,16 @@ from datetime import datetime
import database import database
from apierrors import errors from apierrors import errors
from apimodels.auth import ( from apimodels.auth import GetTokenResponse, CreateUserRequest, Credentials as CredModel
GetTokenResponse,
CreateUserRequest,
Credentials as CredModel,
)
from apimodels.users import CreateRequest as Users_CreateRequest from apimodels.users import CreateRequest as Users_CreateRequest
from bll.user import UserBLL from bll.user import UserBLL
from config import config from config import config
from config.info import get_version, get_build_number
from database.errors import translate_errors_context from database.errors import translate_errors_context
from database.model.auth import User, Role, Credentials from database.model.auth import User, Role, Credentials
from database.model.company import Company from database.model.company import Company
from service_repo import APICall from service_repo import APICall, ServiceRepo
from service_repo.auth import ( from service_repo.auth import Identity, Token, get_client_id, get_secret_key
Identity,
Token,
get_client_id,
get_secret_key,
)
log = config.logger("AuthBLL") log = config.logger("AuthBLL")
@ -62,6 +54,9 @@ class AuthBLL:
identity=identity, identity=identity,
entities=entities, entities=entities,
expiration_sec=expiration_sec, expiration_sec=expiration_sec,
api_version=str(ServiceRepo.max_endpoint_version()),
server_version=str(get_version()),
server_build=str(get_build_number()),
) )
return GetTokenResponse(token=token.decode("ascii")) return GetTokenResponse(token=token.decode("ascii"))

View File

@ -1,6 +1,7 @@
import re import re
from collections import OrderedDict from collections import OrderedDict
from datetime import datetime from datetime import datetime, timedelta
from time import sleep
from typing import Mapping, Collection from typing import Mapping, Collection
from urllib.parse import urlparse from urllib.parse import urlparse
@ -10,6 +11,7 @@ from six import string_types
import es_factory import es_factory
from apierrors import errors from apierrors import errors
from config import config
from database.errors import translate_errors_context from database.errors import translate_errors_context
from database.fields import OutputDestinationField from database.fields import OutputDestinationField
from database.model.model import Model from database.model.model import Model
@ -20,10 +22,13 @@ from database.model.task.task import Task, TaskStatus, TaskStatusMessage, TaskTa
from database.utils import get_company_or_none_constraint, id as create_id from database.utils import get_company_or_none_constraint, id as create_id
from service_repo import APICall from service_repo import APICall
from timing_context import TimingContext from timing_context import TimingContext
from utilities.threads_manager import ThreadsManager
from .utils import ChangeStatusRequest, validate_status_change from .utils import ChangeStatusRequest, validate_status_change
class TaskBLL(object): class TaskBLL(object):
threads = ThreadsManager()
def __init__(self, events_es=None): def __init__(self, events_es=None):
self.events_es = ( self.events_es = (
events_es if events_es is not None else es_factory.connect("events") events_es if events_es is not None else es_factory.connect("events")
@ -391,3 +396,55 @@ class TaskBLL(object):
status_message=status_message, status_message=status_message,
force=force, force=force,
).execute() ).execute()
@classmethod
@threads.register("non_responsive_tasks_watchdog", daemon=True)
def start_non_responsive_tasks_watchdog(cls):
log = config.logger("non_responsive_tasks_watchdog")
relevant_status = (TaskStatus.in_progress,)
threshold = timedelta(
seconds=config.get(
"services.tasks.non_responsive_tasks_watchdog.threshold_sec", 7200
)
)
while True:
sleep(
config.get(
"services.tasks.non_responsive_tasks_watchdog.watch_interval_sec",
900,
)
)
try:
ref_time = datetime.utcnow() - threshold
log.info(
f"Starting cleanup cycle for running tasks last updated before {ref_time}"
)
tasks = list(
Task.objects(
status__in=relevant_status, last_update__lt=ref_time
).only("id", "name", "status", "project", "last_update")
)
if tasks:
log.info(f"Stopping {len(tasks)} non-responsive tasks")
for task in tasks:
log.info(
f"Stopping {task.id} ({task.name}), last updated at {task.last_update}"
)
ChangeStatusRequest(
task=task,
new_status=TaskStatus.stopped,
status_reason="Forced stop (non-responsive)",
status_message="Forced stop (non-responsive)",
force=True,
).execute()
log.info(f"Done")
except Exception as ex:
log.exception(f"Failed stopping tasks: {str(ex)}")

View File

@ -96,7 +96,12 @@ def validate_status_change(current_status, new_status):
state_machine = { state_machine = {
TaskStatus.created: {TaskStatus.in_progress}, TaskStatus.created: {TaskStatus.in_progress},
TaskStatus.in_progress: {TaskStatus.stopped, TaskStatus.failed, TaskStatus.created}, TaskStatus.in_progress: {
TaskStatus.stopped,
TaskStatus.failed,
TaskStatus.created,
TaskStatus.completed,
},
TaskStatus.stopped: { TaskStatus.stopped: {
TaskStatus.closed, TaskStatus.closed,
TaskStatus.created, TaskStatus.created,
@ -104,6 +109,7 @@ state_machine = {
TaskStatus.in_progress, TaskStatus.in_progress,
TaskStatus.published, TaskStatus.published,
TaskStatus.publishing, TaskStatus.publishing,
TaskStatus.completed,
}, },
TaskStatus.closed: { TaskStatus.closed: {
TaskStatus.created, TaskStatus.created,
@ -115,6 +121,11 @@ state_machine = {
TaskStatus.failed: {TaskStatus.created, TaskStatus.stopped, TaskStatus.published}, TaskStatus.failed: {TaskStatus.created, TaskStatus.stopped, TaskStatus.published},
TaskStatus.publishing: {TaskStatus.published}, TaskStatus.publishing: {TaskStatus.published},
TaskStatus.published: set(), TaskStatus.published: set(),
TaskStatus.completed: {
TaskStatus.published,
TaskStatus.in_progress,
TaskStatus.created,
}
} }