mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Compare commits
140 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c43084825c | ||
|
|
f1abee91dd | ||
|
|
c6b04edc34 | ||
|
|
50b847f4f7 | ||
|
|
1f53a06299 | ||
|
|
257dd95401 | ||
|
|
1736d205bb | ||
|
|
6fef58df6c | ||
|
|
473a8de8bb | ||
|
|
ff6272f48f | ||
|
|
1b5bcebd10 | ||
|
|
c4344d3afd | ||
|
|
45a44b087a | ||
|
|
c58ffdb9f8 | ||
|
|
54d9d77294 | ||
|
|
ce02385420 | ||
|
|
87ffd95eaa | ||
|
|
522dd85d7b | ||
|
|
3651c85fcd | ||
|
|
566427d550 | ||
|
|
cc99077c92 | ||
|
|
5f112447f7 | ||
|
|
22c5f043aa | ||
|
|
860ff8911c | ||
|
|
799b292146 | ||
|
|
fffe8e1c3f | ||
|
|
8245293f7f | ||
|
|
6563ce70c8 | ||
|
|
829b1d8f15 | ||
|
|
f6be64a4b5 | ||
|
|
21f6a73f66 | ||
|
|
77c4c79a2f | ||
|
|
2ad929fa00 | ||
|
|
53f511f536 | ||
|
|
7c87797a40 | ||
|
|
272fa07c29 | ||
|
|
6ce9cf7c2a | ||
|
|
abb30ac2b8 | ||
|
|
5bb257c46c | ||
|
|
c65b28ed92 | ||
|
|
fce8eb6782 | ||
|
|
9cb71b9526 | ||
|
|
38e02ca5cd | ||
|
|
06bfea80bc | ||
|
|
e660c7f2be | ||
|
|
fc28467080 | ||
|
|
8d47905982 | ||
|
|
a6a0b01f71 | ||
|
|
2b561f6066 | ||
|
|
61232d05dd | ||
|
|
b3418e4496 | ||
|
|
5ef627165c | ||
|
|
98a983d9a2 | ||
|
|
482007c4ce | ||
|
|
98198b8006 | ||
|
|
94bb11a81a | ||
|
|
4158d08f6f | ||
|
|
58ab67ea31 | ||
|
|
ea0ed4807e | ||
|
|
389600b91e | ||
|
|
5fb2550212 | ||
|
|
15e9e6b778 | ||
|
|
aa75b92e46 | ||
|
|
757210d5b3 | ||
|
|
00eb2f10ec | ||
|
|
3393372b9c | ||
|
|
f2d2d702de | ||
|
|
e3d0680d39 | ||
|
|
618c2ac5c4 | ||
|
|
0272c4c79c | ||
|
|
ff8cf63abf | ||
|
|
2c7c7f5b44 | ||
|
|
01f57c1e44 | ||
|
|
47bcd3839a | ||
|
|
0a3a8a1c52 | ||
|
|
231a907cff | ||
|
|
8f95eecf2e | ||
|
|
81008ee00e | ||
|
|
25bc44c0cf | ||
|
|
f838c8fc70 | ||
|
|
596093aac6 | ||
|
|
8f23f3b4c0 | ||
|
|
95d503afdd | ||
|
|
73ee33be99 | ||
|
|
ee3adf625f | ||
|
|
afec38a50e | ||
|
|
f9c60904f4 | ||
|
|
a09dc85c67 | ||
|
|
5d74f4b376 | ||
|
|
d558c66d3c | ||
|
|
714c6a05d0 | ||
|
|
43b2f7f41d | ||
|
|
28d752d568 | ||
|
|
6d091d8e08 | ||
|
|
5c6b3ccc94 | ||
|
|
df10e6ed46 | ||
|
|
8ef78fd058 | ||
|
|
640c83288a | ||
|
|
788c79a66f | ||
|
|
bef87c7744 | ||
|
|
f139891276 | ||
|
|
2afaff1713 | ||
|
|
a57a5b151c | ||
|
|
97f446d523 | ||
|
|
a88262c097 | ||
|
|
284271c654 | ||
|
|
ae2775f7b8 | ||
|
|
eb012f5c24 | ||
|
|
06897f7606 | ||
|
|
599219b02d | ||
|
|
b6e04ab982 | ||
|
|
98fe162878 | ||
|
|
f829d80a49 | ||
|
|
b7e568e299 | ||
|
|
6912846326 | ||
|
|
224868c9a4 | ||
|
|
b1ca90a303 | ||
|
|
dee2475698 | ||
|
|
aeede81474 | ||
|
|
2d91d4cde6 | ||
|
|
7a11c7c165 | ||
|
|
a9f479cfcd | ||
|
|
c1d91b0d6a | ||
|
|
cbfba6acb2 | ||
|
|
f2e2e1f94a | ||
|
|
23668a403a | ||
|
|
facbee0005 | ||
|
|
c486cfd09f | ||
|
|
119ecaa2e3 | ||
|
|
d6cc2be653 | ||
|
|
41d75df40c | ||
|
|
901c4be9ae | ||
|
|
966b14f914 | ||
|
|
847d35cbbb | ||
|
|
4022cb5c63 | ||
|
|
2b239829de | ||
|
|
402856656f | ||
|
|
7b94ff410c | ||
|
|
0a03dced50 | ||
|
|
ffe653afc6 |
144
README.md
144
README.md
@@ -1,5 +1,5 @@
|
||||
# TRAINS Agent
|
||||
## Deep Learning DevOps For Everyone - Now supports all platforms (Linux, macOS, and Windows)
|
||||
# Allegro Trains Agent
|
||||
## Deep Learning DevOps For Everyone - Now supporting all platforms (Linux, macOS, and Windows)
|
||||
|
||||
"All the Deep-Learning DevOps your research needs, and then some... Because ain't nobody got time for that"
|
||||
|
||||
@@ -8,27 +8,29 @@
|
||||
[![PyPI version](https://img.shields.io/pypi/v/trains-agent.svg)](https://img.shields.io/pypi/v/trains-agent.svg)
|
||||
[![PyPI status](https://img.shields.io/pypi/status/trains-agent.svg)](https://pypi.python.org/pypi/trains-agent/)
|
||||
|
||||
**TRAINS Agent is an AI experiment cluster solution.**
|
||||
### Help improve Trains by filling our 2-min [user survey](https://allegro.ai/lp/trains-user-survey/)
|
||||
|
||||
**Trains Agent is an AI experiment cluster solution.**
|
||||
|
||||
It is a zero configuration fire-and-forget execution agent, which combined with trains-server provides a full AI cluster solution.
|
||||
|
||||
**Full AutoML in 5 steps**
|
||||
1. Install the [TRAINS server](https://github.com/allegroai/trains-agent) (or use our [open server](https://demoapp.trains.allegro.ai))
|
||||
2. `pip install trains_agent` ([install](#installing-the-trains-agent) the TRAINS agent on any GPU machine: on-premises / cloud / ...)
|
||||
3. Add [TRAINS](https://github.com/allegroai/trains) to your code with just 2 lines & run it once (on your machine / laptop)
|
||||
1. Install the [Trains Server](https://github.com/allegroai/trains-server) (or use our [open server](https://demoapp.trains.allegro.ai))
|
||||
2. `pip install trains-agent` ([install](#installing-the-trains-agent) the Trains Agent on any GPU machine: on-premises / cloud / ...)
|
||||
3. Add [Trains](https://github.com/allegroai/trains) to your code with just 2 lines & run it once (on your machine / laptop)
|
||||
4. Change the [parameters](#using-the-trains-agent) in the UI & schedule for [execution](#using-the-trains-agent) (or automate with an [AutoML pipeline](#automl-and-orchestration-pipelines-))
|
||||
5. :chart_with_downwards_trend: :chart_with_upwards_trend: :eyes: :beer:
|
||||
|
||||
|
||||
**Using the TRAINS agent, you can now set up a dynamic cluster with \*epsilon DevOps**
|
||||
**Using the Trains Agent, you can now set up a dynamic cluster with \*epsilon DevOps**
|
||||
|
||||
*epsilon - Because we are scientists :triangular_ruler: and nothing is really zero work
|
||||
|
||||
(Experience TRAINS live at [https://demoapp.trains.allegro.ai](https://demoapp.trains.allegro.ai))
|
||||
(Experience Trains live at [https://demoapp.trains.allegro.ai](https://demoapp.trains.allegro.ai))
|
||||
<a href="https://demoapp.trains.allegro.ai"><img src="https://raw.githubusercontent.com/allegroai/trains-agent/9f1e86c1ca45c984ee13edc9353c7b10c55d7257/docs/screenshots.gif" width="100%"></a>
|
||||
|
||||
## Simple, Flexible Experiment Orchestration
|
||||
**The TRAINS Agent was built to address the DL/ML R&D DevOps needs:**
|
||||
**The Trains Agent was built to address the DL/ML R&D DevOps needs:**
|
||||
|
||||
* Easily add & remove machines from the cluster
|
||||
* Reuse machines without the need for any dedicated containers or images
|
||||
@@ -49,30 +51,30 @@ If you are considering K8S for your research, also consider that you will soon b
|
||||
In our experience, handling and building the environments, having to package every experiment in a docker, managing those hundreds (or more) containers and building pipelines on top of it all, is very complicated (also, it’s usually out of scope for the research team, and overwhelming even for the DevOps team).
|
||||
|
||||
We feel there has to be a better way, that can be just as powerful for R&D and at the same time allow integration with K8S **when the need arises**.
|
||||
(If you already have a K8S cluster for AI, detailed instructions on how to integrate TRAINS into your K8S cluster are *coming soon*.)
|
||||
(If you already have a K8S cluster for AI, detailed instructions on how to integrate Trains into your K8S cluster are [here](https://github.com/allegroai/trains-server-k8s/tree/master/trains-server-chart) with included [helm chart](https://github.com/allegroai/trains-server-helm))
|
||||
|
||||
|
||||
## Using the TRAINS Agent
|
||||
## Using the Trains Agent
|
||||
**Full scale HPC with a click of a button**
|
||||
|
||||
TRAINS Agent is a job scheduler that listens on job queue(s), pulls jobs, sets the job environments, executes the job and monitors its progress.
|
||||
The Trains Agent is a job scheduler that listens on job queue(s), pulls jobs, sets the job environments, executes the job and monitors its progress.
|
||||
|
||||
Any 'Draft' experiment can be scheduled for execution by a TRAINS agent.
|
||||
Any 'Draft' experiment can be scheduled for execution by a Trains agent.
|
||||
|
||||
A previously run experiment can be put into 'Draft' state by either of two methods:
|
||||
* Using the **'Reset'** action from the experiment right-click context menu in the
|
||||
TRAINS UI - This will clear any results and artifacts the previous run had created.
|
||||
Trains UI - This will clear any results and artifacts the previous run had created.
|
||||
* Using the **'Clone'** action from the experiment right-click context menu in the
|
||||
TRAINS UI - This will create a new 'Draft' experiment with the same configuration as the original experiment.
|
||||
Trains UI - This will create a new 'Draft' experiment with the same configuration as the original experiment.
|
||||
|
||||
An experiment is scheduled for execution using the **'Enqueue'** action from the experiment
|
||||
right-click context menu in the TRAINS UI and selecting the execution queue.
|
||||
right-click context menu in the Trains UI and selecting the execution queue.
|
||||
|
||||
See [creating an experiment and enqueuing it for execution](#from-scratch).
|
||||
|
||||
Once an experiment is enqueued, it will be picked up and executed by a TRAINS agent monitoring this queue.
|
||||
Once an experiment is enqueued, it will be picked up and executed by a Trains agent monitoring this queue.
|
||||
|
||||
The TRAINS UI Workers & Queues page provides ongoing execution information:
|
||||
The Trains UI Workers & Queues page provides ongoing execution information:
|
||||
- Workers Tab: Monitor your cluster
|
||||
- Review available resources
|
||||
- Monitor machines statistics (CPU / GPU / Disk / Network)
|
||||
@@ -81,16 +83,16 @@ The TRAINS UI Workers & Queues page provides ongoing execution information:
|
||||
- Cancel or abort job execution
|
||||
- Move jobs between execution queues
|
||||
|
||||
### What The TRAINS Agent Actually Does
|
||||
The TRAINS agent executes experiments using the following process:
|
||||
### What The Trains Agent Actually Does
|
||||
The Trains Agent executes experiments using the following process:
|
||||
- Create a new virtual environment (or launch the selected docker image)
|
||||
- Clone the code into the virtual-environment (or inside the docker)
|
||||
- Install python packages based on the package requirements listed for the experiment
|
||||
- Special note for PyTorch: The TRAINS agent will automatically select the
|
||||
- Special note for PyTorch: The Trains Agent will automatically select the
|
||||
torch packages based on the CUDA_VERSION environment variable of the machine
|
||||
- Execute the code, while monitoring the process
|
||||
- Log all stdout/stderr in the TRAINS UI, including the cloning and installation process, for easy debugging
|
||||
- Monitor the execution and allow you to manually abort the job using the TRAINS UI (or, in the unfortunate case of a code crash, catch the error and signal the experiment has failed)
|
||||
- Log all stdout/stderr in the Trains UI, including the cloning and installation process, for easy debugging
|
||||
- Monitor the execution and allow you to manually abort the job using the Trains UI (or, in the unfortunate case of a code crash, catch the error and signal the experiment has failed)
|
||||
|
||||
### System Design & Flow
|
||||
```text
|
||||
@@ -98,24 +100,24 @@ The TRAINS agent executes experiments using the following process:
|
||||
| GPU Machine |
|
||||
Development Machine | |
|
||||
+------------------------+ | +-------------+ |
|
||||
| Data Scientist's | +--------------+ | |TRAINS Agent | |
|
||||
| Data Scientist's | +--------------+ | |Trains Agent | |
|
||||
| DL/ML Code | | WEB UI | | | | |
|
||||
| | | | | | +---------+ | |
|
||||
| | | | | | | DL/ML | | |
|
||||
| | +--------------+ | | | Code | | |
|
||||
| | User Clones Exp #1 / . . . . . . . / | | | | | |
|
||||
| +-------------------+ | into Exp #2 / . . . . . . . / | | +---------+ | |
|
||||
| | TRAINS | | +---------------/-_____________-/ | | | |
|
||||
| | Trains | | +---------------/-_____________-/ | | | |
|
||||
| +---------+---------+ | | | | ^ | |
|
||||
+-----------|------------+ | | +------|------+ |
|
||||
| | +--------|--------+
|
||||
Auto-Magically | |
|
||||
Creates Exp #1 | The TRAINS Agent
|
||||
Creates Exp #1 | The Trains Agent
|
||||
\ User Change Hyper-Parameters Pulls Exp #2, setup the
|
||||
| | environment & clone code.
|
||||
| | Start execution with the
|
||||
+------------|------------+ | +--------------------+ new set of Hyper-Parameters.
|
||||
| +---------v---------+ | | | TRAINS-SERVER | |
|
||||
| +---------v---------+ | | | Trains Server | |
|
||||
| | Experiment #1 | | | | | |
|
||||
| +-------------------+ | | | Execution Queue | |
|
||||
| || | | | | |
|
||||
@@ -126,17 +128,17 @@ Development Machine |
|
||||
| | ------------->---------------+ | |
|
||||
| | User Send Exp #2 | |Execute Exp #2 +--------------------+
|
||||
| | For Execution | +---------------+ |
|
||||
| TRAINS-SERVER | | |
|
||||
| Trains Server | | |
|
||||
+-------------------------+ +--------------------+
|
||||
```
|
||||
|
||||
### Installing the TRAINS Agent
|
||||
### Installing the Trains Agent
|
||||
|
||||
```bash
|
||||
pip install trains_agent
|
||||
pip install trains-agent
|
||||
```
|
||||
|
||||
### TRAINS Agent Usage Examples
|
||||
### Trains Agent Usage Examples
|
||||
|
||||
Full Interface and capabilities are available with
|
||||
```bash
|
||||
@@ -144,29 +146,30 @@ trains-agent --help
|
||||
trains-agent daemon --help
|
||||
```
|
||||
|
||||
### Configuring the TRAINS Agent
|
||||
### Configuring the Trains Agent
|
||||
|
||||
```bash
|
||||
trains-agent init
|
||||
```
|
||||
|
||||
Note: The TRAINS agent uses a cache folder to cache pip packages, apt packages and cloned repositories. The default TRAINS Agent cache folder is `~/.trains`
|
||||
Note: The Trains Agent uses a cache folder to cache pip packages, apt packages and cloned repositories. The default Trains Agent cache folder is `~/.trains`
|
||||
|
||||
See full details in your configuration file at `~/trains.conf`
|
||||
|
||||
Note: The **TRAINS agent** extends the **TRAINS** configuration file `~/trains.conf`
|
||||
Note: The **Trains agent** extends the **Trains** configuration file `~/trains.conf`
|
||||
They are designed to share the same configuration file, see example [here](docs/trains.conf)
|
||||
|
||||
### Running the TRAINS Agent
|
||||
### Running the Trains Agent
|
||||
|
||||
For debug and experimentation, start the TRAINS agent in `foreground` mode, where all the output is printed to screen
|
||||
For debug and experimentation, start the Trains agent in `foreground` mode, where all the output is printed to screen
|
||||
```bash
|
||||
trains-agent daemon --queue default --foreground
|
||||
```
|
||||
|
||||
For actual service mode, all the stdout will be stored automatically into a temporary file (no need to pipe)
|
||||
Notice: with `--detached` flag, the *trains-agent* will be running in the background
|
||||
```bash
|
||||
trains-agent daemon --queue default
|
||||
trains-agent daemon --detached --queue default
|
||||
```
|
||||
|
||||
GPU allocation is controlled via the standard OS environment `NVIDIA_VISIBLE_DEVICES` or `--gpus` flag (or disabled with `--cpu-only`).
|
||||
@@ -175,42 +178,44 @@ If no flag is set, and `NVIDIA_VISIBLE_DEVICES` variable doesn't exist, all GPU'
|
||||
If `--cpu-only` flag is set, or `NVIDIA_VISIBLE_DEVICES` is an empty string (""), no gpu will be allocated for the `trains-agent`
|
||||
|
||||
Example: spin two agents, one per gpu on the same machine:
|
||||
Notice: with `--detached` flag, the *trains-agent* will be running in the background
|
||||
```bash
|
||||
trains-agent daemon --gpus 0 --queue default &
|
||||
trains-agent daemon --gpus 1 --queue default &
|
||||
trains-agent daemon --detached --gpus 0 --queue default
|
||||
trains-agent daemon --detached --gpus 1 --queue default
|
||||
```
|
||||
|
||||
Example: spin two agents, pulling from dedicated `dual_gpu` queue, two GPUs per agent
|
||||
```bash
|
||||
trains-agent daemon --gpus 0,1 --queue dual_gpu &
|
||||
trains-agent daemon --gpus 2,3 --queue dual_gpu &
|
||||
trains-agent daemon --detached --gpus 0,1 --queue dual_gpu
|
||||
trains-agent daemon --detached --gpus 2,3 --queue dual_gpu
|
||||
```
|
||||
|
||||
#### Starting the TRAINS Agent in docker mode
|
||||
#### Starting the Trains Agent in docker mode
|
||||
|
||||
For debug and experimentation, start the TRAINS agent in `foreground` mode, where all the output is printed to screen
|
||||
For debug and experimentation, start the Trains agent in `foreground` mode, where all the output is printed to screen
|
||||
```bash
|
||||
trains-agent daemon --queue default --docker --foreground
|
||||
```
|
||||
|
||||
For actual service mode, all the stdout will be stored automatically into a file (no need to pipe)
|
||||
Notice: with `--detached` flag, the *trains-agent* will be running in the background
|
||||
```bash
|
||||
trains-agent daemon --queue default --docker
|
||||
trains-agent daemon --detached --queue default --docker
|
||||
```
|
||||
|
||||
Example: spin two agents, one per gpu on the same machine, with default nvidia/cuda docker:
|
||||
```bash
|
||||
trains-agent daemon --gpus 0 --queue default --docker nvidia/cuda &
|
||||
trains-agent daemon --gpus 1 --queue default --docker nvidia/cuda &
|
||||
trains-agent daemon --detached --gpus 0 --queue default --docker nvidia/cuda
|
||||
trains-agent daemon --detached --gpus 1 --queue default --docker nvidia/cuda
|
||||
```
|
||||
|
||||
Example: spin two agents, pulling from dedicated `dual_gpu` queue, two GPUs per agent, with default nvidia/cuda docker:
|
||||
```bash
|
||||
trains-agent daemon --gpus 0,1 --queue dual_gpu --docker nvidia/cuda &
|
||||
trains-agent daemon --gpus 2,3 --queue dual_gpu --docker nvidia/cuda &
|
||||
trains-agent daemon --detached --gpus 0,1 --queue dual_gpu --docker nvidia/cuda
|
||||
trains-agent daemon --detached --gpus 2,3 --queue dual_gpu --docker nvidia/cuda
|
||||
```
|
||||
|
||||
#### Starting the TRAINS Agent - Priority Queues
|
||||
#### Starting the Trains Agent - Priority Queues
|
||||
|
||||
Priority Queues are also supported, example use case:
|
||||
|
||||
@@ -218,14 +223,14 @@ High priority queue: `important_jobs` Low priority queue: `default`
|
||||
```bash
|
||||
trains-agent daemon --queue important_jobs default
|
||||
```
|
||||
The **TRAINS agent** will first try to pull jobs from the `important_jobs` queue, only then it will fetch a job from the `default` queue.
|
||||
The **Trains Agent** will first try to pull jobs from the `important_jobs` queue, and only if that queue is empty will it fetch a job from the `default` queue.
|
||||
|
||||
Adding queues, managing job order within a queue and moving jobs between queues, is available using the Web UI, see example on our [open server](https://demoapp.trains.allegro.ai/workers-and-queues/queues)
|
||||
|
||||
# How do I create an experiment on the TRAINS server? <a name="from-scratch"></a>
|
||||
* Integrate [TRAINS](https://github.com/allegroai/trains) with your code
|
||||
## How do I create an experiment on the Trains Server? <a name="from-scratch"></a>
|
||||
* Integrate [Trains](https://github.com/allegroai/trains) with your code
|
||||
* Execute the code on your machine (Manually / PyCharm / Jupyter Notebook)
|
||||
* As your code is running, **TRAINS** creates an experiment logging all the necessary execution information:
|
||||
* As your code is running, **Trains** creates an experiment logging all the necessary execution information:
|
||||
- Git repository link and commit ID (or an entire jupyter notebook)
|
||||
- Git diff (we’re not saying you never commit and push, but still...)
|
||||
- Python packages used by your code (including specific versions used)
|
||||
@@ -234,7 +239,7 @@ Adding queues, managing job order within a queue and moving jobs between queues,
|
||||
|
||||
You now have a 'template' of your experiment with everything required for automated execution
|
||||
|
||||
* In the TRAINS UI, Right click on the experiment and select 'clone'. A copy of your experiment will be created.
|
||||
* In the Trains UI, Right click on the experiment and select 'clone'. A copy of your experiment will be created.
|
||||
* You now have a new draft experiment cloned from your original experiment, feel free to edit it
|
||||
- Change the Hyper-Parameters
|
||||
- Switch to the latest code base of the repository
|
||||
@@ -243,10 +248,31 @@ Adding queues, managing job order within a queue and moving jobs between queues,
|
||||
- Or simply change nothing to run the same experiment again...
|
||||
* Schedule the newly created experiment for execution: Right-click the experiment and select 'enqueue'
|
||||
|
||||
# AutoML and Orchestration Pipelines <a name="automl-pipes"></a>
|
||||
The TRAINS Agent can also be used to implement AutoML orchestration and Experiment Pipelines in conjunction with the TRAINS package.
|
||||
## Trains-Agent Services Mode <a name="services"></a>
|
||||
|
||||
Sample AutoML & Orchestration examples can be found in the TRAINS [example/automl](https://github.com/allegroai/trains/tree/master/examples/automl) folder.
|
||||
Trains-Agent Services is a special mode of Trains-Agent that provides the ability to launch long-lasting jobs
|
||||
that previously had to be executed on local / dedicated machines. It allows a single agent to
|
||||
launch multiple dockers (Tasks) for different use cases. To name a few use cases, auto-scaler service (spinning instances
|
||||
when the need arises and the budget allows), Controllers (Implementing pipelines and more sophisticated DevOps logic),
|
||||
Optimizer (such as Hyper-parameter Optimization or sweeping), and Application (such as interactive Bokeh apps for
|
||||
increased data transparency)
|
||||
|
||||
Trains-Agent Services mode will spin **any** task enqueued into the specified queue.
|
||||
Every task launched by Trains-Agent Services will be registered as a new node in the system,
|
||||
providing tracking and transparency capabilities.
|
||||
Currently, trains-agent in services-mode supports CPU-only configuration. Trains-agent services mode can be launched alongside GPU agents.
|
||||
|
||||
```bash
|
||||
trains-agent daemon --services-mode --detached --queue services --create-queue --docker ubuntu:18.04 --cpu-only
|
||||
```
|
||||
|
||||
**Note**: It is the user's responsibility to make sure the proper tasks are pushed into the specified queue.
|
||||
|
||||
|
||||
## AutoML and Orchestration Pipelines <a name="automl-pipes"></a>
|
||||
The Trains Agent can also be used to implement AutoML orchestration and Experiment Pipelines in conjunction with the Trains package.
|
||||
|
||||
Sample AutoML & Orchestration examples can be found in the Trains [example/automl](https://github.com/allegroai/trains/tree/master/examples/automl) folder.
|
||||
|
||||
AutoML examples
|
||||
- [Toy Keras training experiment](https://github.com/allegroai/trains/blob/master/examples/automl/automl_base_template_keras_simple.py)
|
||||
@@ -259,3 +285,7 @@ Experiment Pipeline examples
|
||||
- This example will "process data", and once done, will launch a copy of the 'second step' experiment-template
|
||||
- [Second step experiment](https://github.com/allegroai/trains/blob/master/examples/automl/toy_base_task.py)
|
||||
- In order to create an experiment-template in the system, this code must be executed once manually
|
||||
|
||||
## License
|
||||
|
||||
Apache License, Version 2.0 (see the [LICENSE](https://www.apache.org/licenses/LICENSE-2.0.html) for more information)
|
||||
|
||||
18
docker/agent/Dockerfile
Normal file
18
docker/agent/Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
# syntax = docker/dockerfile
|
||||
FROM nvidia/cuda
|
||||
|
||||
WORKDIR /usr/agent
|
||||
|
||||
COPY . /usr/agent
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get dist-upgrade -y
|
||||
RUN apt-get install -y curl python3-pip git
|
||||
RUN curl -sSL https://get.docker.com/ | sh
|
||||
RUN python3 -m pip install -U pip
|
||||
RUN python3 -m pip install trains-agent
|
||||
RUN python3 -m pip install -U "cryptography>=2.9"
|
||||
|
||||
ENV TRAINS_DOCKER_SKIP_GPUS_FLAG=1
|
||||
|
||||
ENTRYPOINT ["/usr/agent/entrypoint.sh"]
|
||||
19
docker/agent/entrypoint.sh
Executable file
19
docker/agent/entrypoint.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/sh
|
||||
|
||||
LOWER_PIP_UPDATE_VERSION="$(echo "$PIP_UPDATE_VERSION" | tr '[:upper:]' '[:lower:]')"
|
||||
LOWER_TRAINS_AGENT_UPDATE_VERSION="$(echo "$TRAINS_AGENT_UPDATE_VERSION" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
if [ "$LOWER_PIP_UPDATE_VERSION" = "yes" ] || [ "$LOWER_PIP_UPDATE_VERSION" = "true" ] ; then
|
||||
python3 -m pip install -U pip
|
||||
elif [ ! -z "$LOWER_PIP_UPDATE_VERSION" ] ; then
|
||||
python3 -m pip install pip$LOWER_PIP_UPDATE_VERSION ;
|
||||
fi
|
||||
|
||||
echo "TRAINS_AGENT_UPDATE_VERSION = $LOWER_TRAINS_AGENT_UPDATE_VERSION"
|
||||
if [ "$LOWER_TRAINS_AGENT_UPDATE_VERSION" = "yes" ] || [ "$LOWER_TRAINS_AGENT_UPDATE_VERSION" = "true" ] ; then
|
||||
python3 -m pip install trains-agent -U
|
||||
elif [ ! -z "$LOWER_TRAINS_AGENT_UPDATE_VERSION" ] ; then
|
||||
python3 -m pip install trains-agent$LOWER_TRAINS_AGENT_UPDATE_VERSION ;
|
||||
fi
|
||||
|
||||
python3 -m trains_agent daemon --docker "$TRAINS_AGENT_DEFAULT_BASE_DOCKER" --force-current-version $TRAINS_AGENT_EXTRA_ARGS
|
||||
16
docker/services/Dockerfile
Normal file
16
docker/services/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
||||
# syntax = docker/dockerfile
|
||||
FROM ubuntu:18.04
|
||||
|
||||
WORKDIR /usr/agent
|
||||
|
||||
COPY . /usr/agent
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get dist-upgrade -y
|
||||
RUN apt-get install -y curl python3-pip git
|
||||
RUN curl -sSL https://get.docker.com/ | sh
|
||||
RUN python3 -m pip install -U pip
|
||||
RUN python3 -m pip install trains-agent
|
||||
RUN python3 -m pip install -U "cryptography>=2.9"
|
||||
|
||||
ENTRYPOINT ["/usr/agent/entrypoint.sh"]
|
||||
14
docker/services/entrypoint.sh
Executable file
14
docker/services/entrypoint.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/sh
|
||||
|
||||
if [ -z "$TRAINS_FILES_HOST" ]; then
|
||||
TRAINS_HOST_IP=${TRAINS_HOST_IP:-$(curl -s https://ifconfig.me/ip)}
|
||||
fi
|
||||
|
||||
TRAINS_FILES_HOST=${TRAINS_FILES_HOST:-"http://$TRAINS_HOST_IP:8081"}
|
||||
TRAINS_WEB_HOST=${TRAINS_WEB_HOST:-"http://$TRAINS_HOST_IP:8080"}
|
||||
TRAINS_API_HOST=${TRAINS_API_HOST:-"http://$TRAINS_HOST_IP:8008"}
|
||||
|
||||
echo $TRAINS_FILES_HOST $TRAINS_WEB_HOST $TRAINS_API_HOST 1>&2
|
||||
|
||||
python3 -m pip install -q -U "trains-agent${TRAINS_AGENT_UPDATE_VERSION}"
|
||||
trains-agent daemon --services-mode --queue services --create-queue --docker $TRAINS_AGENT_DEFAULT_BASE_DOCKER --cpu-only $TRAINS_AGENT_EXTRA_ARGS
|
||||
@@ -13,11 +13,13 @@ api {
|
||||
}
|
||||
|
||||
agent {
|
||||
# Set GIT user/pass credentials
|
||||
# leave blank for GIT SSH credentials
|
||||
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
|
||||
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
|
||||
git_user=""
|
||||
git_pass=""
|
||||
|
||||
# Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
|
||||
force_git_ssh_protocol: false
|
||||
|
||||
# unique name of this worker, if None, created based on hostname:process_id
|
||||
# Overridden with os environment: TRAINS_WORKER_NAME
|
||||
@@ -38,8 +40,12 @@ agent {
|
||||
# currently supported pip and conda
|
||||
# poetry is used if pip selected and repository contains poetry.lock file
|
||||
package_manager: {
|
||||
# supported options: pip, conda
|
||||
# supported options: pip, conda, poetry
|
||||
type: pip,
|
||||
|
||||
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
|
||||
# pip_version: "<20"
|
||||
|
||||
# virtual environment inherits packages from system
|
||||
system_site_packages: false,
|
||||
# install with --upgrade
|
||||
@@ -51,6 +57,10 @@ agent {
|
||||
|
||||
# additional conda channels to use when installing with conda package manager
|
||||
conda_channels: ["pytorch", "conda-forge", ]
|
||||
|
||||
# set to True to support torch nightly build installation,
|
||||
# notice: torch nightly builds are ephemeral and are deleted from time to time
|
||||
torch_nightly: false,
|
||||
},
|
||||
|
||||
# target folder for virtual environments builds, created when executing experiment
|
||||
@@ -78,11 +88,22 @@ agent {
|
||||
# reload configuration file every daemon execution
|
||||
reload_config: false,
|
||||
|
||||
# pip cache folder used mapped into docker, for python package caching
|
||||
# pip cache folder mapped into docker, used for python package caching
|
||||
docker_pip_cache = ~/.trains/pip-cache
|
||||
# apt cache folder used mapped into docker, for ubuntu package caching
|
||||
# apt cache folder mapped into docker, used for ubuntu package caching
|
||||
docker_apt_cache = ~/.trains/apt-cache
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# these are local for this agent and will not be updated in the experiment's docker_cmd section
|
||||
# extra_docker_arguments: ["--ipc=host", ]
|
||||
|
||||
# optional shell script to run in docker when started before the experiment is started
|
||||
# extra_docker_shell_script: ["apt-get install -y bindfs", ]
|
||||
|
||||
# set to true in order to force "docker pull" before running an experiment using a docker image.
|
||||
# This makes sure the docker image is updated.
|
||||
docker_force_pull: false
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda"
|
||||
@@ -90,6 +111,11 @@ agent {
|
||||
# optional arguments to pass to docker image
|
||||
# arguments: ["--ipc=host"]
|
||||
}
|
||||
|
||||
# CUDA versions used for Conda setup & solving PyTorch wheel packages
|
||||
# These should be detected automatically. Override with the OS environment variables CUDA_VERSION / CUDNN_VERSION
|
||||
# cuda_version: 10.1
|
||||
# cudnn_version: 7.6
|
||||
}
|
||||
|
||||
sdk {
|
||||
@@ -126,6 +152,9 @@ sdk {
|
||||
quality: 87
|
||||
subsampling: 0
|
||||
}
|
||||
|
||||
# Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to True, each series should have its own graph)
|
||||
tensorboard_single_series_per_graph: False
|
||||
}
|
||||
|
||||
network {
|
||||
|
||||
59
examples/archive_experiments.py
Normal file
59
examples/archive_experiments.py
Normal file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/python3
|
||||
"""
|
||||
An example script that cleans up failed experiments by moving them to the archive
|
||||
"""
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
from trains_agent import APIClient
|
||||
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--project", "-P", help="Project ID. Only clean up experiments from this project")
|
||||
parser.add_argument("--user", "-U", help="User ID. Only clean up experiments assigned to this user")
|
||||
parser.add_argument(
|
||||
"--status", "-S", default="failed",
|
||||
help="Experiment status. Only clean up experiments with this status (default %(default)s)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--iterations", "-I", type=int,
|
||||
help="Number of iterations. Only clean up experiments with less or equal number of iterations"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sec-from-start", "-T", type=int,
|
||||
help="Seconds from start time. "
|
||||
"Only clean up experiments if less or equal number of seconds have elapsed since started"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
client = APIClient()
|
||||
|
||||
tasks = client.tasks.get_all(
|
||||
project=[args.project] if args.project else None,
|
||||
user=[args.user] if args.user else None,
|
||||
status=[args.status] if args.status else None,
|
||||
system_tags=["-archived"]
|
||||
)
|
||||
|
||||
count = 0
|
||||
|
||||
for task in tasks:
|
||||
if args.iterations and (task.last_iteration or 0) > args.iterations:
|
||||
continue
|
||||
if args.sec_from_start:
|
||||
if not task.started:
|
||||
continue
|
||||
if (datetime.utcnow() - task.started.replace(tzinfo=None)).total_seconds() > args.sec_from_start:
|
||||
continue
|
||||
|
||||
try:
|
||||
client.tasks.edit(
|
||||
task=task.id,
|
||||
system_tags=(task.system_tags or []) + ["archived"],
|
||||
force=True
|
||||
)
|
||||
count += 1
|
||||
except Exception as ex:
|
||||
print("Failed editing experiment: {}".format(ex))
|
||||
|
||||
print("Cleaned up {} experiments".format(count))
|
||||
587
examples/dynamic_cloud_cluster.ipynb
Normal file
587
examples/dynamic_cloud_cluster.ipynb
Normal file
@@ -0,0 +1,587 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Auto-Magically Spin AWS EC2 Instances On Demand \n",
|
||||
"# and Create a Dynamic Cluster Running *Trains-Agent*\n",
|
||||
"\n",
|
||||
"### Define your budget and execute the notebook, that's it\n",
|
||||
"### You now have a fully managed cluster on AWS 🎉 🎊 "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**trains-agent**'s main goal is to quickly pull a job from an execution queue, setup the environment (as defined in the experiment, including git cloning, python packages etc.) then execute the experiment and monitor it.\n",
|
||||
"\n",
|
||||
"This notebook defines a cloud budget (currently only AWS is supported, but feel free to expand with PRs), and spins an instance the minute a job is waiting for execution. It will also spin down idle machines, saving you some $$$ :)\n",
|
||||
"\n",
|
||||
"Configuration steps\n",
|
||||
"- Define maximum budget to be used (instance type / number of instances).\n",
|
||||
"- Create new execution *queues* in the **trains-server**.\n",
|
||||
"- Define mapping between the created *queues* and an instance budget.\n",
|
||||
"\n",
|
||||
"**TL;DR - This notebook:**\n",
|
||||
"- Will spin up instances if there are jobs in the execution *queues*, until it hits the budget limit. \n",
|
||||
"- If machines are idle, it will spin them down.\n",
|
||||
"\n",
|
||||
"The controller implementation itself is stateless, meaning you can always re-execute the notebook, if for some reason it stopped.\n",
|
||||
"\n",
|
||||
"It is as simple as it sounds, but extremely powerful\n",
|
||||
"\n",
|
||||
"Enjoy your newly created dynamic cluster :)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Install & import required packages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install trains-agent\n",
|
||||
"!pip install boto3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Define AWS instance types and configuration (Instance Type, EBS, AMI etc.)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# AWS EC2 machines types - default AMI - NVIDIA Deep Learning AMI 19.11.3\n",
|
||||
"RESOURCE_CONFIGURATIONS = {\n",
|
||||
" \"amazon_ec2_normal\": {\n",
|
||||
" \"instance_type\": \"g4dn.4xlarge\",\n",
|
||||
" \"is_spot\": False,\n",
|
||||
" \"availability_zone\": \"us-east-1b\",\n",
|
||||
" \"ami_id\": \"ami-07c95cafbb788face\",\n",
|
||||
" \"ebs_device_name\": \"/dev/xvda\",\n",
|
||||
" \"ebs_volume_size\": 100,\n",
|
||||
" \"ebs_volume_type\": \"gp2\",\n",
|
||||
" },\n",
|
||||
" \"amazon_ec2_high\": {\n",
|
||||
" \"instance_type\": \"g4dn.8xlarge\",\n",
|
||||
" \"is_spot\": False,\n",
|
||||
" \"availability_zone\": \"us-east-1b\",\n",
|
||||
" \"ami_id\": \"ami-07c95cafbb788face\",\n",
|
||||
" \"ebs_device_name\": \"/dev/xvda\",\n",
|
||||
" \"ebs_volume_size\": 100,\n",
|
||||
" \"ebs_volume_type\": \"gp2\",\n",
|
||||
" },\n",
|
||||
"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Define machine budget per execution queue\n",
|
||||
"\n",
|
||||
"Now that we defined our budget, we need to connect it with the **Trains** cluster.\n",
|
||||
"\n",
|
||||
"We map each queue to a resource type (instance type).\n",
|
||||
"\n",
|
||||
"Create two queues in the WebUI:\n",
|
||||
"- Browse to http://your_trains_server_ip:8080/workers-and-queues/queues\n",
|
||||
"- Then click on the \"New Queue\" button and name your queues \"aws_normal\" and \"aws_high\" respectively\n",
|
||||
"\n",
|
||||
"The QUEUES dictionary holds the mapping between a queue name and the type/number of instances to spin up for that specific queue.\n",
|
||||
"```\n",
|
||||
"QUEUES = {\n",
|
||||
" 'queue_name': [(\"instance-type-as-defined-in-RESOURCE_CONFIGURATIONS\", max_number_of_instances), ]\n",
|
||||
"}\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Trains-Agent Queues - Machines budget per Queue\n",
|
||||
"# Per queue: list of (machine type as defined in RESOURCE_CONFIGURATIONS,\n",
|
||||
"# max instances for the specific queue). Order machines from most preferred to least.\n",
|
||||
"QUEUES = {\n",
|
||||
" \"aws_normal\": [(\"amazon_ec2_normal\", 2),],\n",
|
||||
" \"aws_high\": [(\"amazon_ec2_high\", 1)],\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Credentials for your AWS account, as well as for your **Trains-Server**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# AWS credentials (leave empty to use credentials set using the aws cli)\n",
|
||||
"CLOUD_CREDENTIALS_KEY = \"\"\n",
|
||||
"CLOUD_CREDENTIALS_SECRET = \"\"\n",
|
||||
"CLOUD_CREDENTIALS_REGION = \"us-east-1\"\n",
|
||||
"\n",
|
||||
"# TRAINS configuration\n",
|
||||
"TRAINS_SERVER_WEB_SERVER = \"http://localhost:8080\"\n",
|
||||
"TRAINS_SERVER_API_SERVER = \"http://localhost:8008\"\n",
|
||||
"TRAINS_SERVER_FILES_SERVER = \"http://localhost:8081\"\n",
|
||||
"# TRAINS credentials\n",
|
||||
"TRAINS_ACCESS_KEY = \"\"\n",
|
||||
"TRAINS_SECRET_KEY = \"\"\n",
|
||||
"# Git User/Pass to be used by trains-agent,\n",
|
||||
"# leave empty if image already contains git ssh-key\n",
|
||||
"TRAINS_GIT_USER = \"\"\n",
|
||||
"TRAINS_GIT_PASS = \"\"\n",
|
||||
"\n",
|
||||
"# Additional fields for trains.conf file created on the remote instance\n",
|
||||
"# for example: 'agent.default_docker.image: \"nvidia/cuda:10.0-cudnn7-runtime\"'\n",
|
||||
"EXTRA_TRAINS_CONF = \"\"\"\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Bash script to run on instances before running trains-agent\n",
|
||||
"# Example: \"\"\"\n",
|
||||
"# echo \"This is the first line\"\n",
|
||||
"# echo \"This is the second line\"\n",
|
||||
"# \"\"\"\n",
|
||||
"EXTRA_BASH_SCRIPT = \"\"\"\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Default docker for trains-agent when running in docker mode (requires docker v19.03 and above). \n",
|
||||
"# Leave empty to run trains-agent in non-docker mode.\n",
|
||||
"DEFAULT_DOCKER_IMAGE = \"nvidia/cuda\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Controller Internal Definitions\n",
|
||||
"\n",
|
||||
"# maximum idle time in minutes, after which the instance will be shutdown\n",
|
||||
"MAX_IDLE_TIME_MIN = 15\n",
|
||||
"# polling interval in minutes\n",
|
||||
"# make sure to increase in case bash commands were added in EXTRA_BASH_SCRIPT\n",
|
||||
"POLLING_INTERVAL_MIN = 5.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Import Packages and Budget Definition Sanity Check"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"import re\n",
|
||||
"import os\n",
|
||||
"from itertools import chain\n",
|
||||
"from operator import itemgetter\n",
|
||||
"from time import sleep, time\n",
|
||||
"\n",
|
||||
"import boto3\n",
|
||||
"from trains_agent.backend_api.session.client import APIClient"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sanity Check - Validate Queue Resources\n",
|
||||
"if len(set(map(itemgetter(0), chain(*QUEUES.values())))) != sum(\n",
|
||||
" map(len, QUEUES.values())\n",
|
||||
"):\n",
|
||||
" print(\n",
|
||||
" \"Error: at least one resource name is used in multiple queues. \"\n",
|
||||
" \"A resource name can only appear in a single queue definition.\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# Encode EXTRA_TRAINS_CONF for later bash script usage\n",
|
||||
"EXTRA_TRAINS_CONF_ENCODED = \"\\\\\\\"\".join(EXTRA_TRAINS_CONF.split(\"\\\"\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Cloud specific implementation of spin up/down - currently supports AWS only"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cloud-specific implementation (currently, only AWS EC2 is supported)\n",
|
||||
"def spin_up_worker(resource, worker_id_prefix, queue_name):\n",
|
||||
" \"\"\"\n",
|
||||
" Creates a new worker for trains.\n",
|
||||
" First, create an instance in the cloud and install some required packages.\n",
|
||||
" Then, define trains-agent environment variables and run \n",
|
||||
" trains-agent for the specified queue.\n",
|
||||
" NOTE: - Will wait until instance is running\n",
|
||||
" - This implementation assumes the instance image already has docker installed\n",
|
||||
"\n",
|
||||
" :param str resource: resource name, as defined in RESOURCE_CONFIGURATIONS and QUEUES.\n",
|
||||
" :param str worker_id_prefix: worker name prefix\n",
|
||||
" :param str queue_name: trains queue to listen to\n",
|
||||
" \"\"\"\n",
|
||||
" resource_conf = RESOURCE_CONFIGURATIONS[resource]\n",
|
||||
" # Add worker type and AWS instance type to the worker name.\n",
|
||||
" worker_id = \"{worker_id_prefix}:{worker_type}:{instance_type}\".format(\n",
|
||||
" worker_id_prefix=worker_id_prefix,\n",
|
||||
" worker_type=resource,\n",
|
||||
" instance_type=resource_conf[\"instance_type\"],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # user_data script will automatically run when the instance is started. \n",
|
||||
" # It will install the required packages for trains-agent, configure it using \n",
|
||||
" # environment variables and run trains-agent on the required queue\n",
|
||||
" user_data = \"\"\"#!/bin/bash\n",
|
||||
" sudo apt-get update\n",
|
||||
" sudo apt-get install -y python3-dev\n",
|
||||
" sudo apt-get install -y python3-pip\n",
|
||||
" sudo apt-get install -y gcc\n",
|
||||
" sudo apt-get install -y git\n",
|
||||
" sudo apt-get install -y build-essential\n",
|
||||
" python3 -m pip install -U pip\n",
|
||||
" python3 -m pip install virtualenv\n",
|
||||
" python3 -m virtualenv trains_agent_venv\n",
|
||||
" source trains_agent_venv/bin/activate\n",
|
||||
" python -m pip install trains-agent\n",
|
||||
" echo 'agent.git_user=\\\"{git_user}\\\"' >> /root/trains.conf\n",
|
||||
" echo 'agent.git_pass=\\\"{git_pass}\\\"' >> /root/trains.conf\n",
|
||||
" echo \"{trains_conf}\" >> /root/trains.conf\n",
|
||||
" export TRAINS_API_HOST={api_server}\n",
|
||||
" export TRAINS_WEB_HOST={web_server}\n",
|
||||
" export TRAINS_FILES_HOST={files_server}\n",
|
||||
" export DYNAMIC_INSTANCE_ID=`curl http://169.254.169.254/latest/meta-data/instance-id`\n",
|
||||
" export TRAINS_WORKER_ID={worker_id}:$DYNAMIC_INSTANCE_ID\n",
|
||||
" export TRAINS_API_ACCESS_KEY='{access_key}'\n",
|
||||
" export TRAINS_API_SECRET_KEY='{secret_key}'\n",
|
||||
" {bash_script}\n",
|
||||
" source ~/.bashrc\n",
|
||||
" python -m trains_agent --config-file '/root/trains.conf' daemon --queue '{queue}' {docker}\n",
|
||||
" shutdown\n",
|
||||
" \"\"\".format(\n",
|
||||
" api_server=TRAINS_SERVER_API_SERVER,\n",
|
||||
" web_server=TRAINS_SERVER_WEB_SERVER,\n",
|
||||
" files_server=TRAINS_SERVER_FILES_SERVER,\n",
|
||||
" worker_id=worker_id,\n",
|
||||
" access_key=TRAINS_ACCESS_KEY,\n",
|
||||
" secret_key=TRAINS_SECRET_KEY,\n",
|
||||
" queue=queue_name,\n",
|
||||
" git_user=TRAINS_GIT_USER,\n",
|
||||
" git_pass=TRAINS_GIT_PASS,\n",
|
||||
" trains_conf=EXTRA_TRAINS_CONF_ENCODED,\n",
|
||||
" bash_script=EXTRA_BASH_SCRIPT,\n",
|
||||
" docker=\"--docker '{}'\".format(DEFAULT_DOCKER_IMAGE) if DEFAULT_DOCKER_IMAGE else \"\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" ec2 = boto3.client(\n",
|
||||
" \"ec2\",\n",
|
||||
" aws_access_key_id=CLOUD_CREDENTIALS_KEY or None,\n",
|
||||
" aws_secret_access_key=CLOUD_CREDENTIALS_SECRET or None,\n",
|
||||
" region_name=CLOUD_CREDENTIALS_REGION\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if resource_conf[\"is_spot\"]:\n",
|
||||
" # Create a request for a spot instance in AWS\n",
|
||||
" encoded_user_data = base64.b64encode(user_data.encode(\"ascii\")).decode(\"ascii\")\n",
|
||||
" instances = ec2.request_spot_instances(\n",
|
||||
" LaunchSpecification={\n",
|
||||
" \"ImageId\": resource_conf[\"ami_id\"],\n",
|
||||
" \"InstanceType\": resource_conf[\"instance_type\"],\n",
|
||||
" \"Placement\": {\"AvailabilityZone\": resource_conf[\"availability_zone\"]},\n",
|
||||
" \"UserData\": encoded_user_data,\n",
|
||||
" \"BlockDeviceMappings\": [\n",
|
||||
" {\n",
|
||||
" \"DeviceName\": resource_conf[\"ebs_device_name\"],\n",
|
||||
" \"Ebs\": {\n",
|
||||
" \"VolumeSize\": resource_conf[\"ebs_volume_size\"],\n",
|
||||
" \"VolumeType\": resource_conf[\"ebs_volume_type\"],\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Wait until spot request is fulfilled\n",
|
||||
" request_id = instances[\"SpotInstanceRequests\"][0][\"SpotInstanceRequestId\"]\n",
|
||||
" waiter = ec2.get_waiter(\"spot_instance_request_fulfilled\")\n",
|
||||
" waiter.wait(SpotInstanceRequestIds=[request_id])\n",
|
||||
" # Get the instance object for later use\n",
|
||||
" response = ec2.describe_spot_instance_requests(\n",
|
||||
" SpotInstanceRequestIds=[request_id]\n",
|
||||
" )\n",
|
||||
" instance_id = response[\"SpotInstanceRequests\"][0][\"InstanceId\"]\n",
|
||||
"\n",
|
||||
" else:\n",
|
||||
" # Create a new EC2 instance\n",
|
||||
" instances = ec2.run_instances(\n",
|
||||
" ImageId=resource_conf[\"ami_id\"],\n",
|
||||
" MinCount=1,\n",
|
||||
" MaxCount=1,\n",
|
||||
" InstanceType=resource_conf[\"instance_type\"],\n",
|
||||
" UserData=user_data,\n",
|
||||
" InstanceInitiatedShutdownBehavior='terminate',\n",
|
||||
" BlockDeviceMappings=[\n",
|
||||
" {\n",
|
||||
" \"DeviceName\": resource_conf[\"ebs_device_name\"],\n",
|
||||
" \"Ebs\": {\n",
|
||||
" \"VolumeSize\": resource_conf[\"ebs_volume_size\"],\n",
|
||||
" \"VolumeType\": resource_conf[\"ebs_volume_type\"],\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Get the instance object for later use\n",
|
||||
" instance_id = instances[\"Instances\"][0][\"InstanceId\"]\n",
|
||||
"\n",
|
||||
" instance = boto3.resource(\n",
|
||||
" \"ec2\",\n",
|
||||
" aws_access_key_id=CLOUD_CREDENTIALS_KEY or None,\n",
|
||||
" aws_secret_access_key=CLOUD_CREDENTIALS_SECRET or None,\n",
|
||||
" region_name=CLOUD_CREDENTIALS_REGION\n",
|
||||
" ).Instance(instance_id)\n",
|
||||
"\n",
|
||||
" # Wait until instance is in running state\n",
|
||||
" instance.wait_until_running()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Cloud-specific implementation (currently, only AWS EC2 is supported)\n",
|
||||
"def spin_down_worker(instance_id):\n",
|
||||
" \"\"\"\n",
|
||||
" Destroys the cloud instance.\n",
|
||||
"\n",
|
||||
" :param str instance_id: Cloud instance ID to be destroyed \n",
|
||||
" (currently, only AWS EC2 is supported)\n",
|
||||
" \"\"\"\n",
|
||||
" try:\n",
|
||||
" boto3.resource(\n",
|
||||
" \"ec2\",\n",
|
||||
" aws_access_key_id=CLOUD_CREDENTIALS_KEY or None,\n",
|
||||
" aws_secret_access_key=CLOUD_CREDENTIALS_SECRET or None,\n",
|
||||
" region_name=CLOUD_CREDENTIALS_REGION\n",
|
||||
" ).instances.filter(InstanceIds=[instance_id]).terminate()\n",
|
||||
" except Exception as ex:\n",
|
||||
" raise ex"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"###### Controller Implementation and Logic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def supervisor():\n",
|
||||
" \"\"\"\n",
|
||||
" Spin up or down resources as necessary.\n",
|
||||
" - For every queue in QUEUES do the following:\n",
|
||||
" 1. Check if there are tasks waiting in the queue.\n",
|
||||
" 2. Check if there are enough idle workers available for those tasks.\n",
|
||||
" 3. In case more instances are required, and we haven't reached max instances allowed,\n",
|
||||
" create the required instances with regards to the maximum number defined in QUEUES\n",
|
||||
" Choose which instance to create according to their order in QUEUES. Won't create \n",
|
||||
" more instances if the maximum number defined has already been reached.\n",
|
||||
" - spin down instances according to their idle time. instance which is idle for \n",
|
||||
" more than MAX_IDLE_TIME_MIN minutes would be removed.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" # Internal definitions\n",
|
||||
" workers_prefix = \"dynamic_aws\"\n",
|
||||
" # Worker's id in trains would be composed from:\n",
|
||||
" # prefix, name, instance_type and cloud_id separated by ':'\n",
|
||||
" workers_pattern = re.compile(\n",
|
||||
" r\"^(?P<prefix>[^:]+):(?P<name>[^:]+):(?P<instance_type>[^:]+):(?P<cloud_id>[^:]+)\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Set up the environment variables for trains\n",
|
||||
" os.environ[\"TRAINS_API_HOST\"] = TRAINS_SERVER_API_SERVER\n",
|
||||
" os.environ[\"TRAINS_WEB_HOST\"] = TRAINS_SERVER_WEB_SERVER\n",
|
||||
" os.environ[\"TRAINS_FILES_HOST\"] = TRAINS_SERVER_FILES_SERVER\n",
|
||||
" os.environ[\"TRAINS_API_ACCESS_KEY\"] = TRAINS_ACCESS_KEY\n",
|
||||
" os.environ[\"TRAINS_API_SECRET_KEY\"] = TRAINS_SECRET_KEY\n",
|
||||
" api_client = APIClient()\n",
|
||||
"\n",
|
||||
" # Verify the requested queues exist and create those that don't exist\n",
|
||||
" all_queues = [q.name for q in list(api_client.queues.get_all())]\n",
|
||||
" missing_queues = [q for q in QUEUES if q not in all_queues]\n",
|
||||
" for q in missing_queues:\n",
|
||||
" api_client.queues.create(q)\n",
|
||||
"\n",
|
||||
" idle_workers = {}\n",
|
||||
" while True:\n",
|
||||
" queue_name_to_id = {\n",
|
||||
" queue.name: queue.id for queue in api_client.queues.get_all()\n",
|
||||
" }\n",
|
||||
" resource_to_queue = {\n",
|
||||
" item[0]: queue\n",
|
||||
" for queue, resources in QUEUES.items()\n",
|
||||
" for item in resources\n",
|
||||
" }\n",
|
||||
" all_workers = [\n",
|
||||
" worker\n",
|
||||
" for worker in api_client.workers.get_all()\n",
|
||||
" if workers_pattern.match(worker.id)\n",
|
||||
" and workers_pattern.match(worker.id)[\"prefix\"] == workers_prefix\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" # Workers without a task, are added to the idle list\n",
|
||||
" for worker in all_workers:\n",
|
||||
" if not hasattr(worker, \"task\") or not worker.task:\n",
|
||||
" if worker.id not in idle_workers:\n",
|
||||
" resource_name = workers_pattern.match(worker.id)[\"instance_type\"]\n",
|
||||
" idle_workers[worker.id] = (time(), resource_name, worker)\n",
|
||||
" elif hasattr(worker, \"task\") and worker.task and worker.id in idle_workers:\n",
|
||||
" idle_workers.pop(worker.id, None)\n",
|
||||
"\n",
|
||||
" required_idle_resources = [] # idle resources we'll need to keep running\n",
|
||||
" allocate_new_resources = [] # resources that will need to be started\n",
|
||||
" # Check if we have tasks waiting on one of the designated queues\n",
|
||||
" for queue in QUEUES:\n",
|
||||
" entries = api_client.queues.get_by_id(queue_name_to_id[queue]).entries\n",
|
||||
" if entries and len(entries) > 0:\n",
|
||||
" queue_resources = QUEUES[queue]\n",
|
||||
"\n",
|
||||
" # If we have an idle worker matching the required resource,\n",
|
||||
" # remove it from the required allocation resources\n",
|
||||
" free_queue_resources = [\n",
|
||||
" resource\n",
|
||||
" for _, resource, _ in idle_workers.values()\n",
|
||||
" if resource in queue_resources\n",
|
||||
" ]\n",
|
||||
" required_idle_resources.extend(free_queue_resources)\n",
|
||||
" spin_up_count = len(entries) - len(free_queue_resources)\n",
|
||||
" spin_up_resources = []\n",
|
||||
"\n",
|
||||
" # Add as many resources as possible to handle this queue's entries\n",
|
||||
" for resource, max_instances in queue_resources:\n",
|
||||
" if len(spin_up_resources) >= spin_up_count:\n",
|
||||
" break\n",
|
||||
" max_allowed = max_instances - len(\n",
|
||||
" [\n",
|
||||
" worker\n",
|
||||
" for worker in all_workers\n",
|
||||
" if workers_pattern.match(worker.id)[\"name\"] == resource\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" spin_up_resources.extend(\n",
|
||||
" [resource] * min(max_allowed, spin_up_count)\n",
|
||||
" )\n",
|
||||
" allocate_new_resources.extend(spin_up_resources)\n",
|
||||
"\n",
|
||||
" # Now we actually spin the new machines\n",
|
||||
" for resource in allocate_new_resources:\n",
|
||||
" spin_up_worker(resource, workers_prefix, resource_to_queue[resource])\n",
|
||||
"\n",
|
||||
" # Go over the idle workers list, and spin down idle workers\n",
|
||||
" for timestamp, resources, worker in idle_workers.values():\n",
|
||||
" # skip resource types that might be needed\n",
|
||||
" if resources in required_idle_resources:\n",
|
||||
" continue\n",
|
||||
" # Remove from both aws and trains all instances that are \n",
|
||||
" # idle for longer than MAX_IDLE_TIME_MIN\n",
|
||||
" if time() - timestamp > MAX_IDLE_TIME_MIN * 60.0:\n",
|
||||
" cloud_id = workers_pattern.match(worker.id)[\"cloud_id\"]\n",
|
||||
" spin_down_worker(cloud_id)\n",
|
||||
" worker.unregister()\n",
|
||||
"\n",
|
||||
" # Nothing else to do\n",
|
||||
" sleep(POLLING_INTERVAL_MIN * 60.0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Execute Forever* (the controller is stateless, so you can always re-execute the notebook)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Loop forever, it is okay we are stateless\n",
|
||||
"while True:\n",
|
||||
" try:\n",
|
||||
" supervisor()\n",
|
||||
" except Exception as ex:\n",
|
||||
" print(\"Warning! exception occurred: {ex}\\nRetry in 15 seconds\".format(ex=ex))\n",
|
||||
" sleep(15)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.0"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -3,7 +3,6 @@ enum34>=0.9 ; python_version < '3.6'
|
||||
furl>=2.0.0
|
||||
future>=0.16.0
|
||||
humanfriendly>=2.1
|
||||
jsonmodels>=2.2
|
||||
jsonschema>=2.6.0
|
||||
pathlib2>=2.3.0
|
||||
psutil>=3.4.2
|
||||
@@ -15,9 +14,8 @@ PyYAML>=3.12
|
||||
requests-file>=1.4.2
|
||||
requests>=2.20.0
|
||||
requirements_parser>=0.2.0
|
||||
semantic_version>=2.6.0
|
||||
six>=1.11.0
|
||||
tqdm>=4.19.5
|
||||
typing>=3.6.4
|
||||
urllib3>=1.21.1
|
||||
virtualenv>=16
|
||||
virtualenv>=16,<20
|
||||
|
||||
27
setup.py
27
setup.py
@@ -4,28 +4,31 @@ TRAINS-AGENT DevOps for machine/deep learning
|
||||
https://github.com/allegroai/trains-agent
|
||||
"""
|
||||
|
||||
import os.path
|
||||
# Always prefer setuptools over distutils
|
||||
from setuptools import setup, find_packages
|
||||
from six import exec_
|
||||
from pathlib2 import Path
|
||||
|
||||
def read_text(filepath):
|
||||
with open(filepath, "r") as f:
|
||||
return f.read()
|
||||
|
||||
here = Path(__file__).resolve().parent
|
||||
|
||||
here = os.path.dirname(__file__)
|
||||
# Get the long description from the README file
|
||||
long_description = (here / 'README.md').read_text()
|
||||
long_description = read_text(os.path.join(here, 'README.md'))
|
||||
|
||||
|
||||
def read_version_string():
|
||||
result = {}
|
||||
exec_((here / 'trains_agent/version.py').read_text(), result)
|
||||
return result['__version__']
|
||||
def read_version_string(version_file):
|
||||
for line in read_text(version_file).splitlines():
|
||||
if line.startswith('__version__'):
|
||||
delim = '"' if '"' in line else "'"
|
||||
return line.split(delim)[1]
|
||||
else:
|
||||
raise RuntimeError("Unable to find version string.")
|
||||
|
||||
|
||||
version = read_version_string()
|
||||
|
||||
requirements = (here / 'requirements.txt').read_text().splitlines()
|
||||
version = read_version_string("trains_agent/version.py")
|
||||
|
||||
requirements = read_text(os.path.join(here, 'requirements.txt')).splitlines()
|
||||
|
||||
setup(
|
||||
name='trains_agent',
|
||||
|
||||
@@ -35,7 +35,7 @@ def trains_agentyaml(tmpdir):
|
||||
def _method(template_file):
|
||||
file = tmpdir.join("trains_agent.yaml")
|
||||
with (PROJECT_ROOT / "tests/templates" / template_file).open() as f:
|
||||
code = yaml.load(f)
|
||||
code = yaml.load(f, Loader=yaml.SafeLoader)
|
||||
yield Namespace(code=code, file=file.strpath)
|
||||
file.write(yaml.dump(code))
|
||||
return _method
|
||||
|
||||
@@ -30,6 +30,6 @@ from trains_agent.helper.repo import VCS
|
||||
),
|
||||
)
|
||||
def test(url, expected):
|
||||
result = VCS.resolve_ssh_url(url)
|
||||
result = VCS.replace_ssh_url(url)
|
||||
expected = expected or url
|
||||
assert result == expected
|
||||
|
||||
@@ -11,7 +11,7 @@ from contextlib import contextmanager
|
||||
from typing import Iterator, ContextManager, Sequence, IO, Text
|
||||
from uuid import uuid4
|
||||
|
||||
from trains_agent.backend_api.services.tasks import Script
|
||||
from trains_agent.backend_api.services import tasks
|
||||
from trains_agent.backend_api.session.client import APIClient
|
||||
from pathlib2 import Path
|
||||
from pytest import fixture
|
||||
@@ -154,7 +154,7 @@ def test_entry_point_warning(client):
|
||||
"""
|
||||
with create_task(
|
||||
client,
|
||||
script=Script(diff="print('hello')", entry_point="foo.py", repository=""),
|
||||
script=tasks.Script(diff="print('hello')", entry_point="foo.py", repository=""),
|
||||
**DEFAULT_TASK_ARGS
|
||||
) as task, iterate_output(SHORT_TIMEOUT, run_task(task)) as output:
|
||||
for line in output:
|
||||
@@ -172,7 +172,7 @@ def test_run_no_dirs(client):
|
||||
script = "print('{}')".format(uuid)
|
||||
with create_task(
|
||||
client,
|
||||
script=Script(diff=script, entry_point="", repository="", working_dir=""),
|
||||
script=tasks.Script(diff=script, entry_point="", repository="", working_dir=""),
|
||||
**DEFAULT_TASK_ARGS
|
||||
) as task, iterate_output(SHORT_TIMEOUT, run_task(task)) as output:
|
||||
search_lines(
|
||||
@@ -196,7 +196,7 @@ def test_run_working_dir(client):
|
||||
script = "print('{}')".format(uuid)
|
||||
with create_task(
|
||||
client,
|
||||
script=Script(
|
||||
script=tasks.Script(
|
||||
diff=script,
|
||||
entry_point="",
|
||||
repository="git@bitbucket.org:seematics/roee_test_git.git",
|
||||
@@ -223,7 +223,7 @@ def test_regular_task(client):
|
||||
"""
|
||||
with create_task(
|
||||
client,
|
||||
script=Script(
|
||||
script=tasks.Script(
|
||||
entry_point="noop.py",
|
||||
repository="git@bitbucket.org:seematics/roee_test_git.git",
|
||||
),
|
||||
@@ -241,7 +241,7 @@ def test_regular_task_nested(client):
|
||||
"""
|
||||
with create_task(
|
||||
client,
|
||||
script=Script(
|
||||
script=tasks.Script(
|
||||
entry_point="noop_nested.py",
|
||||
working_dir="no_reqs",
|
||||
repository="git@bitbucket.org:seematics/roee_test_git.git",
|
||||
|
||||
@@ -1 +1 @@
|
||||
|
||||
from .backend_api.session.client import APIClient
|
||||
|
||||
@@ -20,6 +20,8 @@ from .interface import get_parser
|
||||
def run_command(parser, args, command_name):
|
||||
|
||||
debug = args.debug
|
||||
session.Session.set_debug_mode(debug)
|
||||
|
||||
if command_name and command_name.lower() in ('config', 'init'):
|
||||
command_class = commands.Config
|
||||
elif len(command_name.split('.')) < 2:
|
||||
|
||||
@@ -9,10 +9,14 @@
|
||||
# worker_name: "trains-agent-machine1"
|
||||
worker_name: ""
|
||||
|
||||
# Set GIT user/pass credentials for cloning code, leave blank for GIT SSH credentials.
|
||||
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
|
||||
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
|
||||
# git_user: ""
|
||||
# git_pass: ""
|
||||
|
||||
# Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
|
||||
force_git_ssh_protocol: false
|
||||
|
||||
# Set the python version to use when creating the virtual environment and launching the experiment
|
||||
# Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"
|
||||
# The default is the python executing the trains_agent
|
||||
@@ -22,9 +26,12 @@
|
||||
# currently supported pip and conda
|
||||
# poetry is used if pip selected and repository contains poetry.lock file
|
||||
package_manager: {
|
||||
# supported options: pip, conda
|
||||
# supported options: pip, conda, poetry
|
||||
type: pip,
|
||||
|
||||
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
|
||||
pip_version: "<20.2",
|
||||
|
||||
# virtual environment inherits packages from system
|
||||
system_site_packages: false,
|
||||
|
||||
@@ -33,10 +40,13 @@
|
||||
|
||||
# additional artifact repositories to use when installing python packages
|
||||
# extra_index_url: ["https://allegroai.jfrog.io/trainsai/api/pypi/public/simple"]
|
||||
extra_index_url: []
|
||||
|
||||
# additional conda channels to use when installing with conda package manager
|
||||
conda_channels: ["defaults", "conda-forge", "pytorch", ]
|
||||
|
||||
# set to True to support torch nightly build installation,
|
||||
# notice: torch nightly builds are ephemeral and are deleted from time to time
|
||||
torch_nightly: false,
|
||||
},
|
||||
|
||||
# target folder for virtual environments builds, created when executing experiment
|
||||
@@ -64,11 +74,22 @@
|
||||
# reload configuration file every daemon execution
|
||||
reload_config: false,
|
||||
|
||||
# pip cache folder used mapped into docker, for python package caching
|
||||
# pip cache folder mapped into docker, used for python package caching
|
||||
docker_pip_cache = ~/.trains/pip-cache
|
||||
# apt cache folder used mapped into docker, for ubuntu package caching
|
||||
# apt cache folder mapped into docker, used for ubuntu package caching
|
||||
docker_apt_cache = ~/.trains/apt-cache
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# these are local for this agent and will not be updated in the experiment's docker_cmd section
|
||||
# extra_docker_arguments: ["--ipc=host", ]
|
||||
|
||||
# optional shell script to run in docker when started before the experiment is started
|
||||
# extra_docker_shell_script: ["apt-get install -y bindfs", ]
|
||||
|
||||
# set to true in order to force "docker pull" before running an experiment using a docker image.
|
||||
# This makes sure the docker image is updated.
|
||||
docker_force_pull: false
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda"
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from .v2_4 import auth
|
||||
from .v2_4 import debug
|
||||
from .v2_4 import queues
|
||||
from .v2_4 import tasks
|
||||
from .v2_4 import workers
|
||||
from .v2_5 import auth
|
||||
from .v2_5 import debug
|
||||
from .v2_5 import queues
|
||||
from .v2_5 import tasks
|
||||
from .v2_5 import workers
|
||||
from .v2_5 import events
|
||||
from .v2_5 import models
|
||||
|
||||
__all__ = [
|
||||
'auth',
|
||||
@@ -10,4 +12,6 @@ __all__ = [
|
||||
'queues',
|
||||
'tasks',
|
||||
'workers',
|
||||
'events',
|
||||
'models',
|
||||
]
|
||||
|
||||
@@ -151,7 +151,7 @@ class CreateCredentialsRequest(Request):
|
||||
|
||||
_service = "auth"
|
||||
_action = "create_credentials"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'additionalProperties': False,
|
||||
'definitions': {},
|
||||
@@ -169,7 +169,7 @@ class CreateCredentialsResponse(Response):
|
||||
"""
|
||||
_service = "auth"
|
||||
_action = "create_credentials"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {
|
||||
@@ -230,7 +230,7 @@ class EditUserRequest(Request):
|
||||
|
||||
_service = "auth"
|
||||
_action = "edit_user"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -287,7 +287,7 @@ class EditUserResponse(Response):
|
||||
"""
|
||||
_service = "auth"
|
||||
_action = "edit_user"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -347,7 +347,7 @@ class GetCredentialsRequest(Request):
|
||||
|
||||
_service = "auth"
|
||||
_action = "get_credentials"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'additionalProperties': False,
|
||||
'definitions': {},
|
||||
@@ -365,7 +365,7 @@ class GetCredentialsResponse(Response):
|
||||
"""
|
||||
_service = "auth"
|
||||
_action = "get_credentials"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {
|
||||
@@ -433,7 +433,7 @@ class LoginRequest(Request):
|
||||
|
||||
_service = "auth"
|
||||
_action = "login"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -474,7 +474,7 @@ class LoginResponse(Response):
|
||||
"""
|
||||
_service = "auth"
|
||||
_action = "login"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -510,7 +510,7 @@ class LogoutRequest(Request):
|
||||
|
||||
_service = "auth"
|
||||
_action = "logout"
|
||||
_version = "2.2"
|
||||
_version = "2.4"
|
||||
_schema = {'additionalProperties': False, 'definitions': {}, 'type': 'object'}
|
||||
|
||||
|
||||
@@ -521,7 +521,7 @@ class LogoutResponse(Response):
|
||||
"""
|
||||
_service = "auth"
|
||||
_action = "logout"
|
||||
_version = "2.2"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {'additionalProperties': False, 'definitions': {}, 'type': 'object'}
|
||||
|
||||
@@ -537,7 +537,7 @@ class RevokeCredentialsRequest(Request):
|
||||
|
||||
_service = "auth"
|
||||
_action = "revoke_credentials"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -577,7 +577,7 @@ class RevokeCredentialsResponse(Response):
|
||||
"""
|
||||
_service = "auth"
|
||||
_action = "revoke_credentials"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
|
||||
@@ -19,7 +19,7 @@ class ApiexRequest(Request):
|
||||
|
||||
_service = "debug"
|
||||
_action = "apiex"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
_schema = {'definitions': {}, 'properties': {}, 'required': [], 'type': 'object'}
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ class ApiexResponse(Response):
|
||||
"""
|
||||
_service = "debug"
|
||||
_action = "apiex"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {'definitions': {}, 'properties': {}, 'type': 'object'}
|
||||
|
||||
@@ -43,7 +43,7 @@ class EchoRequest(Request):
|
||||
|
||||
_service = "debug"
|
||||
_action = "echo"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
_schema = {'definitions': {}, 'properties': {}, 'type': 'object'}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ class EchoResponse(Response):
|
||||
"""
|
||||
_service = "debug"
|
||||
_action = "echo"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {'definitions': {}, 'properties': {}, 'type': 'object'}
|
||||
|
||||
@@ -65,7 +65,7 @@ class ExRequest(Request):
|
||||
|
||||
_service = "debug"
|
||||
_action = "ex"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
_schema = {'definitions': {}, 'properties': {}, 'required': [], 'type': 'object'}
|
||||
|
||||
|
||||
@@ -76,7 +76,7 @@ class ExResponse(Response):
|
||||
"""
|
||||
_service = "debug"
|
||||
_action = "ex"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {'definitions': {}, 'properties': {}, 'type': 'object'}
|
||||
|
||||
@@ -89,7 +89,7 @@ class PingRequest(Request):
|
||||
|
||||
_service = "debug"
|
||||
_action = "ping"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
_schema = {'definitions': {}, 'properties': {}, 'type': 'object'}
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ class PingResponse(Response):
|
||||
"""
|
||||
_service = "debug"
|
||||
_action = "ping"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -141,7 +141,7 @@ class PingAuthRequest(Request):
|
||||
|
||||
_service = "debug"
|
||||
_action = "ping_auth"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
_schema = {'definitions': {}, 'properties': {}, 'type': 'object'}
|
||||
|
||||
|
||||
@@ -154,7 +154,7 @@ class PingAuthResponse(Response):
|
||||
"""
|
||||
_service = "debug"
|
||||
_action = "ping_auth"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
|
||||
2977
trains_agent/backend_api/services/v2_4/events.py
Normal file
2977
trains_agent/backend_api/services/v2_4/events.py
Normal file
File diff suppressed because it is too large
Load Diff
2850
trains_agent/backend_api/services/v2_4/models.py
Normal file
2850
trains_agent/backend_api/services/v2_4/models.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1518,7 +1518,7 @@ class CloseRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "close"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -1612,7 +1612,7 @@ class CloseResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "close"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -1682,7 +1682,7 @@ class CompletedRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "completed"
|
||||
_version = "2.2"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -1776,7 +1776,7 @@ class CompletedResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "completed"
|
||||
_version = "2.2"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -1862,7 +1862,7 @@ class CreateRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "create"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {
|
||||
'artifact': {
|
||||
@@ -2229,7 +2229,7 @@ class CreateResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "create"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -2280,7 +2280,7 @@ class DeleteRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "delete"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -2403,7 +2403,7 @@ class DeleteResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "delete"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -2547,7 +2547,7 @@ class DequeueRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "dequeue"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -2624,7 +2624,7 @@ class DequeueResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "dequeue"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -2733,7 +2733,7 @@ class EditRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "edit"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {
|
||||
'artifact': {
|
||||
@@ -3123,7 +3123,7 @@ class EditResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "edit"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -3201,7 +3201,7 @@ class EnqueueRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "enqueue"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -3296,7 +3296,7 @@ class EnqueueResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "enqueue"
|
||||
_version = "1.5"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -3386,7 +3386,7 @@ class FailedRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "failed"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -3480,7 +3480,7 @@ class FailedResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "failed"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -3587,7 +3587,7 @@ class GetAllRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "get_all"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {
|
||||
'multi_field_pattern_data': {
|
||||
@@ -3986,7 +3986,7 @@ class GetAllResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "get_all"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {
|
||||
@@ -4373,7 +4373,7 @@ class GetByIdRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "get_by_id"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {'task': {'description': 'Task ID', 'type': 'string'}},
|
||||
@@ -4408,7 +4408,7 @@ class GetByIdResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "get_by_id"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {
|
||||
@@ -4792,7 +4792,7 @@ class PingRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "ping"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {'task': {'description': 'Task ID', 'type': 'string'}},
|
||||
@@ -4825,7 +4825,7 @@ class PingResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "ping"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {'additionalProperties': False, 'definitions': {}, 'type': 'object'}
|
||||
|
||||
@@ -4853,7 +4853,7 @@ class PublishRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "publish"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -4967,7 +4967,7 @@ class PublishResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "publish"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -5057,7 +5057,7 @@ class ResetRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "reset"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -5160,7 +5160,7 @@ class ResetResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "reset"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -5305,7 +5305,7 @@ class SetRequirementsRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "set_requirements"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -5362,7 +5362,7 @@ class SetRequirementsResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "set_requirements"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -5431,7 +5431,7 @@ class StartedRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "started"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -5527,7 +5527,7 @@ class StartedResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "started"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -5617,7 +5617,7 @@ class StopRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "stop"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -5711,7 +5711,7 @@ class StopResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "stop"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -5780,7 +5780,7 @@ class StoppedRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "stopped"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -5874,7 +5874,7 @@ class StoppedResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "stopped"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -5952,7 +5952,7 @@ class UpdateRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "update"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
'properties': {
|
||||
@@ -6120,7 +6120,7 @@ class UpdateResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "update"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -6183,7 +6183,7 @@ class UpdateBatchRequest(BatchRequest):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "update_batch"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_batched_request_cls = UpdateRequest
|
||||
|
||||
|
||||
@@ -6196,7 +6196,7 @@ class UpdateBatchResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "update_batch"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {
|
||||
'definitions': {},
|
||||
@@ -6261,7 +6261,7 @@ class ValidateRequest(Request):
|
||||
|
||||
_service = "tasks"
|
||||
_action = "validate"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
_schema = {
|
||||
'definitions': {
|
||||
'artifact': {
|
||||
@@ -6614,7 +6614,7 @@ class ValidateResponse(Response):
|
||||
"""
|
||||
_service = "tasks"
|
||||
_action = "validate"
|
||||
_version = "2.1"
|
||||
_version = "2.4"
|
||||
|
||||
_schema = {'additionalProperties': False, 'definitions': {}, 'type': 'object'}
|
||||
|
||||
|
||||
0
trains_agent/backend_api/services/v2_5/__init__.py
Normal file
0
trains_agent/backend_api/services/v2_5/__init__.py
Normal file
623
trains_agent/backend_api/services/v2_5/auth.py
Normal file
623
trains_agent/backend_api/services/v2_5/auth.py
Normal file
@@ -0,0 +1,623 @@
|
||||
"""
|
||||
auth service
|
||||
|
||||
This service provides authentication management and authorization
|
||||
validation for the entire system.
|
||||
"""
|
||||
import six
|
||||
import types
|
||||
from datetime import datetime
|
||||
import enum
|
||||
|
||||
from dateutil.parser import parse as parse_datetime
|
||||
|
||||
from ....backend_api.session import Request, BatchRequest, Response, DataModel, NonStrictDataModel, CompoundRequest, schema_property, StringEnum
|
||||
|
||||
|
||||
class Credentials(NonStrictDataModel):
    """
    An access key / secret key pair.

    :param access_key: Credentials access key
    :type access_key: str
    :param secret_key: Credentials secret key
    :type secret_key: str
    """
    _schema = {
        'properties': {
            'access_key': {
                'description': 'Credentials access key',
                'type': ['string', 'null'],
            },
            'secret_key': {
                'description': 'Credentials secret key',
                'type': ['string', 'null'],
            },
        },
        'type': 'object',
    }

    def __init__(self, access_key=None, secret_key=None, **kwargs):
        super(Credentials, self).__init__(**kwargs)
        # Assigned through the property setters defined below.
        self.access_key = access_key
        self.secret_key = secret_key

    @schema_property('access_key')
    def access_key(self):
        return self._property_access_key

    @access_key.setter
    def access_key(self, value):
        # None is stored as-is; any other value is type-checked first.
        if value is not None:
            self.assert_isinstance(value, "access_key", six.string_types)
        self._property_access_key = value

    @schema_property('secret_key')
    def secret_key(self):
        return self._property_secret_key

    @secret_key.setter
    def secret_key(self, value):
        # None is stored as-is; any other value is type-checked first.
        if value is not None:
            self.assert_isinstance(value, "secret_key", six.string_types)
        self._property_secret_key = value
|
||||
|
||||
|
||||
class CredentialKey(NonStrictDataModel):
    """
    Description of a credentials key without its secret.

    :param access_key: Access key of the credentials
    :type access_key: str
    :param last_used: Last-used timestamp; strings are parsed into datetime
    :type last_used: datetime.datetime
    :param last_used_from: Free-form string describing where the key was last used from
    :type last_used_from: str
    """
    _schema = {
        'properties': {
            'access_key': {'description': '', 'type': ['string', 'null']},
            'last_used': {
                'description': '',
                'format': 'date-time',
                'type': ['string', 'null'],
            },
            'last_used_from': {'description': '', 'type': ['string', 'null']},
        },
        'type': 'object',
    }

    def __init__(self, access_key=None, last_used=None, last_used_from=None, **kwargs):
        super(CredentialKey, self).__init__(**kwargs)
        # Assigned through the property setters defined below.
        self.access_key = access_key
        self.last_used = last_used
        self.last_used_from = last_used_from

    @schema_property('access_key')
    def access_key(self):
        return self._property_access_key

    @access_key.setter
    def access_key(self, value):
        # None is stored as-is; any other value is type-checked first.
        if value is not None:
            self.assert_isinstance(value, "access_key", six.string_types)
        self._property_access_key = value

    @schema_property('last_used')
    def last_used(self):
        return self._property_last_used

    @last_used.setter
    def last_used(self, value):
        if value is not None:
            self.assert_isinstance(value, "last_used", six.string_types + (datetime,))
            # Anything that is not already a datetime is parsed into one.
            if not isinstance(value, datetime):
                value = parse_datetime(value)
        self._property_last_used = value

    @schema_property('last_used_from')
    def last_used_from(self):
        return self._property_last_used_from

    @last_used_from.setter
    def last_used_from(self, value):
        # None is stored as-is; any other value is type-checked first.
        if value is not None:
            self.assert_isinstance(value, "last_used_from", six.string_types)
        self._property_last_used_from = value
|
||||
|
||||
|
||||
|
||||
|
||||
class CreateCredentialsRequest(Request):
    """
    Request for the auth.create_credentials endpoint.

    Creates a new set of credentials for the authenticated user and returns
    the new key/secret.
    Note: the secret is never returned by any other API call. If a secret is
    lost or compromised, the key should be revoked and a new set of
    credentials created.
    """

    _service = "auth"
    _action = "create_credentials"
    _version = "2.5"
    # The request carries no properties and rejects any additional ones.
    _schema = {
        'additionalProperties': False,
        'definitions': {},
        'properties': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class CreateCredentialsResponse(Response):
    """
    Response of the auth.create_credentials endpoint.

    :param credentials: Created credentials
    :type credentials: Credentials
    """
    _service = "auth"
    _action = "create_credentials"
    _version = "2.5"

    _schema = {
        'definitions': {
            'credentials': {
                'properties': {
                    'access_key': {
                        'description': 'Credentials access key',
                        'type': ['string', 'null'],
                    },
                    'secret_key': {
                        'description': 'Credentials secret key',
                        'type': ['string', 'null'],
                    },
                },
                'type': 'object',
            },
        },
        'properties': {
            'credentials': {
                'description': 'Created credentials',
                'oneOf': [{'$ref': '#/definitions/credentials'}, {'type': 'null'}],
            },
        },
        'type': 'object',
    }

    def __init__(self, credentials=None, **kwargs):
        super(CreateCredentialsResponse, self).__init__(**kwargs)
        self.credentials = credentials

    @schema_property('credentials')
    def credentials(self):
        return self._property_credentials

    @credentials.setter
    def credentials(self, value):
        if value is not None:
            if isinstance(value, dict):
                # Plain dicts are converted into a Credentials model.
                value = Credentials.from_dict(value)
            else:
                self.assert_isinstance(value, "credentials", Credentials)
        self._property_credentials = value
|
||||
|
||||
|
||||
|
||||
|
||||
class EditUserRequest(Request):
    """
    Request for the auth.edit_user endpoint: edit a user's auth data properties.

    :param user: User ID
    :type user: str
    :param role: The new user's role within the company
    :type role: str
    """

    _service = "auth"
    _action = "edit_user"
    _version = "2.5"
    _schema = {
        'definitions': {},
        'properties': {
            'role': {
                'description': "The new user's role within the company",
                'enum': ['admin', 'superuser', 'user', 'annotator'],
                'type': ['string', 'null'],
            },
            'user': {'description': 'User ID', 'type': ['string', 'null']},
        },
        'type': 'object',
    }

    def __init__(self, user=None, role=None, **kwargs):
        super(EditUserRequest, self).__init__(**kwargs)
        self.user = user
        self.role = role

    @schema_property('user')
    def user(self):
        return self._property_user

    @user.setter
    def user(self, value):
        # None is stored as-is; any other value is type-checked first.
        if value is not None:
            self.assert_isinstance(value, "user", six.string_types)
        self._property_user = value

    @schema_property('role')
    def role(self):
        return self._property_role

    @role.setter
    def role(self, value):
        # Only the type is checked here; the setter does not compare the
        # value against the schema's 'enum' list.
        if value is not None:
            self.assert_isinstance(value, "role", six.string_types)
        self._property_role = value
|
||||
|
||||
|
||||
class EditUserResponse(Response):
    """
    Response of the auth.edit_user endpoint.

    :param updated: Number of users updated (0 or 1)
    :type updated: float
    :param fields: Updated fields names and values
    :type fields: dict
    """
    _service = "auth"
    _action = "edit_user"
    _version = "2.5"

    _schema = {
        'definitions': {},
        'properties': {
            'fields': {
                'additionalProperties': True,
                'description': 'Updated fields names and values',
                'type': ['object', 'null'],
            },
            'updated': {
                'description': 'Number of users updated (0 or 1)',
                'enum': [0, 1],
                'type': ['number', 'null'],
            },
        },
        'type': 'object',
    }

    def __init__(self, updated=None, fields=None, **kwargs):
        super(EditUserResponse, self).__init__(**kwargs)
        self.updated = updated
        self.fields = fields

    @schema_property('updated')
    def updated(self):
        return self._property_updated

    @updated.setter
    def updated(self, value):
        # Schema type is 'number', so both integers and floats are accepted.
        if value is not None:
            self.assert_isinstance(value, "updated", six.integer_types + (float,))
        self._property_updated = value

    @schema_property('fields')
    def fields(self):
        return self._property_fields

    @fields.setter
    def fields(self, value):
        if value is not None:
            self.assert_isinstance(value, "fields", (dict,))
        self._property_fields = value
|
||||
|
||||
|
||||
class GetCredentialsRequest(Request):
    """
    Request for the auth.get_credentials endpoint.

    Returns all existing credential keys for the authenticated user.
    Note: only credential keys are returned.
    """

    _service = "auth"
    _action = "get_credentials"
    _version = "2.5"
    # The request carries no properties and rejects any additional ones.
    _schema = {
        'additionalProperties': False,
        'definitions': {},
        'properties': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class GetCredentialsResponse(Response):
    """
    Response of the auth.get_credentials endpoint.

    :param credentials: List of credentials, each with an empty secret field.
    :type credentials: Sequence[CredentialKey]
    """
    _service = "auth"
    _action = "get_credentials"
    _version = "2.5"

    _schema = {
        'definitions': {
            'credential_key': {
                'properties': {
                    'access_key': {'description': '', 'type': ['string', 'null']},
                    'last_used': {
                        'description': '',
                        'format': 'date-time',
                        'type': ['string', 'null'],
                    },
                    'last_used_from': {
                        'description': '',
                        'type': ['string', 'null'],
                    },
                },
                'type': 'object',
            },
        },
        'properties': {
            'credentials': {
                'description': 'List of credentials, each with an empty secret field.',
                'items': {'$ref': '#/definitions/credential_key'},
                'type': ['array', 'null'],
            },
        },
        'type': 'object',
    }

    def __init__(self, credentials=None, **kwargs):
        super(GetCredentialsResponse, self).__init__(**kwargs)
        self.credentials = credentials

    @schema_property('credentials')
    def credentials(self):
        return self._property_credentials

    @credentials.setter
    def credentials(self, value):
        if value is not None:
            self.assert_isinstance(value, "credentials", (list, tuple))
            contains_dict = any(isinstance(item, dict) for item in value)
            if contains_dict:
                # Dict items are converted to CredentialKey; non-dict items in
                # such a list are kept as given, without a type check.
                value = [
                    CredentialKey.from_dict(item) if isinstance(item, dict) else item
                    for item in value
                ]
            else:
                self.assert_isinstance(value, "credentials", CredentialKey, is_array=True)
        self._property_credentials = value
|
||||
|
||||
|
||||
|
||||
|
||||
class LoginRequest(Request):
    """
    Request for the auth.login endpoint: get a token based on supplied
    credentials (key/secret).

    Intended for users with key/secret credentials who wish to obtain a token
    for use with other services. The token is limited by the same permissions
    that exist for the credentials used in this call.

    :param expiration_sec: Requested token expiration time in seconds. Not
        guaranteed, might be overridden by the service
    :type expiration_sec: int
    """

    _service = "auth"
    _action = "login"
    _version = "2.5"
    _schema = {
        'definitions': {},
        'properties': {
            'expiration_sec': {
                'description': 'Requested token expiration time in seconds. \n Not guaranteed, might be overridden by the service',
                'type': ['integer', 'null'],
            },
        },
        'type': 'object',
    }

    def __init__(self, expiration_sec=None, **kwargs):
        super(LoginRequest, self).__init__(**kwargs)
        self.expiration_sec = expiration_sec

    @schema_property('expiration_sec')
    def expiration_sec(self):
        return self._property_expiration_sec

    @expiration_sec.setter
    def expiration_sec(self, value):
        if value is not None:
            # Whole-number floats (e.g. 60.0) are converted to int before the check.
            if isinstance(value, float) and value.is_integer():
                value = int(value)
            self.assert_isinstance(value, "expiration_sec", six.integer_types)
        self._property_expiration_sec = value
|
||||
|
||||
|
||||
class LoginResponse(Response):
    """
    Response of the auth.login endpoint.

    :param token: Token string
    :type token: str
    """
    _service = "auth"
    _action = "login"
    _version = "2.5"

    _schema = {
        'definitions': {},
        'properties': {
            'token': {'description': 'Token string', 'type': ['string', 'null']},
        },
        'type': 'object',
    }

    def __init__(self, token=None, **kwargs):
        super(LoginResponse, self).__init__(**kwargs)
        self.token = token

    @schema_property('token')
    def token(self):
        return self._property_token

    @token.setter
    def token(self, value):
        # None is stored as-is; any other value is type-checked first.
        if value is not None:
            self.assert_isinstance(value, "token", six.string_types)
        self._property_token = value
|
||||
|
||||
|
||||
class LogoutRequest(Request):
    """
    Request for the auth.logout endpoint.

    Removes the authentication cookie from the current session.
    """

    _service = "auth"
    _action = "logout"
    _version = "2.5"
    # The request rejects any additional properties.
    _schema = {
        'additionalProperties': False,
        'definitions': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class LogoutResponse(Response):
    """
    Response of the auth.logout endpoint.

    The schema declares no properties.
    """
    _service = "auth"
    _action = "logout"
    _version = "2.5"

    _schema = {
        'additionalProperties': False,
        'definitions': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class RevokeCredentialsRequest(Request):
    """
    Request for the auth.revoke_credentials endpoint.

    Revokes (and deletes) a set (key, secret) of credentials for the
    authenticated user.

    :param access_key: Credentials key
    :type access_key: str
    """

    _service = "auth"
    _action = "revoke_credentials"
    _version = "2.5"
    _schema = {
        'definitions': {},
        'properties': {
            'access_key': {
                'description': 'Credentials key',
                'type': ['string', 'null'],
            },
        },
        # NOTE(review): 'required' names 'key_id', but the only property
        # declared above is 'access_key' -- confirm against the server schema.
        'required': ['key_id'],
        'type': 'object',
    }

    def __init__(self, access_key=None, **kwargs):
        super(RevokeCredentialsRequest, self).__init__(**kwargs)
        self.access_key = access_key

    @schema_property('access_key')
    def access_key(self):
        return self._property_access_key

    @access_key.setter
    def access_key(self, value):
        # None is stored as-is; any other value is type-checked first.
        if value is not None:
            self.assert_isinstance(value, "access_key", six.string_types)
        self._property_access_key = value
|
||||
|
||||
|
||||
class RevokeCredentialsResponse(Response):
    """
    Response of auth.revoke_credentials endpoint.

    :param revoked: Number of credentials revoked
    :type revoked: int
    """

    _service = "auth"
    _action = "revoke_credentials"
    _version = "2.5"

    _schema = {
        'definitions': {},
        'properties': {
            'revoked': {
                'description': 'Number of credentials revoked',
                'enum': [0, 1],
                'type': ['integer', 'null'],
            },
        },
        'type': 'object',
    }

    def __init__(self, revoked=None, **kwargs):
        super(RevokeCredentialsResponse, self).__init__(**kwargs)
        self.revoked = revoked

    @schema_property('revoked')
    def revoked(self):
        """Number of credentials revoked (``None`` when unset)."""
        return self._property_revoked

    @revoked.setter
    def revoked(self, value):
        if value is not None:
            # Whole-number floats (e.g. 1.0) are accepted and stored as int.
            if isinstance(value, float) and value.is_integer():
                value = int(value)
            self.assert_isinstance(value, "revoked", six.integer_types)
        self._property_revoked = value
|
||||
|
||||
|
||||
|
||||
|
||||
# Maps each auth request class to the response class of the same endpoint.
response_mapping = {
    LoginRequest: LoginResponse,
    LogoutRequest: LogoutResponse,
    CreateCredentialsRequest: CreateCredentialsResponse,
    GetCredentialsRequest: GetCredentialsResponse,
    RevokeCredentialsRequest: RevokeCredentialsResponse,
    EditUserRequest: EditUserResponse,
}
|
||||
194
trains_agent/backend_api/services/v2_5/debug.py
Normal file
194
trains_agent/backend_api/services/v2_5/debug.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
debug service
|
||||
|
||||
Debugging utilities
|
||||
"""
|
||||
import six
|
||||
import types
|
||||
from datetime import datetime
|
||||
import enum
|
||||
|
||||
from dateutil.parser import parse as parse_datetime
|
||||
|
||||
from ....backend_api.session import Request, BatchRequest, Response, DataModel, NonStrictDataModel, CompoundRequest, schema_property, StringEnum
|
||||
|
||||
|
||||
class ApiexRequest(Request):
    """
    Request for the debug.apiex endpoint.

    The schema declares no properties and no required fields.
    """

    _service = "debug"
    _action = "apiex"
    _version = "2.5"
    _schema = {
        'definitions': {},
        'properties': {},
        'required': [],
        'type': 'object',
    }
|
||||
|
||||
|
||||
class ApiexResponse(Response):
    """
    Response of debug.apiex endpoint.

    The schema declares no properties.
    """

    _service = "debug"
    _action = "apiex"
    _version = "2.5"

    _schema = {
        'definitions': {},
        'properties': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class EchoRequest(Request):
    """
    Request for the debug.echo endpoint: return request data.

    The schema declares no properties.
    """

    _service = "debug"
    _action = "echo"
    _version = "2.5"
    _schema = {
        'definitions': {},
        'properties': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class EchoResponse(Response):
    """
    Response of debug.echo endpoint.

    The schema declares no properties.
    """

    _service = "debug"
    _action = "echo"
    _version = "2.5"

    _schema = {
        'definitions': {},
        'properties': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class ExRequest(Request):
    """
    Request for the debug.ex endpoint.

    The schema declares no properties and no required fields.
    """

    _service = "debug"
    _action = "ex"
    _version = "2.5"
    _schema = {
        'definitions': {},
        'properties': {},
        'required': [],
        'type': 'object',
    }
|
||||
|
||||
|
||||
class ExResponse(Response):
    """
    Response of debug.ex endpoint.

    The schema declares no properties.
    """

    _service = "debug"
    _action = "ex"
    _version = "2.5"

    _schema = {
        'definitions': {},
        'properties': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class PingRequest(Request):
    """
    Request for the debug.ping endpoint.

    Return a message. Does not require authorization.
    """

    _service = "debug"
    _action = "ping"
    _version = "2.5"
    _schema = {
        'definitions': {},
        'properties': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class PingResponse(Response):
    """
    Response of debug.ping endpoint.

    :param msg: A friendly message
    :type msg: str
    """

    _service = "debug"
    _action = "ping"
    _version = "2.5"

    _schema = {
        'definitions': {},
        'properties': {
            'msg': {'description': 'A friendly message', 'type': ['string', 'null']},
        },
        'type': 'object',
    }

    def __init__(self, msg=None, **kwargs):
        super(PingResponse, self).__init__(**kwargs)
        self.msg = msg

    @schema_property('msg')
    def msg(self):
        """The message returned by the endpoint (``None`` when unset)."""
        return self._property_msg

    @msg.setter
    def msg(self, value):
        # None is stored as-is; anything else must be a string.
        if value is not None:
            self.assert_isinstance(value, "msg", six.string_types)
        self._property_msg = value
|
||||
|
||||
|
||||
class PingAuthRequest(Request):
    """
    Request for the debug.ping_auth endpoint.

    Return a message. Requires authorization.
    """

    _service = "debug"
    _action = "ping_auth"
    _version = "2.5"
    _schema = {
        'definitions': {},
        'properties': {},
        'type': 'object',
    }
|
||||
|
||||
|
||||
class PingAuthResponse(Response):
    """
    Response of debug.ping_auth endpoint.

    :param msg: A friendly message
    :type msg: str
    """

    _service = "debug"
    _action = "ping_auth"
    _version = "2.5"

    _schema = {
        'definitions': {},
        'properties': {
            'msg': {'description': 'A friendly message', 'type': ['string', 'null']},
        },
        'type': 'object',
    }

    def __init__(self, msg=None, **kwargs):
        super(PingAuthResponse, self).__init__(**kwargs)
        self.msg = msg

    @schema_property('msg')
    def msg(self):
        """The message returned by the endpoint (``None`` when unset)."""
        return self._property_msg

    @msg.setter
    def msg(self, value):
        # None is stored as-is; anything else must be a string.
        if value is not None:
            self.assert_isinstance(value, "msg", six.string_types)
        self._property_msg = value
|
||||
|
||||
|
||||
# Maps each debug request class to the response class of the same endpoint.
response_mapping = {
    EchoRequest: EchoResponse,
    PingRequest: PingResponse,
    PingAuthRequest: PingAuthResponse,
    ApiexRequest: ApiexResponse,
    ExRequest: ExResponse,
}
|
||||
3000
trains_agent/backend_api/services/v2_5/events.py
Normal file
3000
trains_agent/backend_api/services/v2_5/events.py
Normal file
File diff suppressed because it is too large
Load Diff
2850
trains_agent/backend_api/services/v2_5/models.py
Normal file
2850
trains_agent/backend_api/services/v2_5/models.py
Normal file
File diff suppressed because it is too large
Load Diff
2198
trains_agent/backend_api/services/v2_5/queues.py
Normal file
2198
trains_agent/backend_api/services/v2_5/queues.py
Normal file
File diff suppressed because it is too large
Load Diff
7053
trains_agent/backend_api/services/v2_5/tasks.py
Normal file
7053
trains_agent/backend_api/services/v2_5/tasks.py
Normal file
File diff suppressed because it is too large
Load Diff
2368
trains_agent/backend_api/services/v2_5/workers.py
Normal file
2368
trains_agent/backend_api/services/v2_5/workers.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -139,13 +139,25 @@ class Response(object):
|
||||
:param dest: if all of a response's data is contained in one field, use that field
|
||||
:type dest: Text
|
||||
"""
|
||||
self.response = None
|
||||
self._result = result
|
||||
response = getattr(result, "response", result)
|
||||
if getattr(response, "_service") == "events" and \
|
||||
getattr(response, "_action") in ("scalar_metrics_iter_histogram",
|
||||
"multi_task_scalar_metrics_iter_histogram",
|
||||
"vector_metrics_iter_histogram",
|
||||
):
|
||||
# put all the response data under metrics:
|
||||
response.metrics = result.response_data
|
||||
if 'metrics' not in response.__class__._get_data_props():
|
||||
response.__class__._data_props_list['metrics'] = 'metrics'
|
||||
if dest:
|
||||
response = getattr(response, dest)
|
||||
self.response = response
|
||||
|
||||
def __getattr__(self, attr):
    """
    Forward lookups of attributes missing on this object to ``self.response``.

    Python only calls ``__getattr__`` after normal attribute lookup has failed.

    :param attr: name of the attribute being looked up
    :return: ``None`` when ``self.response`` is None, otherwise the attribute
        of the same name on ``self.response``
    :raises AttributeError: when ``response`` itself has not been set on the
        instance, or when ``self.response`` lacks the attribute
    """
    if attr == "response":
        # Getting here means `response` is not set on the instance (for
        # example the object was created without running __init__). Reading
        # `self.response` below would re-enter this method without end, so
        # fail with a regular AttributeError instead.
        raise AttributeError(attr)
    if self.response is None:
        return None
    return getattr(self.response, attr)
|
||||
|
||||
@property
|
||||
@@ -493,6 +505,7 @@ class APIClient(object):
|
||||
queues = None # type: Any
|
||||
tasks = None # type: Any
|
||||
workers = None # type: Any
|
||||
events = None # type: Any
|
||||
|
||||
def __init__(self, session=None, api_version=None):
|
||||
self.session = session or StrictSession()
|
||||
|
||||
9
trains_agent/backend_api/session/jsonmodels/__init__.py
Normal file
9
trains_agent/backend_api/session/jsonmodels/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
# coding: utf-8
|
||||
|
||||
__author__ = 'Szczepan Cieślik'
|
||||
__email__ = 'szczepan.cieslik@gmail.com'
|
||||
__version__ = '2.4'
|
||||
|
||||
from . import models
|
||||
from . import fields
|
||||
from . import errors
|
||||
230
trains_agent/backend_api/session/jsonmodels/builders.py
Normal file
230
trains_agent/backend_api/session/jsonmodels/builders.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""Builders to generate in memory representation of model and fields tree."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import six
|
||||
|
||||
from . import errors
|
||||
from .fields import NotSet
|
||||
|
||||
|
||||
class Builder(object):

    """Base node of the builders tree.

    Type registration, type counting and definitions are kept on the root
    builder: every bookkeeping method forwards to `parent` when one is set.

    """

    def __init__(self, parent=None, nullable=False, default=NotSet):
        self.parent = parent
        self.nullable = nullable
        self.default = default
        # Bookkeeping below is only populated on the root builder.
        self.types_builders = {}
        self.types_count = defaultdict(int)
        self.definitions = set()

    @property
    def has_default(self):
        """Whether a default other than the `NotSet` marker was given."""
        return self.default is not NotSet

    def register_type(self, type, builder):
        """Count an occurrence of `type`; remember its first builder."""
        if self.parent:
            return self.parent.register_type(type, builder)

        self.types_count[type] += 1
        self.types_builders.setdefault(type, builder)

    def get_builder(self, type):
        """Return the builder first registered for `type`."""
        owner = self.parent
        if owner:
            return owner.get_builder(type)
        return self.types_builders[type]

    def count_type(self, type):
        """Return how many times `type` has been registered."""
        owner = self.parent
        if owner:
            return owner.count_type(type)
        return self.types_count[type]

    @staticmethod
    def maybe_build(value):
        """Build `value` when it is a builder, otherwise return it as is."""
        if isinstance(value, Builder):
            return value.build()
        return value

    def add_definition(self, builder):
        """Record `builder` as a definition on the root builder."""
        owner = self.parent
        if owner:
            return owner.add_definition(builder)
        self.definitions.add(builder)
|
||||
|
||||
|
||||
class ObjectBuilder(Builder):

    """Builder producing the schema of a model (an `object` schema)."""

    def __init__(self, model_type, *args, **kwargs):
        super(ObjectBuilder, self).__init__(*args, **kwargs)
        self.properties = {}
        self.required = []
        self.type = model_type

        self.register_type(self.type, self)

    def add_field(self, name, field, schema):
        """Add a property schema; remember it as required when the field is."""
        _apply_validators_modifications(schema, field)
        self.properties[name] = schema
        if field.required:
            self.required.append(name)

    def build(self):
        """Return the full schema, or a `#/definitions/...` reference string
        when this type is a non-root definition."""
        builder = self.get_builder(self.type)
        if not self.is_definition or self.is_root:
            return builder.build_definition(nullable=self.nullable)

        self.add_definition(builder)
        # Nested values are built here only for their side effects; the
        # results are discarded.
        for value in self.properties.values():
            self.maybe_build(value)
        return '#/definitions/{name}'.format(name=self.type_name)

    @property
    def type_name(self):
        """Lower-cased `module.name` of the model type, dots replaced by `_`."""
        qualified = '{module}.{name}'.format(
            module=self.type.__module__,
            name=self.type.__name__,
        )
        return qualified.replace('.', '_').lower()

    def build_definition(self, add_defintitions=True, nullable=False):
        """Build the `object` schema from the collected properties."""
        schema = {
            'type': 'object',
            'additionalProperties': False,
            'properties': {
                name: self.maybe_build(value)
                for name, value in self.properties.items()
            },
        }
        if self.required:
            schema['required'] = self.required
        if self.definitions and add_defintitions:
            schema['definitions'] = {
                definition.type_name: definition.build_definition(False, False)
                for definition in self.definitions
            }
        return schema

    @property
    def is_definition(self):
        """True when the type was registered more than once, or when the
        parent builder is itself a definition."""
        if self.count_type(self.type) > 1:
            return True
        if self.parent:
            return self.parent.is_definition
        return False

    @property
    def is_root(self):
        """True when this builder has no parent."""
        return not self.parent
|
||||
|
||||
|
||||
def _apply_validators_modifications(field_schema, field):
    """Let each validator of `field` amend `field_schema` in place.

    Validators that expose no `modify_schema` attribute (e.g. plain callables)
    are skipped. An error raised *inside* a validator's `modify_schema`,
    including `AttributeError`, propagates to the caller instead of being
    silently swallowed.

    :param field_schema: schema (dict) of the field, modified in place
    :param field: field object exposing a `validators` iterable
    """
    for validator in field.validators:
        modify_schema = getattr(validator, 'modify_schema', None)
        if modify_schema is not None:
            modify_schema(field_schema)
|
||||
|
||||
|
||||
class PrimitiveBuilder(Builder):

    """Builder producing the schema of a primitive (string/boolean/number)."""

    def __init__(self, type, *args, **kwargs):
        super(PrimitiveBuilder, self).__init__(*args, **kwargs)
        self.type = type

    def build(self):
        """Return the schema dict for the primitive type.

        Raises `errors.FieldNotSupported` for any other type.
        """
        primitive = self.type
        if issubclass(primitive, six.string_types):
            json_type = 'string'
        elif issubclass(primitive, bool):
            # bool is tested before int because bool is a subclass of int
            json_type = 'boolean'
        elif issubclass(primitive, (int, float)):
            json_type = 'number'
        else:
            raise errors.FieldNotSupported(
                "Can't specify value schema!", self.type
            )

        schema = {'type': [json_type, 'null'] if self.nullable else json_type}
        if self.has_default:
            schema["default"] = self.default
        return schema
|
||||
|
||||
|
||||
class ListBuilder(Builder):

    """Builder producing the schema of a list (an `array` schema)."""

    def __init__(self, *args, **kwargs):
        super(ListBuilder, self).__init__(*args, **kwargs)
        self.schemas = []

    def add_type_schema(self, schema):
        """Register one more allowed item schema (a dict or a builder)."""
        self.schemas.append(schema)

    def build(self):
        """Return the `array` schema.

        `items` is the single item schema when exactly one is known,
        otherwise a `oneOf` over all of them. For a nullable builder a
        `{'type': 'null'}` item schema is added to the result.

        Building does not modify the builder, so calling `build()` repeatedly
        yields the same schema. The null schema used to be appended to
        `self.schemas` on every call, duplicating it on repeated builds.
        """
        schema = {'type': 'array'}

        if self.has_default:
            schema["default"] = [self.to_struct(i) for i in self.default]

        item_schemas = [self.maybe_build(s) for s in self.schemas]
        if self.nullable:
            item_schemas.append({'type': 'null'})

        if len(item_schemas) == 1:
            items = item_schemas[0]
        else:
            items = {'oneOf': item_schemas}

        schema['items'] = items
        return schema

    @property
    def is_definition(self):
        """Same as the parent builder's `is_definition`."""
        return self.parent.is_definition

    @staticmethod
    def to_struct(item):
        """Convert a model instance with its `to_struct()`; return any other
        item unchanged."""
        # imported here rather than at module level
        from .models import Base
        if isinstance(item, Base):
            return item.to_struct()
        return item
|
||||
|
||||
|
||||
class EmbeddedBuilder(Builder):

    """Builder producing the schema of an embedded model field."""

    def __init__(self, *args, **kwargs):
        super(EmbeddedBuilder, self).__init__(*args, **kwargs)
        self.schemas = []

    def add_type_schema(self, schema):
        """Register one more allowed schema (a dict or a builder)."""
        self.schemas.append(schema)

    def build(self):
        """Return the single known schema, or a `oneOf` over all of them.

        For a nullable builder a `{'type': 'null'}` schema is added to the
        result.

        Building does not modify the builder, so calling `build()` repeatedly
        yields the same schema. The null schema used to be appended to
        `self.schemas` on every call, duplicating it on repeated builds.
        """
        schemas = [self.maybe_build(schema) for schema in self.schemas]
        if self.nullable:
            schemas.append({'type': 'null'})

        if len(schemas) == 1:
            schema = schemas[0]
        else:
            schema = {'oneOf': schemas}

        if self.has_default:
            # The default value of EmbeddedField is expected to be an instance
            # of a subclass of models.Base, thus have `to_struct`
            schema["default"] = self.default.to_struct()

        return schema

    @property
    def is_definition(self):
        """Same as the parent builder's `is_definition`."""
        return self.parent.is_definition
|
||||
21
trains_agent/backend_api/session/jsonmodels/collections.py
Normal file
21
trains_agent/backend_api/session/jsonmodels/collections.py
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
|
||||
class ModelCollection(list):

    """List that validates values stored through `append` and item assignment.

    Each such value is first passed to `validate_single_value` of the field
    given to `__init__`; the value is stored only if that call returns.

    """

    def __init__(self, field):
        self.field = field

    def append(self, value):
        check = self.field.validate_single_value
        check(value)
        super(ModelCollection, self).append(value)

    def __setitem__(self, key, value):
        check = self.field.validate_single_value
        check(value)
        super(ModelCollection, self).__setitem__(key, value)
|
||||
15
trains_agent/backend_api/session/jsonmodels/errors.py
Normal file
15
trains_agent/backend_api/session/jsonmodels/errors.py
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
|
||||
class ValidationError(RuntimeError):

    """Error raised when a value fails validation."""
|
||||
|
||||
|
||||
class FieldNotFound(RuntimeError):

    """Error raised when a field cannot be found."""
|
||||
|
||||
|
||||
class FieldNotSupported(ValueError):

    """Error raised when a field (or its type) is not supported."""
|
||||
488
trains_agent/backend_api/session/jsonmodels/fields.py
Normal file
488
trains_agent/backend_api/session/jsonmodels/fields.py
Normal file
@@ -0,0 +1,488 @@
|
||||
import datetime
|
||||
import re
|
||||
from weakref import WeakKeyDictionary
|
||||
|
||||
import six
|
||||
from dateutil.parser import parse
|
||||
|
||||
from .errors import ValidationError
|
||||
from .collections import ModelCollection
|
||||
|
||||
|
||||
# unique marker for "no default value specified". None is not good enough since
# it is a completely valid default value.
NotSet = object()


class BaseField(object):

    """Base class for all fields.

    A field is a descriptor: values are kept in `memory`, a
    `WeakKeyDictionary` keyed by the owning instance's `_cache_key`, and are
    parsed and validated on every assignment.

    Subclasses set `types` to a tuple of accepted Python types.

    """

    types = None

    def __init__(
            self,
            required=False,
            nullable=False,
            help_text=None,
            validators=None,
            default=NotSet,
            name=None):
        """Init.

        :param bool required: when set, `None` is rejected by `validate`.
        :param bool nullable: when set, custom validators are skipped for
            `None`.
        :param help_text: stored on the field as is.
        :param validators: a single validator or a list of validators; each
            is either an object with a `validate(value)` method or a callable.
        :param default: default value (validated immediately when given).
        :param str name: optional name used by `structue_name` instead of the
            attribute name.
        :raises ValueError: when `name` is not a valid name.

        """
        self.memory = WeakKeyDictionary()
        self.required = required
        self.help_text = help_text
        self.nullable = nullable
        self._assign_validators(validators)
        self.name = name
        self._validate_name()
        if default is not NotSet:
            self.validate(default)
        self._default = default

    @property
    def has_default(self):
        """Whether a default other than the `NotSet` marker was given."""
        return self._default is not NotSet

    def _assign_validators(self, validators):
        # A single validator is wrapped in a list; None/empty becomes [].
        if validators and not isinstance(validators, list):
            validators = [validators]
        self.validators = validators or []

    def __set__(self, instance, value):
        self._finish_initialization(type(instance))
        value = self.parse_value(value)
        self.validate(value)
        self.memory[instance._cache_key] = value

    def __get__(self, instance, owner=None):
        if instance is None:
            # Accessed on the class: return the field itself.
            self._finish_initialization(owner)
            return self

        self._finish_initialization(type(instance))

        self._check_value(instance)
        return self.memory[instance._cache_key]

    def _finish_initialization(self, owner):
        """Hook called with the owner class on every access; no-op here."""
        pass

    def _check_value(self, obj):
        # First access for this object: store the default value.
        if obj._cache_key not in self.memory:
            self.__set__(obj, self.get_default_value())

    def validate_for_object(self, obj):
        """Validate the value currently held for `obj`."""
        value = self.__get__(obj)
        self.validate(value)

    def validate(self, value):
        """Run all checks on `value`; raise `ValidationError` on failure."""
        self._check_types()
        self._validate_against_types(value)
        self._check_against_required(value)
        self._validate_with_custom_validators(value)

    def _check_against_required(self, value):
        if value is None and self.required:
            raise ValidationError('Field is required!')

    def _validate_against_types(self, value):
        if value is not None and not isinstance(value, self.types):
            raise ValidationError(
                'Value is wrong, expected type "{types}"'.format(
                    types=', '.join([t.__name__ for t in self.types])
                ),
                value,
            )

    def _check_types(self):
        if self.types is None:
            raise ValidationError(
                'Field "{type}" is not usable, try '
                'different field type.'.format(type=type(self).__name__))

    def to_struct(self, value):
        """Cast value to Python structure."""
        return value

    def parse_value(self, value):
        """Parse value from primitive to desired format.

        Each field can parse value to form it wants it to be (like string or
        int).

        """
        return value

    def _validate_with_custom_validators(self, value):
        if value is None and self.nullable:
            return

        for validator in self.validators:
            # Use the validator's `validate` method when it has one, otherwise
            # call the validator itself. Looking the method up (instead of
            # catching AttributeError around the call) keeps an AttributeError
            # raised *inside* `validate` from being mistaken for a missing
            # method.
            getattr(validator, 'validate', validator)(value)

    def get_default_value(self):
        """Get default value for field.

        Each field can specify its default.

        """
        return self._default if self.has_default else None

    def _validate_name(self):
        if self.name is None:
            return
        # Raw string: `\w` and `\-` are regex escapes, not string escapes.
        if not re.match(r'^[A-Za-z_](([\w\-]*)?\w+)?$', self.name):
            raise ValueError('Wrong name', self.name)

    def structue_name(self, default):
        """Return the explicit `name` of the field, or `default` if unset."""
        return self.name if self.name is not None else default
|
||||
|
||||
|
||||
class StringField(BaseField):

    """String field: accepts values of `six.string_types`."""

    types = six.string_types
|
||||
|
||||
|
||||
class IntField(BaseField):

    """Integer field."""

    types = (int,)

    def parse_value(self, value):
        """Cast value to `int`, e.g. from string or long"""
        parsed = super(IntField, self).parse_value(value)
        return parsed if parsed is None else int(parsed)
|
||||
|
||||
|
||||
class FloatField(BaseField):

    """Float field: accepts `float` as well as `int` values."""

    types = (float, int)
|
||||
|
||||
|
||||
class BoolField(BaseField):

    """Bool field."""

    types = (bool,)

    def parse_value(self, value):
        """Cast value to `bool`."""
        parsed = super(BoolField, self).parse_value(value)
        if parsed is None:
            return None
        return bool(parsed)
|
||||
|
||||
|
||||
class ListField(BaseField):

    """List field.

    Items may be restricted to `items_types`; an empty `items_types` means
    the list is untyped and any item is accepted.

    """

    types = (list,)

    def __init__(self, items_types=None, *args, **kwargs):
        """Init.

        `ListField` is **always not required**. If you want to control number
        of items use validators.

        :param items_types: a type, a string type path, or an iterable of
            those; string paths are resolved lazily.

        """
        self._assign_types(items_types)
        super(ListField, self).__init__(*args, **kwargs)
        self.required = False

    def get_default_value(self):
        """Return the configured default, or a new empty `ModelCollection`."""
        default = super(ListField, self).get_default_value()
        if default is None:
            return ModelCollection(self)
        return default

    def _assign_types(self, items_types):
        if not items_types:
            candidates = ()
        elif isinstance(items_types, six.string_types):
            # A single type path is one type, not an iterable of characters.
            candidates = (items_types,)
        else:
            try:
                candidates = tuple(items_types)
            except TypeError:
                # a single, non-iterable type
                candidates = (items_types,)

        # String paths are wrapped so they can be resolved on first access.
        self.items_types = tuple(
            _LazyType(type_) if isinstance(type_, six.string_types) else type_
            for type_ in candidates
        )

    def validate(self, value):
        """Validate the list itself, then every item against `items_types`."""
        super(ListField, self).validate(value)

        # None passed the base checks (the field is never required) and has
        # no items to check.
        if value is None or len(self.items_types) == 0:
            return

        for item in value:
            self.validate_single_value(item)

    def validate_single_value(self, item):
        """Raise `ValidationError` unless `item` matches `items_types`."""
        if len(self.items_types) == 0:
            return

        if not isinstance(item, self.items_types):
            raise ValidationError(
                'All items must be instances '
                'of "{types}", and not "{type}".'.format(
                    types=', '.join([t.__name__ for t in self.items_types]),
                    type=type(item).__name__,
                ))

    def parse_value(self, values):
        """Cast value to proper collection."""
        result = self.get_default_value()

        if not values:
            return result

        if not isinstance(values, list):
            # left as is; `validate` will reject it
            return values

        return [self._cast_value(value) for value in values]

    def _cast_value(self, value):
        # Untyped list: nothing to cast to, keep the item as is (consistent
        # with `validate`, which accepts any item for an untyped list).
        if len(self.items_types) == 0 or isinstance(value, self.items_types):
            return value

        if len(self.items_types) != 1:
            tpl = 'Cannot decide which type to choose from "{types}".'
            raise ValidationError(
                tpl.format(
                    types=', '.join([t.__name__ for t in self.items_types])
                )
            )
        # Exactly one item type: construct it from the mapping.
        return self.items_types[0](**value)

    def _finish_initialization(self, owner):
        super(ListField, self)._finish_initialization(owner)

        # Resolve lazily declared types relative to the owner class.
        self.items_types = tuple(
            type_.evaluate(owner) if isinstance(type_, _LazyType) else type_
            for type_ in self.items_types
        )

    def _elem_to_struct(self, value):
        try:
            return value.to_struct()
        except AttributeError:
            return value

    def to_struct(self, values):
        """Cast every item to a Python structure."""
        return [self._elem_to_struct(v) for v in values]
|
||||
|
||||
|
||||
class EmbeddedField(BaseField):

    """Field for embedded models.

    `model_types` is a model type, a string type path, or a list/tuple of
    those; string paths are resolved lazily.

    """

    def __init__(self, model_types, *args, **kwargs):
        self._assign_model_types(model_types)
        super(EmbeddedField, self).__init__(*args, **kwargs)

    def _assign_model_types(self, model_types):
        if not isinstance(model_types, (list, tuple)):
            model_types = (model_types,)

        # String paths are wrapped so they can be resolved on first access.
        self.types = tuple(
            _LazyType(candidate)
            if isinstance(candidate, six.string_types) else candidate
            for candidate in model_types
        )

    def _finish_initialization(self, owner):
        super(EmbeddedField, self)._finish_initialization(owner)

        # Resolve lazily declared types relative to the owner class.
        self.types = tuple(
            candidate.evaluate(owner)
            if isinstance(candidate, _LazyType) else candidate
            for candidate in self.types
        )

    def validate(self, value):
        """Validate the value, then let the value validate itself when it
        has a `validate()` method."""
        super(EmbeddedField, self).validate(value)
        try:
            value.validate()
        except AttributeError:
            pass

    def parse_value(self, value):
        """Parse value to proper model type."""
        if isinstance(value, dict):
            return self._get_embed_type()(**value)
        return value

    def _get_embed_type(self):
        candidates = self.types
        if len(candidates) == 1:
            return candidates[0]
        raise ValidationError(
            'Cannot decide which type to choose from "{types}".'.format(
                types=', '.join([t.__name__ for t in candidates])
            )
        )

    def to_struct(self, value):
        """Cast the embedded model to a Python structure."""
        return value.to_struct()
|
||||
|
||||
|
||||
class _LazyType(object):

    """Placeholder for a type given as a string path, resolved on demand."""

    def __init__(self, path):
        self.path = path

    def evaluate(self, base_cls):
        """Resolve `path` relative to `base_cls` and return the type."""
        module_name, type_name = _evaluate_path(self.path, base_cls)
        return _import(module_name, type_name)
|
||||
|
||||
|
||||
def _evaluate_path(relative_path, base_cls):
    """Split a type path into `(module, type_name)`.

    The path is resolved against the module of `base_cls`; when the path
    names no module at all, that module is used.
    """
    base_module = base_cls.__module__
    parts = _get_modules(relative_path, base_module)

    type_name = parts.pop()
    module = '.'.join(parts) or base_module
    return module, type_name
|
||||
|
||||
|
||||
def _get_modules(relative_path, base_module):
    """Return the dotted parts of `relative_path` as a list.

    A path without leading dots is returned split as is. With leading dots,
    each dot after the first drops one trailing component of `base_module`
    before the path's own parts are appended; a single leading dot
    contributes nothing from `base_module`.

    Raises `ValueError` when there are more dots than `base_module` allows.
    """
    stripped = relative_path.lstrip('.')
    parts = stripped.split('.')

    leading_dots = len(relative_path) - len(stripped)
    if leading_dots == 0:
        return parts

    levels_up = leading_dots - 1
    base_parts = base_module.split('.')
    if levels_up > len(base_parts):
        raise ValueError("Can't evaluate path '{}'".format(relative_path))
    # With levels_up == 0 the slice is [:0], i.e. empty.
    return base_parts[:-levels_up] + parts
|
||||
|
||||
|
||||
def _import(module_name, type_name):
    """Import `module_name` and return its attribute `type_name`.

    Raises `ValueError` when the module has no such attribute.
    """
    loaded = __import__(module_name, fromlist=[type_name])
    try:
        found = getattr(loaded, type_name)
    except AttributeError:
        raise ValueError(
            "Can't find type '{}.{}'.".format(module_name, type_name))
    return found
|
||||
|
||||
|
||||
class TimeField(StringField):

    """Time field: holds `datetime.time` values."""

    types = (datetime.time,)

    def __init__(self, str_format=None, *args, **kwargs):
        """Init.

        :param str str_format: Format to cast time to (if `None` - casting to
            ISO 8601 format).

        """
        self.str_format = str_format
        super(TimeField, self).__init__(*args, **kwargs)

    def to_struct(self, value):
        """Cast `time` object to string."""
        fmt = self.str_format
        return value.strftime(fmt) if fmt else value.isoformat()

    def parse_value(self, value):
        """Parse string into instance of `time`."""
        if value is None or isinstance(value, datetime.time):
            return value
        return parse(value).timetz()
|
||||
|
||||
|
||||
class DateField(StringField):

    """Date field: holds `datetime.date` values."""

    types = (datetime.date,)
    default_format = '%Y-%m-%d'

    def __init__(self, str_format=None, *args, **kwargs):
        """Init.

        :param str str_format: Format to cast date to (if `None` - casting to
            %Y-%m-%d format).

        """
        self.str_format = str_format
        super(DateField, self).__init__(*args, **kwargs)

    def to_struct(self, value):
        """Cast `date` object to string."""
        return value.strftime(self.str_format or self.default_format)

    def parse_value(self, value):
        """Parse string into instance of `date`."""
        if value is None or isinstance(value, datetime.date):
            return value
        return parse(value).date()
|
||||
|
||||
|
||||
class DateTimeField(StringField):

    """Datetime field: holds `datetime.datetime` values."""

    types = (datetime.datetime,)

    def __init__(self, str_format=None, *args, **kwargs):
        """Init.

        :param str str_format: Format to cast datetime to (if `None` - casting
            to ISO 8601 format).

        """
        self.str_format = str_format
        super(DateTimeField, self).__init__(*args, **kwargs)

    def to_struct(self, value):
        """Cast `datetime` object to string."""
        fmt = self.str_format
        return value.strftime(fmt) if fmt else value.isoformat()

    def parse_value(self, value):
        """Parse string into instance of `datetime`."""
        if isinstance(value, datetime.datetime):
            return value
        # Any falsy value (None, empty string, ...) parses to None.
        return parse(value) if value else None
|
||||
154
trains_agent/backend_api/session/jsonmodels/models.py
Normal file
154
trains_agent/backend_api/session/jsonmodels/models.py
Normal file
@@ -0,0 +1,154 @@
|
||||
import six
|
||||
|
||||
from . import parsers, errors
|
||||
from .fields import BaseField
|
||||
from .errors import ValidationError
|
||||
|
||||
|
||||
class JsonmodelMeta(type):

    """Metaclass that checks field names when a model class is created."""

    def __new__(cls, name, bases, attributes):
        cls.validate_fields(attributes)
        # Name the class explicitly: `super(cls, cls)` would resolve back to
        # this very method if the metaclass were subclassed, recursing
        # without end.
        return super(JsonmodelMeta, cls).__new__(cls, name, bases, attributes)

    @staticmethod
    def validate_fields(attributes):
        """Ensure no two fields in `attributes` share a structure name.

        :param dict attributes: class body namespace of the model.
        :raises ValueError: when a structure name is used by two fields.
        """
        taken_names = set()
        for name, field in attributes.items():
            if not isinstance(field, BaseField):
                continue
            structue_name = field.structue_name(name)
            if structue_name in taken_names:
                raise ValueError('Name taken', structue_name, name)
            taken_names.add(structue_name)
|
||||
|
||||
|
||||
class Base(six.with_metaclass(JsonmodelMeta, object)):

    """Common ancestor of every model."""

    def __init__(self, **kwargs):
        """Create the model and populate its fields from keyword arguments."""
        self._cache_key = _CacheKey()
        self.populate(**kwargs)

    def populate(self, **values):
        """Assign the given values to matching fields; ignore unknown keys.

        Keys are matched against structure names first and against attribute
        names second.

        """
        remaining = dict(values)
        known_fields = list(self.iterate_with_name())

        for _, structure_name, field in known_fields:
            if structure_name in remaining:
                field.__set__(self, remaining.pop(structure_name))

        for attr_name, _, field in known_fields:
            if attr_name in remaining:
                field.__set__(self, remaining.pop(attr_name))

    def get_field(self, field_name):
        """Return the field object stored under the given attribute name.

        :raises errors.FieldNotFound: When no field has that attribute name.

        """
        for attr_name, field in self:
            if field_name == attr_name:
                return field

        raise errors.FieldNotFound('Field not found', field_name)

    def __iter__(self):
        """Yield `(attribute_name, field_instance)` pairs."""
        for pair in self.iterate_over_fields():
            attr_name, field = pair
            yield attr_name, field

    def validate(self):
        """Run validation of every field against this instance.

        :raises ValidationError: Wrapping the first field error, prefixed
            with the name of the offending field.

        """
        for name, field in self:
            try:
                field.validate_for_object(self)
            except ValidationError as error:
                message = "Error for field '{name}'.".format(name=name)
                raise ValidationError(message, error)

    @classmethod
    def iterate_over_fields(cls):
        """Yield `(attribute_name, field_instance)` for each class field."""
        for attr in dir(cls):
            candidate = getattr(cls, attr)
            if not isinstance(candidate, BaseField):
                continue
            yield attr, candidate

    @classmethod
    def iterate_with_name(cls):
        """Yield `(attribute_name, structue_name, field_instance)` triples.

        The structure name is the key under which the value appears in the
        structure and in the schema (in primitives), and only there.

        """
        for attr_name, field in cls.iterate_over_fields():
            yield attr_name, field.structue_name(attr_name), field

    def to_struct(self):
        """Return this model as a plain Python structure."""
        return parsers.to_struct(self)

    @classmethod
    def to_json_schema(cls):
        """Return the JSON schema describing this model class."""
        return parsers.to_json_schema(cls)

    def __repr__(self):
        """Return `ClassName(field=value, ...)` listing non-`None` fields."""
        shown = {}
        for name, _ in self:
            try:
                current = getattr(self, name)
                if current is not None:
                    shown[name] = repr(current)
            except ValidationError:
                # fields that cannot be read are left out of the repr
                pass

        rendered = ', '.join(
            '{0[0]}={0[1]}'.format(item) for item in sorted(shown.items())
        )
        return '{class_name}({fields})'.format(
            class_name=self.__class__.__name__,
            fields=rendered,
        )

    def __str__(self):
        """Return a short `"<ClassName> object"` description."""
        return '{name} object'.format(name=self.__class__.__name__)

    def __setattr__(self, name, value):
        """Set an attribute, naming the field in any validation error."""
        try:
            return super(Base, self).__setattr__(name, value)
        except ValidationError as error:
            message = "Error for field '{name}'.".format(name=name)
            raise ValidationError(message, error)

    def __eq__(self, other):
        """Compare field by field; only instances of the same type match."""
        if type(other) is not type(self):
            return False

        def read(model, attr_name):
            # a field that fails validation on access counts as None
            try:
                return getattr(model, attr_name)
            except errors.ValidationError:
                return None

        for name, _ in self.iterate_over_fields():
            our = read(self, name)
            their = read(other, name)
            if our != their:
                return False

        return True

    def __ne__(self, other):
        """Return the negation of `__eq__`."""
        return not (self == other)
|
||||
|
||||
|
||||
class _CacheKey(object):

    """Token object used to identify a model instance in memory."""
|
||||
106
trains_agent/backend_api/session/jsonmodels/parsers.py
Normal file
106
trains_agent/backend_api/session/jsonmodels/parsers.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Parsers to change model structure into different ones."""
|
||||
import inspect
|
||||
|
||||
from . import fields, builders, errors
|
||||
|
||||
|
||||
def to_struct(model):
    """Convert a model instance into a plain Python dictionary.

    The model is validated first. Fields whose current value is `None` are
    left out of the result.

    :param model: Model instance to convert.
    :rtype: ``dict``

    """
    model.validate()

    struct = {}
    for _, structure_name, field in model.iterate_with_name():
        raw = field.__get__(model)
        if raw is not None:
            struct[structure_name] = field.to_struct(raw)
    return struct
|
||||
|
||||
|
||||
def to_json_schema(cls):
    """Produce the JSON schema for a model class.

    :param cls: Class to generate the schema for.
    :rtype: ``dict``

    """
    return build_json_schema(cls).build()
|
||||
|
||||
|
||||
def build_json_schema(value, parent_builder=None):
    """Return a schema builder for *value*, given as a class or an instance.

    Subclasses of `Base` get an object builder, anything else a primitive
    builder.

    """
    # imported here rather than at module level, as in the original code
    from .models import Base

    cls = value.__class__ if not inspect.isclass(value) else value
    if not issubclass(cls, Base):
        return build_json_schema_primitive(cls, parent_builder)
    return build_json_schema_object(cls, parent_builder)
|
||||
|
||||
|
||||
def build_json_schema_object(cls, parent_builder=None):
    """Create an object builder for a model class and register its fields."""
    builder = builders.ObjectBuilder(cls, parent_builder)
    # NOTE(review): presumably stops descending when the same type is
    # already being built higher up (recursive models) - confirm in builders.
    if builder.count_type(builder.type) > 1:
        return builder

    for _, name, field in cls.iterate_with_name():
        if isinstance(field, fields.EmbeddedField):
            field_schema = _parse_embedded(field, builder)
        elif isinstance(field, fields.ListField):
            field_schema = _parse_list(field, builder)
        else:
            field_schema = _create_primitive_field_schema(field)
        builder.add_field(name, field, field_schema)
    return builder
|
||||
|
||||
|
||||
def _parse_list(field, parent_builder):
    """Create a list builder holding a schema for each allowed item type."""
    list_builder = builders.ListBuilder(
        parent_builder, field.nullable, default=field._default)
    for item_type in field.items_types:
        item_schema = build_json_schema(item_type, list_builder)
        list_builder.add_type_schema(item_schema)
    return list_builder
|
||||
|
||||
|
||||
def _parse_embedded(field, parent_builder):
    """Create an embedded builder holding a schema for each allowed type."""
    embedded_builder = builders.EmbeddedBuilder(
        parent_builder, field.nullable, default=field._default)
    for embedded_type in field.types:
        type_schema = build_json_schema(embedded_type, embedded_builder)
        embedded_builder.add_type_schema(type_schema)
    return embedded_builder
|
||||
|
||||
|
||||
def build_json_schema_primitive(cls, parent_builder):
    """Return a primitive builder for the given (non-model) class."""
    return builders.PrimitiveBuilder(cls, parent_builder)
|
||||
|
||||
|
||||
def _create_primitive_field_schema(field):
    """Build the schema dictionary for a primitive (scalar) field.

    :param field: String, int, float or bool field instance.
    :return: Dictionary with the `type` key and, if the field has one, the
        `default` key.
    :rtype: ``dict``
    :raises errors.FieldNotSupported: For any other kind of field.

    """
    if isinstance(field, fields.StringField):
        obj_type = 'string'
    elif isinstance(field, fields.IntField):
        obj_type = 'number'
    elif isinstance(field, fields.FloatField):
        # NOTE(review): 'float' is not a JSON Schema primitive type name
        # ('number' is); kept as is because callers may compare against it.
        obj_type = 'float'
    elif isinstance(field, fields.BoolField):
        obj_type = 'boolean'
    else:
        # `type(field).__name__` names the field class itself; the former
        # `type(field).__class__.__name__` reported the metaclass instead.
        raise errors.FieldNotSupported(
            'Field {field} is not supported!'.format(
                field=type(field).__name__))

    if field.nullable:
        obj_type = [obj_type, 'null']

    schema = {'type': obj_type}

    if field.has_default:
        schema["default"] = field._default

    return schema
|
||||
156
trains_agent/backend_api/session/jsonmodels/utilities.py
Normal file
156
trains_agent/backend_api/session/jsonmodels/utilities.py
Normal file
@@ -0,0 +1,156 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import six
|
||||
import re
|
||||
from collections import namedtuple
|
||||
|
||||
# Python types compared with plain `==` by `compare_schemas`.
SCALAR_TYPES = tuple(six.string_types) + (int, float, bool)

# ECMA 262 regex flag letter -> Python `re` flag.
ECMA_TO_PYTHON_FLAGS = {'i': re.I, 'm': re.M}

# Reverse lookup: Python `re` flag -> ECMA 262 flag letter.
PYTHON_TO_ECMA_FLAGS = {
    python_flag: ecma_flag
    for ecma_flag, python_flag in ECMA_TO_PYTHON_FLAGS.items()
}

# Pair of a raw pattern and the list of Python flags to use with it.
PythonRegex = namedtuple('PythonRegex', ['regex', 'flags'])
|
||||
|
||||
|
||||
def _normalize_string_type(value):
    """Return string values as `six.text_type`; pass others through."""
    if not isinstance(value, six.string_types):
        return value
    return six.text_type(value)
|
||||
|
||||
|
||||
def _compare_dicts(one, two):
    """Tell whether two dicts hold the same keys with matching schemas."""
    if len(one) != len(two):
        return False

    for key in one:
        if key not in two:
            return False
        if not compare_schemas(one[key], two[key]):
            return False
    return True
|
||||
|
||||
|
||||
def _compare_lists(one, two):
    """Compare two lists of schemas without regard to element order.

    Every element of `one` has to be paired with a distinct, matching element
    of `two`. The previous implementation kept a single flag that was never
    reset, so one matching pair made the whole comparison succeed, and two
    empty lists were reported as different.

    :param list one: First list.
    :param list two: Second list.
    :rtype: `bool`
    :raises RuntimeError: Propagated from `compare_schemas` when two compared
        elements are of different or unsupported types.

    """
    if len(one) != len(two):
        return False

    # elements of `two` that have not been paired yet
    unmatched = list(two)
    for first_item in one:
        for index, second_item in enumerate(unmatched):
            if compare_schemas(first_item, second_item):
                # consume the partner so it cannot be matched twice
                del unmatched[index]
                break
        else:
            # no partner left for this element
            return False
    return True
|
||||
|
||||
|
||||
def _assert_same_types(one, two):
    """Raise `RuntimeError` unless each value is an instance of the other's type."""
    if isinstance(one, type(two)) and isinstance(two, type(one)):
        return

    message = 'Types mismatch! "{type1}" and "{type2}".'.format(
        type1=type(one).__name__, type2=type(two).__name__)
    raise RuntimeError(message)
|
||||
|
||||
|
||||
def compare_schemas(one, two):
    """Compare two structures that represent JSON schemas.

    Plain equality cannot be used, because lists in a JSON schema DO NOT
    keep order (while Python lists do), so lists are compared by a dedicated
    helper.

    Note this won't check all configurations, only the first one that seems
    to match, which can lead to wrong results.

    :param one: First schema to compare.
    :param two: Second schema to compare.
    :rtype: `bool`
    :raises RuntimeError: When the values differ in type or are of a type
        that is not allowed.

    """
    one, two = _normalize_string_type(one), _normalize_string_type(two)
    _assert_same_types(one, two)

    if isinstance(one, list):
        return _compare_lists(one, two)
    if isinstance(one, dict):
        return _compare_dicts(one, two)
    if isinstance(one, SCALAR_TYPES):
        return one == two
    if one is None:
        return one is two

    raise RuntimeError('Not allowed type "{type}"'.format(
        type=type(one).__name__))
|
||||
|
||||
|
||||
def is_ecma_regex(regex):
    """Tell whether the given regex is written in ECMA 262 form.

    :rtype: bool
    :raises ValueError: When the text contains exactly one slash.

    """
    pieces = regex.split('/')
    count = len(pieces)

    if count == 1:
        return False
    if count < 3:
        raise ValueError('Given regex isn\'t ECMA regex nor Python regex.')

    # swap the trailing flags part for an empty one and re-join
    without_flags = '/'.join(pieces[:-1] + [''])
    return without_flags.startswith('/') and without_flags.endswith('/')
|
||||
|
||||
|
||||
def convert_ecma_regex_to_python(value):
    """Convert an ECMA 262 regex into a Python pattern plus flags.

    A value that is not an ECMA regex is returned unchanged, with an empty
    flag list.

    :param string value: ECMA regex.
    :return: 2-tuple with `regex` and `flags`
    :rtype: namedtuple
    :raises ValueError: When the regex carries an unknown flag.

    """
    if not is_ecma_regex(value):
        return PythonRegex(value, [])

    pieces = value.split('/')
    flags = pieces[-1]
    pattern = '/'.join(pieces[1:-1])

    try:
        result_flags = [ECMA_TO_PYTHON_FLAGS[letter] for letter in flags]
    except KeyError:
        raise ValueError('Wrong flags "{}".'.format(flags))

    return PythonRegex(pattern, result_flags)
|
||||
|
||||
|
||||
def convert_python_regex_to_ecma(value, flags=[]):
    """Convert a Python regex into its ECMA 262 form.

    A value that already is an ECMA regex is returned unchanged.

    :param string value: Python regex.
    :param list flags: List of flags (allowed flags: `re.I`, `re.M`)
    :return: ECMA 262 regex
    :rtype: str

    """
    if is_ecma_regex(value):
        return value

    ecma_flags = ''.join([PYTHON_TO_ECMA_FLAGS[flag] for flag in flags])
    return '/{value}/{flags}'.format(value=value, flags=ecma_flags)
|
||||
202
trains_agent/backend_api/session/jsonmodels/validators.py
Normal file
202
trains_agent/backend_api/session/jsonmodels/validators.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""Predefined validators."""
|
||||
import re
|
||||
|
||||
from six.moves import reduce
|
||||
|
||||
from .errors import ValidationError
|
||||
from . import utilities
|
||||
|
||||
|
||||
class Min(object):

    """Validator enforcing a lower bound."""

    def __init__(self, minimum_value, exclusive=False):
        """Create the validator.

        :param minimum_value: Lower bound for validated values.
        :param bool exclusive: If `True`, a value equal to the bound is
            rejected as well.

        """
        self.minimum_value = minimum_value
        self.exclusive = exclusive

    def validate(self, value):
        """Raise `ValidationError` when the value is below the bound."""
        limit = self.minimum_value
        if self.exclusive:
            if value <= limit:
                message = "'{value}' is lower or equal than minimum ('{min}')."
                raise ValidationError(message.format(value=value, min=limit))
            return

        if value < limit:
            message = "'{value}' is lower than minimum ('{min}')."
            raise ValidationError(message.format(value=value, min=limit))

    def modify_schema(self, field_schema):
        """Write the `minimum` (and exclusivity) keys into the schema."""
        field_schema['minimum'] = self.minimum_value
        if not self.exclusive:
            return
        field_schema['exclusiveMinimum'] = True
|
||||
|
||||
|
||||
class Max(object):

    """Validator enforcing an upper bound."""

    def __init__(self, maximum_value, exclusive=False):
        """Create the validator.

        :param maximum_value: Upper bound for validated values.
        :param bool exclusive: If `True`, a value equal to the bound is
            rejected as well.

        """
        self.maximum_value = maximum_value
        self.exclusive = exclusive

    def validate(self, value):
        """Raise `ValidationError` when the value is above the bound."""
        limit = self.maximum_value
        if self.exclusive:
            if value >= limit:
                message = "'{val}' is bigger or equal than maximum ('{max}')."
                raise ValidationError(message.format(val=value, max=limit))
            return

        if value > limit:
            message = "'{value}' is bigger than maximum ('{max}')."
            raise ValidationError(message.format(value=value, max=limit))

    def modify_schema(self, field_schema):
        """Write the `maximum` (and exclusivity) keys into the schema."""
        field_schema['maximum'] = self.maximum_value
        if not self.exclusive:
            return
        field_schema['exclusiveMaximum'] = True
|
||||
|
||||
|
||||
class Regex(object):

    """Validator matching values against a regular expression."""

    FLAGS = {
        'ignorecase': re.I,
        'multiline': re.M,
    }

    def __init__(self, pattern, **flags):
        """Create the validator.

        When the pattern is an ECMA regex, the keyword flags are
        **completely ignored** and the flags embedded in the regex are used.

        :param string pattern: Pattern of the regex.
        :param bool flags: Flags used for matching. Allowed names are the
            keys of the `FLAGS` attribute; a flag is enabled when its value
            evaluates to True. Flags with false values and unknown flag names
            are ignored.

        """
        if not utilities.is_ecma_regex(pattern):
            self.pattern = pattern
            self.flags = [
                self.FLAGS[flag_name]
                for flag_name, enabled in flags.items()
                if flag_name in self.FLAGS and enabled
            ]
        else:
            converted = utilities.convert_ecma_regex_to_python(pattern)
            self.pattern, self.flags = converted

    def validate(self, value):
        """Raise `ValidationError` unless the pattern is found in the value."""
        flags = self._calculate_flags()

        try:
            found = re.search(self.pattern, value, flags)
        except TypeError as te:
            raise ValidationError(*te.args)

        if found:
            return

        raise ValidationError(
            'Value "{value}" did not match pattern "{pattern}".'.format(
                value=value, pattern=self.pattern
            ))

    def _calculate_flags(self):
        """Combine the stored flags into one value with bitwise OR."""
        combined = 0
        for flag in self.flags:
            combined = combined | flag
        return combined

    def modify_schema(self, field_schema):
        """Write the pattern, in ECMA form, into the schema."""
        ecma_pattern = utilities.convert_python_regex_to_ecma(
            self.pattern, self.flags)
        field_schema['pattern'] = ecma_pattern
|
||||
|
||||
|
||||
class Length(object):

    """Validator for length."""

    def __init__(self, minimum_value=None, maximum_value=None):
        """Init.

        At least one of the two bounds has to be given.

        :param int minimum_value: Minimum length (optional).
        :param int maximum_value: Maximum length (optional).
        :raises ValueError: When neither bound is specified.

        """
        if minimum_value is None and maximum_value is None:
            raise ValueError(
                "Either 'minimum_value' or 'maximum_value' must be specified.")

        self.minimum_value = minimum_value
        self.maximum_value = maximum_value

    def validate(self, value):
        """Raise `ValidationError` when `len(value)` is outside the bounds."""
        len_ = len(value)

        if self.minimum_value is not None and len_ < self.minimum_value:
            tpl = "Value '{val}' length is lower than allowed minimum '{min}'."
            raise ValidationError(tpl.format(
                val=value, min=self.minimum_value
            ))

        if self.maximum_value is not None and len_ > self.maximum_value:
            raise ValidationError(
                "Value '{val}' length is bigger than "
                "allowed maximum '{max}'.".format(
                    val=value,
                    max=self.maximum_value,
                ))

    def modify_schema(self, field_schema):
        """Write the `minLength` / `maxLength` keys into the schema."""
        # A minimum of 0 (or None) is left out: omitting `minLength` means
        # the same as 0 in JSON Schema, so the output stays unchanged.
        if self.minimum_value:
            field_schema['minLength'] = self.minimum_value

        # A maximum of 0 is a real constraint that `validate` enforces, so
        # only a missing bound (None) may be skipped here.
        if self.maximum_value is not None:
            field_schema['maxLength'] = self.maximum_value
|
||||
|
||||
|
||||
class Enum(object):

    """Validator restricting a value to a fixed set of choices."""

    def __init__(self, *choices):
        """Create the validator.

        :param [] choices: Values accepted for the field.

        """
        self.choices = list(choices)

    def validate(self, value):
        """Raise `ValidationError` when the value is not among the choices."""
        if value in self.choices:
            return
        message = "Value '{val}' is not a valid choice."
        raise ValidationError(message.format(val=value))

    def modify_schema(self, field_schema):
        """Write the list of choices under the `enum` key of the schema."""
        field_schema['enum'] = self.choices
|
||||
@@ -1,10 +1,8 @@
|
||||
import requests
|
||||
|
||||
import six
|
||||
import jsonmodels.models
|
||||
import jsonmodels.fields
|
||||
import jsonmodels.errors
|
||||
|
||||
from . import jsonmodels
|
||||
from .apimodel import ApiModel
|
||||
from .datamodel import NonStrictDataModelMixin
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ from .request import Request, BatchRequest
|
||||
from .token_manager import TokenManager
|
||||
from ..config import load
|
||||
from ..utils import get_http_session_with_retry, urllib_log_warning_setup
|
||||
from ...backend_config.environment import backward_compatibility_support
|
||||
from ...version import __version__
|
||||
|
||||
|
||||
@@ -84,8 +85,11 @@ class Session(TokenManager):
|
||||
initialize_logging=True,
|
||||
client=None,
|
||||
config=None,
|
||||
http_retries_config=None,
|
||||
**kwargs
|
||||
):
|
||||
# add backward compatibility support for old environment variables
|
||||
backward_compatibility_support()
|
||||
|
||||
if config is not None:
|
||||
self.config = config
|
||||
@@ -126,7 +130,7 @@ class Session(TokenManager):
|
||||
raise ValueError("host is required in init or config")
|
||||
|
||||
self.__host = host.strip("/")
|
||||
http_retries_config = self.config.get(
|
||||
http_retries_config = http_retries_config or self.config.get(
|
||||
"api.http.retries", ConfigTree()
|
||||
).as_plain_ordered_dict()
|
||||
http_retries_config["status_forcelist"] = self._retry_codes
|
||||
|
||||
@@ -23,3 +23,31 @@ class EnvEntry(Entry):
|
||||
|
||||
def error(self, message):
    """Print the message with an "Environment configuration" prefix."""
    text = "Environment configuration: {}".format(message)
    print(text)
|
||||
|
||||
|
||||
def backward_compatibility_support():
    """Add `ALG_` aliases for the supported `TRAINS_` environment variables.

    Nothing happens unless `ENVIRONMENT_BACKWARD_COMPATIBLE` is enabled. For
    every entry of `ENVIRONMENT_CONFIG` and `ENVIRONMENT_SDK_PARAMS`, the
    first variable name starting with `TRAINS_` is copied with an `ALG_`
    prefix and appended to the entry's names, unless already present.
    Entries that cannot be processed are skipped (best effort).
    """
    from ..definitions import ENVIRONMENT_CONFIG, ENVIRONMENT_SDK_PARAMS, ENVIRONMENT_BACKWARD_COMPATIBLE
    if not ENVIRONMENT_BACKWARD_COMPATIBLE.get():
        return

    # Add ALG_ prefix on every TRAINS_ os environment we support
    for v in ENVIRONMENT_CONFIG.values():
        try:
            trains_vars = [var for var in v.vars if var.startswith('TRAINS_')]
            if not trains_vars:
                continue
            alg_var = trains_vars[0].replace('TRAINS_', 'ALG_', 1)
            if alg_var not in v.vars:
                v.vars = tuple(list(v.vars) + [alg_var])
        except Exception:
            # Deliberately best effort, but no longer a bare `except:`,
            # which would also swallow KeyboardInterrupt / SystemExit.
            continue

    # iterate over a snapshot, since entries are re-assigned inside the loop
    for k, v in list(ENVIRONMENT_SDK_PARAMS.items()):
        try:
            trains_vars = [var for var in v if var.startswith('TRAINS_')]
            if not trains_vars:
                continue
            alg_var = trains_vars[0].replace('TRAINS_', 'ALG_', 1)
            if alg_var not in v:
                ENVIRONMENT_SDK_PARAMS[k] = tuple(list(v) + [alg_var])
        except Exception:
            continue
|
||||
|
||||
@@ -94,9 +94,20 @@ class ServiceCommandSection(BaseCommandSection):
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Set up the section: create its session and its list formatter.

    :param args: Positional arguments forwarded to `_get_session`.
    :param kwargs: Keyword arguments; passed through
        `_verify_command_states` before being forwarded to `_get_session`.
    """
    super(ServiceCommandSection, self).__init__()
    # let the class adjust / enforce the arguments before they are used
    kwargs = self._verify_command_states(kwargs)
    self._session = self._get_session(*args, **kwargs)
    self._list_formatter = ListFormatter(self.service)
|
||||
|
||||
@classmethod
def _verify_command_states(cls, kwargs):
    """
    Conform and enforce the command arguments.

    This is the place to automatically turn switches on/off based on
    different states. This implementation returns the arguments untouched.

    :param kwargs: Keyword arguments of the command.
    :return: kwargs
    """
    return kwargs
|
||||
|
||||
@staticmethod
def _get_session(*args, **kwargs):
    """Create a `Session`, forwarding all arguments to its constructor."""
    return Session(*args, **kwargs)
|
||||
|
||||
@@ -44,7 +44,7 @@ def main():
|
||||
sentinel = ''
|
||||
parse_input = '\n'.join(iter(input, sentinel))
|
||||
credentials = None
|
||||
api_host = None
|
||||
api_server = None
|
||||
web_server = None
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
@@ -52,11 +52,11 @@ def main():
|
||||
if parsed:
|
||||
# Take the credentials in raw form or from api section
|
||||
credentials = get_parsed_field(parsed, ["credentials"])
|
||||
api_host = get_parsed_field(parsed, ["api_server", "host"])
|
||||
api_server = get_parsed_field(parsed, ["api_server", "host"])
|
||||
web_server = get_parsed_field(parsed, ["web_server"])
|
||||
except Exception:
|
||||
credentials = credentials or None
|
||||
api_host = api_host or None
|
||||
api_server = api_server or None
|
||||
web_server = web_server or None
|
||||
|
||||
while not credentials or set(credentials) != {"access_key", "secret_key"}:
|
||||
@@ -65,63 +65,25 @@ def main():
|
||||
|
||||
print('Detected credentials key=\"{}\" secret=\"{}\"'.format(credentials['access_key'],
|
||||
credentials['secret_key'][0:4] + "***"))
|
||||
if api_host:
|
||||
api_host = input_url('API Host', api_host)
|
||||
web_input = True
|
||||
if web_server:
|
||||
host = input_url('WEB Host', web_server)
|
||||
elif api_server:
|
||||
web_input = False
|
||||
host = input_url('API Host', api_server)
|
||||
else:
|
||||
print(host_description)
|
||||
api_host = input_url('API Host', '')
|
||||
parsed_host = verify_url(api_host)
|
||||
host = input_url('WEB Host', '')
|
||||
|
||||
if parsed_host.netloc.startswith('demoapp.'):
|
||||
# this is our demo server
|
||||
api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapp.', 'demoapi.', 1) + parsed_host.path
|
||||
web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
|
||||
files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapp.', 'demofiles.', 1) + parsed_host.path
|
||||
elif parsed_host.netloc.startswith('app.'):
|
||||
# this is our application server
|
||||
api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('app.', 'api.', 1) + parsed_host.path
|
||||
web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
|
||||
files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('app.', 'files.', 1) + parsed_host.path
|
||||
elif parsed_host.netloc.startswith('demoapi.'):
|
||||
print('{} is the api server, we need the web server. Replacing \'demoapi.\' with \'demoapp.\''.format(
|
||||
parsed_host.netloc))
|
||||
api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
|
||||
web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapi.', 'demoapp.', 1) + parsed_host.path
|
||||
files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('demoapi.', 'demofiles.', 1) + parsed_host.path
|
||||
elif parsed_host.netloc.startswith('api.'):
|
||||
print('{} is the api server, we need the web server. Replacing \'api.\' with \'app.\''.format(
|
||||
parsed_host.netloc))
|
||||
api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
|
||||
web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('api.', 'app.', 1) + parsed_host.path
|
||||
files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace('api.', 'files.', 1) + parsed_host.path
|
||||
elif parsed_host.port == 8008:
|
||||
print('Port 8008 is the api port. Replacing 8080 with 8008 for Web application')
|
||||
api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
|
||||
web_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8008', ':8080', 1) + parsed_host.path
|
||||
files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8008', ':8081', 1) + parsed_host.path
|
||||
elif parsed_host.port == 8080:
|
||||
api_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8080', ':8008', 1) + parsed_host.path
|
||||
web_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
|
||||
files_host = parsed_host.scheme + "://" + parsed_host.netloc.replace(':8080', ':8081', 1) + parsed_host.path
|
||||
parsed_host = verify_url(host)
|
||||
api_host, files_host, web_host = parse_host(parsed_host, allow_input=True)
|
||||
|
||||
# on of these two we configured
|
||||
if not web_input:
|
||||
web_host = input_url('Web Application Host', web_host)
|
||||
else:
|
||||
api_host = ''
|
||||
web_host = ''
|
||||
files_host = ''
|
||||
if not parsed_host.port:
|
||||
print('Host port not detected, do you wish to use the default 8080 port n/[y]? ', end='')
|
||||
replace_port = input().lower()
|
||||
if not replace_port or replace_port == 'y' or replace_port == 'yes':
|
||||
api_host = parsed_host.scheme + "://" + parsed_host.netloc + ':8008' + parsed_host.path
|
||||
web_host = parsed_host.scheme + "://" + parsed_host.netloc + ':8080' + parsed_host.path
|
||||
files_host = parsed_host.scheme + "://" + parsed_host.netloc + ':8081' + parsed_host.path
|
||||
elif not replace_port or replace_port.lower() == 'n' or replace_port.lower() == 'no':
|
||||
web_host = input_host_port("Web", parsed_host)
|
||||
api_host = input_host_port("API", parsed_host)
|
||||
files_host = input_host_port("Files", parsed_host)
|
||||
if not api_host:
|
||||
api_host = parsed_host.scheme + "://" + parsed_host.netloc + parsed_host.path
|
||||
api_host = input_url('API Host', api_host)
|
||||
|
||||
web_host = input_url('Web Application Host', web_server if web_server else web_host)
|
||||
files_host = input_url('File Store Host', files_host)
|
||||
|
||||
print('\nTRAINS Hosts configuration:\nWeb App: {}\nAPI: {}\nFile Store: {}\n'.format(
|
||||
@@ -150,6 +112,18 @@ def main():
|
||||
git_user = None
|
||||
git_pass = None
|
||||
|
||||
# get extra-index-url for pip installations
|
||||
extra_index_urls = []
|
||||
print('\nEnter additional artifact repository (extra-index-url) to use when installing python packages '
|
||||
'(leave blank if not required):', end='')
|
||||
index_url = input().strip()
|
||||
while index_url:
|
||||
extra_index_urls.append(index_url)
|
||||
print('Another artifact repository? (enter another url or leave blank if done):', end='')
|
||||
index_url = input().strip()
|
||||
if len(extra_index_urls):
|
||||
print("The following artifact repositories will be added:\n\t- {}".format("\n\t- ".join(extra_index_urls)))
|
||||
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
conf_folder = Path(__file__).parent.absolute() / '..' / 'backend_api' / 'config' / 'default'
|
||||
@@ -183,6 +157,10 @@ def main():
|
||||
'agent.git_pass=\"{}\"\n' \
|
||||
'\n'.format(git_user or '', git_pass or '')
|
||||
f.write(git_credentials)
|
||||
extra_index_str = '# extra_index_url: ["https://allegroai.jfrog.io/trainsai/api/pypi/public/simple"]\n' \
|
||||
'agent.package_manager.extra_index_url= ' \
|
||||
'[\n{}\n]\n\n'.format("\n".join(map("\"{}\"".format, extra_index_urls)))
|
||||
f.write(extra_index_str)
|
||||
f.write(default_conf)
|
||||
except Exception:
|
||||
print('Error! Could not write configuration file at: {}'.format(str(conf_file)))
|
||||
@@ -192,13 +170,71 @@ def main():
|
||||
print('TRAINS-AGENT setup completed successfully.')
|
||||
|
||||
|
||||
def parse_host(parsed_host, allow_input=True):
    """Derive the API, file-store and web URLs from one parsed server URL.

    Known host prefixes (`demoapp.`, `app.`, `demoapi.`, `api.`) and the
    ports 8008 / 8080 are mapped to their sibling servers. For any other
    host the user is asked interactively, provided `allow_input` is set.

    :param parsed_host: Parsed URL exposing `scheme`, `netloc`, `path` and
        `port` (for example the result of `urlparse`).
    :param bool allow_input: Whether prompting the user is allowed when the
        host is not recognised.
    :return: Tuple `(api_host, files_host, web_host)`.
    :raises ValueError: When the host is not recognised and `allow_input`
        is false.
    """
    netloc = parsed_host.netloc

    def url_for(host_netloc):
        # rebuild a URL that differs from the input only in its netloc
        return parsed_host.scheme + "://" + host_netloc + parsed_host.path

    if netloc.startswith('demoapp.'):
        # this is our demo server
        api_host = url_for(netloc.replace('demoapp.', 'demoapi.', 1))
        web_host = url_for(netloc)
        files_host = url_for(netloc.replace('demoapp.', 'demofiles.', 1))
    elif netloc.startswith('app.'):
        # this is our application server
        api_host = url_for(netloc.replace('app.', 'api.', 1))
        web_host = url_for(netloc)
        files_host = url_for(netloc.replace('app.', 'files.', 1))
    elif netloc.startswith('demoapi.'):
        print('{} is the api server, we need the web server. Replacing \'demoapi.\' with \'demoapp.\''.format(
            netloc))
        api_host = url_for(netloc)
        web_host = url_for(netloc.replace('demoapi.', 'demoapp.', 1))
        files_host = url_for(netloc.replace('demoapi.', 'demofiles.', 1))
    elif netloc.startswith('api.'):
        print('{} is the api server, we need the web server. Replacing \'api.\' with \'app.\''.format(
            netloc))
        api_host = url_for(netloc)
        web_host = url_for(netloc.replace('api.', 'app.', 1))
        files_host = url_for(netloc.replace('api.', 'files.', 1))
    elif parsed_host.port == 8008:
        # the message used to state the replacement the wrong way round
        print('Port 8008 is the api port. Replacing 8008 with 8080 for Web application')
        api_host = url_for(netloc)
        web_host = url_for(netloc.replace(':8008', ':8080', 1))
        files_host = url_for(netloc.replace(':8008', ':8081', 1))
    elif parsed_host.port == 8080:
        api_host = url_for(netloc.replace(':8080', ':8008', 1))
        web_host = url_for(netloc)
        files_host = url_for(netloc.replace(':8080', ':8081', 1))
    elif allow_input:
        api_host = ''
        web_host = ''
        files_host = ''
        if not parsed_host.port:
            print('Host port not detected, do you wish to use the default 8080 port n/[y]? ', end='')
            replace_port = input().lower()
            if not replace_port or replace_port in ('y', 'yes'):
                api_host = url_for(netloc + ':8008')
                web_host = url_for(netloc + ':8080')
                files_host = url_for(netloc + ':8081')
            elif replace_port in ('n', 'no'):
                web_host = input_host_port("Web", parsed_host)
                api_host = input_host_port("API", parsed_host)
                files_host = input_host_port("Files", parsed_host)
        if not api_host:
            # fall back to the host exactly as it was given
            api_host = url_for(netloc)
    else:
        raise ValueError("Could not parse host name")

    return api_host, files_host, web_host
|
||||
|
||||
|
||||
def verify_credentials(api_host, credentials):
|
||||
"""check if the credentials are valid"""
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
print('Verifying credentials ...')
|
||||
if api_host:
|
||||
Session(api_key=credentials['access_key'], secret_key=credentials['secret_key'], host=api_host)
|
||||
Session(api_key=credentials['access_key'], secret_key=credentials['secret_key'], host=api_host,
|
||||
http_retries_config={"total": 2})
|
||||
print('Credentials verified!')
|
||||
return True
|
||||
else:
|
||||
@@ -240,7 +276,7 @@ def read_manual_credentials():
|
||||
|
||||
def input_url(host_type, host=None):
|
||||
while True:
|
||||
print('{} configured to: [{}] '.format(host_type, host), end='')
|
||||
print('{} configured to: {}'.format(host_type, '[{}] '.format(host) if host else ''), end='')
|
||||
parse_input = input()
|
||||
if host and (not parse_input or parse_input.lower() == 'yes' or parse_input.lower() == 'y'):
|
||||
break
|
||||
@@ -254,11 +290,12 @@ def input_url(host_type, host=None):
|
||||
def input_host_port(host_type, parsed_host):
|
||||
print('Enter port for {} host '.format(host_type), end='')
|
||||
replace_port = input().lower()
|
||||
return parsed_host.scheme + "://" + parsed_host.netloc + (':{}'.format(replace_port) if replace_port else '') + \
|
||||
parsed_host.path
|
||||
return parsed_host.scheme + "://" + parsed_host.netloc + (
|
||||
':{}'.format(replace_port) if replace_port else '') + parsed_host.path
|
||||
|
||||
|
||||
def verify_url(parse_input):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if not parse_input.startswith('http://') and not parse_input.startswith('https://'):
|
||||
# if we have a specific port, use http prefix, otherwise assume https
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -55,35 +55,41 @@ class EnvironmentConfig(object):
|
||||
|
||||
|
||||
ENVIRONMENT_CONFIG = {
|
||||
"api.api_server": EnvironmentConfig("TRAINS_API_HOST", "ALG_API_HOST"),
|
||||
"api.api_server": EnvironmentConfig("TRAINS_API_HOST", ),
|
||||
"api.credentials.access_key": EnvironmentConfig(
|
||||
"TRAINS_API_ACCESS_KEY", "ALG_API_ACCESS_KEY"
|
||||
"TRAINS_API_ACCESS_KEY",
|
||||
),
|
||||
"api.credentials.secret_key": EnvironmentConfig(
|
||||
"TRAINS_API_SECRET_KEY", "ALG_API_SECRET_KEY"
|
||||
"TRAINS_API_SECRET_KEY",
|
||||
),
|
||||
"agent.worker_name": EnvironmentConfig("TRAINS_WORKER_NAME", "ALG_WORKER_NAME"),
|
||||
"agent.worker_id": EnvironmentConfig("TRAINS_WORKER_ID", "ALG_WORKER_ID"),
|
||||
"agent.worker_name": EnvironmentConfig("TRAINS_WORKER_NAME", ),
|
||||
"agent.worker_id": EnvironmentConfig("TRAINS_WORKER_ID", ),
|
||||
"agent.cuda_version": EnvironmentConfig(
|
||||
"TRAINS_CUDA_VERSION", "ALG_CUDA_VERSION", "CUDA_VERSION"
|
||||
"TRAINS_CUDA_VERSION", "CUDA_VERSION"
|
||||
),
|
||||
"agent.cudnn_version": EnvironmentConfig(
|
||||
"TRAINS_CUDNN_VERSION", "ALG_CUDNN_VERSION", "CUDNN_VERSION"
|
||||
"TRAINS_CUDNN_VERSION", "CUDNN_VERSION"
|
||||
),
|
||||
"agent.cpu_only": EnvironmentConfig(
|
||||
"TRAINS_CPU_ONLY", "ALG_CPU_ONLY", "CPU_ONLY", type=bool
|
||||
"TRAINS_CPU_ONLY", "CPU_ONLY", type=bool
|
||||
),
|
||||
"sdk.aws.s3.key": EnvironmentConfig("AWS_ACCESS_KEY_ID"),
|
||||
"sdk.aws.s3.secret": EnvironmentConfig("AWS_SECRET_ACCESS_KEY"),
|
||||
"sdk.aws.s3.region": EnvironmentConfig("AWS_DEFAULT_REGION"),
|
||||
"sdk.azure.storage.containers.0": {'account_name': EnvironmentConfig("AZURE_STORAGE_ACCOUNT"),
|
||||
'account_key': EnvironmentConfig("AZURE_STORAGE_KEY")},
|
||||
"sdk.google.storage.credentials_json": EnvironmentConfig("GOOGLE_APPLICATION_CREDENTIALS"),
|
||||
}
|
||||
|
||||
CONFIG_FILE_ENV = EnvironmentConfig("ALG_CONFIG_FILE")
|
||||
|
||||
ENVIRONMENT_SDK_PARAMS = {
|
||||
"task_id": ("TRAINS_TASK_ID", "ALG_TASK_ID"),
|
||||
"config_file": ("TRAINS_CONFIG_FILE", "ALG_CONFIG_FILE", "TRAINS_CONFIG_FILE"),
|
||||
"log_level": ("TRAINS_LOG_LEVEL", "ALG_LOG_LEVEL"),
|
||||
"log_to_backend": ("TRAINS_LOG_TASK_TO_BACKEND", "ALG_LOG_TASK_TO_BACKEND"),
|
||||
"task_id": ("TRAINS_TASK_ID", ),
|
||||
"config_file": ("TRAINS_CONFIG_FILE", ),
|
||||
"log_level": ("TRAINS_LOG_LEVEL", ),
|
||||
"log_to_backend": ("TRAINS_LOG_TASK_TO_BACKEND", ),
|
||||
}
|
||||
|
||||
ENVIRONMENT_BACKWARD_COMPATIBLE = EnvironmentConfig("TRAINS_AGENT_ALG_ENV", type=bool)
|
||||
|
||||
VIRTUAL_ENVIRONMENT_PATH = {
|
||||
"python2": normalize_path(CONFIG_DIR, "py2venv"),
|
||||
"python3": normalize_path(CONFIG_DIR, "py3venv"),
|
||||
@@ -107,13 +113,18 @@ HTTP_HEADERS = {
|
||||
METADATA_EXTENSION = ".json"
|
||||
|
||||
DEFAULT_VENV_UPDATE_URL = (
|
||||
"https://raw.githubusercontent.com/Yelp/venv-update/v3.2.2/venv_update.py"
|
||||
"https://raw.githubusercontent.com/Yelp/venv-update/v3.2.4/venv_update.py"
|
||||
)
|
||||
WORKING_REPOSITORY_DIR = "task_repository"
|
||||
DEFAULT_VCS_CACHE = normalize_path(CONFIG_DIR, "vcs-cache")
|
||||
PIP_EXTRA_INDICES = [
|
||||
]
|
||||
DEFAULT_PIP_DOWNLOAD_CACHE = normalize_path(CONFIG_DIR, "pip-download-cache")
|
||||
ENV_AGENT_GIT_USER = EnvironmentConfig('TRAINS_AGENT_GIT_USER')
|
||||
ENV_AGENT_GIT_PASS = EnvironmentConfig('TRAINS_AGENT_GIT_PASS')
|
||||
ENV_TASK_EXECUTE_AS_USER = 'TRAINS_AGENT_EXEC_USER'
|
||||
ENV_TASK_EXTRA_PYTHON_PATH = 'TRAINS_AGENT_EXTRA_PYTHON_PATH'
|
||||
ENV_DOCKER_HOST_MOUNT = EnvironmentConfig('TRAINS_AGENT_K8S_HOST_MOUNT', 'TRAINS_AGENT_DOCKER_HOST_MOUNT')
|
||||
|
||||
|
||||
class FileBuffering(IntEnum):
|
||||
|
||||
1
trains_agent/glue/__init__.py
Normal file
1
trains_agent/glue/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
169
trains_agent/glue/k8s.py
Normal file
169
trains_agent/glue/k8s.py
Normal file
@@ -0,0 +1,169 @@
|
||||
from __future__ import print_function, division, unicode_literals
|
||||
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from time import sleep
|
||||
from typing import Text, List
|
||||
|
||||
from pyhocon import HOCONConverter
|
||||
|
||||
from trains_agent.commands.events import Events
|
||||
from trains_agent.commands.worker import Worker
|
||||
from trains_agent.helper.process import get_bash_output
|
||||
from trains_agent.helper.resource_monitor import ResourceMonitor
|
||||
|
||||
|
||||
class K8sIntegration(Worker):
    """
    Glue layer between trains queues and a Kubernetes cluster.

    Tasks are pulled from the monitored queues, pushed into a dedicated "pending" queue
    (so that tasks waiting in the k8s scheduler stay visible), and then scheduled as pods using kubectl.
    """

    K8S_PENDING_QUEUE = "k8s_scheduler"

    # Kubernetes object names only allow lower case alphanumerics, '-' and '.',
    # so the pod name must not contain underscores
    KUBECTL_RUN_CMD = "kubectl run trains-id-{task_id} " \
                      "--image {docker_image} " \
                      "--restart=Never --replicas=1 " \
                      "--generator=run-pod/v1"

    # removes every agent-labeled pod that is neither pending nor running
    KUBECTL_DELETE_CMD = "kubectl delete pods " \
                         "--selector=TRAINS=agent " \
                         "--field-selector=status.phase!=Pending,status.phase!=Running"

    # script executed inside the container, formatted with the task id
    CONTAINER_BASH_SCRIPT = "apt-get install -y git python-pip && " \
                            "pip install trains-agent && " \
                            "python -u -m trains_agent execute --full-monitoring --require-queue --id {}"

    def __init__(self, k8s_pending_queue_name=None, kubectl_cmd=None, container_bash_script=None, debug=False):
        """
        Initialize the k8s integration glue layer daemon

        :param str k8s_pending_queue_name: queue name to use when task is pending in the k8s scheduler
        :param str|callable kubectl_cmd: kubectl command line str, supports formatting (default: KUBECTL_RUN_CMD)
            example: "task={task_id} image={docker_image} queue_id={queue_id}"
            or a callable function: kubectl_cmd(task_id, docker_image, queue_id, task_data)
        :param str container_bash_script: container bash script to be executed in k8s (default: CONTAINER_BASH_SCRIPT)
        :param bool debug: Switch logging on
        """
        super(K8sIntegration, self).__init__()
        self.k8s_pending_queue_name = k8s_pending_queue_name or self.K8S_PENDING_QUEUE
        self.kubectl_cmd = kubectl_cmd or self.KUBECTL_RUN_CMD
        self.container_bash_script = container_bash_script or self.CONTAINER_BASH_SCRIPT
        # Always use system packages, because we will be running inside a docker container
        self._session.config.put("agent.package_manager.system_site_packages", True)
        # Add debug logging
        if debug:
            self.log.logger.disabled = False
            self.log.logger.setLevel(logging.INFO)

    def run_one_task(self, queue: Text, task_id: Text, worker_args=None):
        """
        Schedule a single task for execution as a k8s pod.

        The task is first enqueued into the k8s pending queue, then a kubectl command is built
        and executed. The container writes the current configuration into ~/trains.conf
        and runs the container bash script for the task.

        :param queue: queue the task was pulled from (passed to the kubectl command as ``queue_id``)
        :param task_id: ID of the task to schedule
        :param worker_args: not used by this implementation
        """
        # local import, keeps the module level dependencies untouched
        import shlex

        task_data = self._session.api_client.tasks.get_all(id=[task_id])[0]

        # push task into the k8s queue, so we have visibility on pending tasks in the k8s scheduler
        try:
            self._session.api_client.tasks.enqueue(task_id, queue=self.k8s_pending_queue_name,
                                                   status_reason='k8s pending scheduler')
        except Exception as e:
            self.log.error("ERROR: Could not push back task [{}] to k8s pending queue [{}], error: {}".format(
                task_id, self.k8s_pending_queue_name, e))
            return

        if task_data.execution.docker_cmd:
            docker_image = task_data.execution.docker_cmd
        else:
            docker_image = str(os.environ.get("TRAINS_DOCKER_IMAGE") or
                               self._session.config.get("agent.default_docker.image", "nvidia/cuda"))

        # take the first part, this is the docker image name (not arguments)
        docker_image = docker_image.split()[0]

        # quote the configuration text for the shell, so a single quote inside the
        # configuration cannot terminate the echo argument
        create_trains_conf = "echo {} >> ~/trains.conf && ".format(
            shlex.quote(HOCONConverter.to_hocon(self._session.config._config)))

        if callable(self.kubectl_cmd):
            kubectl_cmd = self.kubectl_cmd(task_id, docker_image, queue, task_data)
        else:
            kubectl_cmd = self.kubectl_cmd.format(task_id=task_id, docker_image=docker_image, queue_id=queue)

        # make sure we have a list
        if isinstance(kubectl_cmd, str):
            kubectl_cmd = kubectl_cmd.split()

        # build a new list, never extend (in place) a list object returned by a user callable
        kubectl_cmd = list(kubectl_cmd) + [
            "--labels=TRAINS=agent", "--command", "--", "/bin/sh", "-c",
            create_trains_conf + self.container_bash_script.format(task_id)]
        process = subprocess.Popen(kubectl_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, error = process.communicate()
        self.log.info("K8s scheduling experiment task id={}".format(task_id))
        # report failures based on the exit code as well, kubectl might fail without writing to stderr
        if process.returncode != 0 or error:
            error_text = error if isinstance(error, str) else (error or b"").decode(errors="replace")
            self.log.error("Running kubectl encountered an error: {}".format(
                error_text or "exit code {}".format(process.returncode)))

    def run_tasks_loop(self, queues: List[Text], worker_params):
        """
        :summary: Pull and run tasks from queues.
        :description: 1. Go through ``queues`` by order.
                      2. Try getting the next task for each and run the first one that returns.
                      3. Go to step 1
        :param queues: IDs of queues to pull tasks from
        :type queues: list of ``Text``
        :param worker_params: Worker command line arguments
        :type worker_params: ``trains_agent.helper.process.WorkerParams``
        """
        events_service = self.get_service(Events)

        # make sure we have a k8s pending queue
        try:
            self._session.api_client.queues.create(self.k8s_pending_queue_name)
        except Exception:
            # best effort: creation is expected to fail if the queue already exists
            pass
        # get queue id
        self.k8s_pending_queue_name = self._resolve_name(self.k8s_pending_queue_name, "queues")

        while True:
            # iterate over queues (priority style, queues[0] is highest)
            for queue in queues:
                # delete old completed / failed pods
                get_bash_output(self.KUBECTL_DELETE_CMD)

                # get next task in queue
                try:
                    response = self._session.api_client.queues.get_next_task(queue=queue)
                except Exception as e:
                    print("Warning: Could not access task queue [{}], error: {}".format(queue, e))
                    continue
                else:
                    try:
                        task_id = response.entry.task
                    except AttributeError:
                        print("No tasks in queue {}".format(queue))
                        continue
                    events_service.send_log_events(
                        self.worker_id,
                        task_id=task_id,
                        lines="task {} pulled from {} by worker {}".format(
                            task_id, queue, self.worker_id
                        ),
                        level="INFO",
                    )

                    self.report_monitor(ResourceMonitor.StatusReport(queues=queues, queue=queue, task=task_id))
                    self.run_one_task(queue, task_id, worker_params)
                    self.report_monitor(ResourceMonitor.StatusReport(queues=self.queues))
                    # a task was scheduled, restart polling from the highest priority queue
                    break
            else:
                # sleep and retry polling
                print("No tasks in Queues, sleeping for {:.1f} seconds".format(self._polling_interval))
                sleep(self._polling_interval)

            if self._session.config["agent.reload_config"]:
                self.reload_config()

    def k8s_daemon(self, queues):
        """
        Start the k8s Glue service.
        This service will be pulling tasks from *queues* and scheduling them for execution using kubectl.
        Notice all scheduled tasks are pushed back into K8S_PENDING_QUEUE,
        and popped when execution actually starts. This creates full visibility into the k8s scheduler.
        Manually popping a task from the K8S_PENDING_QUEUE,
        will cause the k8s scheduler to skip the execution once the scheduled task needs to be executed

        :param list(str) queues: List of queue names to pull from
        """
        return self.daemon(queues=queues, log_level=logging.INFO, foreground=True, docker=False)
|
||||
@@ -157,6 +157,10 @@ def is_windows_platform():
|
||||
return any(platform.win32_ver())
|
||||
|
||||
|
||||
def is_linux_platform():
    """
    Check whether the system name reported by ``platform.system()`` contains "linux" (case-insensitive)

    :return: True if running on a Linux platform, False otherwise
    """
    system_name = platform.system()
    return 'linux' in system_name.lower()
|
||||
|
||||
|
||||
def normalize_path(*paths):
|
||||
"""
|
||||
normalize_path
|
||||
@@ -195,6 +199,20 @@ def get_python_path(script_dir, entry_point, package_api):
|
||||
return None
|
||||
|
||||
|
||||
def add_python_path(base_path, extra_path):
    """
    Append extra search paths to a PYTHONPATH style string

    :param base_path: existing search path string (may be None or empty)
    :param extra_path: ':' separated paths to append, every ':' is replaced with the platform path separator.
        If empty or None, base_path is returned unchanged
    :return: combined search path string. On any failure (e.g. non-string arguments)
        the original base_path is returned unchanged
    """
    if not extra_path:
        return base_path
    try:
        python_path_sep = os.pathsep
        extra = extra_path.replace(':', python_path_sep)
        if not base_path:
            # nothing to append to, do not create a leading separator (i.e. an empty search path entry)
            return extra
        if not base_path.endswith(python_path_sep):
            base_path += python_path_sep
        return base_path + extra
    except Exception:
        # best effort, never fail because of a malformed python path
        return base_path
|
||||
|
||||
|
||||
class Singleton(ABCMeta):
|
||||
_instances = {}
|
||||
|
||||
@@ -459,6 +477,17 @@ def rm_tree(root): # type: (Union[Path, Text]) -> None
|
||||
return shutil.rmtree(os.path.expanduser(os.path.expandvars(Text(root))), onerror=on_error)
|
||||
|
||||
|
||||
def rm_file(filename):  # type: (Union[Path, Text]) -> bool
    """
    A version of os.unlink that will not raise an error

    Environment variables and "~" in the file name are expanded before the file is removed.

    :param filename: path of the file to delete
    :return: True if the file was removed, False if removing it failed for any reason
    """
    try:
        os.unlink(os.path.expanduser(os.path.expandvars(Text(filename))))
    except Exception:
        # do not swallow KeyboardInterrupt / SystemExit, only actual errors
        return False
    return True
|
||||
|
||||
|
||||
def is_conda(config):
|
||||
return config['agent.package_manager.type'].lower() == 'conda'
|
||||
|
||||
@@ -526,3 +555,17 @@ class ExecutionInfo(NonStrictAttrs):
|
||||
execution.working_dir = working_dir or ""
|
||||
|
||||
return execution
|
||||
|
||||
|
||||
class safe_furl(furl.furl):
    """
    furl.furl subclass with a ``port`` property that stores any value it is given.

    NOTE(review): presumably this bypasses port validation done by the furl base class - confirm against furl.
    """

    @property
    def port(self):
        # return the stored port value as is
        return self._port

    @port.setter
    def port(self, port):
        """
        Any port value is valid, the value is stored in ``self._port`` without any checks
        """
        self._port = port
|
||||
|
||||
@@ -4,7 +4,7 @@ from time import sleep
|
||||
import requests
|
||||
import json
|
||||
from threading import Thread
|
||||
from semantic_version import Version
|
||||
from .package.requirements import SimpleVersion
|
||||
from ..version import __version__
|
||||
|
||||
__check_update_thread = None
|
||||
@@ -30,11 +30,11 @@ def _check_new_version_available():
|
||||
return None
|
||||
trains_answer = update_server_releases.get("trains-agent", {})
|
||||
latest_version = trains_answer.get("version")
|
||||
cur_version = Version(cur_version)
|
||||
latest_version = Version(latest_version)
|
||||
if cur_version >= latest_version:
|
||||
cur_version = cur_version
|
||||
latest_version = latest_version or ''
|
||||
if SimpleVersion.compare_versions(cur_version, '>=', latest_version):
|
||||
return None
|
||||
patch_upgrade = latest_version.major == cur_version.major and latest_version.minor == cur_version.minor
|
||||
patch_upgrade = True # latest_version.major == cur_version.major and latest_version.minor == cur_version.minor
|
||||
return str(latest_version), patch_upgrade, trains_answer.get("description").split("\r\n")
|
||||
|
||||
|
||||
|
||||
@@ -22,6 +22,18 @@ def print_text(text, newline=True):
|
||||
sys.stdout.write(data)
|
||||
|
||||
|
||||
def decode_binary_lines(binary_lines, encoding='utf-8'):
    """
    Decode an iterable of binary lines into a list of text lines

    Every line is decoded separately (undecodable bytes are replaced), carriage returns are converted
    to newlines, and every non-empty line is guaranteed to end with a newline.
    An entry that cannot be decoded at all (e.g. it is not a bytes object) becomes an empty string,
    so the returned list always has one entry per input entry.

    :param binary_lines: iterable of bytes objects
    :param str encoding: encoding used for decoding (default: utf-8)
    :return: list of decoded text lines
    """
    lines = []
    for raw in binary_lines:
        try:
            line = raw.decode(encoding=encoding, errors='replace').replace('\r', '\n')
        except Exception:
            # decoding failed, keep an empty placeholder for this entry
            line = ''
        if line and not line.endswith('\n'):
            line += '\n'
        lines.append(line)
    return lines
|
||||
|
||||
|
||||
def ensure_text(s, encoding='utf-8', errors='strict'):
|
||||
"""Coerce *s* to six.text_type.
|
||||
For Python 2:
|
||||
|
||||
0
trains_agent/helper/os/__init__.py
Normal file
0
trains_agent/helper/os/__init__.py
Normal file
74
trains_agent/helper/os/daemonize.py
Normal file
74
trains_agent/helper/os/daemonize.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import os
|
||||
|
||||
|
||||
def daemonize_process(redirect_fd=None):
    """
    Detach a process from the controlling terminal and run it in the background as a daemon.

    The process is forked twice. This function only returns in the final (daemon) process,
    the calling process and the intermediate child both terminate with os._exit(0).
    The daemon keeps the working directory of the calling process, resets its umask to 0,
    and has its standard output and standard error redirected (standard input is left untouched).

    :param int redirect_fd: open file descriptor that standard output and standard error are redirected to.
        If None, the null device is opened and used instead.
    :return: 0 (in the daemon process)
    :raises Exception: if one of the forks fails
    """
    assert redirect_fd is None or isinstance(redirect_fd, int)

    # the daemon is re-spawned in the same directory
    workdir = os.getcwd()

    # The standard output/error file descriptors are redirected to the null device by default.
    devnull = getattr(os, "devnull", "/dev/null")

    try:
        # Fork a child process so the parent can exit. This returns control to
        # the command-line or shell. It also guarantees that the child will not
        # be a process group leader, since the child receives a new process ID
        # and inherits the parent's process group ID. This step is required
        # to ensure that the next call to os.setsid is successful.
        pid = os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))

    if pid != 0:
        # Exit parent of the first child.
        os._exit(0)

    # The first child.
    # To become the session leader of this new session and the process group
    # leader of the new process group, we call os.setsid().
    # The process is also guaranteed not to have a controlling terminal.
    os.setsid()

    try:
        # Fork a second child and exit immediately to prevent zombies. This
        # causes the second child process to be orphaned, making the init
        # process responsible for its cleanup.
        pid = os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))

    if pid != 0:
        # Exit parent (the first child) of the second child.
        os._exit(0)

    # The second child (the daemon process).
    # Unlike the classic daemon recipe we do not change to the root directory,
    # we explicitly stay in the directory we were started from.
    os.chdir(workdir)
    # We probably don't want the file mode creation mask inherited from
    # the parent, so we give the child complete control over permissions.
    os.umask(0)

    # notice we count on the fact that we keep all file descriptors open,
    # since we opened them in the parent process, but the daemon process will use them

    # Redirect to the null device if no file descriptor was specified.
    opened_here = redirect_fd is None
    if opened_here:
        redirect_fd = os.open(devnull, os.O_RDWR)

    # Duplicate the redirection target onto standard output (1) and standard error (2)
    os.dup2(redirect_fd, 1)
    os.dup2(redirect_fd, 2)

    # do not leak the descriptor we opened ourselves, the duplicates keep the target open
    # (a caller supplied descriptor is left open, it is owned by the caller)
    if opened_here and redirect_fd > 2:
        os.close(redirect_fd)

    return 0
|
||||
@@ -16,6 +16,8 @@ class PackageManager(object):
|
||||
"""
|
||||
|
||||
_selected_manager = None
|
||||
_cwd = None
|
||||
_pip_version = None
|
||||
|
||||
@abc.abstractproperty
|
||||
def bin(self):
|
||||
@@ -64,7 +66,7 @@ class PackageManager(object):
|
||||
pass
|
||||
|
||||
def upgrade_pip(self):
|
||||
return self._install("pip", "--upgrade")
|
||||
return self._install("pip"+self.get_pip_version(), "--upgrade")
|
||||
|
||||
def get_python_command(self, extra=()):
|
||||
# type: (...) -> Executable
|
||||
@@ -97,11 +99,44 @@ class PackageManager(object):
|
||||
# this is helpful when we want out of context requirement installations
|
||||
PackageManager._selected_manager = self
|
||||
|
||||
@property
def cwd(self):
    # working directory value stored on this package manager (``_cwd``, None unless it was set)
    # NOTE(review): presumably the directory install commands should run from - confirm against callers
    return self._cwd

@cwd.setter
def cwd(self, value):
    # store the value as is, no validation is performed
    self._cwd = value
|
||||
|
||||
@classmethod
|
||||
def out_of_scope_install_package(cls, package_name):
|
||||
def out_of_scope_install_package(cls, package_name, *args):
|
||||
if PackageManager._selected_manager is not None:
|
||||
try:
|
||||
return PackageManager._selected_manager._install(package_name)
|
||||
result = PackageManager._selected_manager._install(package_name, *args)
|
||||
if result not in (0, None, True):
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def out_of_scope_freeze(cls):
|
||||
if PackageManager._selected_manager is not None:
|
||||
try:
|
||||
return PackageManager._selected_manager.freeze()
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
return []
|
||||
|
||||
@classmethod
def set_pip_version(cls, version):
    """
    Store the pip version requirement on the class (``_pip_version``)

    Spaces are removed from the version string. If it already contains a version operator
    (one of ``=``, ``~``, ``<``, ``>``) it is stored as is, otherwise it is pinned with ``==``.
    An empty / None version leaves the stored value untouched.

    :param str version: pip version or version specifier, e.g. "20.1" or "<20"
    """
    if not version:
        return
    spec = version.replace(' ', '')
    has_operator = any(op in spec for op in ('=', '~', '<', '>'))
    cls._pip_version = spec if has_operator else "==" + spec
|
||||
|
||||
@classmethod
def get_pip_version(cls):
    """
    Return the pip version specifier stored on the class

    :return: the stored ``_pip_version`` value, or an empty string if none was set
    """
    stored = cls._pip_version
    if not stored:
        return ''
    return stored
|
||||
|
||||
@@ -14,13 +14,13 @@ import yaml
|
||||
from time import time
|
||||
from attr import attrs, attrib, Factory
|
||||
from pathlib2 import Path
|
||||
from semantic_version import Version
|
||||
from requirements import parse
|
||||
from requirements.requirement import Requirement
|
||||
|
||||
from trains_agent.errors import CommandFailedError
|
||||
from trains_agent.helper.base import rm_tree, NonStrictAttrs, select_for_platform, is_windows_platform
|
||||
from trains_agent.helper.process import Argv, Executable, DEVNULL, CommandSequence, PathLike
|
||||
from trains_agent.helper.package.requirements import SimpleVersion
|
||||
from trains_agent.session import Session
|
||||
from .base import PackageManager
|
||||
from .pip_api.venv import VirtualenvPip
|
||||
@@ -59,7 +59,7 @@ class CondaAPI(PackageManager):
|
||||
A programmatic interface for controlling conda
|
||||
"""
|
||||
|
||||
MINIMUM_VERSION = Version("4.3.30", partial=True)
|
||||
MINIMUM_VERSION = "4.3.30"
|
||||
|
||||
def __init__(self, session, path, python, requirements_manager):
|
||||
# type: (Session, PathLike, float, RequirementsManager) -> None
|
||||
@@ -93,7 +93,7 @@ class CondaAPI(PackageManager):
|
||||
)
|
||||
)
|
||||
self.conda_version = self.get_conda_version(output)
|
||||
if Version(self.conda_version, partial=True) < self.MINIMUM_VERSION:
|
||||
if SimpleVersion.compare_versions(self.conda_version, '<', self.MINIMUM_VERSION):
|
||||
raise CommandFailedError(
|
||||
"conda version '{}' is smaller than minimum supported conda version '{}'".format(
|
||||
self.conda_version, self.MINIMUM_VERSION
|
||||
@@ -112,7 +112,7 @@ class CondaAPI(PackageManager):
|
||||
return self.pip.bin
|
||||
|
||||
def upgrade_pip(self):
|
||||
return self.pip.upgrade_pip()
|
||||
return self._install("pip" + self.pip.get_pip_version())
|
||||
|
||||
def create(self):
|
||||
"""
|
||||
@@ -227,20 +227,20 @@ class CondaAPI(PackageManager):
|
||||
self.pip.install_from_file(reqs)
|
||||
|
||||
def freeze(self):
|
||||
# result = yaml.load(
|
||||
# self._run_command((self.conda, "env", "export", "-p", self.path), raw=True)
|
||||
# )
|
||||
# for key in "name", "prefix":
|
||||
# result.pop(key, None)
|
||||
# freeze = {"conda": result}
|
||||
# try:
|
||||
# freeze["pip"] = result["dependencies"][-1]["pip"]
|
||||
# except (TypeError, KeyError):
|
||||
# freeze["pip"] = []
|
||||
# else:
|
||||
# del result["dependencies"][-1]
|
||||
# return freeze
|
||||
return self.pip.freeze()
|
||||
requirements = self.pip.freeze()
|
||||
try:
|
||||
conda_packages = json.loads(self._run_command((self.conda, "list", "--json", "-p", self.path), raw=True))
|
||||
conda_packages_txt = []
|
||||
requirements_pip = [r.split('==')[0].strip().lower() for r in requirements['pip']]
|
||||
for pkg in conda_packages:
|
||||
# skip if this is a pypi package or it is not a python package at all
|
||||
if pkg['channel'] == 'pypi' or pkg['name'].lower() not in requirements_pip:
|
||||
continue
|
||||
conda_packages_txt.append('{0}{1}{2}'.format(pkg['name'], '==', pkg['version']))
|
||||
requirements['conda'] = conda_packages_txt
|
||||
except:
|
||||
pass
|
||||
return requirements
|
||||
|
||||
def load_requirements(self, requirements):
|
||||
# create new environment file
|
||||
@@ -249,6 +249,8 @@ class CondaAPI(PackageManager):
|
||||
reqs = []
|
||||
if isinstance(requirements['pip'], six.string_types):
|
||||
requirements['pip'] = requirements['pip'].split('\n')
|
||||
if isinstance(requirements.get('conda'), six.string_types):
|
||||
requirements['conda'] = requirements['conda'].split('\n')
|
||||
has_torch = False
|
||||
has_matplotlib = False
|
||||
try:
|
||||
@@ -256,35 +258,90 @@ class CondaAPI(PackageManager):
|
||||
except:
|
||||
cuda_version = 0
|
||||
|
||||
for r in requirements['pip']:
|
||||
marker = list(parse(r))
|
||||
if marker:
|
||||
m = MarkerRequirement(marker[0])
|
||||
if m.req.name.lower() == 'matplotlib':
|
||||
has_matplotlib = True
|
||||
elif m.req.name.lower().startswith('torch'):
|
||||
has_torch = True
|
||||
|
||||
if m.req.name.lower() in ('torch', 'pytorch'):
|
||||
has_torch = True
|
||||
m.req.name = 'pytorch'
|
||||
|
||||
if m.req.name.lower() in ('tensorflow_gpu', 'tensorflow-gpu', 'tensorflow'):
|
||||
has_torch = True
|
||||
m.req.name = 'tensorflow-gpu' if cuda_version > 0 else 'tensorflow'
|
||||
|
||||
reqs.append(m)
|
||||
# notice 'conda' entry with empty string is a valid conda requirements list, it means pip only
|
||||
# this should happen if experiment was executed on non-conda machine or old trains client
|
||||
conda_supported_req = requirements['pip'] if requirements.get('conda', None) is None else requirements['conda']
|
||||
conda_supported_req_names = []
|
||||
pip_requirements = []
|
||||
for r in conda_supported_req:
|
||||
try:
|
||||
marker = list(parse(r))
|
||||
except:
|
||||
marker = None
|
||||
if not marker:
|
||||
continue
|
||||
|
||||
m = MarkerRequirement(marker[0])
|
||||
# conda does not support version control links
|
||||
if m.vcs:
|
||||
pip_requirements.append(m)
|
||||
continue
|
||||
conda_supported_req_names.append(m.name.lower())
|
||||
if m.req.name.lower() == 'matplotlib':
|
||||
has_matplotlib = True
|
||||
elif m.req.name.lower().startswith('torch'):
|
||||
has_torch = True
|
||||
|
||||
if m.req.name.lower() in ('torch', 'pytorch'):
|
||||
has_torch = True
|
||||
m.req.name = 'pytorch'
|
||||
|
||||
if m.req.name.lower() in ('tensorflow_gpu', 'tensorflow-gpu', 'tensorflow'):
|
||||
has_torch = True
|
||||
m.req.name = 'tensorflow-gpu' if cuda_version > 0 else 'tensorflow'
|
||||
|
||||
reqs.append(m)
|
||||
|
||||
# if we have a conda list, the rest should be installed with pip,
|
||||
if requirements.get('conda', None) is not None:
|
||||
for r in requirements['pip']:
|
||||
try:
|
||||
marker = list(parse(r))
|
||||
except:
|
||||
marker = None
|
||||
if not marker:
|
||||
continue
|
||||
|
||||
m = MarkerRequirement(marker[0])
|
||||
m_name = m.name.lower()
|
||||
if m_name in conda_supported_req_names:
|
||||
# this package is in the conda list,
|
||||
# make sure that if we changed version and we match it in conda
|
||||
conda_supported_req_names.remove(m_name)
|
||||
for cr in reqs:
|
||||
if m_name == cr.name.lower():
|
||||
# match versions
|
||||
cr.specs = m.specs
|
||||
break
|
||||
else:
|
||||
# not in conda, it is a pip package
|
||||
pip_requirements.append(m)
|
||||
if m_name == 'matplotlib':
|
||||
has_matplotlib = True
|
||||
|
||||
# remove any leftover conda packages (they were removed from the pip list)
|
||||
if conda_supported_req_names:
|
||||
reqs = [r for r in reqs if r.name.lower() not in conda_supported_req_names]
|
||||
|
||||
# Conda requirements Hacks:
|
||||
if has_matplotlib:
|
||||
reqs.append(MarkerRequirement(Requirement.parse('graphviz')))
|
||||
reqs.append(MarkerRequirement(Requirement.parse('python-graphviz')))
|
||||
reqs.append(MarkerRequirement(Requirement.parse('kiwisolver')))
|
||||
if has_torch and cuda_version == 0:
|
||||
reqs.append(MarkerRequirement(Requirement.parse('cpuonly')))
|
||||
|
||||
# conform conda packages (version/name)
|
||||
for r in reqs:
|
||||
# remove .post from version numbers, it fails ~= version, and change == to ~=
|
||||
if r.specs and r.specs[0]:
|
||||
r.specs = [(r.specs[0][0].replace('==', '~='), r.specs[0][1].split('.post')[0])]
|
||||
# conda always likes "-" not "_"
|
||||
r.req.name = r.req.name.replace('_', '-')
|
||||
|
||||
while reqs:
|
||||
conda_env['dependencies'] = [r.tostr().replace('==', '=') for r in reqs]
|
||||
# notice, we give conda more freedom in version selection, to help it choose best combination
|
||||
conda_env['dependencies'] = [r.tostr() for r in reqs]
|
||||
with self.temp_file("conda_env", yaml.dump(conda_env), suffix=".yml") as name:
|
||||
print('Conda: Trying to install requirements:\n{}'.format(conda_env['dependencies']))
|
||||
result = self._run_command(
|
||||
@@ -297,7 +354,7 @@ class CondaAPI(PackageManager):
|
||||
|
||||
solved = False
|
||||
for bad_r in bad_req:
|
||||
name = bad_r.split('[')[0].split('=')[0]
|
||||
name = bad_r.split('[')[0].split('=')[0].split('~')[0].split('<')[0].split('>')[0]
|
||||
# look for name in requirements
|
||||
for r in reqs:
|
||||
if r.name.lower() == name.lower():
|
||||
@@ -321,7 +378,7 @@ class CondaAPI(PackageManager):
|
||||
print(e)
|
||||
raise e
|
||||
|
||||
self.requirements_manager.post_install()
|
||||
self.requirements_manager.post_install(self.session)
|
||||
return True
|
||||
|
||||
def _parse_conda_result_bad_packges(self, result_dict):
|
||||
@@ -338,7 +395,7 @@ class CondaAPI(PackageManager):
|
||||
if len(empty_lines) >= 2:
|
||||
deps = error_lines[empty_lines[0]+1:empty_lines[1]]
|
||||
try:
|
||||
return yaml.load('\n'.join(deps))
|
||||
return yaml.load('\n'.join(deps), Loader=yaml.SafeLoader)
|
||||
except:
|
||||
return None
|
||||
return None
|
||||
@@ -363,10 +420,14 @@ class CondaAPI(PackageManager):
|
||||
try:
|
||||
print('Executing Conda: {}'.format(command.serialize()))
|
||||
result = command.get_output(stdin=DEVNULL, **kwargs)
|
||||
if self.session.debug_mode:
|
||||
print(result)
|
||||
except Exception as e:
|
||||
result = e.output if hasattr(e, 'output') else ''
|
||||
if self.session.debug_mode:
|
||||
print(result)
|
||||
if raw:
|
||||
raise
|
||||
result = e.output if hasattr(e, 'output') else ''
|
||||
if raw:
|
||||
return result
|
||||
|
||||
@@ -412,4 +473,4 @@ class PackageNotFoundError(CondaException):
|
||||
as a singleton YAML list.
|
||||
"""
|
||||
|
||||
pkg = attrib(default="", converter=lambda val: yaml.load(val)[0].replace(" ", ""))
|
||||
pkg = attrib(default="", converter=lambda val: yaml.load(val, Loader=yaml.SafeLoader)[0].replace(" ", ""))
|
||||
|
||||
@@ -6,14 +6,14 @@ from .requirements import SimpleSubstitution
|
||||
|
||||
class CythonRequirement(SimpleSubstitution):
|
||||
|
||||
name = "cython"
|
||||
name = ("cython", "numpy", )
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(CythonRequirement, self).__init__(*args, **kwargs)
|
||||
|
||||
def match(self, req):
|
||||
# match both Cython & cython
|
||||
return self.name == req.name.lower()
|
||||
return req.name and req.name.lower() in self.name
|
||||
|
||||
def replace(self, req):
|
||||
"""
|
||||
|
||||
85
trains_agent/helper/package/external_req.py
Normal file
85
trains_agent/helper/package/external_req.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from collections import OrderedDict
|
||||
from typing import Text
|
||||
|
||||
from .base import PackageManager
|
||||
from .requirements import SimpleSubstitution
|
||||
from ..base import safe_furl as furl
|
||||
|
||||
|
||||
class ExternalRequirements(SimpleSubstitution):
    """
    Defer installation of requirements that are not plain named packages.

    Requirements accepted by ``match`` (unnamed lines, editable installs, VCS links) are
    removed from the regular requirements list by ``replace`` and installed one at a time
    in ``post_install``. ``replace_back`` then restores the original requirement lines in
    a frozen requirements listing.
    """

    name = "external_link"

    def __init__(self, *args, **kwargs):
        super(ExternalRequirements, self).__init__(*args, **kwargs)
        # requirements collected by replace(), consumed by post_install()
        self.post_install_req = []
        # frozen package entry (as listed under 'pip' by the freeze call) -> original requirement line
        self.post_install_req_lookup = OrderedDict()

    def match(self, req):
        """
        Return True for requirements that must be installed directly from their original line.

        :param req: parsed requirement wrapper (exposes ``name`` and the underlying ``req``)
        :return: True if the requirement is unnamed, editable or a VCS link,
            and its original line is non-empty and not a comment
        """
        # match both editable or code or unparsed
        if not (not req.name or req.req and (req.req.editable or req.req.vcs)):
            return False
        if not req.req or not req.req.line or not req.req.line.strip() or req.req.line.strip().startswith('#'):
            return False
        return True

    def post_install(self, session):
        """
        Install every requirement previously stored by ``replace``.

        For ``git+`` VCS requirements the URL is rebuilt through the project's ``Git`` helper
        (so configured credentials / ssh conversion are applied) before calling pip.

        :param session: session object, passed to the ``Git`` helper
        :raises ValueError: if the final installation of a requirement fails
        """
        post_install_req = self.post_install_req
        self.post_install_req = []
        for req in post_install_req:
            # snapshot of the installed packages before installing this requirement (best effort)
            try:
                freeze_base = PackageManager.out_of_scope_freeze() or ''
            except Exception:
                freeze_base = ''

            req_line = req.tostr(markers=False)
            if req.req.vcs and req_line.startswith('git+'):
                try:
                    url_no_frag = furl(req_line)
                    url_no_frag.set(fragment=None)
                    # reverse replace: strip the fragment-less url from the end, leaving only the fragment
                    fragment = req_line[::-1].replace(url_no_frag.url[::-1], '', 1)[::-1]
                    # drop the leading 'git+'
                    vcs_url = req_line[4:]
                    # reverse replace: strip the trailing fragment from the url
                    vcs_url = vcs_url[::-1].replace(fragment[::-1], '', 1)[::-1]
                    from ..repo import Git
                    vcs = Git(session=session, url=vcs_url, location=None, revision=None)
                    vcs._set_ssh_url()
                    new_req_line = 'git+{}{}'.format(vcs.url_with_auth, fragment)
                    if new_req_line != req_line:
                        # never print the password embedded in the url
                        url_pass = furl(new_req_line).password
                        print('Replacing original pip vcs \'{}\' with \'{}\''.format(
                            req_line, new_req_line.replace(url_pass, '****', 1) if url_pass else new_req_line))
                        req_line = new_req_line
                except Exception:
                    print('WARNING: Failed parsing pip git install, using original line {}'.format(req_line))

            # first install without dependencies, so the freeze diff below
            # contains only the package this requirement line provides
            PackageManager.out_of_scope_install_package(req_line, "--no-deps")
            try:
                freeze_post = PackageManager.out_of_scope_freeze() or ''
                package_name = list(set(freeze_post['pip']) - set(freeze_base['pip']))
                if package_name and package_name[0] not in self.post_install_req_lookup:
                    self.post_install_req_lookup[package_name[0]] = req.req.line
            except Exception:
                # best effort only: without a usable freeze listing we simply cannot map
                # the frozen entry back to the original line (KeyboardInterrupt still propagates)
                pass
            if not PackageManager.out_of_scope_install_package(req_line, "--ignore-installed"):
                raise ValueError("Failed installing GIT/HTTPs package \'{}\'".format(req_line))

    def replace(self, req):
        """
        Store the requirement for ``post_install`` and drop it from the requirements list.

        :param req: requirement accepted by ``match``
        :return: empty text, marking the requirement as skipped
        """
        # Store in post req install, and return nothing
        self.post_install_req.append(req)
        # mark skip package, we will install it in post install hook
        return Text('')

    def replace_back(self, list_of_requirements):
        """
        Swap frozen entries of packages installed by ``post_install`` with their original lines.

        :param list_of_requirements: {'pip': ['a==1.0', ]}
        :return: the same dict; under 'pip', looked-up entries are removed and
            their original requirement lines are appended at the end
        """
        if 'pip' in list_of_requirements:
            original_requirements = list_of_requirements['pip']
            list_of_requirements['pip'] = [r for r in original_requirements
                                           if r not in self.post_install_req_lookup]
            list_of_requirements['pip'] += [self.post_install_req_lookup.get(r, '')
                                            for r in self.post_install_req_lookup.keys() if r in original_requirements]
        return list_of_requirements
|
||||
@@ -14,9 +14,9 @@ class HorovodRequirement(SimpleSubstitution):
|
||||
|
||||
def match(self, req):
|
||||
# match both horovod
|
||||
return self.name == req.name.lower()
|
||||
return req.name and self.name == req.name.lower()
|
||||
|
||||
def post_install(self):
|
||||
def post_install(self, session):
|
||||
if self.post_install_req:
|
||||
PackageManager.out_of_scope_install_package(self.post_install_req.tostr(markers=False))
|
||||
self.post_install_req = None
|
||||
|
||||
@@ -1,22 +1,24 @@
|
||||
import sys
|
||||
from itertools import chain
|
||||
from typing import Text
|
||||
from typing import Text, Optional
|
||||
|
||||
from trains_agent.definitions import PIP_EXTRA_INDICES, PROGRAM_NAME
|
||||
from trains_agent.helper.package.base import PackageManager
|
||||
from trains_agent.helper.process import Argv, DEVNULL
|
||||
from trains_agent.session import Session
|
||||
|
||||
|
||||
class SystemPip(PackageManager):
|
||||
|
||||
indices_args = None
|
||||
|
||||
def __init__(self, interpreter=None):
|
||||
# type: (Text) -> ()
|
||||
def __init__(self, interpreter=None, session=None):
|
||||
# type: (Optional[Text], Optional[Session]) -> ()
|
||||
"""
|
||||
Program interface to the system pip.
|
||||
"""
|
||||
self._bin = interpreter or sys.executable
|
||||
self.session = session
|
||||
|
||||
@property
|
||||
def bin(self):
|
||||
@@ -29,13 +31,13 @@ class SystemPip(PackageManager):
|
||||
pass
|
||||
|
||||
def install_from_file(self, path):
|
||||
self.run_with_env(('install', '-r', path) + self.install_flags())
|
||||
self.run_with_env(('install', '-r', path) + self.install_flags(), cwd=self.cwd)
|
||||
|
||||
def install_packages(self, *packages):
|
||||
self._install(*(packages + self.install_flags()))
|
||||
|
||||
def _install(self, *args):
|
||||
self.run_with_env(('install',) + args)
|
||||
self.run_with_env(('install',) + args, cwd=self.cwd)
|
||||
|
||||
def uninstall_packages(self, *packages):
|
||||
self.run_with_env(('uninstall', '-y') + packages)
|
||||
@@ -82,7 +84,7 @@ class SystemPip(PackageManager):
|
||||
return (command.get_output if output else command.check_call)(stdin=DEVNULL, **kwargs)
|
||||
|
||||
def _make_command(self, command):
|
||||
return Argv(self.bin, '-m', 'pip', *command)
|
||||
return Argv(self.bin, '-m', 'pip', '--disable-pip-version-check', *command)
|
||||
|
||||
def install_flags(self):
|
||||
if self.indices_args is None:
|
||||
|
||||
@@ -15,31 +15,29 @@ class VirtualenvPip(SystemPip, PackageManager):
|
||||
Program interface to virtualenv pip.
|
||||
Must be given either path to virtualenv or source command.
|
||||
Either way, ``self.source`` is exposed.
|
||||
:param session: a Session object for communication
|
||||
:param python: interpreter path
|
||||
:param path: path of virtual environment to create/manipulate
|
||||
:param python: python version
|
||||
:param interpreter: path of python interpreter
|
||||
"""
|
||||
super(VirtualenvPip, self).__init__(
|
||||
interpreter
|
||||
or Path(
|
||||
path,
|
||||
select_for_platform(linux="bin/python", windows="scripts/python.exe"),
|
||||
)
|
||||
session=session,
|
||||
interpreter=interpreter or Path(
|
||||
path, select_for_platform(linux="bin/python", windows="scripts/python.exe"))
|
||||
)
|
||||
self.session = session
|
||||
self.path = path
|
||||
self.requirements_manager = requirements_manager
|
||||
self.python = python
|
||||
|
||||
def _make_command(self, command):
|
||||
return self.session.command(self.bin, "-m", "pip", *command)
|
||||
return self.session.command(self.bin, "-m", "pip", "--disable-pip-version-check", *command)
|
||||
|
||||
def load_requirements(self, requirements):
|
||||
if isinstance(requirements, dict) and requirements.get("pip"):
|
||||
requirements["pip"] = self.requirements_manager.replace(requirements["pip"])
|
||||
super(VirtualenvPip, self).load_requirements(requirements)
|
||||
self.requirements_manager.post_install()
|
||||
self.requirements_manager.post_install(self.session)
|
||||
|
||||
def create_flags(self):
|
||||
"""
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
from copy import deepcopy
|
||||
from functools import wraps
|
||||
|
||||
import attr
|
||||
import sys
|
||||
import os
|
||||
from pathlib2 import Path
|
||||
from trains_agent.helper.process import Argv, DEVNULL
|
||||
from trains_agent.helper.process import Argv, DEVNULL, check_if_command_exists
|
||||
from trains_agent.session import Session, POETRY
|
||||
|
||||
|
||||
@@ -35,10 +38,12 @@ def prop_guard(prop, log_prop=None):
|
||||
|
||||
class PoetryConfig:
|
||||
|
||||
def __init__(self, session):
|
||||
# type: (Session) -> ()
|
||||
def __init__(self, session, interpreter=None):
|
||||
# type: (Session, str) -> ()
|
||||
self.session = session
|
||||
self._log = session.get_logger(__name__)
|
||||
self._python = interpreter or sys.executable
|
||||
self._initialized = False
|
||||
|
||||
@property
|
||||
def log(self):
|
||||
@@ -53,7 +58,20 @@ class PoetryConfig:
|
||||
def run(self, *args, **kwargs):
|
||||
func = kwargs.pop("func", Argv.get_output)
|
||||
kwargs.setdefault("stdin", DEVNULL)
|
||||
argv = Argv("poetry", "-n", *args)
|
||||
kwargs['env'] = deepcopy(os.environ)
|
||||
if 'VIRTUAL_ENV' in kwargs['env'] or 'CONDA_PREFIX' in kwargs['env']:
|
||||
kwargs['env'].pop('VIRTUAL_ENV', None)
|
||||
kwargs['env'].pop('CONDA_PREFIX', None)
|
||||
kwargs['env'].pop('PYTHONPATH', None)
|
||||
if hasattr(sys, "real_prefix") and hasattr(sys, "base_prefix"):
|
||||
path = ':'+kwargs['env']['PATH']
|
||||
path = path.replace(':'+sys.base_prefix, ':'+sys.real_prefix, 1)
|
||||
kwargs['env']['PATH'] = path
|
||||
|
||||
if check_if_command_exists("poetry"):
|
||||
argv = Argv("poetry", *args)
|
||||
else:
|
||||
argv = Argv(self._python, "-m", "poetry", *args)
|
||||
self.log.debug("running: %s", argv)
|
||||
return func(argv, **kwargs)
|
||||
|
||||
@@ -61,10 +79,16 @@ class PoetryConfig:
|
||||
return self.run("config", *args, **kwargs)
|
||||
|
||||
@_guard_enabled
|
||||
def initialize(self):
|
||||
self._config("settings.virtualenvs.in-project", "true")
|
||||
# self._config("repositories.{}".format(self.REPO_NAME), PYTHON_INDEX)
|
||||
# self._config("http-basic.{}".format(self.REPO_NAME), *PYTHON_INDEX_CREDENTIALS)
|
||||
def initialize(self, cwd=None):
|
||||
if not self._initialized:
|
||||
self._initialized = True
|
||||
try:
|
||||
self._config("--local", "virtualenvs.in-project", "true", cwd=cwd)
|
||||
# self._config("repositories.{}".format(self.REPO_NAME), PYTHON_INDEX)
|
||||
# self._config("http-basic.{}".format(self.REPO_NAME), *PYTHON_INDEX_CREDENTIALS)
|
||||
except Exception as ex:
|
||||
print("Exception: {}\nError: Failed configuring Poetry virtualenvs.in-project".format(ex))
|
||||
raise
|
||||
|
||||
def get_api(self, path):
|
||||
# type: (Path) -> PoetryAPI
|
||||
@@ -81,7 +105,7 @@ class PoetryAPI(object):
|
||||
def install(self):
|
||||
# type: () -> bool
|
||||
if self.enabled:
|
||||
self.config.run("install", cwd=str(self.path), func=Argv.check_call)
|
||||
self.config.run("install", "-n", cwd=str(self.path), func=Argv.check_call)
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -92,10 +116,15 @@ class PoetryAPI(object):
|
||||
)
|
||||
|
||||
def freeze(self):
|
||||
return {"poetry": self.config.run("show", cwd=str(self.path)).splitlines()}
|
||||
lines = self.config.run("show", cwd=str(self.path)).splitlines()
|
||||
lines = [[p for p in line.split(' ') if p] for line in lines]
|
||||
return {"pip": [parts[0]+'=='+parts[1]+' # '+' '.join(parts[2:]) for parts in lines]}
|
||||
|
||||
def get_python_command(self, extra):
|
||||
return Argv("poetry", "run", "python", *extra)
|
||||
if check_if_command_exists("poetry"):
|
||||
return Argv("poetry", "run", "python", *extra)
|
||||
else:
|
||||
return Argv(self.config._python, "-m", "poetry", "run", "python", *extra)
|
||||
|
||||
def upgrade_pip(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@@ -10,10 +10,9 @@ from typing import Text
|
||||
|
||||
import attr
|
||||
import requests
|
||||
from semantic_version import Version, Spec
|
||||
|
||||
import six
|
||||
from .requirements import SimpleSubstitution, FatalSpecsResolutionError
|
||||
from .requirements import SimpleSubstitution, FatalSpecsResolutionError, SimpleVersion
|
||||
|
||||
OS_TO_WHEEL_NAME = {"linux": "linux_x86_64", "windows": "win_amd64"}
|
||||
|
||||
@@ -75,6 +74,7 @@ class SimplePytorchRequirement(SimpleSubstitution):
|
||||
packages = ("torch", "torchvision", "torchaudio")
|
||||
|
||||
page_lookup_template = 'https://download.pytorch.org/whl/cu{}/torch_stable.html'
|
||||
nightly_page_lookup_template = 'https://download.pytorch.org/whl/nightly/cu{}/torch_nightly.html'
|
||||
torch_page_lookup = {
|
||||
0: 'https://download.pytorch.org/whl/cpu/torch_stable.html',
|
||||
80: 'https://download.pytorch.org/whl/cu80/torch_stable.html',
|
||||
@@ -116,11 +116,23 @@ class SimplePytorchRequirement(SimpleSubstitution):
|
||||
package_manager.add_extra_install_flags(('-f', extra_url))
|
||||
|
||||
@classmethod
|
||||
def get_torch_page(cls, cuda_version):
|
||||
def get_torch_page(cls, cuda_version, nightly=False):
|
||||
try:
|
||||
cuda = int(cuda_version)
|
||||
except:
|
||||
cuda = 0
|
||||
|
||||
if nightly:
|
||||
# then try the nightly builds, it might be there...
|
||||
torch_url = cls.nightly_page_lookup_template.format(cuda)
|
||||
try:
|
||||
if requests.get(torch_url, timeout=10).ok:
|
||||
cls.torch_page_lookup[cuda] = torch_url
|
||||
return cls.torch_page_lookup[cuda], cuda
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
|
||||
# first check if key is valid
|
||||
if cuda in cls.torch_page_lookup:
|
||||
return cls.torch_page_lookup[cuda], cuda
|
||||
@@ -155,10 +167,15 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
self.os = os_name or self.get_platform()
|
||||
self.cuda = "cuda{}".format(self.cuda_version).lower()
|
||||
self.python_version_string = str(self.config["agent.default_python"])
|
||||
self.python_semantic_version = Version.coerce(
|
||||
self.python_version_string, partial=True
|
||||
)
|
||||
self.python = "python{}.{}".format(self.python_semantic_version.major, self.python_semantic_version.minor)
|
||||
self.python_major_minor_str = '.'.join(self.python_version_string.split('.')[:2])
|
||||
if '.' not in self.python_major_minor_str:
|
||||
raise PytorchResolutionError(
|
||||
"invalid python version {!r} defined in configuration file, key 'agent.default_python': "
|
||||
"must have both major and minor parts of the version (for example: '3.7')".format(
|
||||
self.python_version_string
|
||||
)
|
||||
)
|
||||
self.python = "python{}".format(self.python_major_minor_str)
|
||||
|
||||
self.exceptions = [
|
||||
PytorchResolutionError(message)
|
||||
@@ -176,6 +193,8 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
except PytorchResolutionError as e:
|
||||
self.log.warn("will not be able to install pytorch wheels: %s", e.args[0])
|
||||
|
||||
self._original_req = []
|
||||
|
||||
@property
|
||||
def is_conda(self):
|
||||
return self.package_manager == "conda"
|
||||
@@ -188,9 +207,7 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
"""
|
||||
Make sure python version has both major and minor versions as required for choosing pytorch wheel
|
||||
"""
|
||||
if self.is_pip and not (
|
||||
self.python_semantic_version.major and self.python_semantic_version.minor
|
||||
):
|
||||
if self.is_pip and not self.python_major_minor_str:
|
||||
raise PytorchResolutionError(
|
||||
"invalid python version {!r} defined in configuration file, key 'agent.default_python': "
|
||||
"must have both major and minor parts of the version (for example: '3.7')".format(
|
||||
@@ -215,8 +232,9 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
links_parser = LinksHTMLParser()
|
||||
links_parser.feed(requests.get(torch_url, timeout=10).text)
|
||||
platform_wheel = "win" if self.get_platform() == "windows" else self.get_platform()
|
||||
py_ver = "{0.major}{0.minor}".format(self.python_semantic_version)
|
||||
py_ver = self.python_major_minor_str.replace('.', '')
|
||||
url = None
|
||||
last_v = None
|
||||
# search for our package
|
||||
for l in links_parser.links:
|
||||
parts = l.split('/')[-1].split('-')
|
||||
@@ -225,21 +243,49 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
if parts[0] != req.name:
|
||||
continue
|
||||
# version (ignore +cpu +cu92 etc. + is %2B in the file link)
|
||||
if parts[1].split('%')[0].split('+')[0] != req.specs[0][1]:
|
||||
# version ignore .postX suffix (treat as regular version)
|
||||
try:
|
||||
v = str(parts[1].split('%')[0].split('+')[0])
|
||||
except Exception:
|
||||
continue
|
||||
if not req.compare_version(v) or \
|
||||
(last_v and SimpleVersion.compare_versions(last_v, '>', v, ignore_sub_versions=False)):
|
||||
continue
|
||||
if not parts[2].endswith(py_ver):
|
||||
continue
|
||||
if platform_wheel not in parts[4]:
|
||||
continue
|
||||
url = '/'.join(torch_url.split('/')[:-1] + l.split('/'))
|
||||
break
|
||||
last_v = v
|
||||
# if we found an exact match, use it
|
||||
try:
|
||||
if req.specs[0][0] == '==' and \
|
||||
SimpleVersion.compare_versions(req.specs[0][1], '==', v, ignore_sub_versions=False):
|
||||
break
|
||||
except:
|
||||
pass
|
||||
|
||||
return url
|
||||
|
||||
def get_url_for_platform(self, req):
|
||||
assert self.package_manager == "pip"
|
||||
assert self.os != "mac"
|
||||
assert req.specs
|
||||
# check if package is already installed with system packages
|
||||
try:
|
||||
if self.config.get("agent.package_manager.system_site_packages", None):
|
||||
from pip._internal.commands.show import search_packages_info
|
||||
installed_torch = list(search_packages_info([req.name]))
|
||||
# notice the comparision order, the first part will make sure we have a valid installed package
|
||||
if installed_torch[0]['version'] and req.compare_version(installed_torch[0]['version']):
|
||||
print('PyTorch: requested "{}" version {}, using pre-installed version {}'.format(
|
||||
req.name, req.specs[0] if req.specs else 'unspecified', installed_torch[0]['version']))
|
||||
# package already installed, do nothing
|
||||
return str(req), True
|
||||
except:
|
||||
pass
|
||||
|
||||
# make sure we have a specific version to retrieve
|
||||
if not req.specs:
|
||||
req.specs = [('>', '0')]
|
||||
|
||||
try:
|
||||
req.specs[0] = (req.specs[0][0], req.specs[0][1].split('+')[0])
|
||||
except:
|
||||
@@ -249,6 +295,9 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
|
||||
torch_url, torch_url_key = SimplePytorchRequirement.get_torch_page(self.cuda_version)
|
||||
url = self._get_link_from_torch_page(req, torch_url)
|
||||
if not url and self.config.get("agent.package_manager.torch_nightly", None):
|
||||
torch_url, torch_url_key = SimplePytorchRequirement.get_torch_page(self.cuda_version, nightly=True)
|
||||
url = self._get_link_from_torch_page(req, torch_url)
|
||||
# try one more time, with a lower cuda version (never fallback to CPU):
|
||||
while not url and torch_url_key > 0:
|
||||
previous_cuda_key = torch_url_key
|
||||
@@ -266,7 +315,7 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
if not url:
|
||||
url = PytorchWheel(
|
||||
torch_version=fix_version(version),
|
||||
python="{0.major}{0.minor}".format(self.python_semantic_version),
|
||||
python=self.python_major_minor_str.replace('.', ''),
|
||||
os_name=self.os,
|
||||
cuda_version=self.cuda_version,
|
||||
).make_url()
|
||||
@@ -280,20 +329,17 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
@staticmethod
|
||||
def match_version(req, options):
|
||||
versioned_options = sorted(
|
||||
((Version(fix_version(key)), value) for key, value in options.items()),
|
||||
((fix_version(key), value) for key, value in options.items()),
|
||||
key=itemgetter(0),
|
||||
reverse=True,
|
||||
)
|
||||
req.specs = [(op, fix_version(version)) for op, version in req.specs]
|
||||
if req.specs:
|
||||
specs = Spec(req.format_specs())
|
||||
else:
|
||||
specs = None
|
||||
|
||||
try:
|
||||
return next(
|
||||
replacement
|
||||
for version, replacement in versioned_options
|
||||
if not specs or version in specs
|
||||
if req.compare_version(version)
|
||||
)
|
||||
except StopIteration:
|
||||
raise PytorchResolutionError(
|
||||
@@ -342,7 +388,10 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
|
||||
def replace(self, req):
|
||||
try:
|
||||
return self._replace(req)
|
||||
new_req = self._replace(req)
|
||||
if new_req:
|
||||
self._original_req.append((req, new_req))
|
||||
return new_req
|
||||
except Exception as e:
|
||||
message = "Exception when trying to resolve python wheel"
|
||||
self.log.debug(message, exc_info=True)
|
||||
@@ -357,17 +406,17 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
result = self._table_lookup(req)
|
||||
except Exception as e:
|
||||
exc = e
|
||||
else:
|
||||
self.log.debug('Replacing requirement "%s" with %r', req, result)
|
||||
return result
|
||||
# try:
|
||||
# result = self._table_lookup(req)
|
||||
# except Exception as e:
|
||||
# exc = e
|
||||
# else:
|
||||
# self.log.debug('Replacing requirement "%s" with %r', req, result)
|
||||
# return result
|
||||
# self.log.debug(
|
||||
# "Could not find Pytorch wheel in table, trying manually constructing URL"
|
||||
# )
|
||||
|
||||
self.log.debug(
|
||||
"Could not find Pytorch wheel in table, trying manually constructing URL"
|
||||
)
|
||||
result = ok = None
|
||||
# try:
|
||||
# result, ok = self.get_url_for_platform(req)
|
||||
@@ -378,7 +427,7 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
if result:
|
||||
self.log.debug("URL not found: {}".format(result))
|
||||
exc = PytorchResolutionError(
|
||||
"Was not able to find pytorch wheel URL: {}".format(exc)
|
||||
"Could not find pytorch wheel URL for: {} with cuda {} support".format(req, self.cuda_version)
|
||||
)
|
||||
# cancel exception chaining
|
||||
six.raise_from(exc, None)
|
||||
@@ -386,6 +435,37 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
self.log.debug('Replacing requirement "%s" with %r', req, result)
|
||||
return result
|
||||
|
||||
def replace_back(self, list_of_requirements):  # type: (Dict) -> Dict
    """
    Annotate frozen requirement lines of packages this substitution replaced.

    For every line under 'pip' / 'conda' whose leading package name equals the name of a
    requirement stored in ``self._original_req``, the resolved replacement is appended as
    a trailing ``# ...`` comment. Lines containing '@' (direct references, pip >= 20.1)
    are rewritten from the original requirement instead of the frozen line.
    The lists are modified in place; any failure leaves the remaining lines untouched.

    :param list_of_requirements: {'pip': ['a==1.0', ]}
    :return: {'pip': ['a==1.0', ]}
    """
    if not self._original_req:
        return list_of_requirements

    # characters that end the package name at the start of a requirement line
    # (raw string: '\s' and '\.' are regex escapes, not string escapes)
    name_delimiters = re.compile(r'\s|=|\.|<|>|~|!|@|#')
    try:
        for k, lines in list_of_requirements.items():
            # k is either pip/conda
            if k not in ('pip', 'conda'):
                continue
            for i, line in enumerate(lines):
                if not line or line.lstrip().startswith('#'):
                    continue
                parts = [p for p in name_delimiters.split(line) if p]
                if not parts:
                    continue
                for req, new_req in self._original_req:
                    if req.req.name == parts[0]:
                        # support for pip >= 20.1
                        if '@' in line:
                            lines[i] = '{} # {}'.format(str(req), str(new_req))
                        else:
                            lines[i] = '{} # {}'.format(line, str(new_req))
                        break
    except Exception:
        # best effort: annotation is informational only, never fail the caller because of it
        pass

    return list_of_requirements
|
||||
|
||||
MAP = {
|
||||
"windows": {
|
||||
"cuda100": {
|
||||
|
||||
@@ -8,9 +8,8 @@ from copy import deepcopy
|
||||
from itertools import chain, starmap
|
||||
from operator import itemgetter
|
||||
from os import path
|
||||
from typing import Text, List, Type, Optional, Tuple
|
||||
from typing import Text, List, Type, Optional, Tuple, Dict
|
||||
|
||||
import semantic_version
|
||||
from pathlib2 import Path
|
||||
from pyhocon import ConfigTree
|
||||
from requirements import parse
|
||||
@@ -48,14 +47,24 @@ class MarkerRequirement(object):
|
||||
|
||||
def tostr(self, markers=True):
|
||||
if not self.uri:
|
||||
parts = [self.name]
|
||||
parts = [self.name or self.line]
|
||||
|
||||
if self.extras:
|
||||
parts.append('[{0}]'.format(','.join(sorted(self.extras))))
|
||||
|
||||
if self.specifier:
|
||||
parts.append(self.format_specs())
|
||||
|
||||
elif self.vcs:
|
||||
# leave the line as is, let pip handle it
|
||||
if self.line:
|
||||
parts = [self.line]
|
||||
else:
|
||||
# let's build the line manually
|
||||
parts = [
|
||||
self.uri,
|
||||
'@{}'.format(self.revision) if self.revision else '',
|
||||
'#subdirectory={}'.format(self.subdirectory) if self.subdirectory else ''
|
||||
]
|
||||
else:
|
||||
parts = [self.uri]
|
||||
|
||||
@@ -69,8 +78,19 @@ class MarkerRequirement(object):
|
||||
def __repr__(self):
|
||||
return '{self.__class__.__name__}[{self}]'.format(self=self)
|
||||
|
||||
def format_specs(self):
|
||||
return ','.join(starmap(operator.add, self.specs))
|
||||
def format_specs(self, num_parts=None, max_num_parts=None):
|
||||
max_num_parts = max_num_parts or num_parts
|
||||
if max_num_parts is None or not self.specs:
|
||||
return ','.join(starmap(operator.add, self.specs))
|
||||
|
||||
op, version = self.specs[0]
|
||||
for v in self._sub_versions_pep440:
|
||||
version = version.replace(v, '.')
|
||||
if num_parts:
|
||||
version = (version.strip('.').split('.') + ['0'] * num_parts)[:max_num_parts]
|
||||
else:
|
||||
version = version.strip('.').split('.')[:max_num_parts]
|
||||
return op+'.'.join(version)
|
||||
|
||||
def __getattr__(self, item):
|
||||
return getattr(self.req, item)
|
||||
@@ -99,6 +119,186 @@ class MarkerRequirement(object):
|
||||
else:
|
||||
self.specs = greater + smaller
|
||||
|
||||
def compare_version(self, requested_version, op=None, num_parts=3):
    """
    Check a concrete version against the first version spec of this requirement.

    The comparison itself is delegated to ``SimpleVersion.compare_versions``,
    called as ``requested_version <op> <spec version>``.

    :param str requested_version: the version to test
    :param str op: '==', '>', '>=', '<=', '<', '~='; defaults to the operator of the first spec
    :param int num_parts: number of parts to compare (accepted for compatibility, currently unused)
    :return: True if the requested version satisfies the spec,
        or if the requirement has no version spec at all
    """
    specs = self.specs
    # nothing to compare against, so any version is acceptable
    if not specs:
        return True

    first_spec = specs[0]
    comparison = (op or first_spec[0]).strip()
    return SimpleVersion.compare_versions(requested_version, comparison, first_spec[1])
|
||||
|
||||
|
||||
class SimpleVersion:
    """
    Lightweight version comparison based on a PEP 440 style regular expression.

    Versions are parsed into comparable tuple keys; strings the regular expression
    cannot parse fall back to a zero-padded, string based comparison.
    """

    # markers replaced with '.' by the string based fallback comparison
    _sub_versions_pep440 = ['a', 'b', 'rc', '.post', '.dev', '+', ]
    VERSION_PATTERN = r"""
        v?
        (?:
            (?:(?P<epoch>[0-9]+)!)?                           # epoch
            (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
            (?P<pre>                                          # pre-release
                [-_\.]?
                (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
                [-_\.]?
                (?P<pre_n>[0-9]+)?
            )?
            (?P<post>                                         # post release
                (?:-(?P<post_n1>[0-9]+))
                |
                (?:
                    [-_\.]?
                    (?P<post_l>post|rev|r)
                    [-_\.]?
                    (?P<post_n2>[0-9]+)?
                )
            )?
            (?P<dev>                                          # dev release
                [-_\.]?
                (?P<dev_l>dev)
                [-_\.]?
                (?P<dev_n>[0-9]+)?
            )?
        )
        (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
    """
    _local_version_separators = re.compile(r"[\._-]")
    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)

    @classmethod
    def compare_versions(cls, version_a, op, version_b, ignore_sub_versions=True):
        """
        Compare two versions based on the op operator
        returns bool(version_a op version_b)
        Notice: Ignores a/b/rc/post/dev markers on the version

        Missing release parts count as zero, i.e. '1.2' equals '1.2.0'.
        '~=' is the compatible release operator: version_a must be >= version_b and
        share all of version_b's release parts except the last one
        ('~=1.2.3' accepts 1.2.x with x >= 3, '~=1.2' accepts 1.x with x >= 2).

        :param str version_a:
        :param str op: '==', '===', '>', '>=', '<=', '<', '~='
        :param str version_b:
        :param bool ignore_sub_versions: if true compare only major.minor.patch
            (ignore a/b/rc/post/dev in the comparison), always true for '~='
        :return bool: version_a op version_b
        :raises ValueError: if op is not one of the supported operators
        """

        # nothing to compare against, everything matches
        if not version_b:
            return True
        num_parts = 3

        compatible_release = op == '~='
        if compatible_release:
            # handled as '>=' plus a release prefix match (checked below)
            op = '>='
            ignore_sub_versions = True
        elif op == '===':
            op = '=='

        try:
            match_a = cls._regex.search(version_a)
            match_b = cls._regex.search(version_b)
            # number of release parts explicitly given in version_b
            spec_parts = len(match_b.group("release").split("."))
            if compatible_release:
                # never truncate the parts the compatible release spec is made of
                num_parts = max(num_parts, spec_parts)
            version_a_key = cls._get_match_key(match_a, num_parts, ignore_sub_versions)
            version_b_key = cls._get_match_key(match_b, num_parts, ignore_sub_versions)
            release_a, release_b = version_a_key[1], version_b_key[1]
        except Exception:
            # revert to string based
            for v in cls._sub_versions_pep440:
                version_a = version_a.replace(v, '.')
                version_b = version_b.replace(v, '.')

            parts_a = version_a.strip('.').split('.')
            parts_b = version_b.strip('.').split('.')
            spec_parts = len(parts_b)
            if compatible_release:
                num_parts = max(num_parts, spec_parts)
            release_a = (parts_a + ['0'] * num_parts)[:num_parts]
            release_b = (parts_b + ['0'] * num_parts)[:num_parts]
            version_a_key = ''
            version_b_key = ''
            for part_a, part_b in zip(release_a, release_b):
                # left pad with zeros so that string order follows numeric order
                pad = '{:0>%d}.' % max([9, 1 + len(part_a), 1 + len(part_b)])
                version_a_key += pad.format(part_a)
                version_b_key += pad.format(part_b)

        if compatible_release:
            # all release parts of the spec, except the last one, must be identical
            prefix_len = max(spec_parts - 1, 1)
            if release_a[:prefix_len] != release_b[:prefix_len]:
                return False

        if op == '==':
            return version_a_key == version_b_key
        if op == '<=':
            return version_a_key <= version_b_key
        if op == '>=':
            return version_a_key >= version_b_key
        if op == '>':
            return version_a_key > version_b_key
        if op == '<':
            return version_a_key < version_b_key
        raise ValueError('Unrecognized comparison operator [{}]'.format(op))

    @staticmethod
    def _parse_letter_version(
            letter,  # type: str
            number,  # type: Union[str, bytes, SupportsInt]
    ):
        # type: (...) -> Optional[Tuple[str, int]]
        """
        Normalize a (letter, number) version marker, e.g. ('alpha', None) -> ('a', 0).

        :return: (normalized letter, int number), or an empty tuple if neither is given
        """

        if letter:
            # We consider there to be an implicit 0 in a pre-release if there is
            # not a numeral associated with it.
            if number is None:
                number = 0

            # We normalize any letters to their lower case form
            letter = letter.lower()

            # We consider some words to be alternate spellings of other words and
            # in those cases we want to normalize the spellings to our preferred
            # spelling.
            if letter == "alpha":
                letter = "a"
            elif letter == "beta":
                letter = "b"
            elif letter in ["c", "pre", "preview"]:
                letter = "rc"
            elif letter in ["rev", "r"]:
                letter = "post"

            return letter, int(number)
        if not letter and number:
            # We assume if we are given a number, but we are not given a letter
            # then this is using the implicit post release syntax (e.g. 1.0-1)
            letter = "post"

            return letter, int(number)

        return ()

    @staticmethod
    def _get_match_key(match, num_parts, ignore_sub_versions):
        """
        Build a comparable key from a ``_regex`` match object.

        :param match: match object of ``_regex`` (None raises AttributeError)
        :param int num_parts: number of release parts in the key, shorter releases
            are padded with zeros and longer ones truncated
        :param bool ignore_sub_versions: if true only the release parts take part in the key
        :return: tuple (epoch, release, pre, post, dev, local)
        """
        release = [int(i) for i in match.group("release").split(".")[:num_parts]]
        # pad with zeros, so '1.2' and '1.2.0' produce the same key
        release = tuple(release + [0] * (num_parts - len(release)))
        if ignore_sub_versions:
            return (0, release, (), (), (), (),)
        return (
            int(match.group("epoch")) if match.group("epoch") else 0,
            release,
            SimpleVersion._parse_letter_version(match.group("pre_l"), match.group("pre_n")),
            SimpleVersion._parse_letter_version(
                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
            ),
            SimpleVersion._parse_letter_version(match.group("dev_l"), match.group("dev_n")),
            SimpleVersion._parse_local_version(match.group("local")),
        )

    @staticmethod
    def _parse_local_version(local):
        # type: (str) -> Optional[LocalType]
        """
        Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
        Returns an empty tuple when local is None.
        """
        if local is not None:
            return tuple(
                part.lower() if not part.isdigit() else int(part)
                for part in SimpleVersion._local_version_separators.split(local)
            )
        return ()
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class RequirementSubstitution(object):
|
||||
@@ -126,7 +326,7 @@ class RequirementSubstitution(object):
|
||||
"""
|
||||
pass
|
||||
|
||||
def post_install(self):
|
||||
def post_install(self, session):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
@@ -177,13 +377,20 @@ class SimpleSubstitution(RequirementSubstitution):
|
||||
|
||||
if req.specs:
|
||||
_, version_number = req.specs[0]
|
||||
assert semantic_version.Version(version_number, partial=True)
|
||||
# assert packaging_version.parse(version_number)
|
||||
else:
|
||||
version_number = self.get_pip_version(self.name)
|
||||
|
||||
req.specs = [('==', version_number + self.suffix)]
|
||||
return Text(req)
|
||||
|
||||
def replace_back(self, list_of_requirements): # type: (Dict) -> Dict
|
||||
"""
|
||||
:param list_of_requirements: {'pip': ['a==1.0', ]}
|
||||
:return: {'pip': ['a==1.0', ]}
|
||||
"""
|
||||
return list_of_requirements
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class CudaSensitiveSubstitution(SimpleSubstitution):
|
||||
@@ -235,15 +442,17 @@ class RequirementsManager(object):
|
||||
return None
|
||||
|
||||
def replace(self, requirements): # type: (Text) -> Text
|
||||
def safe_parse(req_str):
|
||||
try:
|
||||
return next(parse(req_str))
|
||||
except Exception as ex:
|
||||
return Requirement(req_str)
|
||||
|
||||
parsed_requirements = tuple(
|
||||
map(
|
||||
MarkerRequirement,
|
||||
filter(
|
||||
None,
|
||||
parse(requirements)
|
||||
if isinstance(requirements, six.text_type)
|
||||
else (next(parse(line), None) for line in requirements)
|
||||
)
|
||||
[safe_parse(line) for line in (requirements.splitlines()
|
||||
if isinstance(requirements, six.text_type) else requirements)]
|
||||
)
|
||||
)
|
||||
if not parsed_requirements:
|
||||
@@ -258,7 +467,7 @@ class RequirementsManager(object):
|
||||
warning('could not resolve python wheel replacement for {}'.format(req))
|
||||
raise
|
||||
except Exception:
|
||||
warning('could not resolve python wheel replacement for {}, '
|
||||
warning('could not resolve python wheel replacement for \"{}\", '
|
||||
'using original requirements line: {}'.format(req, i))
|
||||
return None
|
||||
|
||||
@@ -273,12 +482,21 @@ class RequirementsManager(object):
|
||||
result = map(self.translator.translate, result)
|
||||
return join_lines(result)
|
||||
|
||||
def post_install(self):
|
||||
def post_install(self, session):
|
||||
for h in self.handlers:
|
||||
try:
|
||||
h.post_install()
|
||||
h.post_install(session)
|
||||
except Exception as ex:
|
||||
print('RequirementsManager handler {} raised exception: {}'.format(h, ex))
|
||||
raise
|
||||
|
||||
def replace_back(self, requirements):
|
||||
for h in self.handlers:
|
||||
try:
|
||||
requirements = h.replace_back(requirements)
|
||||
except Exception:
|
||||
pass
|
||||
return requirements
|
||||
|
||||
@staticmethod
|
||||
def get_cuda_version(config): # type: (ConfigTree) -> (Text, Text)
|
||||
|
||||
@@ -22,7 +22,7 @@ class RequirementsTranslator(object):
|
||||
self.enabled = config["agent.pip_download_cache.enabled"]
|
||||
Path(self.cache_dir).mkdir(parents=True, exist_ok=True)
|
||||
self.config = Config()
|
||||
self.pip = SystemPip(interpreter=interpreter)
|
||||
self.pip = SystemPip(interpreter=interpreter, session=self._session)
|
||||
|
||||
def download(self, url):
|
||||
self.pip.download_package(url, cache_dir=self.cache_dir)
|
||||
|
||||
@@ -83,7 +83,15 @@ def shutdown_docker_process(docker_cmd_contains=None, docker_id=None):
|
||||
pass
|
||||
|
||||
|
||||
def commit_docker(container_name, docker_cmd_contains=None, docker_id=None):
|
||||
def commit_docker(container_name, docker_cmd_contains=None, docker_id=None, apply_change=None):
|
||||
"""
|
||||
Commit a docker into a new image
|
||||
:param str container_name: Name for the new image
|
||||
:param docker_cmd_contains: partial container id to be committed
|
||||
:param str docker_id: Id of container to be comitted
|
||||
:param str apply_change: apply Dockerfile instructions to the image that is created
|
||||
(see docker commit documentation for '--change').
|
||||
"""
|
||||
try:
|
||||
if not docker_id:
|
||||
docker_id = get_docker_id(docker_cmd_contains=docker_cmd_contains)
|
||||
@@ -93,7 +101,8 @@ def commit_docker(container_name, docker_cmd_contains=None, docker_id=None):
|
||||
|
||||
if docker_id:
|
||||
# we found our docker, stop it
|
||||
output = get_bash_output(cmd='docker commit {} {}'.format(docker_id, container_name))
|
||||
apply_change = '--change=\'{}\''.format(apply_change) if apply_change else ''
|
||||
output = get_bash_output(cmd='docker commit {} {} {}'.format(apply_change, docker_id, container_name))
|
||||
return output
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -12,6 +12,8 @@ from furl import furl
|
||||
from pathlib2 import Path
|
||||
|
||||
import six
|
||||
|
||||
from trains_agent.definitions import ENV_AGENT_GIT_USER, ENV_AGENT_GIT_PASS
|
||||
from trains_agent.helper.console import ensure_text, ensure_binary
|
||||
from trains_agent.errors import CommandFailedError
|
||||
from trains_agent.helper.base import (
|
||||
@@ -42,7 +44,9 @@ class VcsFactory(object):
|
||||
:param location: (desired) clone location
|
||||
"""
|
||||
url = execution_info.repository
|
||||
is_git = url.endswith(cls.GIT_SUFFIX)
|
||||
# We only support git, hg is deprecated
|
||||
is_git = True
|
||||
# is_git = url.endswith(cls.GIT_SUFFIX)
|
||||
vcs_cls = Git if is_git else Hg
|
||||
revision = (
|
||||
execution_info.version_num
|
||||
@@ -93,7 +97,7 @@ class VCS(object):
|
||||
:param session: program session
|
||||
:param url: repository url
|
||||
:param location: (desired) clone location
|
||||
:param: desired clone revision
|
||||
:param revision: desired clone revision
|
||||
"""
|
||||
self.session = session
|
||||
self.log = self.session.get_logger(
|
||||
@@ -204,7 +208,7 @@ class VCS(object):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def resolve_ssh_url(cls, url):
|
||||
def replace_ssh_url(cls, url):
|
||||
# type: (Text) -> Text
|
||||
"""
|
||||
Replace SSH URL with HTTPS URL when applicable
|
||||
@@ -238,18 +242,46 @@ class VCS(object):
|
||||
).url
|
||||
return url
|
||||
|
||||
@classmethod
|
||||
def replace_http_url(cls, url):
|
||||
# type: (Text) -> Text
|
||||
"""
|
||||
Replace HTTPS URL with SSH URL when applicable
|
||||
"""
|
||||
parsed_url = furl(url)
|
||||
if parsed_url.scheme == "https":
|
||||
parsed_url.scheme = "ssh"
|
||||
parsed_url.username = "git"
|
||||
parsed_url.password = None
|
||||
# make sure there is no port in the final url (safe_furl support)
|
||||
parsed_url.port = None
|
||||
url = parsed_url.url
|
||||
return url
|
||||
|
||||
def _set_ssh_url(self):
|
||||
"""
|
||||
Replace instance URL with SSH substitution result and report to log.
|
||||
According to ``man ssh-add``, ``SSH_AUTH_SOCK`` must be set in order for ``ssh-add`` to work.
|
||||
"""
|
||||
if self.session.config.get('agent.force_git_ssh_protocol', None) and self.url:
|
||||
parsed_url = furl(self.url)
|
||||
if parsed_url.scheme == "https":
|
||||
new_url = self.replace_http_url(self.url)
|
||||
if new_url != self.url:
|
||||
print("Using SSH credentials - replacing https url '{}' with ssh url '{}'".format(
|
||||
self.url, new_url))
|
||||
self.url = new_url
|
||||
return
|
||||
|
||||
if not self.session.config.agent.translate_ssh:
|
||||
return
|
||||
|
||||
ssh_agent_variable = "SSH_AUTH_SOCK"
|
||||
if not getenv(ssh_agent_variable) and (self.session.config.get('agent.git_user', None) and
|
||||
self.session.config.get('agent.git_pass', None)):
|
||||
new_url = self.resolve_ssh_url(self.url)
|
||||
if not getenv(ssh_agent_variable) and (
|
||||
(ENV_AGENT_GIT_USER.get() or self.session.config.get('agent.git_user', None)) and
|
||||
(ENV_AGENT_GIT_PASS.get() or self.session.config.get('agent.git_pass', None))
|
||||
):
|
||||
new_url = self.replace_ssh_url(self.url)
|
||||
if new_url != self.url:
|
||||
print("Using user/pass credentials - replacing ssh url '{}' with https url '{}'".format(
|
||||
self.url, new_url))
|
||||
@@ -390,11 +422,14 @@ class VCS(object):
|
||||
Add username and password to URL if missing from URL and present in config.
|
||||
Does not modify ssh URLs.
|
||||
"""
|
||||
parsed_url = furl(url)
|
||||
try:
|
||||
parsed_url = furl(url)
|
||||
except ValueError:
|
||||
return url
|
||||
if parsed_url.scheme in ["", "ssh"] or parsed_url.scheme.startswith("git"):
|
||||
return parsed_url.url
|
||||
config_user = config.get("agent.{}_user".format(cls.executable_name), None)
|
||||
config_pass = config.get("agent.{}_pass".format(cls.executable_name), None)
|
||||
config_user = ENV_AGENT_GIT_USER.get() or config.get("agent.{}_user".format(cls.executable_name), None)
|
||||
config_pass = ENV_AGENT_GIT_PASS.get() or config.get("agent.{}_pass".format(cls.executable_name), None)
|
||||
if (
|
||||
(not (parsed_url.username and parsed_url.password))
|
||||
and config_user
|
||||
@@ -454,7 +489,17 @@ class Git(VCS):
|
||||
)
|
||||
|
||||
def pull(self):
|
||||
self.call("fetch", "--all", cwd=self.location)
|
||||
self.call("fetch", "--all", "--recurse-submodules", cwd=self.location)
|
||||
|
||||
def checkout(self): # type: () -> None
|
||||
"""
|
||||
Checkout repository at specified revision
|
||||
"""
|
||||
self.call("checkout", self.revision, *self.checkout_flags, cwd=self.location)
|
||||
try:
|
||||
self.call("submodule", "update", "--recursive", cwd=self.location)
|
||||
except:
|
||||
pass
|
||||
|
||||
info_commands = dict(
|
||||
url=Argv(executable_name, "ls-remote", "--get-url", "origin"),
|
||||
@@ -517,11 +562,16 @@ def clone_repository_cached(session, execution, destination):
|
||||
|
||||
clone_folder_name = Path(str(furl(repo_url).path)).name # type: str
|
||||
clone_folder = Path(destination) / clone_folder_name
|
||||
cached_repo_path = (
|
||||
Path(session.config["agent.vcs_cache.path"]).expanduser()
|
||||
/ "{}.{}".format(clone_folder_name, md5(ensure_binary(repo_url)).hexdigest())
|
||||
/ clone_folder_name
|
||||
) # type: Path
|
||||
|
||||
standalone_mode = session.config.get("agent.standalone_mode", False)
|
||||
if standalone_mode:
|
||||
cached_repo_path = clone_folder
|
||||
else:
|
||||
cached_repo_path = (
|
||||
Path(session.config["agent.vcs_cache.path"]).expanduser()
|
||||
/ "{}.{}".format(clone_folder_name, md5(ensure_binary(repo_url)).hexdigest())
|
||||
/ clone_folder_name
|
||||
) # type: Path
|
||||
|
||||
vcs = VcsFactory.create(
|
||||
session, execution_info=execution, location=cached_repo_path
|
||||
@@ -529,23 +579,25 @@ def clone_repository_cached(session, execution, destination):
|
||||
if not find_executable(vcs.executable_name):
|
||||
raise CommandFailedError(vcs.executable_not_found_error_help())
|
||||
|
||||
if session.config["agent.vcs_cache.enabled"] and cached_repo_path.exists():
|
||||
print('Using cached repository in "{}"'.format(cached_repo_path))
|
||||
else:
|
||||
print("cloning: {}".format(no_password_url))
|
||||
rm_tree(cached_repo_path)
|
||||
# We clone the entire repository, not a specific branch
|
||||
vcs.clone() # branch=execution.branch)
|
||||
if not standalone_mode:
|
||||
if session.config["agent.vcs_cache.enabled"] and cached_repo_path.exists():
|
||||
print('Using cached repository in "{}"'.format(cached_repo_path))
|
||||
|
||||
vcs.pull()
|
||||
rm_tree(destination)
|
||||
shutil.copytree(Text(cached_repo_path), Text(clone_folder))
|
||||
if not clone_folder.is_dir():
|
||||
raise CommandFailedError(
|
||||
"copying of repository failed: from {} to {}".format(
|
||||
cached_repo_path, clone_folder
|
||||
else:
|
||||
print("cloning: {}".format(no_password_url))
|
||||
rm_tree(cached_repo_path)
|
||||
# We clone the entire repository, not a specific branch
|
||||
vcs.clone() # branch=execution.branch)
|
||||
|
||||
vcs.pull()
|
||||
rm_tree(destination)
|
||||
shutil.copytree(Text(cached_repo_path), Text(clone_folder))
|
||||
if not clone_folder.is_dir():
|
||||
raise CommandFailedError(
|
||||
"copying of repository failed: from {} to {}".format(
|
||||
cached_repo_path, clone_folder
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# checkout in the newly copy destination
|
||||
vcs.location = Text(clone_folder)
|
||||
|
||||
@@ -75,9 +75,15 @@ class ResourceMonitor(object):
|
||||
self._exit_event = Event()
|
||||
self._gpustat_fail = 0
|
||||
self._gpustat = gpustat
|
||||
if not self._gpustat:
|
||||
self._active_gpus = None
|
||||
if os.environ.get('NVIDIA_VISIBLE_DEVICES') == 'none':
|
||||
# NVIDIA_VISIBLE_DEVICES set to none, marks cpu_only flag
|
||||
# active_gpus == False means no GPU reporting
|
||||
self._active_gpus = False
|
||||
elif not self._gpustat:
|
||||
log.warning('Trains-Agent Resource Monitor: GPU monitoring is not available')
|
||||
else:
|
||||
# None means no filtering, report all gpus
|
||||
self._active_gpus = None
|
||||
try:
|
||||
active_gpus = os.environ.get('NVIDIA_VISIBLE_DEVICES', '') or \
|
||||
@@ -244,8 +250,8 @@ class ResourceMonitor(object):
|
||||
stats["io_read_mbs"] = BytesSizes.megabytes(io_stats.read_bytes)
|
||||
stats["io_write_mbs"] = BytesSizes.megabytes(io_stats.write_bytes)
|
||||
|
||||
# check if we can access the gpu statistics
|
||||
if self._gpustat:
|
||||
# check if we need to monitor gpus and if we can access the gpu statistics
|
||||
if self._active_gpus is not False and self._gpustat:
|
||||
try:
|
||||
gpu_stat = self._gpustat.new_query()
|
||||
for i, g in enumerate(gpu_stat.gpus):
|
||||
|
||||
@@ -4,11 +4,12 @@ from time import sleep
|
||||
from glob import glob
|
||||
from tempfile import gettempdir, NamedTemporaryFile
|
||||
|
||||
from trains_agent.definitions import ENV_DOCKER_HOST_MOUNT
|
||||
from trains_agent.helper.base import warning
|
||||
|
||||
|
||||
class Singleton(object):
|
||||
prefix = 'trainsagent'
|
||||
prefix = '.trainsagent'
|
||||
sep = '_'
|
||||
ext = '.tmp'
|
||||
worker_id = None
|
||||
@@ -17,9 +18,27 @@ class Singleton(object):
|
||||
_pid_file = None
|
||||
_lock_file_name = sep+prefix+sep+'global.lock'
|
||||
_lock_timeout = 10
|
||||
_pid = None
|
||||
|
||||
@classmethod
|
||||
def register_instance(cls, unique_worker_id=None, worker_name=None):
|
||||
def update_pid_file(cls):
|
||||
new_pid = str(os.getpid())
|
||||
if not cls._pid_file or cls._pid == new_pid:
|
||||
return
|
||||
old_name = cls._pid_file.name
|
||||
parts = cls._pid_file.name.split(os.path.sep)
|
||||
parts[-1] = parts[-1].replace(cls.sep + cls._pid + cls.sep, cls.sep + new_pid + cls.sep)
|
||||
new_pid_file = os.path.sep.join(parts)
|
||||
cls._pid = new_pid
|
||||
cls._pid_file.name = new_pid_file
|
||||
# we need to rename to match new pid
|
||||
try:
|
||||
os.rename(old_name, new_pid_file)
|
||||
except:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def register_instance(cls, unique_worker_id=None, worker_name=None, api_client=None, allow_double=False):
|
||||
"""
|
||||
# Exit the process if another instance of us is using the same worker_id
|
||||
|
||||
@@ -28,7 +47,7 @@ class Singleton(object):
|
||||
:return: (str worker_id, int slot_number) Return None value on instance already running
|
||||
"""
|
||||
# try to lock file
|
||||
lock_file = os.path.join(gettempdir(), cls._lock_file_name)
|
||||
lock_file = os.path.join(cls._get_temp_folder(), cls._lock_file_name)
|
||||
timeout = 0
|
||||
while os.path.exists(lock_file):
|
||||
if timeout > cls._lock_timeout:
|
||||
@@ -46,7 +65,9 @@ class Singleton(object):
|
||||
f.write(bytes(os.getpid()))
|
||||
f.flush()
|
||||
try:
|
||||
ret = cls._register_instance(unique_worker_id=unique_worker_id, worker_name=worker_name)
|
||||
ret = cls._register_instance(
|
||||
unique_worker_id=unique_worker_id, worker_name=worker_name,
|
||||
api_client=api_client, allow_double=allow_double)
|
||||
except:
|
||||
ret = None, None
|
||||
|
||||
@@ -58,12 +79,12 @@ class Singleton(object):
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
def _register_instance(cls, unique_worker_id=None, worker_name=None):
|
||||
def _register_instance(cls, unique_worker_id=None, worker_name=None, api_client=None, allow_double=False):
|
||||
if cls.worker_id:
|
||||
return cls.worker_id, cls.instance_slot
|
||||
# make sure we have a unique name
|
||||
instance_num = 0
|
||||
temp_folder = gettempdir()
|
||||
temp_folder = cls._get_temp_folder()
|
||||
files = glob(os.path.join(temp_folder, cls.prefix + cls.sep + '*' + cls.ext))
|
||||
slots = {}
|
||||
for file in files:
|
||||
@@ -73,8 +94,24 @@ class Singleton(object):
|
||||
except Exception:
|
||||
# something is wrong, use non existing pid and delete the file
|
||||
pid = -1
|
||||
|
||||
uid, slot = None, None
|
||||
try:
|
||||
with open(file, 'r') as f:
|
||||
uid, slot = str(f.read()).split('\n')
|
||||
slot = int(slot)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
worker = None
|
||||
if api_client and ENV_DOCKER_HOST_MOUNT.get() and uid:
|
||||
try:
|
||||
worker = [w for w in api_client.workers.get_all() if w.id == uid]
|
||||
except Exception:
|
||||
worker = None
|
||||
|
||||
# count active instances and delete dead files
|
||||
if not psutil.pid_exists(pid):
|
||||
if not worker and not psutil.pid_exists(pid):
|
||||
# delete the file
|
||||
try:
|
||||
os.remove(os.path.join(file))
|
||||
@@ -83,15 +120,15 @@ class Singleton(object):
|
||||
continue
|
||||
|
||||
instance_num += 1
|
||||
try:
|
||||
with open(file, 'r') as f:
|
||||
uid, slot = str(f.read()).split('\n')
|
||||
slot = int(slot)
|
||||
except Exception:
|
||||
if slot is None:
|
||||
continue
|
||||
|
||||
if uid == unique_worker_id:
|
||||
return None, None
|
||||
if allow_double:
|
||||
warning('Instance with the same WORKER_ID [{}] was found on this machine. '
|
||||
'We are ignoring it, make sure this not a mistake.'.format(unique_worker_id))
|
||||
else:
|
||||
return None, None
|
||||
|
||||
slots[slot] = uid
|
||||
|
||||
@@ -110,10 +147,21 @@ class Singleton(object):
|
||||
unique_worker_id = worker_name + cls.worker_name_sep + str(cls.instance_slot)
|
||||
|
||||
# create lock
|
||||
cls._pid_file = NamedTemporaryFile(dir=gettempdir(), prefix=cls.prefix + cls.sep + str(os.getpid()) + cls.sep,
|
||||
suffix=cls.ext)
|
||||
cls._pid = str(os.getpid())
|
||||
cls._pid_file = NamedTemporaryFile(
|
||||
dir=cls._get_temp_folder(), prefix=cls.prefix + cls.sep + cls._pid + cls.sep, suffix=cls.ext)
|
||||
cls._pid_file.write(('{}\n{}'.format(unique_worker_id, cls.instance_slot)).encode())
|
||||
cls._pid_file.flush()
|
||||
cls.worker_id = unique_worker_id
|
||||
|
||||
return cls.worker_id, cls.instance_slot
|
||||
|
||||
@classmethod
|
||||
def _get_temp_folder(cls):
|
||||
if ENV_DOCKER_HOST_MOUNT.get():
|
||||
return ENV_DOCKER_HOST_MOUNT.get().split(':')[-1]
|
||||
return gettempdir()
|
||||
|
||||
@classmethod
|
||||
def get_slot(cls):
|
||||
return cls.instance_slot or 0
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import itertools
|
||||
from functools import partial
|
||||
from importlib import import_module
|
||||
import argparse
|
||||
@@ -24,8 +25,17 @@ def get_parser():
|
||||
from .worker import COMMANDS
|
||||
subparsers = top_parser.add_subparsers(dest='command')
|
||||
for c in COMMANDS:
|
||||
parser = subparsers.add_parser(name=c, help=COMMANDS[c]['help'])
|
||||
for a in COMMANDS[c].get('args', {}).keys():
|
||||
parser.add_argument(a, **COMMANDS[c]['args'][a])
|
||||
parser = subparsers.add_parser(name=c, help=COMMANDS[c]["help"])
|
||||
groups = itertools.groupby(
|
||||
sorted(
|
||||
COMMANDS[c].get("args", {}).items(), key=lambda x: x[1].get("group", "")
|
||||
),
|
||||
key=lambda x: x[1].pop("group", ""),
|
||||
)
|
||||
for group_name, group in groups:
|
||||
p = parser if not group_name else parser.add_argument_group(group_name)
|
||||
for key, value in group:
|
||||
aliases = value.pop("aliases", [])
|
||||
p.add_argument(key, *aliases, **value)
|
||||
|
||||
return top_parser
|
||||
|
||||
@@ -30,6 +30,17 @@ WORKER_ARGS = {
|
||||
'type': lambda x: x.upper(),
|
||||
'default': 'INFO',
|
||||
},
|
||||
'--gpus': {
|
||||
'help': 'Specify active GPUs for the daemon to use (docker / virtual environment), '
|
||||
'Equivalent to setting NVIDIA_VISIBLE_DEVICES '
|
||||
'Examples: --gpus 0 or --gpu 0,1,2 or --gpus all',
|
||||
'group': 'Docker support',
|
||||
},
|
||||
'--cpu-only': {
|
||||
'help': 'Disable GPU access for the daemon, only use CPU in either docker or virtual environment',
|
||||
'action': 'store_true',
|
||||
'group': 'Docker support',
|
||||
},
|
||||
}
|
||||
|
||||
DAEMON_ARGS = dict({
|
||||
@@ -37,21 +48,18 @@ DAEMON_ARGS = dict({
|
||||
'help': 'Pipe full log to stdout/stderr, should not be used if running in background',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--gpus': {
|
||||
'help': 'Specify active GPUs for the daemon to use (docker / virtual environment), '
|
||||
'Equivalent to setting NVIDIA_VISIBLE_DEVICES '
|
||||
'Examples: --gpus 0 or --gpu 0,1,2 or --gpus all',
|
||||
},
|
||||
'--cpu-only': {
|
||||
'help': 'Disable GPU access for the daemon, only use CPU in either docker or virtual environment',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--docker': {
|
||||
'help': 'Run execution task inside a docker (v19.03 and above). Optional args <image> <arguments> or '
|
||||
'specify default docker image in agent.default_docker.image / agent.default_docker.arguments'
|
||||
'use --gpus/--cpu-only (or set NVIDIA_VISIBLE_DEVICES) to limit gpu visibility for docker',
|
||||
'nargs': '*',
|
||||
'default': False,
|
||||
'group': 'Docker support',
|
||||
},
|
||||
'--force-current-version': {
|
||||
'help': 'Force trains-agent to use the current trains-agent version when running in the docker',
|
||||
'action': 'store_true',
|
||||
'group': 'Docker support',
|
||||
},
|
||||
'--queue': {
|
||||
'help': 'Queue ID(s)/Name(s) to pull tasks from (\'default\' queue)',
|
||||
@@ -64,6 +72,19 @@ DAEMON_ARGS = dict({
|
||||
'help': 'Do not use any network connects, assume everything is pre-installed',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--services-mode': {
|
||||
'help': 'Launch multiple long-term docker services. Implies docker & cpu-only flags.',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--create-queue': {
|
||||
'help': 'Create requested queue if it does not exist already.',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--detached': {
|
||||
'help': 'Detached mode, run agent in the background',
|
||||
'action': 'store_true',
|
||||
'aliases': ['-d'],
|
||||
},
|
||||
|
||||
}, **WORKER_ARGS)
|
||||
|
||||
@@ -97,6 +118,17 @@ COMMANDS = {
|
||||
'help': 'Do not use any network connects, assume everything is pre-installed',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--docker': {
|
||||
'help': 'Run execution task inside a docker (v19.03 and above). Optional args <image> <arguments> or '
|
||||
'specify default docker image in agent.default_docker.image / agent.default_docker.arguments'
|
||||
'use --gpus/--cpu-only (or set NVIDIA_VISIBLE_DEVICES) to limit gpu visibility for docker',
|
||||
'nargs': '*',
|
||||
'default': False,
|
||||
},
|
||||
'--clone': {
|
||||
'help': 'Clone the experiment before execution, and execute the cloned experiment',
|
||||
'action': 'store_true',
|
||||
},
|
||||
}, **WORKER_ARGS),
|
||||
},
|
||||
'build': {
|
||||
@@ -114,6 +146,12 @@ COMMANDS = {
|
||||
'help': 'Where to build the task\'s virtual environment and source code. '
|
||||
'When used with --docker, target docker image name to create',
|
||||
},
|
||||
'--install-globally': {
|
||||
'help': 'Install required python packages before creating the virtual environment used to execute an '
|
||||
'experiment, and use the \'agent.package_manager.system_site_packages\' virtual env option. '
|
||||
'Note: when --docker is used, install-globally is always true',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--docker': {
|
||||
'help': 'Build the experiment inside a docker (v19.03 and above). Optional args <image> <arguments> or '
|
||||
'specify default docker image in agent.default_docker.image / agent.default_docker.arguments'
|
||||
@@ -121,18 +159,15 @@ COMMANDS = {
|
||||
'nargs': '*',
|
||||
'default': False,
|
||||
},
|
||||
'--gpus': {
|
||||
'help': 'Specify active GPUs for the docker to use'
|
||||
'Equivalent to setting NVIDIA_VISIBLE_DEVICES '
|
||||
'Examples: --gpus 0 or --gpu 0,1,2 or --gpus all',
|
||||
},
|
||||
'--cpu-only': {
|
||||
'help': 'Disable GPU access (cpu only) for the docker',
|
||||
'action': 'store_true',
|
||||
},
|
||||
'--python-version': {
|
||||
'help': 'Virtual environment python version to use',
|
||||
},
|
||||
'--entry-point': {
|
||||
'help': 'Run the task in the new docker. There are two options:\nEither add "reuse_task" to run the '
|
||||
'given task in the docker, or "clone_task" to first clone the given task and then run it in the docker',
|
||||
'default': False,
|
||||
'choices': ['reuse_task', 'clone_task'],
|
||||
}
|
||||
}, **WORKER_ARGS),
|
||||
},
|
||||
'list': {
|
||||
|
||||
@@ -15,7 +15,7 @@ from pyhocon import ConfigFactory, HOCONConverter, ConfigTree
|
||||
from trains_agent.backend_api.session import Session as _Session, Request
|
||||
from trains_agent.backend_api.session.client import APIClient
|
||||
from trains_agent.backend_config.defs import LOCAL_CONFIG_FILE_OVERRIDE_VAR, LOCAL_CONFIG_FILES
|
||||
from trains_agent.definitions import ENVIRONMENT_CONFIG
|
||||
from trains_agent.definitions import ENVIRONMENT_CONFIG, ENV_TASK_EXECUTE_AS_USER, ENVIRONMENT_BACKWARD_COMPATIBLE
|
||||
from trains_agent.errors import APIError
|
||||
from trains_agent.helper.base import HOCONEncoder
|
||||
from trains_agent.helper.process import Argv
|
||||
@@ -63,6 +63,7 @@ def tree(*args):
|
||||
|
||||
class Session(_Session):
|
||||
version = __version__
|
||||
force_debug = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
# make sure we set the environment variable so the parent session opens the correct file
|
||||
@@ -75,18 +76,29 @@ class Session(_Session):
|
||||
cpu_only = kwargs.get('cpu_only')
|
||||
if cpu_only:
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = os.environ['NVIDIA_VISIBLE_DEVICES'] = 'none'
|
||||
if kwargs.get('gpus'):
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = os.environ['NVIDIA_VISIBLE_DEVICES'] = kwargs.get('gpus')
|
||||
if kwargs.get('gpus') and not os.environ.get('KUBERNETES_SERVICE_HOST') \
|
||||
and not os.environ.get('KUBERNETES_PORT'):
|
||||
# CUDA_VISIBLE_DEVICES does not support 'all'
|
||||
if kwargs.get('gpus') == 'all':
|
||||
os.environ.pop('CUDA_VISIBLE_DEVICES', None)
|
||||
os.environ['NVIDIA_VISIBLE_DEVICES'] = kwargs.get('gpus')
|
||||
else:
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = os.environ['NVIDIA_VISIBLE_DEVICES'] = kwargs.get('gpus')
|
||||
if kwargs.get('only_load_config'):
|
||||
from trains_agent.backend_api.config import load
|
||||
self.config = load()
|
||||
else:
|
||||
super(Session, self).__init__(*args, **kwargs)
|
||||
|
||||
# set force debug mode, if it's on:
|
||||
if Session.force_debug:
|
||||
self.config["agent"]["debug"] = True
|
||||
|
||||
self.log = self.get_logger(__name__)
|
||||
self.trace = kwargs.get('trace', False)
|
||||
self._config_file = kwargs.get('config_file') or \
|
||||
os.environ.get(LOCAL_CONFIG_FILE_OVERRIDE_VAR) or LOCAL_CONFIG_FILES[0]
|
||||
self.api_client = APIClient(session=self, api_version="2.4")
|
||||
self.api_client = APIClient(session=self, api_version="2.5")
|
||||
# HACK make sure we have python version to execute,
|
||||
# if nothing was specific, use the one that runs us
|
||||
def_python = ConfigValue(self.config, "agent.default_python")
|
||||
@@ -94,8 +106,10 @@ class Session(_Session):
|
||||
def_python.set("{version.major}.{version.minor}".format(version=sys.version_info))
|
||||
|
||||
# HACK: backwards compatibility
|
||||
os.environ['ALG_CONFIG_FILE'] = self._config_file
|
||||
os.environ['SM_CONFIG_FILE'] = self._config_file
|
||||
if ENVIRONMENT_BACKWARD_COMPATIBLE.get():
|
||||
os.environ['ALG_CONFIG_FILE'] = self._config_file
|
||||
os.environ['SM_CONFIG_FILE'] = self._config_file
|
||||
|
||||
if not self.config.get('api.host', None) and self.config.get('api.api_server', None):
|
||||
self.config['api']['host'] = self.config.get('api.api_server')
|
||||
|
||||
@@ -111,6 +125,17 @@ class Session(_Session):
|
||||
# override with environment variables
|
||||
# cuda_version & cudnn_version are overridden with os.environ here, and normalized in the next section
|
||||
for config_key, env_config in ENVIRONMENT_CONFIG.items():
|
||||
# check if the propery is of a list:
|
||||
if config_key.endswith('.0'):
|
||||
if all(not i.get() for i in env_config.values()):
|
||||
continue
|
||||
parent = config_key.partition('.0')[0]
|
||||
if not self.config[parent]:
|
||||
self.config.put(parent, [])
|
||||
|
||||
self.config.put(parent, self.config[parent] + [ConfigTree((k, v.get()) for k, v in env_config.items())])
|
||||
continue
|
||||
|
||||
value = env_config.get()
|
||||
if not value:
|
||||
continue
|
||||
@@ -140,9 +165,16 @@ class Session(_Session):
|
||||
logger.propagate = True
|
||||
return TrainsAgentLogger(logger)
|
||||
|
||||
@staticmethod
|
||||
def set_debug_mode(enable):
|
||||
if enable:
|
||||
import logging
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
Session.force_debug = enable
|
||||
|
||||
@property
|
||||
def debug_mode(self):
|
||||
return self.config.get("agent.debug", False)
|
||||
return Session.force_debug or self.config.get("agent.debug", False)
|
||||
|
||||
@property
|
||||
def config_file(self):
|
||||
@@ -165,7 +197,11 @@ class Session(_Session):
|
||||
folder_keys = ('agent.venvs_dir', 'agent.vcs_cache.path',
|
||||
'agent.pip_download_cache.path',
|
||||
'agent.docker_pip_cache', 'agent.docker_apt_cache')
|
||||
singleton_folders = ('agent.venvs_dir', 'agent.vcs_cache.path',)
|
||||
singleton_folders = ('agent.venvs_dir', 'agent.vcs_cache.path', 'agent.docker_apt_cache')
|
||||
|
||||
if os.environ.get(ENV_TASK_EXECUTE_AS_USER):
|
||||
folder_keys = tuple(list(folder_keys) + ['sdk.storage.cache.default_base_dir'])
|
||||
singleton_folders = tuple(list(singleton_folders) + ['sdk.storage.cache.default_base_dir'])
|
||||
|
||||
for key in folder_keys:
|
||||
folder_key = ConfigValue(self.config, key)
|
||||
@@ -206,6 +242,15 @@ class Session(_Session):
|
||||
config.pop('env', None)
|
||||
if remove_secret_keys:
|
||||
recursive_remove_secrets(config, secret_keys=remove_secret_keys)
|
||||
# remove logging.loggers.urllib3.level from the print
|
||||
try:
|
||||
config['logging']['loggers']['urllib3'].pop('level', None)
|
||||
except (KeyError, TypeError, AttributeError):
|
||||
pass
|
||||
try:
|
||||
config['logging'].pop('version', None)
|
||||
except (KeyError, TypeError, AttributeError):
|
||||
pass
|
||||
config = ConfigFactory.from_dict(config)
|
||||
self.log.debug("Run by interpreter: %s", sys.executable)
|
||||
print(
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.12.2'
|
||||
__version__ = '0.15.1'
|
||||
|
||||
Reference in New Issue
Block a user