diff --git a/README.md b/README.md index d7be5e6..f321ee1 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ To restart the **ClearML Server**, you must first stop the containers, and then ## Upgrading -**ClearML Server** releases are also reflected in the [docker compose configuration file](https://github.com/allegroai/trains-server/blob/master/docker-compose.yml). +**ClearML Server** releases are also reflected in the [docker compose configuration file](https://github.com/allegroai/trains-server/blob/master/docker/docker-compose.yml). We strongly encourage you to keep your **ClearML Server** up to date, by keeping up with the current release. **Note**: The following upgrade instructions use the Linux OS as an example. @@ -185,7 +185,7 @@ To upgrade your existing **ClearML Server** deployment: 1. Download the latest `docker-compose.yml` file. ```bash - curl https://raw.githubusercontent.com/allegroai/trains-server/master/docker-compose.yml -o docker-compose.yml + curl https://raw.githubusercontent.com/allegroai/trains-server/master/docker/docker-compose.yml -o docker-compose.yml ``` 1. Configure the ClearML-Agent Services (not supported on Windows installation). 
diff --git a/docker/docker-compose-unified.yml b/docker/docker-compose-unified.yml new file mode 100644 index 0000000..a08b391 --- /dev/null +++ b/docker/docker-compose-unified.yml @@ -0,0 +1,85 @@ +version: "3.6" +services: + clearmlserver: + command: + - -c + - "echo \"#!/bin/bash\" > /opt/clearml/all.sh && echo \"/opt/clearml/wrapper.sh webserver&\" >> /opt/clearml/all.sh && echo \"/opt/clearml/wrapper.sh fileserver&\" >> /opt/clearml/all.sh && echo \"/opt/clearml/wrapper.sh apiserver\" >> /opt/clearml/all.sh && cat /opt/clearml/all.sh && chmod +x /opt/clearml/all.sh && /opt/clearml/all.sh" + entrypoint: /bin/bash + container_name: clearml-server + image: allegroai/clearml:latest + ports: + - "8008:8008" + - "8080:80" + - "8081:8081" + restart: unless-stopped + volumes: + - /opt/clearml/logs:/var/log/trains + - /opt/clearml/data/fileserver:/mnt/fileserver + - /opt/clearml/config:/opt/trains/config + + depends_on: + - redis + - mongo + - elasticsearch + environment: + TRAINS_ELASTIC_SERVICE_HOST: elasticsearch + TRAINS_ELASTIC_SERVICE_PORT: 9200 + TRAINS_MONGODB_SERVICE_HOST: mongo + TRAINS_MONGODB_SERVICE_PORT: 27017 + TRAINS_REDIS_SERVICE_HOST: redis + TRAINS_REDIS_SERVICE_PORT: 6379 + networks: + - backend + elasticsearch: + networks: + - backend + container_name: clearml-elastic + environment: + ES_JAVA_OPTS: -Xms2g -Xmx2g + bootstrap.memory_lock: "true" + cluster.name: clearml + cluster.routing.allocation.node_initial_primaries_recoveries: "500" + cluster.routing.allocation.disk.watermark.low: 10gb + cluster.routing.allocation.disk.watermark.high: 10gb + cluster.routing.allocation.disk.watermark.flood_stage: 10gb + discovery.zen.minimum_master_nodes: "1" + discovery.type: "single-node" + http.compression_level: "7" + node.ingest: "true" + node.name: clearml + reindex.remote.whitelist: '*.*' + xpack.monitoring.enabled: "false" + xpack.security.enabled: "false" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + image: 
docker.elastic.co/elasticsearch/elasticsearch:7.6.2 + restart: unless-stopped + volumes: + - /opt/clearml/data/elastic_7:/usr/share/elasticsearch/data + mongo: + networks: + - backend + container_name: clearml-mongo + image: mongo:3.6.5 + restart: unless-stopped + command: --setParameter internalQueryExecMaxBlockingSortBytes=196100200 + volumes: + - /opt/clearml/data/mongo/db:/data/db + - /opt/clearml/data/mongo/configdb:/data/configdb + redis: + networks: + - backend + container_name: clearml-redis + image: redis:5.0 + restart: unless-stopped + volumes: + - /opt/clearml/data/redis:/data + +networks: + backend: + driver: bridge diff --git a/docker/docker-compose-win10.yml b/docker/docker-compose-win10.yml new file mode 100644 index 0000000..514d0c1 --- /dev/null +++ b/docker/docker-compose-win10.yml @@ -0,0 +1,116 @@ +version: "3.6" +services: + + apiserver: + command: + - apiserver + container_name: clearml-apiserver + image: allegroai/clearml:latest + restart: unless-stopped + volumes: + - c:/opt/clearml/logs:/var/log/trains + - c:/opt/clearml/config:/opt/trains/config + depends_on: + - redis + - mongo + - elasticsearch + - fileserver + environment: + TRAINS_ELASTIC_SERVICE_HOST: elasticsearch + TRAINS_ELASTIC_SERVICE_PORT: 9200 + TRAINS_MONGODB_SERVICE_HOST: mongo + TRAINS_MONGODB_SERVICE_PORT: 27017 + TRAINS_REDIS_SERVICE_HOST: redis + TRAINS_REDIS_SERVICE_PORT: 6379 + TRAINS_SERVER_DEPLOYMENT_TYPE: ${TRAINS_SERVER_DEPLOYMENT_TYPE:-win10} + TRAINS__apiserver__mongo__pre_populate__enabled: "true" + TRAINS__apiserver__mongo__pre_populate__zip_file: "/opt/clearml/db-pre-populate/export.zip" + ports: + - "8008:8008" + networks: + - backend + + elasticsearch: + networks: + - backend + container_name: clearml-elastic + environment: + ES_JAVA_OPTS: -Xms2g -Xmx2g + bootstrap.memory_lock: "true" + cluster.name: clearml + cluster.routing.allocation.node_initial_primaries_recoveries: "500" + cluster.routing.allocation.disk.watermark.low: 10gb + 
cluster.routing.allocation.disk.watermark.high: 10gb + cluster.routing.allocation.disk.watermark.flood_stage: 10gb + discovery.zen.minimum_master_nodes: "1" + discovery.type: "single-node" + http.compression_level: "7" + node.ingest: "true" + node.name: clearml + reindex.remote.whitelist: '*.*' + xpack.monitoring.enabled: "false" + xpack.security.enabled: "false" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + image: docker.elastic.co/elasticsearch/elasticsearch:7.6.2 + restart: unless-stopped + volumes: + - c:/opt/clearml/data/elastic_7:/usr/share/elasticsearch/data + + fileserver: + networks: + - backend + command: + - fileserver + container_name: clearml-fileserver + image: allegroai/clearml:latest + restart: unless-stopped + volumes: + - c:/opt/clearml/logs:/var/log/trains + - c:/opt/clearml/data/fileserver:/mnt/fileserver + - c:/opt/clearml/config:/opt/trains/config + + ports: + - "8081:8081" + + mongo: + networks: + - backend + container_name: clearml-mongo + image: mongo:3.6.5 + restart: unless-stopped + command: --setParameter internalQueryExecMaxBlockingSortBytes=196100200 + volumes: + - c:/opt/clearml/data/mongo/db:/data/db + - c:/opt/clearml/data/mongo/configdb:/data/configdb + + redis: + networks: + - backend + container_name: clearml-redis + image: redis:5.0 + restart: unless-stopped + volumes: + - c:/opt/clearml/data/redis:/data + + webserver: + command: + - webserver + container_name: clearml-webserver + image: allegroai/clearml:latest + restart: unless-stopped + volumes: + - c:/opt/clearml/logs:/var/log/trains + depends_on: + - apiserver + ports: + - "8080:80" + +networks: + backend: + driver: bridge diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..99d94d1 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,153 @@ +version: "3.6" +services: + + apiserver: + command: + - apiserver + container_name: clearml-apiserver + image: allegroai/clearml:latest + restart: 
unless-stopped + volumes: + - /opt/clearml/logs:/var/log/trains + - /opt/clearml/config:/opt/trains/config + - /opt/clearml/data/fileserver:/mnt/fileserver + depends_on: + - redis + - mongo + - elasticsearch + - fileserver + environment: + TRAINS_ELASTIC_SERVICE_HOST: elasticsearch + TRAINS_ELASTIC_SERVICE_PORT: 9200 + TRAINS_MONGODB_SERVICE_HOST: mongo + TRAINS_MONGODB_SERVICE_PORT: 27017 + TRAINS_REDIS_SERVICE_HOST: redis + TRAINS_REDIS_SERVICE_PORT: 6379 + TRAINS_SERVER_DEPLOYMENT_TYPE: ${TRAINS_SERVER_DEPLOYMENT_TYPE:-linux} + TRAINS__apiserver__pre_populate__enabled: "true" + TRAINS__apiserver__pre_populate__zip_files: "/opt/trains/db-pre-populate" + TRAINS__apiserver__pre_populate__artifacts_path: "/mnt/fileserver" + ports: + - "8008:8008" + networks: + - backend + - frontend + + elasticsearch: + networks: + - backend + container_name: clearml-elastic + environment: + ES_JAVA_OPTS: -Xms2g -Xmx2g + bootstrap.memory_lock: "true" + cluster.name: clearml + cluster.routing.allocation.node_initial_primaries_recoveries: "500" + cluster.routing.allocation.disk.watermark.low: 10gb + cluster.routing.allocation.disk.watermark.high: 10gb + cluster.routing.allocation.disk.watermark.flood_stage: 10gb + discovery.zen.minimum_master_nodes: "1" + discovery.type: "single-node" + http.compression_level: "7" + node.ingest: "true" + node.name: clearml + reindex.remote.whitelist: '*.*' + xpack.monitoring.enabled: "false" + xpack.security.enabled: "false" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + image: docker.elastic.co/elasticsearch/elasticsearch:7.6.2 + restart: unless-stopped + volumes: + - /opt/clearml/data/elastic_7:/usr/share/elasticsearch/data + + fileserver: + networks: + - backend + command: + - fileserver + container_name: clearml-fileserver + image: allegroai/clearml:latest + restart: unless-stopped + volumes: + - /opt/clearml/logs:/var/log/trains + - /opt/clearml/data/fileserver:/mnt/fileserver + - 
/opt/clearml/config:/opt/trains/config + ports: + - "8081:8081" + + mongo: + networks: + - backend + container_name: clearml-mongo + image: mongo:3.6.5 + restart: unless-stopped + command: --setParameter internalQueryExecMaxBlockingSortBytes=196100200 + volumes: + - /opt/clearml/data/mongo/db:/data/db + - /opt/clearml/data/mongo/configdb:/data/configdb + + redis: + networks: + - backend + container_name: clearml-redis + image: redis:5.0 + restart: unless-stopped + volumes: + - /opt/clearml/data/redis:/data + + webserver: + command: + - webserver + container_name: clearml-webserver + image: allegroai/clearml:latest + restart: unless-stopped + depends_on: + - apiserver + ports: + - "8080:80" + networks: + - backend + - frontend + + agent-services: + networks: + - backend + container_name: trains-agent-services + image: allegroai/trains-agent-services:latest + restart: unless-stopped + privileged: true + environment: + TRAINS_HOST_IP: ${TRAINS_HOST_IP} + TRAINS_WEB_HOST: ${TRAINS_WEB_HOST:-} + TRAINS_API_HOST: http://apiserver:8008 + TRAINS_FILES_HOST: ${TRAINS_FILES_HOST:-} + TRAINS_API_ACCESS_KEY: ${TRAINS_API_ACCESS_KEY:-} + TRAINS_API_SECRET_KEY: ${TRAINS_API_SECRET_KEY:-} + TRAINS_AGENT_GIT_USER: ${TRAINS_AGENT_GIT_USER} + TRAINS_AGENT_GIT_PASS: ${TRAINS_AGENT_GIT_PASS} + TRAINS_AGENT_UPDATE_VERSION: ${TRAINS_AGENT_UPDATE_VERSION:->=0.15.0} + TRAINS_AGENT_DEFAULT_BASE_DOCKER: "ubuntu:18.04" + AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-} + AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-} + AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION:-} + AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-} + AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-} + GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-} + TRAINS_WORKER_ID: "trains-services" + TRAINS_AGENT_DOCKER_HOST_MOUNT: "/opt/clearml/agent:/root/.trains" + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - /opt/clearml/agent:/root/.trains + depends_on: + - apiserver + +networks: + backend: + driver: bridge + 
frontend: + driver: bridge diff --git a/docker-compose-unified.yml b/docker/legacy/trains-server/docker-compose-unified.yml similarity index 93% rename from docker-compose-unified.yml rename to docker/legacy/trains-server/docker-compose-unified.yml index 7d1adbd..afeb5e8 100644 --- a/docker-compose-unified.yml +++ b/docker/legacy/trains-server/docker-compose-unified.yml @@ -39,9 +39,9 @@ services: bootstrap.memory_lock: "true" cluster.name: trains cluster.routing.allocation.node_initial_primaries_recoveries: "500" - cluster.routing.allocation.disk.watermark.low: 2gb - cluster.routing.allocation.disk.watermark.high: 2gb - cluster.routing.allocation.disk.watermark.flood_stage: 2gb + cluster.routing.allocation.disk.watermark.low: 10gb + cluster.routing.allocation.disk.watermark.high: 10gb + cluster.routing.allocation.disk.watermark.flood_stage: 10gb discovery.zen.minimum_master_nodes: "1" discovery.type: "single-node" http.compression_level: "7" diff --git a/docker-compose-win10.yml b/docker/legacy/trains-server/docker-compose-win10.yml similarity index 94% rename from docker-compose-win10.yml rename to docker/legacy/trains-server/docker-compose-win10.yml index 2cda252..b44ddd3 100644 --- a/docker-compose-win10.yml +++ b/docker/legacy/trains-server/docker-compose-win10.yml @@ -39,9 +39,9 @@ services: bootstrap.memory_lock: "true" cluster.name: trains cluster.routing.allocation.node_initial_primaries_recoveries: "500" - cluster.routing.allocation.disk.watermark.low: 2gb - cluster.routing.allocation.disk.watermark.high: 2gb - cluster.routing.allocation.disk.watermark.flood_stage: 2gb + cluster.routing.allocation.disk.watermark.low: 10gb + cluster.routing.allocation.disk.watermark.high: 10gb + cluster.routing.allocation.disk.watermark.flood_stage: 10gb discovery.zen.minimum_master_nodes: "1" discovery.type: "single-node" http.compression_level: "7" diff --git a/docker-compose.yml b/docker/legacy/trains-server/docker-compose.yml similarity index 91% rename from 
docker-compose.yml rename to docker/legacy/trains-server/docker-compose.yml index e5b8869..0243a84 100644 --- a/docker-compose.yml +++ b/docker/legacy/trains-server/docker-compose.yml @@ -5,7 +5,7 @@ services: command: - apiserver container_name: trains-apiserver - image: allegroai/trains:latest + image: allegroai/clearml:latest restart: unless-stopped volumes: - /opt/trains/logs:/var/log/trains @@ -31,6 +31,7 @@ services: - "8008:8008" networks: - backend + - frontend elasticsearch: networks: @@ -41,9 +42,9 @@ services: bootstrap.memory_lock: "true" cluster.name: trains cluster.routing.allocation.node_initial_primaries_recoveries: "500" - cluster.routing.allocation.disk.watermark.low: 2gb - cluster.routing.allocation.disk.watermark.high: 2gb - cluster.routing.allocation.disk.watermark.flood_stage: 2gb + cluster.routing.allocation.disk.watermark.low: 10gb + cluster.routing.allocation.disk.watermark.high: 10gb + cluster.routing.allocation.disk.watermark.flood_stage: 10gb discovery.zen.minimum_master_nodes: "1" discovery.type: "single-node" http.compression_level: "7" @@ -70,7 +71,7 @@ services: command: - fileserver container_name: trains-fileserver - image: allegroai/trains:latest + image: allegroai/clearml:latest restart: unless-stopped volumes: - /opt/trains/logs:/var/log/trains @@ -103,12 +104,15 @@ services: command: - webserver container_name: trains-webserver - image: allegroai/trains:latest + image: allegroai/clearml:latest restart: unless-stopped depends_on: - apiserver ports: - "8080:80" + networks: + - backend + - frontend agent-services: networks: @@ -145,3 +149,5 @@ services: networks: backend: driver: bridge + frontend: + driver: bridge