---
# Docker Compose stack with two one-shot jobs built from the same context:
#   train - runs `python -m src.train`, then drops a completion flag file.
#   eval  - polls for that flag file, then runs `python -m src.test`.
# Both services bind-mount ./data, ./checkpoints and ./logs from the host, so
# the flag written by `train` under /app/checkpoints is visible to `eval`.

services:
  train:
    build:
      context: .
    # Compose does not run `command` through a shell, so the `&&` chain must be
    # wrapped in `sh -c` (same form the eval service uses).
    # The flag is removed first so a leftover flag from an earlier run cannot
    # release eval against old checkpoints, and it is written with the absolute
    # path that eval polls, independent of the image's working directory.
    # NOTE(review): if both services start at the same instant, eval's first
    # poll can still see a stale flag before this `rm -f` runs.
    command: >-
      sh -c 'rm -f /app/checkpoints/train_done.flag
      && python -m src.train
      && touch /app/checkpoints/train_done.flag'
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
      - NUM_WORKERS=4
    shm_size: '4g'
    deploy:
      resources:
        limits:
          memory: 8g
          cpus: '4.0'
        reservations:
          memory: 6g
          cpus: '4.0'
    networks:
      - default
    env_file:
      - .env

  eval:
    build:
      context: .
    # Checks every 10 seconds for the flag file that train creates on success,
    # then runs the test module. Folded scalar: the two lines below are joined
    # with a single space into one `sh -c '...'` command.
    command: >-
      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done
      && python -m src.test'
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
      - NUM_WORKERS=2
    shm_size: '4g'
    deploy:
      resources:
        limits:
          memory: 4g
          cpus: '4.0'
        reservations:
          memory: 2g
          cpus: '2.0'
    networks:
      - default
    env_file:
      - .env

# Named volumes declared with default settings.
# NOTE(review): neither service above references these; both mount host
# directories (./data, ./checkpoints, ./logs) instead. Confirm whether the
# declarations are still needed.
volumes:
  data: {}
  checkpoints: {}
  logs: {}

# The project's default network, declared explicitly with default settings.
networks:
  default: {}