version: "3.9" services: fastchat-controller: build: context: . dockerfile: Dockerfile image: fastchat:latest ports: - "21001:21001" entrypoint: ["python3.9", "-m", "fastchat.serve.controller", "--host", "0.0.0.0", "--port", "21001"] fastchat-model-worker: build: context: . dockerfile: Dockerfile volumes: - huggingface:/root/.cache/huggingface image: fastchat:latest deploy: resources: reservations: devices: - driver: nvidia count: 1 capabilities: [gpu] entrypoint: ["python3.9", "-m", "fastchat.serve.model_worker", "--model-names", "${FASTCHAT_WORKER_MODEL_NAMES:-vicuna-7b-v1.5}", "--model-path", "${FASTCHAT_WORKER_MODEL_PATH:-lmsys/vicuna-7b-v1.5}", "--worker-address", "http://fastchat-model-worker:21002", "--controller-address", "http://fastchat-controller:21001", "--host", "0.0.0.0", "--port", "21002"] fastchat-api-server: build: context: . dockerfile: Dockerfile image: fastchat:latest ports: - "8000:8000" entrypoint: ["python3.9", "-m", "fastchat.serve.openai_api_server", "--controller-address", "http://fastchat-controller:21001", "--host", "0.0.0.0", "--port", "8000"] volumes: huggingface: