Spaces:
Running
Running
# Two worker containers running the mlenergy/tgi image. Each is given a
# model id, is attached to the `leaderboard` network, and reserves two
# NVIDIA GPUs by explicit device id.
services:
  Llama2-70B-INT8:
    container_name: worker4
    image: mlenergy/tgi:latest
    command:
      - "--model-id"
      - "meta-llama/Llama-2-70b-chat-hf"
      # Two shards alongside two reserved GPUs below -- presumably one
      # shard per GPU; confirm against the image's launcher options.
      - "--num-shard"
      - "2"
      # `jaeger` is not defined in this file; presumably it is another
      # container reachable on the shared network -- TODO confirm.
      - "--otlp-endpoint"
      - "http://jaeger:4317"
      # Only this service passes --quantize (service name suggests INT8).
      - "--quantize"
      - "bitsandbytes"
    shm_size: 1g
    environment:
      # Interpolated by Compose from HF_TOKEN in the invoking environment.
      HUGGING_FACE_HUB_TOKEN: "${HF_TOKEN}"
    networks:
      - leaderboard
    volumes:
      # Same host directory is mounted at /data in both workers.
      - /data/leaderboard/tgi-data:/data
    deploy:
      restart_policy:
        condition: any
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Quoted so the ids stay strings rather than integers.
              device_ids: ["0", "1"]
              capabilities: [gpu]

  MPT-30B:
    container_name: worker5
    image: mlenergy/tgi:latest
    # No --quantize flag and no HUGGING_FACE_HUB_TOKEN for this service.
    command:
      - "--model-id"
      - "mosaicml/mpt-30b-chat"
      - "--num-shard"
      - "2"
      - "--otlp-endpoint"
      - "http://jaeger:4317"
    shm_size: 1g
    networks:
      - leaderboard
    volumes:
      - /data/leaderboard/tgi-data:/data
    deploy:
      restart_policy:
        condition: any
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Disjoint from the GPUs reserved by Llama2-70B-INT8.
              device_ids: ["2", "3"]
              capabilities: [gpu]
# Network the services in this file attach to.
networks:
  leaderboard:
    # Explicit name, so no project-name prefix is applied on lookup.
    name: leaderboard
    # Managed outside this Compose file: it must already exist, and
    # Compose will not create or remove it.
    external: true