Jae-Won Chung committed on
Commit
b3e31f5
1 Parent(s): e795d0f

Update docker-compose files

Browse files
deployment/docker-compose-0.yaml CHANGED
@@ -1,7 +1,7 @@
1
  services:
2
  MPT-7B:
3
  container_name: worker0
4
- image: mlenergy/tgi:latest
5
  command: ["--model-id", "mosaicml/mpt-7b-chat", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
6
  shm_size: 1g
7
  networks:
@@ -19,7 +19,7 @@ services:
19
  capabilities: [gpu]
20
  Llama2-7B:
21
  container_name: worker1
22
- image: mlenergy/tgi:latest
23
  command: ["--model-id", "/weights/metaai/Llama-2-7b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
24
  shm_size: 1g
25
  networks:
@@ -38,7 +38,7 @@ services:
38
  capabilities: [gpu]
39
  Vicuna-13B:
40
  container_name: worker2
41
- image: mlenergy/tgi:latest
42
  command: ["--model-id", "lmsys/vicuna-13b-v1.5", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
43
  shm_size: 1g
44
  networks:
@@ -56,7 +56,7 @@ services:
56
  capabilities: [gpu]
57
  Llama2-13B:
58
  container_name: worker3
59
- image: mlenergy/tgi:latest
60
  command: ["--model-id", "/weights/metaai/Llama-2-13b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
61
  shm_size: 1g
62
  networks:
 
1
  services:
2
  MPT-7B:
3
  container_name: worker0
4
+ image: mlenergy/tgi:v1.0.0
5
  command: ["--model-id", "mosaicml/mpt-7b-chat", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
6
  shm_size: 1g
7
  networks:
 
19
  capabilities: [gpu]
20
  Llama2-7B:
21
  container_name: worker1
22
+ image: mlenergy/tgi:v1.0.0
23
  command: ["--model-id", "/weights/metaai/Llama-2-7b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
24
  shm_size: 1g
25
  networks:
 
38
  capabilities: [gpu]
39
  Vicuna-13B:
40
  container_name: worker2
41
+ image: mlenergy/tgi:v1.0.0
42
  command: ["--model-id", "lmsys/vicuna-13b-v1.5", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
43
  shm_size: 1g
44
  networks:
 
56
  capabilities: [gpu]
57
  Llama2-13B:
58
  container_name: worker3
59
+ image: mlenergy/tgi:v1.0.0
60
  command: ["--model-id", "/weights/metaai/Llama-2-13b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
61
  shm_size: 1g
62
  networks:
deployment/docker-compose-1.yaml CHANGED
@@ -1,7 +1,7 @@
1
  services:
2
  Llama2-70B-INT8:
3
  container_name: worker4
4
- image: mlenergy/tgi:latest
5
  command: ["--model-id", "meta-llama/Llama-2-70b-chat-hf", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317", "--quantize", "bitsandbytes"]
6
  shm_size: 1g
7
  environment:
@@ -21,7 +21,7 @@ services:
21
  capabilities: [gpu]
22
  MPT-30B:
23
  container_name: worker5
24
- image: mlenergy/tgi:latest
25
  command: ["--model-id", "mosaicml/mpt-30b-chat", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317"]
26
  shm_size: 1g
27
  networks:
 
1
  services:
2
  Llama2-70B-INT8:
3
  container_name: worker4
4
+ image: mlenergy/tgi:v1.0.0
5
  command: ["--model-id", "meta-llama/Llama-2-70b-chat-hf", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317", "--quantize", "bitsandbytes"]
6
  shm_size: 1g
7
  environment:
 
21
  capabilities: [gpu]
22
  MPT-30B:
23
  container_name: worker5
24
+ image: mlenergy/tgi:v1.0.0
25
  command: ["--model-id", "mosaicml/mpt-30b-chat", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317"]
26
  shm_size: 1g
27
  networks: