JRosenkranz committed on
Commit
08facc0
1 Parent(s): 9fca77e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -37,7 +37,7 @@ docker run -d --rm --gpus all \
37
  -p 8033:8033 \
38
  -v /path/to/all/models:/models \
39
  -e MODEL_NAME=/models/model_weights/llama/13B-F \
40
- -e SPECULATOR_NAME=/models/speculator_weights/llama/llama-13b-accelerator \
41
  -e FLASH_ATTENTION=true \
42
  -e PAGED_ATTENTION=true \
43
  -e DTYPE_STR=float16 \
@@ -85,7 +85,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
85
  --model_path=/path/to/model_weights/llama/13B-F \
86
  --model_source=hf \
87
  --tokenizer=/path/to/llama/13B-F \
88
- --speculator_path=/path/to/speculator_weights/llama/13B-F \
89
  --speculator_source=hf \
90
  --compile \
91
  --compile_mode=reduce-overhead
@@ -99,7 +99,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
99
  --model_path=/path/to/model_weights/llama/13B-F \
100
  --model_source=hf \
101
  --tokenizer=/path/to/llama/13B-F \
102
- --speculator_path=/path/to/speculator_weights/llama/13B-F \
103
  --speculator_source=hf \
104
  --compile \
105
  ```
@@ -112,7 +112,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
112
  --model_path=/path/to/model_weights/llama/13B-F \
113
  --model_source=hf \
114
  --tokenizer=/path/to/llama/13B-F \
115
- --speculator_path=/path/to/speculator_weights/llama/13B-F \
116
  --speculator_source=hf \
117
  --batch_input \
118
  --compile \
 
37
  -p 8033:8033 \
38
  -v /path/to/all/models:/models \
39
  -e MODEL_NAME=/models/model_weights/llama/13B-F \
40
+ -e SPECULATOR_NAME=ibm-fms/llama-13b-accelerator \
41
  -e FLASH_ATTENTION=true \
42
  -e PAGED_ATTENTION=true \
43
  -e DTYPE_STR=float16 \
 
85
  --model_path=/path/to/model_weights/llama/13B-F \
86
  --model_source=hf \
87
  --tokenizer=/path/to/llama/13B-F \
88
+ --speculator_path=ibm-fms/llama-13b-accelerator \
89
  --speculator_source=hf \
90
  --compile \
91
  --compile_mode=reduce-overhead
 
99
  --model_path=/path/to/model_weights/llama/13B-F \
100
  --model_source=hf \
101
  --tokenizer=/path/to/llama/13B-F \
102
+ --speculator_path=ibm-fms/llama-13b-accelerator \
103
  --speculator_source=hf \
104
  --compile \
105
  ```
 
112
  --model_path=/path/to/model_weights/llama/13B-F \
113
  --model_source=hf \
114
  --tokenizer=/path/to/llama/13B-F \
115
+ --speculator_path=ibm-fms/llama-13b-accelerator \
116
  --speculator_source=hf \
117
  --batch_input \
118
  --compile \