JRosenkranz committed
Commit 0f37d3d
1 parent: a85babb

Update README.md

Files changed (1): README.md (+9, -6)
README.md CHANGED
@@ -89,11 +89,12 @@ pip install transformers==4.35.0 sentencepiece numpy
 ##### batch_size=1 (compile + cudagraphs)
 
 ```bash
+MODEL_PATH=/path/to/llama/13B-F
 python fms-extras/scripts/paged_speculative_inference.py \
     --variant=13b \
-    --model_path=/path/to/llama/13B-F \
+    --model_path=$MODEL_PATH \
     --model_source=hf \
-    --tokenizer=/path/to/llama/13B-F \
+    --tokenizer=$MODEL_PATH \
     --speculator_path=ibm-fms/llama-13b-accelerator \
     --speculator_source=hf \
     --compile \
@@ -103,11 +104,12 @@ python fms-extras/scripts/paged_speculative_inference.py \
 ##### batch_size=1 (compile)
 
 ```bash
+MODEL_PATH=/path/to/llama/13B-F
 python fms-extras/scripts/paged_speculative_inference.py \
     --variant=13b \
-    --model_path=/path/to/llama/13B-F \
+    --model_path=$MODEL_PATH \
     --model_source=hf \
-    --tokenizer=/path/to/llama/13B-F \
+    --tokenizer=$MODEL_PATH \
     --speculator_path=ibm-fms/llama-13b-accelerator \
     --speculator_source=hf \
     --compile \
@@ -116,11 +118,12 @@ python fms-extras/scripts/paged_speculative_inference.py \
 ##### batch_size=4 (compile)
 
 ```bash
+MODEL_PATH=/path/to/llama/13B-F
 python fms-extras/scripts/paged_speculative_inference.py \
     --variant=13b \
-    --model_path=/path/to/llama/13B-F \
+    --model_path=$MODEL_PATH \
     --model_source=hf \
-    --tokenizer=/path/to/llama/13B-F \
+    --tokenizer=$MODEL_PATH \
     --speculator_path=ibm-fms/llama-13b-accelerator \
     --speculator_source=hf \
     --batch_input \
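
For reference, a minimal sketch of how the first example (batch_size=1, compile + cudagraphs) reads after this change. Only the flags visible in the hunk are shown; any later flags from the original README, which fall outside the diff context, would follow unchanged.

```bash
# Sketch of the updated example: set the checkpoint location once and
# reuse it for both --model_path and --tokenizer.
MODEL_PATH=/path/to/llama/13B-F   # replace with your local Llama 13B checkpoint

python fms-extras/scripts/paged_speculative_inference.py \
    --variant=13b \
    --model_path=$MODEL_PATH \
    --model_source=hf \
    --tokenizer=$MODEL_PATH \
    --speculator_path=ibm-fms/llama-13b-accelerator \
    --speculator_source=hf \
    --compile
```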