seungrok81
committed on
Commit
•
1dc70e5
1
Parent(s):
972b266
Update README.md
Browse files
README.md
CHANGED
@@ -43,7 +43,7 @@ torchrun --standalone --nproc_per_node=8 run_vllm_fp8.py
|
|
43 |
from vllm import LLM, SamplingParams
|
44 |
prompt = "Write me an essay about bear and knight"
|
45 |
|
46 |
-
model_name="
|
47 |
tp=8 # 8 GPUs
|
48 |
|
49 |
model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="float16", quantization="fp8", quantized_weights_path="/llama.safetensors")
|
@@ -59,7 +59,7 @@ print(result)
|
|
59 |
|
60 |
```sh
|
61 |
# 8 GPUs
|
62 |
-
torchrun --standalone --nproc_per_node=8
|
63 |
```
|
64 |
|
65 |
```python
|
@@ -67,7 +67,7 @@ torchrun --standalone --nproc_per_node=8 run_vllm_fp8.py
|
|
67 |
from vllm import LLM, SamplingParams
|
68 |
prompt = "Write me an essay about bear and knight"
|
69 |
|
70 |
-
model_name="
|
71 |
tp=8 # 8 GPUs
|
72 |
model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="bfloat16")
|
73 |
sampling_params = SamplingParams(
|
|
|
43 |
from vllm import LLM, SamplingParams
|
44 |
prompt = "Write me an essay about bear and knight"
|
45 |
|
46 |
+
model_name="models--meta-llama--Meta-Llama-3.1-405B-Instruct/snapshots/069992c75aed59df00ec06c17177e76c63296a26/"
|
47 |
tp=8 # 8 GPUs
|
48 |
|
49 |
model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="float16", quantization="fp8", quantized_weights_path="/llama.safetensors")
|
|
|
59 |
|
60 |
```sh
|
61 |
# 8 GPUs
|
62 |
+
torchrun --standalone --nproc_per_node=8 run_vllm_fp16.py
|
63 |
```
|
64 |
|
65 |
```python
|
|
|
67 |
from vllm import LLM, SamplingParams
|
68 |
prompt = "Write me an essay about bear and knight"
|
69 |
|
70 |
+
model_name="models--meta-llama--Meta-Llama-3.1-405B-Instruct/snapshots/069992c75aed59df00ec06c17177e76c63296a26/"
|
71 |
tp=8 # 8 GPUs
|
72 |
model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="bfloat16")
|
73 |
sampling_params = SamplingParams(
|