seungrok81 committed on
Commit
1dc70e5
1 Parent(s): 972b266

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -43,7 +43,7 @@ torchrun --standalone --nproc_per_node=8 run_vllm_fp8.py
43
  from vllm import LLM, SamplingParams
44
  prompt = "Write me an essay about bear and knight"
45
 
46
- model_name="/workspace/models--meta-llama--Meta-Llama-3.1-405B-Instruct/snapshots/069992c75aed59df00ec06c17177e76c63296a26/"
47
  tp=8 # 8 GPUs
48
 
49
  model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="float16", quantization="fp8", quantized_weights_path="/llama.safetensors")
@@ -59,7 +59,7 @@ print(result)
59
 
60
  ```sh
61
  # 8 GPUs
62
- torchrun --standalone --nproc_per_node=8 run_vllm_fp8.py
63
  ```
64
 
65
  ```python
@@ -67,7 +67,7 @@ torchrun --standalone --nproc_per_node=8 run_vllm_fp8.py
67
  from vllm import LLM, SamplingParams
68
  prompt = "Write me an essay about bear and knight"
69
 
70
- model_name="/workspace/models--meta-llama--Meta-Llama-3.1-405B-Instruct/snapshots/069992c75aed59df00ec06c17177e76c63296a26/"
71
  tp=8 # 8 GPUs
72
  model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="bfloat16")
73
  sampling_params = SamplingParams(
 
43
  from vllm import LLM, SamplingParams
44
  prompt = "Write me an essay about bear and knight"
45
 
46
+ model_name="models--meta-llama--Meta-Llama-3.1-405B-Instruct/snapshots/069992c75aed59df00ec06c17177e76c63296a26/"
47
  tp=8 # 8 GPUs
48
 
49
  model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="float16", quantization="fp8", quantized_weights_path="/llama.safetensors")
 
59
 
60
  ```sh
61
  # 8 GPUs
62
+ torchrun --standalone --nproc_per_node=8 run_vllm_fp16.py
63
  ```
64
 
65
  ```python
 
67
  from vllm import LLM, SamplingParams
68
  prompt = "Write me an essay about bear and knight"
69
 
70
+ model_name="models--meta-llama--Meta-Llama-3.1-405B-Instruct/snapshots/069992c75aed59df00ec06c17177e76c63296a26/"
71
  tp=8 # 8 GPUs
72
  model = LLM(model=model_name, tensor_parallel_size=tp, max_model_len=8192, trust_remote_code=True, dtype="bfloat16")
73
  sampling_params = SamplingParams(