bowenbaoamd committed
Commit 2ff6250
Parent: 2fb49ed

Update README.md

Remove trailing spaces.

Files changed (1): README.md (+4 -4)
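
The cleanup itself is mechanical; a one-liner along these lines (a minimal sketch, assuming GNU sed; BSD sed would need `-i ''`) produces the same edit:

```sh
# Strip trailing spaces and tabs from every line of README.md, editing in place (GNU sed).
sed -i 's/[[:space:]]*$//' README.md
```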
README.md CHANGED
@@ -19,9 +19,9 @@ base_model: meta-llama/Meta-Llama-3.1-405B-Instruct
 ```sh
 export MODEL_DIR = [local model checkpoint folder] or meta-llama/Meta-Llama-3.1-405B-Instruct
 # single GPU
-python3 quantize_quark.py \
+python3 quantize_quark.py \
 --model_dir $MODEL_DIR \
---output_dir Meta-Llama-3.1-405B-Instruct-FP8-KV \
+--output_dir Meta-Llama-3.1-405B-Instruct-FP8-KV \
 --quant_scheme w_fp8_a_fp8 \
 --kv_cache_dtype fp8 \
 --num_calib_data 128 \
@@ -29,9 +29,9 @@ python3 quantize_quark.py \
 --no_weight_matrix_merge
 
 # If model size is too large for single GPU, please use multi GPU instead.
-python3 quantize_quark.py \
+python3 quantize_quark.py \
 --model_dir $MODEL_DIR \
---output_dir Meta-Llama-3.1-405B-Instruct-FP8-KV \
+--output_dir Meta-Llama-3.1-405B-Instruct-FP8-KV \
 --quant_scheme w_fp8_a_fp8 \
 --kv_cache_dtype fp8 \
 --num_calib_data 128 \
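
Since the only change is trailing-whitespace removal, the removed and added lines above look identical. A quick check like the following (a hypothetical verification step, not part of the commit) should print nothing once the fix is applied:

```sh
# List any lines in README.md that still end in a space or tab; no output means the file is clean.
grep -nE '[[:space:]]+$' README.md
```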