robertgshaw2 committed
Commit 29715d8
1 Parent(s): 29e8c23
Update quantization/apply_gptq_save_marlin.py
quantization/apply_gptq_save_marlin.py CHANGED
@@ -46,7 +46,7 @@ if __name__ == "__main__":
         device_map="auto")
     model.quantize(examples)
 
-    gptq_save_dir =
+    gptq_save_dir = "./tmp-gptq"
     print(f"Saving gptq model to {gptq_save_dir}")
     model.save_pretrained(gptq_save_dir)
     tokenizer.save_pretrained(gptq_save_dir)
@@ -55,14 +55,14 @@ if __name__ == "__main__":
     gc.collect()
 
     print("Reloading in marlin format")
-
+
     marlin_model = AutoGPTQForCausalLM.from_quantized(
         gptq_save_dir,
         use_marlin=True,
         device_map="auto")
 
     print("Saving in marlin format")
-    marlin_model.save_pretrained(args.
-    tokenizer.save_pretrained(args.
+    marlin_model.save_pretrained(args.save_dir)
+    tokenizer.save_pretrained(args.save_dir)
 
     shutil.rmtree(gptq_save_dir)
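
For context, a minimal, self-contained sketch of the flow this script follows after the change: quantize with AutoGPTQ, save the GPTQ checkpoint to a temporary directory, reload it with the Marlin kernel, save the Marlin copy to args.save_dir, then delete the temporary checkpoint. Only the calls visible in the diff (model.quantize, save_pretrained, from_quantized(..., use_marlin=True), shutil.rmtree, gptq_save_dir = "./tmp-gptq", args.save_dir) come from the commit; the argument parsing, calibration text, and BaseQuantizeConfig values below are illustrative assumptions.

# Hypothetical sketch; only the calls shown in the diff are from the commit.
import argparse
import gc
import shutil

from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from transformers import AutoTokenizer

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_id", type=str, required=True)   # assumed flag
    parser.add_argument("--save_dir", type=str, required=True)   # matches args.save_dir in the diff
    args = parser.parse_args()

    tokenizer = AutoTokenizer.from_pretrained(args.model_id)

    # Placeholder calibration data for GPTQ; a real run would use a proper
    # calibration set.
    examples = [tokenizer("The quick brown fox jumps over the lazy dog.")]

    # Assumed 4-bit GPTQ settings compatible with the Marlin kernel
    # (4-bit, group_size 128, desc_act=False).
    quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)

    model = AutoGPTQForCausalLM.from_pretrained(args.model_id,
                                                quantize_config,
                                                device_map="auto")
    model.quantize(examples)

    # Save the intermediate GPTQ checkpoint to a temporary directory.
    gptq_save_dir = "./tmp-gptq"
    print(f"Saving gptq model to {gptq_save_dir}")
    model.save_pretrained(gptq_save_dir)
    tokenizer.save_pretrained(gptq_save_dir)

    # Free the in-memory GPTQ model before reloading.
    del model
    gc.collect()

    # Reloading with use_marlin=True repacks the GPTQ weights into the
    # Marlin serialization format.
    print("Reloading in marlin format")
    marlin_model = AutoGPTQForCausalLM.from_quantized(
        gptq_save_dir,
        use_marlin=True,
        device_map="auto")

    print("Saving in marlin format")
    marlin_model.save_pretrained(args.save_dir)
    tokenizer.save_pretrained(args.save_dir)

    # Remove the intermediate GPTQ checkpoint.
    shutil.rmtree(gptq_save_dir)

A run would look something like python apply_gptq_save_marlin.py --model_id <hf-model> --save_dir ./marlin-model (flag names assumed); only --save_dir is implied by the diff itself.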