robertgshaw2 committed on
Commit
29715d8
1 Parent(s): 29e8c23

Update quantization/apply_gptq_save_marlin.py

Browse files
quantization/apply_gptq_save_marlin.py CHANGED
@@ -46,7 +46,7 @@ if __name__ == "__main__":
46
  device_map="auto")
47
  model.quantize(examples)
48
 
49
- gptq_save_dir = args.gptq_save_dir
50
  print(f"Saving gptq model to {gptq_save_dir}")
51
  model.save_pretrained(gptq_save_dir)
52
  tokenizer.save_pretrained(gptq_save_dir)
@@ -55,14 +55,14 @@ if __name__ == "__main__":
55
  gc.collect()
56
 
57
  print("Reloading in marlin format")
58
- gptq_save_dir = "./tmp-gptq"
59
  marlin_model = AutoGPTQForCausalLM.from_quantized(
60
  gptq_save_dir,
61
  use_marlin=True,
62
  device_map="auto")
63
 
64
  print("Saving in marlin format")
65
- marlin_model.save_pretrained(args.marlin_save_dir)
66
- tokenizer.save_pretrained(args.marlin_save_dir)
67
 
68
  shutil.rmtree(gptq_save_dir)
 
46
  device_map="auto")
47
  model.quantize(examples)
48
 
49
+ gptq_save_dir = "./tmp-gptq"
50
  print(f"Saving gptq model to {gptq_save_dir}")
51
  model.save_pretrained(gptq_save_dir)
52
  tokenizer.save_pretrained(gptq_save_dir)
 
55
  gc.collect()
56
 
57
  print("Reloading in marlin format")
58
+
59
  marlin_model = AutoGPTQForCausalLM.from_quantized(
60
  gptq_save_dir,
61
  use_marlin=True,
62
  device_map="auto")
63
 
64
  print("Saving in marlin format")
65
+ marlin_model.save_pretrained(args.save_dir)
66
+ tokenizer.save_pretrained(args.save_dir)
67
 
68
  shutil.rmtree(gptq_save_dir)