sunkencity committed
Commit dc8648d · verified · 1 Parent(s): cf9d873

Upload convert_survival_gguf.py with huggingface_hub

Files changed (1)
  1. convert_survival_gguf.py +21 -12
convert_survival_gguf.py CHANGED
@@ -1,6 +1,5 @@
-
 # /// script
-# dependencies = ["peft", "transformers", "torch", "huggingface_hub", "sentencepiece"]
+# dependencies = ["peft", "transformers", "torch", "huggingface_hub", "sentencepiece", "cmake"]
 # ///
 
 import os
@@ -36,17 +35,15 @@ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
 tokenizer.save_pretrained(MERGED_DIR)
 
 print("Cloning llama.cpp...")
+if os.path.exists("llama.cpp"):
+    subprocess.run(["rm", "-rf", "llama.cpp"])
 subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp"], check=True)
 
 print("Installing llama.cpp requirements...")
 subprocess.run(["pip", "install", "-r", "llama.cpp/requirements.txt"], check=True)
 
-print("Converting to GGUF (Q4_K_M)...")
-# Note: Newer llama.cpp uses convert_hf_to_gguf.py
-# We perform quantization in two steps: convert to fp16 gguf, then quantize
-# Or if convert script supports outtype...
-
-# Step 1: Convert to FP16 GGUF
+print("Converting to GGUF (FP16)...")
+# Convert to FP16 GGUF using the python script
 subprocess.run([
     "python", "llama.cpp/convert_hf_to_gguf.py",
     MERGED_DIR,
@@ -54,10 +51,22 @@ subprocess.run([
     "--outtype", "f16"
 ], check=True)
 
-# Step 2: Quantize to Q4_K_M
-subprocess.run(["make", "-C", "llama.cpp", "llama-quantize"], check=True)
+print("Building llama-quantize with CMake...")
+# Create build directory
+os.makedirs("llama.cpp/build", exist_ok=True)
+
+# Run cmake configuration
+subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp"], check=True)
+
+# Build the project
+subprocess.run(["cmake", "--build", "llama.cpp/build", "--config", "Release", "-j"], check=True)
+
+print("Quantizing to Q4_K_M...")
+# The binary is usually in llama.cpp/build/bin/llama-quantize
+quantize_bin = "llama.cpp/build/bin/llama-quantize"
+
 subprocess.run([
-    "./llama.cpp/llama-quantize",
+    quantize_bin,
     "merged_fp16.gguf",
     GGUF_FILE,
     "Q4_K_M"
@@ -75,4 +84,4 @@ api.upload_file(
     repo_type="model"
 )
 
-print("Done! GGUF available at:", f"https://huggingface.co/{OUTPUT_REPO}")
+print("Done! GGUF available at:", f"https://huggingface.co/{OUTPUT_REPO}")