Upload convert_survival_gguf.py with huggingface_hub
convert_survival_gguf.py
CHANGED  +21 -12
@@ -1,6 +1,5 @@
-
 # /// script
-# dependencies = ["peft", "transformers", "torch", "huggingface_hub", "sentencepiece"]
+# dependencies = ["peft", "transformers", "torch", "huggingface_hub", "sentencepiece", "cmake"]
 # ///
 
 import os
@@ -36,17 +35,15 @@ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
 tokenizer.save_pretrained(MERGED_DIR)
 
 print("Cloning llama.cpp...")
+if os.path.exists("llama.cpp"):
+    subprocess.run(["rm", "-rf", "llama.cpp"])
 subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp"], check=True)
 
 print("Installing llama.cpp requirements...")
 subprocess.run(["pip", "install", "-r", "llama.cpp/requirements.txt"], check=True)
 
-print("Converting to GGUF (
-#
-# We perform quantization in two steps: convert to fp16 gguf, then quantize
-# Or if convert script supports outtype...
-
-# Step 1: Convert to FP16 GGUF
+print("Converting to GGUF (FP16)...")
+# Convert to FP16 GGUF using the python script
 subprocess.run([
     "python", "llama.cpp/convert_hf_to_gguf.py",
     MERGED_DIR,
@@ -54,10 +51,22 @@ subprocess.run([
     "--outtype", "f16"
 ], check=True)
 
-
-
+print("Building llama-quantize with CMake...")
+# Create build directory
+os.makedirs("llama.cpp/build", exist_ok=True)
+
+# Run cmake configuration
+subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp"], check=True)
+
+# Build the project
+subprocess.run(["cmake", "--build", "llama.cpp/build", "--config", "Release", "-j"], check=True)
+
+print("Quantizing to Q4_K_M...")
+# The binary is usually in llama.cpp/build/bin/llama-quantize
+quantize_bin = "llama.cpp/build/bin/llama-quantize"
+
 subprocess.run([
-
+    quantize_bin,
     "merged_fp16.gguf",
     GGUF_FILE,
     "Q4_K_M"
@@ -75,4 +84,4 @@ api.upload_file(
     repo_type="model"
 )
 
-print("Done! GGUF available at:", f"https://huggingface.co/{OUTPUT_REPO}")
+print("Done! GGUF available at:", f"https://huggingface.co/{OUTPUT_REPO}")
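Note on the hard-coded quantize path: the added comment says the binary is "usually" at llama.cpp/build/bin/llama-quantize. As a hedged sketch (not part of this commit), the script could probe a few candidate locations instead of assuming one; the build/bin/Release/ fallback assumes a multi-config CMake generator such as Visual Studio, and find_quantize_bin is a hypothetical helper name.

# Sketch, not from the commit: locate llama-quantize after the CMake build.
import os
import shutil

def find_quantize_bin(build_dir: str = "llama.cpp/build") -> str:
    # Single-config generators put binaries in build/bin/; multi-config
    # generators (assumption) may use build/bin/Release/ instead.
    candidates = [
        os.path.join(build_dir, "bin", "llama-quantize"),
        os.path.join(build_dir, "bin", "Release", "llama-quantize"),
        os.path.join(build_dir, "bin", "llama-quantize.exe"),
        os.path.join(build_dir, "bin", "Release", "llama-quantize.exe"),
    ]
    for path in candidates:
        if os.path.isfile(path):
            return path
    # Fall back to a llama-quantize already on PATH, if any.
    on_path = shutil.which("llama-quantize")
    if on_path:
        return on_path
    raise FileNotFoundError("llama-quantize not found; did the CMake build succeed?")

Failing loudly here, before the quantize call, gives a clearer error than letting subprocess.run trip over a missing executable.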
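The last hunk only shows the tail of the upload call (repo_type="model" and the closing parenthesis). For context, a self-contained sketch of what that step typically looks like with huggingface_hub follows; OUTPUT_REPO and GGUF_FILE are placeholder values standing in for whatever the script defines earlier, not values taken from this diff.

# Sketch of the upload step, assuming OUTPUT_REPO and GGUF_FILE are set upstream.
from huggingface_hub import HfApi

OUTPUT_REPO = "your-username/survival-gguf"   # hypothetical repo id
GGUF_FILE = "merged_q4_k_m.gguf"              # hypothetical output file name

api = HfApi()
# Create the target repo if it does not exist yet.
api.create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
# Push the quantized GGUF file to the model repo.
api.upload_file(
    path_or_fileobj=GGUF_FILE,
    path_in_repo=GGUF_FILE,
    repo_id=OUTPUT_REPO,
    repo_type="model",
)
print("Done! GGUF available at:", f"https://huggingface.co/{OUTPUT_REPO}")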