Create app.py

app.py (ADDED)
import streamlit as st
import subprocess
import os
import shutil  # used to clean up the temporary model directories
import requests
from huggingface_hub import snapshot_download, login, HfApi
from pathlib import Path
import tempfile

# Define paths for the llama.cpp binaries and conversion script
LLAMA_CPP_PATH = "https://huggingface.co/spaces/KBaba7/llama.cpp/tree/main/llama.cpp"
LLAMA_CPP_BIN = "build/bin"
BUILD_DIR = "build"
CONVERT_SCRIPT = "convert-hf-to-gguf.py"  # Ensure correct path

def run_command(command):
    """Run a shell command and return its output (stdout, stderr)."""
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True
    )
    return result.stdout, result.stderr
st.title("LLAMA Quantization Pipeline")
st.markdown(
    """
    This tool downloads a model from Hugging Face, converts it to GGUF format, quantizes it, and provides an option to download the final model.
    """
)

st.sidebar.header("Settings")
st.sidebar.write("Provide a Hugging Face access token if you want the quantized model uploaded to your account.")
hf_token = st.sidebar.text_input("Hugging Face Access Token", type="password")
model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B")
# Quantization presets understood by llama-quantize
quantization_options = ["q4_k_m", "q4_0", "q4_1"]
quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options)
# Output types accepted by the GGUF conversion script (--outtype)
quant_options = ["f32", "f16", "bf16", "q8_0", "auto"]
quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options)
upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False)
run_button = st.button("Run Pipeline")
if run_button:
    st.info("Starting the pipeline. Please be patient...")
    log_area = st.empty()
    logs = []

    def log(message):
        logs.append(message)
        log_area.text("\n".join(logs))

    try:
        # Download the llama.cpp repository (provides the conversion script and binaries)
        snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space")

        # Create temporary directories for the original and quantized models
        temp_path = Path(tempfile.gettempdir())
        original_model_dir = temp_path / "original_model"
        quantized_model_dir = temp_path / "quantized_model"
        original_model_dir.mkdir(parents=True, exist_ok=True)
        quantized_model_dir.mkdir(parents=True, exist_ok=True)

        log("Downloading model from Hugging Face...")
        snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False)
        log(f"Model downloaded to: {original_model_dir}")
        log("Converting model to GGUF format...")
        conversion_outfile = quantized_model_dir / "model_converted.gguf"
        conversion_cmd = (
            f"python3 {CONVERT_SCRIPT} {original_model_dir} --outtype {quant_type} "
            f"--outfile {conversion_outfile}"
        )
        conv_stdout, conv_stderr = run_command(conversion_cmd)
        log(conv_stdout + conv_stderr)

        if not conversion_outfile.exists():
            log("Error: GGUF conversion failed! No output file found.")
            st.error("GGUF conversion failed. Check logs.")
            st.stop()

        log("Quantizing the model...")
        quantized_model_outfile = quantized_model_dir / f"model_quantized_{quantization_type}.gguf"
        quantize_cmd = f"{LLAMA_CPP_BIN}/llama-quantize {conversion_outfile} {quantized_model_outfile} {quantization_type}"
        quant_stdout, quant_stderr = run_command(quantize_cmd)
        log(quant_stdout + quant_stderr)

        if not quantized_model_outfile.exists():
            log("Error: Quantization failed! No output file found.")
            st.error("Quantization failed. Check logs.")
            st.stop()

        log("Pipeline completed successfully!")
        st.success("Quantized model ready for download.")
        with open(quantized_model_outfile, "rb") as file:
            st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name)
        # Upload if selected
        if upload_option:
            log("Uploading quantized model to Hugging Face...")
            login(token=hf_token)
            api = HfApi()
            target_repo = f"automated-quantization/{quantized_model_outfile.stem}"
            api.create_repo(target_repo, exist_ok=True, repo_type="model")
            api.upload_file(
                path_or_fileobj=str(quantized_model_outfile),
                path_in_repo=quantized_model_outfile.name,
                repo_id=target_repo,
            )
            log("Upload complete!")
    except Exception as e:
        log(f"An error occurred: {e}")
    finally:
        # Remove temporary directories (rmtree handles non-empty directories)
        shutil.rmtree(original_model_dir, ignore_errors=True)
        shutil.rmtree(quantized_model_dir, ignore_errors=True)
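
For reference, the convert-then-quantize flow above boils down to two external commands. The sketch below exercises the same two steps without the Streamlit UI; it is a minimal illustration only, and it assumes convert-hf-to-gguf.py and build/bin/llama-quantize are reachable from the working directory, exactly as the constants in app.py assume.

import subprocess
from pathlib import Path

def convert_and_quantize(model_dir, outtype="f16", preset="q4_k_m"):
    """Convert a local Hugging Face model directory to GGUF, then quantize it.

    Assumes convert-hf-to-gguf.py and build/bin/llama-quantize exist relative
    to the current working directory (hypothetical layout, mirroring app.py).
    """
    model_dir = Path(model_dir)
    converted = model_dir / "model_converted.gguf"
    quantized = model_dir / f"model_quantized_{preset}.gguf"

    # Step 1: HF checkpoint -> GGUF at the requested output type (f32/f16/bf16/q8_0/auto)
    subprocess.run(
        ["python3", "convert-hf-to-gguf.py", str(model_dir),
         "--outtype", outtype, "--outfile", str(converted)],
        check=True,
    )

    # Step 2: GGUF -> quantized GGUF using a llama.cpp quantization preset (e.g. q4_k_m)
    subprocess.run(
        ["build/bin/llama-quantize", str(converted), str(quantized), preset],
        check=True,
    )
    return quantized

Using the list form of subprocess.run here sidesteps the shell-quoting issues that the shell=True helper in app.py can hit when paths contain spaces.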
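If the upload option is enabled, the quantized file lands in the repository created by the app. A minimal sketch of pulling it back down with huggingface_hub; the repo id and filename below are hypothetical examples that follow app.py's naming scheme:

from huggingface_hub import hf_hub_download

# Hypothetical example: repo id and filename follow app.py's naming convention
local_path = hf_hub_download(
    repo_id="automated-quantization/model_quantized_q4_k_m",
    filename="model_quantized_q4_k_m.gguf",
)
print(local_path)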