KBaba7 committed on
Commit 660a3f9 · verified · 1 Parent(s): 5d0b006

Create app.py

Files changed (1)
app.py +113 -0
app.py ADDED
import streamlit as st
import subprocess
import os
import shutil
import requests
from huggingface_hub import snapshot_download, login, HfApi
from pathlib import Path
import tempfile

# Define paths for llama.cpp binaries and the conversion script
LLAMA_CPP_PATH = "https://huggingface.co/spaces/KBaba7/llama.cpp/tree/main/llama.cpp"  # reference URL, not used directly
LLAMA_CPP_BIN = "build/bin"
BUILD_DIR = "build"
CONVERT_SCRIPT = "convert-hf-to-gguf.py"  # Ensure this points to the correct script path

def run_command(command):
    """Run a shell command and return its stdout and stderr."""
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True
    )
    return result.stdout, result.stderr

st.title("LLAMA Quantization Pipeline")
st.markdown(
    """
    This tool downloads a model from Hugging Face, converts it to GGUF format, quantizes it, and provides an option to download the final model.
    """
)

st.sidebar.header("Settings")
st.sidebar.write("Provide a Hugging Face access token if you want to upload the quantized model to your account.")
username = st.sidebar.text_input("Hugging Face Username")
hf_token = st.sidebar.text_input("Hugging Face Access Token", type="password")
model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B")
quantization_options = ["q4_k_m", "q4_0", "q4_1"]
quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options)
quant_options = ["f32", "f16", "bf16", "q8_0", "auto"]
quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options)
upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False)
run_button = st.button("Run Pipeline")

if run_button:
    st.info("Starting the pipeline. Please be patient...")
    log_area = st.empty()
    logs = []

    def log(message):
        logs.append(message)
        log_area.text("\n".join(logs))

    # Create temporary directories for the original and quantized models
    temp_path = Path(tempfile.gettempdir())
    original_model_dir = temp_path / "original_model"
    quantized_model_dir = temp_path / "quantized_model"
    original_model_dir.mkdir(parents=True, exist_ok=True)
    quantized_model_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Download the llama.cpp repository (conversion script and prebuilt binaries)
        snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space")

        log("Downloading model from Hugging Face...")
        snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False)
        log(f"Model downloaded to: {original_model_dir}")

        log("Converting model to GGUF format...")
        conversion_outfile = quantized_model_dir / "model_converted.gguf"
        conversion_cmd = (
            f"python3 {CONVERT_SCRIPT} {original_model_dir} --outtype {quant_type} "
            f"--outfile {conversion_outfile}"
        )
        conv_stdout, conv_stderr = run_command(conversion_cmd)
        log(conv_stdout + conv_stderr)

        if not conversion_outfile.exists():
            log("Error: GGUF conversion failed! No output file found.")
            st.error("GGUF conversion failed. Check logs.")
            st.stop()

        log("Quantizing the model...")
        quantized_model_outfile = quantized_model_dir / f"model_quantized_{quantization_type}.gguf"
        quantize_cmd = f"{LLAMA_CPP_BIN}/llama-quantize {conversion_outfile} {quantized_model_outfile} {quantization_type}"
        quant_stdout, quant_stderr = run_command(quantize_cmd)
        log(quant_stdout + quant_stderr)

        if not quantized_model_outfile.exists():
            log("Error: Quantization failed! No output file found.")
            st.error("Quantization failed. Check logs.")
            st.stop()

        log("Pipeline completed successfully!")
        st.success("Quantized model ready for download.")
        with open(quantized_model_outfile, "rb") as file:
            st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name)

        # Upload if selected
        if upload_option:
            log("Uploading quantized model to Hugging Face...")
            login(token=hf_token)  # huggingface_hub authenticates with access tokens, not passwords
            api = HfApi()
            target_repo = f"automated-quantization/{quantized_model_outfile.stem}"
            api.create_repo(target_repo, exist_ok=True, repo_type="model")
            api.upload_file(
                path_or_fileobj=str(quantized_model_outfile),
                path_in_repo=quantized_model_outfile.name,
                repo_id=target_repo,
            )
            log("Upload complete!")
    except Exception as e:
        log(f"An error occurred: {e}")
    finally:
        # Remove temporary directories (ignore errors if they are missing or non-empty)
        shutil.rmtree(original_model_dir, ignore_errors=True)
        shutil.rmtree(quantized_model_dir, ignore_errors=True)
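
Usage note: outside the Space, the app can be started locally with `streamlit run app.py`, assuming streamlit, huggingface_hub, and requests are installed and the llama.cpp convert-hf-to-gguf.py script plus the build/bin binaries are available in the working directory (these paths are assumptions taken from the constants at the top of app.py, not something this commit installs).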