Create app.py

app.py (ADDED)
import streamlit as st
import subprocess
import os
import shutil  # used to clean up the temporary model directories
import requests
from huggingface_hub import snapshot_download, login, HfApi
from pathlib import Path
import tempfile

# Define paths for the llama.cpp binaries and conversion script
LLAMA_CPP_PATH = "https://huggingface.co/spaces/KBaba7/llama.cpp/tree/main/llama.cpp"
LLAMA_CPP_BIN = "build/bin"
BUILD_DIR = "build"
CONVERT_SCRIPT = "convert-hf-to-gguf.py"  # Ensure correct path

def run_command(command):
    """Run a shell command and return its output (stdout, stderr)."""
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True
    )
    return result.stdout, result.stderr
st.title("LLAMA Quantization Pipeline")
st.markdown(
    """
    This tool downloads a model from Hugging Face, converts it to GGUF format, quantizes it, and provides an option to download the final model.
    """
)

st.sidebar.header("Settings")
st.sidebar.write("Provide a Hugging Face access token if you want the quantized model uploaded to your account.")
hf_token = st.sidebar.text_input("Hugging Face Access Token", type="password")
model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B")
# Quantization presets understood by llama-quantize
quantization_options = ["q4_k_m", "q4_0", "q4_1"]
quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options)
# Output types accepted by the GGUF conversion script (--outtype)
quant_options = ["f32", "f16", "bf16", "q8_0", "auto"]
quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options)
upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False)
run_button = st.button("Run Pipeline")
if run_button:
    st.info("Starting the pipeline. Please be patient...")
    log_area = st.empty()
    logs = []

    def log(message):
        logs.append(message)
        log_area.text("\n".join(logs))

    try:
        # Download the llama.cpp repository (provides the conversion script and binaries)
        snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space")

        # Create temporary directories for the original and quantized models
        temp_path = Path(tempfile.gettempdir())
        original_model_dir = temp_path / "original_model"
        quantized_model_dir = temp_path / "quantized_model"
        original_model_dir.mkdir(parents=True, exist_ok=True)
        quantized_model_dir.mkdir(parents=True, exist_ok=True)

        log("Downloading model from Hugging Face...")
        snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False)
        log(f"Model downloaded to: {original_model_dir}")
        log("Converting model to GGUF format...")
        conversion_outfile = quantized_model_dir / "model_converted.gguf"
        conversion_cmd = (
            f"python3 {CONVERT_SCRIPT} {original_model_dir} --outtype {quant_type} "
            f"--outfile {conversion_outfile}"
        )
        conv_stdout, conv_stderr = run_command(conversion_cmd)
        log(conv_stdout + conv_stderr)

        if not conversion_outfile.exists():
            log("Error: GGUF conversion failed! No output file found.")
            st.error("GGUF conversion failed. Check logs.")
            st.stop()

        log("Quantizing the model...")
        quantized_model_outfile = quantized_model_dir / f"model_quantized_{quantization_type}.gguf"
        quantize_cmd = f"{LLAMA_CPP_BIN}/llama-quantize {conversion_outfile} {quantized_model_outfile} {quantization_type}"
        quant_stdout, quant_stderr = run_command(quantize_cmd)
        log(quant_stdout + quant_stderr)

        if not quantized_model_outfile.exists():
            log("Error: Quantization failed! No output file found.")
            st.error("Quantization failed. Check logs.")
            st.stop()

        log("Pipeline completed successfully!")
        st.success("Quantized model ready for download.")
        with open(quantized_model_outfile, "rb") as file:
            st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name)
        # Upload if selected
        if upload_option:
            log("Uploading quantized model to Hugging Face...")
            login(token=hf_token)
            api = HfApi()
            target_repo = f"automated-quantization/{quantized_model_outfile.stem}"
            api.create_repo(target_repo, exist_ok=True, repo_type="model")
            api.upload_file(
                path_or_fileobj=str(quantized_model_outfile),
                path_in_repo=quantized_model_outfile.name,
                repo_id=target_repo,
            )
            log("Upload complete!")
    except Exception as e:
        log(f"An error occurred: {e}")
    finally:
        # Remove temporary directories (rmtree handles non-empty directories)
        shutil.rmtree(original_model_dir, ignore_errors=True)
        shutil.rmtree(quantized_model_dir, ignore_errors=True)
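
For reference, the convert-then-quantize flow above boils down to two external commands. The sketch below exercises the same two steps without the Streamlit UI; it is a minimal illustration only, and it assumes convert-hf-to-gguf.py and build/bin/llama-quantize are reachable from the working directory, exactly as the constants in app.py assume.

import subprocess
from pathlib import Path

def convert_and_quantize(model_dir, outtype="f16", preset="q4_k_m"):
    """Convert a local Hugging Face model directory to GGUF, then quantize it.

    Assumes convert-hf-to-gguf.py and build/bin/llama-quantize exist relative
    to the current working directory (hypothetical layout, mirroring app.py).
    """
    model_dir = Path(model_dir)
    converted = model_dir / "model_converted.gguf"
    quantized = model_dir / f"model_quantized_{preset}.gguf"

    # Step 1: HF checkpoint -> GGUF at the requested output type (f32/f16/bf16/q8_0/auto)
    subprocess.run(
        ["python3", "convert-hf-to-gguf.py", str(model_dir),
         "--outtype", outtype, "--outfile", str(converted)],
        check=True,
    )

    # Step 2: GGUF -> quantized GGUF using a llama.cpp quantization preset (e.g. q4_k_m)
    subprocess.run(
        ["build/bin/llama-quantize", str(converted), str(quantized), preset],
        check=True,
    )
    return quantized

Using the list form of subprocess.run here sidesteps the shell-quoting issues that the shell=True helper in app.py can hit when paths contain spaces.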
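If the upload option is enabled, the quantized file lands in the repository created by the app. A minimal sketch of pulling it back down with huggingface_hub; the repo id and filename below are hypothetical examples that follow app.py's naming scheme:

from huggingface_hub import hf_hub_download

# Hypothetical example: repo id and filename follow app.py's naming convention
local_path = hf_hub_download(
    repo_id="automated-quantization/model_quantized_q4_k_m",
    filename="model_quantized_q4_k_m.gguf",
)
print(local_path)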