lee-ite committed
Commit 016546d
1 Parent(s): 6891b0a

add application file

Files changed (4)
  1. 20.jpg +0 -0
  2. Dockerfile +57 -0
  3. app.py +199 -0
  4. start.sh +5 -0
20.jpg ADDED
Dockerfile ADDED
@@ -0,0 +1,57 @@
+ FROM ubuntu:24.04
+
+ ENV DEBIAN_FRONTEND=noninteractive
+ RUN apt-get update && \
+     apt-get upgrade -y && \
+     apt-get install -y --no-install-recommends \
+     git \
+     git-lfs \
+     wget \
+     curl \
+     # python build dependencies \
+     build-essential \
+     libssl-dev \
+     zlib1g-dev \
+     libbz2-dev \
+     libreadline-dev \
+     libsqlite3-dev \
+     libncursesw5-dev \
+     xz-utils \
+     tk-dev \
+     libxml2-dev \
+     libxmlsec1-dev \
+     libffi-dev \
+     liblzma-dev \
+     ffmpeg
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:${PATH}
+ WORKDIR ${HOME}/app
+
+ RUN curl https://pyenv.run | bash
+ ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+ ARG PYTHON_VERSION=3.11
+ RUN pyenv install ${PYTHON_VERSION} && \
+     pyenv global ${PYTHON_VERSION} && \
+     pyenv rehash && \
+     pip install --no-cache-dir -U pip setuptools wheel && \
+     pip install "huggingface-hub" "hf-transfer" "gradio[oauth]" "gradio_huggingfacehub_search==0.0.7" "APScheduler"
+
+ COPY --chown=1000 . ${HOME}/app
+ RUN git clone https://github.com/ggerganov/llama.cpp
+ RUN pip install -r llama.cpp/requirements.txt
+
+ ENV PYTHONPATH=${HOME}/app \
+     PYTHONUNBUFFERED=1 \
+     HF_HUB_ENABLE_HF_TRANSFER=1 \
+     GRADIO_ALLOW_FLAGGING=never \
+     GRADIO_NUM_PORTS=1 \
+     GRADIO_SERVER_NAME=0.0.0.0 \
+     GRADIO_THEME=huggingface \
+     TQDM_POSITION=-1 \
+     TQDM_MININTERVAL=1 \
+     SYSTEM=spaces
+
+ ENTRYPOINT /bin/sh start.sh
app.py ADDED
@@ -0,0 +1,199 @@
+ import os
+ import shutil
+ import subprocess
+ import signal
+ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
+ import gradio as gr
+
+ from huggingface_hub import create_repo, HfApi
+ from huggingface_hub import snapshot_download
+ from huggingface_hub import whoami
+ from huggingface_hub import ModelCard
+
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
+
+ from apscheduler.schedulers.background import BackgroundScheduler
+
+ from textwrap import dedent
+
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+
+ def lora_to_gguf(model_path, lora_path, lora_gguf_path):
+     # Convert a local LoRA adapter to GGUF with llama.cpp's conversion script.
+     transform_command = f"python convert_lora_to_gguf.py --base ../{model_path} ../{lora_path} --outfile {lora_gguf_path}"
+
+     os.chdir("llama.cpp")
+     print(f"Current working directory: {os.getcwd()}")
+     print(f"Files in the current directory: {os.listdir('.')}")
+
+     if not os.path.isfile(f"../{model_path}"):
+         raise Exception(f"Model file not found: {model_path}")
+
+     if not os.path.isfile(f"../{lora_path}"):
+         raise Exception(f"LoRA adapter file not found: {lora_path}")
+
+     print("Running transform command...")
+     process = subprocess.Popen(transform_command, shell=True)
+
+     try:
+         process.wait(timeout=800)
+     except subprocess.TimeoutExpired:
+         print("Transform timed out")
+         process.send_signal(signal.SIGINT)
+         try:
+             process.wait(timeout=5)
+         except subprocess.TimeoutExpired:
+             print("Transform process still didn't stop. Forcefully stopping process...")
+             process.kill()
+
+     os.chdir("..")
+
+ def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | None):
+     if oauth_token is None or oauth_token.token is None:
+         raise ValueError("You must be logged in to use this Space.")
+     model_name = model_id.split('/')[-1]
+     lora_name = lora_id.split('/')[-1]
+     fp16 = f"{lora_name}.fp16.gguf"
+
+     try:
+         api = HfApi(token=oauth_token.token)
+
+         dl_pattern = ["*.md", "*.json", "*.model"]
+
+         # Download safetensors weights if the repo has them, otherwise fall back to .bin.
+         pattern = (
+             "*.safetensors"
+             if any(
+                 file.path.endswith(".safetensors")
+                 for file in api.list_repo_tree(
+                     repo_id=model_id,
+                     recursive=True,
+                 )
+             )
+             else "*.bin"
+         )
+
+         dl_pattern += [pattern]
+
+         api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+         print("Model downloaded successfully!")
+         print(f"Current working directory: {os.getcwd()}")
+         print(f"Model directory contents: {os.listdir(model_name)}")
+
+         api.snapshot_download(repo_id=lora_id, local_dir=lora_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+         print("LoRA downloaded successfully!")
+         print(f"Current working directory: {os.getcwd()}")
+         print(f"LoRA directory contents: {os.listdir(lora_name)}")
+
+         conversion_script = "convert_lora_to_gguf.py"
+         fp16_conversion = f"python llama.cpp/{conversion_script} --base {model_name} {lora_name} --outtype f16 --outfile {fp16}"
+         result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
+         print(result)
+         if result.returncode != 0:
+             raise Exception(f"Error converting to fp16: {result.stderr}")
+         print("LoRA converted to fp16 successfully!")
+         print(f"Converted LoRA-GGUF path: {fp16}")
+
+         username = whoami(oauth_token.token)["name"]
+         new_repo_url = api.create_repo(repo_id=f"{username}/{lora_name}-GGUF", exist_ok=True, private=private_repo)
+         new_repo_id = new_repo_url.repo_id
+         print("Repo created successfully!", new_repo_url)
+
+         try:
+             card = ModelCard.load(model_id, token=oauth_token.token)
+         except Exception:
+             card = ModelCard("")
+         if card.data.tags is None:
+             card.data.tags = []
+         card.data.tags.append("llama-cpp")
+         card.data.tags.append("LoRA-GGUF")
+         card.data.base_model = model_id
+         card.text = dedent(
+             f"""
+             # {new_repo_id}
+             This LoRA was converted to GGUF format from [`{lora_id}`](https://huggingface.co/{lora_id}) using llama.cpp. The base model is [`{model_id}`](https://huggingface.co/{model_id}).
+
+             ## Use with llama.cpp
+             You need to merge the LoRA-GGUF into the base model.
+             """
+         )
+         card.save("README.md")
+
+         try:
+             print(f"Uploading LoRA-GGUF: {fp16}")
+             api.upload_file(
+                 path_or_fileobj=fp16,
+                 path_in_repo=fp16,
+                 repo_id=new_repo_id,
+             )
+         except Exception as e:
+             raise Exception(f"Error uploading LoRA-GGUF: {e}")
+
+         api.upload_file(
+             path_or_fileobj="README.md",
+             path_in_repo="README.md",
+             repo_id=new_repo_id,
+         )
+         print("Uploaded successfully!")
+
+         return (
+             f'Find your repo <a href=\'{new_repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
+             "20.jpg",
+         )
+     except Exception as e:
+         return (f"Error: {e}", "error.png")
+     finally:
+         shutil.rmtree(model_name, ignore_errors=True)
+         shutil.rmtree(lora_name, ignore_errors=True)
+         print("Folder cleaned up successfully!")
+
+
+ css = """/* Custom CSS to allow scrolling */
+ .gradio-container {overflow-y: auto;}
+ """
+
+ with gr.Blocks(css=css) as demo:
+     with gr.Row():
+         gr.Markdown("You must be logged in to use this Space.")
+         gr.LoginButton(min_width=250)
+     model_id = HuggingfaceHubSearch(
+         label="Huggingface Hub Model ID",
+         placeholder="Search for model id on Huggingface",
+         search_type="model",
+     )
+     lora_id = HuggingfaceHubSearch(
+         label="Huggingface Hub LoRA Model ID",
+         placeholder="Search for LoRA model id on Huggingface",
+         search_type="model",
+     )
+     private_repo = gr.Checkbox(
+         value=False,
+         label="Private Repo",
+         info="Create a private repo under your username."
+     )
+
+     iface = gr.Interface(
+         fn=process_lora,
+         inputs=[
+             model_id,
+             lora_id,
+             private_repo,
+         ],
+         outputs=[
+             gr.Markdown(label="output"),
+             gr.Image(show_label=False),
+         ],
+         title="Create your own LoRA-GGUF",
+         description="The Space takes a base model repo and a LoRA adapter repo as input.",
+         api_name=False
+     )
+
+ def restart_space():
+     HfApi().restart_space(repo_id="lee-ite/Merge-LoRA-into-GGUF", token=HF_TOKEN, factory_reboot=True)
+
+ scheduler = BackgroundScheduler()
+ scheduler.add_job(restart_space, "interval", seconds=21600)
+ scheduler.start()
+
+ demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
start.sh ADDED
@@ -0,0 +1,5 @@
+ cd llama.cpp
+ make clean && make all -j
+
+ cd ..
+ python app.py