lee-ite committed on
Commit cbc43f0
1 Parent(s): 9a2decb
Files changed (3)
  1. Dockerfile +1 -0
  2. app.py +114 -63
  3. start.sh +3 -0
Dockerfile CHANGED
@@ -10,6 +10,7 @@ RUN apt-get update && \
     git-lfs \
     wget \
     curl \
+    jq \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
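jq is installed so that the updated start.sh (below) can parse the JSON returned by the GitHub releases API and extract asset download URLs. Purely as an illustration (not part of the Space), the same lookup in plain Python with only the standard library:

# Illustrative sketch: the release-asset lookup start.sh performs with
# curl + jq, written with the Python standard library.
import json
import urllib.request

RELEASES_URL = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"

with urllib.request.urlopen(RELEASES_URL) as resp:
    release = json.load(resp)

# Equivalent of: jq -r '.assets[].browser_download_url' | grep -i ubuntu
ubuntu_urls = [
    asset["browser_download_url"]
    for asset in release["assets"]
    if "ubuntu" in asset["browser_download_url"].lower()
]
print("\n".join(ubuntu_urls))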
 
app.py CHANGED
@@ -18,13 +18,74 @@ from textwrap import dedent
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-
-def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | None):
+def upload_readme_to_hf(new_repo_id, model_id, lora_id, api, oauth_token: gr.OAuthToken | None):
+    try:
+        card = ModelCard.load(model_id, token=oauth_token.token)
+    except Exception:
+        card = ModelCard("")
+    if card.data.tags is None:
+        card.data.tags = []
+    card.data.tags.append("llama-cpp")
+    card.data.tags.append("LoRA-GGUF")
+    card.data.base_model = model_id
+    card.text = dedent(
+        f"""
+        # {new_repo_id}
+        This LoRA was converted to GGUF format from [`{lora_id}`](https://huggingface.co/{lora_id}) using llama.cpp.
+        The base model is [`{model_id}`](https://huggingface.co/{model_id}).
+
+        ## Use with llama.cpp
+        You need to merge the LoRA-GGUF into the base model using llama.cpp.
+        """
+    )
+    card.save("README.md")
+
+    api.upload_file(
+        path_or_fileobj="README.md",
+        path_in_repo="README.md",
+        repo_id=new_repo_id,
+    )
+    print("README uploaded successfully!")
+
+def upload_file_to_hf(upload_file_name, new_repo_id, api):
+    try:
+        print(f"Uploading: {upload_file_name}")
+        api.upload_file(
+            path_or_fileobj=upload_file_name,
+            path_in_repo=upload_file_name,
+            repo_id=new_repo_id,
+        )
+    except Exception as e:
+        raise Exception(f"Error uploading {upload_file_name}: {e}")
+
+def export_lora_to_gguf(model_fp16, lora_fp16, merged_name):
+    script = f"./build/bin/llama-export-lora -m {model_fp16} -o {merged_name}-fp16.gguf --lora {lora_fp16}"
+    export_result = subprocess.run(script, shell=True, capture_output=True)
+    print(export_result)
+    if export_result.returncode != 0:
+        raise Exception(f"Error merging LoRA into base GGUF: {export_result.stderr}")
+    print("LoRA merged into base GGUF successfully!")
+    print(f"Merged GGUF path: {merged_name}-fp16.gguf")
+    return merged_name
+
+def quantize_merged_gguf(merged_fp16, method):
+    script = f"./build/bin/llama-quantize {merged_fp16}-fp16.gguf {merged_fp16}-{method}.gguf {method}"
+    quantize_result = subprocess.run(script, shell=True, capture_output=True)
+    print(quantize_result)
+    if quantize_result.returncode != 0:
+        raise Exception(f"Error quantizing to {method}: {quantize_result.stderr}")
+    print(f"Merged GGUF quantized to {method} successfully!")
+    print(f"{method} GGUF file path: {merged_fp16}-{method}.gguf")
+    return f"{merged_fp16}-{method}.gguf"
+
+
+def process_lora(model_id, lora_id, merged_name, methods, private_repo, oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must log in to use")
     model_name = model_id.split('/')[-1]
     lora_name = lora_id.split('/')[-1]
-    fp16 = f"{lora_name}-fp16.gguf"
+    model_fp16 = f"{model_name}-fp16.gguf"
+    lora_fp16 = f"{lora_name}-fp16.gguf"
 
     try:
         api = HfApi(token=oauth_token)
@@ -55,57 +116,51 @@ def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | N
         print(f"Current working directory: {os.getcwd()}")
         print(f"LoRA directory contents: {os.listdir(lora_name)}")
 
-        conversion_script = "convert_lora_to_gguf.py"
-        fp16_conversion = f"python llama.cpp/{conversion_script} --base {model_name} {lora_name} --outtype f16 --outfile {fp16}"
-        result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
-        print(result)
-        if result.returncode != 0:
-            raise Exception(f"Error converting to fp16: {result.stderr}")
+
+        lora_conversion_script = "convert_lora_to_gguf.py"
+        lora_fp16_conversion = f"python llama.cpp/{lora_conversion_script} --base {model_name} {lora_name} --outtype f16 --outfile {lora_fp16}"
+        lora_result = subprocess.run(lora_fp16_conversion, shell=True, capture_output=True)
+        print(lora_result)
+        if lora_result.returncode != 0:
+            raise Exception(f"Error converting to fp16: {lora_result.stderr}")
         print("LoRA converted to fp16 successfully!")
-        print(f"Converted LoRA-GGUF path: {fp16}")
+        print(f"Converted LoRA-GGUF path: {lora_fp16}")
 
         username = whoami(oauth_token.token)["name"]
-        new_repo_url = api.create_repo(repo_id=f"{username}/{lora_name}-GGUF", exist_ok=True, private=private_repo)
+        new_repo_url = api.create_repo(repo_id=f"{username}/{merged_name}", exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)
 
-        try:
-            card = ModelCard.load(model_id, token=oauth_token.token)
-        except:
-            card = ModelCard("")
-        if card.data.tags is None:
-            card.data.tags = []
-        card.data.tags.append("llama-cpp")
-        card.data.tags.append("LoRA-GGUF")
-        card.data.base_model = model_id
-        card.text = dedent(
-            f"""
-            # {new_repo_id}
-            This LoRA was converted to GGUF format from [`{lora_id}`](https://huggingface.co/{lora_id}) using llama.cpp.
-            The base Model is [`{model_id}`](https://huggingface.co/{model_id}).
-
-            ## Use with llama.cpp
-            You need to merge the LoRA-GGUF into the Base-Model use llama.cpp.
-            """
-        )
-        card.save(f"README.md")
-
-        try:
-            print(f"Uploading LoRA-GGUF: {fp16}")
-            api.upload_file(
-                path_or_fileobj=fp16,
-                path_in_repo=fp16,
-                repo_id=new_repo_id,
-            )
-        except Exception as e:
-            raise Exception(f"Error uploading LoRA-GGUF: {e}")
+        upload_readme_to_hf(new_repo_id, model_id, lora_id, api, oauth_token)
+        upload_file_to_hf(lora_fp16, new_repo_id, api)
 
-        api.upload_file(
-            path_or_fileobj=f"README.md",
-            path_in_repo=f"README.md",
-            repo_id=new_repo_id,
-        )
-        print(f"Uploaded successfully!")
+        base_conversion_script = "convert_hf_to_gguf.py"
+        base_fp16_conversion = f"python llama.cpp/{base_conversion_script} {model_name} --outtype f16 --outfile {model_fp16}"
+        base_result = subprocess.run(base_fp16_conversion, shell=True, capture_output=True)
+        print(base_result)
+        if base_result.returncode != 0:
+            raise Exception(f"Error converting to fp16: {base_result.stderr}")
+        print("Base model converted to fp16 successfully!")
+        print(f"Converted GGUF path: {model_fp16}")
+        upload_file_to_hf(model_fp16, new_repo_id, api)
+
+        print("Merging LoRA into GGUF => fp16")
+        merged_fp16 = export_lora_to_gguf(model_fp16, lora_fp16, merged_name)
+        upload_file_to_hf(f"{merged_name}-fp16.gguf", new_repo_id, api)
+
+        # Clean storage: hf-model & hf-lora
+        shutil.rmtree(model_name, ignore_errors=True)
+        shutil.rmtree(lora_name, ignore_errors=True)
+        print("Folder cleaned up successfully!")
+
+        for method in methods:
+            print(f"Quantizing merged fp16-gguf to {method}")
+            quantized_name = quantize_merged_gguf(merged_fp16, method)
+            upload_file_to_hf(quantized_name, new_repo_id, api)
+            os.remove(quantized_name)
+            print("Removed the uploaded model.")
+        os.remove(f"{merged_fp16}-fp16.gguf")
+        print("Removed the fp16 GGUF file.")
 
         return (
            f'Everything done! Find your repo {new_repo_id}'
@@ -113,30 +168,24 @@ def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | N
     except Exception as e:
         return (f"Error: {e}")
     finally:
-        shutil.rmtree(model_name, ignore_errors=True)
-        shutil.rmtree(lora_name, ignore_errors=True)
         print("Folder cleaned up successfully!")
 
-def list_organizations(oauth_token: Optional[gr.OAuthToken]) -> str:
-    if oauth_token is None:
-        return "Please log in to list organizations."
-    org_names = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
-    return f"You belong to {', '.join(org_names)}."
 
 
-css = """/* Custom CSS to allow scrolling */
-.gradio-container {overflow-y: auto;}
+css = """
+#output {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+}
 """
 
-DESCRIPTION = "**🤯Turn LoRA adapter to GGUF and merge into Base GGUF!🤯**"
+DESCRIPTION = "# 🤯Turn LoRA adapter to GGUF and merge into Base GGUF!🤯"
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Row():
-        with gr.Column():
-            gr.Markdown("You must log in to create your repo!")
-            gr.LoginButton().activate()
-        with gr.Column():
-            gr.Markdown().attach_load_event(list_organizations, None)
+        gr.Markdown("You must log in to create your repo!")
+        gr.LoginButton().activate()
 
     with gr.Row():
         with gr.Column():
@@ -160,9 +209,11 @@ with gr.Blocks(css=css) as demo:
         submit_btn = gr.Button(value="Submit")
 
         with gr.Column():
+            quantize_methods = gr.CheckboxGroup(["Q4_K_S", "Q4_K_M", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"], label="Quantize Methods", info="Bigger is Better")
+            cool_name = gr.Textbox(label="Your final model name", placeholder="Enter a cool name:")
             gr.Markdown("# See your repo!")
             output_text = gr.Textbox()
-    submit_btn.click(process_lora, [model_id, lora_id, private_repo], [output_text])
+    submit_btn.click(process_lora, [model_id, lora_id, cool_name, quantize_methods, private_repo], [output_text])
 
 def restart_space():
     HfApi().restart_space(repo_id="lee-ite/LoRA-To-GGUF", token=HF_TOKEN, factory_reboot=True)
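The reworked app.py now chains four llama.cpp steps: convert_lora_to_gguf.py turns the adapter into a LoRA-GGUF, convert_hf_to_gguf.py turns the base model into an fp16 GGUF, llama-export-lora merges the two, and llama-quantize produces each requested quantization. A minimal standalone sketch of the same pipeline, where "base", "adapter", and "merged" are placeholder names rather than anything from the Space:

# Sketch of the pipeline app.py drives, outside Gradio. The directory and
# file names ("base", "adapter", "merged") are placeholders.
import subprocess

def run(cmd):
    # Fail loudly on a non-zero return code, as app.py does.
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(result.stderr)

# 1. LoRA adapter -> GGUF (needs the base model's HF files for tensor mapping).
run("python llama.cpp/convert_lora_to_gguf.py --base base adapter --outtype f16 --outfile adapter-fp16.gguf")
# 2. Base HF model -> fp16 GGUF.
run("python llama.cpp/convert_hf_to_gguf.py base --outtype f16 --outfile base-fp16.gguf")
# 3. Merge the LoRA weights into the base GGUF.
run("./build/bin/llama-export-lora -m base-fp16.gguf -o merged-fp16.gguf --lora adapter-fp16.gguf")
# 4. Quantize the merged fp16 GGUF for each requested method.
for method in ["Q4_K_M", "Q8_0"]:
    run(f"./build/bin/llama-quantize merged-fp16.gguf merged-{method}.gguf {method}")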
start.sh CHANGED
@@ -1,2 +1,5 @@
 ls
+curl -s https://api.github.com/repos/ggerganov/llama.cpp/releases/latest | jq -r '.assets[].browser_download_url' | grep -i ubuntu | while read -r url; do curl -LO "$url"; done
+ls
+unzip *ubuntu*.zip
 venv/bin/python app.py
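start.sh now downloads the prebuilt Ubuntu binaries from the latest llama.cpp release and unzips them before launching the app, which is presumably where the ./build/bin/llama-export-lora and ./build/bin/llama-quantize paths used by app.py come from. To consume a model the Space produces, something like the following should work; the repo and file names are placeholders, and llama-cli is the chat binary shipped in the same release archives:

# Illustrative: download a quantized GGUF produced by the Space and run it
# with llama.cpp's CLI. Repo and file names are placeholders.
import subprocess
from huggingface_hub import hf_hub_download

gguf_path = hf_hub_download(repo_id="your-name/your-merged-model", filename="your-merged-model-Q4_K_M.gguf")
subprocess.run(["./build/bin/llama-cli", "-m", gguf_path, "-p", "Hello", "-n", "64"])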