lee-ite committed on
Commit cbc43f0
1 Parent(s): 9a2decb
Files changed (3)
  1. Dockerfile +1 -0
  2. app.py +114 -63
  3. start.sh +3 -0
Dockerfile CHANGED
@@ -10,6 +10,7 @@ RUN apt-get update && \
     git-lfs \
     wget \
     curl \
+    jq \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
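jq is installed so that the updated start.sh (below) can parse the JSON returned by the GitHub releases API and extract asset download URLs. Purely as an illustration (not part of the Space), the same lookup in plain Python with only the standard library:

# Illustrative sketch: the release-asset lookup start.sh performs with
# curl + jq, written with the Python standard library.
import json
import urllib.request

RELEASES_URL = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"

with urllib.request.urlopen(RELEASES_URL) as resp:
    release = json.load(resp)

# Equivalent of: jq -r '.assets[].browser_download_url' | grep -i ubuntu
ubuntu_urls = [
    asset["browser_download_url"]
    for asset in release["assets"]
    if "ubuntu" in asset["browser_download_url"].lower()
]
print("\n".join(ubuntu_urls))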
 
app.py CHANGED
@@ -18,13 +18,74 @@ from textwrap import dedent
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-
-def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | None):
+def upload_readme_to_hf(new_repo_id, model_id, lora_id, api, oauth_token: gr.OAuthToken | None):
+    try:
+        card = ModelCard.load(model_id, token=oauth_token.token)
+    except Exception:
+        card = ModelCard("")
+    if card.data.tags is None:
+        card.data.tags = []
+    card.data.tags.append("llama-cpp")
+    card.data.tags.append("LoRA-GGUF")
+    card.data.base_model = model_id
+    card.text = dedent(
+        f"""
+        # {new_repo_id}
+        This LoRA was converted to GGUF format from [`{lora_id}`](https://huggingface.co/{lora_id}) using llama.cpp.
+        The base model is [`{model_id}`](https://huggingface.co/{model_id}).
+
+        ## Use with llama.cpp
+        You need to merge the LoRA-GGUF into the base model using llama.cpp.
+        """
+    )
+    card.save("README.md")
+
+    api.upload_file(
+        path_or_fileobj="README.md",
+        path_in_repo="README.md",
+        repo_id=new_repo_id,
+    )
+    print("README uploaded successfully!")
+
+def upload_file_to_hf(upload_file_name, new_repo_id, api):
+    try:
+        print(f"Uploading: {upload_file_name}")
+        api.upload_file(
+            path_or_fileobj=upload_file_name,
+            path_in_repo=upload_file_name,
+            repo_id=new_repo_id,
+        )
+    except Exception as e:
+        raise Exception(f"Error uploading {upload_file_name}: {e}")
+
+def export_lora_to_gguf(model_fp16, lora_fp16, merged_name):
+    script = f"./build/bin/llama-export-lora -m {model_fp16} -o {merged_name}-fp16.gguf --lora {lora_fp16}"
+    export_result = subprocess.run(script, shell=True, capture_output=True)
+    print(export_result)
+    if export_result.returncode != 0:
+        raise Exception(f"Error merging LoRA into base GGUF: {export_result.stderr}")
+    print("LoRA merged into base GGUF successfully!")
+    print(f"Merged GGUF path: {merged_name}-fp16.gguf")
+    return merged_name
+
+def quantize_merged_gguf(merged_fp16, method):
+    script = f"./build/bin/llama-quantize {merged_fp16}-fp16.gguf {merged_fp16}-{method}.gguf {method}"
+    quantize_result = subprocess.run(script, shell=True, capture_output=True)
+    print(quantize_result)
+    if quantize_result.returncode != 0:
+        raise Exception(f"Error quantizing to {method}: {quantize_result.stderr}")
+    print(f"Merged GGUF quantized to {method} successfully!")
+    print(f"{method} GGUF file path: {merged_fp16}-{method}.gguf")
+    return f"{merged_fp16}-{method}.gguf"
+
+
+def process_lora(model_id, lora_id, merged_name, methods, private_repo, oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must log in to use")
     model_name = model_id.split('/')[-1]
     lora_name = lora_id.split('/')[-1]
-    fp16 = f"{lora_name}-fp16.gguf"
+    model_fp16 = f"{model_name}-fp16.gguf"
+    lora_fp16 = f"{lora_name}-fp16.gguf"
 
     try:
         api = HfApi(token=oauth_token)
@@ -55,57 +116,51 @@ def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | N
         print(f"Current working directory: {os.getcwd()}")
         print(f"LoRA directory contents: {os.listdir(lora_name)}")
 
-        conversion_script = "convert_lora_to_gguf.py"
-        fp16_conversion = f"python llama.cpp/{conversion_script} --base {model_name} {lora_name} --outtype f16 --outfile {fp16}"
-        result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
-        print(result)
-        if result.returncode != 0:
-            raise Exception(f"Error converting to fp16: {result.stderr}")
+
+        lora_conversion_script = "convert_lora_to_gguf.py"
+        lora_fp16_conversion = f"python llama.cpp/{lora_conversion_script} --base {model_name} {lora_name} --outtype f16 --outfile {lora_fp16}"
+        lora_result = subprocess.run(lora_fp16_conversion, shell=True, capture_output=True)
+        print(lora_result)
+        if lora_result.returncode != 0:
+            raise Exception(f"Error converting to fp16: {lora_result.stderr}")
         print("LoRA converted to fp16 successfully!")
-        print(f"Converted LoRA-GGUF path: {fp16}")
+        print(f"Converted LoRA-GGUF path: {lora_fp16}")
 
         username = whoami(oauth_token.token)["name"]
-        new_repo_url = api.create_repo(repo_id=f"{username}/{lora_name}-GGUF", exist_ok=True, private=private_repo)
+        new_repo_url = api.create_repo(repo_id=f"{username}/{merged_name}", exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)
 
-        try:
-            card = ModelCard.load(model_id, token=oauth_token.token)
-        except:
-            card = ModelCard("")
-        if card.data.tags is None:
-            card.data.tags = []
-        card.data.tags.append("llama-cpp")
-        card.data.tags.append("LoRA-GGUF")
-        card.data.base_model = model_id
-        card.text = dedent(
-            f"""
-            # {new_repo_id}
-            This LoRA was converted to GGUF format from [`{lora_id}`](https://huggingface.co/{lora_id}) using llama.cpp.
-            The base Model is [`{model_id}`](https://huggingface.co/{model_id}).
-
-            ## Use with llama.cpp
-            You need to merge the LoRA-GGUF into the Base-Model use llama.cpp.
-            """
-        )
-        card.save(f"README.md")
-
-        try:
-            print(f"Uploading LoRA-GGUF: {fp16}")
-            api.upload_file(
-                path_or_fileobj=fp16,
-                path_in_repo=fp16,
-                repo_id=new_repo_id,
-            )
-        except Exception as e:
-            raise Exception(f"Error uploading LoRA-GGUF: {e}")
+        upload_readme_to_hf(new_repo_id, model_id, lora_id, api, oauth_token)
+        upload_file_to_hf(lora_fp16, new_repo_id, api)
 
-        api.upload_file(
-            path_or_fileobj=f"README.md",
-            path_in_repo=f"README.md",
-            repo_id=new_repo_id,
-        )
-        print(f"Uploaded successfully!")
+        base_conversion_script = "convert_hf_to_gguf.py"
+        base_fp16_conversion = f"python llama.cpp/{base_conversion_script} {model_name} --outtype f16 --outfile {model_fp16}"
+        base_result = subprocess.run(base_fp16_conversion, shell=True, capture_output=True)
+        print(base_result)
+        if base_result.returncode != 0:
+            raise Exception(f"Error converting to fp16: {base_result.stderr}")
+        print("Base model converted to fp16 successfully!")
+        print(f"Converted GGUF path: {model_fp16}")
+        upload_file_to_hf(model_fp16, new_repo_id, api)
+
+        print("Merging LoRA into GGUF => fp16")
+        merged_fp16 = export_lora_to_gguf(model_fp16, lora_fp16, merged_name)
+        upload_file_to_hf(f"{merged_name}-fp16.gguf", new_repo_id, api)
+
+        # Clean storage: hf-model & hf-lora
+        shutil.rmtree(model_name, ignore_errors=True)
+        shutil.rmtree(lora_name, ignore_errors=True)
+        print("Folder cleaned up successfully!")
+
+        for method in methods:
+            print(f"Quantizing merged fp16-gguf to {method}")
+            quantized_name = quantize_merged_gguf(merged_fp16, method)
+            upload_file_to_hf(quantized_name, new_repo_id, api)
+            os.remove(quantized_name)
+            print("Removed the uploaded model.")
+        os.remove(f"{merged_fp16}-fp16.gguf")
+        print("Removed the fp16 GGUF file.")
 
         return (
            f'Everything done! Find your repo {new_repo_id}'
@@ -113,30 +168,24 @@ def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | N
     except Exception as e:
         return (f"Error: {e}")
     finally:
-        shutil.rmtree(model_name, ignore_errors=True)
-        shutil.rmtree(lora_name, ignore_errors=True)
         print("Folder cleaned up successfully!")
 
-def list_organizations(oauth_token: Optional[gr.OAuthToken]) -> str:
-    if oauth_token is None:
-        return "Please log in to list organizations."
-    org_names = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
-    return f"You belong to {', '.join(org_names)}."
 
 
-css = """/* Custom CSS to allow scrolling */
-.gradio-container {overflow-y: auto;}
+css = """
+#output {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+}
 """
 
-DESCRIPTION = "**🤯Turn LoRA adapter to GGUF and merge into Base GGUF!🤯**"
+DESCRIPTION = "# 🤯Turn LoRA adapter to GGUF and merge into Base GGUF!🤯"
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Row():
-        with gr.Column():
-            gr.Markdown("You must log in to create your repo!")
-            gr.LoginButton().activate()
-        with gr.Column():
-            gr.Markdown().attach_load_event(list_organizations, None)
+        gr.Markdown("You must log in to create your repo!")
+        gr.LoginButton().activate()
 
     with gr.Row():
         with gr.Column():
@@ -160,9 +209,11 @@ with gr.Blocks(css=css) as demo:
         submit_btn = gr.Button(value="Submit")
 
         with gr.Column():
+            quantize_methods = gr.CheckboxGroup(["Q4_K_S", "Q4_K_M", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"], label="Quantize Methods", info="Bigger is Better")
+            cool_name = gr.Textbox(label="Your final model name", placeholder="Enter a cool name:")
             gr.Markdown("# See your repo!")
             output_text = gr.Textbox()
-    submit_btn.click(process_lora, [model_id, lora_id, private_repo], [output_text])
+    submit_btn.click(process_lora, [model_id, lora_id, cool_name, quantize_methods, private_repo], [output_text])
 
 def restart_space():
     HfApi().restart_space(repo_id="lee-ite/LoRA-To-GGUF", token=HF_TOKEN, factory_reboot=True)
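The reworked app.py now chains four llama.cpp steps: convert_lora_to_gguf.py turns the adapter into a LoRA-GGUF, convert_hf_to_gguf.py turns the base model into an fp16 GGUF, llama-export-lora merges the two, and llama-quantize produces each requested quantization. A minimal standalone sketch of the same pipeline, where "base", "adapter", and "merged" are placeholder names rather than anything from the Space:

# Sketch of the pipeline app.py drives, outside Gradio. The directory and
# file names ("base", "adapter", "merged") are placeholders.
import subprocess

def run(cmd):
    # Fail loudly on a non-zero return code, as app.py does.
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(result.stderr)

# 1. LoRA adapter -> GGUF (needs the base model's HF files for tensor mapping).
run("python llama.cpp/convert_lora_to_gguf.py --base base adapter --outtype f16 --outfile adapter-fp16.gguf")
# 2. Base HF model -> fp16 GGUF.
run("python llama.cpp/convert_hf_to_gguf.py base --outtype f16 --outfile base-fp16.gguf")
# 3. Merge the LoRA weights into the base GGUF.
run("./build/bin/llama-export-lora -m base-fp16.gguf -o merged-fp16.gguf --lora adapter-fp16.gguf")
# 4. Quantize the merged fp16 GGUF for each requested method.
for method in ["Q4_K_M", "Q8_0"]:
    run(f"./build/bin/llama-quantize merged-fp16.gguf merged-{method}.gguf {method}")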
start.sh CHANGED
@@ -1,2 +1,5 @@
 ls
+curl -s https://api.github.com/repos/ggerganov/llama.cpp/releases/latest | jq -r '.assets[].browser_download_url' | grep -i ubuntu | while read -r url; do curl -LO "$url"; done
+ls
+unzip *ubuntu*.zip
 venv/bin/python app.py
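start.sh now downloads the prebuilt Ubuntu binaries from the latest llama.cpp release and unzips them before launching the app, which is presumably where the ./build/bin/llama-export-lora and ./build/bin/llama-quantize paths used by app.py come from. To consume a model the Space produces, something like the following should work; the repo and file names are placeholders, and llama-cli is the chat binary shipped in the same release archives:

# Illustrative: download a quantized GGUF produced by the Space and run it
# with llama.cpp's CLI. Repo and file names are placeholders.
import subprocess
from huggingface_hub import hf_hub_download

gguf_path = hf_hub_download(repo_id="your-name/your-merged-model", filename="your-merged-model-Q4_K_M.gguf")
subprocess.run(["./build/bin/llama-cli", "-m", gguf_path, "-p", "Hello", "-n", "64"])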