Commit: update

Files changed:
- Dockerfile +1 -0
- app.py +114 -63
- start.sh +3 -0
Dockerfile
CHANGED
@@ -10,6 +10,7 @@ RUN apt-get update && \
     git-lfs \
     wget \
     curl \
+    jq \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
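The jq added here is consumed by start.sh (end of this commit), which pipes the GitHub releases feed for ggerganov/llama.cpp through jq to locate prebuilt Ubuntu binaries.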
app.py
CHANGED
@@ -18,13 +18,74 @@ from textwrap import dedent
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-
-def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | None):
+def upload_reme_to_hf(new_repo_id, api, oauth_token: gr.OAuthToken | None):
+    try:
+        card = ModelCard.load(model_id, token=oauth_token.token)
+    except:
+        card = ModelCard("")
+    if card.data.tags is None:
+        card.data.tags = []
+    card.data.tags.append("llama-cpp")
+    card.data.tags.append("LoRA-GGUF")
+    card.data.base_model = model_id
+    card.text = dedent(
+        f"""
+        # {new_repo_id}
+        This LoRA was converted to GGUF format from [`{lora_id}`](https://huggingface.co/{lora_id}) using llama.cpp.
+        The base Model is [`{model_id}`](https://huggingface.co/{model_id}).
+
+        ## Use with llama.cpp
+        You need to merge the LoRA-GGUF into the Base-Model use llama.cpp.
+        """
+    )
+    card.save(f"README.md")
+
+    api.upload_file(
+        path_or_fileobj=f"README.md",
+        path_in_repo=f"README.md",
+        repo_id=new_repo_id,
+    )
+    print(f"Uploaded successfully!")
+
+def upload_file_to_hf(upload_file_name, new_repo_id, api):
+    try:
+        print(f"Uploading LoRA-GGUF: {upload_file_name}")
+        api.upload_file(
+            path_or_fileobj=upload_file_name,
+            path_in_repo=upload_file_name,
+            repo_id=new_repo_id,
+        )
+    except Exception as e:
+        raise Exception(f"Error uploading LoRA-GGUF: {e}")
+
+def export_lora_to_gguf(model_fp16, lora_fp16, merged_name):
+    script = f"./build/bin/llama-export-lora -m {model_fp16} -o {merged_name}-fp16.gguf --lora {lora_fp16}"
+    export_result = subprocess.run(script, shell=True, capture_output=True)
+    print(export_result)
+    if export_result.returncode != 0:
+        raise Exception(f"Error converting to fp16: {export_result.stderr}")
+    print("LoRA converted to fp16 successfully!")
+    print(f"Converted GGUF path: {merged_name}-fp16.gguf")
+    return merged_name
+
+def quantize_merged_gguf(merged_fp16, method):
+    script = f"./build/bin/llama-quantize {merged_fp16}-fp16.gguf {merged_fp16}-{method}.gguf {method}"
+    quantize_result = subprocess.run(script, shell=True, capture_output=True)
+    print(quantize_result)
+    if quantize_result.returncode != 0:
+        raise Exception(f"Error quantizing to {method}: {quantize_result.stderr}")
+    print(f"Merged GGUF quantized to {method} successfully!")
+    print(f"{method} GGUF file path: {merged_fp16}-{method}.gguf")
+    return f"{merged_fp16}-{method}.gguf"
+
+
+def process_lora(model_id, lora_id, merged_name, methods, private_repo, oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must log in to use")
     model_name = model_id.split('/')[-1]
     lora_name = lora_id.split('/')[-1]
-    …
+    model_fp16 = f"{model_name}-fp16.gguf"
+    lora_fp16 = f"{lora_name}-fp16.gguf"
 
     try:
         api = HfApi(token=oauth_token)
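Note: the body of upload_reme_to_hf reads model_id and lora_id, which are not among its parameters, so as committed it relies on those names existing at module level when it runs. Separately, both llama.cpp helpers build one command string and run it with shell=True; below is a minimal sketch of the same two calls using argument lists instead, which sidesteps shell quoting issues. The flags are copied from the helpers above; the file names are hypothetical placeholders.

    import subprocess

    def run_tool(args):
        # Surface stderr on a non-zero exit code, as the helpers above do.
        result = subprocess.run(args, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(result.stderr)

    # Merge the LoRA adapter into the base GGUF (same flags as export_lora_to_gguf).
    run_tool(["./build/bin/llama-export-lora",
              "-m", "base-fp16.gguf",
              "-o", "merged-fp16.gguf",
              "--lora", "lora-fp16.gguf"])

    # Quantize the merged fp16 GGUF (same positional arguments as quantize_merged_gguf).
    run_tool(["./build/bin/llama-quantize",
              "merged-fp16.gguf", "merged-Q4_K_M.gguf", "Q4_K_M"])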
@@ -55,57 +116,51 @@ def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | None):
         print(f"Current working directory: {os.getcwd()}")
         print(f"LoRA directory contents: {os.listdir(lora_name)}")
 
-        …
+
+        lora_conversion_script = "convert_lora_to_gguf.py"
+        lora_fp16_conversion = f"python llama.cpp/{lora_conversion_script} --base {model_name} {lora_name} --outtype f16 --outfile {lora_fp16}"
+        lora_result = subprocess.run(lora_fp16_conversion, shell=True, capture_output=True)
+        print(lora_result)
+        if lora_result.returncode != 0:
+            raise Exception(f"Error converting to fp16: {lora_result.stderr}")
         print("LoRA converted to fp16 successfully!")
-        print(f"Converted LoRA-GGUF path: {…
+        print(f"Converted LoRA-GGUF path: {lora_fp16}")
 
         username = whoami(oauth_token.token)["name"]
-        new_repo_url = api.create_repo(repo_id=f"{username}/{…
+        new_repo_url = api.create_repo(repo_id=f"{username}/{merged_name}", exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)
 
-        try:
-            card = ModelCard.load(model_id, token=oauth_token.token)
-        except:
-            card = ModelCard("")
-        if card.data.tags is None:
-            card.data.tags = []
-        card.data.tags.append("llama-cpp")
-        card.data.tags.append("LoRA-GGUF")
-        card.data.base_model = model_id
-        card.text = dedent(
-            f"""
-            # {new_repo_id}
-            This LoRA was converted to GGUF format from [`{lora_id}`](https://huggingface.co/{lora_id}) using llama.cpp.
-            The base Model is [`{model_id}`](https://huggingface.co/{model_id}).
-
-            ## Use with llama.cpp
-            You need to merge the LoRA-GGUF into the Base-Model use llama.cpp.
-            """
-        )
-        card.save(f"README.md")
-
-        try:
-            print(f"Uploading LoRA-GGUF: {fp16}")
-            api.upload_file(
-                path_or_fileobj=fp16,
-                path_in_repo=fp16,
-                repo_id=new_repo_id,
-            )
-        except Exception as e:
-            raise Exception(f"Error uploading LoRA-GGUF: {e}")
+        upload_reme_to_hf(new_repo_id, api, oauth_token)
+        upload_file_to_hf(lora_fp16, new_repo_id, api)
 
-        …
+        base_conversion_script = "convert_hf_to_gguf.py"
+        base_fp16_conversion = f"python llama.cpp/{base_conversion_script} {model_name} --outtype f16 --outfile {model_fp16}"
+        base_result = subprocess.run(base_fp16_conversion, shell=True, capture_output=True)
+        print(base_result)
+        if base_result.returncode != 0:
+            raise Exception(f"Error converting to fp16: {base_result.stderr}")
+        print("LoRA converted to fp16 successfully!")
+        print(f"Converted GGUF path: {model_fp16}")
+        upload_file_to_hf(model_fp16, new_repo_id, api)
+
+        print(f"Merging LoRA into GGUF => fp16")
+        merged_fp16 = export_lora_to_gguf(model_fp16, lora_fp16, merged_name)
+        upload_file_to_hf(f"{merged_name}-fp16.gguf", new_repo_id, api)
+
+        # Clean storage: hf-model & hf-lora
+        shutil.rmtree(model_name, ignore_errors=True)
+        shutil.rmtree(lora_name, ignore_errors=True)
+        print("Folder cleaned up successfully!")
+
+        for method in methods:
+            print(f"Quantizing merged fp16-gguf to {method}")
+            quantized_name = quantize_merged_gguf(merged_fp16, method)
+            upload_file_to_hf(quantized_name, new_repo_id, api)
+            os.remove(quantized_name)
+            print("Removed the uploaded model.")
+        os.remove(merged_fp16)
+        print("Remove the fp16 GGUF file.")
 
         return (
             f'Everything done! Find your repo {new_repo_id}'
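The Hub calls above reduce to a short huggingface_hub sequence. A minimal sketch, assuming a plain access-token string (the Space gets one from gr.OAuthToken.token; note that HfApi(token=oauth_token) above receives the token object itself, while whoami receives the string) and hypothetical repo and file names:

    from huggingface_hub import HfApi, whoami

    token = "hf_..."  # hypothetical placeholder
    api = HfApi(token=token)

    username = whoami(token)["name"]
    repo_url = api.create_repo(repo_id=f"{username}/my-merged-model",
                               exist_ok=True, private=False)

    # Upload a local GGUF under the same name in the new repo.
    api.upload_file(
        path_or_fileobj="merged-fp16.gguf",
        path_in_repo="merged-fp16.gguf",
        repo_id=repo_url.repo_id,
    )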
@@ -113,30 +168,24 @@ def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | None):
     except Exception as e:
         return (f"Error: {e}")
     finally:
-        shutil.rmtree(model_name, ignore_errors=True)
-        shutil.rmtree(lora_name, ignore_errors=True)
         print("Folder cleaned up successfully!")
 
-def list_organizations(oauth_token: Optional[gr.OAuthToken]) -> str:
-    if oauth_token is None:
-        return "Please log in to list organizations."
-    org_names = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
-    return f"You belong to {', '.join(org_names)}."
 
 
-css="""
-…
+css = """
+#output {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+}
 """
 
-DESCRIPTION = "…
+DESCRIPTION = "# 🤯Turn LoRA adapter to GGUF and merge into Base GGUF!🤯"
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Row():
-        …
-        gr.LoginButton().activate()
-        with gr.Column():
-            gr.Markdown().attach_load_event(list_organizations, None)
+        gr.Markdown("You must log in to create your repo!")
+        gr.LoginButton().activate()
 
     with gr.Row():
         with gr.Column():
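Note: the #output CSS rule only affects a component created with elem_id="output", and no component visible in this diff sets that id.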
@@ -160,9 +209,11 @@ with gr.Blocks(css=css) as demo:
             submit_btn = gr.Button(value="Submit")
 
         with gr.Column():
+            quantize_methods = gr.CheckboxGroup(["Q4_K_S", "Q4_K_M", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"], label="Quantize Methods", info="Bigger is Better")
+            cool_name = gr.Textbox(label="Your final model name", placeholder="Enter a cool name:")
             gr.Markdown("# See your repo!")
             output_text = gr.Textbox()
-    submit_btn.click(process_lora, [model_id, lora_id, private_repo], [output_text])
+    submit_btn.click(process_lora, [model_id, lora_id, cool_name, quantize_methods, private_repo], [output_text])
 
 def restart_space():
     HfApi().restart_space(repo_id="lee-ite/LoRA-To-GGUF", token=HF_TOKEN, factory_reboot=True)
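Gradio delivers a CheckboxGroup value to a click handler as a list of the selected strings, which is what the "for method in methods" loop in process_lora iterates over. A minimal, self-contained sketch of that wiring (choices trimmed, names hypothetical):

    import gradio as gr

    def report(methods):
        # `methods` arrives as a list of the ticked options, e.g. ["Q4_K_M", "Q8_0"]
        return f"Will quantize to: {', '.join(methods) if methods else 'nothing'}"

    with gr.Blocks() as demo:
        methods = gr.CheckboxGroup(["Q4_K_M", "Q8_0"], label="Quantize Methods")
        out = gr.Textbox()
        gr.Button("Submit").click(report, [methods], [out])

    demo.launch()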
start.sh
CHANGED
@@ -1,2 +1,5 @@
 ls
+curl -s https://api.github.com/repos/ggerganov/llama.cpp/releases/latest | jq -r '.assets[].browser_download_url' | grep -i ubuntu | while read -r url; do curl -LO "$url"; done
+ls
+unzip *ubuntu*.zip
 venv/bin/python app.py
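This pipeline is why jq entered the Dockerfile: it walks the latest release's asset list and downloads every Ubuntu build. The same lookup expressed in Python, assuming only the documented GitHub REST schema for /releases/latest (stdlib only, download step omitted):

    import json, urllib.request

    url = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"
    with urllib.request.urlopen(url) as resp:
        release = json.load(resp)

    # Equivalent of: jq -r '.assets[].browser_download_url' | grep -i ubuntu
    ubuntu_urls = [asset["browser_download_url"]
                   for asset in release["assets"]
                   if "ubuntu" in asset["browser_download_url"].lower()]
    print("\n".join(ubuntu_urls))

One caveat: if more than one Ubuntu asset is downloaded, the shell expands "unzip *ubuntu*.zip" to multiple arguments and unzip treats the extras as member names inside the first archive, so the unzip step may need to loop over the files instead.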