phate334 committed on
Commit
4688574
·
1 Parent(s): c0976bd

[fix] modify huggingface url

Browse files
Files changed (3) hide show
  1. app/utils.py +19 -0
  2. main.py +4 -3
  3. tests/test_url.py +19 -0
app/utils.py CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  def human_readable_size(size_in_bytes: int) -> str:
2
  # Convert file size to a human-readable format
3
  for unit in ["B", "KB", "MB", "GB", "TB", "PB"]:
@@ -13,3 +20,15 @@ def abbreviate_number(number: int) -> str:
13
  if number >= threshold:
14
  return f"{number/threshold:.2f} {unit}"
15
  return str(number)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
# Matches Hugging Face file URLs of the form
#   https://huggingface.co/<account>/<repo_id>/<resolve|blob|raw>/<branch>/<filename>
# and captures the four named parts.
#
# - The dot in the host name is escaped so it only matches a literal ".".
# - The segment between repo and branch is pinned to a single known keyword;
#   a greedy ".*" there would swallow the real branch whenever the file lives
#   in a sub-directory (".../resolve/main/sub/f.gguf" -> branch "sub").
# - The filename may contain "/" (nested paths) but stops at "?" or "#", so
#   query strings and fragments are not captured.
hf_pattern = re.compile(
    r"https://huggingface\.co/(?P<account>[^/]+)/(?P<repo_id>[^/]+)"
    r"/(?:resolve|blob|raw)/(?P<branch>[^/]+)/(?P<filename>[^?#]+).*"
)
6
+
7
+
8
  def human_readable_size(size_in_bytes: int) -> str:
9
  # Convert file size to a human-readable format
10
  for unit in ["B", "KB", "MB", "GB", "TB", "PB"]:
 
20
  if number >= threshold:
21
  return f"{number/threshold:.2f} {unit}"
22
  return str(number)
23
+
24
+
25
def cleanup_url(url: str) -> str:
    """Normalize a Hugging Face file URL to its ``resolve`` form.

    Surrounding whitespace is removed before matching, so a URL pasted with a
    leading space or a trailing newline is still recognised and no whitespace
    ends up in the rebuilt URL.

    Args:
        url: The URL as entered by the user.

    Returns:
        ``https://huggingface.co/<account>/<repo_id>/resolve/<branch>/<filename>``
        built from the groups captured by ``hf_pattern`` when it matches;
        otherwise the input with surrounding whitespace stripped.
    """
    # Strip first: ``Pattern.match`` anchors at the start of the string, so
    # leading whitespace would otherwise bypass normalization entirely.
    url = url.strip()
    match = hf_pattern.match(url)
    if match is None:
        return url

    account = match.group("account")
    repo_id = match.group("repo_id")
    branch = match.group("branch")
    filename = match.group("filename")
    return f"https://huggingface.co/{account}/{repo_id}/resolve/{branch}/{filename}"
main.py CHANGED
@@ -3,11 +3,11 @@ import os
3
  from pathlib import Path
4
 
5
  import gradio as gr
6
- import pandas as pd
7
 
8
  from app.devices import Device
9
  from app.models import GgufParser
10
  from app.tables import get_estimate_df, get_gpus_df, get_model_info_df
 
11
 
12
  GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
13
  gguf_parser = Path("gguf-parser-linux-amd64")
@@ -28,6 +28,7 @@ def process_url(url, context_length, device_selection):
28
  try:
29
  device_name = device_selection.split(" (")[0]
30
  selected_device = devices[device_name]
 
31
  res = os.popen(
32
  f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
33
  ).read()
@@ -57,13 +58,13 @@ if __name__ == "__main__":
57
  url_input = gr.Textbox(
58
  label="GGUF File URL", placeholder="Enter GGUF URL", value=DEFAULT_URL
59
  )
60
- context_length = gr.Number(label="Context Length", value=8192)
61
  device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
62
  submit_btn = gr.Button("Send")
63
 
64
  submit_btn.click(
65
  fn=process_url,
66
- inputs=[url_input, context_length, device_dropdown],
67
  outputs=[
68
  gr.DataFrame(label="Model Info"),
69
  gr.DataFrame(label="ESTIMATE"),
 
3
  from pathlib import Path
4
 
5
  import gradio as gr
 
6
 
7
  from app.devices import Device
8
  from app.models import GgufParser
9
  from app.tables import get_estimate_df, get_gpus_df, get_model_info_df
10
+ from app.utils import cleanup_url
11
 
12
  GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
13
  gguf_parser = Path("gguf-parser-linux-amd64")
 
28
  try:
29
  device_name = device_selection.split(" (")[0]
30
  selected_device = devices[device_name]
31
+ url = cleanup_url(url)
32
  res = os.popen(
33
  f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
34
  ).read()
 
58
  url_input = gr.Textbox(
59
  label="GGUF File URL", placeholder="Enter GGUF URL", value=DEFAULT_URL
60
  )
61
+ context_length_input = gr.Number(label="Context Length", value=8192)
62
  device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
63
  submit_btn = gr.Button("Send")
64
 
65
  submit_btn.click(
66
  fn=process_url,
67
+ inputs=[url_input, context_length_input, device_dropdown],
68
  outputs=[
69
  gr.DataFrame(label="Model Info"),
70
  gr.DataFrame(label="ESTIMATE"),
tests/test_url.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.utils import cleanup_url
2
+
3
+ # https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf?download=true
4
+ # https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf
5
+ # https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/blob/main/llama-3.1-8b-instruct-q4_k_m.gguf
6
+ # For all of the URLs above, cleanup_url should return https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf
7
+ # Any other (non-huggingface.co) URL only needs to come back without leading or trailing whitespace
8
+
9
# Direct-download ("resolve") form of the model file URL.
resolve_url = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf"
# Same "resolve" URL with a "?download=true" query string appended.
resolve_url_download = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf?download=true"
# "blob" form of the same file URL.
blob_url = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/blob/main/llama-3.1-8b-instruct-q4_k_m.gguf"
# URL on a host other than huggingface.co.
other_url = "https://git.gss.com.tw/phate_wang/llm/llama-3.1-8b-instruct-q4_k_m.gguf"
13
+
14
+
15
def test_cleanup_url():
    """Check cleanup_url against each fixture URL and its expected result."""
    # (input, expected) pairs: the three Hugging Face variants map to the
    # resolve URL, the non-Hugging-Face URL is expected back unchanged.
    expectations = (
        (resolve_url, resolve_url),
        (resolve_url_download, resolve_url),
        (blob_url, resolve_url),
        (other_url, other_url),
    )
    for given, wanted in expectations:
        assert cleanup_url(given) == wanted