reach-vb (HF staff) committed
Commit 9781999
1 Parent(s): 7a117c3
Files changed (1)
  1. app.py +79 -75
app.py CHANGED
@@ -27,83 +27,87 @@ def process_model(model_id, q_method, hf_token):
     MODEL_NAME = model_id.split('/')[-1]
     fp16 = f"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin"
 
-    api = HfApi(token=hf_token)
-
-    username = whoami(hf_token)["name"]
-
-    snapshot_download(repo_id=model_id, local_dir = f"{MODEL_NAME}", local_dir_use_symlinks=False)
-    print("Model downloaded successfully!")
-
-    conversion_script = script_to_use(model_id, api)
-    fp16_conversion = f"python llama.cpp/{conversion_script} {MODEL_NAME} --outtype f16 --outfile {fp16}"
-    result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
-    if result.returncode != 0:
-        return (f"Error converting to fp16: {result.stderr}", "error.png")
-    print("Model converted to fp16 successfully!")
-
-    qtype = f"{MODEL_NAME}/{MODEL_NAME.lower()}.{q_method.upper()}.gguf"
-    quantise_ggml = f"./llama.cpp/quantize {fp16} {qtype} {q_method}"
-    result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
-    if result.returncode != 0:
-        return (f"Error quantizing: {result.stderr}", "error.png")
-    print("Quantised successfully!")
-
-    # Create empty repo
-    repo_id = f"{username}/{MODEL_NAME}-{q_method}-GGUF"
-    repo_url = create_repo(
-        repo_id = repo_id,
-        repo_type="model",
-        exist_ok=True,
-        token=hf_token
-    )
-    print("Repo created successfully!")
-
-    card = ModelCard.load(model_id)
-    card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
-    card.text = dedent(
-        f"""
-        # {repo_id}
-        This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp.
-        Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
-        ## Use with llama.cpp
-
-        ```bash
-        brew install ggerganov/ggerganov/llama.cpp
-        ```
-
-        ```bash
-        llama-cli --hf-repo {repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is "
-        ```
-
-        ```bash
-        llama-server --hf-repo {repo_id} --model {qtype.split("/")[-1]} -c 2048
-        ```
-        """
-    )
-    card.save(os.path.join(MODEL_NAME, "README-new.md"))
-
-    api.upload_file(
-        path_or_fileobj=qtype,
-        path_in_repo=qtype.split("/")[-1],
-        repo_id=repo_id,
-        repo_type="model",
-    )
-
-    api.upload_file(
-        path_or_fileobj=f"{MODEL_NAME}/README-new.md",
-        path_in_repo="README.md",
-        repo_id=repo_id,
-        repo_type="model",
-    )
-    print("Uploaded successfully!")
-
-    shutil.rmtree(MODEL_NAME)
-    print("Folder cleaned up successfully!")
-
-    return (
-        f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
-        "llama.png",
-    )
+    try:
+        api = HfApi(token=hf_token)
+
+        username = whoami(hf_token)["name"]
+
+        snapshot_download(repo_id=model_id, local_dir = f"{MODEL_NAME}", local_dir_use_symlinks=False)
+        print("Model downloaded successfully!")
+
+        conversion_script = script_to_use(model_id, api)
+        fp16_conversion = f"python llama.cpp/{conversion_script} {MODEL_NAME} --outtype f16 --outfile {fp16}"
+        result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
+        if result.returncode != 0:
+            raise Exception(f"Error converting to fp16: {result.stderr}")
+        print("Model converted to fp16 successfully!")
+
+        qtype = f"{MODEL_NAME}/{MODEL_NAME.lower()}.{q_method.upper()}.gguf"
+        quantise_ggml = f"./llama.cpp/quantize {fp16} {qtype} {q_method}"
+        result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
+        if result.returncode != 0:
+            raise Exception(f"Error quantizing: {result.stderr}")
+        print("Quantised successfully!")
+
+        # Create empty repo
+        repo_id = f"{username}/{MODEL_NAME}-{q_method}-GGUF"
+        repo_url = create_repo(
+            repo_id = repo_id,
+            repo_type="model",
+            exist_ok=True,
+            token=hf_token
+        )
+        print("Repo created successfully!")
+
+        card = ModelCard.load(model_id)
+        card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
+        card.text = dedent(
+            f"""
+            # {repo_id}
+            This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp.
+            Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
+            ## Use with llama.cpp
+
+            ```bash
+            brew install ggerganov/ggerganov/llama.cpp
+            ```
+
+            ```bash
+            llama-cli --hf-repo {repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is "
+            ```
+
+            ```bash
+            llama-server --hf-repo {repo_id} --model {qtype.split("/")[-1]} -c 2048
+            ```
+            """
+        )
+        card.save(os.path.join(MODEL_NAME, "README-new.md"))
+
+        api.upload_file(
+            path_or_fileobj=qtype,
+            path_in_repo=qtype.split("/")[-1],
+            repo_id=repo_id,
+            repo_type="model",
+        )
+
+        api.upload_file(
+            path_or_fileobj=f"{MODEL_NAME}/README-new.md",
+            path_in_repo="README.md",
+            repo_id=repo_id,
+            repo_type="model",
+        )
+        print("Uploaded successfully!")
+
+        return (
+            f'Find your repo <a href=\'{repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
+            "llama.png",
+        )
+    except Exception as e:
+        return (f"Error: {e}", "error.png")
+    finally:
+        shutil.rmtree(MODEL_NAME, ignore_errors=True)
+        print("Folder cleaned up successfully!")
 
 # Create Gradio interface
 iface = gr.Interface(
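The substance of this commit is the control-flow pattern rather than any single line: every failing step now raises instead of returning early, one `except` clause formats any error into the `(message, image)` tuple the Gradio handler must return, and a `finally` clause removes the downloaded model directory whether the run succeeded or not. Below is a minimal, self-contained sketch of that pattern; the `process` name and the `echo` command are placeholders standing in for the app's real download/convert/quantize/upload sequence, not code from this repo.

```python
import shutil
import subprocess

def process(model_id: str) -> tuple[str, str]:
    """Gradio-style handler: always returns a (message, image) tuple."""
    workdir = model_id.split("/")[-1]  # scratch dir named like the app's MODEL_NAME
    try:
        # Placeholder for the real pipeline. Each step raises on failure,
        # so error formatting lives in exactly one place (the except below).
        result = subprocess.run(["echo", "converting", model_id], capture_output=True)
        if result.returncode != 0:
            raise Exception(f"Error converting: {result.stderr}")
        return ("Done!", "llama.png")
    except Exception as e:
        # Single exit point for every failure mode.
        return (f"Error: {e}", "error.png")
    finally:
        # Runs on success and failure alike; ignore_errors=True covers the
        # case where the exception fired before workdir was ever created.
        shutil.rmtree(workdir, ignore_errors=True)
```

One subtlety: because `finally` runs even when the download itself fails, the old unconditional `shutil.rmtree(MODEL_NAME)` would have raised `FileNotFoundError` on a directory that was never created; `ignore_errors=True` is what makes the cleanup safe to run on every path.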