Files changed (1) hide show
  1. app.py +21 -4
app.py CHANGED
@@ -30,12 +30,13 @@ def process_model(model_id, q_method, hf_token):
30
  try:
31
  api = HfApi(token=hf_token)
32
 
33
- snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False)
34
  print("Model downloaded successully!")
35
 
36
  conversion_script = script_to_use(model_id, api)
37
  fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
38
  result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
 
39
  if result.returncode != 0:
40
  raise Exception(f"Error converting to fp16: {result.stderr}")
41
  print("Model converted to fp16 successully!")
@@ -52,26 +53,42 @@ def process_model(model_id, q_method, hf_token):
52
  new_repo_id = new_repo_url.repo_id
53
  print("Repo created successfully!", new_repo_url)
54
 
55
- card = ModelCard.load(model_id)
 
 
 
56
  card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
57
  card.text = dedent(
58
  f"""
59
  # {new_repo_id}
60
- This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp.
61
  Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
62
  ## Use with llama.cpp
63
 
 
 
64
  ```bash
65
  brew install ggerganov/ggerganov/llama.cpp
66
  ```
 
67
 
 
 
68
  ```bash
69
  llama-cli --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is "
70
  ```
71
 
 
 
72
  ```bash
73
  llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
74
  ```
 
 
 
 
 
 
75
  """
76
  )
77
  card.save(os.path.join(model_name, "README-new.md"))
@@ -128,7 +145,7 @@ iface = gr.Interface(
128
  gr.Image(show_label=False),
129
  ],
130
  title="Create your own GGUF Quants, blazingly fast ⚡!",
131
- description="The space takes a HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
132
  article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
133
 
134
  )
 
30
  try:
31
  api = HfApi(token=hf_token)
32
 
33
+ snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token)
34
  print("Model downloaded successully!")
35
 
36
  conversion_script = script_to_use(model_id, api)
37
  fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
38
  result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
39
+ print(result)
40
  if result.returncode != 0:
41
  raise Exception(f"Error converting to fp16: {result.stderr}")
42
  print("Model converted to fp16 successully!")
 
53
  new_repo_id = new_repo_url.repo_id
54
  print("Repo created successfully!", new_repo_url)
55
 
56
+ try:
57
+ card = ModelCard.load(model_id,)
58
+ except:
59
+ card = ModelCard("")
60
  card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
61
  card.text = dedent(
62
  f"""
63
  # {new_repo_id}
64
+ This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via GGML.ai's [GGUF-it](https://huggingface.co/spaces/ggml-org/GGUF-it) space.
65
  Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
66
  ## Use with llama.cpp
67
 
68
+ Install Llama.cpp through brew.
69
+
70
  ```bash
71
  brew install ggerganov/ggerganov/llama.cpp
72
  ```
73
+ Invoke the llama.cpp server or the CLI.
74
 
75
+ CLI:
76
+
77
  ```bash
78
  llama-cli --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is "
79
  ```
80
 
81
+ Server:
82
+
83
  ```bash
84
  llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
85
  ```
86
+
87
+ Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the llama.cpp repo.
88
+
89
+ ```
90
+ git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && make && ./main -m {qtype.split("/")[-1]} -n 128
91
+ ```
92
  """
93
  )
94
  card.save(os.path.join(model_name, "README-new.md"))
 
145
  gr.Image(show_label=False),
146
  ],
147
  title="Create your own GGUF Quants, blazingly fast ⚡!",
148
+ description="The space takes a HF repo as an input, quantises it and creates another repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
149
  article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
150
 
151
  )