Federico Galatolo committed on
Commit
fafd74a
1 Parent(s): 7b3a47b

Q4_K quantization

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +2 -2
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /env
app.py CHANGED
@@ -9,7 +9,7 @@ from huggingface_hub import hf_hub_download
9
  llm = Llama(
10
  model_path=hf_hub_download(
11
  repo_id="galatolo/cerbero-7b-gguf",
12
- filename="ggml-model-Q8_0.gguf",
13
  ),
14
  n_ctx=4086,
15
  )
@@ -51,7 +51,7 @@ def generate_text(message, history):
51
 
52
  demo = gr.ChatInterface(
53
  generate_text,
54
- title="cerbero-7b running on CPU (quantized)",
55
  description="This is a quantized version of cerbero-7b running on CPU. It is less powerful than the original version, but it is much faster and it can even run on a Raspberry Pi 4.",
56
  examples=[
57
  "Dammi 3 idee di ricette che posso fare con i pistacchi",
 
9
  llm = Llama(
10
  model_path=hf_hub_download(
11
  repo_id="galatolo/cerbero-7b-gguf",
12
+ filename="ggml-model-Q4_K.gguf",
13
  ),
14
  n_ctx=4086,
15
  )
 
51
 
52
  demo = gr.ChatInterface(
53
  generate_text,
54
+ title="cerbero-7b running on CPU (quantized Q4_K)",
55
  description="This is a quantized version of cerbero-7b running on CPU. It is less powerful than the original version, but it is much faster and it can even run on a Raspberry Pi 4.",
56
  examples=[
57
  "Dammi 3 idee di ricette che posso fare con i pistacchi",