Federico Galatolo committed on
Commit
34e2eaa
1 Parent(s): 7b2cfb0

first commit

Browse files
Files changed (2) hide show
  1. app.py +67 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import copy
4
+ import time
5
+ import llama_cpp
6
+ from llama_cpp import Llama
7
+ from huggingface_hub import hf_hub_download
8
+
9
# Resolve the local path of the quantized GGUF weights via the Hugging Face Hub.
_model_file = hf_hub_download(
    repo_id="galatolo/cerbero-7b-gguf",
    filename="ggml-model-Q8_0.gguf",
)

# Load the model with llama.cpp.
# NOTE(review): n_ctx=4086 looks like a typo for 4096 -- confirm before changing.
llm = Llama(model_path=_model_file, n_ctx=4086)

# Module-level history list; generate_text takes its own `history` parameter,
# which shadows this name inside the function.
history = []
18
+
19
def _build_prompt(message, history):
    """Render past turns plus the new user message in the model's prompt format.

    Args:
        message: The new user message to answer.
        history: Sequence of past turns; each turn is indexable with the user
            text at index 0 and the assistant reply at index 1.

    Returns:
        The full prompt string, ending with the open ``[|AI|]`` tag so the
        model continues with the assistant's answer.
    """
    parts = ["Conversazione tra umano ed un assistente AI di nome cerbero-7b\n"]
    for turn in history:
        # A reply may be missing (None) for an interrupted turn; treat it as empty
        # instead of failing on str + None.
        reply = turn[1] or ""
        # Make sure every assistant turn ends its line, otherwise the next
        # "[|Umano|]" tag would be glued to the end of the previous reply.
        if not reply.endswith("\n"):
            reply += "\n"
        parts.append("[|Umano|] " + turn[0] + "\n")
        parts.append("[|AI|]" + reply)
    parts.append("[|Umano|] " + message + "\n[|AI|]")
    return "".join(parts)


def generate_text(message, history):
    """Stream the assistant's answer to ``message`` given the chat ``history``.

    Args:
        message: The new user message.
        history: Past conversation turns as (user text, assistant reply) pairs.

    Yields:
        The answer accumulated so far, once per streamed chunk, so the caller
        can progressively display the growing text.
    """
    input_prompt = _build_prompt(message, history)

    print(input_prompt)

    output = llm(
        input_prompt,
        temperature=0.15,
        top_p=0.1,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        # Stop as soon as the model starts writing the next speaker tag.
        stop=[
            "[|Umano|]",
            "[|Human|]",
            "[|AI|]",
        ],
        stream=True,
    )

    partial = ""
    for chunk in output:
        # Each chunk is only read, so no defensive copy is needed.
        partial += chunk["choices"][0]["text"]
        yield partial
50
+
51
+
52
# Texts shown in the chat UI.
_ui_title = "cerbero-7b running on CPU (quantized)"
_ui_description = "This is a quantized version of cerbero-7b running on CPU. It is less powerful than the original version, but it is much faster and it can even run on a Raspberry Pi 4."

# Example prompts offered to the user (Italian, matching the model's language).
_example_prompts = [
    "Dammi 3 idee di ricette che posso fare con i pistacchi",
    "Prepara un piano di esercizi da poter fare a casa",
    "Scrivi una poesia sulla nuova AI chiamata cerbero-7b",
]

# Chat UI backed by the streaming generate_text generator.
demo = gr.ChatInterface(
    generate_text,
    title=_ui_title,
    description=_ui_description,
    examples=_example_prompts,
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

# Queue settings: concurrency_count=1 and at most 5 queued requests.
demo.queue(concurrency_count=1, max_size=5)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ llama-cpp-python
2
+ huggingface-hub