giux78 committed on
Commit
9eb0dec
1 Parent(s): 9d17447

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -71
app.py CHANGED
@@ -6,27 +6,21 @@ import llama_cpp
6
  from llama_cpp import Llama
7
  from huggingface_hub import hf_hub_download
8
 
9
# saiga-7b: quantized GGUF checkpoint fetched from the Hugging Face Hub,
# loaded with llama.cpp for CPU inference.
_saiga_gguf = hf_hub_download(
    repo_id="FinancialSupport/saiga-7b-gguf",
    filename="saiga-7b.Q4_K_M.gguf",
)
saiga = Llama(model_path=_saiga_gguf, n_ctx=4086)
16
 
17
# saiga-7b with the Dante QLoRA merged in, same quantization and context size
# as the base model above.
_dante_gguf = hf_hub_download(
    repo_id="FinancialSupport/saiga-7b-gguf",
    filename="saiga-7b-dante-qlora.Q4_K_M.gguf",
)
dante = Llama(model_path=_dante_gguf, n_ctx=4086)
24
 
25
  history = []
26
 
27
  def generate_text(message, history):
28
  temp = ""
29
- input_prompt = "Conversazione tra umano ed un assistente AI di nome saiga-7b\n"
30
  for interaction in history:
31
  input_prompt += "[|Umano|] " + interaction[0] + "\n"
32
  input_prompt += "[|Assistente|]" + interaction[1]
@@ -35,7 +29,7 @@ def generate_text(message, history):
35
 
36
  print(input_prompt)
37
 
38
- output = saiga(input_prompt,
39
  temperature= 0.15,
40
  top_p= 0.1,
41
  top_k= 40,
@@ -54,68 +48,24 @@ def generate_text(message, history):
54
 
55
  history = ["init", input_prompt]
56
 
57
def generate_text_Dante(message, history):
    """Stream a reply from the Dante model.

    Builds a ``[|Umano|]``/``[|Assistente|]`` turn-based prompt from
    *history* plus the new *message*, then yields the accumulated
    completion text chunk by chunk (generator form expected by
    ``gr.ChatInterface``).
    """
    # Flatten past turns into the prompt format the model was tuned on.
    turns = []
    for interaction in history:
        turns.append("[|Umano|] " + interaction[0] + "\n")
        turns.append("[|Assistente|]" + interaction[1])
    turns.append("[|Umano|] " + message + "\n[|Assistente|]")
    input_prompt = "".join(turns)

    print(input_prompt)

    token_stream = dante(
        input_prompt,
        temperature=0.15,
        top_p=0.1,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        stop=[
            "[|Umano|]",
            "[|Assistente|]",
        ],
        stream=True,
    )

    # Re-yield the growing reply so the UI shows text as it is generated.
    reply = ""
    for chunk in token_stream:
        reply += copy.deepcopy(chunk)["choices"][0]["text"]
        yield reply

    # NOTE(review): rebinds the local name only — the caller's history
    # list is not actually mutated by this assignment.
    history = ["init", input_prompt]
86
 
87
 
88
# Gradio UI: a single chat tab backed by the Dante-LoRA model.
# (A commented-out 'saiga' tab for the plain base model used to live here.)
with gr.Blocks() as demo:
    with gr.Tab('Dante'):
        gr.ChatInterface(
            generate_text_Dante,
            title="saigaDante-7b running on CPU (quantized Q4_K)",
            description="This is a quantized version of saiga-7b with Dante LoRA attached running on CPU (very slow).",
            examples=[
                "Traduci in volgare fiorentino: tanto va la gatta al lardo che ci lascia lo zampino",
                "Traduci in volgare fiorentino: narrami come cucinare la pasta alla carbonara vegana.",
                "Traduci in volgare fiorentino: raccontami una fiaba su Firenze",
            ],
            cache_examples=False,
            retry_btn=None,
            undo_btn="Delete Previous",
            clear_btn="Clear",
        )

# One request at a time on the shared CPU; cap the waiting queue at 5.
demo.queue(concurrency_count=1, max_size=5)
demo.launch()
 
6
  from llama_cpp import Llama
7
  from huggingface_hub import hf_hub_download
8
 
9
# zefiro-7b: Italian instruction-tuned model, quantized to q4_0 GGUF,
# downloaded from the Hugging Face Hub and run on CPU via llama.cpp.
# FIX: the filename literal was split across two physical lines in the
# committed code, which is a SyntaxError in Python (a plain string
# literal cannot span lines) — rejoined into a single literal.
zefiro = Llama(
    model_path=hf_hub_download(
        repo_id="giux78/zefiro-7b-beta-ITA-v0.1-GGUF",
        filename="zefiro-7b-beta-ITA-v0.1-q4_0.gguf",
    ),
    n_ctx=4086,  # NOTE(review): 4086 looks like a typo for 4096 — confirm before changing
)
17
 
 
 
 
 
 
 
 
18
 
19
  history = []
20
 
21
  def generate_text(message, history):
22
  temp = ""
23
+ input_prompt = "Chiedi a zefiro"
24
  for interaction in history:
25
  input_prompt += "[|Umano|] " + interaction[0] + "\n"
26
  input_prompt += "[|Assistente|]" + interaction[1]
 
29
 
30
  print(input_prompt)
31
 
32
+ output = zefiro(input_prompt,
33
  temperature= 0.15,
34
  top_p= 0.1,
35
  top_k= 40,
 
48
 
49
  history = ["init", input_prompt]
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
# Gradio UI: one chat tab that talks to the zefiro model.
with gr.Blocks() as demo:
    with gr.Tab('zefiro'):
        gr.ChatInterface(
            generate_text,
            title="zefiro-7b-v01 running on CPU (quantized Q4_K)",
            description="This is a quantized version of zefiro-7b-v01 running on CPU (very slow). It is less powerful than the original version, but it can even run on the free tier of huggingface.",
            examples=[
                "Dammi 3 idee di ricette che posso fare con i pistacchi",
                "Prepara un piano di esercizi da poter fare a casa",
                "Scrivi una poesia su una giornato di pioggia",
            ],
            cache_examples=False,
            retry_btn=None,
            undo_btn="Delete Previous",
            clear_btn="Clear",
        )

# Serialize inference on the single CPU worker; queue up to 5 requests.
demo.queue(concurrency_count=1, max_size=5)
demo.launch()