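"""Gradio demo serving two quantized saiga-7b GGUF models on CPU via
llama-cpp-python: the base model and a variant with a Dante (Florentine
vernacular) QLoRA merged in, each behind its own chat tab.

Run the script directly; Gradio serves on http://127.0.0.1:7860 by default.
"""
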
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# hf_hub_download caches both GGUF files locally, so only the first start pays
# the download cost. Both checkpoints are 4-bit (Q4_K_M) quantized for CPU use.
saiga = Llama(
    model_path=hf_hub_download(
        repo_id="FinancialSupport/saiga-7b-gguf",
        filename="saiga-7b.Q4_K_M.gguf",
    ),
    n_ctx=4096,
)

dante = Llama(
    model_path=hf_hub_download(
        repo_id="FinancialSupport/saiga-7b-gguf",
        filename="saiga-7b-dante-qlora.Q4_K_M.gguf",
    ),
    n_ctx=4096,
)


def generate_text(message, history):
    """Stream a reply from the base saiga model."""
    temp = ""
    # Italian system preamble: "Conversation between a human and an AI
    # assistant named saiga-7b".
    input_prompt = "Conversazione tra umano ed un assistente AI di nome saiga-7b\n"
    # Rebuild the full prompt from the per-session history that
    # gr.ChatInterface passes in as (user, assistant) pairs.
    for user_msg, assistant_msg in history:
        input_prompt += "[|Umano|] " + user_msg + "\n"
        input_prompt += "[|Assistente|]" + assistant_msg + "\n"
    input_prompt += "[|Umano|] " + message + "\n[|Assistente|]"

    print(input_prompt)  # debug: log the assembled prompt

    output = saiga(
        input_prompt,
        temperature=0.15,
        top_p=0.1,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        # Stop as soon as the model starts a new turn marker.
        stop=["[|Umano|]", "[|Assistente|]"],
        stream=True,
    )

    # Accumulate streamed chunks, yielding the running text so the UI
    # updates token by token.
    for out in output:
        temp += out["choices"][0]["text"]
        yield temp

def generate_text_Dante(message, history):
    """Stream a reply from the Dante-LoRA variant (no system preamble)."""
    temp = ""
    input_prompt = ""
    for user_msg, assistant_msg in history:
        input_prompt += "[|Umano|] " + user_msg + "\n"
        input_prompt += "[|Assistente|]" + assistant_msg + "\n"
    input_prompt += "[|Umano|] " + message + "\n[|Assistente|]"

    print(input_prompt)  # debug: log the assembled prompt

    output = dante(
        input_prompt,
        temperature=0.15,
        top_p=0.1,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        stop=["[|Umano|]", "[|Assistente|]"],
        stream=True,
    )

    for out in output:
        temp += out["choices"][0]["text"]
        yield temp
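# NOTE: generate_text and generate_text_Dante differ only in the model handle
# and the system preamble; a single helper parameterized by the model would
# remove the duplication.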


with gr.Blocks() as demo:
    with gr.Tab('saiga'):
        gr.ChatInterface(
            generate_text,
            title="saiga-7b running on CPU (quantized Q4_K)",
            description="This is a quantized version of saiga-7b running on CPU (very slow). "
                        "It is less capable than the full-precision model, but it runs even on "
                        "the free tier of Hugging Face Spaces.",
            examples=[
                "Dammi 3 idee di ricette che posso fare con i pistacchi",
                "Prepara un piano di esercizi da poter fare a casa",
                "Scrivi una poesia sulla nuova AI chiamata cerbero-7b",
            ],
            cache_examples=True,
            retry_btn=None,
            undo_btn="Delete Previous",
            clear_btn="Clear",
        )
    with gr.Tab('Dante'):
        gr.ChatInterface(
            generate_text_Dante,
            title="saigaDante-7b running on CPU (quantized Q4_K)",
            description="This is a quantized version of saiga-7b with the Dante LoRA attached, "
                        "running on CPU (very slow).",
            examples=[
                # TODO: if a validated translation example turns up, use it here!
                "Traduci in volgare fiorentino: tanto va la gatta al lardo che ci lascia lo zampino",
                "Traduci in volgare fiorentino: come preparo la pasta alla carbonara?",
                "Traduci in volgare fiorentino: raccontami una fiaba su Firenze",
            ],
            cache_examples=False,
            retry_btn=None,
            undo_btn="Delete Previous",
            clear_btn="Clear",
        )

# Serialize requests through a single worker and cap the waiting queue at 5.
demo.queue(concurrency_count=1, max_size=5)
demo.launch()