Update app.py
app.py CHANGED
@@ -6,27 +6,21 @@ import llama_cpp
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
-
+zefiro = Llama(
     model_path=hf_hub_download(
-        repo_id="
-        filename="
+        repo_id="giux78/zefiro-7b-beta-ITA-v0.1-GGUF",
+        filename="zefiro-7b-beta-ITA-v0.1-q4_0.gguf",
     ),
     n_ctx=4086,
 )
 
-dante = Llama(
-    model_path=hf_hub_download(
-        repo_id="FinancialSupport/saiga-7b-gguf",
-        filename="saiga-7b-dante-qlora.Q4_K_M.gguf",
-    ),
-    n_ctx=4086,
-)
 
 history = []
 
 def generate_text(message, history):
     temp = ""
-    input_prompt = "
+    input_prompt = "Chiedi a zefiro"
     for interaction in history:
         input_prompt += "[|Umano|] " + interaction[0] + "\n"
         input_prompt += "[|Assistente|]" + interaction[1]
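For readers following along: this hunk swaps which model the Space loads. The pattern is to pull a quantized GGUF file from the Hub with hf_hub_download, which returns a local cache path, and hand that path to llama-cpp-python. Below is a minimal sketch of the loading code as it reads after this commit, with the added filename string joined onto one line (in the raw commit it is split across two lines, which would be a Python syntax error). Note that n_ctx=4086 is kept verbatim from the diff, although 4096 was likely intended.

from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# hf_hub_download fetches the file once and returns its local cache path,
# so restarts of the Space reuse the downloaded weights.
zefiro = Llama(
    model_path=hf_hub_download(
        repo_id="giux78/zefiro-7b-beta-ITA-v0.1-GGUF",
        filename="zefiro-7b-beta-ITA-v0.1-q4_0.gguf",
    ),
    n_ctx=4086,  # context window in tokens; 4096 is the usual value here
)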
@@ -35,7 +29,7 @@
 
     print(input_prompt)
 
-    output =
+    output = zefiro(input_prompt,
         temperature= 0.15,
         top_p= 0.1,
         top_k= 40,
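This hunk only shows the head of the rewired call; the sampling arguments and the streaming loop that follow are unchanged context, and they mirror the generate_text_Dante twin removed in the next hunk. For clarity, here is a sketch of the whole generator as it plausibly reads after the commit. The stop list and the copy.deepcopy loop are taken from that removed twin, so treat them as an inference rather than a verbatim quote.

import copy

def generate_text(message, history):
    # Replay the prior (user, assistant) turns into one prompt string.
    temp = ""
    input_prompt = "Chiedi a zefiro"
    for interaction in history:
        input_prompt += "[|Umano|] " + interaction[0] + "\n"
        input_prompt += "[|Assistente|]" + interaction[1]
    input_prompt += "[|Umano|] " + message + "\n[|Assistente|]"

    # stream=True makes llama-cpp-python yield incremental chunks
    # instead of blocking until the full completion is ready.
    output = zefiro(
        input_prompt,
        temperature=0.15,   # low temperature: near-deterministic replies
        top_p=0.1,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        stop=["[|Umano|]", "[|Assistente|]"],  # cut off at the next turn marker
        stream=True,
    )
    for out in output:
        stream = copy.deepcopy(out)
        temp += stream["choices"][0]["text"]
        yield temp  # gr.ChatInterface re-renders the growing reply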
@@ -54,68 +48,24 @@
 
     history = ["init", input_prompt]
 
-def generate_text_Dante(message, history):
-    temp = ""
-    input_prompt = ""
-    for interaction in history:
-        input_prompt += "[|Umano|] " + interaction[0] + "\n"
-        input_prompt += "[|Assistente|]" + interaction[1]
-
-    input_prompt += "[|Umano|] " + message + "\n[|Assistente|]"
-
-    print(input_prompt)
-
-    output = dante(input_prompt,
-        temperature= 0.15,
-        top_p= 0.1,
-        top_k= 40,
-        repeat_penalty= 1.1,
-        max_tokens= 1024,
-        stop= [
-            "[|Umano|]",
-            "[|Assistente|]",
-        ],
-        stream= True)
-
-    for out in output:
-        stream = copy.deepcopy(out)
-        temp += stream["choices"][0]["text"]
-        yield temp
-
-    history = ["init", input_prompt]
 
 
 with gr.Blocks() as demo:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    with gr.Tab('Dante'):
-        gr.ChatInterface(
-            generate_text_Dante,
-            title="saigaDante-7b running on CPU (quantized Q4_K)",
-            description="This is a quantized version of saiga-7b with Dante LoRA attached running on CPU (very slow).",
-            examples=[
-                "Traduci in volgare fiorentino: tanto va la gatta al lardo che ci lascia lo zampino",
-                "Traduci in volgare fiorentino: narrami come cucinare la pasta alla carbonara vegana.",
-                "Traduci in volgare fiorentino: raccontami una fiaba su Firenze"
-            ],
-            cache_examples=False,
-            retry_btn=None,
-            undo_btn="Delete Previous",
-            clear_btn="Clear",
-        )
+    with gr.Tab('zefiro'):
+        gr.ChatInterface(
+            generate_text,
+            title="zefiro-7b-v01 running on CPU (quantized Q4_K)",
+            description="This is a quantized version of zefiro-7b-v01 running on CPU (very slow). It is less powerful than the original version, but it can even run on the free tier of huggingface.",
+            examples=[
+                "Dammi 3 idee di ricette che posso fare con i pistacchi",
+                "Prepara un piano di esercizi da poter fare a casa",
+                "Scrivi una poesia su una giornato di pioggia"
+            ],
+            cache_examples=False,
+            retry_btn=None,
+            undo_btn="Delete Previous",
+            clear_btn="Clear",
+        )
 
 demo.queue(concurrency_count=1, max_size=5)
 demo.launch()
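Taken together, the added lines wire the retained generate_text generator into a single-tab Gradio chat UI. A consolidated sketch of the post-commit layout follows, assembled from the + lines above (with the "giornato" typo in one example corrected to "giornata"). Note that retry_btn, undo_btn, clear_btn and queue(concurrency_count=...) are Gradio 3.x arguments that were removed in Gradio 4, so the SDK version pinned by the Space matters.

import gradio as gr

with gr.Blocks() as demo:
    with gr.Tab('zefiro'):
        gr.ChatInterface(
            generate_text,  # streaming generator defined above
            title="zefiro-7b-v01 running on CPU (quantized Q4_K)",
            description="This is a quantized version of zefiro-7b-v01 running on CPU (very slow). It is less powerful than the original version, but it can even run on the free tier of huggingface.",
            examples=[
                "Dammi 3 idee di ricette che posso fare con i pistacchi",
                "Prepara un piano di esercizi da poter fare a casa",
                "Scrivi una poesia su una giornata di pioggia",
            ],
            cache_examples=False,       # run examples live instead of caching outputs
            retry_btn=None,
            undo_btn="Delete Previous",
            clear_btn="Clear",
        )

demo.queue(concurrency_count=1, max_size=5)  # serialize requests: one generation at a time
demo.launch()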