Spaces:

diarizers-community
/

DiarizationLM-GGUF

Running

wq2012 committed on Aug 3

Commit

1ad1ab5

•

1 Parent(s): 93baeba

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ from gpt4all import GPT4All
 from huggingface_hub import hf_hub_download
 from diarizationlm import utils
-title = "DiarizationLM GGUF inference on CPU"
 description = """
 A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
@@ -25,11 +25,11 @@ model = GPT4All(model_name=model_name,
                 device="cpu")
 print("Finish the model init process")
-def generater(message):
-    prompt = message + prompt_suffix
     max_new_tokens = round(len(prompt) / 3.0 * 1.2)
     outputs = []
-    for token in model.generate(prompt=prompt,
                                 temp=0.1,
                                 top_k=50,
                                 top_p=0.5,
@@ -40,7 +40,7 @@ def generater(message):
         yield completion
         if completion.endswith(" [eod]"):
             break
-    transferred_completion = utils.transfer_llm_completion(completion, message)
     yield transferred_completion
@@ -52,6 +52,7 @@ demo = gr.Interface(
     outputs=["text"],
     examples=[
         ["<speaker:1> Hello, my name is Tom. May I speak to Laura <speaker:2> please? Hello, this is Laura. <speaker:1> Hi Laura, how are you? This is <speaker:2> Tom. Hi Tom, I haven't seen you for a <speaker:1> while."],
    ]
 )

 from huggingface_hub import hf_hub_download
 from diarizationlm import utils
+title = "💬DiarizationLM GGUF inference on CPU💬"
 description = """
 A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
                 device="cpu")
 print("Finish the model init process")
+def generater(prompt):
+    llm_prompt = prompt + prompt_suffix
     max_new_tokens = round(len(prompt) / 3.0 * 1.2)
     outputs = []
+    for token in model.generate(prompt=llm_prompt,
                                 temp=0.1,
                                 top_k=50,
                                 top_p=0.5,
         yield completion
         if completion.endswith(" [eod]"):
             break
+    transferred_completion = utils.transfer_llm_completion(completion, prompt)
     yield transferred_completion
     outputs=["text"],
     examples=[
         ["<speaker:1> Hello, my name is Tom. May I speak to Laura <speaker:2> please? Hello, this is Laura. <speaker:1> Hi Laura, how are you? This is <speaker:2> Tom. Hi Tom, I haven't seen you for a <speaker:1> while."],
+        ["<speaker:1> This demo looks really <speaker:2> good! Thanks, I am glad to hear that."],
    ]
 )