Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ from gpt4all import GPT4All
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
from diarizationlm import utils
|
5 |
|
6 |
-
title = "DiarizationLM GGUF inference on CPU"
|
7 |
|
8 |
description = """
|
9 |
A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
|
@@ -25,11 +25,11 @@ model = GPT4All(model_name=model_name,
|
|
25 |
device="cpu")
|
26 |
print("Finish the model init process")
|
27 |
|
28 |
-
def generater(message):
|
29 |
-
|
30 |
max_new_tokens = round(len(prompt) / 3.0 * 1.2)
|
31 |
outputs = []
|
32 |
-
for token in model.generate(prompt=
|
33 |
temp=0.1,
|
34 |
top_k=50,
|
35 |
top_p=0.5,
|
@@ -40,7 +40,7 @@ def generater(message):
|
|
40 |
yield completion
|
41 |
if completion.endswith(" [eod]"):
|
42 |
break
|
43 |
-
transferred_completion = utils.transfer_llm_completion(completion,
|
44 |
yield transferred_completion
|
45 |
|
46 |
|
@@ -52,6 +52,7 @@ demo = gr.Interface(
|
|
52 |
outputs=["text"],
|
53 |
examples=[
|
54 |
["<speaker:1> Hello, my name is Tom. May I speak to Laura <speaker:2> please? Hello, this is Laura. <speaker:1> Hi Laura, how are you? This is <speaker:2> Tom. Hi Tom, I haven't seen you for a <speaker:1> while."],
|
|
|
55 |
]
|
56 |
)
|
57 |
|
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
from diarizationlm import utils
|
5 |
|
6 |
+
title = "💬DiarizationLM GGUF inference on CPU💬"
|
7 |
|
8 |
description = """
|
9 |
A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
|
|
|
25 |
device="cpu")
|
26 |
print("Finish the model init process")
|
27 |
|
28 |
+
def generater(prompt):
|
29 |
+
llm_prompt = prompt + prompt_suffix
|
30 |
max_new_tokens = round(len(prompt) / 3.0 * 1.2)
|
31 |
outputs = []
|
32 |
+
for token in model.generate(prompt=llm_prompt,
|
33 |
temp=0.1,
|
34 |
top_k=50,
|
35 |
top_p=0.5,
|
|
|
40 |
yield completion
|
41 |
if completion.endswith(" [eod]"):
|
42 |
break
|
43 |
+
transferred_completion = utils.transfer_llm_completion(completion, prompt)
|
44 |
yield transferred_completion
|
45 |
|
46 |
|
|
|
52 |
outputs=["text"],
|
53 |
examples=[
|
54 |
["<speaker:1> Hello, my name is Tom. May I speak to Laura <speaker:2> please? Hello, this is Laura. <speaker:1> Hi Laura, how are you? This is <speaker:2> Tom. Hi Tom, I haven't seen you for a <speaker:1> while."],
|
55 |
+
["<speaker:1> This demo looks really <speaker:2> good! Thanks, I am glad to hear that."],
|
56 |
]
|
57 |
)
|
58 |
|