# DiarizationLM GGUF inference demo (Gradio, CPU-only).
import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
# UI copy shown in the ChatInterface header.
title = "DiarizationLM GGUF inference on CPU"
description = """
DiarizationLM GGUF inference on CPU
"""
# Local directory and filename for the quantized (q4_k_m) GGUF weights.
model_path = "models"
model_name = "q4_k_m.gguf"
# Fetch the weights from the Hugging Face Hub at startup (no-op if already cached).
# NOTE(review): local_dir_use_symlinks is deprecated in recent huggingface_hub
# releases — confirm the installed version still accepts it.
hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
print("Start the model init process")
# Load for CPU inference; allow_download=False forces use of the file
# downloaded above rather than letting GPT4All fetch its own copy.
model = GPT4All(model_name=model_name, model_path=model_path, allow_download = False, device="cpu")
print("Finish the model init process")
# DiarizationLM prompting: raw input followed by " --> ", no system prompt.
model.config["promptTemplate"] = "{0} --> "
model.config["systemPrompt"] = ""
# NOTE(review): _is_chat_session_activated is a private GPT4All attribute,
# disabled here so prompts pass through without chat-session templating —
# verify it still exists when upgrading gpt4all.
model._is_chat_session_activated = False
print("Finish the model config process")
def generater(message, history, temperature=0.0, top_p=0.9, top_k=50):
    """Stream completion text for a speaker-tagged transcript segment.

    Args:
        message: Raw user text to be formatted into the model's prompt.
        history: Gradio chat history (unused; the model is stateless here).
        temperature: Sampling temperature; default 0.0 (greedy decoding).
        top_p: Nucleus-sampling probability mass; default 0.9.
        top_k: Top-k sampling cutoff; default 50.

    Yields:
        The cumulative generated text after each streamed token.
    """
    # Bug fix: the sampling parameters were previously declared but ignored
    # (hardcoded in the generate() call), and — because ChatInterface is built
    # with additional_inputs=[] — Gradio calls fn(message, history) only, so
    # the three required parameters raised a TypeError on every request.
    # Defaults match the old hardcoded values, preserving output behavior.
    prompt = model.config["promptTemplate"].format(message)
    # Cap generation length relative to prompt size: ~3 chars per token,
    # allowing the output to run ~1.2x the estimated prompt token count.
    max_new_tokens = round(len(prompt) / 3.0 * 1.2)
    outputs = []
    for token in model.generate(
        prompt=prompt,
        temp=temperature,
        top_k=top_k,
        top_p=top_p,
        max_tokens=max_new_tokens,
        streaming=True,
    ):
        outputs.append(token)
        yield "".join(outputs)
def vote(data: gr.LikeData):
    """No-op handler for chatbot like/dislike events.

    Reads the event's ``liked`` flag but takes no action either way;
    kept as a placeholder for future feedback logging.
    """
    _ = data.liked  # touch the flag exactly as the original branch test did
    return None
print("Create chatbot")
# Create the chatbot component up-front so the like/dislike handler can be
# attached to it inside the Blocks context below.
chatbot = gr.Chatbot()
print("Created chatbot")
iface = gr.ChatInterface(
    fn = generater,
    title=title,
    description = description,
    chatbot=chatbot,
    # NOTE(review): with additional_inputs=[] Gradio invokes
    # generater(message, history) only — the temperature/top_p/top_k
    # parameters in generater's signature are never supplied by the UI.
    additional_inputs=[],
    examples=[
        ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well."],
    ]
)
print("Added iface")
with gr.Blocks() as demo:
    # Register the (no-op) vote handler for like events, then embed the
    # chat interface into this Blocks app.
    chatbot.like(vote, None, None)
    iface.render()
print("Rendered iface")
if __name__ == "__main__":
    # Fix: removed a stray trailing "|" that made this line a SyntaxError.
    # Small queue because CPU inference is slow; limits concurrent requests.
    demo.queue(max_size=3).launch()