# DiarizationLM GGUF inference demo (Gradio, CPU-only).
import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
# UI copy shown in the ChatInterface header.
title = "DiarizationLM GGUF inference on CPU"
description = """
DiarizationLM GGUF inference on CPU
"""
# Local directory and filename for the quantized (q4_k_m) GGUF weights.
model_path = "models"
model_name = "q4_k_m.gguf"
# Fetch the weights from the Hugging Face Hub at startup (no-op if already cached).
# NOTE(review): local_dir_use_symlinks is deprecated in recent huggingface_hub
# releases — confirm the installed version still accepts it.
hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
print("Start the model init process")
# Load for CPU inference; allow_download=False forces use of the file
# downloaded above rather than letting GPT4All fetch its own copy.
model = GPT4All(model_name=model_name, model_path=model_path, allow_download = False, device="cpu")
print("Finish the model init process")
# DiarizationLM prompting: raw input followed by " --> ", no system prompt.
model.config["promptTemplate"] = "{0} --> "
model.config["systemPrompt"] = ""
# NOTE(review): _is_chat_session_activated is a private GPT4All attribute,
# disabled here so prompts pass through without chat-session templating —
# verify it still exists when upgrading gpt4all.
model._is_chat_session_activated = False
print("Finish the model config process")
def generater(message, history, temperature=0.0, top_p=0.9, top_k=50):
    """Stream completion text for a speaker-tagged transcript segment.

    Args:
        message: Raw user text to be formatted into the model's prompt.
        history: Gradio chat history (unused; the model is stateless here).
        temperature: Sampling temperature; default 0.0 (greedy decoding).
        top_p: Nucleus-sampling probability mass; default 0.9.
        top_k: Top-k sampling cutoff; default 50.

    Yields:
        The cumulative generated text after each streamed token.
    """
    # Bug fix: the sampling parameters were previously declared but ignored
    # (hardcoded in the generate() call), and — because ChatInterface is built
    # with additional_inputs=[] — Gradio calls fn(message, history) only, so
    # the three required parameters raised a TypeError on every request.
    # Defaults match the old hardcoded values, preserving output behavior.
    prompt = model.config["promptTemplate"].format(message)
    # Cap generation length relative to prompt size: ~3 chars per token,
    # allowing the output to run ~1.2x the estimated prompt token count.
    max_new_tokens = round(len(prompt) / 3.0 * 1.2)
    outputs = []
    for token in model.generate(
        prompt=prompt,
        temp=temperature,
        top_k=top_k,
        top_p=top_p,
        max_tokens=max_new_tokens,
        streaming=True,
    ):
        outputs.append(token)
        yield "".join(outputs)
def vote(data: gr.LikeData):
    """No-op handler for chatbot like/dislike events.

    Reads the event's ``liked`` flag but takes no action either way;
    kept as a placeholder for future feedback logging.
    """
    _ = data.liked  # touch the flag exactly as the original branch test did
    return None
print("Create chatbot")
# Create the chatbot component up-front so the like/dislike handler can be
# attached to it inside the Blocks context below.
chatbot = gr.Chatbot()
print("Created chatbot")
iface = gr.ChatInterface(
    fn = generater,
    title=title,
    description = description,
    chatbot=chatbot,
    # NOTE(review): with additional_inputs=[] Gradio invokes
    # generater(message, history) only — the temperature/top_p/top_k
    # parameters in generater's signature are never supplied by the UI.
    additional_inputs=[],
    examples=[
        ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well."],
    ]
)
print("Added iface")
with gr.Blocks() as demo:
    # Register the (no-op) vote handler for like events, then embed the
    # chat interface into this Blocks app.
    chatbot.like(vote, None, None)
    iface.render()
print("Rendered iface")
if __name__ == "__main__":
    # Fix: removed a stray trailing "|" that made this line a SyntaxError.
    # Small queue because CPU inference is slow; limits concurrent requests.
    demo.queue(max_size=3).launch()