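# Voice chat demo: Whisper transcribes Polish speech, a trurl-2 Llama model
# streams a reply, and MMS-TTS reads the answer back as audio.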
import time
from datetime import datetime

import gradio as gr
import soundfile as sf
import torch
import whisper
from ctransformers import AutoModelForCausalLM
from transformers import VitsModel, AutoTokenizer

params = {
    "max_new_tokens": 512,
    "stop": ["<end>", "<|endoftext|>", "[", "<user>"],
    "temperature": 0.7,
    "top_p": 0.8,
    "stream": True,
    "batch_size": 8,
}
whisper_model = whisper.load_model("small")
llm = AutoModelForCausalLM.from_pretrained("Aspik101/trurl-2-7b-pl-instruct_GGML", model_type="llama")
tts_model = VitsModel.from_pretrained("facebook/mms-tts-pol")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-pol")
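
# Build the Gradio UI: chat transcript, microphone input, and an audio player
# for the synthesized reply.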
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    audio_input = gr.Audio(source="microphone", type="filepath", show_label=False)
    submit_audio = gr.Button("Submit Audio")
    clear = gr.Button("Clear")
    audio_output = gr.Audio("temp_file.wav", label="Generated Audio (wav)", type="filepath", autoplay=False)

    def translate(audio):
        """Transcribe the recorded audio to Polish text with Whisper."""
        print("Sending the recording to Whisper!")
        transcription = whisper_model.transcribe(audio, language="pl")
        return transcription["text"]

    def read_text(text):
        """Synthesize the last assistant reply and save it as a wav file."""
        print("Text to read aloud:", text[-1][-1])
        inputs = tokenizer(text[-1][-1], return_tensors="pt")
        with torch.no_grad():
            output = tts_model(**inputs).waveform.squeeze().numpy()
        sf.write("temp_file.wav", output, tts_model.config.sampling_rate)
        return "temp_file.wav"

    def user(audio_data, history):
        """Transcribe the user's recording and append it to the chat history."""
        if audio_data:
            user_message = translate(audio_data)
            print("USER:")
            print("", history + [[user_message, None]])
            return history + [[user_message, None]]

    def parse_history(hist):
        """Flatten the chat history into a <user>/<assistant> prompt string."""
        history_ = ""
        for q, a in hist:
            history_ += f"<user>: {q} \n"
            if a:
                history_ += f"<assistant>: {a} \n"
        return history_

    def bot(history):
        """Stream the model's reply character by character into the last history entry."""
        print(f"When: {datetime.today().strftime('%Y-%m-%d %H:%M:%S')}")
        # The system prompt is in Polish: "You are an AI assistant. Answer briefly and in Polish."
        prompt = f"Jesteś AI asystentem. Odpowiadaj krótko i po polsku. {parse_history(history)}. <assistant>:"
        stream = llm(prompt, **params)
        history[-1][1] = ""
        answer_save = ""
        for character in stream:
            history[-1][1] += character
            answer_save += character
            time.sleep(0.005)
            yield history

    # Submit: transcribe the recording, stream the reply, then read it aloud.
    submit_audio.click(user, [audio_input, chatbot], [chatbot], queue=False).then(bot, chatbot, chatbot).then(read_text, chatbot, audio_output)
    clear.click(lambda: None, None, chatbot, queue=False)
demo.queue()
demo.launch(share=True)