# Spaces:
# Runtime error
# Runtime error
import os
import random
import time
import wave
from datetime import datetime

import gradio as gr
import torch
import whisper
from ctransformers import AutoModelForCausalLM
from transformers import VitsModel, AutoTokenizer
# Generation settings; `bot` forwards them unchanged as keyword arguments
# to the `llm(prompt, **params)` call.
params = dict(
    max_new_tokens=512,
    stop=["<end>", "<|endoftext|>", "[", "<user>"],
    temperature=0.7,
    top_p=0.8,
    stream=True,
    batch_size=8,
)
# All models are loaded once, at import time, and shared by every request.

# Speech-to-text model used by `translate`.
whisper_model = whisper.load_model("small")

# Text-generation model used by `bot`.
llm = AutoModelForCausalLM.from_pretrained(
    "Aspik101/trurl-2-7b-pl-instruct_GGML",
    model_type="llama",
)

# Text-to-speech model and its tokenizer (same checkpoint), used by `read_text`.
_TTS_CHECKPOINT = "facebook/mms-tts-pol"
tts_model = VitsModel.from_pretrained(_TTS_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_TTS_CHECKPOINT)
# Page layout: chat window, microphone recorder, two buttons and a player
# for the synthesized answer.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    audio_input = gr.Audio(source="microphone", type="filepath", show_label=False)
    submit_audio = gr.Button("Submit Audio")
    clear = gr.Button("Clear")
    # 'temp_file.wav' is only written by `read_text` after the first answer.
    # Preload it only when it already exists, so a fresh start does not try
    # to load a missing file as the component's initial value.
    audio_output = gr.Audio(
        'temp_file.wav' if os.path.isfile('temp_file.wav') else None,
        label="Generated Audio (wav)",
        type='filepath',
        autoplay=False,
    )
def translate(audio):
    """Transcribe a recording to text with the Whisper model.

    Args:
        audio: Recording handed straight to ``whisper_model.transcribe``
            (the caller in this file passes the microphone file path).

    Returns:
        The ``"text"`` entry of the transcription result; the language is
        fixed to Polish (``"pl"``).
    """
    print("__Wysyłam nagranie do whisper!")
    return whisper_model.transcribe(audio, language="pl")["text"]
def read_text(text):
    """Synthesize the latest assistant reply to speech and save it as WAV.

    Args:
        text: Chat history as a list of ``[user_message, assistant_reply]``
            pairs; only the reply of the last pair is read out.

    Returns:
        The path ``'temp_file.wav'`` of the written file, or ``None`` when
        the history is empty or the last reply is missing/empty.
    """
    reply = text[-1][-1] if text else None
    if not reply:
        # Nothing to read out (no turn yet, or the answer is still pending).
        return None

    print("Tutaj jest tekst to przeczytania!", reply)
    inputs = tokenizer(reply, return_tensors="pt")
    with torch.no_grad():
        # Assumes a single utterance, so squeeze() leaves a 1-D mono signal.
        waveform = tts_model(**inputs).waveform.squeeze()

    # Convert the float waveform to 16-bit PCM and write it with the standard
    # library; the previously used `sf` name was never imported in this file.
    samples = (waveform.clamp(-1.0, 1.0) * 32767).to(torch.int16).numpy()
    with wave.open('temp_file.wav', 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # two bytes per sample == int16
        wav_file.setframerate(tts_model.config.sampling_rate)
        # WAV data is little-endian regardless of the host byte order.
        wav_file.writeframes(samples.astype('<i2').tobytes())
    return 'temp_file.wav'
def user(audio_data, history):
    """Append the transcribed recording to the chat history as a new turn.

    Args:
        audio_data: Recording passed on to ``translate``; may be empty or
            ``None`` when nothing was recorded.
        history: Current chat history, a list of
            ``[user_message, assistant_reply]`` pairs (``None`` is treated
            as an empty history).

    Returns:
        A new list with ``[transcription, None]`` appended, or the history
        unchanged when there is no recording to transcribe.
    """
    history = history or []
    if not audio_data:
        # Without a recording there is no message to add; previously this
        # path failed because `user_message` was never assigned.
        return history

    user_message = translate(audio_data)
    updated = history + [[user_message, None]]
    print("USER!:")
    print("", updated)
    return updated
def parse_history(hist):
    """Render the chat history as the tagged transcript used in the prompt.

    Args:
        hist: Iterable of ``(question, answer)`` pairs; ``None`` or an empty
            sequence yields an empty string.

    Returns:
        One ``"<user>: ... \\n"`` line per pair, followed by an
        ``"<assistant>: ... \\n"`` line when the answer is non-empty.
    """
    lines = []
    for question, answer in hist or []:
        lines.append(f"<user>: {question} \n")
        if answer:
            # Pending or empty answers are left out of the transcript.
            lines.append(f"<assistant>: {answer} \n")
    return "".join(lines)
def bot(history):
    """Stream the assistant's answer to the last, still unanswered turn.

    Args:
        history: Chat history, a list of ``[user_message, assistant_reply]``
            pairs; the last pair is expected to have no reply yet.

    Yields:
        The history after each streamed chunk has been appended to the last
        reply. When there is no pending turn (empty history, or the last
        turn already has a reply) the history is yielded once, unchanged.
    """
    print(f"When: {datetime.today().strftime('%Y-%m-%d %H:%M:%S')}")

    if not history or history[-1][1]:
        # Nothing to answer: avoids indexing an empty history and avoids
        # overwriting an answer that was already generated.
        yield history
        return

    prompt = f"Jesteś AI assystentem. Odpowiadaj krótko i po polsku. {parse_history(history)}. <assistant>:"
    stream = llm(prompt, **params)
    history[-1][1] = ""
    for character in stream:
        history[-1][1] += character
        # Short pause between chunks, kept from the original implementation.
        time.sleep(0.005)
        yield history
# Event listeners have to be registered inside the Blocks context, so it is
# entered again here for the wiring.
with demo:
    # One voice turn: transcribe the recording, stream the answer, speak it.
    transcribed = submit_audio.click(user, [audio_input, chatbot], [chatbot], queue=False)
    answered = transcribed.then(bot, chatbot, chatbot)
    answered.then(read_text, chatbot, audio_output)

    # "Clear" empties the chat window.
    clear.click(lambda: None, None, chatbot, queue=False)
# Enable the request queue, then start the app with a public share link.
demo.queue().launch(share=True)