import time
from datetime import datetime

import gradio as gr
import soundfile as sf
import torch
import whisper
from ctransformers import AutoModelForCausalLM
from transformers import VitsModel, AutoTokenizer

# Generation parameters for the GGML LLaMA model.
# NOTE: the angle-bracket role tags in the original stop list were stripped
# (they appear as empty strings in the source); "<user>" and "<bot>" below are
# assumptions matching the prompt format used in bot() and parse_history().
params = {
    "max_new_tokens": 512,
    "stop": ["<user>", "<|endoftext|>", "[", "<bot>"],
    "temperature": 0.7,
    "top_p": 0.8,
    "stream": True,
    "batch_size": 8,
}

# Speech-to-text (Whisper), Polish instruction-tuned LLM (GGML), and Polish TTS (VITS).
whisper_model = whisper.load_model("small")
llm = AutoModelForCausalLM.from_pretrained("Aspik101/trurl-2-7b-pl-instruct_GGML", model_type="llama")
tts_model = VitsModel.from_pretrained("facebook/mms-tts-pol")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-pol")

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    audio_input = gr.Audio(source="microphone", type="filepath", show_label=False)
    submit_audio = gr.Button("Submit Audio")
    clear = gr.Button("Clear")
    audio_output = gr.Audio("temp_file.wav", label="Generated Audio (wav)", type="filepath", autoplay=False)

    def translate(audio):
        # Transcribe the recorded audio to Polish text with Whisper.
        print("Sending the recording to Whisper!")
        transcription = whisper_model.transcribe(audio, language="pl")
        return transcription["text"]

    def read_text(text):
        # Synthesize the last bot reply to speech with the VITS model and write it to a wav file.
        print("Text to read aloud:", text[-1][-1])
        inputs = tokenizer(text[-1][-1], return_tensors="pt")
        with torch.no_grad():
            output = tts_model(**inputs).waveform.squeeze().numpy()
        sf.write("temp_file.wav", output, tts_model.config.sampling_rate)
        return "temp_file.wav"

    def user(audio_data, history):
        # Append the transcribed user turn to the chat history.
        if audio_data:
            user_message = translate(audio_data)
            print("USER:")
            print("", history + [[user_message, None]])
            return history + [[user_message, None]]
        return history  # no recording submitted: keep the history unchanged

    def parse_history(hist):
        # Flatten the chat history into a plain-text prompt.
        # NOTE: the role tags were stripped from the original source; "<user>"/"<bot>" are assumptions.
        history_ = ""
        for q, a in hist:
            history_ += f"<user>: {q}\n"
            if a:
                history_ += f"<bot>: {a}\n"
        return history_

    def bot(history):
        # Stream the model's answer character by character into the last history entry.
        print(f"When: {datetime.today().strftime('%Y-%m-%d %H:%M:%S')}")
        # Prompt is in Polish for the Polish-instruct model:
        # "You are an AI assistant. Answer briefly and in Polish."
        # The trailing role tag was stripped in the source; "<bot>" is an assumption.
        prompt = f"Jesteś AI asystentem. Odpowiadaj krótko i po polsku. {parse_history(history)}. <bot>:"
        stream = llm(prompt, **params)
        history[-1][1] = ""
        for character in stream:
            history[-1][1] += character
            time.sleep(0.005)
            yield history

    submit_audio.click(user, [audio_input, chatbot], [chatbot], queue=False).then(
        bot, chatbot, chatbot
    ).then(read_text, chatbot, audio_output)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()