import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, WhisperProcessor, WhisperForConditionalGeneration
from gtts import gTTS
import librosa
import os


class InteractiveChat:
    def __init__(self):
        # Speech-to-text: Whisper
        self.whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-large")
        self.whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
        # Text generation: Zephyr, wrapped in a pipeline so the models loaded here are the ones used
        self.zephyr_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
        self.zephyr_model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", device_map="auto")
        self.zephyr_pipeline = pipeline("text-generation", model=self.zephyr_model, tokenizer=self.zephyr_tokenizer)

    def generate_response(self, input_data):
        # gr.Audio(type="filepath") passes a file path, so load and resample the audio to 16 kHz for Whisper
        speech, _ = librosa.load(input_data, sr=16000)
        input_features = self.whisper_processor(speech, sampling_rate=16000, return_tensors="pt").input_features
        predicted_ids = self.whisper_model.generate(input_features)
        transcription = self.whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        response = self.get_zephyr_response(transcription)
        self.speak(response)
        return response

    def get_zephyr_response(self, transcription):
        outputs = self.zephyr_pipeline(transcription, max_new_tokens=256)
        return outputs[0]["generated_text"]

    def speak(self, text):
        # Text-to-speech with gTTS; mpg321 must be installed on the host for local playback
        tts = gTTS(text=text, lang='en')
        tts.save("output.mp3")
        os.system("mpg321 output.mp3")


# Create an instance of the InteractiveChat class
chat = InteractiveChat()

# Define a function that wraps the generate_response method
def generate_response_fn(input_data):
    return chat.generate_response(input_data)

# Use the function in gr.Interface: the function goes in fn, followed by inputs and outputs
interface = gr.Interface(
    fn=generate_response_fn,
    inputs=gr.Audio(type="filepath"),  # Accept audio files
    outputs=gr.Textbox(),
)

interface.launch()
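If the Space fails before the app even starts, missing dependencies at build time are a common cause. A minimal requirements.txt along these lines should cover the imports above; the exact package set and versions are assumptions, so adjust and pin them to match your Space's hardware and Gradio version:

# requirements.txt (assumed minimal set for this Space)
gradio
transformers
torch
accelerate   # needed because the model is loaded with device_map="auto"
librosa      # used to load and resample the uploaded audio for Whisper
gTTS

Note that zephyr-7b-beta and whisper-large are large models, so a free CPU Space may also run out of memory or time out at startup; smaller checkpoints or GPU hardware are worth trying if the error persists after the dependencies are in place.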