import gradio as gr
import librosa
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    WhisperForConditionalGeneration,
    WhisperProcessor,
    pipeline,
)


class InteractiveChat:
    # Whisper is loaded once at class level so every instance shares the weights.
    whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-large")
    whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

    def __init__(self):
        self.zephyr_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
        self.zephyr_model = AutoModelForCausalLM.from_pretrained(
            "HuggingFaceH4/zephyr-7b-beta", device_map="auto"
        )
        # Build the pipeline once, reusing the loaded Zephyr weights instead of
        # pulling down a default model on every call.
        self.zephyr_pipeline = pipeline(
            "text-generation",
            model=self.zephyr_model,
            tokenizer=self.zephyr_tokenizer,
        )

    def generate_response(self, audio_path):
        # Whisper expects 16 kHz mono audio; librosa resamples the recording on load.
        waveform, _ = librosa.load(audio_path, sr=16000)
        input_features = self.whisper_processor(
            waveform, sampling_rate=16000, return_tensors="pt"
        ).input_features
        predicted_ids = self.whisper_model.generate(input_features)
        transcription = self.whisper_processor.batch_decode(
            predicted_ids, skip_special_tokens=True
        )[0]
        response = self.get_zephyr_response(transcription)
        self.speak(response)
        return response

    def get_zephyr_response(self, transcription):
        return self.zephyr_pipeline(transcription, max_new_tokens=256)[0]["generated_text"]

    def speak(self, text):
        # SpeechClient is not defined in this snippet; it stands in for whatever
        # text-to-speech backend is available in your environment.
        speech_client = SpeechClient()
        speech_client.synthesize(text)


chat = InteractiveChat()

interface = gr.Interface(
    fn=chat.generate_response,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),  # Gradio 4.x; older versions use source="microphone"
    outputs=gr.Textbox(),
)
interface.launch()
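
# --- Optional: a concrete text-to-speech backend --------------------------------
# SpeechClient above is never defined in the snippet. The subclass below is a
# minimal sketch of one possible local replacement built on the pyttsx3 offline
# TTS library; the library choice is an assumption, not part of the original
# example.
import pyttsx3


class LocalSpeechChat(InteractiveChat):
    def speak(self, text):
        engine = pyttsx3.init()   # platform's default offline TTS voice
        engine.say(text)          # queue the utterance
        engine.runAndWait()       # block until playback finishes

# To try it, construct the interface with LocalSpeechChat() in place of
# InteractiveChat(); a hosted TTS API would work equally well if higher-quality
# voices are needed.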