from transformers import pipeline, Conversation
import gradio as gr

# Speech-to-text pipeline (Whisper base)
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
)

# Conversational pipeline used to answer the transcribed query
# (an alternative model such as "microsoft/DialoGPT-medium" could be swapped in here)
model = pipeline("conversational", model="facebook/blenderbot-400M-distill")


def transcribe_speech(filepath):
    """Transcribe an audio file to text with the Whisper pipeline."""
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "english",  # update with the language you've fine-tuned on
        },
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


def handle_audio_input(audio_file):
    """Transcribe the audio, then pass the transcription to the chatbot."""
    try:
        # Step 1: Transcribe the audio
        transcribed_text = transcribe_speech(audio_file)
        print(f"Transcribed text: {transcribed_text}")

        # Step 2: Wrap the transcription in a Conversation and generate a response
        conversation = Conversation(transcribed_text)
        response = model(conversation)
        chatbot_response = response.generated_responses[-1]
        print(f"Chatbot response: {chatbot_response}")

        return transcribed_text, chatbot_response
    except Exception as e:
        print(f"Error: {e}")
        return "Error in processing audio", str(e)


# Create the Gradio Blocks container with one tab per audio input source
with gr.Blocks() as demo:
    gr.Markdown("## Customer query audio to text chatbot")
    with gr.Tab("Microphone"):
        mic_transcribe = gr.Interface(
            fn=handle_audio_input,
            inputs=gr.Audio(sources="microphone", type="filepath"),
            outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Chatbot Response")],
        )
        mic_transcribe.render()
    with gr.Tab("File Upload"):
        file_transcribe = gr.Interface(
            fn=handle_audio_input,
            inputs=gr.Audio(sources="upload", type="filepath"),
            outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Chatbot Response")],
        )
        file_transcribe.render()

# Launch the Gradio app
demo.launch(share=True)