"""Gradio app that transcribes an uploaded audio file into a chat pane."""

from os import path

import gradio as gr
import openai
import requests
import speech_recognition as sr
from pydub import AudioSegment

# Shared recognizer used for every transcription request.
r = sr.Recognizer()

# Intermediate WAV written when an MP3 upload has to be converted.
# NOTE(review): the name is fixed, so simultaneous requests write the same
# file -- confirm whether concurrent use needs per-request temp files.
CONVERTED_WAV = "con_sound.wav"


def record_text(audio_file) -> str:
    """Transcribe an audio file to lower-case text via Google Speech Recognition.

    Args:
        audio_file: Path of the audio file. A ``.mp3`` file (any letter case)
            is first converted to WAV with pydub. ``None`` or an empty path
            (e.g. nothing was uploaded) is accepted.

    Returns:
        The lower-cased transcript, or ``""`` when no file was given, the MP3
        conversion failed, the file could not be read, the speech was
        unintelligible, or the recognition request failed.
    """
    if not audio_file:
        print("No audio file provided")
        return ""

    sound = audio_file
    if path.splitext(sound)[1].lower() == ".mp3":
        try:
            # convert mp3 file to wav file
            AudioSegment.from_mp3(sound).export(CONVERTED_WAV, format="wav")
        except Exception as e:
            print(f"Error converting MP3 to WAV: {e}")
            return ""
        sound = CONVERTED_WAV

    try:
        with sr.AudioFile(sound) as source:
            print("Converting audio file to text..")
            # No adjust_for_ambient_noise() here: it reads the first second
            # from the file stream, which would cut the start of the
            # transcript, and record() does not use the energy threshold.
            audio = r.record(source)  # Use record instead of listen
    except ValueError as e:
        # Raised by sr.AudioFile when the file is not a readable
        # WAV/AIFF/FLAC file.
        print(f"Could not read audio file: {e}")
        return ""

    try:
        text = r.recognize_google(audio, language="en-US").lower()
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
        return ""
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition: {e}")
        return ""

    print("Converted audio is:\n" + text + '.')
    return text


def message_and_history(audio_file, history, api_key):
    """Transcribe ``audio_file`` and append the result to the chat history.

    Args:
        audio_file: Path of the uploaded audio file, or ``None``.
        history: List of ``(user, bot)`` message tuples kept in the Gradio
            state, or ``None`` on the first call.
        api_key: Value of the API-key textbox. Accepted because the UI wiring
            passes it, but not used by this function.

    Returns:
        The updated history twice: once for the chatbot, once for the state.
    """
    history = history or []
    # The user side of the exchange is always this fixed label; the bot side
    # is the transcript (possibly empty).
    input_text = "Type and press Enter"
    history.append((input_text, record_text(audio_file)))
    return history, history


prompt = "Type and press Enter"

block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))

with block:
    gr.Markdown("""

Audio Recognition - Ask and Learn about an Audio

""")
    with gr.Row():
        with gr.Column(scale=0.5):
            aud_input = gr.Audio(type="filepath", label="Upload Audio")
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(
                value="Upload & Start Chat", interactive=True, variant="primary"
            )
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask questions about the audio")
            message = gr.Textbox(label="User", placeholder=prompt)
            state = gr.State()

    # Both the button and pressing Enter in the textbox trigger transcription.
    upload_button.click(
        message_and_history,
        inputs=[aud_input, state, api_input],
        outputs=[chatbot, state],
    )
    message.submit(
        message_and_history,
        inputs=[aud_input, state, api_input],
        outputs=[chatbot, state],
    )
    # Clear the textbox after submission.
    message.submit(lambda: None, None, message, queue=False)

block.launch(share=True)