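# Hosting-page metadata captured together with this file (not part of the program):
# author avatar "Vihang28's picture"; commit "Create app.py" (1c66a80);
# page links "raw / history / blame"; scan status "No virus"; size 2.63 kB.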
import speech_recognition as sr
from pydub import AudioSegment
import gradio as gr
from os import path
import requests
import openai
# Module-level recognizer instance; record_text uses it for every transcription.
r = sr.Recognizer()
def record_text(audio_file):
    """Transcribe an audio file to lower-case English text.

    A file whose extension is ``.mp3`` (any letter case) is first converted
    to WAV inside a per-call temporary directory; any other path is handed to
    ``sr.AudioFile`` unchanged. The recorded audio is then passed to
    ``r.recognize_google`` on the module-level recognizer ``r``.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty string (nothing uploaded) is accepted.

    Returns:
        The recognized text in lower case, or ``""`` when no file was given,
        the MP3 conversion failed, the speech could not be understood, or the
        recognition request itself failed.
    """
    import tempfile  # local import: the module header does not import tempfile

    if not audio_file:
        print("No audio file provided")
        return ""

    # A per-call temporary directory keeps simultaneous requests from
    # overwriting each other's converted WAV, and removes it when done.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = audio_file
        if path.splitext(audio_file)[1].lower() == ".mp3":
            wav_path = path.join(work_dir, "con_sound.wav")
            try:
                # convert mp3 file to wav file
                AudioSegment.from_mp3(audio_file).export(wav_path, format="wav")
            except Exception as e:
                print(f"Error converting MP3 to WAV: {e}")
                return ""

        with sr.AudioFile(wav_path) as source:
            # adjust_for_ambient_noise() is deliberately not called: on a file
            # source it reads (and discards) the first second of audio, and
            # record() does not use the energy threshold it calibrates.
            print("Converting audio file to text..")
            audio = r.record(source)

    try:
        text = r.recognize_google(audio, language="en-US").lower()
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
        return ""
    except sr.RequestError as e:
        # Network/API failures are reported the same best-effort way as
        # unintelligible audio instead of crashing the UI handler.
        print(f"Could not request results from Google Speech Recognition service: {e}")
        return ""

    print("Converted audio is:\n" + text + '.')
    return text
def message_and_history(audio_file, history, api_key):
    """Transcribe the given audio and append the result to the chat history.

    Args:
        audio_file: Path of the audio file; forwarded to ``record_text``.
        history: List of ``(user_text, bot_text)`` tuples accumulated so far,
            or a falsy value (e.g. ``None``) when there is no history yet.
        api_key: Not used by this function; kept so existing callers that
            pass three arguments keep working.

    Returns:
        A 2-tuple containing the updated history list twice (the same list
        object in both positions).
    """
    history = history or []
    # The user-side entry is a fixed placeholder string. The previous
    # `len(input_text) == 0` branch could never run because this constant is
    # non-empty, and both branches appended the same pair anyway.
    input_text = "Type and press Enter"
    history.append((input_text, record_text(audio_file)))
    return history, history
# Placeholder text shown in the user message box.
prompt = "Type and press Enter"

# Gradio UI: an upload/settings column next to a chat column.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened; confirm both columns belong to the same row.
block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))
with block:
    gr.Markdown("""<h1><center>Audio Recognition - Ask and Learn about an Audio</center></h1> """)
    with gr.Row():
        # Integer scales 1:2 give the same width ratio as the former 0.5:1
        # (0.5 against the default scale of the other column) while using the
        # integer values Gradio expects for `scale`.
        with gr.Column(scale=1):
            aud_input = gr.Audio(type="filepath", label="Upload Audio")
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Ask questions about the audio")
            message = gr.Textbox(label="User", placeholder=prompt)
            state = gr.State()

    # Both the button click and pressing Enter in the message box run the
    # same handler; its two return values update the chatbot and the state.
    upload_button.click(message_and_history, inputs=[aud_input, state, api_input], outputs=[chatbot, state])
    message.submit(message_and_history, inputs=[aud_input, state, api_input], outputs=[chatbot, state])
    # Second submit listener: writes None back to the message box.
    message.submit(lambda: None, None, message, queue=False)

block.launch(share=True)