# NOTE(review): the lines below were non-code residue scraped from the
# Hugging Face Spaces listing (status, file size, git blob hashes, a
# ruler of line numbers). Converted to comments so the module parses.
# Spaces: Sleeping | File size: 3,337 Bytes
import speech_recognition as sr
from pydub import AudioSegment
import gradio as gr
from os import path
import requests
import openai
# Default placeholder text for the chat input textbox in the Gradio UI below.
prompt = "Type and press Enter"
def record_text(audio_file):
    """Transcribe an audio file to lower-case text with Google Speech Recognition.

    Parameters
    ----------
    audio_file : str
        Path to the audio file. MP3 input is first converted to a
        temporary WAV file ("con_sound.wav"), since ``sr.AudioFile``
        only reads WAV/AIFF/FLAC.

    Returns
    -------
    str
        The recognized speech, lower-cased.

    Raises
    ------
    sr.UnknownValueError, sr.RequestError
        Propagated from ``recognize_google`` (needs network access).
    """
    recognizer = sr.Recognizer()
    sound = audio_file
    # BUG FIX: the original referenced an undefined name `str_sound`,
    # raising NameError on every call. Derive the extension from the path.
    extension = str(sound).rsplit(".", 1)[-1].lower()
    if extension == "mp3":
        output_file = "con_sound.wav"
        # Convert mp3 -> wav so speech_recognition can open it.
        AudioSegment.from_mp3(sound).export(output_file, format="wav")
        sound = output_file
    with sr.AudioFile(sound) as source:
        recognizer.adjust_for_ambient_noise(source)
        print("Converting audio file to text..")
        # record() reads the whole file; duration=None means "to the end".
        audio_data = recognizer.record(source, duration=None)
    text = recognizer.recognize_google(audio_data, language="en-US")
    return text.lower()
def api_calling(audio_file, prompt, api_key):
    """Transcribe `audio_file` and ask OpenAI chat completions about it.

    Parameters
    ----------
    audio_file : str
        Path to the audio file; passed to :func:`record_text`.
    prompt : str
        User question/instruction. If empty, a default punctuation-fixing
        instruction is used instead.
    api_key : str
        OpenAI API key (sent as a Bearer token).

    Returns
    -------
    str
        The assistant message content from the first completion choice.

    Raises
    ------
    RuntimeError
        If the API response carries no "choices" (bad key, rate limit, ...).
    """
    audio_text = record_text(audio_file)
    if not prompt:
        prompt = "Apply proper punctuations, upper case and lower case to the provided text."
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    # BUG FIX: gpt-3.5-turbo expects `content` to be a plain string; the
    # list-of-parts form used originally is only accepted by
    # vision-capable models and is rejected here.
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": f"{prompt}\n\n{audio_text}"}
        ],
        "max_tokens": 1000,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=60,  # don't let a stalled request hang the UI forever
    )
    data = response.json()
    if "choices" not in data:
        # Surface API errors explicitly instead of an opaque KeyError.
        raise RuntimeError(f"OpenAI API error: {data.get('error', data)}")
    return data["choices"][0]["message"]["content"]
def message_and_history(audio_text, input, history, api_key):
    """Gradio callback: run one chat turn and append it to the history.

    Parameters
    ----------
    audio_text : str
        Audio file path (forwarded to :func:`api_calling` as its
        ``audio_file`` argument).
    input : str
        The user's typed question; replaced by a placeholder label in the
        chat log when empty.
    history : list[tuple[str, str]] | None
        Accumulated (user, bot) pairs; ``None`` on the first turn.
    api_key : str
        OpenAI API key, forwarded to :func:`api_calling`.

    Returns
    -------
    tuple
        ``(history, history)`` — same list for both the Chatbot component
        and the State component.
    """
    history = history or []
    output_text = api_calling(audio_text, input, api_key)
    # FIX: the original if/else branches appended the identical tuple;
    # only the placeholder label for an empty question is meaningful.
    if len(input) == 0:
        input = "Speech from the video."
    history.append((input, output_text))
    return history, history
# ---- Gradio UI ----------------------------------------------------------
# Layout: left column = audio upload + API key + start button;
# right column = chatbot, message box, and session state.
block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))
with block:
    gr.Markdown("""<h1><center>Audio Recognition - Ask and Learn about an Audio</center></h1> """)
    with gr.Row():
        with gr.Column(scale=0.5):
            aud_input = gr.Audio(type="filepath", label="Upload Audio")
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask questions about the audio")
            message = gr.Textbox(label="User", placeholder=prompt)
            state = gr.State()
    # Both the button click and pressing Enter in the textbox run a chat turn.
    upload_button.click(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
    message.submit(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
    # Clear the textbox after submit (second listener on the same event).
    message.submit(lambda: None, None, message, queue=False)
# BUG FIX: removed a stray trailing "|" after launch() — a scrape artifact
# that made this line a SyntaxError.
block.launch(share=True)