Spaces:
Sleeping
Sleeping
| import speech_recognition as sr | |
| from pydub import AudioSegment | |
| import gradio as gr | |
| from os import path | |
| import requests | |
| import openai | |
| from openai import OpenAI | |
# Placeholder text shown in the chat input textbox of the Gradio UI below.
prompt = "Type and press Enter"
def record_text(audio_file, api_key):
    """Transcribe an uploaded audio file with OpenAI Whisper.

    Args:
        audio_file: Path to the uploaded audio file (.mp3 or .wav,
            per the upload widget in the UI below).
        api_key: OpenAI API key used to authenticate the request.

    Returns:
        The transcription as a plain string (``response_format="text"``).
    """
    client = OpenAI(api_key=api_key)

    # Normalize the upload to mp3 before sending it to the API.
    # from_file() sniffs the container format, so both .wav and .mp3
    # uploads work — the original from_wav() call failed on the .mp3
    # files the UI advertises as supported.
    converted_path = "converted_sound.mp3"
    sound = AudioSegment.from_file(audio_file)
    sound.export(converted_path, format="mp3")

    # Context manager guarantees the handle is closed even if the
    # API call raises (the original leaked the open file object).
    with open(converted_path, "rb") as audio_handle:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_handle,
            response_format="text",
        )
    return transcript
def api_calling(audio_file, prompt, api_key):
    """Transcribe the audio and, if a prompt is given, answer it via chat.

    Args:
        audio_file: Path to the uploaded audio file.
        prompt: User question about the audio. When empty, the raw
            transcript is returned without a chat-completion round trip.
        api_key: OpenAI API key, used both for transcription and for the
            chat-completions HTTP request.

    Returns:
        Either the raw transcript (empty prompt) or the chat model's
        answer about the transcript.

    Raises:
        requests.HTTPError: If the chat-completions request fails.
    """
    audio_text = record_text(audio_file, api_key)

    # No question asked: just hand back the transcript. (The original
    # assigned a punctuation-fixing prompt here and then immediately
    # returned, so the assignment was dead code — removed.)
    if len(prompt) == 0:
        return audio_text

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "text", "text": audio_text},
                ],
            }
        ],
        "max_tokens": 1000,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
    )
    # Fail loudly on HTTP errors instead of a confusing KeyError on
    # "choices" when the API returns an error body.
    response.raise_for_status()
    audio_text_res = response.json()
    return audio_text_res["choices"][0]["message"]["content"]
def message_and_history(audio_text, input, history, api_key):
    """Append one (question, answer) turn to the chat history.

    Args:
        audio_text: Path of the uploaded audio file (forwarded to
            ``api_calling``).
        input: The user's question; may be empty on the initial upload.
        history: Existing chat history as a list of (user, bot) tuples,
            or None on the first call (Gradio State starts as None).
        api_key: OpenAI API key, forwarded to ``api_calling``.

    Returns:
        The updated history twice — once for the Chatbot display and
        once for the State component.
    """
    history = history or []
    output_text = api_calling(audio_text, input, api_key)
    # The original if/else appended identically in both branches; the
    # only difference was relabeling an empty question for readability.
    label = input if input else "Speech from the video."
    history.append((label, output_text))
    return history, history
# ----- Gradio UI: upload an audio file, then chat about its transcript -----
block = gr.Blocks(theme=gr.themes.Monochrome(primary_hue="slate"))
with block:
    gr.Markdown("""<h1><center>Audio Recognition - Ask & Learn about an Audio</center></h1> """)
    with gr.Row():
        # NOTE(review): Gradio 4.x expects an integer `scale`; 0.5 worked on
        # 3.x — confirm the installed gradio version.
        with gr.Column(scale=0.5):
            aud_input = gr.Audio(type="filepath", label="Upload .mp3 or .wav file", sources="upload")
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask questions about the audio")
            message = gr.Textbox(label="User", placeholder=prompt)
            # Holds the accumulated chat history across interactions.
            state = gr.State()
    # Both the button and pressing Enter in the textbox trigger a turn.
    upload_button.click(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
    message.submit(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
    # Clear the textbox after submit; queue=False so it clears immediately.
    message.submit(lambda: None, None, message, queue=False)

# Guard the launch so importing this module (e.g. for testing) does not
# start the web server; running the file as a script behaves as before.
if __name__ == "__main__":
    block.launch()