# NOTE: "Spaces: Sleeping / Sleeping" was page-status residue captured along with this file; it is not part of the program.
import speech_recognition as sr | |
from pydub import AudioSegment | |
import gradio as gr | |
from os import path | |
import requests | |
import openai | |
from openai import OpenAI | |
import numpy as np | |
# Placeholder hint displayed inside the chat input textbox.
prompt = "Type and press Enter"
def record_text(audio_file, api_key):
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    Args:
        audio_file: Path of the audio file to transcribe; it is opened in
            binary mode and uploaded as-is.
        api_key: OpenAI API key used to build the client.

    Returns:
        Whatever ``client.audio.transcriptions.create`` returns for
        ``response_format="text"`` (the transcription text).
    """
    client = OpenAI(api_key=api_key)
    # Use a context manager so the handle is closed even when the request
    # raises; the original left the file open and rebound the parameter.
    with open(audio_file, "rb") as audio_stream:
        return client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_stream,
            response_format="text",
        )
def api_calling(audio_file, prompt, api_key):
    """Transcribe the audio, then ask ``gpt-3.5-turbo`` about the transcript.

    The transcript produced by ``record_text`` is sent together with
    ``prompt`` as two text parts of a single user message to the OpenAI
    chat-completions endpoint.

    Args:
        audio_file: Audio input forwarded unchanged to ``record_text``.
        prompt: Instruction or question to apply to the transcript. When it
            is empty or ``None`` a default "fix punctuation and casing"
            instruction is used instead.
        api_key: OpenAI API key, used both for transcription and as the
            bearer token of the chat request.

    Returns:
        The ``content`` of the first choice's message in the API response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If the endpoint does not answer in time.
    """
    request_timeout_s = 120  # upper bound so a stalled connection cannot hang the UI

    audio_text = record_text(audio_file, api_key)

    # Truthiness covers both "" and None (len(None) would raise TypeError).
    if not prompt:
        prompt = "Apply proper punctuations, upper case and lower case to the provided text."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "text", "text": audio_text},
                ],
            }
        ],
        "max_tokens": 1500,
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=request_timeout_s,
    )
    # Fail with the HTTP status instead of an opaque KeyError on "choices"
    # when the API returns an error body (bad key, quota, ...).
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
def message_and_history(audio_text, input, history, api_key):
    """Run one chat turn and append it to the conversation history.

    Args:
        audio_text: The audio input coming from the UI; despite the name it
            is passed to ``api_calling`` as the audio to transcribe.
        input: The user's typed message; may be empty.
        history: List of ``(user_message, reply)`` tuples from earlier turns,
            or ``None`` on the first call.
        api_key: OpenAI API key forwarded to ``api_calling``.

    Returns:
        The updated history twice: once for the chatbot display and once for
        the stored state.

    Raises:
        gr.Error: If no audio has been provided.
    """
    if audio_text is None:
        # Surface a readable message in the UI instead of a TypeError raised
        # later when the missing audio is opened.
        raise gr.Error("Please upload an audio file first.")

    history = history or []
    output_text = api_calling(audio_text, input, api_key)

    # With no typed message the default instruction was applied downstream;
    # label the turn so the chat does not show an empty user bubble.
    if not input:
        input = "Speech from the video."
    history.append((input, output_text))
    return history, history
# Build the UI: audio upload + API key on the left, chat on the right.
block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))
with block:
    gr.Markdown("""<h1><center>Audio Recognition - Ask and Learn about an Audio</center></h1> """)
    with gr.Row():
        # Integer scales (1:2) keep the original 0.5:1 width ratio.
        with gr.Column(scale=1):
            # type="filepath": the handler opens the upload by path, so it
            # must receive a path rather than a (sample_rate, ndarray) tuple.
            aud_input = gr.Audio(type="filepath", label="Upload Audio", sources=["upload"])
            # Mask the key so it is not shown in clear text on screen.
            api_input = gr.Textbox(label="Enter Api-key", type="password")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Ask questions about the audio")
            message = gr.Textbox(label="User", placeholder=prompt)
            state = gr.State()

    # Both triggers run the same handler with the same wiring.
    chat_inputs = [aud_input, message, state, api_input]
    chat_outputs = [chatbot, state]
    upload_button.click(message_and_history, inputs=chat_inputs, outputs=chat_outputs)
    message.submit(message_and_history, inputs=chat_inputs, outputs=chat_outputs)
    # Clear the textbox after a message is submitted.
    message.submit(lambda: None, None, message, queue=False)

block.launch()