Spaces:

Vihang28
/

Audio_Recognition_QnA

Sleeping

App Files Files Community

Audio_Recognition_QnA / app.py

Vihang28

Update app.py

e687694 8 months ago

raw

history blame

No virus

2.82 kB

	import speech_recognition as sr
	from pydub import AudioSegment
	import gradio as gr
	from os import path
	import requests
	import openai
	from openai import OpenAI

	# Placeholder text displayed in the chat message textbox of the UI.
	prompt = "Type and press Enter"


	def record_text(audio_file, api_key):
	    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

	    Args:
	        audio_file: Path of the audio file to transcribe.
	        api_key: OpenAI API key used to construct the client.

	    Returns:
	        The transcript returned by ``client.audio.transcriptions.create``
	        (requested with ``response_format="text"``).
	    """
	    client = OpenAI(api_key=api_key)
	    # Use a context manager so the file handle is closed even when the
	    # transcription request raises.
	    with open(audio_file, "rb") as audio_handle:
	        return client.audio.transcriptions.create(
	            model="whisper-1",
	            file=audio_handle,
	            response_format="text",
	        )


	def api_calling(audio_file, prompt, api_key, timeout=120):
	    """Transcribe the audio and ask the chat model about the transcript.

	    The audio is first transcribed with ``record_text``; the transcript is
	    then sent together with ``prompt`` to the ``gpt-3.5-turbo`` chat
	    completions endpoint.

	    Args:
	        audio_file: Path of the audio file to transcribe.
	        prompt: Instruction or question for the model. When empty or
	            ``None``, a default "fix punctuation and casing" instruction
	            is used instead.
	        api_key: OpenAI API key, used for both the transcription and the
	            chat completion request.
	        timeout: Seconds to wait for the chat completion HTTP request
	            before giving up.

	    Returns:
	        The ``content`` of the first choice in the chat completion response.

	    Raises:
	        RuntimeError: If the response contains no ``choices`` (for example
	            when the API returns an error payload).
	    """
	    audio_text = record_text(audio_file, api_key)
	    # Truthiness check covers both "" and None.
	    if not prompt:
	        prompt = "Apply proper punctuations, upper case and lower case to the provided text."

	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {api_key}",
	    }
	    payload = {
	        "model": "gpt-3.5-turbo",
	        "messages": [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt},
	                    {"type": "text", "text": audio_text},
	                ],
	            }
	        ],
	        "max_tokens": 1500,
	    }
	    # A timeout keeps a stalled connection from blocking the request forever.
	    response = requests.post(
	        "https://api.openai.com/v1/chat/completions",
	        headers=headers,
	        json=payload,
	        timeout=timeout,
	    )
	    result = response.json()

	    choices = result.get("choices")
	    if not choices:
	        # Surface the API's own error message instead of an opaque KeyError.
	        error = result.get("error") or {}
	        detail = error.get("message") if isinstance(error, dict) else error
	        raise RuntimeError(
	            f"OpenAI chat completion failed (HTTP {response.status_code}): "
	            f"{detail or result}"
	        )
	    return choices[0]["message"]["content"]



	def message_and_history(audio_text, input, history, api_key):
	    """Run one chat turn and record it in the conversation history.

	    Args:
	        audio_text: Forwarded unchanged to ``api_calling`` as its first
	            argument.
	        input: The user's message; forwarded to ``api_calling`` as the
	            prompt.
	        history: List of ``(message, reply)`` tuples from earlier turns, or
	            a falsy value when there is no history yet.
	        api_key: OpenAI API key forwarded to ``api_calling``.

	    Returns:
	        The updated history twice, as a ``(history, history)`` tuple.
	    """
	    turns = history or []
	    reply = api_calling(audio_text, input, api_key)

	    # An empty message is logged under a fixed label instead of a blank entry.
	    shown_message = "Speech from the video." if len(input) == 0 else input
	    turns.append((shown_message, reply))

	    return turns, turns


	# Build the Gradio UI: audio upload and API key on the left, chat on the right.
	block = gr.Blocks(theme=gr.themes.Glass(primary_hue="slate"))
	with block:
	    gr.Markdown("""<h1><center>Audio Recognition - Ask and Learn about an Audio</center></h1> """)
	    with gr.Row():
	        # Integer scales (1 : 2) keep the original 0.5 : 1 width ratio while
	        # matching the integer type Gradio documents for ``scale``.
	        with gr.Column(scale=1):
	            aud_input = gr.Audio(type="filepath", label="Upload Audio")
	            # Mask the key so the secret is not displayed in clear text.
	            api_input = gr.Textbox(label="Enter Api-key", type="password")
	            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
	        with gr.Column(scale=2):
	            chatbot = gr.Chatbot(label="Ask questions about the audio")
	            message = gr.Textbox(label="User", placeholder=prompt)
	            # Holds the conversation history between events.
	            state = gr.State()

	    # Both the button and pressing Enter in the textbox trigger a chat turn.
	    upload_button.click(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
	    message.submit(message_and_history, inputs=[aud_input, message, state, api_input], outputs=[chatbot, state])
	    # Clear the textbox after the message has been submitted.
	    message.submit(lambda: None, None, message, queue=False)
	block.launch()