Video_QnA / app.py
Vihang28's picture
Update app.py
e737ee7 verified
raw
history blame contribute delete
No virus
4.44 kB
import speech_recognition as sr
from pydub import AudioSegment
import gradio as gr
from os import path
import requests
import openai
from openai import OpenAI
from moviepy.editor import *
# Placeholder text shown in the question textbox of the UI below.
prompt = "Type and press Enter"
def record_text(audio_file, api_key):
    """Transcribe a .wav file with the OpenAI Whisper API.

    The input .wav is first re-encoded to .mp3 (smaller upload payload),
    then sent to the ``whisper-1`` transcription endpoint.

    Parameters:
        audio_file: path to the input .wav file.
        api_key: OpenAI API key.

    Returns:
        The transcript as an SRT-formatted string.
    """
    client = OpenAI(api_key=api_key)
    output_file = "converted_sound.mp3"
    # Re-encode wav -> mp3 before upload to shrink the request body.
    sound = AudioSegment.from_wav(audio_file)
    sound.export(output_file, format="mp3")
    # Context manager guarantees the handle is closed (the original
    # opened the file and never closed it — a resource leak).
    with open(output_file, "rb") as mp3_handle:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=mp3_handle,
            response_format="srt",
        )
    return transcript
# return(str(path.getsize(audio_file)/1000000)+'mb')
# sound = audio_file
# sound_type = sound.split(".")
# if sound_type[-1] == 'mp3':
# input_file = sound
# output_file = "con_sound.wav"
# # convert mp3 file to wav file
# sound = AudioSegment.from_mp3(input_file)
# sound.export(output_file, format="wav")
# sound = "con_sound.wav"
# MyText = ""
# with sr.AudioFile(sound) as source:
# r.adjust_for_ambient_noise(source)
# print("Converting audio file to text..")
# audio2 = r.record(source, duration=None) # Use record instead of listen
# MyText = r.recognize_google(audio2, language="en-US", key=None, show_all=False)
# MyText = MyText.lower()
# return (MyText)
def api_calling(audio_file, prompt, api_key):
    """Transcribe the audio and, if a question was asked, answer it.

    Parameters:
        audio_file: path to the .wav file to transcribe.
        prompt: the user's question. When empty, the cleaned transcript
            text is returned directly without calling the chat API.
        api_key: OpenAI API key (used for both Whisper and chat calls).

    Returns:
        The cleaned transcript text (empty prompt), or the chat model's
        answer / an API error message (non-empty prompt).
    """
    audio_text = record_text(audio_file, api_key)
    # SRT output repeats in 4-line blocks: index, timestamps, text, blank.
    # Offset 2 within each block is the spoken text.
    srt_lines = audio_text.split("\n")
    transcript_text = "".join(
        " " + srt_lines[i] for i in range(2, len(srt_lines), 4)
    )
    if len(prompt) == 0:
        # No question asked: hand back the transcript itself.
        # (The original assigned a default "content writer" prompt here
        # but never used it — that dead assignment is removed.)
        return transcript_text
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    # Full SRT transcript (with timestamps) is supplied as
                    # context alongside the question.
                    {"type": "text", "text": audio_text},
                ],
            }
        ],
        "max_tokens": 1000,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
    )
    data = response.json()
    # Surface API errors instead of crashing with an opaque KeyError
    # when the response carries an "error" object rather than "choices".
    if "choices" not in data:
        return data.get("error", {}).get("message", "OpenAI API request failed.")
    return data["choices"][0]["message"]["content"]
def convert_to_mp3(mp4_file, wav_file):
    """Extract the audio track of an .mp4 video into an audio file.

    NOTE(review): despite the name, the output format follows the
    extension of ``wav_file`` — the caller passes "output.wav".

    Parameters:
        mp4_file: path to the input video file.
        wav_file: destination path for the extracted audio.
    """
    video = VideoFileClip(mp4_file)
    try:
        # The original had a dead `audio = wav_file` assignment and never
        # closed the clip (resource leak); both are fixed here.
        video.audio.write_audiofile(wav_file)
    finally:
        video.close()
def message_and_history(audio_text, input, history, api_key):
    """Gradio callback: process the uploaded video and run one chat turn.

    Parameters:
        audio_text: path to the uploaded .mp4 video file.
        input: the user's question (may be empty, meaning "just transcribe").
        history: chat history as a list of (question, answer) pairs, or None.
        api_key: OpenAI API key.

    Returns:
        (history, history) — one copy feeds the Chatbot display, the
        other the gr.State that persists across turns.
    """
    wav_file = "output.wav"
    convert_to_mp3(audio_text, wav_file)
    history = history or []
    output_text = api_calling(wav_file, input, api_key)
    # Both branches of the original if/else appended identically; the
    # only real difference was the label used for an empty question.
    label = input if input else "Speech from the video."
    history.append((label, output_text))
    return history, history
# --- Gradio UI wiring ---
# NOTE(review): indentation below is restored; the source paste had it stripped.
block = gr.Blocks(theme=gr.themes.Soft(primary_hue="slate"))
with block:
    # NOTE(review): heading says "Stock-Analysis" but the app answers
    # questions about an uploaded video — looks like a copy-paste leftover;
    # confirm before changing the user-visible string.
    gr.Markdown("""<h1><center>Stock-Analysis</center></h1> """)
    with gr.Row():
        with gr.Column(scale=0.5):
            # Left column: video upload + API key + trigger button.
            vid_input = gr.Video(format="mp4", label="Upload .mp4 file")
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column():
            # Right column: chat display and question box.
            chatbot = gr.Chatbot(label="Ask questions about the Video")
            message = gr.Textbox(label="User", placeholder=prompt)
    # Persists the (question, answer) history list across turns.
    state = gr.State()
    # Both the button click and pressing Enter in the textbox run one turn.
    upload_button.click(message_and_history, inputs=[vid_input, message, state, api_input], outputs=[chatbot, state])
    message.submit(message_and_history, inputs=[vid_input, message, state, api_input], outputs=[chatbot, state])
    # Second submit handler clears the textbox after the turn is queued.
    message.submit(lambda: None, None, message, queue=False)
block.launch()