File size: 4,444 Bytes
14610a8
 
 
 
 
 
 
c3c0dac
14610a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
980fa00
14610a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e737ee7
 
 
 
14610a8
e9a7f50
 
9eb9675
e737ee7
14610a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7839c76
 
b5612b3
 
7839c76
14610a8
 
b5612b3
7839c76
 
14610a8
7839c76
14610a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a54529
14610a8
0f0dccd
14610a8
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import speech_recognition as sr
from pydub import AudioSegment
import gradio as gr
from os import path
import requests
import openai
from openai import OpenAI
from moviepy.editor import *

# Placeholder text shown in the user's message textbox (also used as the
# Gradio Textbox placeholder below).
prompt = "Type and press Enter"


def record_text(audio_file, api_key):
    """Transcribe a .wav audio file via OpenAI Whisper and return SRT text.

    Parameters
    ----------
    audio_file : str
        Path to the input .wav file.
    api_key : str
        OpenAI API key used to authenticate the transcription request.

    Returns
    -------
    str
        The transcript in SRT format (4-line blocks: index, timestamp,
        text, blank line).
    """
    client = OpenAI(api_key=api_key)
    output_file = "converted_sound.mp3"
    # Whisper accepts wav directly, but this pipeline converts to mp3 first
    # (smaller upload); behavior kept from the original.
    sound = AudioSegment.from_wav(audio_file)
    sound.export(output_file, format="mp3")
    # Context manager guarantees the handle is closed even if the API
    # call raises (the original leaked an open file object).
    with open(output_file, "rb") as mp3_handle:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=mp3_handle,
            response_format="srt",
        )
    return transcript


def api_calling(audio_file, prompt, api_key):
    """Transcribe `audio_file`; optionally answer `prompt` about the speech.

    Parameters
    ----------
    audio_file : str
        Path to a .wav file to transcribe (forwarded to `record_text`).
    prompt : str
        User question. When empty, the cleaned transcript itself is returned.
    api_key : str
        OpenAI API key for both transcription and chat completion.

    Returns
    -------
    str
        Either the joined transcript text (empty prompt) or the model's
        chat-completion answer.
    """
    audio_text = record_text(audio_file, api_key)
    sp_txt = audio_text.split("\n")
    # SRT blocks are 4 lines (index, timestamp, text, blank); the spoken
    # text sits at offset 2 of each block.
    new_lst = ''
    for i in range(2, len(sp_txt), 4):
        new_lst = new_lst + ' ' + sp_txt[i]
    if len(prompt) == 0:
        # No question asked: return the cleaned transcript directly.
        # (Removed the original dead reassignment of `prompt` here — the
        # value was never used before this return.)
        return new_lst
    else:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                        {
                            "type": "text",
                            "text": audio_text
                        }
                    ]
                }
            ],
            "max_tokens": 1000
        }
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        # Fail loudly on HTTP errors instead of raising an opaque KeyError
        # when indexing the error body below.
        response.raise_for_status()
        audio_text_res = response.json()
        return audio_text_res["choices"][0]["message"]["content"]

def convert_to_mp3(mp4_file, wav_file):
    """Extract the audio track of `mp4_file` and write it to `wav_file`.

    NOTE(review): despite the name, the output format follows `wav_file`'s
    extension (a .wav in this app); the name is kept so callers keep working.

    Parameters
    ----------
    mp4_file : str
        Path to the source video file.
    wav_file : str
        Destination path for the extracted audio.
    """
    # (Removed the original dead `audio = wav_file` assignment, which was
    # immediately overwritten.)
    video = VideoFileClip(mp4_file)
    try:
        video.audio.write_audiofile(wav_file)
    finally:
        # Release the underlying ffmpeg reader handles.
        video.close()

def message_and_history(audio_text, input, history, api_key):
    """Gradio callback: extract audio from the uploaded video, transcribe it,
    answer `input` (if any), and append the exchange to the chat history.

    Parameters
    ----------
    audio_text : str
        Path to the uploaded .mp4 video (Gradio Video component value).
    input : str
        The user's question; empty string means "just show the transcript".
    history : list[tuple[str, str]] | None
        Prior (question, answer) pairs; None on the first call.
    api_key : str
        OpenAI API key forwarded to the transcription/chat pipeline.

    Returns
    -------
    tuple
        (history, history) — the same list twice, for the Chatbot and the
        State outputs.
    """
    wav_file = "output.wav"
    convert_to_mp3(audio_text, wav_file)
    history = history or []
    output_text = api_calling(wav_file, input, api_key)
    # Both original branches appended the same tuple; only the label for an
    # empty question differs.
    label = input if len(input) else "Speech from the video."
    history.append((label, output_text))
    return history, history


# Build the chat UI: video upload + API key on the left, chatbot on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="slate")) as block:
    gr.Markdown("""<h1><center>Stock-Analysis</center></h1> """)
    with gr.Row():
        with gr.Column(scale=0.5):
            vid_input = gr.Video(format="mp4", label="Upload .mp4 file")
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask questions about the Video")
            message = gr.Textbox(label="User", placeholder=prompt)
            state = gr.State()

    # Both the button click and textbox submit drive the same callback with
    # the same wiring.
    chat_inputs = [vid_input, message, state, api_input]
    chat_outputs = [chatbot, state]
    upload_button.click(message_and_history, inputs=chat_inputs, outputs=chat_outputs)
    message.submit(message_and_history, inputs=chat_inputs, outputs=chat_outputs)
    # Clear the textbox after each submission.
    message.submit(lambda: None, None, message, queue=False)
block.launch()