Vihang28 commited on
Commit
14610a8
1 Parent(s): b6d22c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py CHANGED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import speech_recognition as sr
2
+ from pydub import AudioSegment
3
+ import gradio as gr
4
+ from os import path
5
+ import requests
6
+ import openai
7
+ from openai import OpenAI
8
+
9
+ prompt = "Type and press Enter"
10
+
11
+
12
def record_text(audio_file, api_key):
    """Transcribe a .wav audio file to text with OpenAI Whisper.

    The input is first re-encoded to mp3 (a much smaller upload than
    wav, which matters for Whisper's request size limit), then sent to
    the ``whisper-1`` transcription endpoint.

    Args:
        audio_file: Path to a .wav file on disk.
        api_key: OpenAI API key used to authenticate the request.

    Returns:
        The transcript as plain text (``response_format="text"``).
    """
    client = OpenAI(api_key=api_key)

    # Re-encode wav -> mp3 before upload to keep the payload small.
    converted_path = "converted_sound.mp3"
    sound = AudioSegment.from_wav(audio_file)
    sound.export(converted_path, format="mp3")

    # Context manager fixes the original leak: the mp3 handle was
    # opened but never closed.
    with open(converted_path, "rb") as mp3_handle:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=mp3_handle,
            response_format="text",
        )
    return transcript
49
def api_calling(audio_file, prompt, api_key):
    """Transcribe *audio_file* and optionally post-process with GPT.

    Args:
        audio_file: Path to the .wav file to transcribe.
        prompt: Instruction for gpt-3.5-turbo. If empty, the raw
            transcript is returned and no chat-completion call is made.
        api_key: OpenAI API key, used for both endpoints.

    Returns:
        The raw transcript (empty prompt) or the model's reply text.

    Raises:
        requests.HTTPError: If the chat-completions request fails.
    """
    audio_text = record_text(audio_file, api_key)

    # No user prompt: return the transcript directly.  (The original
    # assigned a default prompt here and then returned before ever
    # using it — dead code, removed.)
    if not prompt:
        return audio_text

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "text", "text": audio_text},
                ],
            }
        ],
        "max_tokens": 1000,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=60,  # fail fast instead of hanging forever
    )
    # Surface auth/quota/model errors instead of a confusing KeyError
    # when indexing a JSON error body below.
    response.raise_for_status()
    audio_text_res = response.json()
    return audio_text_res["choices"][0]["message"]["content"]
83
+
84
def message_and_history(audio_text, input, history, api_key):
    """Gradio callback: answer *input* about the uploaded video.

    Fixes from the original: it referenced ``VideoFileClip``,
    ``mp4_file`` and ``mp3_file``, none of which were imported or
    defined (a guaranteed NameError at runtime).  Audio extraction is
    now done with pydub, which is already imported and reads mp4 via
    ffmpeg, and the result is written as .wav to match ``record_text``'s
    ``AudioSegment.from_wav`` expectation.

    Args:
        audio_text: Filepath of the uploaded video (from ``gr.Video``).
        input: The user's question; may be empty (name kept for
            interface compatibility, despite shadowing the builtin).
        history: Chat log as a list of (question, answer) pairs, or
            None on the first call.
        api_key: OpenAI API key.

    Returns:
        ``(history, history)`` — the same list for both the Chatbot
        and the State outputs.
    """
    # Extract the video's audio track into a wav file.
    extracted_wav = "extracted_audio.wav"
    AudioSegment.from_file(audio_text).export(extracted_wav, format="wav")

    history = history or []
    output_text = api_calling(extracted_wav, input, api_key)

    # Label empty questions so the chat log stays readable.
    label = input if input else "Speech from the video."
    history.append((label, output_text))

    return history, history
98
+
99
+
100
# ---- Gradio UI: video upload on the left, chat on the right ----
block = gr.Blocks(theme=gr.themes.Soft(primary_hue="slate"))

with block:
    gr.Markdown("""<h1><center>Stock-Analysis</center></h1> """)
    with gr.Row():
        # Left column: everything needed before chatting can start.
        with gr.Column(scale=0.5):
            vid_input = gr.Video(
                type="filepath", label="Upload .mp4 file", sources="upload"
            )
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(
                value="Upload & Start Chat", interactive=True, variant="primary"
            )
        # Right column: the conversation itself.
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask questions about the Video")
            message = gr.Textbox(label="User", placeholder=prompt)
            state = gr.State()

    # The button and pressing Enter both trigger the same handler with
    # identical wiring.
    handler_io = dict(
        inputs=[vid_input, message, state, api_input],
        outputs=[chatbot, state],
    )
    upload_button.click(message_and_history, **handler_io)
    message.submit(message_and_history, **handler_io)
    # Clear the textbox once the question has been submitted.
    message.submit(lambda: None, None, message, queue=False)

block.launch()