Vihang28 commited on
Commit
14610a8
1 Parent(s): b6d22c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py CHANGED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import speech_recognition as sr
2
+ from pydub import AudioSegment
3
+ import gradio as gr
4
+ from os import path
5
+ import requests
6
+ import openai
7
+ from openai import OpenAI
8
+
9
+ prompt = "Type and press Enter"
10
+
11
+
12
def record_text(audio_file, api_key):
    """Transcribe a .wav audio file to text with OpenAI Whisper.

    The input is first re-encoded to mp3 (a much smaller upload than
    wav, which matters for Whisper's request size limit), then sent to
    the ``whisper-1`` transcription endpoint.

    Args:
        audio_file: Path to a .wav file on disk.
        api_key: OpenAI API key used to authenticate the request.

    Returns:
        The transcript as plain text (``response_format="text"``).
    """
    client = OpenAI(api_key=api_key)

    # Re-encode wav -> mp3 before upload to keep the payload small.
    converted_path = "converted_sound.mp3"
    sound = AudioSegment.from_wav(audio_file)
    sound.export(converted_path, format="mp3")

    # Context manager fixes the original leak: the mp3 handle was
    # opened but never closed.
    with open(converted_path, "rb") as mp3_handle:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=mp3_handle,
            response_format="text",
        )
    return transcript
49
def api_calling(audio_file, prompt, api_key):
    """Transcribe *audio_file* and optionally post-process with GPT.

    Args:
        audio_file: Path to the .wav file to transcribe.
        prompt: Instruction for gpt-3.5-turbo. If empty, the raw
            transcript is returned and no chat-completion call is made.
        api_key: OpenAI API key, used for both endpoints.

    Returns:
        The raw transcript (empty prompt) or the model's reply text.

    Raises:
        requests.HTTPError: If the chat-completions request fails.
    """
    audio_text = record_text(audio_file, api_key)

    # No user prompt: return the transcript directly.  (The original
    # assigned a default prompt here and then returned before ever
    # using it — dead code, removed.)
    if not prompt:
        return audio_text

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "text", "text": audio_text},
                ],
            }
        ],
        "max_tokens": 1000,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=60,  # fail fast instead of hanging forever
    )
    # Surface auth/quota/model errors instead of a confusing KeyError
    # when indexing a JSON error body below.
    response.raise_for_status()
    audio_text_res = response.json()
    return audio_text_res["choices"][0]["message"]["content"]
83
+
84
def message_and_history(audio_text, input, history, api_key):
    """Gradio callback: answer *input* about the uploaded video.

    Fixes from the original: it referenced ``VideoFileClip``,
    ``mp4_file`` and ``mp3_file``, none of which were imported or
    defined (a guaranteed NameError at runtime).  Audio extraction is
    now done with pydub, which is already imported and reads mp4 via
    ffmpeg, and the result is written as .wav to match ``record_text``'s
    ``AudioSegment.from_wav`` expectation.

    Args:
        audio_text: Filepath of the uploaded video (from ``gr.Video``).
        input: The user's question; may be empty (name kept for
            interface compatibility, despite shadowing the builtin).
        history: Chat log as a list of (question, answer) pairs, or
            None on the first call.
        api_key: OpenAI API key.

    Returns:
        ``(history, history)`` — the same list for both the Chatbot
        and the State outputs.
    """
    # Extract the video's audio track into a wav file.
    extracted_wav = "extracted_audio.wav"
    AudioSegment.from_file(audio_text).export(extracted_wav, format="wav")

    history = history or []
    output_text = api_calling(extracted_wav, input, api_key)

    # Label empty questions so the chat log stays readable.
    label = input if input else "Speech from the video."
    history.append((label, output_text))

    return history, history
98
+
99
+
100
# ---- Gradio UI: video upload on the left, chat on the right ----
block = gr.Blocks(theme=gr.themes.Soft(primary_hue="slate"))

with block:
    gr.Markdown("""<h1><center>Stock-Analysis</center></h1> """)
    with gr.Row():
        # Left column: everything needed before chatting can start.
        with gr.Column(scale=0.5):
            vid_input = gr.Video(
                type="filepath", label="Upload .mp4 file", sources="upload"
            )
            api_input = gr.Textbox(label="Enter Api-key")
            upload_button = gr.Button(
                value="Upload & Start Chat", interactive=True, variant="primary"
            )
        # Right column: the conversation itself.
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask questions about the Video")
            message = gr.Textbox(label="User", placeholder=prompt)
            state = gr.State()

    # The button and pressing Enter both trigger the same handler with
    # identical wiring.
    handler_io = dict(
        inputs=[vid_input, message, state, api_input],
        outputs=[chatbot, state],
    )
    upload_button.click(message_and_history, **handler_io)
    message.submit(message_and_history, **handler_io)
    # Clear the textbox once the question has been submitted.
    message.submit(lambda: None, None, message, queue=False)

block.launch()