arjunanand13 committed
Commit f10dfdc
1 Parent(s): e9cb65b

Update app.py

Files changed (1):
  app.py  +27 -2
app.py CHANGED
@@ -22,7 +22,7 @@ import shutil
 from PIL import Image
 import google.generativeai as genai
 from huggingface_hub import InferenceClient
-
+from openai import OpenAI
 
 class VideoClassifier:
     global audio_time, setup_time, caption_time, classification_time
@@ -45,6 +45,7 @@ class VideoClassifier:
         self.setup_gemini_model()
         self.setup_paths()
         self.hf_key = os.environ.get("HF_KEY", None)
+        self.client = OpenAI(api_key="sk-proj-KY1qI7zTpsUiJhMUHuNdT3BlbkFJLOjVnTUSpYJi87yUtSEI")
         # self.whisper_model = whisper.load_model("base")
 
     def setup_paths(self):
@@ -164,6 +165,29 @@ class VideoClassifier:
         task = "transcribe"
         result = pipe(audiotrack, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
         return result["text"]
+
+    def audio_extraction_chatgptapi(self, video_input):
+        """For CPU inference, use this API-based path for faster transcription."""
+        global audio_time
+        start_time_audio = time.time()
+        print(f"Processing video: {video_input} with {self.no_of_frames} frames.")
+        mp4_file = video_input
+        video_name = mp4_file.split("/")[-1]
+        wav_file = "results/audiotrack.wav"
+        video_clip = VideoFileClip(mp4_file)
+        audioclip = video_clip.audio
+        audioclip.write_audiofile(wav_file)  # write_audiofile returns None, so don't reassign the path
+        audioclip.close()
+        video_clip.close()
+        audiotrack = "results/audiotrack.wav"
+        # The transcription endpoint expects an open file object, not a path string
+        with open(audiotrack, "rb") as audio_file:
+            transcription = self.client.audio.transcriptions.create(
+                model="whisper-1",
+                file=audio_file
+            )
+        print(transcription.text)
+        return transcription.text
 
     def generate_text(self, inputs, parameters=None):
         if parameters is None:
@@ -178,7 +202,8 @@ class VideoClassifier:
 
     def classify_video(self, video_input):
         global classification_time, caption_time
-        transcript = self.audio_extraction_space(video_input)
+        # transcript = self.audio_extraction_space(video_input)
+        transcript = self.audio_extraction_chatgptapi(video_input)
         start_time_caption = time.time()
         video = cv2.VideoCapture(video_input)
         length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
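A note on the client setup in this diff: the OpenAI key is hardcoded in __init__, which publishes the secret in the Space's commit history (a key exposed this way should be rotated). Below is a minimal sketch, not part of the commit, of the same setup reading the key from the environment, mirroring the HF_KEY lookup already in the file; OPENAI_API_KEY is the variable name the OpenAI SDK reads by default.

import os

from openai import OpenAI

# Suggested revision: read the key from the Space's secrets instead of
# committing it. OpenAI() would also pick up OPENAI_API_KEY on its own
# if api_key were omitted.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None:
    raise RuntimeError("OPENAI_API_KEY is not set")
client = OpenAI(api_key=api_key)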
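For reference, the flow that the new audio_extraction_chatgptapi method implements can be exercised outside the class. A standalone sketch under the same assumptions (moviepy extracts the audio track, the hosted whisper-1 model transcribes it); the sample file name and the results/ directory are placeholders.

import os

from moviepy.editor import VideoFileClip
from openai import OpenAI

def transcribe_video(mp4_path, wav_path="results/audiotrack.wav"):
    # Extract the audio track from the video into a WAV file.
    os.makedirs(os.path.dirname(wav_path), exist_ok=True)
    with VideoFileClip(mp4_path) as clip:
        clip.audio.write_audiofile(wav_path)
    # The transcription endpoint takes an open binary file object.
    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    with open(wav_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcription.text

if __name__ == "__main__":
    print(transcribe_video("sample.mp4"))  # placeholder input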
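One design note on the classify_video change: the local audio_extraction_space path is only commented out, so it stays available. If the Space should degrade gracefully when the OpenAI call fails, a hypothetical helper (not in the commit) could wrap the two paths:

def get_transcript(classifier, video_input):
    # Hypothetical fallback: try the API path first, then fall back to
    # the local Space pipeline if the request raises.
    try:
        return classifier.audio_extraction_chatgptapi(video_input)
    except Exception:
        return classifier.audio_extraction_space(video_input)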