Pradheep1647 committed on
Commit
cbf53ef
1 Parent(s): 9f703fc

updates the analyze_video function

Files changed (1)
  1. app.py +5 -9
app.py CHANGED
@@ -11,12 +11,11 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import BlipProcessor, BlipForConditionalGeneration
 import cv2
 
-YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')
-
 def download_youtube_video(video_url, api_key):
     ydl_opts = {
         'format': 'bestvideo+bestaudio',
         'outtmpl': os.path.join('./', '%(title)s.%(ext)s'),
+        'quiet': True,
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([video_url])
@@ -91,11 +90,8 @@ emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
 emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
 emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
 
-def analyze_video(video_url):
-    global output_path
-    output_path = './'
-
-    video_path = download_youtube_video(video_url, YOUTUBE_API_KEY)
+def analyze_video(video_url, api_key):
+    video_path = download_youtube_video(video_url, api_key)
     mp4_path = convert_to_mp4(video_path)
     audio_path = extract_audio_from_video(mp4_path)
     audio_wav_path = convert_mp3_to_wav(audio_path)
@@ -122,7 +118,7 @@ def analyze_video(video_url):
         if frame_count_video % n_frame_interval == 0:
             pixel_values_video = preprocess_frame(frame_video)
             caption_video = generate_caption(pixel_values_video)
-            predicted_emotions_video, _ = predict_emotions(caption_video)
+            predicted_emotions_video = predict_emotions(caption_video)
             emotion_vectors_video.append(np.array(list(predicted_emotions_video.values())))
 
         frame_count_video += 1
@@ -148,7 +144,7 @@ with gr.Blocks() as iface:
 
     with gr.Row():
         transcript_output = gr.Textbox(label="Transcript", interactive=False)
-        audio_emotion_output = gr.Textbox(label="Emotion from Audio", interactive=False)
+        audio_emotion_output = gr.Textbox(label="Emotion from Audio and Text", interactive=False)
         visual_emotion_output = gr.Textbox(label="Emotion from Video", interactive=False)
 
     submit_button.click(analyze_video, inputs=[video_url, api_key], outputs=[transcript_output, audio_emotion_output, visual_emotion_output])
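Note on the predict_emotions change: dropping the tuple unpacking (predicted_emotions_video, _) implies the helper now returns a single label-to-score dict, since the call site immediately converts .values() to a numpy vector. The diff does not show predict_emotions itself, so the following is only a minimal sketch of a compatible helper built on the j-hartmann/emotion-english-distilroberta-base model already loaded in app.py; the body is an assumption, not the file's actual implementation.

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)

def predict_emotions(text):
    # Score the caption and return a {label: probability} dict, matching the
    # post-commit call site where .values() is turned into a numpy vector.
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    probs = torch.softmax(logits, dim=-1).squeeze(0)
    labels = [emotion_model.config.id2label[i] for i in range(probs.shape[0])]
    return {label: float(p) for label, p in zip(labels, probs)}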
 
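With YOUTUBE_API_KEY no longer read from the environment, analyze_video now receives the key from the Gradio interface. A minimal sketch of the wiring implied by the submit_button.click line follows; the video_url, api_key, and submit_button component definitions are not part of the visible hunks and are illustrative assumptions.

import gradio as gr

def analyze_video(video_url, api_key):
    # Placeholder body; app.py downloads the video, extracts audio, and runs
    # the caption and emotion models here before returning three strings.
    return "transcript", "audio/text emotion", "visual emotion"

with gr.Blocks() as iface:
    video_url = gr.Textbox(label="YouTube Video URL")   # assumed component
    api_key = gr.Textbox(label="YouTube API Key")       # assumed component
    submit_button = gr.Button("Analyze")                 # assumed component
    with gr.Row():
        transcript_output = gr.Textbox(label="Transcript", interactive=False)
        audio_emotion_output = gr.Textbox(label="Emotion from Audio and Text", interactive=False)
        visual_emotion_output = gr.Textbox(label="Emotion from Video", interactive=False)
    submit_button.click(analyze_video,
                        inputs=[video_url, api_key],
                        outputs=[transcript_output, audio_emotion_output, visual_emotion_output])

iface.launch()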