fffiloni committed
Commit 91c5ebc
Parent: 003efa5

Update app.py

Files changed (1)
  1. app.py +8 -4
app.py CHANGED
@@ -1,6 +1,7 @@
 import spaces
 import gradio as gr
 from gradio_client import Client
+client = Client("https://vikhyatk-moondream1.hf.space/")
 import cv2
 from moviepy.editor import *
 
@@ -48,7 +49,7 @@ def extract_frames(video_in, interval=24, output_format='.jpg'):
 
         # Check if successful read and not past end of video
         if success:
-            print('Read a new frame:', success)
+            #print('Read a new frame:', success)
 
             # Save current frame if it meets criteria
             if count % interval == 0:
@@ -70,6 +71,7 @@ def extract_frames(video_in, interval=24, output_format='.jpg'):
 
     return frames
 
+'''
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from PIL import Image
 
@@ -79,11 +81,12 @@ model = AutoModelForCausalLM.from_pretrained(
     model_id, trust_remote_code=True, revision=revision
 )
 tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
+'''
 
-@spaces.GPU()
+#@spaces.GPU()
 def process_image(image_in):
-    '''
-    client = Client("https://vikhyatk-moondream1.hf.space/")
+
+
     result = client.predict(
         image_in,  # filepath in 'image' Image component
         "Describe precisely the image in one sentence.",  # str in 'Question' Textbox component
@@ -98,6 +101,7 @@ def process_image(image_in):
     result = model.answer_question(enc_image, "Describe the image in one sentence.", tokenizer)
     print(result)
    return result
+    '''
 
 def extract_audio(video_path):
     video_clip = VideoFileClip(video_path)
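
For context, here is a minimal, self-contained sketch of the pattern app.py moves to in this commit: the local moondream setup (the transformers AutoModelForCausalLM / AutoTokenizer block, now fenced off inside a triple-quoted string) and the @spaces.GPU() decorator are disabled, and each extracted frame is instead captioned by calling the hosted vikhyatk/moondream1 Space through gradio_client. The api_name="/answer_question" endpoint and the usage path below are assumptions, as they are not visible in the hunks above.

from gradio_client import Client

# Module-level client, created once and reused for every extracted frame,
# mirroring the line added near the top of app.py in this commit.
client = Client("https://vikhyatk-moondream1.hf.space/")

def process_image(image_in):
    # image_in: local filepath of one frame extracted from the input video
    result = client.predict(
        image_in,                                         # filepath for the 'image' Image component
        "Describe precisely the image in one sentence.",  # str for the 'Question' Textbox component
        api_name="/answer_question",                      # assumed endpoint name, not shown in the diff hunks
    )
    print(result)
    return result

# Hypothetical usage on a single frame:
# caption = process_image("frames/frame_0.jpg")

Delegating inference to the remote Space means this app no longer needs a GPU slot of its own, which is presumably why @spaces.GPU() is commented out along with the local model and tokenizer loading. Note that with recent gradio_client releases the image argument would typically be wrapped in gradio_client.handle_file(); the plain filepath string above matches the older client API that the diff reflects.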