arjunanand13 committed • Commit 1ec7ea5 • Parent(s): 19fd223
Update app.py
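The change adds wall-clock timing instrumentation to app.py: start/end timestamps are recorded around the Whisper audio extraction, the frame-captioning loop, the class-generation step, and model setup in save_model_choice, and the elapsed times are printed at the end of classify_video.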
app.py CHANGED
@@ -25,6 +25,7 @@ from huggingface_hub import InferenceClient
 
 class VideoClassifier:
     def __init__(self, no_of_frames, mode='interface',model='gemini'):
+
         self.no_of_frames = no_of_frames
         self.mode = mode
         self.model_name = model.strip().lower()
@@ -104,6 +105,7 @@ class VideoClassifier:
         self.llm = HuggingFacePipeline(pipeline=self.generate_text)
 
     def audio_extraction(self,video_input):
+        start_time_audio = time.time()
         print(f"Processing video: {video_input} with {self.no_of_frames} frames.")
         mp4_file = video_input
         video_name = mp4_file.split("/")[-1]
@@ -117,6 +119,9 @@ class VideoClassifier:
         result = self.whisper_model.transcribe(audiotrack, fp16=False)
         transcript = result["text"]
         print("TRANSCRIPT",transcript)
+        end_time_audio = time.time()
+        # print("TIME TAKEN FOR AUDIO CONVERSION (WHISPER)",end_time_audio-start_time_audio)
+
         return transcript
 
     def generate_text(self, inputs, parameters=None):
@@ -133,7 +138,7 @@ class VideoClassifier:
     def classify_video(self,video_input):
 
         transcript=self.audio_extraction(video_input)
-
+        start_time_caption = time.time()
         video = cv2.VideoCapture(video_input)
         length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
         no_of_frame = int(self.no_of_frames)
@@ -158,7 +163,10 @@ class VideoClassifier:
         print("CAPTIONS", captions)
         video.release()
         cv2.destroyAllWindows()
+        end_time_caption = time.time()
+        # print("TIME TAKEN FOR IMAGE CAPTIONING", end_time_caption-start_time_caption)
 
+        start_time_generation = time.time()
         main_categories = Path("main_classes.txt").read_text()
         main_categories_list = ['Automotive', 'Books and Literature', 'Business and Finance', 'Careers', 'Education','Family and Relationships',
             'Fine Art', 'Food & Drink', 'Healthy Living', 'Hobbies & Interests', 'Home & Garden','Medical Health', 'Movies', 'Music and Audio',
@@ -303,9 +311,17 @@ class VideoClassifier:
         second_video = os.path.join(os.path.dirname(__file__), "PersonalFinance_clip.mp4")
 
         # return final_answer, first_video, second_video
+        end_time_generation = time.time()
+        print("MODEL SETUP TIME",end_time_setup-start_time_setup)
+        print("TIME TAKEN FOR AUDIO CONVERSION (WHISPER)",end_time_audio-start_time_audio)
+        print("TIME TAKEN FOR IMAGE CAPTIONING", end_time_caption-start_time_caption)
+        print("TIME TAKEN FOR CLASS GENERATION",end_time_generation - start_time_generation)
         return final_answer
 
+
     def save_model_choice(self,model_name):
+        start_time_setup = time.time()
+
         self.model_name = model_name
         if self.model_name=='mistral':
             print("Setting up Mistral model for Class Selection")
@@ -313,6 +329,9 @@ class VideoClassifier:
         else :
             print("Setting up Gemini model for Class Selection")
             self.setup_gemini_model()
+        end_time_setup = time.time()
+        # print("MODEL SETUP TIME",end_time_setup-start_time_setup)
+
         return "Model selected: " + model_name
 
     def launch_interface(self):
@@ -366,7 +385,8 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     vc = VideoClassifier(no_of_frames=args.no_of_frames, mode=args.mode , model=args.model)
-
+
+
     if args.mode == 'interface':
         vc.launch_interface()
     elif args.mode == 'inference' and args.video_path and args.model:
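Note that, as committed, the summary prints at the end of classify_video reference end_time_setup/start_time_setup and end_time_audio/start_time_audio, which are local variables of save_model_choice and audio_extraction respectively, so reaching those lines raises NameError (a plausible cause of this Space's runtime-error status). Below is a minimal sketch of one way to make the timings visible across methods by storing them on the instance; the timings dict, the _timed helper, and the stubbed stage bodies are illustrative and not part of the original app.py, and the sketch assumes time is imported at the top of the file (the import sits outside the hunks shown).

import time


class VideoClassifierTimings:
    """Sketch only: each stage records its duration on the instance,
    so classify_video can print a summary without touching another
    method's locals."""

    def __init__(self):
        # Hypothetical cross-method store; not in the original app.py.
        self.timings = {}

    def _timed(self, stage, fn):
        # Run one pipeline stage and record its wall-clock duration.
        start = time.time()
        result = fn()
        self.timings[stage] = time.time() - start
        return result

    def audio_extraction(self, video_input):
        # Whisper transcription stubbed; only the timing pattern is real.
        return self._timed("AUDIO CONVERSION (WHISPER)",
                           lambda: f"transcript of {video_input}")

    def classify_video(self, video_input):
        transcript = self.audio_extraction(video_input)
        self._timed("IMAGE CAPTIONING", lambda: None)   # captioning loop stubbed
        self._timed("CLASS GENERATION", lambda: None)   # LLM call stubbed
        for stage, seconds in self.timings.items():
            print(f"TIME TAKEN FOR {stage}: {seconds:.4f}s")
        return transcript


if __name__ == "__main__":
    VideoClassifierTimings().classify_video("PersonalFinance_clip.mp4")

With instance-level timings, moving a print from one method to another can no longer break the script, and the commented-out per-stage prints left behind in audio_extraction and save_model_choice would become unnecessary.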