Build

Paused

ManishThota committed on Mar 9

Commit

0b1b270

•

1 Parent(s): 2724204

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -64,7 +64,7 @@ def predict_answer(image, video, question, max_tokens=100):
     input_ids = tokenizer(text, return_tensors='pt').input_ids.to(device)
-    if image is not None:
         # Process as an image
         image = image.convert("RGB")
         image_tensor = model.image_preprocess(image)
@@ -78,12 +78,12 @@ def predict_answer(image, video, question, max_tokens=100):
         return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
-    elif video is not None:
         # Process as a video
         frames = video_to_frames(video)
         answers = []
         for frame in frames:
-            image = extract_frames(frame)
             image_tensor = model.image_preprocess(image)
             # Generate the answer
@@ -95,7 +95,7 @@ def predict_answer(image, video, question, max_tokens=100):
             answer = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
             answers.append(answer)
-        return "\n".join(answers)
     else:
         return "Unsupported file type. Please upload an image or video."

     input_ids = tokenizer(text, return_tensors='pt').input_ids.to(device)
+    if image:
         # Process as an image
         image = image.convert("RGB")
         image_tensor = model.image_preprocess(image)
         return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
+    elif video:
         # Process as a video
         frames = video_to_frames(video)
         answers = []
         for frame in frames:
+            image = Image.open(extract_frames(frame)).convert("RGB")
             image_tensor = model.image_preprocess(image)
             # Generate the answer
             answer = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
             answers.append(answer)
+        return answers
     else:
         return "Unsupported file type. Please upload an image or video."