ManishThota committed on
Commit
bda5bd0
1 Parent(s): 69344b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -83,34 +83,46 @@ def predict_answer(video, image, question):
83
  return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
84
 
85
  elif video:
86
- # Process as a video
87
  frames = video_to_frames(video)
88
- answers = []
89
- for frame in frames:
90
- image = extract_frames(frame)
91
- image_tensor = model.image_preprocess([image])
92
-
93
- # Generate the answer
94
  output_ids = model.generate(
95
  input_ids,
96
  max_new_tokens=25,
97
  images=image_tensor,
98
  use_cache=True)[0]
99
 
100
- answer = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
101
- answers.append(answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
- # Modify this logic based on your specific needs
104
- most_common_answer = Counter(answers).most_common(1)[0][0]
105
 
106
- # Safely evaluate the most common answer assuming it's a string representation of a Python literal
107
- try:
108
- evaluated_answer = ast.literal_eval(most_common_answer)
109
- except (ValueError, SyntaxError):
110
- # Handle malformed answer string
111
- evaluated_answer = f"Error evaluating answer: {most_common_answer}"
112
 
113
- return evaluated_answer
114
 
115
  # return ast.literal_eval(answers[0])
116
 
 
83
  return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
84
 
85
  elif video:
 
86
  frames = video_to_frames(video)
87
+ image = extract_frames(frames[2])
88
+ image_tensor = model.image_preprocess([image])
89
+ # Generate the answer
 
 
 
90
  output_ids = model.generate(
91
  input_ids,
92
  max_new_tokens=25,
93
  images=image_tensor,
94
  use_cache=True)[0]
95
 
96
+ return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
97
+
98
+ # # Process as a video
99
+ # frames = video_to_frames(video)
100
+ # answers = []
101
+ # for frame in frames:
102
+ # image = extract_frames(frame)
103
+ # image_tensor = model.image_preprocess([image])
104
+
105
+ # # Generate the answer
106
+ # output_ids = model.generate(
107
+ # input_ids,
108
+ # max_new_tokens=25,
109
+ # images=image_tensor,
110
+ # use_cache=True)[0]
111
+
112
+ # answer = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
113
+ # answers.append(answer)
114
 
115
+ # # Modify this logic based on your specific needs
116
+ # most_common_answer = Counter(answers).most_common(1)[0][0]
117
 
118
+ # # Safely evaluate the most common answer assuming it's a string representation of a Python literal
119
+ # try:
120
+ # evaluated_answer = ast.literal_eval(most_common_answer)
121
+ # except (ValueError, SyntaxError):
122
+ # # Handle malformed answer string
123
+ # evaluated_answer = f"Error evaluating answer: {most_common_answer}"
124
 
125
+ # return evaluated_answer
126
 
127
  # return ast.literal_eval(answers[0])
128