Video-to-Multilingual-OCR

Runtime error

App Files Community

stupidog04 committed on Apr 9, 2023

Commit

658f973

•

1 Parent(s): d3af935

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -11

app.py CHANGED Viewed

@@ -61,9 +61,11 @@ def inference(video, lang, time_step, full_scan=False):
     temporal_profiles = [[] for _ in range(len(largest_boxes))]
     # Match bboxes to position and store the text read by OCR
-    while success:
-        if count % (int(frame_rate * time_step)) == 0:
-            if full_scan:
                 bounds = reader.readtext(frame)
                 for box in bounds:
                     bbox_pos = box_position(box)
@@ -72,7 +74,15 @@ def inference(video, lang, time_step, full_scan=False):
                         if distance < 50:
                             temporal_profiles[i].append((count / frame_rate, box[1]))
                             break
-            else:
                 for i, box in enumerate(largest_boxes):
                     x1, y1 = box[0][0]
                     x2, y2 = box[0][2]
@@ -87,13 +97,11 @@ def inference(video, lang, time_step, full_scan=False):
                     text = reader.readtext(cropped_frame)
                     if text:
                         temporal_profiles[i].append((count / frame_rate, text[0][1]))
-            im = PIL.Image.fromarray(frame)
-            im_with_boxes = draw_boxes(im, bounds)
-            output_frames.append(np.array(im_with_boxes))
-        success, frame = vidcap.read()
-        count += 1
     # Default resolutions of the frame are obtained. The default resolutions are system dependent.
     # We convert the resolutions from float to integer.

     temporal_profiles = [[] for _ in range(len(largest_boxes))]
     # Match bboxes to position and store the text read by OCR
+    # Match bboxes to position and store the text read by OCR
+    if full_scan:
+        # Match bboxes to position and store the text read by OCR
+        while success:
+            if count % (int(frame_rate * time_step)) == 0:
                 bounds = reader.readtext(frame)
                 for box in bounds:
                     bbox_pos = box_position(box)
                         if distance < 50:
                             temporal_profiles[i].append((count / frame_rate, box[1]))
                             break
+                im = PIL.Image.fromarray(frame)
+                im_with_boxes = draw_boxes(im, bounds)
+                output_frames.append(np.array(im_with_boxes))
+            success, frame = vidcap.read()
+            count += 1
+    else:
+        # Match bboxes to position and store the text read by OCR
+        while success:
+            if count % (int(frame_rate * time_step)) == 0:
                 for i, box in enumerate(largest_boxes):
                     x1, y1 = box[0][0]
                     x2, y2 = box[0][2]
                     text = reader.readtext(cropped_frame)
                     if text:
                         temporal_profiles[i].append((count / frame_rate, text[0][1]))
+                im = PIL.Image.fromarray(frame)
+                im_with_boxes = draw_boxes(im, bounds)
+                output_frames.append(np.array(im_with_boxes))
+            success, frame = vidcap.read()
+            count += 1
     # Default resolutions of the frame are obtained. The default resolutions are system dependent.
     # We convert the resolutions from float to integer.