nanushio committed
Commit · 0c18aca
Parent(s): c45a2ea

- [MINOR] [SOURCE] [UPDATE] 1. update app.py

Files changed:
- app.py +3 -1
- cover/datasets/cover_datasets.py +32 -17
app.py
CHANGED
@@ -66,8 +66,10 @@ def inference_one_video(input_video):
     """
     TESTING
     """
+    # Convert input video to tensor and adjust dimensions
+    input_video_tensor = torch.from_numpy(input_video).permute(0, 3, 1, 2)
     views, _ = spatial_temporal_view_decomposition(
-
+        input_video_tensor, dopt["sample_types"], temporal_samplers
     )
 
     for k, v in views.items():
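For context, a minimal sketch of the conversion this change introduces, assuming input_video arrives as a (T, H, W, C) uint8 NumPy array (the layout Gradio-style video inputs typically produce); the data here is synthetic:

import numpy as np
import torch

# Synthetic stand-in for the decoded input video: 8 RGB frames of 64x64.
input_video = np.random.randint(0, 256, size=(8, 64, 64, 3), dtype=np.uint8)

# The conversion added above: (T, H, W, C) -> (T, C, H, W), i.e. the per-frame
# CHW layout that the updated spatial_temporal_view_decomposition expects
# when it is handed a tensor instead of a file path.
input_video_tensor = torch.from_numpy(input_video).permute(0, 3, 1, 2)
assert input_video_tensor.shape == (8, 3, 64, 64)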
cover/datasets/cover_datasets.py
CHANGED
@@ -232,34 +232,49 @@ def spatial_temporal_view_decomposition(
     video_path, sample_types, samplers, is_train=False, augment=False,
 ):
     video = {}
-    if video_path.endswith(".yuv"):
-        print("This part will be deprecated due to large memory cost.")
-        ## This is only an adaptation to LIVE-Qualcomm
-        ovideo = skvideo.io.vread(
-            video_path, 1080, 1920, inputdict={"-pix_fmt": "yuvj420p"}
-        )
-        for stype in samplers:
-            frame_inds = samplers[stype](ovideo.shape[0], is_train)
-            imgs = [torch.from_numpy(ovideo[idx]) for idx in frame_inds]
-            video[stype] = torch.stack(imgs, 0).permute(3, 0, 1, 2)
-        del ovideo
-    else:
-        decord.bridge.set_bridge("torch")
-        vreader = VideoReader(video_path)
-        ### Avoid duplicated video decoding!!! Important!!!!
+    if torch.is_tensor(video_path):
         all_frame_inds = []
         frame_inds = {}
         for stype in samplers:
-            frame_inds[stype] = samplers[stype](len(vreader), is_train)
+            frame_inds[stype] = samplers[stype](video_path.shape[0], is_train)
             all_frame_inds.append(frame_inds[stype])
 
         ### Each frame is only decoded one time!!!
         all_frame_inds = np.concatenate(all_frame_inds, 0)
-        frame_dict = {idx: vreader[idx] for idx in np.unique(all_frame_inds)}
+        frame_dict = {idx: video_path[idx].permute(1, 2, 0) for idx in np.unique(all_frame_inds)}
 
         for stype in samplers:
             imgs = [frame_dict[idx] for idx in frame_inds[stype]]
             video[stype] = torch.stack(imgs, 0).permute(3, 0, 1, 2)
+    else:
+        if video_path.endswith(".yuv"):
+            print("This part will be deprecated due to large memory cost.")
+            ## This is only an adaptation to LIVE-Qualcomm
+            ovideo = skvideo.io.vread(
+                video_path, 1080, 1920, inputdict={"-pix_fmt": "yuvj420p"}
+            )
+            for stype in samplers:
+                frame_inds = samplers[stype](ovideo.shape[0], is_train)
+                imgs = [torch.from_numpy(ovideo[idx]) for idx in frame_inds]
+                video[stype] = torch.stack(imgs, 0).permute(3, 0, 1, 2)
+            del ovideo
+        else:
+            decord.bridge.set_bridge("torch")
+            vreader = VideoReader(video_path)
+            ### Avoid duplicated video decoding!!! Important!!!!
+            all_frame_inds = []
+            frame_inds = {}
+            for stype in samplers:
+                frame_inds[stype] = samplers[stype](len(vreader), is_train)
+                all_frame_inds.append(frame_inds[stype])
+
+            ### Each frame is only decoded one time!!!
+            all_frame_inds = np.concatenate(all_frame_inds, 0)
+            frame_dict = {idx: vreader[idx] for idx in np.unique(all_frame_inds)}
+
+            for stype in samplers:
+                imgs = [frame_dict[idx] for idx in frame_inds[stype]]
+                video[stype] = torch.stack(imgs, 0).permute(3, 0, 1, 2)
 
     sampled_video = {}
     for stype, sopt in sample_types.items():
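To illustrate the tensor branch added above, here is a self-contained sketch of the "index each frame only once" pattern it reuses; decompose_views and the two toy samplers are hypothetical stand-ins, not the repository's actual sampler API:

import numpy as np
import torch

def decompose_views(frames, samplers, is_train=False):
    # frames: (T, C, H, W) tensor; samplers: name -> callable(num_frames, is_train) -> index array.
    video = {}
    all_frame_inds, frame_inds = [], {}
    for stype in samplers:
        frame_inds[stype] = samplers[stype](frames.shape[0], is_train)
        all_frame_inds.append(frame_inds[stype])

    # Gather each required frame at most once, even when samplers overlap.
    all_frame_inds = np.concatenate(all_frame_inds, 0)
    frame_dict = {idx: frames[idx].permute(1, 2, 0) for idx in np.unique(all_frame_inds)}

    for stype in samplers:
        imgs = [frame_dict[idx] for idx in frame_inds[stype]]
        video[stype] = torch.stack(imgs, 0).permute(3, 0, 1, 2)  # (C, T, H, W)
    return video

# Usage with two overlapping toy samplers.
frames = torch.rand(16, 3, 32, 32)
samplers = {
    "technical": lambda n, train: np.arange(0, n, 2),
    "aesthetic": lambda n, train: np.arange(0, n, 4),
}
views = decompose_views(frames, samplers)
for k, v in views.items():
    print(k, tuple(v.shape))  # technical (3, 8, 32, 32), aesthetic (3, 4, 32, 32)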