Spaces:

innat
/

VideoMAE

Running

App Files Community

innat committed on Oct 12, 2023

Commit

36e374f

1 Parent(s): 93ca8bb

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -11

app.py CHANGED Viewed

@@ -10,6 +10,18 @@ from utils import read_video, frame_sampling, denormalize, reconstrunction
 from utils import IMAGENET_MEAN, IMAGENET_STD, num_frames, patch_size, input_size
 from labels import K400_label_map, SSv2_label_map, UCF_label_map
 MODELS = {
     'K400': [
         './TFVideoMAE_S_K400_16x224_FT',
@@ -48,9 +60,16 @@ def tube_mask_generator(mask_ratio):
     return bool_masked_pos_tf
-def get_model(data_type):
-    ft_model = keras.models.load_model(MODELS[data_type][0])
-    pt_model = keras.models.load_model(MODELS[data_type][1])
     label_map = LABEL_MAPS.get(data_type)
     label_map = K400_label_map
@@ -59,14 +78,14 @@ def get_model(data_type):
     return ft_model, pt_model, label_map
-def inference(video_file, data_type, mask_ratio):
     # get sample data
     container = read_video(video_file)
     frames = frame_sampling(container, num_frames=num_frames)
     # get models
     bool_masked_pos_tf = tube_mask_generator(mask_ratio)
-    ft_model, pt_model, label_map = get_model(data_type)
     ft_model.trainable = False
     pt_model.trainable = False
@@ -110,12 +129,17 @@ def main():
         fn=inference,
         inputs=[
             gr.Video(type="file", label="Input Video"),
-            gr.Radio(
-                datasets,
-                type='value',
-                default=datasets[0],
-                label='Dataset',
-            ),
             gr.Slider(
                 0.5,
                 1.0,

 from utils import IMAGENET_MEAN, IMAGENET_STD, num_frames, patch_size, input_size
 from labels import K400_label_map, SSv2_label_map, UCF_label_map
+def available_models():
+    ALL_MODELS = [
+        'TFVideoMAE_S_K400_16x224',
+        'TFVideoMAE_B_K400_16x224',
+        'TFVideoMAE_L_K400_16x224',
+        'TFVideoMAE_S_SSv2_16x224',
+        'TFVideoMAE_B_SSv2_16x224',
+        'TFVideoMAE_B_UCF_16x224',
+    ]
+    return ALL_MODELS
 MODELS = {
     'K400': [
         './TFVideoMAE_S_K400_16x224_FT',
     return bool_masked_pos_tf
+def get_model(model_type):
+    ft_model = keras.models.load_model(model_type + '_FT')
+    pt_model = keras.models.load_model(model_type + '_PT')
+    if 'K400' in model_type:
+        data_type = 'K400'
+    elif 'SSv2' in model_type:
+        data_type = 'SSv2'
+    else:
+        data_type = 'UCF'
     label_map = LABEL_MAPS.get(data_type)
     label_map = K400_label_map
     return ft_model, pt_model, label_map
+def inference(video_file, model_type, mask_ratio):
     # get sample data
     container = read_video(video_file)
     frames = frame_sampling(container, num_frames=num_frames)
     # get models
     bool_masked_pos_tf = tube_mask_generator(mask_ratio)
+    ft_model, pt_model, label_map = get_model(model_type)
     ft_model.trainable = False
     pt_model.trainable = False
         fn=inference,
         inputs=[
             gr.Video(type="file", label="Input Video"),
+            gr.Dropdown(
+                choices=available_models(),
+                value="TFVideoMAE_S_K400_16x224",
+                label="Model"
+            )
+            # gr.Radio(
+            #     datasets,
+            #     type='value',
+            #     default=datasets[0],
+            #     label='Dataset',
+            # ),
             gr.Slider(
                 0.5,
                 1.0,