yeliudev committed on
Commit
e9fc911
•
1 Parent(s): ed74388

Add example videos

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ data filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -5,6 +5,5 @@ __pycache__/
5
 
6
  # Temporary data
7
  /checkpoints
8
- /flagged
9
  .DS_Store
10
  ._*
 
5
 
6
  # Temporary data
7
  /checkpoints
 
8
  .DS_Store
9
  ._*
app.py CHANGED
@@ -1,24 +1,39 @@
1
  # Copyright (c) Ye Liu. Licensed under the BSD 3-Clause License.
2
 
 
3
  from functools import partial
4
 
5
  import clip
6
  import decord
7
- import nncore
8
- import torch
9
  import gradio as gr
 
10
  import numpy as np
11
- import pandas as pd
12
  import torchvision.transforms.functional as F
13
  from decord import VideoReader
14
  from nncore.engine import load_checkpoint
15
  from nncore.nn import build_model
16
 
17
- TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding' # noqa
18
- DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to write a sentence with 5~10 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.' # noqa
 
 
 
 
 
19
 
20
  CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
21
- WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth' # noqa
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  def convert_time(seconds):
@@ -88,22 +103,34 @@ def main(video, query, model, cfg):
88
 
89
  model, cfg = init_model(CONFIG, WEIGHT)
90
 
91
- demo = gr.Interface(
92
- fn=partial(main, model=model, cfg=cfg),
93
- inputs=[gr.Video(label='Video'),
94
- gr.Textbox(label='Text Query')],
95
- outputs=[
96
- gr.Dataframe(
97
- headers=['Start Time', 'End Time', 'Score'], label='Moment Retrieval'),
98
- gr.LinePlot(
99
- x='x',
100
- y='y',
101
- x_title='Time (seconds)',
102
- y_title='Saliency Score',
103
- label='Highlight Detection')
104
- ],
105
- allow_flagging='never',
106
- title=TITLE,
107
- description=DESCRIPTION)
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  demo.launch()
 
1
  # Copyright (c) Ye Liu. Licensed under the BSD 3-Clause License.
2
 
3
+ import random
4
  from functools import partial
5
 
6
  import clip
7
  import decord
 
 
8
  import gradio as gr
9
+ import nncore
10
  import numpy as np
11
+ import torch
12
  import torchvision.transforms.functional as F
13
  from decord import VideoReader
14
  from nncore.engine import load_checkpoint
15
  from nncore.nn import build_model
16
 
17
+ import pandas as pd
18
+
19
+ TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding'
20
+
21
+ TITLE_MD = '<h1 align="center">🌀R<sup>2</sup>-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding</h1>'
22
+ DESCRIPTION_MD = 'R<sup>2</sup>-Tuning is a parameter- and memory-efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.'
23
+ GUIDE_MD = '### User Guide:\n1. Upload a video or click "random" to sample one.\n2. Input a text query. A good practice is to write a sentence with 5~15 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.'
24
 
25
  CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
26
+ WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth'
27
+
28
+ # yapf:disable
29
+ EXAMPLES = [
30
+ ('data/gTAvxnQtjXM_60.0_210.0.mp4', 'A man in a white t shirt wearing a backpack is showing a nearby cathedral.'),
31
+ ('data/pA6Z-qYhSNg_210.0_360.0.mp4', 'Different Facebook posts on transgender bathrooms are shown.'),
32
+ ('data/CkWOpyrAXdw_210.0_360.0.mp4', 'Indian girl cleaning her kitchen before cooking.'),
33
+ ('data/ocLUzCNodj4_360.0_510.0.mp4', 'A woman stands in her bedroom in front of a mirror and talks.'),
34
+ ('data/HkLfNhgP0TM_660.0_810.0.mp4', 'Woman lays down on the couch while talking to the camera.')
35
+ ]
36
+ # yapf:enable
37
 
38
 
39
  def convert_time(seconds):
 
103
 
104
  model, cfg = init_model(CONFIG, WEIGHT)
105
 
106
+ fn = partial(main, model=model, cfg=cfg)
107
+
108
+ with gr.Blocks(title=TITLE) as demo:
109
+ gr.Markdown(TITLE_MD)
110
+ gr.Markdown(DESCRIPTION_MD)
111
+ gr.Markdown(GUIDE_MD)
112
+
113
+ with gr.Row():
114
+ with gr.Column():
115
+ video = gr.Video(label='Video')
116
+ query = gr.Textbox(label='Text Query')
117
+
118
+ with gr.Row():
119
+                 random_btn = gr.Button(value='🔮 Random')
120
+                 gr.ClearButton([video, query], value='🗑️ Reset')
121
+                 submit_btn = gr.Button(value='🚀 Submit')
122
+
123
+ with gr.Column():
124
+ mr = gr.DataFrame(
125
+ headers=['Start Time', 'End Time', 'Score'], label='Moment Retrieval')
126
+ hd = gr.LinePlot(
127
+ x='x',
128
+ y='y',
129
+ x_title='Time (seconds)',
130
+ y_title='Saliency Score',
131
+ label='Highlight Detection')
132
+
133
+ random_btn.click(lambda: random.sample(EXAMPLES, 1)[0], None, [video, query])
134
+ submit_btn.click(fn, [video, query], [mr, hd])
135
 
136
  demo.launch()
data/CkWOpyrAXdw_210.0_360.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8a433f49ddeabe2ac5eae143e5129de8f2b6ae3838d286b94c838b0b01f9365
3
+ size 6004497
data/HkLfNhgP0TM_660.0_810.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f9ec60b9b5c2f0d235465610f3680216c42c87ce777a6698a78f263711bde36
3
+ size 5166216
data/gTAvxnQtjXM_60.0_210.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beedc2fd64f1c9da322a32b5246b2219726784abf92f0b0236bc8bb16ba5497b
3
+ size 7422854
data/ocLUzCNodj4_360.0_510.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:093de4b36ca46d8b410f01b0cebc1f36c05669f6cb3cb4b5514f7de0329fdceb
3
+ size 9791456
data/pA6Z-qYhSNg_210.0_360.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a34d5e47ebdb868ea24fac0d38d3cb063c16cf947a91eb77056cc389fc224421
3
+ size 6419206
setup.cfg CHANGED
@@ -12,4 +12,4 @@ no_lines_before = STDLIB,LOCALFOLDER
12
  default_section = FIRSTPARTY
13
 
14
  [flake8]
15
- max-line-length = 90
 
12
  default_section = FIRSTPARTY
13
 
14
  [flake8]
15
+ max-line-length = 500