Add example videos
Browse files
- .gitattributes +2 -0
- .gitignore +0 -1
- app.py +50 -23
- data/CkWOpyrAXdw_210.0_360.0.mp4 +3 -0
- data/HkLfNhgP0TM_660.0_810.0.mp4 +3 -0
- data/gTAvxnQtjXM_60.0_210.0.mp4 +3 -0
- data/ocLUzCNodj4_360.0_510.0.mp4 +3 -0
- data/pA6Z-qYhSNg_210.0_360.0.mp4 +3 -0
- setup.cfg +1 -1
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
@@ -5,6 +5,5 @@ __pycache__/
|
|
5 |
|
6 |
# Temporary data
|
7 |
/checkpoints
|
8 |
-
/flagged
|
9 |
.DS_Store
|
10 |
._*
|
|
|
5 |
|
6 |
# Temporary data
|
7 |
/checkpoints
|
|
|
8 |
.DS_Store
|
9 |
._*
|
app.py
CHANGED
@@ -1,24 +1,39 @@
|
|
1 |
# Copyright (c) Ye Liu. Licensed under the BSD 3-Clause License.
|
2 |
|
|
|
3 |
from functools import partial
|
4 |
|
5 |
import clip
|
6 |
import decord
|
7 |
-
import nncore
|
8 |
-
import torch
|
9 |
import gradio as gr
|
|
|
10 |
import numpy as np
|
11 |
-
import pandas as pd
|
12 |
import torchvision.transforms.functional as F
|
13 |
from decord import VideoReader
|
14 |
from nncore.engine import load_checkpoint
|
15 |
from nncore.nn import build_model
|
16 |
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
|
21 |
-
WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
|
24 |
def convert_time(seconds):
|
@@ -88,22 +103,34 @@ def main(video, query, model, cfg):
|
|
88 |
|
89 |
model, cfg = init_model(CONFIG, WEIGHT)
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
demo.launch()
|
|
|
1 |
# Copyright (c) Ye Liu. Licensed under the BSD 3-Clause License.
|
2 |
|
3 |
+
import random
|
4 |
from functools import partial
|
5 |
|
6 |
import clip
|
7 |
import decord
|
|
|
|
|
8 |
import gradio as gr
|
9 |
+
import nncore
|
10 |
import numpy as np
|
11 |
+
import torch
|
12 |
import torchvision.transforms.functional as F
|
13 |
from decord import VideoReader
|
14 |
from nncore.engine import load_checkpoint
|
15 |
from nncore.nn import build_model
|
16 |
|
17 |
+
import pandas as pd
|
18 |
+
|
19 |
+
TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding'
|
20 |
+
|
21 |
+
TITLE_MD = '<h1 align="center">🌀R<sup>2</sup>-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding</h1>'
|
22 |
+
DESCRIPTION_MD = 'R<sup>2</sup>-Tuning is a parameter- and memory-efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.'
|
23 |
+
GUIDE_MD = '### User Guide:\n1. Upload a video or click "random" to sample one.\n2. Input a text query. A good practice is to write a sentence with 5~15 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.'
|
24 |
|
25 |
CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
|
26 |
+
WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth'
|
27 |
+
|
28 |
+
# yapf:disable
|
29 |
+
EXAMPLES = [
|
30 |
+
('data/gTAvxnQtjXM_60.0_210.0.mp4', 'A man in a white t shirt wearing a backpack is showing a nearby cathedral.'),
|
31 |
+
('data/pA6Z-qYhSNg_210.0_360.0.mp4', 'Different Facebook posts on transgender bathrooms are shown.'),
|
32 |
+
('data/CkWOpyrAXdw_210.0_360.0.mp4', 'Indian girl cleaning her kitchen before cooking.'),
|
33 |
+
('data/ocLUzCNodj4_360.0_510.0.mp4', 'A woman stands in her bedroom in front of a mirror and talks.'),
|
34 |
+
('data/HkLfNhgP0TM_660.0_810.0.mp4', 'Woman lays down on the couch while talking to the camera.')
|
35 |
+
]
|
36 |
+
# yapf:enable
|
37 |
|
38 |
|
39 |
def convert_time(seconds):
|
|
|
103 |
|
104 |
model, cfg = init_model(CONFIG, WEIGHT)
|
105 |
|
106 |
+
fn = partial(main, model=model, cfg=cfg)
|
107 |
+
|
108 |
+
with gr.Blocks(title=TITLE) as demo:
|
109 |
+
gr.Markdown(TITLE_MD)
|
110 |
+
gr.Markdown(DESCRIPTION_MD)
|
111 |
+
gr.Markdown(GUIDE_MD)
|
112 |
+
|
113 |
+
with gr.Row():
|
114 |
+
with gr.Column():
|
115 |
+
video = gr.Video(label='Video')
|
116 |
+
query = gr.Textbox(label='Text Query')
|
117 |
+
|
118 |
+
with gr.Row():
|
119 |
+
random_btn = gr.Button(value='🔮 Random')
|
120 |
+
gr.ClearButton([video, query], value='🗑️ Reset')
|
121 |
+
submit_btn = gr.Button(value='🚀 Submit')
|
122 |
+
|
123 |
+
with gr.Column():
|
124 |
+
mr = gr.DataFrame(
|
125 |
+
headers=['Start Time', 'End Time', 'Score'], label='Moment Retrieval')
|
126 |
+
hd = gr.LinePlot(
|
127 |
+
x='x',
|
128 |
+
y='y',
|
129 |
+
x_title='Time (seconds)',
|
130 |
+
y_title='Saliency Score',
|
131 |
+
label='Highlight Detection')
|
132 |
+
|
133 |
+
random_btn.click(lambda: random.sample(EXAMPLES, 1)[0], None, [video, query])
|
134 |
+
submit_btn.click(fn, [video, query], [mr, hd])
|
135 |
|
136 |
demo.launch()
|
data/CkWOpyrAXdw_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8a433f49ddeabe2ac5eae143e5129de8f2b6ae3838d286b94c838b0b01f9365
|
3 |
+
size 6004497
|
data/HkLfNhgP0TM_660.0_810.0.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f9ec60b9b5c2f0d235465610f3680216c42c87ce777a6698a78f263711bde36
|
3 |
+
size 5166216
|
data/gTAvxnQtjXM_60.0_210.0.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:beedc2fd64f1c9da322a32b5246b2219726784abf92f0b0236bc8bb16ba5497b
|
3 |
+
size 7422854
|
data/ocLUzCNodj4_360.0_510.0.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:093de4b36ca46d8b410f01b0cebc1f36c05669f6cb3cb4b5514f7de0329fdceb
|
3 |
+
size 9791456
|
data/pA6Z-qYhSNg_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a34d5e47ebdb868ea24fac0d38d3cb063c16cf947a91eb77056cc389fc224421
|
3 |
+
size 6419206
|
setup.cfg
CHANGED
@@ -12,4 +12,4 @@ no_lines_before = STDLIB,LOCALFOLDER
|
|
12 |
default_section = FIRSTPARTY
|
13 |
|
14 |
[flake8]
|
15 |
-
max-line-length =
|
|
|
12 |
default_section = FIRSTPARTY
|
13 |
|
14 |
[flake8]
|
15 |
+
max-line-length = 500
|