MTTR committed on
Commit 1136361
1 Parent(s): fffb571

Update app.py

Files changed (1)
  1. app.py +11 -15
app.py CHANGED
@@ -158,23 +158,19 @@ description = "This notebook provides a (limited) hands-on demonstration of MTTR
 article = "**Disclaimer:** <br> This is a **limited** demonstration of MTTR's performance. The model used here was trained **exclusively** on Refer-YouTube-VOS with window size `w=12` (as described in our paper). No additional training data was used whatsoever. Hence, the model's performance may be limited, especially on instances from unseen categories. <br> Additionally, slow processing times may be encountered, depending on the input clip length and/or resolution, and due to HuggingFace's limited computational resources (no GPU acceleration unfortunately). <br> Finally, we emphasize that this demonstration is intended to be used for academic purposes only. We do not take any responsibility for how the created content is used or distributed. <br> <p style='text-align: center'><a href='https://github.com/mttr2021/MTTR'>Github Repo</a></p>"
 
 examples = [['guy in white shirt performing tricks on a bike', 'bike_tricks_2.mp4'],
-            ['a man riding a surfboard', 'surfing.mp4'],
-            ['a guy performing tricks on a skateboard', 'skateboarding.mp4'],
-            ['man in red shirt playing tennis', 'tennis.mp4'],
-            ['brown and black dog playing', 'dogs_playing_1.mp4'],
-            ['a dog to the left playing with a toy', 'dogs_playing_2.mp4'],
-            ['person in blue riding a bike', 'blue_biker_riding.mp4'],
-            ['a dog to the right', 'dog_and_cat.mp4'],
-            ['a person hugging a dog', 'girl_hugging_dog.mp4'],
-            ['a black bike used to perform tricks', 'bike_tricks_1.mp4'],
-            ['a black horse playing with a person', 'horse_plays_ball.mp4'] ]
-
-
-# article = "<p style='text-align: center'><a href='https://github.com/mttr2021/MTTR'>Github Repo</a></p>"
-# article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.14821'>End-to-End Referring Video Object Segmentation with Multimodal Transformers</a> | <a href='https://github.com/mttr2021/MTTR'>Github Repo</a></p>"
+            ['a man riding a surfboard', 'surfing.mp4'],
+            ['a guy performing tricks on a skateboard', 'skateboarding.mp4'],
+            ['man in red shirt playing tennis', 'tennis.mp4'],
+            ['brown and black dog playing', 'dogs_playing_1.mp4'],
+            ['a dog to the left playing with a toy', 'dogs_playing_2.mp4'],
+            ['person in blue riding a bike', 'blue_biker_riding.mp4'],
+            ['a dog to the right', 'dog_and_cat.mp4'],
+            ['a person hugging a dog', 'girl_hugging_dog.mp4'],
+            ['a black bike used to perform tricks', 'bike_tricks_1.mp4'],
+            ['a black horse playing with a person', 'horse_plays_ball.mp4']]
 
 iface = gr.Interface(fn=process,
-                     inputs=[gr.inputs.Textbox(label="text query"), gr.inputs.Video(label="Input video. First 10 seconds of the video are used.")],
+                     inputs=[gr.inputs.Textbox(label="text query"), gr.inputs.Video(label="input video - first 10 seconds are used")],
                      outputs='video',
                      title=title,
                      description=description,
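
For context, here is a minimal, self-contained sketch of how the fragments touched by this diff fit into a complete Gradio 2.x-era Interface definition. This is not the repo's exact code: the real process function, title, and description live elsewhere in app.py and are stubbed here, and the article=/examples= arguments plus the launch() call are assumed, since the diff hunk truncates before them.

# Sketch only. `process`, `title`, and `description` are stand-ins for the
# definitions elsewhere in app.py; the example .mp4 clips are assumed to ship
# alongside app.py in the Space.
import gradio as gr

def process(text_query, video_path):
    # Stub: the real app runs MTTR inference on the clip referred to by the
    # text query and returns the path of an annotated output video.
    return video_path

title = "MTTR demo"                                   # placeholder
description = "Hands-on demonstration of MTTR."       # placeholder
article = "<p style='text-align: center'><a href='https://github.com/mttr2021/MTTR'>Github Repo</a></p>"

examples = [['guy in white shirt performing tricks on a bike', 'bike_tricks_2.mp4'],
            ['a man riding a surfboard', 'surfing.mp4']]

# The legacy gr.inputs namespace matches the Gradio version in use at the time;
# newer Gradio releases expose gr.Textbox / gr.Video directly instead.
iface = gr.Interface(fn=process,
                     inputs=[gr.inputs.Textbox(label="text query"),
                             gr.inputs.Video(label="input video - first 10 seconds are used")],
                     outputs='video',
                     title=title,
                     description=description,
                     article=article,      # assumed: the diff cuts off before this argument
                     examples=examples)    # assumed: the examples list is presumably passed here

if __name__ == '__main__':
    iface.launch()

The inputs list pairs each text query with a video, so each inner list in examples supplies one value per input component, in the same order.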