egmaminta committed on
Commit
52e7c21
·
1 Parent(s): bf7aa41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -44,13 +44,14 @@ gradio.Interface(fn=classify,
44
  optional=False),
45
  outputs=gradio.outputs.Label(num_top_classes=5,
46
  type='auto'),
47
- theme='peach',
48
  examples=[['bedroom.jpg'],
49
  ['bathroom_AS.jpg'],
50
  ['samsung_room.jpg']],
51
  live=True,
 
52
  title='Indoor Scene Recognition',
53
- description='An indoor scene classifier. Start by uploading an input image. The outputs are the top five indoor scene classes that best fit your input image.',
54
  interpretation='default',
55
- article='''<h2><b>Additional Information</b></h2><p style='text-align: justify'>This indoor scene classifier employs the <b>google/vit-base-patch16-224-in21k</b>, a <b>Visual Transformer (ViT)</b> model pre-trained on ImageNet-21k (14 million images, 21,843 classes) at resolution 224x224 introduced in the paper <b><a href='https://arxiv.org/abs/2010.11929' target='_blank'>An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale</a></b> by Dosovitskiy et al. The original GitHub repository of the Visual Transformer is found in <b><a href='https://github.com/google-research/vision_transformer' target='_blank'>this link</a></b>. This model was fine-tuned on the <b><a href='https://www.kaggle.com/itsahmad/indoor-scenes-cvpr-2019' target='_blank'>MIT Indoor Scenes</a></b> from Kaggle. The source model is found in <b><a href='https://huggingface.co/vincentclaes/mit-indoor-scenes' target='_blank'>this link</a></b>.</p>''',
56
  allow_flagging='never').launch()
 
44
  optional=False),
45
  outputs=gradio.outputs.Label(num_top_classes=5,
46
  type='auto'),
47
+ theme='grass',
48
  examples=[['bedroom.jpg'],
49
  ['bathroom_AS.jpg'],
50
  ['samsung_room.jpg']],
51
  live=True,
52
+ layout='horizontal',
53
  title='Indoor Scene Recognition',
54
+ description='A smart and easy-to-use indoor scene classifier. Start by uploading an input image. The outputs are the top five indoor scene classes that best fit your input image.',
55
  interpretation='default',
56
+ article='''<h2><b>Additional Information</b></h2><p style='text-align: justify'>This indoor scene classifier employs the <b><a href='https://huggingface.co/google/vit-base-patch16-224-in21k' target='_blank'>google/vit-base-patch16-224-in21k</a></b>, a <b>Visual Transformer (ViT)</b> model pre-trained on ImageNet-21k (14 million images, 21,843 classes) at resolution 224x224 and was firstly introduced in the paper <b><a href='https://arxiv.org/abs/2010.11929' target='_blank'>An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale</a></b> by Dosovitskiy et al. The original GitHub repository of the Visual Transformer is found in <b><a href='https://github.com/google-research/vision_transformer' target='_blank'>this link</a></b>. This Visual Transformer model was fine-tuned on the <b><a href='https://www.kaggle.com/itsahmad/indoor-scenes-cvpr-2019' target='_blank'>MIT Indoor Scenes</a></b> from Kaggle. The source model from Hugging Face is found in <b><a href='https://huggingface.co/vincentclaes/mit-indoor-scenes' target='_blank'>this link</a></b>.</p>''',
57
  allow_flagging='never').launch()