lbourdois commited on
Commit
70e4469
1 Parent(s): 1fe2f91

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -37,3 +37,5 @@ pages/4M-21/video_1.mp4 filter=lfs diff=lfs merge=lfs -text
37
  pages/Depth[[:space:]]Anything/video_1.mp4 filter=lfs diff=lfs merge=lfs -text
38
  pages/RT-DETR/video_1.mp4 filter=lfs diff=lfs merge=lfs -text
39
  pages/KOSMOS-2/video_1.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
 
37
  pages/Depth[[:space:]]Anything/video_1.mp4 filter=lfs diff=lfs merge=lfs -text
38
  pages/RT-DETR/video_1.mp4 filter=lfs diff=lfs merge=lfs -text
39
  pages/KOSMOS-2/video_1.mp4 filter=lfs diff=lfs merge=lfs -text
40
+ pages/Aria/image_0.png filter=lfs diff=lfs merge=lfs -text
41
+ pages/Aria/image_2.png filter=lfs diff=lfs merge=lfs -text
pages/30_GOT.py CHANGED
@@ -189,7 +189,7 @@ with col2:
189
  with col3:
190
  if lang == "en":
191
  if st.button("Next paper", use_container_width=True):
192
- switch_page("Home")
193
  else:
194
  if st.button("Papier suivant", use_container_width=True):
195
- switch_page("Home")
 
189
  with col3:
190
  if lang == "en":
191
  if st.button("Next paper", use_container_width=True):
192
+ switch_page("Aria")
193
  else:
194
  if st.button("Papier suivant", use_container_width=True):
195
+ switch_page("Aria")
pages/31_Aria.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_extras.switch_page_button import switch_page
3
+
4
+
5
# UI strings for the page, keyed by locale code ('en' / 'fr').
# The page body reads these via translations[lang][key]; key names must not change.
translations = {
    'en': {'title': 'Aria',
           'original_tweet':
           """
           [Original tweet](https://x.com/mervenoyann/status/1844356121370427546) (October 10, 2024)
           """,
           'tweet_1':
           """
            This is the BEST vision language model I have ever tried!
            <br>
            Aria is a new model by @rhymes_ai_ : a 25.3B multimodal model that can take image/video inputs 🤩
            <br>
            They release the model with Apache-2.0 license and fine-tuning scripts as well 👏
            I tested it extensively, keep reading to learn more 🧶
            """,
           'tweet_2':
           """
            The model is open-sourced [here](https://huggingface.co/rhymes-ai/Aria)
            <br>
            The authors have released fine-tuning examples on RefCOCO, NextQA and NLVR and [inference examples](https://github.com/rhymes-ai/Aria)
            <br>
            Try the demo [here](https://rhymes.ai)
            <br>
            It's super nice that you can get started with this model using 🤗 Transformers.
            """,
           'tweet_3':
           """
            I saw on the paper that it can debug screenshot of code??? 🤯
            So I tried it on piece of code that calculates KL-div and it understood very well!
            """,
           'tweet_4':
           """
            The model has very impressive OCR capabilities even with the bad handwriting 📝
            """,
           'tweet_5':
           """
            Real world knowledge ⇓
            """,
           'ressources':
           """
            Resources:
            [Aria: An Open Multimodal Native Mixture-of-Experts Model](https://arxiv.org/abs/2410.05993)
            by Dongxu Li, Yudong Liu, Haoning Wu, Yue Wang, Zhiqi Shen, Bowen Qu, Xinyao Niu, Guoyin Wang, Bei Chen, Junnan Li (2024)
            [GitHub](https://github.com/rhymes-ai/Aria)
            [Model](https://huggingface.co/rhymes-ai/Aria)
            """
           },
    'fr': {
        'title': 'Aria',
        'original_tweet':
        """
        [Tweet de base](https://x.com/mervenoyann/status/1844356121370427546) (en anglais) (10 octobre 2024)
        """,
        'tweet_1':
        """
        C'est le MEILLEUR modèle de langage-vision que j'ai jamais essayé !
        <br>
        Aria est un nouveau modèle de @rhymes_ai_ : un modèle multimodal de 25,3Mds de paramètres qui peut prendre des images et des vidéos en entrée 🤩
        <br>
        Ils publient le modèle avec une licence Apache-2.0 et des scripts fine-tuning 👏
        Je l'ai testé en profondeur, continuez à lire pour en savoir plus 🧶
        """,
        'tweet_2':
        """
        Le modèle est en libre accès [ici](https://huggingface.co/rhymes-ai/Aria)
        <br>
        Les auteurs ont publié des exemples de finetuning sur RefCOCO, NextQA et NLVR et des [exemples d'inférence](https://github.com/rhymes-ai/Aria).
        <br>
        Essayez la démo [ici](https://rhymes.ai)
        <br>
        C'est super sympa de pouvoir utiliser ce modèle avec 🤗 Transformers
        """,
        'tweet_3':
        """
        J'ai vu sur le papier qu'il pouvait déboguer des captures d'écran de code ??? 🤯
        J'ai donc essayé sur un bout de code qui calcule la divergence de Kullback-Leibler et il a très bien compris !
        """,
        'tweet_4':
        """
        Le modèle possède des capacités d'OCR très impressionnantes, même avec une mauvaise écriture. 📝
        """,
        'tweet_5':
        """
        Connaissance du monde réel ⇓
        """,
        'ressources':
        """
        Ressources :
        [Aria: An Open Multimodal Native Mixture-of-Experts Model](https://arxiv.org/abs/2410.05993)
        de Dongxu Li, Yudong Liu, Haoning Wu, Yue Wang, Zhiqi Shen, Bowen Qu, Xinyao Niu, Guoyin Wang, Bei Chen, Junnan Li (2024)
        [GitHub](https://github.com/rhymes-ai/Aria)
        [Model](https://huggingface.co/rhymes-ai/Aria)
        """
    }
}
100
+
101
+
102
def language_selector():
    """Render a flag picker in the sidebar-less header and return the locale code.

    Shows a selectbox whose options display as flag emojis; maps the
    selection ('EN'/'FR') to the lowercase locale code used as a key
    into the ``translations`` dict.
    """
    flags = {'EN': '🇬🇧', 'FR': '🇫🇷'}
    picked = st.selectbox('', options=list(flags), format_func=flags.get, key='lang_selector')
    # Only two options exist, so a direct mapping is equivalent to the conditional.
    return {'EN': 'en', 'FR': 'fr'}[picked]
106
+
107
# --- Page layout: title on the left, language picker on the right -----------
title_col, lang_col = st.columns([5, 1])

with lang_col:
    lang = language_selector()

with title_col:
    st.title(translations[lang]["title"])


def _spacer():
    """Insert a near-empty markdown line used as vertical spacing between sections."""
    st.markdown(""" """)


def _nav_button(label_en, label_fr, target):
    """Render one bilingual navigation button; switch to *target* page on click."""
    if st.button(label_en if lang == "en" else label_fr, use_container_width=True):
        switch_page(target)


# --- Tweet thread content ---------------------------------------------------
st.success(translations[lang]["original_tweet"], icon="ℹ️")
_spacer()

st.markdown(translations[lang]["tweet_1"], unsafe_allow_html=True)
_spacer()

st.video("pages/Aria/video_1.mp4", format="video/mp4")
_spacer()

st.markdown(translations[lang]["tweet_2"], unsafe_allow_html=True)
_spacer()

st.image("pages/Aria/image_0.png", use_column_width=True)
_spacer()
with st.expander("Code"):
    st.code("""
from transformers import AutoModelForCausalLM, AutoProcessor
model_id_or_path = "rhymes-ai/Aria"

model = AutoModelForCausalLM.from_pretrained(model_id_or_path, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True)

processor = AutoProcessor.from_pretrained(model_id_or_path, trust_remote_code=True)
""")
_spacer()

st.markdown(translations[lang]["tweet_3"], unsafe_allow_html=True)
_spacer()

st.image("pages/Aria/image_1.png", use_column_width=True)
_spacer()

st.markdown(translations[lang]["tweet_4"], unsafe_allow_html=True)
_spacer()

st.image("pages/Aria/image_2.png", use_column_width=True)
st.image("pages/Aria/image_3.png", use_column_width=True)
_spacer()

st.markdown(translations[lang]["tweet_5"], unsafe_allow_html=True)
_spacer()

st.image("pages/Aria/image_4.png", use_column_width=True)
_spacer()

st.info(translations[lang]["ressources"], icon="📚")

_spacer()
_spacer()
_spacer()

# --- Previous / Home / Next navigation --------------------------------------
# NOTE(review): "Next paper" targets "Home" — presumably because Aria is the
# newest page at the time of this commit; update the target when a newer
# paper page is added (same pattern as 30_GOT.py was updated here).
prev_col, home_col, next_col = st.columns(3)
with prev_col:
    _nav_button('Previous paper', 'Papier précédent', "GOT")
with home_col:
    _nav_button("Home", "Accueil", "Home")
with next_col:
    _nav_button("Next paper", "Papier suivant", "Home")
pages/Aria/image_0.png ADDED

Git LFS Details

  • SHA256: 6f3bfcaac960618859d998466ae695d3dab3ee8b51a8f5d188cc247e099b3bff
  • Pointer size: 132 Bytes
  • Size of remote file: 1.61 MB
pages/Aria/image_1.png ADDED
pages/Aria/image_2.png ADDED

Git LFS Details

  • SHA256: d6a49f0b152b3785861be507da6ae95ca9872e727c1cd907fd061f7356bb2145
  • Pointer size: 133 Bytes
  • Size of remote file: 16.9 MB
pages/Aria/image_3.png ADDED
pages/Aria/image_4.png ADDED
pages/Aria/video_1.mp4 ADDED
Binary file (655 kB). View file