MAGAer13 committed
Commit 56d7102
1 Parent(s): 1b4debd
app.py CHANGED
@@ -1,67 +1,41 @@
 import os
 import wget
-apex_file = wget.download(os.getenv('apex'))
-os.system('tar zxvf {}'.format(apex_file))
-
-os.system('ls -l; cd apex;'
-          'pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .; cd ..')
-
-# os.system('pip install gradio --upgrade')
-
-resources = os.getenv('resources')
-resources_tokenizer = os.getenv('resources_tokenizer')
-
-_ = wget.download(resources_tokenizer)
+resources = os.getenv('resources_new')
 resources_filename = wget.download(resources)
 
 os.system('tar zxvf {}'.format(resources_filename))
-
-# if not os.path.exists('./model_optim_rng.pth'):
-#     resources_weight = os.getenv('resources_weight')
-#     _ = wget.download(resources_weight)
-
 os.system('ls -l')
 
-
-import time
 import argparse
 import datetime
 import json
-import re
 import os
+import time
+import torch
+
 import gradio as gr
 import requests
-from utils import (
-    mplug_owl, load_demo_refresh_model_list, vote_last_response,
-    upvote_last_response, downvote_last_response, flag_last_response, regenerate,
-    add_text, after_process_image, get_inputs, init,
-    headers, no_change_btn, enable_btn, disable_btn, get_window_url_params
-)
+
+from conversation import default_conversation
 from gradio_css import code_highlight_css
 from gradio_patch import Chatbot as grChatbot
-from conversation import default_conversation
+from serve_utils import (
+    add_text, after_process_image, disable_btn, no_change_btn,
+    downvote_last_response, enable_btn, flag_last_response,
+    get_window_url_params, init, regenerate, upvote_last_response,
+    after_process_video
+)
+from model_worker import mPLUG_Owl_Server
+from model_utils import post_process_code
 
 SHARED_UI_WARNING = f'''### [NOTE] You can duplicate and use it with a paid private GPU.
 <a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/MAGAer13/mPLUG-Owl?duplicate=true"><img style="margin-top:0;margin-bottom:0" src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-md.svg" alt="Duplicate Space"></a>
 '''
 
-# Alternatively, you can also use the Colab demo on our project page.
-# <a style="display:inline-block" href="https://https://github.com/X-PLUG/mPLUG-Owl"><img style="margin-top:0;margin-bottom:0" src="https://img.shields.io/badge/Project%20Page-online-brightgreen"></a>
-
-io = None
-init()
-model = mplug_owl(device="cuda")
-log_dir = ""
 
 def load_demo(url_params, request: gr.Request):
 
     dropdown_update = gr.Dropdown.update(visible=True)
-    if "model" in url_params:
-        model = url_params["model"]
-        if model in models:
-            dropdown_update = gr.Dropdown.update(
-                value=model, visible=True)
-
     state = default_conversation.copy()
 
     return (state,
@@ -72,36 +46,54 @@ def load_demo(url_params, request: gr.Request):
             gr.Row.update(visible=True),
             gr.Accordion.update(visible=True))
 
-def contains_chinese(string):
-    pattern = re.compile(r'[一-龥]')
-    match = pattern.search(string)
-    return match is not None
-
 def clear_history(request: gr.Request):
     state = default_conversation.copy()
 
     return (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
 
-def http_bot(state, topk, max_new_tokens, random_seed, request: gr.Request):
+def http_bot(state, max_output_tokens, temperature, top_k, top_p,
+             num_beams, no_repeat_ngram_size, length_penalty,
+             do_sample, request: gr.Request):
+    if state.skip_next:
+        # This generate call is skipped due to invalid inputs
+        yield (state, state.to_gradio_chatbot()) + (no_change_btn,) * 5
+        return
+
     prompt = after_process_image(state.get_prompt())
     images = state.get_images()
+
+    data = {
+        "text_input": prompt,
+        "images": images if len(images) > 0 else [],
+        "generation_config": {
+            "top_k": int(top_k),
+            "top_p": float(top_p),
+            "num_beams": int(num_beams),
+            "no_repeat_ngram_size": int(no_repeat_ngram_size),
+            "length_penalty": float(length_penalty),
+            "do_sample": bool(do_sample),
+            "temperature": float(temperature),
+            "max_new_tokens": min(int(max_output_tokens), 1536),
+        }
+    }
+
     state.messages[-1][-1] = "▌"
     yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
 
-    # if contains_chinese(prompt):
-    #     state.messages[-1][-1] = "**CURRENTLY WE ONLY SUPPORT ENGLISH. PLEASE REFRESH THIS PAGE TO RESTART.**"
-    #     yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
-    #     return
-
     try:
-        data = get_inputs(prompt, images, topk, max_new_tokens, random_seed)
-        output = model.prediction(data, log_dir)
-        print(output)
-        # output = output.replace("```", "")
-
-        state.messages[-1][-1] = output + "▌"
-        yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
-        time.sleep(0.03)
+        for chunk in model.predict(data):
+            if chunk:
+                if chunk[1]:
+                    output = chunk[0].strip()
+                    output = post_process_code(output)
+                    state.messages[-1][-1] = output + "▌"
+                    yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
+                else:
+                    output = chunk[0].strip()
+                    state.messages[-1][-1] = output
+                    yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+                    return
+            time.sleep(0.03)
 
     except requests.exceptions.RequestException as e:
         state.messages[-1][-1] = "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
@@ -112,25 +104,23 @@ def http_bot(state, topk, max_new_tokens, random_seed, request: gr.Request):
     yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
 
 
-def add_text_http_bot(state, text, image, video, topk, max_new_tokens, random_seed, request: gr.Request):
-    if len(text) <= 0 and (image is None or video is None):
+def add_text_http_bot(
+        state, text, image, video, num_frames,
+        max_output_tokens, temperature, top_k, top_p,
+        num_beams, no_repeat_ngram_size, length_penalty,
+        do_sample, request: gr.Request):
+    if len(text) <= 0 and image is None and video is None:
         state.skip_next = True
        return (state, state.to_gradio_chatbot(), "", None, None) + (no_change_btn,) * 5
 
     if image is not None:
-        multimodal_msg = None
         if '<image>' not in text:
            text = text + '\n<image>'
-
-        if multimodal_msg is not None:
-            return (state, state.to_gradio_chatbot(), multimodal_msg, None, None) + (
-                no_change_btn,) * 5
         text = (text, image)
 
     if video is not None:
-        num_frames = 4
-        if '<image>' not in text:
-            text = text + '\n<image>' * num_frames
+        if '<|video|>' not in text:
+            text = text + '\n<|video|>'
         text = (text, video)
 
     state.append_message(state.roles[0], text)
@@ -138,26 +128,54 @@ def add_text_http_bot(state, text, image, video, topk, max_new_tokens, random_seed, request: gr.Request):
     state.skip_next = False
 
     yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
-
-    prompt = after_process_image(state.get_prompt())
+
+    if state.skip_next:
+        # This generate call is skipped due to invalid inputs
+        yield (state, state.to_gradio_chatbot(), "", None, None) + (no_change_btn,) * 5
+        return
+
+    prompt = state.get_prompt(num_frames)
+    prompt = after_process_image(prompt)
+    prompt = after_process_video(prompt)
+    prompt = prompt.replace("Human: \n", "")
+
     images = state.get_images()
+    videos = state.get_videos(num_frames)
+
+    data = {
+        "text_input": prompt,
+        "images": images if len(images) > 0 else [],
+        "videos": videos if len(videos) > 0 else [],
+        "video": video if video is not None else None,
+        "generation_config": {
+            "top_k": int(top_k),
+            "top_p": float(top_p),
+            "num_beams": int(num_beams),
+            "no_repeat_ngram_size": int(no_repeat_ngram_size),
+            "length_penalty": float(length_penalty),
+            "do_sample": bool(do_sample),
+            "temperature": float(temperature),
+            "max_new_tokens": min(int(max_output_tokens), 1536),
+        }
+    }
+
     state.messages[-1][-1] = "▌"
     yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
 
-    # if contains_chinese(prompt):
-    #     state.messages[-1][-1] = "**CURRENTLY WE ONLY SUPPORT ENGLISH. PLEASE REFRESH THIS PAGE TO RESTART OR CLEAR HISTORY.**"
-    #     yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
-    #     return
-
     try:
-        data = get_inputs(prompt, images, topk, max_new_tokens, random_seed)
-        output = model.prediction(data, log_dir)
-        print(output)
-        # output = output.replace("```", "")
-
-        state.messages[-1][-1] = output + "▌"
-        yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
-        time.sleep(0.03)
+        for chunk in model.predict(data):
+            if chunk:
+                if chunk[1]:
+                    output = chunk[0].strip()
+                    output = post_process_code(output)
+                    state.messages[-1][-1] = output + "▌"
+                    yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
+                else:
+                    output = chunk[0].strip()
+                    state.messages[-1][-1] = output
+                    yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+                    return
+            time.sleep(0.03)
 
     except requests.exceptions.RequestException as e:
         state.messages[-1][-1] = "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
@@ -168,33 +186,53 @@ def add_text_http_bot(state, text, image, video, topk, max_new_tokens, random_seed, request: gr.Request):
     yield (state, state.to_gradio_chatbot(), "", None, None) + (enable_btn,) * 5
 
 
-def regenerate_http_bot(state, topk, max_new_tokens, random_seed, request: gr.Request):
+def regenerate_http_bot(state, num_frames,
+                        max_output_tokens, temperature, top_k, top_p,
+                        num_beams, no_repeat_ngram_size, length_penalty,
+                        do_sample, request: gr.Request):
     state.messages[-1][-1] = None
     state.skip_next = False
     yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
-
-    prompt = after_process_image(state.get_prompt())
+
+    prompt = after_process_image(state.get_prompt(num_frames))
    images = state.get_images()
-    state.messages[-1][-1] = " "
-    yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
+    videos = state.get_videos(num_frames)
+
+    data = {
+        "text_input": prompt,
+        "images": images if len(images) > 0 else [],
+        "videos": videos if len(videos) > 0 else [],
+        "generation_config": {
+            "top_k": int(top_k),
+            "top_p": float(top_p),
+            "num_beams": int(num_beams),
+            "no_repeat_ngram_size": int(no_repeat_ngram_size),
+            "length_penalty": float(length_penalty),
+            "do_sample": bool(do_sample),
+            "temperature": float(temperature),
+            "max_new_tokens": min(int(max_output_tokens), 1536),
+        }
+    }
 
-    # if contains_chinese(prompt):
-    #     state.messages[-1][-1] = "**CURRENTLY WE ONLY SUPPORT ENGLISH. PLEASE REFRESH THIS PAGE TO RESTART OR CLEAR HISTORY.**"
-    #     yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
-    #     return
+    state.messages[-1][-1] = "▌"
+    yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
 
     try:
-        data = get_inputs(prompt, images, topk, max_new_tokens, random_seed)
-        output = model.prediction(data, log_dir)
-        print(">>>> output:", output)
-        # output = output.replace("```", "")
-
-        state.messages[-1][-1] = output + " "
-        yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
-        time.sleep(0.03)
+        for chunk in model.predict(data):
+            if chunk:
+                if chunk[1]:
+                    output = chunk[0].strip()
+                    output = post_process_code(output)
+                    state.messages[-1][-1] = output + "▌"
+                    yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
+                else:
+                    output = chunk[0].strip()
+                    state.messages[-1][-1] = output
+                    yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+                    return
+            time.sleep(0.03)
 
     except requests.exceptions.RequestException as e:
-        print(e)
         state.messages[-1][-1] = "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
         yield (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
         return
@@ -202,25 +240,38 @@ def regenerate_http_bot(state, topk, max_new_tokens, random_seed, request: gr.Request):
     state.messages[-1][-1] = state.messages[-1][-1][:-1]
     yield (state, state.to_gradio_chatbot(), "", None, None) + (enable_btn,) * 5
 
+# [![Star on GitHub](https://img.shields.io/github/stars/X-PLUG/mPLUG-Owl.svg?style=social)](https://github.com/X-PLUG/mPLUG-Owl/stargazers)
+# **If you are facing an ERROR, it might be an Out-Of-Memory (OOM) issue due to the limited GPU memory, so please refresh the page to restart.** Besides, we recommend you duplicate the space with a single A10 GPU for a better experience. Or you can visit our demo hosted on [Modelscope](https://www.modelscope.cn/studios/damo/mPLUG-Owl/summary), which runs on a V100 machine.
+
 title_markdown = ("""
-# mPLUG-Owl🦉 (GitHub Repo: https://github.com/X-PLUG/mPLUG-Owl)
-## If you like our project, please give us a star ✨ on Github for latest update.
-[![Star on GitHub](https://img.shields.io/github/stars/X-PLUG/mPLUG-Owl.svg?style=social)](https://github.com/X-PLUG/mPLUG-Owl/stargazers)
-""")
-
-tos_markdown = ("""
-### Notice
-The output is generated by top-k sampling scheme and may involve some randomness. For multiple image and video, we cannot ensure it's performance since only image-text pairs are used during training. For Video inputs, we recommand use the video **less than 10 seconds**.
-
-**If you are facing ERROR, it might be Out-Of-Memory (OOM) issue due to the limited GPU memory, please refresh the page to restart.** Besides, we recommand you to duplicate the space with a single A10 GPU to have a better experience. Or you can visit our demo hosted on [Modelscope](https://www.modelscope.cn/studios/damo/mPLUG-Owl/summary) which is hosted on a V100 machine.
-
+<h1 align="center"><a href="https://github.com/X-PLUG/mPLUG-Owl"><img src="https://s1.ax1x.com/2023/05/12/p9yGA0g.png" alt="mPLUG-Owl" border="0" style="margin: 0 auto; height: 200px;" /></a> </h1>
+
+<h2 align="center"> mPLUG-Owl🦉: Modularization Empowers Large Language Models with Multimodality </h2>
+
+<h5 align="center"> If you like our project, please give us a star on GitHub for the latest updates. </h5>
+
+<div align="center">
+<div style="display:flex; gap: 0.25rem;" align="center">
+    <a href='https://github.com/X-PLUG/mPLUG-Owl'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
+    <a href="https://arxiv.org/abs/2304.14178"><img src="https://img.shields.io/badge/Arxiv-2304.14178-red"></a>
+    <a href='https://github.com/X-PLUG/mPLUG-Owl/stargazers'><img src='https://img.shields.io/github/stars/X-PLUG/mPLUG-Owl.svg?style=social'></a>
+</div>
+</div>
+
+**Notice**: The output is generated by a top-k sampling scheme and may involve some randomness. For multiple images and video, we cannot ensure its performance since only image-text / video-text pairs are used during training.
+
+**We recommend only one image or video per conversation session.** If you want to start chatting with new images or videos, we recommend you **CLEAR** the history to restart.
+
+""")
+
+tos_markdown = ("""
 ### Terms of use
 By using this service, users are required to agree to the following terms:
 The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
 Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
 For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
 
-Copyright 2023 Alibaba DAMO Academy.
+**Copyright 2023 Alibaba DAMO Academy.**
 """)
@@ -229,14 +280,19 @@ The service is a research preview intended for non-commercial use only, subject
 """)
 
 css = code_highlight_css + """
-version 1.0
+pre {
+    white-space: pre-wrap;       /* Since CSS 2.1 */
+    white-space: -moz-pre-wrap;  /* Mozilla, since 1999 */
+    white-space: -pre-wrap;      /* Opera 4-6 */
+    white-space: -o-pre-wrap;    /* Opera 7 */
+    word-wrap: break-word;       /* Internet Explorer 5.5+ */
+}
 """
 
 def build_demo():
     # with gr.Blocks(title="mPLUG-Owl🦉", theme=gr.themes.Base(), css=css) as demo:
     with gr.Blocks(title="mPLUG-Owl🦉", css=css) as demo:
         state = gr.State()
-
         gr.Markdown(SHARED_UI_WARNING)
 
         gr.Markdown(title_markdown)
@@ -245,17 +301,23 @@ def build_demo():
         with gr.Column(scale=3):
 
            imagebox = gr.Image(type="pil")
-
            videobox = gr.Video()
 
            with gr.Accordion("Parameters", open=True, visible=False) as parameter_row:
-                topk = gr.Slider(minimum=1, maximum=5, value=5, step=1, interactive=True, label="Top K",)
                max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",)
-                temperature = gr.Slider(minimum=0, maximum=10, value=1, step=0.1, interactive=True, label="Temperature",)
+                temperature = gr.Slider(minimum=0, maximum=1, value=1, step=0.1, interactive=True, label="Temperature",)
+                top_k = gr.Slider(minimum=1, maximum=5, value=3, step=1, interactive=True, label="Top K",)
+                top_p = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.1, interactive=True, label="Top p",)
+                length_penalty = gr.Slider(minimum=1, maximum=5, value=1, step=0.1, interactive=True, label="length_penalty",)
+                num_beams = gr.Slider(minimum=1, maximum=5, value=1, step=1, interactive=True, label="Beam Size",)
+                no_repeat_ngram_size = gr.Slider(minimum=1, maximum=5, value=2, step=1, interactive=True, label="no_repeat_ngram_size",)
+                num_frames = gr.Slider(minimum=8, maximum=32, value=8, step=4, interactive=True, label="Number of Frames",)
+                do_sample = gr.Checkbox(interactive=True, value=True, label="do_sample")
+
            gr.Markdown(tos_markdown)
 
        with gr.Column(scale=6):
-            chatbot = grChatbot(elem_id="chatbot", visible=False).style(height=800)
+            chatbot = grChatbot(elem_id="chatbot", visible=False).style(height=1000)
            with gr.Row():
                with gr.Column(scale=8):
                    textbox = gr.Textbox(show_label=False,
@@ -278,18 +340,17 @@ def build_demo():
            [f'examples/laundry.jpeg', 'Why this happens and how to fix it?'],
            [f'examples/ca.jpeg', "What do you think about the person's behavior?"],
            [f'examples/monalisa-fun.jpg', 'Do you know who drew this painting?'],
-            # [f"examples/Yao_Ming.jpeg", "What is the name of the man on the right?"],
        ], inputs=[imagebox, textbox])
 
-        gr.Examples(examples=[
-            [f"examples/surf.mp4", "What is the man doing?"],
-            [f"examples/yoga.mp4", "What did the woman doing?"],
-        ], inputs=[videobox, textbox])
-
        gr.Markdown(learn_more_markdown)
        url_params = gr.JSON(visible=False)
 
        btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
+        parameter_list = [
+            num_frames, max_output_tokens, temperature, top_k, top_p,
+            num_beams, no_repeat_ngram_size, length_penalty,
+            do_sample
+        ]
        upvote_btn.click(upvote_last_response,
                         [state], [textbox, upvote_btn, downvote_btn, flag_btn])
        downvote_btn.click(downvote_last_response,
@@ -297,19 +358,30 @@ def build_demo():
        flag_btn.click(flag_last_response,
                       [state], [textbox, upvote_btn, downvote_btn, flag_btn])
        # regenerate_btn.click(regenerate, state,
-        #     [state, chatbot, textbox, imagebox] + btn_list).then(
-        #     http_bot, [state, topk, max_output_tokens, temperature],
+        #     [state, chatbot, textbox, imagebox, videobox] + btn_list).then(
+        #     http_bot, [state] + parameter_list,
        #     [state, chatbot] + btn_list)
-        regenerate_btn.click(regenerate_http_bot, [state, topk, max_output_tokens, temperature],
+        regenerate_btn.click(regenerate_http_bot, [state] + parameter_list,
                             [state, chatbot, textbox, imagebox, videobox] + btn_list)
+
        clear_btn.click(clear_history, None, [state, chatbot, textbox, imagebox, videobox] + btn_list)
 
-        # textbox.submit(add_text, [state, textbox, imagebox], [state, chatbot, textbox, imagebox] + btn_list
-        #     ).then(http_bot, [state, topk, max_output_tokens, temperature],
+        # textbox.submit(add_text, [state, textbox, imagebox, videobox], [state, chatbot, textbox, imagebox, videobox] + btn_list
+        #     ).then(http_bot, [state] + parameter_list,
        #     [state, chatbot] + btn_list)
-        textbox.submit(add_text_http_bot, [state, textbox, imagebox, videobox, topk, max_output_tokens, temperature], [state, chatbot, textbox, imagebox, videobox] + btn_list)
+        # submit_btn.click(add_text, [state, textbox, imagebox, videobox], [state, chatbot, textbox, imagebox, videobox] + btn_list
+        #     ).then(http_bot, [state] + parameter_list,
+        #     [state, chatbot] + btn_list)
+
+        textbox.submit(add_text_http_bot,
+                       [state, textbox, imagebox, videobox] + parameter_list,
+                       [state, chatbot, textbox, imagebox, videobox] + btn_list
+                       )
 
-        submit_btn.click(add_text_http_bot, [state, textbox, imagebox, videobox, topk, max_output_tokens, temperature], [state, chatbot, textbox, imagebox, videobox] + btn_list)
+        submit_btn.click(add_text_http_bot,
+                         [state, textbox, imagebox, videobox] + parameter_list,
+                         [state, chatbot, textbox, imagebox, videobox] + btn_list
+                         )
 
        demo.load(load_demo, [url_params], [state,
                                            chatbot, textbox, submit_btn, button_row, parameter_row],
@@ -318,15 +390,30 @@ def build_demo():
    return demo
 
 if __name__ == "__main__":
-    cur_dir = os.getenv("cur_dir")
-    log_dir = cur_dir + "log"
+    io = init()
 
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--debug", action="store_true", help="using debug mode")
    parser.add_argument("--port", type=int)
    parser.add_argument("--concurrency-count", type=int, default=1)
+    parser.add_argument("--base-model", type=str, default='./')
+    parser.add_argument("--load-8bit", action="store_true", help="using 8bit mode")
+    parser.add_argument("--bf16", action="store_true", default=True, help="using bf16 mode")
    args = parser.parse_args()
+
+    if torch.cuda.is_available():
+        device = "cuda"
+    else:
+        device = "cpu"
+
+    model = mPLUG_Owl_Server(
+        base_model=args.base_model,
+        load_in_8bit=args.load_8bit,
+        bf16=args.bf16,
+        device=device,
+        io=io
+    )
    demo = build_demo()
    demo.queue(concurrency_count=args.concurrency_count, status_update_rate=10, api_open=False).launch(server_name=args.host, debug=args.debug, server_port=args.port, share=False)
 
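Note: all three handlers above consume model.predict(data) as a generator of (text, flag) chunks — a truthy flag marks a partial result that the UI renders with a trailing "▌" cursor, and a falsy flag marks the final result. A minimal sketch of that contract, with a stand-in generator (predict_stub is illustrative only; the real implementation lives in model_worker.py, which is not part of this commit):

    from typing import Dict, Generator, Tuple

    def predict_stub(data: Dict) -> Generator[Tuple[str, bool], None, None]:
        # "data" mirrors the payload built in http_bot: "text_input",
        # "images"/"videos", and a "generation_config" dict of sampling settings.
        max_new_tokens = data["generation_config"]["max_new_tokens"]
        text = ""
        for step in range(min(max_new_tokens, 3)):
            text += "token{} ".format(step)
            yield (text, True)       # truthy flag: partial output, caller appends the cursor
        yield (text.strip(), False)  # falsy flag: final output, caller re-enables the buttons
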
config.json ADDED
@@ -0,0 +1,299 @@
+{
+  "return_dict": true,
+  "output_hidden_states": false,
+  "output_attentions": false,
+  "torchscript": false,
+  "torch_dtype": null,
+  "use_bfloat16": false,
+  "tf_legacy_loss": false,
+  "pruned_heads": {},
+  "tie_word_embeddings": false,
+  "is_encoder_decoder": false,
+  "is_decoder": false,
+  "cross_attention_hidden_size": null,
+  "add_cross_attention": false,
+  "tie_encoder_decoder": false,
+  "max_length": 20,
+  "min_length": 0,
+  "do_sample": false,
+  "early_stopping": false,
+  "num_beams": 1,
+  "num_beam_groups": 1,
+  "diversity_penalty": 0.0,
+  "temperature": 1.0,
+  "top_k": 50,
+  "top_p": 1.0,
+  "typical_p": 1.0,
+  "repetition_penalty": 1.0,
+  "length_penalty": 1.0,
+  "no_repeat_ngram_size": 0,
+  "encoder_no_repeat_ngram_size": 0,
+  "bad_words_ids": null,
+  "num_return_sequences": 1,
+  "chunk_size_feed_forward": 0,
+  "output_scores": false,
+  "return_dict_in_generate": false,
+  "forced_bos_token_id": null,
+  "forced_eos_token_id": null,
+  "remove_invalid_values": false,
+  "exponential_decay_length_penalty": null,
+  "suppress_tokens": null,
+  "begin_suppress_tokens": null,
+  "architectures": null,
+  "finetuning_task": null,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1
+  },
+  "tokenizer_class": null,
+  "prefix": null,
+  "bos_token_id": null,
+  "pad_token_id": null,
+  "eos_token_id": null,
+  "sep_token_id": null,
+  "decoder_start_token_id": null,
+  "task_specific_params": null,
+  "problem_type": null,
+  "_name_or_path": "",
+  "_commit_hash": null,
+  "transformers_version": null,
+  "vision_config": {
+    "return_dict": true,
+    "output_hidden_states": false,
+    "output_attentions": false,
+    "torchscript": false,
+    "torch_dtype": null,
+    "use_bfloat16": false,
+    "tf_legacy_loss": false,
+    "pruned_heads": {},
+    "tie_word_embeddings": true,
+    "is_encoder_decoder": false,
+    "is_decoder": false,
+    "cross_attention_hidden_size": null,
+    "add_cross_attention": false,
+    "tie_encoder_decoder": false,
+    "max_length": 20,
+    "min_length": 0,
+    "do_sample": false,
+    "early_stopping": false,
+    "num_beams": 1,
+    "num_beam_groups": 1,
+    "diversity_penalty": 0.0,
+    "temperature": 1.0,
+    "top_k": 50,
+    "top_p": 1.0,
+    "typical_p": 1.0,
+    "repetition_penalty": 1.0,
+    "length_penalty": 1.0,
+    "no_repeat_ngram_size": 0,
+    "encoder_no_repeat_ngram_size": 0,
+    "bad_words_ids": null,
+    "num_return_sequences": 1,
+    "chunk_size_feed_forward": 0,
+    "output_scores": false,
+    "return_dict_in_generate": false,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "remove_invalid_values": false,
+    "exponential_decay_length_penalty": null,
+    "suppress_tokens": null,
+    "begin_suppress_tokens": null,
+    "architectures": null,
+    "finetuning_task": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "tokenizer_class": null,
+    "prefix": null,
+    "bos_token_id": null,
+    "pad_token_id": null,
+    "eos_token_id": null,
+    "sep_token_id": null,
+    "decoder_start_token_id": null,
+    "task_specific_params": null,
+    "problem_type": null,
+    "_name_or_path": "",
+    "transformers_version": "4.29.0.dev0",
+    "model_type": "mplug_owl_vision_model",
+    "hidden_size": 1024,
+    "intermediate_size": 4096,
+    "projection_dim": 768,
+    "num_hidden_layers": 24,
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "patch_size": 14,
+    "image_size": 224,
+    "initializer_range": 0.02,
+    "initializer_factor": 1.0,
+    "attention_dropout": 0.0,
+    "layer_norm_eps": 1e-06,
+    "hidden_act": "quick_gelu"
+  },
+  "visual_abstractor_config": {
+    "return_dict": true,
+    "output_hidden_states": false,
+    "output_attentions": false,
+    "torchscript": false,
+    "torch_dtype": null,
+    "use_bfloat16": false,
+    "tf_legacy_loss": false,
+    "pruned_heads": {},
+    "tie_word_embeddings": true,
+    "is_encoder_decoder": false,
+    "is_decoder": false,
+    "cross_attention_hidden_size": null,
+    "add_cross_attention": false,
+    "tie_encoder_decoder": false,
+    "max_length": 20,
+    "min_length": 0,
+    "do_sample": false,
+    "early_stopping": false,
+    "num_beams": 1,
+    "num_beam_groups": 1,
+    "diversity_penalty": 0.0,
+    "temperature": 1.0,
+    "top_k": 50,
+    "top_p": 1.0,
+    "typical_p": 1.0,
+    "repetition_penalty": 1.0,
+    "length_penalty": 1.0,
+    "no_repeat_ngram_size": 0,
+    "encoder_no_repeat_ngram_size": 0,
+    "bad_words_ids": null,
+    "num_return_sequences": 1,
+    "chunk_size_feed_forward": 0,
+    "output_scores": false,
+    "return_dict_in_generate": false,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "remove_invalid_values": false,
+    "exponential_decay_length_penalty": null,
+    "suppress_tokens": null,
+    "begin_suppress_tokens": null,
+    "architectures": null,
+    "finetuning_task": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "tokenizer_class": null,
+    "prefix": null,
+    "bos_token_id": null,
+    "pad_token_id": 0,
+    "eos_token_id": null,
+    "sep_token_id": null,
+    "decoder_start_token_id": null,
+    "task_specific_params": null,
+    "problem_type": null,
+    "_name_or_path": "",
+    "transformers_version": "4.29.0.dev0",
+    "vocab_size": 30522,
+    "hidden_size": 1024,
+    "num_hidden_layers": 6,
+    "num_attention_heads": 16,
+    "hidden_act": "gelu",
+    "intermediate_size": 4096,
+    "hidden_dropout_prob": 0.1,
+    "attention_probs_dropout_prob": 0.1,
+    "max_position_embeddings": 512,
+    "initializer_range": 0.02,
+    "layer_norm_eps": 1e-06,
+    "position_embedding_type": "absolute",
+    "classifier_dropout": null,
+    "cross_attention_frequency": 2,
+    "encoder_hidden_size": 1024,
+    "model_type": "MPlugOwlVisualAbstractor"
+  },
+  "text_config": {
+    "vocab_size": 32000,
+    "max_position_embeddings": 2048,
+    "hidden_size": 4096,
+    "intermediate_size": 11008,
+    "num_hidden_layers": 32,
+    "num_attention_heads": 32,
+    "hidden_act": "silu",
+    "initializer_range": 0.02,
+    "rms_norm_eps": 1e-06,
+    "use_cache": true,
+    "return_dict": true,
+    "output_hidden_states": false,
+    "output_attentions": false,
+    "torchscript": false,
+    "torch_dtype": null,
+    "use_bfloat16": false,
+    "tf_legacy_loss": false,
+    "pruned_heads": {},
+    "tie_word_embeddings": false,
+    "is_encoder_decoder": false,
+    "is_decoder": false,
+    "cross_attention_hidden_size": null,
+    "add_cross_attention": false,
+    "tie_encoder_decoder": false,
+    "max_length": 20,
+    "min_length": 0,
+    "do_sample": false,
+    "early_stopping": false,
+    "num_beams": 1,
+    "num_beam_groups": 1,
+    "diversity_penalty": 0.0,
+    "temperature": 1.0,
+    "top_k": 50,
+    "top_p": 1.0,
+    "typical_p": 1.0,
+    "repetition_penalty": 1.0,
+    "length_penalty": 1.0,
+    "no_repeat_ngram_size": 0,
+    "encoder_no_repeat_ngram_size": 0,
+    "bad_words_ids": null,
+    "num_return_sequences": 1,
+    "chunk_size_feed_forward": 0,
+    "output_scores": false,
+    "return_dict_in_generate": false,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "remove_invalid_values": false,
+    "exponential_decay_length_penalty": null,
+    "suppress_tokens": null,
+    "begin_suppress_tokens": null,
+    "architectures": null,
+    "finetuning_task": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "tokenizer_class": null,
+    "prefix": null,
+    "bos_token_id": 1,
+    "pad_token_id": 2,
+    "eos_token_id": 2,
+    "sep_token_id": null,
+    "decoder_start_token_id": null,
+    "task_specific_params": null,
+    "problem_type": null,
+    "_name_or_path": "",
+    "transformers_version": "4.29.0.dev0",
+    "model_type": "llama"
+  },
+  "num_query_tokens": 64,
+  "use_decoder_only_language_model": true,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "model_type": "mplug-owl"
+}
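Because "model_type": "mplug-owl" is a custom architecture, the nested config above is easiest to inspect with plain json rather than a stock transformers AutoConfig. A quick sanity-check sketch (paths assume the repo root):

    import json

    with open("config.json") as f:
        cfg = json.load(f)

    print(cfg["model_type"])                        # mplug-owl
    print(cfg["vision_config"]["image_size"])       # 224 (ViT-L/14-style encoder)
    print(cfg["text_config"]["num_hidden_layers"])  # 32 (LLaMA-7B-sized decoder)
    print(cfg["num_query_tokens"])                  # 64 visual abstractor queries
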
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+{
+  "resolution": 224,
+  "image_mean": [
+    0.48145466, 0.4578275, 0.40821073
+  ],
+  "image_std": [
+    0.26862954, 0.26130258, 0.27577711
+  ]
+}
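The values above are the standard CLIP normalization statistics at 224x224. A minimal sketch of how they would typically be applied; torchvision is an assumption here, since the Space's actual image pipeline is not part of this diff:

    import json

    from PIL import Image
    from torchvision import transforms

    with open("preprocessor_config.json") as f:
        pp = json.load(f)

    preprocess = transforms.Compose([
        transforms.Resize((pp["resolution"], pp["resolution"])),           # 224x224
        transforms.ToTensor(),                                             # HWC uint8 -> CHW float in [0, 1]
        transforms.Normalize(mean=pp["image_mean"], std=pp["image_std"]),  # CLIP statistics
    ])
    pixel_values = preprocess(Image.open("examples/monalisa-fun.jpg").convert("RGB"))
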
model_optim_rng.pt → pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af244914525f592ad44647084b05b02e49c12e6d42e91232bbe6cd7aeff0ed17
-size 14305364484
+oid sha256:5930c4a575f679aaef82b3c4c4601f3c57bafcc54c4f19431f7228267084968e
+size 14305584703
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+{"bos_token": "<s>", "eos_token": "</s>", "model_max_length": 1000000000000000019884624838656, "tokenizer_class": "MplugOwlTokenizer", "unk_token": "<unk>", "pad_token": "<unk>"}
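The oversized model_max_length above is the transformers sentinel for "no explicit limit" (it equals int(1e30) after float rounding); the effective context window instead comes from text_config.max_position_embeddings (2048) in config.json. A quick check:

    import json

    tok_cfg = json.load(open("tokenizer_config.json"))
    cfg = json.load(open("config.json"))

    assert tok_cfg["model_max_length"] == int(1e30)       # sentinel, not a real limit
    print(cfg["text_config"]["max_position_embeddings"])  # 2048, the actual context size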