Willow123 committed on
Commit
c195a6f
1 Parent(s): c7749dd

Upload 12 files

Browse files
app.py ADDED
@@ -0,0 +1,1075 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import sys
4
+ sys.path.insert(0, '.')
5
+ sys.path.insert(0, '..')
6
+
7
+ import argparse
8
+ import gradio as gr
9
+ os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), 'tmp')
10
+ import copy
11
+ import time
12
+ import shutil
13
+ import requests
14
+ from PIL import Image, ImageFile
15
+ import torch
16
+ import transformers
17
+ from transformers import StoppingCriteriaList, AutoTokenizer, AutoModel
18
+
19
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
20
+
21
+ from demo_asset.assets.css_html_js import custom_css
22
+ from demo_asset.gradio_patch import Chatbot as grChatbot
23
+ from demo_asset.serve_utils import Stream, Iteratorize
24
+ from demo_asset.conversation import CONV_VISION_7132_v2, StoppingCriteriaSub
25
+ from demo_asset.download import download_image_thread
26
+
27
# Number of per-section component slots the UI callbacks fill; generated
# articles are truncated to this many lines before being displayed.
max_section = 60
# Reusable button updates returned by the callbacks below.
no_change_btn = gr.Button.update()
disable_btn = gr.Button.update(interactive=False)
enable_btn = gr.Button.update(interactive=True)
# When True, chat answers / articles are streamed to the UI as they are
# generated instead of being shown only once generation has finished.
chat_stream_output = True
article_stream_output = True
33
+
34
+
35
def get_urls(caption, exclude, timeout=30):
    """Query the image-retrieval service for images matching a caption.

    Args:
        caption: Text sent as the ``caption`` field of the request.
        exclude: Value sent as the ``exclude`` field of the request
            (callers pass the indices returned by earlier calls).
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the UI callback forever.

    Returns:
        A ``(urls, indices)`` pair taken from ``data.image_urls`` and
        ``data.indices`` of the JSON response.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    response = requests.post(
        'https://lingbi.openxlab.org.cn/image/similar',
        headers={'Content-Type': 'application/json'},
        json={'caption': caption, 'exclude': exclude, 'need_idxs': True},
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of an obscure KeyError below.
    response.raise_for_status()
    # Parse the body once; the original decoded the JSON twice.
    data = response.json()['data']
    return data['image_urls'], data['indices']
44
+
45
+
46
class Demo_UI:
    """Backend state and callbacks for the InternLM-XComposer Gradio demo.

    One instance holds the loaded model plus the state of the article that
    is currently being edited (text, captions, chosen images).  The public
    methods are used as Gradio event handlers; most of them return lists or
    tuples of component updates whose order must match the layout built
    elsewhere in this file.
    """

    def __init__(self, folder):
        """Load model and tokenizer from ``folder`` and move the model to CUDA."""
        self.llm_model = AutoModel.from_pretrained(folder, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(folder, trust_remote_code=True)

        self.llm_model.internlm_tokenizer = tokenizer
        self.llm_model.tokenizer = tokenizer
        self.device = 'cuda'
        self.llm_model.eval().to(self.device)
        print(" load model done: ", type(self.llm_model))

        # Decoded text of two special token ids; they are spliced into
        # prompts as end-of-human / end-of-assistant markers.
        self.eoh = self.llm_model.internlm_tokenizer.decode(
            torch.Tensor([103027]), skip_special_tokens=True)
        self.eoa = self.llm_model.internlm_tokenizer.decode(
            torch.Tensor([103028]), skip_special_tokens=True)
        self.soi_id = len(tokenizer) - 1
        self.soi_token = '<SOI_TOKEN>'

        self.vis_processor = self.llm_model.vis_processor

        stop_words_ids = [
            torch.tensor(ids).to(self.device)
            for ids in ([943], [2917, 44930], [45623], [46323], [103027],
                        [103028])
        ]
        self.stopping_criteria = StoppingCriteriaList(
            [StoppingCriteriaSub(stops=stop_words_ids)])
        # Require at least one digit so a bare "<Seg>" is never parsed as a
        # segment index (int('') would raise).
        self.r2 = re.compile(r'<Seg[0-9]+>')
        self.max_txt_len = 1680

        # Article state.  Initialised here so callbacks fired before the
        # first article is generated do not fail with AttributeError.
        self.title = None
        self.selected = {}
        self.ex_idxs = []
        self.reset()

    def reset(self):
        """Forget the text, captions and image bookkeeping of the article."""
        self.output_text = ''
        self.caps = {}
        self.show_caps = False
        self.show_ids = {}

    @staticmethod
    def _safe_title(title):
        """Return ``title`` made safe for use as a single directory name.

        The title comes from the user and is used to build paths that are
        written to and deleted from, so path separators, NUL and double
        quotes are replaced and dot-only names ('', '.', '..') are rejected.
        """
        name = re.sub(r'[\\/\x00"]', '_', str(title)).strip()
        return name if name.strip('.') else 'untitled'

    def _article_prompt(self, title):
        """Build the prompt that asks the model to write an article."""
        return ' <|User|>:根据给定标题写一个图文并茂,不重复的文章:{}\n'.format(
            title) + self.eoh + ' <|Bot|>:'

    def _image_id(self, loc):
        """Numeric id used in the file names of the images for section ``loc``."""
        return self.show_ids[loc] * 1000 + loc

    @staticmethod
    def _streaming_outputs(text):
        """Updates shown while the article streams: text only, rest hidden."""
        return (text,) + (gr.Markdown.update(visible=False),) * (max_section - 1) + \
            (gr.Gallery.update(visible=False),) * max_section + \
            (gr.Button.update(visible=False),) * max_section + \
            (gr.Textbox.update(visible=False),) * max_section + \
            (gr.Button.update(visible=False),) * max_section + \
            (gr.update(visible=False),) * max_section + (disable_btn,) * 2

    def get_images_xlab(self, caption, loc, exclude):
        """Retrieve candidate images for ``caption`` and download them.

        Images are stored under ``articles/<title>``; other methods read
        them back as ``temp_<id>_<j>.png`` (presumably the naming scheme of
        ``download_image_thread`` -- confirm against that helper).

        Returns:
            The indices reported by the retrieval service.
        """
        urls, idxs = get_urls(caption.strip()[:53], exclude)
        if urls:  # the service may return no hit; do not index blindly
            print(urls[0])
        print('download image with url')
        download_image_thread(urls,
                              folder='articles/' + self.title,
                              index=self._image_id(loc),
                              num_processes=4)
        print('image downloaded')
        return idxs

    def generate(self, text, random, beam, max_length, repetition):
        """Run text-only generation for ``text`` and return the decoded output."""
        input_tokens = self.llm_model.internlm_tokenizer(
            text, return_tensors="pt",
            add_special_tokens=True).to(self.llm_model.device)
        img_embeds = self.llm_model.internlm_model.model.embed_tokens(
            input_tokens.input_ids)
        with torch.no_grad():
            with self.llm_model.maybe_autocast():
                outputs = self.llm_model.internlm_model.generate(
                    inputs_embeds=img_embeds,
                    stopping_criteria=self.stopping_criteria,
                    do_sample=random,
                    num_beams=beam,
                    max_length=max_length,
                    repetition_penalty=float(repetition),
                )
        output_text = self.llm_model.internlm_tokenizer.decode(
            outputs[0][1:], add_special_tokens=False)
        # Everything from the first '<TOKENS_UNUSED_1>' marker on is dropped.
        return output_text.split('<TOKENS_UNUSED_1>')[0]

    def generate_text(self, title, beam, repetition, text_num, random):
        """Generate the full article text for ``title`` in one shot."""
        print('random generate:{}'.format(random))
        return self.generate(self._article_prompt(title), random, beam,
                             text_num, repetition)

    def generate_loc(self, text_sections, image_num, progress):
        """Ask the model after which segments images should be inserted.

        Returns:
            ``(inject_text, locs)``: the answer text and the segment indices
            parsed from its ``<SegN>`` tags, de-duplicated and restricted to
            indices that exist in ``text_sections``.
        """
        full_txt = ''.join(text_sections)
        input_text = f' <|User|>:给定文章"{full_txt}" 根据上述文章,选择适合插入图像的{image_num}行' + ' \n<TOKENS_UNUSED_0> <|Bot|>:适合插入图像的行是'

        for _ in progress.tqdm([1], desc="image spotting"):
            output_text = self.generate(input_text,
                                        random=False,
                                        beam=5,
                                        max_length=300,
                                        repetition=1.)
        inject_text = '适合插入图像的行是' + output_text
        print(inject_text)

        locs = []
        for m in self.r2.findall(inject_text):
            loc = int(m[4:-1])  # strip '<Seg' and '>'
            # Repeated or non-existent segments would only trigger useless
            # captioning and downloads, so they are skipped.
            if loc < len(text_sections) and loc not in locs:
                locs.append(loc)
        print(locs)
        return inject_text, locs

    def generate_cap(self, text_sections, pos, progress):
        """Generate one image caption per segment index in ``pos``.

        Captions produced so far are fed back into the next prompt.

        Returns:
            Dict mapping segment index to caption text.
        """
        pasts = ''
        caps = {}
        # A list (not a bare enumerate) lets the progress bar know the total.
        for idx, po in progress.tqdm(list(enumerate(pos)),
                                     desc="image captioning"):
            full_txt = ''.join(text_sections[:po + 2])
            # Replace the trailing ', ' of the history with a full stop.
            past = pasts[:-2] + '。' if idx > 0 else pasts

            input_text = f' <|User|>: 给定文章"{full_txt}" {past}给出适合在<Seg{po}>后插入的图像对应的标题。' + ' \n<TOKENS_UNUSED_0> <|Bot|>: 标题是"'

            cap_text = self.generate(input_text,
                                     random=False,
                                     beam=1,
                                     max_length=100,
                                     repetition=5.)
            cap_text = cap_text.split('"')[0].strip()
            print(cap_text)
            caps[po] = cap_text

            if idx == 0:
                pasts = f'现在<Seg{po}>后插入图像对应的标题是"{cap_text}", '
            else:
                pasts += f'<Seg{po}>后插入图像对应的标题是"{cap_text}", '

        print(caps)
        return caps

    def generate_loc_cap(self, text_sections, image_num, progress):
        """Pick image positions, then caption each; returns ``{index: caption}``."""
        _, locs = self.generate_loc(text_sections, image_num, progress)
        return self.generate_cap(text_sections, locs, progress)

    def interleav_wrap(self, img_embeds, text):
        """Interleave text embeddings with image embeddings.

        ``text[0]`` is split on ``<ImageHere>``; the i-th image embedding is
        placed after the i-th text part.  The result is cut to
        ``self.max_txt_len`` positions along dim 1.
        """
        batch_size = img_embeds.shape[0]
        im_len = img_embeds.shape[1]
        text = text[0].replace('<Img>', '').replace('</Img>', '')
        parts = text.split('<ImageHere>')
        assert batch_size + 1 == len(parts)

        embed_tokens = self.llm_model.internlm_model.model.embed_tokens
        warp_embeds = []
        temp_len = 0
        for idx, part in enumerate(parts):
            if len(part) > 0:
                part_tokens = self.llm_model.internlm_tokenizer(
                    part, return_tensors="pt",
                    add_special_tokens=False).to(img_embeds.device)
                part_embeds = embed_tokens(part_tokens.input_ids)
                warp_embeds.append(part_embeds)
                temp_len += part_embeds.shape[1]
            if idx < batch_size:
                warp_embeds.append(img_embeds[idx].unsqueeze(0))
                temp_len += im_len

            # Stop early once the length budget is exhausted.
            if temp_len > self.max_txt_len:
                break

        warp_embeds = torch.cat(warp_embeds, dim=1)
        return warp_embeds[:, :self.max_txt_len].to(img_embeds.device)

    def align_text(self, samples):
        """Normalise role tags in ``samples['text_input']`` to the model format."""
        text_new = []
        for raw in samples["text_input"]:
            temp = raw + self.eoa + ' </s>'
            temp = temp.replace('###Human', '<|User|>')
            temp = temp.replace('### Human', '<|User|>')
            temp = temp.replace('<|User|> :', '<|User|>:')
            temp = temp.replace('<|User|>: ', '<|User|>:')
            temp = temp.replace('<|User|>', ' <|User|>')

            temp = temp.replace('###Assistant', '<|Bot|>')
            temp = temp.replace('### Assistant', '<|Bot|>')
            temp = temp.replace('<|Bot|> :', '<|Bot|>:')
            temp = temp.replace('<|Bot|>: ', '<|Bot|>:')
            temp = temp.replace('<|Bot|>', self.eoh + ' <|Bot|>')
            if temp.find('<|User|>') > temp.find('<|Bot|>'):
                temp = temp.replace(' <|User|>', self.eoa + ' <|User|>')
            text_new.append(temp)
        return text_new

    def model_select_image(self, output_text, caps, root, progress):
        """Let the model choose one of four candidate images per caption.

        Args:
            output_text: Article text; line ``i`` corresponds to key ``i``
                of ``caps``.
            caps: Dict whose keys are the lines that get an image.
            root: Directory holding the downloaded candidates.
            progress: Unused; kept for interface compatibility.

        Returns:
            Dict mapping each key of ``caps`` to the chosen candidate (0-3).
            If the answer cannot be parsed, candidate 0 is used.
        """
        print('model_select_image')
        pre_text = ''
        pre_img = []  # images already chosen for earlier positions
        pre_text_list = []
        ans2idx = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
        selected = {k: 0 for k in caps.keys()}
        for i, text in enumerate(output_text.split('\n')):
            pre_text += text + '\n'
            if i not in caps:
                continue

            images = list(pre_img)
            for j in range(4):
                path = os.path.join(root, f'temp_{self._image_id(i)}_{j}.png')
                # Close the file handle right after decoding.
                with Image.open(path) as img_file:
                    image = img_file.convert("RGB")
                images.append(self.vis_processor(image))
            images = torch.stack(images, dim=0)

            pre_text_list.append(pre_text)
            pre_text = ''

            images = images.cuda()
            instruct = ' <|User|>:根据给定上下文和候选图像,选择合适的配图:'
            input_text = '<ImageHere>'.join(
                pre_text_list
            ) + '\n\n候选图像包括: A.<ImageHere>\nB.<ImageHere>\nC.<ImageHere>\nD.<ImageHere>\n\n<TOKENS_UNUSED_0> <|Bot|>:最合适的图是'
            input_text = instruct + input_text
            samples = {'text_input': [input_text]}
            self.llm_model.debug_flag = 0
            with torch.no_grad():
                with torch.cuda.amp.autocast():
                    img_embeds = self.llm_model.encode_img(images)
                    input_text = self.align_text(samples)
                    img_embeds = self.interleav_wrap(img_embeds, input_text)
                bos = torch.ones(
                    [1, 1]) * self.llm_model.internlm_tokenizer.bos_token_id
                bos = bos.long().to(images.device)
                meta_embeds = self.llm_model.internlm_model.model.embed_tokens(
                    bos)
                inputs_embeds = torch.cat([meta_embeds, img_embeds], dim=1)

                with torch.cuda.amp.autocast():
                    # NOTE(review): the last two positions are dropped,
                    # presumably the end markers appended by align_text.
                    outputs = self.llm_model.internlm_model.generate(
                        inputs_embeds=inputs_embeds[:, :-2],
                        do_sample=False,
                        num_beams=5,
                        max_length=10,
                        repetition_penalty=1.,
                    )
            out_text = self.llm_model.internlm_tokenizer.decode(
                outputs[0][1:], add_special_tokens=False)

            try:
                answer = out_text[1] if out_text[0] == ' ' else out_text[0]
                choice = ans2idx[answer]
            except (IndexError, KeyError):
                # Empty answer or a letter outside A-D.
                print('Select fail, use first image')
                choice = 0
            pre_img.append(images[len(pre_img) + choice].cpu())
            selected[i] = choice
        return selected

    def show_md(self, text_sections, title, caps, selected, show_cap=False):
        """Build the component updates that render the article.

        Returns:
            ``(components, md_shows)`` where ``components`` is the
            concatenation of, in this order, ``max_section`` updates each
            for: markdown blocks, galleries, add/delete buttons, caption
            textboxes, caption-search buttons and editors.  ``md_shows``
            holds only the updates of the visible markdown blocks.
        """
        md_shows, ga_shows, btn_shows = [], [], []
        cap_textboxs, cap_searchs = [], []
        editers = []
        for i, section in enumerate(text_sections):
            if i in caps:
                image_id = self._image_id(i)
                # Both variants use the 'file=' form; replace_image() only
                # recognises that form when swapping the picture.
                if show_cap:
                    md = section + '\n' + '<div align="center"> <img src="file=articles/{}/temp_{}_{}.png" width = 500/> {} </div>'.format(
                        title, image_id, selected[i], caps[i])
                else:
                    md = section + '\n' + '<div align="center"> <img src="file=articles/{}/temp_{}_{}.png" width = 500/> </div>'.format(
                        title, image_id, selected[i])
                paths = [
                    'articles/{}/temp_{}_{}.png'.format(title, image_id, j)
                    for j in range(4)
                ]
                # (image, caption) pairs; the caption carries the path.
                img_list = [(path, path) for path in paths]

                ga_shows.append(gr.Gallery.update(visible=True, value=img_list))
                btn_show = gr.Button.update(visible=True,
                                            value='\U0001f5d1\uFE0F')
                cap_textboxs.append(
                    gr.Textbox.update(visible=True, value=caps[i]))
                cap_searchs.append(gr.Button.update(visible=True))
            else:
                md = section
                ga_shows.append(gr.Gallery.update(visible=False, value=[]))
                btn_show = gr.Button.update(visible=True, value='\u2795')
                cap_textboxs.append(gr.Textbox.update(visible=False))
                cap_searchs.append(gr.Button.update(visible=False))

            md_shows.append(gr.Markdown.update(visible=True, value=md))
            btn_shows.append(btn_show)
            editers.append(gr.update(visible=True))
            print(i, md)

        # Hide the slots that are not needed for this article.
        md_hides, ga_hides, btn_hides = [], [], []
        for _ in range(max_section - len(text_sections)):
            md_hides.append(gr.Markdown.update(visible=False, value=''))
            btn_hides.append(gr.Button.update(visible=False))
            editers.append(gr.update(visible=False))

        for _ in range(max_section - len(ga_shows)):
            ga_hides.append(gr.Gallery.update(visible=False, value=[]))
            cap_textboxs.append(gr.Textbox.update(visible=False))
            cap_searchs.append(gr.Button.update(visible=False))

        components = (md_shows + md_hides + ga_shows + ga_hides + btn_shows +
                      btn_hides + cap_textboxs + cap_searchs + editers)
        return components, md_shows

    def generate_article(self,
                         title,
                         beam,
                         repetition,
                         text_num,
                         msi,
                         random,
                         progress=gr.Progress()):
        """Generate an illustrated article for ``title`` (generator).

        Yields component updates: while streaming, the partial text with
        all other slots hidden; finally the full layout from ``show_md``
        followed by two enabled-button updates.

        Args:
            title: Article title entered by the user.
            beam, repetition, text_num, random: Generation settings passed
                on as ``num_beams``, ``repetition_penalty``, ``max_length``
                and ``do_sample``.
            msi: If true, the model selects the image for every position;
                otherwise the first candidate is used.
            progress: Gradio progress tracker.
        """
        self.reset()
        # The title is user input that ends up in filesystem paths.
        self.title = self._safe_title(title)
        if article_stream_output:
            input_tokens = self.llm_model.internlm_tokenizer(
                self._article_prompt(title), return_tensors="pt",
                add_special_tokens=True).to(self.llm_model.device)
            img_embeds = self.llm_model.internlm_model.model.embed_tokens(
                input_tokens.input_ids)
            generate_params = dict(
                inputs_embeds=img_embeds,
                num_beams=beam,
                do_sample=random,
                stopping_criteria=self.stopping_criteria,
                repetition_penalty=float(repetition),
                max_length=text_num,
                bos_token_id=self.llm_model.internlm_tokenizer.bos_token_id,
                eos_token_id=self.llm_model.internlm_tokenizer.eos_token_id,
                pad_token_id=self.llm_model.internlm_tokenizer.pad_token_id,
            )
            output_text = "▌"
            with self.generate_with_streaming(**generate_params) as generator:
                for output in generator:
                    decoded_output = self.llm_model.internlm_tokenizer.decode(
                        output[1:])
                    if output[-1] in [
                            self.llm_model.internlm_tokenizer.eos_token_id
                    ]:
                        break
                    output_text = decoded_output.replace('\n', '\n\n') + "▌"
                    yield self._streaming_outputs(output_text)
                    time.sleep(0.03)
            output_text = output_text[:-1]  # drop the cursor character
            yield self._streaming_outputs(output_text)
        else:
            output_text = self.generate_text(title, beam, repetition, text_num,
                                             random)

        print(output_text)
        # Collapse runs of (possibly indented) line breaks into one.
        output_text = re.sub(r'(\n[ \t]*)+', '\n', output_text)
        if output_text.endswith('\n'):  # also safe for empty output
            output_text = output_text[:-1]
        print(output_text)
        output_text = '\n'.join(output_text.split('\n')[:max_section])

        text_sections = output_text.split('\n')
        idx_text_sections = [
            f'<Seg{i}>' + ' ' + it + '\n' for i, it in enumerate(text_sections)
        ]
        caps = self.generate_loc_cap(idx_text_sections, '', progress)
        self.show_ids = {k: 1 for k in caps.keys()}

        print(caps)
        self.ex_idxs = []
        for loc, cap in progress.tqdm(caps.items(), desc="download image"):
            idxs = self.get_images_xlab(cap, loc, self.ex_idxs)
            self.ex_idxs.extend(idxs)

        if msi:
            self.selected = self.model_select_image(output_text, caps,
                                                    'articles/' + self.title,
                                                    progress)
        else:
            self.selected = {k: 0 for k in caps.keys()}
        components, _ = self.show_md(text_sections, self.title, caps,
                                     self.selected)
        self.show_caps = False

        self.output_text = output_text
        self.caps = caps
        # This function is a generator in both modes, so the final layout
        # must be yielded; a plain ``return value`` would never be delivered.
        yield components + [enable_btn] * 2

    def adjust_img(self, img_num, progress=gr.Progress()):
        """Re-illustrate the current article with ``img_num`` images.

        Returns the ``components`` list of ``show_md``.  Without a
        generated article all slots are simply returned hidden.
        """
        if not self.output_text:
            return self.show_md([], self.title, {}, {})[0]

        text_sections = self.output_text.split('\n')
        idx_text_sections = [
            f'<Seg{i}>' + ' ' + it + '\n' for i, it in enumerate(text_sections)
        ]
        img_num = min(img_num, len(text_sections))
        caps = self.generate_loc_cap(idx_text_sections, int(img_num), progress)

        print(caps)
        sidxs = []
        for loc, cap in caps.items():
            # A new generation number makes the downloads use new file names.
            self.show_ids[loc] = self.show_ids.get(loc, 0) + 1
            idxs = self.get_images_xlab(cap, loc, sidxs)
            sidxs.extend(idxs)
        self.sidxs = sidxs

        self.selected = {k: 0 for k in caps.keys()}
        components, _ = self.show_md(text_sections, self.title, caps,
                                     self.selected)

        self.caps = caps
        return components

    def add_delete_image(self, text, status, index):
        """Toggle the image slot of section ``index``.

        ``status`` is the current button label: the wastebasket emoji means
        "delete the image", anything else means "open an empty image slot".

        Returns:
            Updates for (markdown, gallery, button, caption textbox,
            caption-search button).
        """
        index = int(index)
        if status == '\U0001f5d1\uFE0F':
            # Keep both dicts in sync; either may lack the key.
            self.caps.pop(index, None)
            self.selected.pop(index, None)
            md_show = gr.Markdown.update(value=text.split('\n')[0])
            gallery = gr.Gallery.update(visible=False, value=[])
            btn_show = gr.Button.update(value='\u2795')
            cap_textbox = gr.Textbox.update(visible=False)
            cap_search = gr.Button.update(visible=False)
        else:
            md_show = gr.Markdown.update()
            gallery = gr.Gallery.update(visible=True, value=[])
            btn_show = gr.Button.update(value='\U0001f5d1\uFE0F')
            cap_textbox = gr.Textbox.update(visible=True)
            cap_search = gr.Button.update(visible=True)

        return md_show, gallery, btn_show, cap_textbox, cap_search

    def search_image(self, text, index):
        """Search images for caption ``text`` and show them for section ``index``.

        Returns a gallery update; a no-op update if ``text`` is empty or no
        article exists yet.
        """
        index = int(index)
        if text == '' or self.title is None:
            return gr.Gallery.update()

        self.show_ids[index] = self.show_ids.get(index, 0) + 1
        self.caps[index] = text
        # Every captioned section needs a selection entry, otherwise
        # show_md() / save() fail for sections added through search.
        self.selected.setdefault(index, 0)
        idxs = self.get_images_xlab(text, index, self.ex_idxs)
        self.ex_idxs.extend(idxs)

        paths = [
            'articles/{}/temp_{}_{}.png'.format(self.title,
                                                self._image_id(index), j)
            for j in range(4)
        ]
        return gr.Gallery.update(visible=True,
                                 value=[(path, path) for path in paths])

    def replace_image(self, article, index, evt: gr.SelectData):
        """Use the gallery item the user clicked as the image of a section.

        Returns the section markdown with its image path replaced, or with
        an image block appended if it had none.
        """
        index = int(index)
        self.selected[index] = evt.index
        new_src = 'file={}'.format(evt.value)
        if '<div align="center">' in article:
            # Match only inside the src attribute, and substitute through a
            # function so backslashes in the path are taken literally.
            return re.sub(r'file=[^"]*\.png', lambda _: new_src, article)
        return article + '\n' + '<div align="center"> <img src="{}" width = 500/> </div>'.format(
            new_src)

    def add_delete_caption(self):
        """Toggle captions under the images; returns ``show_md`` components."""
        self.show_caps = not self.show_caps
        text_sections = self.output_text.split('\n') if self.output_text else []
        components, _ = self.show_md(text_sections,
                                     self.title,
                                     self.caps,
                                     selected=self.selected,
                                     show_cap=self.show_caps)
        return components

    def save(self):
        """Export the article as markdown plus images and zip it.

        Returns:
            Path of the created zip archive.

        Raises:
            ValueError: If no article has been generated yet.
        """
        if not self.title:
            # Without a title the folder would be 'save_articles/' itself
            # and the cleanup below would wipe every saved article.
            raise ValueError('No article has been generated yet.')

        folder = 'save_articles/' + self.title
        if os.path.isdir(folder):
            shutil.rmtree(folder)
        os.makedirs(folder, exist_ok=True)

        blocks = []
        if len(self.output_text) > 0:
            for i, section in enumerate(self.output_text.split('\n')):
                if i not in self.caps:
                    blocks.append(section)
                    continue
                image_id = self._image_id(i)
                if self.show_caps:
                    md = section + '\n' + '<div align="center"> <img src="temp_{}_{}.png" width = 500/> {} </div>'.format(
                        image_id, self.selected[i], self.caps[i])
                else:
                    md = section + '\n' + '<div align="center"> <img src="temp_{}_{}.png" width = 500/> </div>'.format(
                        image_id, self.selected[i])
                blocks.append(md)
        save_text = '\n\n'.join(blocks)

        # Explicit UTF-8: the article is usually Chinese and the platform
        # default encoding may not be able to represent it.
        with open(os.path.join(folder, 'io.MD'), 'w', encoding='utf-8') as f:
            f.write(save_text)

        for k in self.caps.keys():
            shutil.copy(
                os.path.join(
                    'articles', self.title,
                    f'temp_{self._image_id(k)}_{self.selected[k]}.png'),
                folder)
        return shutil.make_archive(folder, 'zip', folder)

    def get_context_emb(self, state, img_list):
        """Embed the chat prompt, inserting image embeddings at placeholders.

        The prompt from ``state.get_prompt()`` is split on
        ``<Img><ImageHere></Img>``; the text pieces are embedded and
        interleaved with the entries of ``img_list`` along dim 1.
        """
        prompt = state.get_prompt()
        print(prompt)
        prompt_segs = prompt.split('<Img><ImageHere></Img>')

        assert len(prompt_segs) == len(
            img_list
        ) + 1, "Unmatched numbers of image placeholders and images."
        # Special tokens are added to the first segment only.
        seg_tokens = [
            self.llm_model.internlm_tokenizer(seg,
                                              return_tensors="pt",
                                              add_special_tokens=i == 0).to(
                                                  self.device).input_ids
            for i, seg in enumerate(prompt_segs)
        ]
        seg_embs = [
            self.llm_model.internlm_model.model.embed_tokens(seg_t)
            for seg_t in seg_tokens
        ]
        mixed_embs = [
            emb for pair in zip(seg_embs[:-1], img_list) for emb in pair
        ] + [seg_embs[-1]]
        return torch.cat(mixed_embs, dim=1)

    def chat_ask(self, state, img_list, text, image):
        """Append the user's turn (text and/or image) to the conversation.

        Returns:
            ``(state, img_list, chatbot, "", None, btn, btn)``; the empty
            string and ``None`` clear the text and image inputs.
        """
        state.skip_next = False
        if not text and image is None:
            # Nothing was submitted; tell chat_answer to skip generation.
            state.skip_next = True
            return (state, img_list, state.to_gradio_chatbot(), "",
                    None) + (no_change_btn, ) * 2

        if image is not None:
            image_pt = self.vis_processor(image).unsqueeze(0).to(self.device)
            image_emb = self.llm_model.encode_img(image_pt)
            img_list.append(image_emb)

            state.append_message(state.roles[0],
                                 ["<Img><ImageHere></Img>", image])

        last_is_user_image = (len(state.messages) > 0
                              and state.messages[-1][0] == state.roles[0]
                              and isinstance(state.messages[-1][1], list))
        if last_is_user_image:
            # Attach the text to the image message instead of adding a
            # second user message.
            state.messages[-1][1][0] = ' '.join(
                [state.messages[-1][1][0], text])
        else:
            state.append_message(state.roles[0], text)

        print(state.messages)

        # Placeholder for the assistant's answer.
        state.append_message(state.roles[1], None)

        return (state, img_list, state.to_gradio_chatbot(), "",
                None) + (disable_btn, ) * 2

    def generate_with_callback(self, callback=None, **kwargs):
        """Run ``generate`` and report progress through ``callback``.

        The callback is attached as an extra stopping criterion.
        """
        # Work on a copy: callers pass the shared ``self.stopping_criteria``
        # and appending to it would pile up one stale Stream per request.
        criteria = transformers.StoppingCriteriaList(
            kwargs.get("stopping_criteria") or [])
        criteria.append(Stream(callback_func=callback))
        kwargs["stopping_criteria"] = criteria
        with torch.no_grad():
            with self.llm_model.maybe_autocast():
                self.llm_model.internlm_model.generate(**kwargs)

    def generate_with_streaming(self, **kwargs):
        """Wrap ``generate_with_callback`` so its outputs can be iterated."""
        return Iteratorize(self.generate_with_callback, kwargs, callback=None)

    def chat_answer(self, state, img_list, max_output_tokens,
                    repetition_penalty, num_beams, do_sample):
        """Generate the assistant's reply for the conversation (generator).

        Yields ``(state, chatbot, btn, btn)`` tuples: intermediate ones
        with disabled buttons while streaming, a final one with enabled
        buttons.
        """
        if state.skip_next:
            # This function is a generator, so results must be yielded.
            yield (state, state.to_gradio_chatbot()) + (no_change_btn, ) * 2
            return

        embs = self.get_context_emb(state, img_list)
        tokenizer = self.llm_model.internlm_tokenizer
        if chat_stream_output:
            generate_params = dict(
                inputs_embeds=embs,
                num_beams=num_beams,
                do_sample=do_sample,
                stopping_criteria=self.stopping_criteria,
                repetition_penalty=float(repetition_penalty),
                max_length=max_output_tokens,
                bos_token_id=tokenizer.bos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
            )
            state.messages[-1][-1] = "▌"
            with self.generate_with_streaming(**generate_params) as generator:
                for output in generator:
                    decoded_output = tokenizer.decode(output[1:])
                    if output[-1] in [tokenizer.eos_token_id, 333, 497]:
                        break
                    state.messages[-1][-1] = decoded_output + "▌"
                    yield (state,
                           state.to_gradio_chatbot()) + (disable_btn, ) * 2
                    time.sleep(0.03)
            # Remove the cursor character.
            state.messages[-1][-1] = state.messages[-1][-1][:-1]
            yield (state, state.to_gradio_chatbot()) + (enable_btn, ) * 2
            return

        with torch.no_grad():
            with self.llm_model.maybe_autocast():
                outputs = self.llm_model.internlm_model.generate(
                    inputs_embeds=embs,
                    max_new_tokens=max_output_tokens,
                    stopping_criteria=self.stopping_criteria,
                    num_beams=num_beams,
                    do_sample=do_sample,
                    repetition_penalty=float(repetition_penalty),
                    bos_token_id=tokenizer.bos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    pad_token_id=tokenizer.pad_token_id,
                )

        output_token = outputs[0]
        if output_token[0] == 0:
            output_token = output_token[1:]
        output_text = tokenizer.decode(output_token, add_special_tokens=False)
        print(output_text)
        # Cut at the stop marker, keep only the assistant part.
        output_text = output_text.split('<TOKENS_UNUSED_1>')[0]
        output_text = output_text.split('Assistant:')[-1].strip()
        output_text = output_text.replace("<s>", "")
        state.messages[-1][1] = output_text

        yield (state, state.to_gradio_chatbot()) + (enable_btn, ) * 2

    def clear_answer(self, state):
        """Blank the last message so it can be regenerated."""
        state.messages[-1][-1] = None
        return (state, state.to_gradio_chatbot())

    def chat_clear_history(self):
        """Start a fresh conversation and clear the chat inputs."""
        state = CONV_VISION_7132_v2.copy()
        return (state, [], state.to_gradio_chatbot(), "",
                None) + (disable_btn, ) * 2
737
+
738
+
739
def load_demo():
    """Create the initial chat state and reveal the chat widgets.

    Returns:
        A 7-tuple: a fresh copy of the conversation template, an empty
        image list, and ``visible=True`` updates for the chatbot, textbox,
        button, row and accordion components (in that order).
    """
    conversation = CONV_VISION_7132_v2.copy()
    revealed = tuple(
        component.update(visible=True)
        for component in (gr.Chatbot, gr.Textbox, gr.Button, gr.Row,
                          gr.Accordion))
    return (conversation, []) + revealed
745
+
746
+
747
def change_language(lang):
    """Return the updates that switch every UI text to ``lang``.

    Args:
        lang: Target language, ``'中文'`` or ``'English'``.

    Returns:
        A list of updates in the fixed order: language button, title,
        generate button, article-settings accordion, beam, repetition,
        text_num, msi, random, img_num, adjust button, ``max_section``
        caption-search buttons, ``max_section`` editors, save button, save
        file, chat-settings accordion, chat text_num, chat beam, chat
        repetition, chat random, chat textbox, submit, regenerate and
        clear buttons.

    Raises:
        ValueError: If ``lang`` is not a supported language (the original
            code failed with an UnboundLocalError in that case).
    """
    texts = {
        '中文': {
            'lang_btn': 'English',
            'title': '根据给定标题写一个图文并茂的文章:',
            'btn': '生成',
            'parameter_article': '高级设置',
            'beam': '集束大小',
            'repetition': '重复惩罚',
            'text_num': '最多输出字数',
            'msi': '模型选图',
            'random': '采样',
            'img_num': '生成文章后,可选择全文配图数量',
            'adjust_btn': '固定数量配图',
            'cap_search': '搜索',
            'editer': '编辑',
            'save_btn': '文章下载',
            'save_file': '文章下载',
            'parameter_chat': '参数',
            'chat_text_num': '最多输出字数',
            'chat_beam': '集束大小',
            'chat_repetition': '重复惩罚',
            'chat_random': '采样',
            'chat_textbox': '输入聊天内容并回车',
            'submit_btn': '提交',
            'regenerate_btn': '🔄 重新生成',
            'clear_btn': '🗑️ 清空聊天框',
        },
        'English': {
            'lang_btn': '中文',
            'title': 'Write an illustrated article based on the given title:',
            'btn': 'Submit',
            'parameter_article': 'Advanced Settings',
            'beam': 'Beam Size',
            'repetition': 'Repetition_penalty',
            'text_num': 'Max output tokens',
            'msi': 'Model selects images',
            'random': 'Do_sample',
            'img_num':
            'Select the number of the inserted image after article generation.',
            'adjust_btn': 'Insert a fixed number of images',
            'cap_search': 'Search',
            'editer': 'edit',
            'save_btn': 'Save article',
            'save_file': 'Save article',
            'parameter_chat': 'Parameters',
            'chat_text_num': 'Max output tokens',
            'chat_beam': 'Beam Size',
            'chat_repetition': 'Repetition_penalty',
            'chat_random': 'Do_sample',
            'chat_textbox': 'Enter text and press ENTER',
            'submit_btn': 'Submit',
            'regenerate_btn': '🔄 Regenerate',
            'clear_btn': '🗑️ Clear history',
        },
    }
    if lang not in texts:
        raise ValueError('Unsupported language: {!r}'.format(lang))
    t = texts[lang]

    def value(key):
        return gr.update(value=t[key])

    def label(key):
        return gr.update(label=t[key])

    article_controls = [
        value('lang_btn'),
        label('title'),
        value('btn'),
        label('parameter_article'),
        label('beam'),
        label('repetition'),
        label('text_num'),
        label('msi'),
        label('random'),
        label('img_num'),
        value('adjust_btn'),
    ]
    # One update per section slot, matching the per-section widgets.
    cap_searchs = [value('cap_search') for _ in range(max_section)]
    editers = [label('editer') for _ in range(max_section)]
    save_controls = [value('save_btn'), label('save_file')]
    chat_settings = [
        label('parameter_chat'),
        label('chat_text_num'),
        label('chat_beam'),
        label('chat_repetition'),
        label('chat_random'),
    ]
    chat_controls = [
        gr.update(placeholder=t['chat_textbox']),
        value('submit_btn'),
        value('regenerate_btn'),
        value('clear_btn'),
    ]
    return (article_controls + cap_searchs + editers + save_controls +
            chat_settings + chat_controls)
818
+
819
+
820
# Command-line options; --folder is the value handed to Demo_UI below.
parser = argparse.ArgumentParser()
parser.add_argument("--folder", default='internlm/internlm-xcomposer-7b')
# NOTE(review): --private is parsed but not read in this part of the file.
parser.add_argument("--private", default=False, action='store_true')
args = parser.parse_args()
demo_ui = Demo_UI(args.folder)

# Build the whole web UI: a header row with the language toggle, then two
# tabs (article writing and multimodal chat), then the event wiring.
# NOTE(review): container nesting was reconstructed from a flattened listing;
# confirm widget placement against the running UI.
with gr.Blocks(css=custom_css, title='浦语·灵笔 (InternLM-XComposer)') as demo:
    with gr.Row():
        with gr.Column(scale=20):
            #gr.HTML("""<h1 align="center" id="space-title" style="font-size:35px;">🤗 浦语·灵笔 (InternLM-XComposer)</h1>""")
            # NOTE(review): the markup below has a stray comma after the src
            # attribute and a closing </a> with no opening tag.
            gr.HTML(
                """<h1 align="center"><img src="https://raw.githubusercontent.com/panzhang0212/interleaved_io/main/logo.png", alt="InternLM-XComposer" border="0" style="margin: 0 auto; height: 200px;" /></a> </h1>"""
            )
        with gr.Column(scale=1, min_width=100):
            # Caption is the language the UI will switch to (see change_language).
            lang_btn = gr.Button("中文")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📝 创作图文并茂文章 (Write Interleaved-text-image Article)"):
            # Title input and the button that starts article generation.
            with gr.Row():
                title = gr.Textbox(
                    label=
                    'Write an illustrated article based on the given title:',
                    scale=2)
                btn = gr.Button("Submit", scale=1)

            # Image-count slider; its button starts disabled and is among the
            # outputs of btn.click below.
            with gr.Row():
                img_num = gr.Slider(
                    minimum=1.0,
                    maximum=30.0,
                    value=5.0,
                    step=1.0,
                    scale=2,
                    label=
                    'Select the number of the inserted image after article generation.'
                )
                adjust_btn = gr.Button('Insert a fixed number of images',
                                       interactive=False,
                                       scale=1)

            with gr.Row():
                with gr.Column(scale=1):
                    # Generation settings passed to demo_ui.generate_article.
                    with gr.Accordion("Advanced Settings",
                                      open=False,
                                      visible=True) as parameter_article:
                        beam = gr.Slider(minimum=1.0,
                                         maximum=6.0,
                                         value=5.0,
                                         step=1.0,
                                         label='Beam Size')
                        repetition = gr.Slider(minimum=0.0,
                                               maximum=10.0,
                                               value=5.0,
                                               step=0.1,
                                               label='Repetition_penalty')
                        text_num = gr.Slider(minimum=100.0,
                                             maximum=2000.0,
                                             value=1000.0,
                                             step=1.0,
                                             label='Max output tokens')
                        msi = gr.Checkbox(value=True,
                                          label='Model selects images')
                        random = gr.Checkbox(label='Do_sample')

                with gr.Column(scale=1):
                    # Clickable sample titles that fill the title textbox.
                    gr.Examples(
                        examples=[["又见敦煌"], ["星链新闻稿"], ["如何养好一只宠物"],
                                  ["Shanghai Travel Guide in English"], ["Travel guidance of London in English"], ["Advertising for Genshin Impact in English"]],
                        inputs=[title],
                    )

            # Per-section widgets, index-aligned across all six lists.
            articles = []
            gallerys = []
            add_delete_btns = []
            cap_textboxs = []
            cap_searchs = []
            editers = []
            with gr.Column():
                # Pre-create max_section rows; only the first article pane
                # starts visible, everything else starts hidden.
                for i in range(max_section):
                    with gr.Row():
                        visible = True if i == 0 else False
                        with gr.Column(scale=2):
                            article = gr.Markdown(visible=visible,
                                                  elem_classes='feedback')
                            articles.append(article)

                        with gr.Column(scale=1):
                            with gr.Accordion('edit',
                                              open=False,
                                              visible=False) as editer:
                                with gr.Row():
                                    cap_textbox = gr.Textbox(show_label=False,
                                                             interactive=True,
                                                             scale=6,
                                                             visible=False)
                                    cap_search = gr.Button(value="Search",
                                                           visible=False,
                                                           scale=1)
                                with gr.Row():
                                    gallery = gr.Gallery(visible=False,
                                                         columns=2,
                                                         height='auto')

                            add_delete_btn = gr.Button(visible=False)

                    # The hidden Number carries the section index i to the
                    # handler as a regular input.
                    gallery.select(demo_ui.replace_image, [
                        articles[i],
                        gr.Number(value=i, visible=False)
                    ], articles[i])
                    gallerys.append(gallery)
                    add_delete_btns.append(add_delete_btn)

                    cap_textboxs.append(cap_textbox)
                    cap_searchs.append(cap_search)
                    editers.append(editer)

            save_btn = gr.Button("Save article")
            save_file = gr.File(label="Save article")

            # Wire the per-section add/delete and caption-search buttons.
            for i in range(max_section):
                add_delete_btns[i].click(demo_ui.add_delete_image,
                                         inputs=[
                                             articles[i],
                                             add_delete_btns[i],
                                             gr.Number(value=i,
                                                       visible=False)
                                         ],
                                         outputs=[
                                             articles[i], gallerys[i],
                                             add_delete_btns[i],
                                             cap_textboxs[i],
                                             cap_searchs[i]
                                         ])
                cap_searchs[i].click(demo_ui.search_image,
                                     inputs=[
                                         cap_textboxs[i],
                                         gr.Number(value=i, visible=False)
                                     ],
                                     outputs=gallerys[i])

            # Generation updates every per-section widget plus the two buttons.
            btn.click(
                demo_ui.generate_article,
                inputs=[title, beam, repetition, text_num, msi, random],
                outputs=articles + gallerys + add_delete_btns +
                cap_textboxs + cap_searchs + editers + [btn, adjust_btn])
            # cap_btn.click(demo_ui.add_delete_caption, inputs=None, outputs=articles)
            save_btn.click(demo_ui.save, inputs=None, outputs=save_file)
            adjust_btn.click(demo_ui.adjust_img,
                             inputs=img_num,
                             outputs=articles + gallerys +
                             add_delete_btns + cap_textboxs + cap_searchs +
                             editers)

        with gr.TabItem("💬 多模态对话 (Multimodal Chat)", elem_id="chat", id=0):
            # Per-session state passed through the chat handlers.
            chat_state = gr.State()
            img_list = gr.State()
            with gr.Row():
                with gr.Column(scale=3):
                    imagebox = gr.Image(type="pil")

                    # Created hidden; it is one of the outputs of demo.load below.
                    with gr.Accordion("Parameters", open=True,
                                      visible=False) as parameter_row:
                        chat_max_output_tokens = gr.Slider(
                            minimum=0,
                            maximum=1024,
                            value=512,
                            step=64,
                            interactive=True,
                            label="Max output tokens",
                        )
                        chat_num_beams = gr.Slider(
                            minimum=1,
                            maximum=5,
                            value=3,
                            step=1,
                            interactive=True,
                            label="Beam Size",
                        )
                        chat_repetition_penalty = gr.Slider(
                            minimum=1,
                            maximum=5,
                            value=1,
                            step=0.1,
                            interactive=True,
                            label="Repetition_penalty",
                        )
                        # chat_temperature = gr.Slider(minimum=0, maximum=1, value=1, step=0.1, interactive=True,
                        # label="Temperature", )
                        chat_do_sample = gr.Checkbox(interactive=True,
                                                     value=True,
                                                     label="Do_sample")

                with gr.Column(scale=6):
                    # Chatbot, textbox and submit button start hidden; they are
                    # outputs of demo.load below.
                    chatbot = grChatbot(elem_id="chatbot",
                                        visible=False,
                                        height=750)
                    with gr.Row():
                        with gr.Column(scale=8):
                            chat_textbox = gr.Textbox(
                                show_label=False,
                                placeholder="Enter text and press ENTER",
                                visible=False).style(container=False)
                        with gr.Column(scale=1, min_width=60):
                            submit_btn = gr.Button(value="Submit",
                                                   visible=False)
                    with gr.Row(visible=True) as button_row:
                        regenerate_btn = gr.Button(value="🔄 Regenerate",
                                                   interactive=False)
                        clear_btn = gr.Button(value="🗑️ Clear history",
                                              interactive=False)

            btn_list = [regenerate_btn, clear_btn]
            # Positional order in which the settings reach demo_ui.chat_answer;
            # note it differs from the order the widgets are created in.
            parameter_list = [
                chat_max_output_tokens, chat_repetition_penalty,
                chat_num_beams, chat_do_sample
            ]

            # Pressing ENTER and clicking Submit run the same two-step chain:
            # chat_ask first, then chat_answer.
            chat_textbox.submit(
                demo_ui.chat_ask,
                [chat_state, img_list, chat_textbox, imagebox],
                [chat_state, img_list, chatbot, chat_textbox, imagebox] +
                btn_list).then(demo_ui.chat_answer,
                               [chat_state, img_list] + parameter_list,
                               [chat_state, chatbot] + btn_list)
            submit_btn.click(
                demo_ui.chat_ask,
                [chat_state, img_list, chat_textbox, imagebox],
                [chat_state, img_list, chatbot, chat_textbox, imagebox] +
                btn_list).then(demo_ui.chat_answer,
                               [chat_state, img_list] + parameter_list,
                               [chat_state, chatbot] + btn_list)

            # Regenerate: clear_answer first, then chat_answer again.
            regenerate_btn.click(demo_ui.clear_answer, chat_state,
                                 [chat_state, chatbot]).then(
                                     demo_ui.chat_answer,
                                     [chat_state, img_list] + parameter_list,
                                     [chat_state, chatbot] + btn_list)
            clear_btn.click(
                demo_ui.chat_clear_history, None,
                [chat_state, img_list, chatbot, chat_textbox, imagebox] +
                btn_list)

    # load_demo runs on page load and updates the chat widgets listed here.
    demo.load(load_demo, None, [
        chat_state, img_list, chatbot, chat_textbox, submit_btn,
        parameter_row
    ])

    # The outputs order must match the list returned by change_language.
    lang_btn.click(change_language, inputs=lang_btn, outputs=[lang_btn, title, btn, parameter_article] +\
                   [beam, repetition, text_num, msi, random, img_num, adjust_btn] + cap_searchs + editers +\
                   [save_btn, save_file] + [parameter_row, chat_max_output_tokens, chat_num_beams, chat_repetition_penalty, chat_do_sample] +\
                   [chat_textbox, submit_btn, regenerate_btn, clear_btn])
    demo.queue(concurrency_count=8, status_update_rate=10, api_open=False)

if __name__ == "__main__":
    demo.launch()
1074
+
1075
+
demo_asset/assets/UI_en.png ADDED
demo_asset/assets/css_html_js.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CSS text for the Gradio app (app.py passes a `custom_css` value to
# gr.Blocks(css=...)). The visible app code uses the `.feedback` and
# `.tab-buttons` classes.
# NOTE(review): the remaining selectors (leaderboard, citation, filter, ...)
# are not referenced by the visible app code; confirm whether they are needed.
# NOTE(review): `padding:0.5;` has no unit, which is not a valid CSS length.
custom_css = """
.feedback {font-size: 20px !important}
.markdown-text {
font-size: 1em !important;
}
#models-to-add-text {
font-size: 18px !important;
}
#citation-button span {
font-size: 16px !important;
}
#citation-button textarea {
font-size: 16px !important;
}
#citation-button > label > button {
margin: 6px;
transform: scale(1.3);
}
#leaderboard-table {
margin-top: 15px
}
#leaderboard-table-lite {
margin-top: 15px
}
#search-bar-table-box > div:first-child {
background: none;
border: none;
}

#search-bar {
padding: 0px;
}
/* Hides the final AutoEvalColumn */
#llm-benchmark-tab-table table td:last-child,
#llm-benchmark-tab-table table th:last-child {
display: none;
}
/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
table td:first-child,
table th:first-child {
max-width: 400px;
overflow: auto;
white-space: nowrap;
}
.tab-buttons button {
font-size: 1.25em;
}
#scale-logo {
border-style: none !important;
box-shadow: none;
display: block;
margin-left: auto;
margin-right: auto;
max-width: 600px;
}
#scale-logo .download {
display: none;
}
#filter_type{
border: 0;
padding-left: 0;
padding-top: 0;
}
#filter_type label {
display: flex;
}
#filter_type label > span{
margin-top: var(--spacing-lg);
margin-right: 0.5em;
}
#filter_type label > .wrap{
width: 103px;
}
#filter_type label > .wrap .wrap-inner{
padding: 2px;
}
#filter_type label > .wrap .wrap-inner input{
width: 1px
}
#filter-columns-type{
border:0;
padding:0.5;
}
#filter-columns-size{
border:0;
padding:0.5;
}
#box-filter > .form{
border: 0
}
"""
92
+
93
# JavaScript source kept as a Python string: a function that ignores its
# argument's incoming value and returns the page's URL query parameters as a
# plain object.
# NOTE(review): not referenced by the visible app code; confirm it is used.
get_window_url_params = """
function(url_params) {
const params = new URLSearchParams(window.location.search);
url_params = Object.fromEntries(params);
return url_params;
}
"""
demo_asset/assets/edit1.png ADDED
demo_asset/assets/logo.png ADDED
demo_asset/assets/logo.svg ADDED
demo_asset/assets/start.png ADDED
demo_asset/conversation.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+
3
+ import torch
4
+ from transformers import StoppingCriteria, StoppingCriteriaList
5
+
6
+ import dataclasses
7
+ from enum import auto, Enum
8
+ from typing import List, Any
9
+
10
+
11
class SeparatorStyle(Enum):
    """Separator layouts that a conversation prompt can be rendered with."""
    # Explicit values equal the ones `auto()` assigns (counting from 1).
    SINGLE = 1
    TWO = 2
15
+
16
+
17
@dataclasses.dataclass
class Conversation:
    """Hold one conversation's history and render it as a prompt or chat rows."""
    # Text placed at the start of every prompt.
    system: str
    # Role labels; stored and copied as-is by this class.
    roles: List[str]
    # [role, message] pairs. A message is a string, a (text, image) pair for
    # an image turn, or a falsy value for a reply that is still pending.
    messages: List[List[str]]
    # Number of leading messages that to_gradio_chatbot() skips.
    offset: int
    # system_img: List[Image.Image] = []
    # A SeparatorStyle member, or one of the strings "7132" / "raw".
    sep_style: SeparatorStyle = SeparatorStyle.SINGLE
    sep: str = "###"
    # Second separator, used by every style except SINGLE.
    sep2: str = None

    skip_next: bool = False
    conv_id: Any = None

    def get_prompt(self):
        """Render the history as a single prompt string.

        A message that is falsy (for example ``None``) is rendered as just its
        role prefix, leaving the prompt open for the model to complete.

        Returns:
            The prompt text, laid out according to ``sep_style``.

        Raises:
            ValueError: If ``sep_style`` is not a supported style.
        """
        if self.sep_style == SeparatorStyle.SINGLE:
            ret = self.system + self.sep
            for role, message in self.messages:
                if message:
                    ret += role + ":" + message + self.sep
                else:
                    ret += role + ":"
            return ret

        if self.sep_style == SeparatorStyle.TWO or self.sep_style == "7132":
            seps = [self.sep, self.sep2]
            # Only the TWO style puts a separator right after the system text.
            if self.sep_style == SeparatorStyle.TWO:
                ret = self.system + seps[0]
            else:
                ret = self.system
            for i, (role, message) in enumerate(self.messages):
                if message:
                    # Image turns are (text, image) pairs; only the text goes
                    # into the prompt. Tuples are accepted as well as lists so
                    # this agrees with to_gradio_chatbot().
                    if isinstance(message, (list, tuple)):
                        text = message[0]
                    else:
                        text = message
                    ret += role + ": " + text + seps[i % 2]
                else:
                    ret += role + ":"
            return ret

        if self.sep_style == "raw":
            seps = [self.sep, self.sep2]
            ret = self.system
            for i, (role, message) in enumerate(self.messages):
                if message:
                    ret += role + message + seps[i % 2]
                else:
                    ret += role
            return ret

        raise ValueError(f"Invalid style: {self.sep_style}")

    def append_message(self, role, message):
        """Append one ``[role, message]`` entry to the history."""
        self.messages.append([role, message])

    def to_gradio_chatbot(self):
        """Convert the history (from ``offset`` on) into chatbot rows.

        Even-indexed messages open a new ``[message, None]`` row and the
        following odd-indexed message fills the row's second slot. For an
        image turn the image is resized, encoded as JPEG and inlined as a
        base64 ``<img>`` tag in place of ``<Img><ImageHere></Img>``.

        Returns:
            A list of two-element lists.
        """
        import base64
        from io import BytesIO

        ret = []
        for i, (_role, msg) in enumerate(self.messages[self.offset:]):
            if i % 2 == 0:
                if isinstance(msg, (tuple, list)):
                    msg, image = msg

                    # Scale so the short edge is at most 400 px and the long
                    # edge at most 800 px, keeping the aspect ratio.
                    max_hw, min_hw = max(image.size), min(image.size)
                    aspect_ratio = max_hw / min_hw
                    max_len, min_len = 800, 400
                    shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
                    longest_edge = int(shortest_edge * aspect_ratio)
                    W, H = image.size
                    if H > W:
                        H, W = longest_edge, shortest_edge
                    else:
                        H, W = shortest_edge, longest_edge
                    image = image.resize((W, H))
                    # JPEG cannot store alpha or palette images, so convert
                    # first instead of failing in save().
                    if image.mode != "RGB":
                        image = image.convert("RGB")
                    buffered = BytesIO()
                    image.save(buffered, format="JPEG")
                    img_b64_str = base64.b64encode(buffered.getvalue()).decode()
                    # The MIME type must match the JPEG payload.
                    img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
                    msg = msg.replace('<Img><ImageHere></Img>', img_str)
                ret.append([msg, None])
            else:
                ret[-1][-1] = msg
        return ret

    def copy(self):
        """Return a new Conversation with a fresh message list.

        Each ``[role, message]`` pair is rebuilt as a new list; the message
        objects themselves are shared. ``skip_next`` is not carried over, so
        the copy starts with its default.
        """
        return Conversation(
            system=self.system,
            # system_img=self.system_img,
            roles=self.roles,
            messages=[[x, y] for x, y in self.messages],
            offset=self.offset,
            sep_style=self.sep_style,
            sep=self.sep,
            sep2=self.sep2,
            conv_id=self.conv_id)

    def dict(self):
        """Return the conversation as a plain dict.

        ``sep_style`` and ``skip_next`` are not included.
        """
        return {
            "system": self.system,
            # "system_img": self.system_img,
            "roles": self.roles,
            "messages": self.messages,
            "offset": self.offset,
            "sep": self.sep,
            "sep2": self.sep2,
            "conv_id": self.conv_id,
        }
130
+
131
+
132
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation once the first sequence ends with any stop sequence."""

    def __init__(self, stops=None, encounters=1):
        """Store the stop sequences.

        Args:
            stops: Iterable of 1-D token-id tensors; ``None`` means no stops.
            encounters: Accepted for interface compatibility; not used.
        """
        super().__init__()
        # A fresh list per instance avoids sharing one mutable default.
        self.stops = [] if stops is None else stops

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        """Return True when ``input_ids[0]`` ends with one of the stops.

        Only the first sequence in the batch is inspected. Extra keyword
        arguments are accepted and ignored.
        """
        sequence = input_ids[0]
        for stop in self.stops:
            stop_len = len(stop)
            # An empty stop can never match, and a stop longer than the
            # sequence would make the comparison fail on mismatched shapes.
            if stop_len == 0 or stop_len > len(sequence):
                continue
            if torch.all((stop == sequence[-stop_len:])).item():
                return True

        return False
144
+
145
+
146
# System text used as the `system` field of the template conversation below.
meta = """meta instruction
You are an AI assistant whose name is 浦语.
- 浦语 is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
- 浦语 can understand and communicate fluently in the language chosen by the user such as English and 中文.
conversation
"""
# Template conversation using the "7132" prompt layout of
# Conversation.get_prompt (system text, then "role: text" + alternating
# separators).
# NOTE(review): `messages` is an immutable tuple here, so append_message()
# on this object itself would fail; Conversation.copy() builds a list, so
# presumably callers copy the template first. Confirm against callers.
CONV_VISION_7132_v2 = Conversation(
    system=meta,
    roles=(" <|User|>", " <|Bot|>"),
    messages=(),
    offset=0,
    sep_style="7132",
    sep="<TOKENS_UNUSED_0>",
    sep2="<TOKENS_UNUSED_1>",
)
demo_asset/demo.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <img src="assets/UI_en.png" width="500"/>
3
+ </p>
4
+
5
+ ## User Guidance for Web Demo
6
+
7
+ This guide explains how to use the InternLM-XComposer web demo to generate an illustrated article and then refine its images.
8
+
9
+ ### Quick Start Guide
10
+
11
+ 1. Input the desired article title into the **textbox** and subsequently press the **submit** button.
12
+ 2. Alternatively, you may choose from one of the **example** titles available, and then proceed to click the **submit** button.
13
+ 3. Please allow some time for processing. Upon completion, a comprehensive article, inclusive of visual illustrations and textual content, will be generated automatically.
14
+
15
+ <p align="left">
16
+ <img src="assets/start.png" width="500"/>
17
+ </p>
18
+
19
+ ### Interactively improving the article
20
+ If you want to edit the images, you can follow the guidance below:
21
+
22
+ <p align="left">
23
+ <img src="assets/edit1.png" width="500"/>
24
+ </p>
25
+
26
+ To begin editing, click on the **Edit** bar. This will reveal a suite of components designed for image-related edits.
27
+
28
+ - **Textbox**: This displays the caption generated by XComposer.
29
+ - **Search Button**: Use this to find images based on the caption.
30
+ - **Image Gallery**: Displays images sourced from your search.
31
+
32
+ ### 1. Image Selection
33
+ - Choose an image by clicking on any image within the **gallery**.
34
+ - To display additional images, click the **Search Button**.
35
+ - To explore captions, modify the content in the **Textbox** and then click the **Search Button**.
36
+
37
+ ### 2. Image Deletion
38
+ - To remove an image from your selection, simply click the **🗑️ Button**.
39
+
40
+ ### 3. Adding Images
41
+ - To add a new image, click the **Add Button** (visible after activating the **Edit** mode). For further instructions, refer to the Image Selection section above.
42
+
43
+ ## Finalizing Your Article
44
+
45
+ Once you're satisfied with the edits, click the **Save Article** button located at the bottom of the user interface to store the final version of your article.
demo_asset/download.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import requests
5
+ import urllib.request
6
+ from multiprocessing.pool import ThreadPool
7
+
8
+
9
def download_image(url, path):
    """Download ``url`` and store it at the local file ``path``.

    Failures are reported on stdout rather than raised.

    Args:
        url: Address of the image; an empty string is rejected.
        path: Destination file path.

    Returns:
        True if the download succeeded, False otherwise.
    """
    if url == '':
        print('url is empty')
        return False

    try:
        # urlretrieve opens the URL itself; a separate urlopen() probe would
        # fetch the resource twice and leave its response unclosed.
        urllib.request.urlretrieve(url, path)
    except (OSError, ValueError) as e:
        # URLError/HTTPError are OSError subclasses; ValueError is raised for
        # malformed URLs (e.g. a missing scheme).
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        print(f"{url} download failed")
        return False
    return True
25
+
26
+
27
def download_image_thread(url_list, folder, index, num_processes, Async=True):
    """Download every URL in ``url_list`` into ``folder`` using a thread pool.

    The i-th URL is saved as ``temp_{index}_{i}.png`` inside ``folder``,
    which is created if missing. The call returns only after all downloads
    have finished.

    Args:
        url_list: URLs to download.
        folder: Destination directory.
        index: Number embedded in every output file name.
        num_processes: Number of worker threads.
        Async: If True, downloads are submitted concurrently; if False, each
            download is run to completion before the next is submitted.

    Returns:
        One success flag per URL, in input order.
    """
    os.makedirs(folder, exist_ok=True)
    pool = ThreadPool(processes=num_processes)
    pending = []
    try:
        for i, url in enumerate(url_list):
            path = os.path.join(folder, f'temp_{index}_{i}.png')
            if Async:
                pending.append(
                    pool.apply_async(func=download_image, args=(url, path)))
            else:
                pending.append(
                    pool.apply(func=download_image, args=(url, path)))
    finally:
        # Always release the worker threads, even if submission failed.
        pool.close()
        pool.join()

    if not Async:
        return pending

    results = []
    for task in pending:
        try:
            results.append(task.get())
        except Exception as e:
            # A worker error used to be dropped silently; keep the call
            # best-effort but make the failure visible.
            print(f"download task failed: {e}")
            results.append(False)
    return results
41
+
42
+
demo_asset/gradio_patch.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from gradio.components import *
4
+ from gradio.components import Chatbot as oldChatbot
5
+ from gradio_client import utils as client_utils
6
+ from enum import Enum
7
+ from markdown2 import Markdown
8
+ import inspect
9
+
10
+ from gradio.events import Changeable
11
+ from gradio_client.documentation import document
12
+ from gradio_client.serializing import JSONSerializable
13
+
14
+
15
class _Keywords(Enum):
    """Sentinel values used when describing component updates."""

    # Marks that no `value` argument was supplied to `Component.update()`.
    NO_VALUE = "NO_VALUE"
    # Marks that processing of a component's value should be skipped
    # (needed for generators + state).
    FINISHED_ITERATING = "FINISHED_ITERATING"
18
+
19
+
20
class Chatbot(oldChatbot):
    """Chatbot whose user-side messages are emitted as raw HTML paragraphs."""

    def postprocess(
        self,
        y: list[list[str | tuple[str] | tuple[str, str] | None] | tuple],
    ) -> list[list[str | dict | None]]:
        """
        Parameters:
            y: List of lists representing the message and response pairs. The message (first element) is a string of HTML or None; the response (second element) is handled by the parent class's `_postprocess_chat_messages`.
        Returns:
            List of lists representing the message and response. The message is wrapped in a `<p>` element as-is (no Markdown processing or escaping), or None if it is not to be displayed.
        """
        if y is None:
            return []
        processed_messages = []
        for message_pair in y:
            assert isinstance(
                message_pair, (tuple, list)
            ), f"Expected a list of lists or list of tuples. Received: {message_pair}"
            assert (
                len(message_pair) == 2
            ), f"Expected a list of lists of length 2 or list of tuples of length 2. Received: {message_pair}"
            user_message = message_pair[0]
            if user_message is not None:
                # The user side bypasses `_postprocess_chat_messages`,
                # presumably so inline HTML such as an embedded <img> tag
                # passes through untouched.
                user_message = (
                    '<p style="font-family: var(--font)">' + user_message + "</p>"
                )
            processed_messages.append(
                [
                    # None stays None instead of failing on concatenation.
                    user_message,
                    self._postprocess_chat_messages(message_pair[1]),
                ]
            )
        return processed_messages
49
+
50
+
51
+ # @document("style")
52
+ # # class Chatbot(Changeable, Selectable, IOComponent, JSONSerializable):
53
+ # #class Chatbot(Changeable, IOComponent, JSONSerializable):
54
+ # class Chatbot(oldChatbot):
55
+ # """
56
+ # Displays a chatbot output showing both user submitted messages and responses. Supports a subset of Markdown including bold, italics, code, and images.
57
+ # Preprocessing: this component does *not* accept input.
58
+ # Postprocessing: expects function to return a {List[Tuple[str | None | Tuple, str | None | Tuple]]}, a list of tuples with user message and response messages. Messages should be strings, tuples, or Nones. If the message is a string, it can include Markdown. If it is a tuple, it should consist of (string filepath to image/video/audio, [optional string alt text]). Messages that are `None` are not displayed.
59
+ #
60
+ # Demos: chatbot_simple, chatbot_multimodal
61
+ # """
62
+ #
63
+ # def __init__(
64
+ # self,
65
+ # value: List[Tuple[str | None, str | None]] | Callable | None = None,
66
+ # color_map: Dict[str, str] | None = None, # Parameter moved to Chatbot.style()
67
+ # *,
68
+ # label: str | None = None,
69
+ # every: float | None = None,
70
+ # show_label: bool = True,
71
+ # visible: bool = True,
72
+ # elem_id: str | None = None,
73
+ # elem_classes: List[str] | str | None = None,
74
+ # **kwargs,
75
+ # ):
76
+ # """
77
+ # Parameters:
78
+ # value: Default value to show in chatbot. If callable, the function will be called whenever the app loads to set the initial value of the component.
79
+ # label: component name in interface.
80
+ # every: If `value` is a callable, run the function 'every' number of seconds while the client connection is open. Has no effect otherwise. Queue must be enabled. The event can be accessed (e.g. to cancel it) via this component's .load_event attribute.
81
+ # show_label: if True, will display label.
82
+ # visible: If False, component will be hidden.
83
+ # elem_id: An optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles.
84
+ # elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles.
85
+ # """
86
+ # if color_map is not None:
87
+ # warnings.warn(
88
+ # "The 'color_map' parameter has been deprecated.",
89
+ # )
90
+ # #self.md = utils.get_markdown_parser()
91
+ # self.md = Markdown(extras=["fenced-code-blocks", "tables", "break-on-newline"])
92
+ # self.select: EventListenerMethod
93
+ # """
94
+ # Event listener for when the user selects message from Chatbot.
95
+ # Uses event data gradio.SelectData to carry `value` referring to text of selected message, and `index` tuple to refer to [message, participant] index.
96
+ # See EventData documentation on how to use this event data.
97
+ # """
98
+ #
99
+ # IOComponent.__init__(
100
+ # self,
101
+ # label=label,
102
+ # every=every,
103
+ # show_label=show_label,
104
+ # visible=visible,
105
+ # elem_id=elem_id,
106
+ # elem_classes=elem_classes,
107
+ # value=value,
108
+ # **kwargs,
109
+ # )
110
+ #
111
+ # def get_config(self):
112
+ # return {
113
+ # "value": self.value,
114
+ # # "selectable": self.selectable,
115
+ # **IOComponent.get_config(self),
116
+ # }
117
+ #
118
+ # @staticmethod
119
+ # def update(
120
+ # value: Any | Literal[_Keywords.NO_VALUE] | None = _Keywords.NO_VALUE,
121
+ # label: str | None = None,
122
+ # show_label: bool | None = None,
123
+ # visible: bool | None = None,
124
+ # ):
125
+ # updated_config = {
126
+ # "label": label,
127
+ # "show_label": show_label,
128
+ # "visible": visible,
129
+ # "value": value,
130
+ # "__type__": "update",
131
+ # }
132
+ # return updated_config
133
+ #
134
+ # def _process_chat_messages(
135
+ # self, chat_message: str | Tuple | List | Dict | None
136
+ # ) -> str | Dict | None:
137
+ # if chat_message is None:
138
+ # return None
139
+ # elif isinstance(chat_message, (tuple, list)):
140
+ # #mime_type = processing_utils.get_mimetype(chat_message[0])
141
+ # mime_type = client_utils.get_mimetype(chat_message[0])
142
+ # return {
143
+ # "name": chat_message[0],
144
+ # "mime_type": mime_type,
145
+ # "alt_text": chat_message[1] if len(chat_message) > 1 else None,
146
+ # "data": None, # These last two fields are filled in by the frontend
147
+ # "is_file": True,
148
+ # }
149
+ # elif isinstance(
150
+ # chat_message, dict
151
+ # ): # This happens for previously processed messages
152
+ # return chat_message
153
+ # elif isinstance(chat_message, str):
154
+ # #return self.md.render(chat_message)
155
+ # return str(self.md.convert(chat_message))
156
+ # else:
157
+ # raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
158
+ #
159
+ # def postprocess(
160
+ # self,
161
+ # y: List[
162
+ # Tuple[str | Tuple | List | Dict | None, str | Tuple | List | Dict | None]
163
+ # ],
164
+ # ) -> List[Tuple[str | Dict | None, str | Dict | None]]:
165
+ # """
166
+ # Parameters:
167
+ # y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format. It can also be a tuple whose first element is a string filepath or URL to an image/video/audio, and second (optional) element is the alt text, in which case the media file is displayed. It can also be None, in which case that message is not displayed.
168
+ # Returns:
169
+ # List of tuples representing the message and response. Each message and response will be a string of HTML, or a dictionary with media information.
170
+ # """
171
+ # if y is None:
172
+ # return []
173
+ # processed_messages = []
174
+ # for message_pair in y:
175
+ # assert isinstance(
176
+ # message_pair, (tuple, list)
177
+ # ), f"Expected a list of lists or list of tuples. Received: {message_pair}"
178
+ # assert (
179
+ # len(message_pair) == 2
180
+ # ), f"Expected a list of lists of length 2 or list of tuples of length 2. Received: {message_pair}"
181
+ # processed_messages.append(
182
+ # (
183
+ # #self._process_chat_messages(message_pair[0]),
184
+ # '<pre style="font-family: var(--font)">' +
185
+ # message_pair[0] + "</pre>",
186
+ # self._process_chat_messages(message_pair[1]),
187
+ # )
188
+ # )
189
+ # return processed_messages
190
+ #
191
+ # def style(self, height: int | None = None, **kwargs):
192
+ # """
193
+ # This method can be used to change the appearance of the Chatbot component.
194
+ # """
195
+ # if height is not None:
196
+ # self._style["height"] = height
197
+ # if kwargs.get("color_map") is not None:
198
+ # warnings.warn("The 'color_map' parameter has been deprecated.")
199
+ #
200
+ # Component.style(
201
+ # self,
202
+ # **kwargs,
203
+ # )
204
+ # return self
demo_asset/serve_utils.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import re
4
+ import torch
5
+ import transformers
6
+ import traceback
7
+
8
+ from queue import Queue
9
+ from threading import Thread
10
+
11
+
12
# Literal special-token markers removed from generated text. Compiled once at
# import time so the pattern is not rebuilt on every call.
_SPECIAL_TOKEN_PATTERN = re.compile(
    r"<unk>|<pad>|<s>|</s>|\[PAD\]|<\|endoftext\|>|\[UNK\]|\[CLS\]|\[MASK\]|<\|startofpiece\|>|<\|endofpiece\|>|\[gMASK\]|\[sMASK\]"
)


def post_process_output(text):
    """Remove special-token markers from ``text`` and trim surrounding whitespace.

    Parameters:
        text: The string to clean.
    Returns:
        ``text`` with every occurrence of the markers listed in
        ``_SPECIAL_TOKEN_PATTERN`` deleted, and with leading/trailing
        whitespace stripped both before and after the deletion.
    """
    # The second strip removes whitespace that becomes leading/trailing only
    # after a marker at either end of the string has been deleted.
    return _SPECIAL_TOKEN_PATTERN.sub("", text.strip()).strip()
19
+
20
+
21
def post_process_code(code):
    """Un-escape ``\\_`` inside fenced code sections of ``code``.

    The text is split on a newline followed by a triple backtick. When the
    number of resulting pieces is odd (every opening fence has a matching
    closing fence), each odd-indexed piece, i.e. the text between a pair of
    fences, has ``\\_`` replaced by ``_``. In every other case the text is
    returned unchanged.
    """
    fence = "\n```"
    if fence not in code:
        return code

    segments = code.split(fence)
    if len(segments) % 2 == 0:
        # Unbalanced fences: rejoin the pieces untouched.
        return fence.join(segments)

    return fence.join(
        segment.replace("\\_", "_") if index % 2 else segment
        for index, segment in enumerate(segments)
    )
30
+
31
+
32
class Stream(transformers.StoppingCriteria):
    """Stopping criterion that only reports progress and never requests a stop.

    Each call hands ``input_ids[0]`` to the optional callback and then
    returns ``False``.
    """

    def __init__(self, callback_func=None):
        # Callable invoked with input_ids[0] on every call; None disables it.
        self.callback_func = callback_func

    def __call__(self, input_ids, scores) -> bool:
        notify = self.callback_func
        if notify is None:
            return False
        notify(input_ids[0])
        return False
40
+
41
+
42
class Iteratorize:
    """
    Transforms a function that takes a callback
    into a lazy iterator (generator).

    ``func`` is started on a background thread as
    ``func(callback=<internal>, **kwargs)``. Every value ``func`` passes to
    that callback is queued and handed out by iterating over this object.
    Iteration ends once ``func`` has returned or raised.

    Parameters:
        func: Callable accepting a ``callback`` keyword argument.
        kwargs: Extra keyword arguments forwarded to ``func``. Defaults to
            no extra arguments.
        callback: Optional completion callable. It is invoked on the worker
            thread with the return value of ``func``, or with ``None`` when
            ``func`` did not complete normally.
    """

    def __init__(self, func, kwargs=None, callback=None):
        self.mfunc = func
        self.c_callback = callback
        self.q = Queue()
        # Unique marker object queued after the last value.
        self.sentinel = object()
        # A fresh dict per instance avoids sharing one mutable default.
        self.kwargs = {} if kwargs is None else kwargs
        self.stop_now = False
        # Set once the sentinel has been consumed so later __next__ calls
        # raise StopIteration again instead of blocking on an empty queue.
        self._exhausted = False

        def _callback(val):
            # Raising inside the producer's callback aborts the producer
            # once a stop has been requested.
            if self.stop_now:
                raise ValueError
            self.q.put(val)

        def gentask():
            # Pre-bind so the completion callback can still run if func raises.
            ret = None
            try:
                ret = self.mfunc(callback=_callback, **self.kwargs)
            except ValueError:
                # ValueError is the stop signal raised by _callback. Without
                # a stop request it is a genuine failure and gets reported.
                if not self.stop_now:
                    traceback.print_exc()
            except Exception:
                traceback.print_exc()
            finally:
                # Always release the consumer, whatever happened above.
                self.q.put(self.sentinel)

            if self.c_callback:
                self.c_callback(ret)

        self.thread = Thread(target=gentask)
        self.thread.start()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next queued value, blocking until one is available."""
        if self._exhausted:
            raise StopIteration
        obj = self.q.get(True, None)
        if obj is self.sentinel:
            self._exhausted = True
            raise StopIteration
        return obj

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # NOTE(review): leaving the ``with`` block does not set ``stop_now``
        # (the assignment was commented out in this version), so the worker
        # keeps running until ``func`` finishes. Confirm this is intended.
        pass