jiaqingj committed
Commit 85a5010 · 1 Parent(s): 8ec26be
POS_classifier.py ADDED
@@ -0,0 +1,69 @@
+ from nltk.tokenize import word_tokenize
+ from nltk import pos_tag
+ import torch
+ import json
+
+ def batch_texts_POS_analysis(batch_texts, pos_templete, device="cuda"):
+     """Tag each text with universal POS tags and score it against a
+     part-of-speech template: the fraction of template slots whose expected
+     tag matches the predicted tag. (The device argument is currently unused.)"""
+     batch_size = len(batch_texts)
+     pos_tags = []
+     pos_scores = torch.zeros(batch_size)
+
+     for b_id in range(batch_size):
+         text = batch_texts[b_id]
+         words = word_tokenize(text)
+         word_tag = pos_tag(words, tagset="universal")
+         res_tag = [tag[1] for tag in word_tag]
+         total_num = len(pos_templete)
+         correct = 0
+         # Pad or truncate the predicted tags to the template length.
+         if len(res_tag) <= total_num:
+             cur_tag = res_tag + [""] * (total_num - len(res_tag))
+         else:
+             cur_tag = res_tag[:total_num]
+         for word_id in range(len(cur_tag)):
+             if pos_templete[word_id] == "":
+                 correct += 1
+             elif cur_tag[word_id] in pos_templete[word_id]:
+                 correct += 1
+         acc = correct / total_num
+         pos_tags.append(res_tag)
+         pos_scores[b_id] = acc
+
+     return pos_tags, pos_scores
+
+ def text_POS_analysis(text):
+     words = word_tokenize(text)
+     word_tag = pos_tag(words, tagset="universal")
+     res_tag = [tag[1] for tag in word_tag]
+
+     return res_tag
+
+ if __name__ == "__main__":
+     batch_texts = ["A cat sitting in the bed.",
+                    "Two men in a nice hotel room one playing a video game with a remote control.",
+                    "The man sitting in the chair feels like an invisible, dead man."]
+     pos_templete = ['DET', 'NOUN', 'ADP', 'ADJ', 'NOUN', '.', 'NOUN', 'CONJ', 'NOUN', 'ADP', 'PRON', '.']
+
+     batch_texts_POS_analysis(batch_texts, pos_templete, device="cuda")
+     cur_path = "iter_15.json"
+     all_caption = []
+
+     with open(cur_path, "r") as cur_json_file:
+         all_res = list(json.load(cur_json_file).values())
+         for res in all_res:
+             if isinstance(res, list):
+                 all_caption += res
+             else:
+                 all_caption.append(res)
+     pos_tags, pos_scores = batch_texts_POS_analysis(all_caption, pos_templete, device="cuda")
+     word_id = 12
+     pos_dict = {"ADJ": 0, "ADP": 0, "ADV": 0,
+                 "CONJ": 0, "DET": 0, "NOUN": 0, "X": 0,
+                 "NUM": 0, "PRT": 0, "PRON": 0, "VERB": 0, ".": 0}
+     # Count which tag appears at position word_id across all captions
+     # (loop variable renamed so it no longer shadows the imported pos_tag).
+     for cur_tags in pos_tags:
+         if word_id < len(cur_tags):
+             pos_dict[cur_tags[word_id]] += 1
+     print(pos_dict)
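A minimal usage sketch for the POS scorer above (illustrative inputs; assumes the NLTK punkt, averaged_perceptron_tagger, and universal_tagset data are installed):

```python
# Score two candidate captions against a 6-slot POS template.
# Each slot names the universal tag expected at that position; "" accepts anything.
from POS_classifier import batch_texts_POS_analysis

template = ["DET", "NOUN", "VERB", "ADP", "DET", "NOUN"]
captions = ["The cat sits on the mat.", "Running quickly away."]

tags, scores = batch_texts_POS_analysis(captions, template)
print(tags[0])   # universal POS tags of the first caption
print(scores)    # per-caption fraction of template slots matched
```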
app.py ADDED
@@ -0,0 +1,285 @@
+ from utils import create_logger, set_seed, format_output
+ import os
+ import time
+ import argparse
+ import json
+ from PIL import Image
+ import torch
+ import gradio as gr
+ import nltk
+
+ from clip.clip import CLIP
+ from gen_utils import generate_caption
+ from control_gen_utils import control_generate_caption
+ from transformers import AutoModelForMaskedLM, AutoTokenizer
+
+
+ def get_args():
+     parser = argparse.ArgumentParser()
+
+     parser.add_argument("--seed", type=int, default=42)
+     parser.add_argument("--batch_size", type=int, default=1, help="Only supports batch_size=1 currently.")
+     parser.add_argument("--device", type=str,
+                         default='cpu', choices=['cuda', 'cpu'])
+
+     ## Generation and Controllable Type
+     parser.add_argument('--run_type',
+                         default='caption',
+                         nargs='?',
+                         choices=['caption', 'controllable'])
+     parser.add_argument('--prompt',
+                         default='Image of a', type=str)
+     parser.add_argument('--order',
+                         default='shuffle',
+                         nargs='?',
+                         choices=['sequential', 'shuffle', 'span', 'random', 'parallel'],
+                         help="Generation order of text")
+     parser.add_argument('--control_type',
+                         default='sentiment',
+                         nargs='?',
+                         choices=["sentiment", "pos"],
+                         help="which controllable task to conduct")
+     parser.add_argument('--pos_type', type=list,
+                         default=[['DET'], ['ADJ', 'NOUN'], ['NOUN'],
+                                  ['VERB'], ['VERB'], ['ADV'], ['ADP'],
+                                  ['DET', 'NOUN'], ['NOUN'], ['NOUN', '.'],
+                                  ['.', 'NOUN'], ['.', 'NOUN']],
+                         help="predefined part-of-speech template")
+     parser.add_argument('--sentiment_type',
+                         default="positive",
+                         nargs='?',
+                         choices=["positive", "negative"])
+     parser.add_argument('--samples_num',
+                         default=2, type=int)
+
+     ## Hyperparameters
+     parser.add_argument("--sentence_len", type=int, default=10)
+     parser.add_argument("--candidate_k", type=int, default=200)
+     parser.add_argument("--alpha", type=float, default=0.02, help="weight for fluency")
+     parser.add_argument("--beta", type=float, default=2.0, help="weight for image-matching degree")
+     parser.add_argument("--gamma", type=float, default=5.0, help="weight for controllable degree")
+     parser.add_argument("--lm_temperature", type=float, default=0.1)
+     parser.add_argument("--num_iterations", type=int, default=1, help="predefined iterations for Gibbs Sampling")
+
+     ## Models and Paths
+     parser.add_argument("--lm_model", type=str, default='bert-base-uncased',
+                         help="Path to language model")  # bert, roberta
+     parser.add_argument("--match_model", type=str, default='clip-vit-base-patch32',
+                         help="Path to Image-Text model")  # clip, align
+     parser.add_argument("--caption_img_path", type=str, default='./examples/girl.jpg',
+                         help="file path of the image for captioning")
+     parser.add_argument("--stop_words_path", type=str, default='stop_words.txt',
+                         help="Path to stop_words.txt")
+     parser.add_argument("--add_extra_stopwords", type=list, default=[],
+                         help="you can add some extra stop words")
+
+     args = parser.parse_args()
+
+     return args
+
+ def run_caption(args, image, lm_model, lm_tokenizer, clip, token_mask, logger):
+     FinalCaptionList = []
+     BestCaptionList = []
+     image_instance = image.convert("RGB")
+     for sample_id in range(args.samples_num):
+         logger.info(f"Sample {sample_id}: ")
+         gen_texts, clip_scores = generate_caption(lm_model, clip, lm_tokenizer, image_instance, token_mask, logger,
+                                                   prompt=args.prompt, batch_size=args.batch_size, max_len=args.sentence_len,
+                                                   top_k=args.candidate_k, temperature=args.lm_temperature,
+                                                   max_iter=args.num_iterations, alpha=args.alpha, beta=args.beta,
+                                                   generate_order=args.order)
+         FinalCaptionStr = "Sample {}: ".format(sample_id + 1) + gen_texts[-2]
+         BestCaptionStr = "Sample {}: ".format(sample_id + 1) + gen_texts[-1]
+         FinalCaptionList.append(FinalCaptionStr)
+         BestCaptionList.append(BestCaptionStr)
+     return FinalCaptionList, BestCaptionList
+
+
+ def run_control(run_type, args, image, lm_model, lm_tokenizer, clip, token_mask, logger):
+     FinalCaptionList = []
+     BestCaptionList = []
+     image_instance = image.convert("RGB")
+     for sample_id in range(args.samples_num):
+         logger.info(f"Sample {sample_id}: ")
+         gen_texts, clip_scores = control_generate_caption(lm_model, clip, lm_tokenizer, image_instance, token_mask, logger,
+                                                           prompt=args.prompt, batch_size=args.batch_size, max_len=args.sentence_len,
+                                                           top_k=args.candidate_k, temperature=args.lm_temperature,
+                                                           max_iter=args.num_iterations, alpha=args.alpha,
+                                                           beta=args.beta, gamma=args.gamma,
+                                                           ctl_type=args.control_type, style_type=args.sentiment_type,
+                                                           pos_type=args.pos_type, generate_order=args.order)
+         FinalCaptionStr = "Sample {}: ".format(sample_id + 1) + gen_texts[-2]
+         BestCaptionStr = "Sample {}: ".format(sample_id + 1) + gen_texts[-1]
+         FinalCaptionList.append(FinalCaptionStr)
+         BestCaptionList.append(BestCaptionStr)
+     return FinalCaptionList, BestCaptionList
+
+ def Demo(RunType, ControlType, SentimentType, Order, Length, NumIterations, SamplesNum, Alpha, Beta, Gamma, Img):
+     args = get_args()
+     set_seed(args.seed)
+
+     args.num_iterations = NumIterations
+     args.sentence_len = Length
+     args.run_type = RunType
+     args.control_type = ControlType
+     args.sentiment_type = SentimentType
+     args.alpha = Alpha
+     args.beta = Beta
+     args.gamma = Gamma
+     args.samples_num = SamplesNum
+     args.order = Order
+     img = Img
+
+     run_type = "caption" if args.run_type == "caption" else args.control_type
+     if run_type == "sentiment":
+         run_type = args.sentiment_type
+
+     if not os.path.exists("logger"):
+         os.mkdir("logger")
+     logger = create_logger(
+         "logger", 'demo_{}_{}_len{}_topk{}_alpha{}_beta{}_gamma{}_lmtemp{}_{}.log'.format(
+             run_type, args.order, args.sentence_len,
+             args.candidate_k, args.alpha, args.beta, args.gamma, args.lm_temperature,
+             time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())))
+
+     logger.info(f"Generating order:{args.order}")
+     logger.info(f"Run type:{run_type}")
+     logger.info(args)
+
+     # Load pre-trained model (weights)
+     lm_model = AutoModelForMaskedLM.from_pretrained(args.lm_model)
+     lm_tokenizer = AutoTokenizer.from_pretrained(args.lm_model)
+     lm_model.eval()
+     clip = CLIP(args.match_model)
+     clip.eval()
+
+     lm_model = lm_model.to(args.device)
+     clip = clip.to(args.device)
+
+     ## Build a vocabulary mask that zeroes out stop-word token ids
+     with open(args.stop_words_path, 'r', encoding='utf-8') as stop_words_file:
+         stop_words = stop_words_file.readlines()
+     stop_words_ = [stop_word.rstrip('\n') for stop_word in stop_words]
+     stop_words_ += args.add_extra_stopwords
+     stop_ids = lm_tokenizer.convert_tokens_to_ids(stop_words_)
+     token_mask = torch.ones((1, lm_tokenizer.vocab_size))
+     for stop_id in stop_ids:
+         token_mask[0, stop_id] = 0
+     token_mask = token_mask.to(args.device)
+
+     if args.run_type == 'caption':
+         FinalCaption, BestCaption = run_caption(args, img, lm_model, lm_tokenizer, clip, token_mask, logger)
+     elif args.run_type == 'controllable':
+         FinalCaption, BestCaption = run_control(run_type, args, img, lm_model, lm_tokenizer, clip, token_mask, logger)
+     else:
+         raise Exception('run_type must be caption or controllable!')
+
+     logger.handlers = []
+
+     FinalCaptionFormat, BestCaptionFormat = format_output(SamplesNum, FinalCaption, BestCaption)
+     return FinalCaptionFormat, BestCaptionFormat
+
+
+ def RunTypeChange(choice):
+     if choice == "caption":
+         return gr.update(visible=False)
+     elif choice == "controllable":
+         return gr.update(visible=True)
+
+
+ def ControlTypeChange(choice):
+     if choice == "pos":
+         return gr.update(visible=False)
+     elif choice == "sentiment":
+         return gr.update(visible=True)
+
+ with gr.Blocks() as demo:
+
+     gr.Markdown("""
+     # ConZIC
+     ### Controllable Zero-shot Image Captioning by Sampling-Based Polishing
+     """)
+
+     with gr.Row():
+         with gr.Column():
+             RunType = gr.Radio(
+                 ["caption", "controllable"], value="caption", label="Run Type", info="Select the Run Type"
+             )
+             ControlType = gr.Radio(
+                 ["sentiment", "pos"], value="sentiment", label="Control Type", info="Select the Control Type",
+                 visible=False, interactive=True
+             )
+             SentimentType = gr.Radio(
+                 ["positive", "negative"], value="positive", label="Sentiment Type", info="Select the Sentiment Type",
+                 visible=False, interactive=True
+             )
+             Order = gr.Radio(
+                 ["sequential", "shuffle", "random"], value="shuffle", label="Order", info="Generation order of text"
+             )
+
+             RunType.change(fn=RunTypeChange, inputs=RunType, outputs=SentimentType)
+             RunType.change(fn=RunTypeChange, inputs=RunType, outputs=ControlType)
+             ControlType.change(fn=ControlTypeChange, inputs=ControlType, outputs=SentimentType)
+
+             with gr.Row():
+                 Length = gr.Slider(
+                     5, 15, value=10, label="Sentence Length", info="Choose between 5 and 15", step=1
+                 )
+                 NumIterations = gr.Slider(
+                     1, 15, value=10, label="Num Iterations", info="Predefined iterations for Gibbs Sampling", step=1
+                 )
+             with gr.Row():
+                 SamplesNum = gr.Slider(
+                     1, 5, value=2, label="Samples Num", step=1
+                 )
+                 Alpha = gr.Slider(
+                     0, 1, value=0.02, label="Alpha", info="Weight for fluency", step=0.01
+                 )
+             with gr.Row():
+                 Beta = gr.Slider(
+                     1, 5, value=2, label="Beta", info="Weight for image-matching degree", step=0.5
+                 )
+                 Gamma = gr.Slider(
+                     1, 10, value=5, label="Gamma", info="Weight for controllable degree", step=0.5
+                 )
+         with gr.Column():
+
+             Img = gr.Image(label="Upload Picture", type="pil")
+
+             FinalCaption = gr.Textbox(label="Final Caption", lines=5, placeholder="Final Caption")
+             BestCaption = gr.Textbox(label="Best Caption", lines=5, placeholder="Best Caption")
+             with gr.Row():
+                 gen_button = gr.Button("Submit")
+                 clear_button = gr.Button("Reset")
+
+     gen_button.click(
+         fn=Demo,
+         inputs=[
+             RunType, ControlType, SentimentType, Order, Length, NumIterations, SamplesNum, Alpha, Beta, Gamma, Img
+         ],
+         outputs=[
+             FinalCaption, BestCaption
+         ]
+     )
+     clear_button.click(
+         fn=lambda: [gr.Radio.update(value='caption'), gr.Radio.update(value='pos'), gr.Radio.update(value='positive'),
+                     gr.Radio.update(value='shuffle'), gr.Slider.update(value=10), gr.Slider.update(value=10),
+                     gr.Slider.update(value=2), gr.Slider.update(value=0.02), gr.Slider.update(value=2),
+                     gr.Slider.update(value=5)],
+         inputs=[],
+         outputs=[
+             RunType, ControlType, SentimentType, Order, Length, NumIterations, SamplesNum, Alpha, Beta, Gamma
+         ]
+     )
+ if __name__ == "__main__":
+
+     nltk.download('wordnet')
+     nltk.download('punkt')
+     nltk.download('averaged_perceptron_tagger')
+     nltk.download('sentiwordnet')
+     # Required by pos_tag(..., tagset="universal") in POS_classifier.py.
+     nltk.download('universal_tagset')
+
+     demo.launch()
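The stop-word handling above zeroes entries of a (1, vocab_size) mask so the samplers in gen_utils.py never propose those tokens. A self-contained sketch of the same masking idea with a hypothetical 5-token vocabulary:

```python
import torch
import torch.nn.functional as F

# Hypothetical 5-token vocabulary; ids 1 and 3 are "stop words".
logits = torch.tensor([[2.0, 5.0, 1.0, 4.0, 0.5]])
token_mask = torch.ones(1, 5)
for stop_id in (1, 3):
    token_mask[0, stop_id] = 0

probs = F.softmax(logits, dim=-1) * token_mask  # masked ids get probability 0
top_probs, top_ids = probs.topk(3, dim=-1)
print(top_ids)  # tensor([[0, 2, 4]]) -- the stop-word ids never surface
```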
clip/build_text_index.py ADDED
@@ -0,0 +1,105 @@
+ import sys
+ import os
+ import json
+ import argparse
+ import torch
+ import numpy as np
+ import progressbar
+
+ def parse_config():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--clip_name", type=str, default="openai/clip-vit-base-patch32")
+     parser.add_argument("--text_file_path", type=str)
+     # save configuration
+     parser.add_argument("--save_index_prefix", type=str, help='where to save the mips index')
+     parser.add_argument("--save_index_name", type=str)
+     parser.add_argument("--save_mapping_dict_name", type=str,
+                         help="a json file that stores a dictionary mapping mips index entries to caption text")
+     # inference configuration
+     parser.add_argument("--batch_size", type=int, help="the batch size used to conduct inference with CLIP")
+     return parser.parse_args()
+
+ def load_batch_text(text_file_path, batch_size):
+     with open(text_file_path) as f:
+         item_list = json.load(f)
+
+     text_list = []
+     for item in item_list:
+         captions = item["captions"]
+         for cap in captions:
+             text_list.append(cap)
+     print('Number of text instances is {}'.format(len(text_list)))
+
+     data_num = len(text_list)
+     batch_num = data_num // batch_size
+     batch_text_list = []
+     s_idx, e_idx = 0, batch_size
+     for p_idx in range(batch_num):
+         one_batch_text_list = []
+         for idx in range(s_idx, e_idx):
+             one_batch_text_list.append(text_list[idx])
+         batch_text_list.append(one_batch_text_list)
+         # Advance the window; the original loop never did, so every batch
+         # repeated the first one. The trailing partial batch is dropped.
+         s_idx += batch_size
+         e_idx += batch_size
+     return batch_text_list
+
+
+ if __name__ == '__main__':
+     if torch.cuda.is_available():
+         print('Cuda is available.')
+     cuda_available = torch.cuda.is_available()
+     args = parse_config()
+     device = torch.device('cuda')
+
+     if not os.path.exists(args.save_index_prefix):
+         # recursively construct directory
+         os.makedirs(args.save_index_prefix, exist_ok=True)
+
+     print('Loading CLIP...')
+     from clip import CLIP
+     model = CLIP(args.clip_name)
+     if cuda_available:
+         model = model.cuda(device)
+     model.eval()
+     print('CLIP loaded!')
+
+     print('Loading text data...')
+     batch_text_list = load_batch_text(args.text_file_path, args.batch_size)
+     print('Text data loaded.')
+
+     res_text_vec_list, res_text_list = [], []
+     batch_num = len(batch_text_list)
+     print('Number of batches is {}'.format(batch_num))
+     print('Start inference...')
+     p = progressbar.ProgressBar(batch_num)
+     p.start()
+     with torch.no_grad():
+         for p_idx in range(batch_num):
+             p.update(p_idx)
+             one_text_batch = batch_text_list[p_idx]
+             one_batch_vec = model.compute_batch_index_text_representation(one_text_batch).detach().cpu()
+             one_batch_vec_list = one_batch_vec.unbind(dim=0)
+             bsz = len(one_batch_vec_list)
+             for k in range(bsz):
+                 res_text_vec_list.append(one_batch_vec_list[k].numpy())
+                 res_text_list.append(one_text_batch[k])
+     p.finish()
+     assert len(res_text_vec_list) == len(res_text_list)
+     print('Inference completed!')
+
+     index_text_mapping_dict = {}
+     for k in range(len(res_text_list)):
+         index_text_mapping_dict[k] = res_text_list[k]
+     mapping_list_save_path = args.save_index_prefix + '/' + args.save_mapping_dict_name
+     with open(mapping_list_save_path, 'w') as outfile:
+         json.dump(index_text_mapping_dict, outfile, indent=4)
+     print('Mapping dictionary saved!')
+
+     print('Start building index...')
+     index_save_path = args.save_index_prefix + '/' + args.save_index_name
+     with open(index_save_path, 'w', encoding='utf8') as o:
+         for vec in res_text_vec_list:
+             one_text = ' '.join([str(num) for num in vec]).strip()
+             o.writelines(one_text + '\n')
+     print('Index completed!')
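As a sanity check on the corrected batching above, an equivalent slice-based version (like the script, it drops the trailing partial batch):

```python
def make_batches(text_list, batch_size):
    # Keep only full batches, mirroring load_batch_text above.
    batch_num = len(text_list) // batch_size
    return [text_list[i * batch_size:(i + 1) * batch_size] for i in range(batch_num)]

assert make_batches(list("abcdefg"), 3) == [["a", "b", "c"], ["d", "e", "f"]]
```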
clip/clip.py ADDED
@@ -0,0 +1,146 @@
+ import torch
+ from torch import nn
+ from PIL import Image
+ from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
+
+ class CLIP(nn.Module):
+     def __init__(self, model_name):
+         super(CLIP, self).__init__()
+         # model name: e.g. openai/clip-vit-base-patch32
+         print('Initializing CLIP model...')
+         self.model = CLIPModel.from_pretrained(model_name)
+         self.model.eval()
+         self.processor = CLIPProcessor.from_pretrained(model_name)
+         self.tokenizer = CLIPTokenizer.from_pretrained(model_name)
+         self.cuda_has_been_checked = False
+         print('CLIP model initialized.')
+
+     def check_cuda(self):
+         self.cuda_available = next(self.model.parameters()).is_cuda
+         self.device = next(self.model.parameters()).get_device()
+         if self.cuda_available:
+             print('Cuda is available.')
+         else:
+             print('Cuda is not available.')
+         print('Device is {}'.format(self.device))
+
+     @torch.no_grad()
+     def compute_image_representation_from_image_path(self, image_path):
+         if not self.cuda_has_been_checked:
+             self.check_cuda()
+             self.cuda_has_been_checked = True
+         # image_path: the path of the image
+         image = Image.open(image_path)
+         inputs = self.processor(images=image, return_tensors="pt")
+         pixel_values = inputs['pixel_values']
+         if self.cuda_available:
+             pixel_values = pixel_values.cuda(self.device)
+         visual_outputs = self.model.vision_model(pixel_values=pixel_values)
+         image_embeds = visual_outputs[1]
+         image_embeds = self.model.visual_projection(image_embeds)  # [1 x embed_dim]
+         return image_embeds
+
+     def compute_image_representation_from_image_instance(self, image):
+         if not self.cuda_has_been_checked:
+             self.check_cuda()
+             self.cuda_has_been_checked = True
+         # image: a PIL.Image instance
+         inputs = self.processor(images=image, return_tensors="pt")
+         pixel_values = inputs['pixel_values']
+         if self.cuda_available:
+             pixel_values = pixel_values.cuda(self.device)
+         visual_outputs = self.model.vision_model(pixel_values=pixel_values)
+         image_embeds = visual_outputs[1]
+         image_embeds = self.model.visual_projection(image_embeds)  # [1 x embed_dim]
+         return image_embeds
+
+     def compute_text_representation(self, text_list):
+         if not self.cuda_has_been_checked:
+             self.check_cuda()
+             self.cuda_has_been_checked = True
+         # text_list: a list of text
+         text_inputs = self.tokenizer(text_list, padding=True, return_tensors="pt",
+                                      max_length=self.tokenizer.max_len_single_sentence + 2, truncation=True)
+         # self.tokenizer.max_len_single_sentence + 2 = 77
+         input_ids, attention_mask = text_inputs['input_ids'], text_inputs['attention_mask']
+         if self.cuda_available:
+             input_ids = input_ids.cuda(self.device)
+             attention_mask = attention_mask.cuda(self.device)
+         text_outputs = self.model.text_model(
+             input_ids=input_ids,
+             attention_mask=attention_mask
+         )
+         text_embeds = text_outputs[1]
+         text_embeds = self.model.text_projection(text_embeds)
+         return text_embeds
+
+     def compute_image_text_similarity_via_embeddings(self, image_embeds, text_embeds):
+         '''
+         image_embeds: 1 x embed_dim
+         text_embeds: len(text_list) x embed_dim
+         '''
+         image_embeds = image_embeds / image_embeds.norm(dim=-1, keepdim=True)
+         text_embeds = text_embeds / text_embeds.norm(dim=-1, keepdim=True)
+         logit_scale = self.model.logit_scale.exp()
+         logits_per_text = torch.matmul(text_embeds, image_embeds.t()) * logit_scale
+         logits_per_image = logits_per_text.T
+         return logits_per_image.softmax(dim=1), logits_per_image / logit_scale  # 1 x len(text_list)
+
+     def compute_image_text_similarity_via_raw_text(self, image_embeds, text_list):
+         text_embeds = self.compute_text_representation(text_list)
+         return self.compute_image_text_similarity_via_embeddings(image_embeds, text_embeds)
+
+     ### -------------------- functions for building index ---------------------- ###
+     def compute_batch_index_image_features(self, image_list):
+         '''
+         image_list: a list of image instances
+         '''
+         if not self.cuda_has_been_checked:
+             self.check_cuda()
+             self.cuda_has_been_checked = True
+         inputs = self.processor(images=image_list, return_tensors="pt")
+         pixel_values = inputs['pixel_values']
+         if self.cuda_available:
+             pixel_values = pixel_values.cuda(self.device)
+         visual_outputs = self.model.vision_model(pixel_values=pixel_values)
+         image_embeds = visual_outputs[1]
+         image_embeds = self.model.visual_projection(image_embeds)
+         return image_embeds  # len(image_list) x embed_dim
+
+     def compute_batch_index_text_representation(self, text_list):
+         if not self.cuda_has_been_checked:
+             self.check_cuda()
+             self.cuda_has_been_checked = True
+         # text_list: a list of text
+         text_inputs = self.tokenizer(text_list, padding=True, return_tensors="pt",
+                                      max_length=self.tokenizer.max_len_single_sentence + 2, truncation=True)
+         input_ids, attention_mask = text_inputs['input_ids'], text_inputs['attention_mask']
+         if self.cuda_available:
+             input_ids = input_ids.cuda(self.device)
+             attention_mask = attention_mask.cuda(self.device)
+         text_outputs = self.model.text_model(
+             input_ids=input_ids,
+             attention_mask=attention_mask
+         )
+         text_embeds = text_outputs[1]
+         text_embeds = self.model.text_projection(text_embeds)
+         return text_embeds
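A usage sketch for the wrapper above (assumes the repo root is on the import path, the openai/clip-vit-base-patch32 checkpoint is downloadable, and examples/girl.jpg exists):

```python
import torch
from clip.clip import CLIP

model = CLIP("openai/clip-vit-base-patch32")
model.eval()

with torch.no_grad():
    image_embeds = model.compute_image_representation_from_image_path("examples/girl.jpg")
    probs, raw = model.compute_image_text_similarity_via_raw_text(
        image_embeds, ["a girl smiling", "a bowl of soup"])
print(probs)  # softmax over the two captions, shape (1, 2)
```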
clip/clipretrieval.py ADDED
@@ -0,0 +1,135 @@
+ import os
+ import json
+ import argparse
+ import torch
+ import progressbar
+ import numpy as np
+ from PIL import Image
+
+ class CLIPIndex:
+     def __init__(self, index_matrix_path, mapping_dict_path, clip):
+         '''
+         index_matrix_path: path to the pre-built index matrix
+         mapping_dict_path: path to the pre-indexed mapping dictionary
+         clip: the pre-trained clip model
+         '''
+         print('Loading index...')
+         self.index_matrix = self.normalization(self.load_matrix(index_matrix_path))
+         print('Index loaded.')
+         print(self.index_matrix.shape)
+         with open(mapping_dict_path) as f:
+             self.mapping_dict = json.load(f)
+         self.clip = clip
+
+     def load_matrix(self, in_f):
+         matrix_list = []
+         with open(in_f, 'r', encoding='utf8') as i:
+             lines = i.readlines()
+         for l in lines:
+             one_vec = [float(num) for num in l.strip('\n').split()]
+             matrix_list.append(one_vec)
+         return np.array(matrix_list)
+
+     def normalization(self, matrix):
+         '''
+         matrix: num_instance x num_feature
+         '''
+         return matrix / np.linalg.norm(matrix, axis=1, keepdims=True)
+
+     def get_image_representation(self, image_path):
+         image_instance = Image.open(image_path)
+         image_vec = self.clip.compute_batch_index_image_features([image_instance]).detach().cpu().numpy()
+         image_vec = self.normalization(image_vec)
+         return image_vec
+
+     def search_text(self, image_path):
+         image_vec = self.get_image_representation(image_path)
+         sort_idx_list = np.matmul(image_vec, self.index_matrix.transpose())[0].argsort()[::-1]
+         top_idx = sort_idx_list[0]
+         return self.mapping_dict[str(top_idx)]
+
+
+ def parse_config():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--clip_name", type=str)
+     parser.add_argument("--test_image_prefix_path", type=str, help="the folder that stores all test images")
+     parser.add_argument("--test_path", type=str)
+     # index configuration
+     parser.add_argument("--index_matrix_path", type=str)
+     parser.add_argument("--mapping_dict_path", type=str)
+     # save configuration
+     parser.add_argument("--save_path_prefix", type=str, help="save the result in which directory")
+     parser.add_argument("--save_name", type=str, help="the name of the saved file")
+     return parser.parse_args()
+
+ if __name__ == '__main__':
+     if torch.cuda.is_available():
+         print('Cuda is available.')
+     cuda_available = torch.cuda.is_available()
+     args = parse_config()
+     device = torch.device('cuda')
+
+     save_path_prefix = args.save_path_prefix
+     if not os.path.exists(save_path_prefix):
+         # recursively construct directory
+         os.makedirs(save_path_prefix, exist_ok=True)
+     # parse save name
+     save_name = args.save_name
+     full_save_path = save_path_prefix + '/' + save_name
+     print('full save path is {}'.format(full_save_path))
+
+     print('Loading CLIP...')
+     from clip import CLIP
+     clip = CLIP(args.clip_name)
+     if cuda_available:
+         clip = clip.cuda(device)
+     clip.eval()
+     print('CLIP loaded!')
+
+     clipindex = CLIPIndex(args.index_matrix_path, args.mapping_dict_path, clip)
+
+     print('Loading data...')
+     with open(args.test_path) as f:
+         item_list = json.load(f)
+     print('Data loaded.')
+     print('Number of test instances is {}'.format(len(item_list)))
+
+     result_list = []
+     invalid_num = 0
+     print('----------------------------------------------------------------')
+     with torch.no_grad():
+         test_num = len(item_list)
+         print('Number of inference instances is {}'.format(test_num))
+         p = progressbar.ProgressBar(test_num)
+         p.start()
+         for p_idx in range(test_num):
+             p.update(p_idx)
+             one_test_dict = item_list[p_idx]
+
+             one_res_dict = {
+                 'split': one_test_dict['split'],
+                 'image_name': one_test_dict['image_name'],
+                 'captions': one_test_dict['captions']
+             }
+
+             image_full_path = args.test_image_prefix_path + '/' + one_test_dict['image_name']
+             try:
+                 output_text = clipindex.search_text(image_full_path)
+                 one_res_dict['prediction'] = output_text
+                 result_list.append(one_res_dict)
+             except Exception:
+                 invalid_num += 1
+                 print('invalid number is {}'.format(invalid_num))
+                 continue
+         p.finish()
+     print('Inference completed!')
+
+     with open(full_save_path, 'w') as outfile:
+         json.dump(result_list, outfile, indent=4)
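The core of CLIPIndex.search_text is maximum inner-product search over L2-normalized rows, which is cosine similarity. A tiny NumPy sketch of that step with synthetic vectors:

```python
import numpy as np

np.random.seed(0)
index = np.random.randn(4, 8)
index /= np.linalg.norm(index, axis=1, keepdims=True)     # normalize rows
query = index[2:3] + 0.01 * np.random.randn(1, 8)         # noisy copy of row 2
query /= np.linalg.norm(query, axis=1, keepdims=True)

top_idx = np.matmul(query, index.T)[0].argsort()[::-1][0]
print(top_idx)  # 2 -- the nearest neighbor is the row the query was built from
```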
control_gen_utils.py ADDED
@@ -0,0 +1,223 @@
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ import random
+ import time
+
+ from utils import get_init_text, update_token_mask
+ from sentiments_classifer import batch_texts_POS_Sentiments_analysis
+ from POS_classifier import batch_texts_POS_analysis
+
+
+ def generate_caption_step(out, gen_idx, mask, temperature=None, top_k=0):
+     """ Generate a word from out[gen_idx]
+
+     args:
+         - out (torch.Tensor): tensor of logits of size batch_size x seq_len x vocab_size
+         - gen_idx (int): location for which to generate
+         - top_k (int): if >0, only sample from the top k most probable words
+     """
+     logits = out[:, gen_idx]
+     if temperature is not None:
+         logits = logits / temperature
+
+     probs = F.softmax(logits, dim=-1)
+     probs *= (mask)
+     top_k_probs, top_k_ids = probs.topk(top_k, dim=-1)
+
+     return top_k_probs, top_k_ids
+
+ def sentiment_sequential_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
+                                     max_len=15, top_k=0, temperature=None, alpha=0.7, beta=1,
+                                     max_iters=20, batch_size=1,
+                                     verbose=True, gamma=5, ctl_signal="positive"):
+     """ Generate one word at a time, in L->R order """
+     seed_len = len(prompt.split()) + 1
+     batch = get_init_text(tokenizer, prompt, max_len, batch_size)
+     image_embeds = clip.compute_image_representation_from_image_instance(image_instance)
+     clip_score_sequence = []
+     best_clip_score = 0
+     inp = torch.tensor(batch).to(image_embeds.device)
+     gen_texts = []
+     for iter_num in range(max_iters):
+         for ii in range(max_len):
+             token_mask = update_token_mask(tokenizer, token_mask, max_len, ii)
+             for jj in range(batch_size):
+                 inp[jj][seed_len + ii] = tokenizer.mask_token_id
+             inp_ = inp.clone().detach()
+             out = model(inp).logits
+             probs, idxs = generate_caption_step(out, gen_idx=seed_len + ii, mask=token_mask, top_k=top_k, temperature=temperature)
+             for jj in range(batch_size):
+                 topk_inp = inp_.repeat(top_k, 1)
+                 idxs_ = (idxs[jj] * token_mask[0][idxs[jj]]).long()
+                 topk_inp[:, ii + seed_len] = idxs_
+                 repeats = ((idxs_[:, None] == topk_inp).float().sum(1) - 1)  # penalize repeated tokens
+                 batch_text_list = tokenizer.batch_decode(topk_inp, skip_special_tokens=True)
+                 sentiment_probs, sentiment_scores, pos_tags, wordnet_pos_tags = batch_texts_POS_Sentiments_analysis(
+                     batch_text_list, 1, topk_inp.device, sentiment_ctl=ctl_signal)
+                 clip_score, clip_ref = clip.compute_image_text_similarity_via_raw_text(image_embeds, batch_text_list)
+                 final_score = alpha * probs + beta * clip_score + gamma * sentiment_probs[None, :] + 0.1 * (1 - torch.exp(repeats))[None, :]
+                 best_clip_id = final_score.argmax()
+
+                 inp[jj][seed_len + ii] = idxs_[best_clip_id]
+                 current_clip_score = clip_ref[jj][best_clip_id]
+                 current_senti_score = sentiment_scores[best_clip_id]
+                 clip_score_sequence.append(current_clip_score.cpu().item())
+
+         if verbose and np.mod(iter_num + 1, 1) == 0:
+             for_print = tokenizer.decode(inp[0])
+             cur_text = tokenizer.decode(inp[0], skip_special_tokens=True)
+             if best_clip_score < current_clip_score.cpu().item():
+                 best_clip_score = current_clip_score.cpu().item()
+                 best_caption = cur_text
+             gen_texts.append(cur_text)
+             logger.info(f"iter {iter_num + 1}, clip score {current_clip_score:.3f}, ctl score {current_senti_score:.3f}: " + for_print)
+
+     gen_texts.append(best_caption)
+     clip_score_sequence.append(best_clip_score)
+
+     return gen_texts, clip_score_sequence
+
+ def sentiment_shuffle_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
+                                  max_len=15, top_k=0, temperature=None, alpha=0.7, beta=1,
+                                  max_iters=20, batch_size=1,
+                                  verbose=True, gamma=5, ctl_signal="positive"):
+     """ Generate one word at a time, in random generation order """
+     seed_len = len(prompt.split()) + 1
+     batch = get_init_text(tokenizer, prompt, max_len, batch_size)
+     image_embeds = clip.compute_image_representation_from_image_instance(image_instance)
+     inp = torch.tensor(batch).to(image_embeds.device)
+     clip_score_sequence = []
+     best_clip_score = 0
+     random_lst = list(range(max_len))
+     random.shuffle(random_lst)
+     logger.info(f"Order_list:{random_lst}")
+     gen_texts = []
+     for iter_num in range(max_iters):
+         for ii in random_lst:
+             token_mask = update_token_mask(tokenizer, token_mask, max_len, ii)
+             for jj in range(batch_size):
+                 inp[jj][seed_len + ii] = tokenizer.mask_token_id
+
+             inp_ = inp.clone().detach()
+             out = model(inp).logits
+             probs, idxs = generate_caption_step(out, gen_idx=seed_len + ii, mask=token_mask, top_k=top_k, temperature=temperature)
+             for jj in range(batch_size):
+                 topk_inp = inp_.repeat(top_k, 1)
+                 idxs_ = (idxs[jj] * token_mask[0][idxs[jj]]).long()
+                 topk_inp[:, ii + seed_len] = idxs_
+                 repeats = ((idxs_[:, None] == topk_inp).float().sum(1) - 1)  # penalize repeated tokens
+                 batch_text_list = tokenizer.batch_decode(topk_inp, skip_special_tokens=True)
+                 sentiment_probs, sentiment_scores, pos_tags, wordnet_pos_tags = batch_texts_POS_Sentiments_analysis(
+                     batch_text_list, 1, topk_inp.device, sentiment_ctl=ctl_signal)
+
+                 clip_score, clip_ref = clip.compute_image_text_similarity_via_raw_text(image_embeds, batch_text_list)
+                 final_score = alpha * probs + beta * clip_score + gamma * sentiment_probs[None, :] + 0.01 * (1 - torch.exp(repeats))[None, :]
+                 best_clip_id = final_score.argmax()
+
+                 inp[jj][seed_len + ii] = idxs_[best_clip_id]
+                 current_clip_score = clip_ref[jj][best_clip_id]
+                 current_senti_score = sentiment_scores[best_clip_id]
+                 clip_score_sequence.append(current_clip_score.cpu().item())
+         if verbose and np.mod(iter_num + 1, 1) == 0:
+             for_print = tokenizer.decode(inp[0])
+             cur_text = tokenizer.decode(inp[0], skip_special_tokens=True)
+             if best_clip_score < current_clip_score.cpu().item():
+                 best_clip_score = current_clip_score.cpu().item()
+                 best_caption = cur_text
+             gen_texts.append(cur_text)
+             logger.info(f"iter {iter_num + 1}, clip score {current_clip_score:.3f}, ctl score {current_senti_score:.3f}: " + for_print)
+     gen_texts.append(best_caption)
+     clip_score_sequence.append(best_clip_score)
+
+     return gen_texts, clip_score_sequence
+
+ def POS_sequential_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
+                               max_len=15, top_k=0, temperature=None, alpha=0.7, beta=1, gamma=0.1,
+                               max_iters=20, batch_size=1, ctl_signal=["DET"],
+                               verbose=True):
+     """ Generate one word at a time, in L->R order, guided by a POS template """
+
+     seed_len = len(prompt.split()) + 1
+     logger.info(ctl_signal)
+     batch = get_init_text(tokenizer, prompt, max_len, batch_size)
+     image_embeds = clip.compute_image_representation_from_image_instance(image_instance)
+     clip_score_sequence = []
+     best_clip_score = 0
+     inp = torch.tensor(batch).to(image_embeds.device)
+     gen_texts = []
+     for iter_num in range(max_iters):
+         for ii in range(max_len):
+             token_mask = update_token_mask(tokenizer, token_mask, max_len, ii)
+             for jj in range(batch_size):
+                 inp[jj][seed_len + ii] = tokenizer.mask_token_id
+             inp_ = inp.clone().detach()
+             out = model(inp).logits
+             probs, idxs = generate_caption_step(out, gen_idx=seed_len + ii, mask=token_mask, top_k=top_k, temperature=temperature)
+             for jj in range(batch_size):
+                 topk_inp = inp_.repeat(top_k, 1)
+                 idxs_ = (idxs[jj] * token_mask[0][idxs[jj]]).long()
+                 topk_inp[:, ii + seed_len] = idxs_
+                 batch_text_list = tokenizer.batch_decode(topk_inp, skip_special_tokens=True)
+                 pos_tags, pos_scores = batch_texts_POS_analysis(batch_text_list, ctl_signal, device=idxs_.device)
+                 pos_probs = torch.softmax(pos_scores / 0.1, dim=-1).to(idxs_.device)
+                 clip_score, clip_ref = clip.compute_image_text_similarity_via_raw_text(image_embeds, batch_text_list)
+                 final_score = alpha * probs + beta * clip_score + gamma * pos_probs[None, :]
+                 best_clip_id = final_score.argmax()
+
+                 inp[jj][seed_len + ii] = idxs_[best_clip_id]
+                 current_clip_score = clip_ref[jj][best_clip_id]
+                 current_ctl_score = pos_scores[best_clip_id]
+                 current_pos_tag = pos_tags[best_clip_id]
+                 clip_score_sequence.append(current_clip_score.cpu().item())
+         if verbose and np.mod(iter_num + 1, 1) == 0:
+             for_print = tokenizer.decode(inp[0])
+             cur_text = tokenizer.decode(inp[0], skip_special_tokens=True)
+             if best_clip_score < current_clip_score.cpu().item():
+                 best_clip_score = current_clip_score.cpu().item()
+                 best_caption = cur_text
+             gen_texts.append(cur_text)
+             logger.info(f"iter {iter_num + 1}, clip score {current_clip_score.cpu().item():.3f}, ctl score {current_ctl_score.cpu().item():.3f}: " + for_print)
+             logger.info(current_pos_tag)
+
+     gen_texts.append(best_caption)
+     clip_score_sequence.append(best_clip_score)
+
+     return gen_texts, clip_score_sequence
+
+ def control_generate_caption(model, clip, tokenizer, image_instance, token_mask, logger,
+                              prompt="", batch_size=10, max_len=25,
+                              top_k=100, temperature=1.0, max_iter=500, alpha=0.7, beta=1, gamma=5,
+                              ctl_type="sentiment", style_type="positive", pos_type=None, generate_order="sequential"):
+     # controllable functions to call
+     start_time = time.time()
+     if ctl_type == "sentiment":  # sentiment control
+         if generate_order == "sequential":
+             generate_texts, clip_scores = sentiment_sequential_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
+                                                                           batch_size=batch_size, max_len=max_len, top_k=top_k,
+                                                                           alpha=alpha, beta=beta, gamma=gamma, temperature=temperature,
+                                                                           max_iters=max_iter, ctl_signal=style_type)
+         else:
+             generate_texts, clip_scores = sentiment_shuffle_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
+                                                                        batch_size=batch_size, max_len=max_len, top_k=top_k,
+                                                                        alpha=alpha, beta=beta, gamma=gamma, temperature=temperature,
+                                                                        max_iters=max_iter, ctl_signal=style_type)
+
+     else:  # POS control
+         generate_texts, clip_scores = POS_sequential_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
+                                                                 batch_size=batch_size, max_len=max_len, top_k=top_k,
+                                                                 alpha=alpha, beta=beta, gamma=gamma, temperature=temperature,
+                                                                 ctl_signal=pos_type, max_iters=max_iter)
+
+     logger.info("Finished in %.3fs" % (time.time() - start_time))
+     logger.info(f"final caption: {generate_texts[-2]}")
+     logger.info(f"best caption: {generate_texts[-1]}")
+     return generate_texts, clip_scores
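All three controllable samplers above select the candidate that maximizes a weighted sum of LM probability, CLIP score, and control score. A toy illustration of that selection rule with made-up numbers (weights are the app.py defaults):

```python
import torch

alpha, beta, gamma = 0.02, 2.0, 5.0               # fluency / image-match / control weights
lm_probs   = torch.tensor([0.30, 0.25, 0.45])     # hypothetical top-k LM probabilities
clip_score = torch.tensor([0.20, 0.50, 0.30])     # hypothetical CLIP softmax scores
ctl_probs  = torch.tensor([0.10, 0.10, 0.80])     # hypothetical control (sentiment/POS) scores

final_score = alpha * lm_probs + beta * clip_score + gamma * ctl_probs
print(final_score.argmax().item())  # 2 -- the large gamma lets the control term dominate
```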
examples/Gosh.jpeg ADDED
examples/cat.png ADDED
examples/girl.jpg ADDED
examples/horse.png ADDED
gen_utils.py ADDED
@@ -0,0 +1,324 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import random
5
+ from utils import get_init_text, update_token_mask
6
+ import time
7
+
8
+
9
+
10
+ def generate_step(out, gen_idx, temperature=None, top_k=0, sample=False, return_list=True):
11
+ """ Generate a word from out[gen_idx]
12
+
13
+ args:
14
+ - out (torch.Tensor): tensor of logits of size batch_size x seq_len x vocab_size
15
+ - gen_idx (int): location for which to generate for
16
+ - top_k (int): if >0, only sample from the top k most probable words
17
+ - sample (Bool): if True, sample from full distribution. Overridden by top_k
18
+ """
19
+ logits = out[:, gen_idx]
20
+ if temperature is not None:
21
+ logits = logits / temperature
22
+ if top_k > 0:
23
+ kth_vals, kth_idx = logits.topk(top_k, dim=-1)
24
+ dist = torch.distributions.categorical.Categorical(logits=kth_vals)
25
+ idx = kth_idx.gather(dim=1, index=dist.sample().unsqueeze(-1)).squeeze(-1)
26
+ elif sample:
27
+ dist = torch.distributions.categorical.Categorical(logits=logits)
28
+ idx = dist.sample().squeeze(-1)
29
+ else:
30
+ idx = torch.argmax(logits, dim=-1)
31
+ return idx.tolist() if return_list else idx
32
+
33
+ def generate_caption_step(out, gen_idx, mask, temperature=None, top_k=100):
34
+ """ Generate a word from out[gen_idx]
35
+ args:
36
+ - out (torch.Tensor): tensor of logits of size (batch_size, seq_len, vocab_size)
37
+ - gen_idx (int): location for which to generate for
38
+ - mask (torch.Tensor): (1, vocab_size)
39
+ - top_k (int): candidate k
40
+ """
41
+ logits = out[:, gen_idx]
42
+ if temperature is not None:
43
+ logits = logits / temperature
44
+
45
+ probs = F.softmax(logits, dim=-1)
46
+ probs *= (mask)
47
+ top_k_probs, top_k_ids = probs.topk(top_k, dim=-1)
48
+
49
+ return top_k_probs, top_k_ids
50
+
51
+ def sequential_generation(model, clip, tokenizer, image_instance,token_mask, prompt, logger,
52
+ max_len=15, top_k=100,temperature=None, alpha=0.7,beta=1,
53
+ max_iters=20,batch_size=1, verbose=True):
54
+ """ Generate one word at a time, in L->R order """
55
+
56
+ seed_len = len(prompt.split())+1
57
+ batch = get_init_text(tokenizer, prompt, max_len, batch_size)
58
+ image_embeds = clip.compute_image_representation_from_image_instance(image_instance)
59
+ clip_score_sequence = []
60
+ best_clip_score = 0
61
+ inp = torch.tensor(batch).to(image_embeds.device)
62
+ gen_texts = []
63
+ for iter_num in range(max_iters):
64
+ for ii in range(max_len):
65
+ token_mask = update_token_mask(tokenizer, token_mask, max_len, ii)
66
+ for jj in range(batch_size):
67
+ inp[jj][seed_len + ii] = tokenizer.mask_token_id
68
+ inp_ = inp.clone().detach()
69
+ out = model(inp).logits
70
+ probs, idxs = generate_caption_step(out, gen_idx=seed_len + ii,mask=token_mask, top_k=top_k, temperature=temperature)
71
+ for jj in range(batch_size):
72
+ topk_inp = inp_.repeat(top_k, 1)
73
+ idxs_ = (idxs[jj] * token_mask[0][idxs[jj]]).long()
74
+ topk_inp[:, ii + seed_len] = idxs_
75
+ batch_text_list = tokenizer.batch_decode(topk_inp, skip_special_tokens=True)
76
+
77
+ clip_score, clip_ref = clip.compute_image_text_similarity_via_raw_text(image_embeds, batch_text_list)
78
+ final_score = alpha * probs + beta * clip_score
79
+ best_clip_id = final_score.argmax()
80
+
81
+ inp[jj][seed_len + ii] = idxs_[best_clip_id]
82
+ current_clip_score = clip_ref[jj][best_clip_id]
83
+ clip_score_sequence.append(current_clip_score.cpu().item())
84
+
85
+ if verbose and np.mod(iter_num + 1, 1) == 0:
86
+ for_print = tokenizer.decode(inp[0])
87
+ cur_text = tokenizer.decode(inp[0],skip_special_tokens=True)
88
+ if best_clip_score < current_clip_score.cpu().item():
89
+ best_clip_score = current_clip_score.cpu().item()
90
+ best_caption = cur_text
91
+ gen_texts.append(cur_text)
92
+ logger.info(f"iter {iter_num + 1}, clip score {current_clip_score:.3f}: "+ for_print)
93
+
94
+ gen_texts.append(best_caption)
95
+ clip_score_sequence.append(best_clip_score)
96
+
97
+ return gen_texts, clip_score_sequence
98
+
99
+ def shuffle_generation(model, clip, tokenizer,image_instance,token_mask, prompt, logger,
100
+ max_len=15, top_k=0,temperature=None, alpha=0.7,beta=1,
101
+ max_iters=20,batch_size=1,
102
+ verbose=True):
103
+ """ Generate one word at a time, in random generation order """
104
+ seed_len = len(prompt.split())+1
105
+ batch = get_init_text(tokenizer,prompt, max_len, batch_size)
106
+ image_embeds = clip.compute_image_representation_from_image_instance(image_instance)
107
+ inp = torch.tensor(batch).to(image_embeds.device)
108
+ clip_score_sequence = []
109
+ best_clip_score = 0
110
+ random_lst = list(range(max_len))
111
+ random.shuffle(random_lst)
112
+ logger.info(f"Order_list:{random_lst}")
113
+ gen_texts = []
114
+ for iter_num in range(max_iters):
115
+ for ii in random_lst:
116
+ token_mask = update_token_mask(tokenizer, token_mask, max_len, ii)
117
+ for jj in range(batch_size):
118
+ inp[jj][seed_len + ii] = tokenizer.mask_token_id
119
+ inp_ = inp.clone().detach()
120
+ out = model(inp).logits
121
+ probs, idxs = generate_caption_step(out, gen_idx=seed_len + ii,mask=token_mask, top_k=top_k, temperature=temperature)
122
+ for jj in range(batch_size):
123
+ topk_inp = inp_.repeat(top_k, 1)
124
+ topk_inp[:, ii + seed_len] = (idxs[jj] * token_mask[0][idxs[jj]]).long()
125
+ batch_text_list = tokenizer.batch_decode(topk_inp, skip_special_tokens=True)
126
+ clip_score,clip_ref = clip.compute_image_text_similarity_via_raw_text(image_embeds, batch_text_list)
127
+ final_score = alpha * probs + beta * clip_score
128
+ best_clip_id = final_score.argmax()
129
+ inp[jj][seed_len + ii] = idxs[jj][best_clip_id]
130
+ current_clip_score = clip_ref[jj][best_clip_id]
131
+ clip_score_sequence.append(current_clip_score.cpu().item())
132
+ if verbose and np.mod(iter_num + 1, 1) == 0:
133
+ for_print = tokenizer.decode(inp[0])
134
+ cur_text = tokenizer.decode(inp[0],skip_special_tokens=True)
135
+ gen_texts.append(cur_text)
136
+ if best_clip_score < current_clip_score.cpu().item():
137
+ best_clip_score = current_clip_score.cpu().item()
138
+ best_caption = cur_text
139
+ logger.info(f"iter {iter_num + 1}, clip score {current_clip_score:.3f}: "+for_print)
140
+ gen_texts.append(best_caption)
141
+ clip_score_sequence.append(best_clip_score)
142
+
143
+ return gen_texts, clip_score_sequence
144
+
145
+ def span_generation(model, clip, tokenizer,image_instance,token_mask, prompt, logger,
146
+ max_len=15, top_k=0,temperature=None, alpha=0.7,beta=1,
147
+ max_iters=20,batch_size=1,verbose=True):
148
+ """ Generate multiple words at a time (span generation), in L->R order """
149
+ seed_len = len(prompt.split())+1
150
+ span_len = 2
151
+ batch = get_init_text(tokenizer,prompt, max_len, batch_size)
152
+ image_embeds = clip.compute_image_representation_from_image_instance(image_instance)
153
+ clip_score_sequence = []
154
+ best_clip_score = 0
155
+ inp = torch.tensor(batch).to(image_embeds.device)
156
+ gen_texts = []
157
+ for iter_num in range(max_iters):
158
+ for span_start in range(0,max_len,span_len):
159
+ span_end = min(span_start+span_len,max_len)
160
+ for jj in range(batch_size):
161
+ inp[jj][seed_len + span_start: seed_len + span_end] = tokenizer.mask_token_id
162
+ out = model(inp).logits
163
+
164
+ for ii in range(span_start,span_end):
165
+ token_mask = update_token_mask(tokenizer, token_mask, max_len, ii)
166
+ inp_ = inp.clone().detach()
167
+ probs, idxs = generate_caption_step(out, gen_idx=seed_len + ii, mask=token_mask, top_k=top_k,
168
+ temperature=temperature)
169
+ for jj in range(batch_size):
170
+ topk_inp = inp_.repeat(top_k, 1)
171
+ idxs_ = (idxs[jj] * token_mask[0][idxs[jj]]).long()
172
+ topk_inp[:, ii + seed_len] = idxs_
173
+ batch_text_list = tokenizer.batch_decode(topk_inp, skip_special_tokens=True)
174
+
175
+ clip_score, clip_ref = clip.compute_image_text_similarity_via_raw_text(image_embeds, batch_text_list)
176
+ final_score = alpha * probs + beta * clip_score
177
+ best_clip_id = final_score.argmax()
178
+
179
+ inp[jj][seed_len + ii] = idxs_[best_clip_id]
180
+ current_clip_score = clip_ref[jj][best_clip_id]
181
+ clip_score_sequence.append(current_clip_score.cpu().item())
182
+
183
+ if verbose and np.mod(iter_num + 1, 1) == 0:
184
+ for_print = tokenizer.decode(inp[0])
185
+ cur_text = tokenizer.decode(inp[0],skip_special_tokens=True)
186
+ if best_clip_score < current_clip_score.cpu().item():
187
+ best_clip_score = current_clip_score.cpu().item()
188
+ best_caption = cur_text
189
+ gen_texts.append(cur_text)
190
+ logger.info(f"iter {iter_num + 1}, clip score {current_clip_score:.3f}: "+ for_print)
191
+ gen_texts.append(best_caption)
192
+ clip_score_sequence.append(best_clip_score)
193
+
194
+ return gen_texts, clip_score_sequence
195
+
196
+ def random_generation(model, clip, tokenizer,image_instance,token_mask, prompt, logger,
197
+ max_len=15, top_k=0, temperature=None,alpha=0.7,beta=2,
198
+ max_iters=300,print_every=10,batch_size=1,
199
+ verbose=True):
200
+ """ Generate for one random position at a timestep"""
201
+
202
+ seed_len = len(prompt.split())+1
203
+ batch = get_init_text(tokenizer, prompt, max_len, batch_size)
204
+ image_embeds = clip.compute_image_representation_from_image_instance(image_instance)
205
+ clip_score_sequence = []
206
+ best_clip_score = 0
207
+ inp = torch.tensor(batch).to(image_embeds.device)
208
+ gen_texts = []
209
+ for ii in range(max_iters):
210
+ kk = np.random.randint(0, max_len)
211
+ token_mask = update_token_mask(tokenizer, token_mask, max_len, kk)
212
+ for jj in range(batch_size):
213
+ inp[jj][seed_len + kk] = tokenizer.mask_token_id
214
+ inp_ = inp.clone().detach()
215
+ out = model(inp).logits
216
+ probs, idxs = generate_caption_step(out,gen_idx=seed_len + kk,mask=token_mask, top_k=top_k, temperature=temperature)
217
+ for jj in range(batch_size):
218
+ topk_inp = inp_.repeat(top_k, 1)
219
+ topk_inp[:, kk + seed_len] = (idxs[jj] * token_mask[0][idxs[jj]]).long()
220
+ batch_text_list = tokenizer.batch_decode(topk_inp, skip_special_tokens=True)
221
+
222
+ clip_score, clip_ref = clip.compute_image_text_similarity_via_raw_text(image_embeds, batch_text_list)
223
+ final_score = alpha * probs + beta * clip_score
224
+ best_clip_id = final_score.argmax()
225
+
226
+ inp[jj][seed_len + kk] = idxs[jj][best_clip_id]
227
+ current_clip_score = clip_ref[jj][best_clip_id]
228
+ clip_score_sequence.append(current_clip_score.cpu().item())
229
+ if best_clip_score < current_clip_score.cpu().item():
230
+ best_clip_score = current_clip_score.cpu().item()
231
+ best_caption = tokenizer.decode(inp[0], skip_special_tokens=True)
232
+
233
+ if verbose and np.mod(ii + 1, print_every) == 0:
234
+ for_print = tokenizer.decode(inp[0])
235
+ logger.info(f"iter {ii + 1}, clip score {current_clip_score:.3f}: "+for_print)
236
+ cur_text = tokenizer.decode(inp[0], skip_special_tokens=True)
237
+ gen_texts.append(cur_text)
238
+ gen_texts.append(best_caption)
239
+ clip_score_sequence.append(best_clip_score)
240
+
        return gen_texts, clip_score_sequence


def parallel_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
                        max_len=15, top_k=0, temperature=None, alpha=0.1, beta=1,
                        max_iters=300, batch_size=1, print_every=1, verbose=True):
    """Generate candidates for all positions at each time step."""
    seed_len = len(prompt.split()) + 1
    batch = get_init_text(tokenizer, prompt, max_len, batch_size)
    image_embeds = clip.compute_image_representation_from_image_instance(image_instance)
    clip_score_sequence = []
    inp = torch.tensor(batch).to(image_embeds.device)
    gen_texts = []
    best_clip_score = 0
    best_caption = ""  # guard against the score never improving before the final append

    for ii in range(max_iters):
        inp_ = inp.clone().detach()
        out = model(inp).logits
        for kk in range(max_len):
            probs, idxs = generate_caption_step(out, gen_idx=seed_len + kk, mask=token_mask,
                                                top_k=top_k, temperature=temperature)
            for jj in range(batch_size):
                topk_inp = inp_.repeat(top_k, 1)
                # place the top-k candidates at the position currently being scored
                # (kk + seed_len, matching the update of inp below)
                topk_inp[:, kk + seed_len] = (idxs[jj] * token_mask[0][idxs[jj]]).long()
                batch_text_list = tokenizer.batch_decode(topk_inp, skip_special_tokens=True)
                clip_score, clip_ref = clip.compute_image_text_similarity_via_raw_text(image_embeds, batch_text_list)
                final_score = alpha * probs + beta * clip_score
                best_clip_id = final_score.argmax()

                inp[jj][seed_len + kk] = idxs[jj][best_clip_id]
                current_clip_score = clip_ref[jj][best_clip_id]
                clip_score_sequence.append(current_clip_score.cpu().item())

        if verbose and ii % print_every == 0:
            logger.info(f"iter {ii + 1}, clip score {current_clip_score:.3f}: " + tokenizer.decode(inp[0]))
        cur_text = tokenizer.decode(inp[0], skip_special_tokens=True)
        if best_clip_score < current_clip_score.cpu().item():
            best_clip_score = current_clip_score.cpu().item()
            best_caption = cur_text
        gen_texts.append(cur_text)
    gen_texts.append(best_caption)
    clip_score_sequence.append(best_clip_score)

    return gen_texts, clip_score_sequence


def generate_caption(model, clip, tokenizer, image_instance, token_mask, logger,
                     prompt="", batch_size=1, max_len=15,
                     top_k=100, temperature=1.0, max_iter=500, alpha=0.7, beta=1,
                     generate_order="sequential"):
    """Main generation entry point; dispatches on the requested generation order."""
    start_time = time.time()

    if generate_order == "sequential":
        generate_texts, clip_scores = sequential_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
                                                            batch_size=batch_size, max_len=max_len, top_k=top_k,
                                                            alpha=alpha, beta=beta, temperature=temperature,
                                                            max_iters=max_iter)

    elif generate_order == "shuffle":
        generate_texts, clip_scores = shuffle_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
                                                         batch_size=batch_size, max_len=max_len, top_k=top_k,
                                                         alpha=alpha, beta=beta, temperature=temperature, max_iters=max_iter)

    elif generate_order == "random":
        # each iteration touches a single random position, so scale the budget by the length
        max_iter *= max_len
        print_every = max_len
        generate_texts, clip_scores = random_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
                                                        max_len=max_len, top_k=top_k, alpha=alpha, beta=beta, print_every=print_every,
                                                        temperature=temperature, max_iters=max_iter, verbose=True)

    elif generate_order == "span":
        generate_texts, clip_scores = span_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
                                                      batch_size=batch_size, max_len=max_len, top_k=top_k,
                                                      alpha=alpha, beta=beta, temperature=temperature, max_iters=max_iter)

    elif generate_order == "parallel":
        generate_texts, clip_scores = parallel_generation(model, clip, tokenizer, image_instance, token_mask, prompt, logger,
                                                          max_len=max_len, temperature=temperature, top_k=top_k, alpha=alpha, beta=beta,
                                                          max_iters=max_iter, verbose=True)

    logger.info("Finished in %.3fs" % (time.time() - start_time))
    logger.info(f"final caption: {generate_texts[-2]}")
    logger.info(f"best caption: {generate_texts[-1]}")
    return generate_texts, clip_scores
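For orientation, a minimal driver sketch under stated assumptions: the LM name, the image path, and the all-ones token mask are illustrative, and the CLIP wrapper and logger are left as comments because their constructors are not shown in this diff.

# Illustrative sketch only; "bert-base-uncased" and the mask are assumptions.
import torch
from PIL import Image
from transformers import AutoModelForMaskedLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
lm_model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")
token_mask = torch.ones(1, tokenizer.vocab_size)   # 1 = token may be sampled
image = Image.open("example.jpg")                  # hypothetical path
# clip = ... (CLIP wrapper) and logger = create_logger(...) as elsewhere in the repo
# texts, scores = generate_caption(lm_model, clip, tokenizer, image, token_mask, logger,
#                                  prompt="Image of", generate_order="sequential")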
requirements.txt ADDED
@@ -0,0 +1,3 @@
colorlog
nltk
transformers
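The NLTK calls in sentiments_classifer.py below (tokenizer, POS tagger, SentiWordNet) rely on corpora that ship separately from the pip package; a one-time download along these lines is needed (standard NLTK resource names, not specific to this repo):

import nltk
# tokenizer, POS tagger, and the SentiWordNet/WordNet corpora used below
for pkg in ["punkt", "averaged_perceptron_tagger", "universal_tagset",
            "sentiwordnet", "wordnet", "omw-1.4"]:
    nltk.download(pkg)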
sentiments_classifer.py ADDED
@@ -0,0 +1,51 @@
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import sentiwordnet
import torch
import torch.nn.functional as F


def text_POS_Sentiments_analysis(text, sentiment_ctl=None):
    """
    Tag a sentence and score its sentiment with SentiWordNet.
    WordNet POS ids 0-4 correspond to: none, n (noun), v (verb), a (adjective), r (adverb).
    """
    words = word_tokenize(text)

    word_tag = pos_tag(words)
    res_tag = [tag[1] for tag in word_tag]
    # map Penn Treebank tags to WordNet POS letters
    tag_map = {'NN': 'n', 'NNP': 'n', 'NNPS': 'n', 'NNS': 'n', 'UH': 'n',
               'VB': 'v', 'VBD': 'v', 'VBG': 'v', 'VBN': 'v', 'VBP': 'v', 'VBZ': 'v',
               'JJ': 'a', 'JJR': 'a', 'JJS': 'a',
               'RB': 'r', 'RBR': 'r', 'RBS': 'r', 'RP': 'r', 'WRB': 'r'}

    word_tag = [(t[0], tag_map[t[1]]) if t[1] in tag_map else (t[0], '') for t in word_tag]

    wordnet_tag = [tag[1] for tag in word_tag]
    sentiment_synsets = [list(sentiwordnet.senti_synsets(t[0], t[1])) for t in word_tag]

    if sentiment_ctl is None:
        return 0, res_tag, wordnet_tag
    # per word: average (positive - negative) score over its synsets; sum over words
    score = sum(sum(x.pos_score() - x.neg_score() for x in s) / len(s) for s in sentiment_synsets if len(s) != 0)
    if sentiment_ctl == "negative":
        score = -score
    return score, res_tag, wordnet_tag


def batch_texts_POS_Sentiments_analysis(batch_texts, temperature, device, sentiment_ctl=None):
    batch_size = len(batch_texts)
    senti_scores = torch.zeros(batch_size)
    pos_tags = []
    wordnet_pos_tags = []
    for b_id in range(batch_size):
        text = batch_texts[b_id]
        score, cur_tag, cur_word_tag = text_POS_Sentiments_analysis(text, sentiment_ctl=sentiment_ctl)
        senti_scores[b_id] = score
        pos_tags.append(cur_tag)
        wordnet_pos_tags.append(cur_word_tag)
    final_prob_score = F.softmax(senti_scores / temperature, dim=0).to(device)

    return final_prob_score, senti_scores, pos_tags, wordnet_pos_tags
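A quick usage sketch (the sentences are illustrative; requires the NLTK downloads noted above):

texts = ["A happy dog plays in the sunny park.", "A sad man sits alone in the rain."]
probs, scores, pos_tags, wn_tags = batch_texts_POS_Sentiments_analysis(
    texts, temperature=0.1, device="cpu", sentiment_ctl="positive")
# probs is a softmax over the batch, so the more positive caption receives the
# larger sampling weight; sentiment_ctl="negative" flips the sign of the scores.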
stop_words.txt ADDED
@@ -0,0 +1,2835 @@
1
+ ...
2
+ [unused0]
3
+ [unused1]
4
+ [unused2]
5
+ [unused3]
6
+ [unused4]
7
+ [unused5]
8
+ [unused6]
9
+ [unused7]
10
+ [unused8]
11
+ [unused9]
12
+ [unused10]
13
+ [unused11]
14
+ [unused12]
15
+ [unused13]
16
+ [unused14]
17
+ [unused15]
18
+ [unused16]
19
+ [unused17]
20
+ [unused18]
21
+ [unused19]
22
+ [unused20]
23
+ [unused21]
24
+ [unused22]
25
+ [unused23]
26
+ [unused24]
27
+ [unused25]
28
+ [unused26]
29
+ [unused27]
30
+ [unused28]
31
+ [unused29]
32
+ [unused30]
33
+ [unused31]
34
+ [unused32]
35
+ [unused33]
36
+ [unused34]
37
+ [unused35]
38
+ [unused36]
39
+ [unused37]
40
+ [unused38]
41
+ [unused39]
42
+ [unused40]
43
+ [unused41]
44
+ [unused42]
45
+ [unused43]
46
+ [unused44]
47
+ [unused45]
48
+ [unused46]
49
+ [unused47]
50
+ [unused48]
51
+ [unused49]
52
+ [unused50]
53
+ [unused51]
54
+ [unused52]
55
+ [unused53]
56
+ [unused54]
57
+ [unused55]
58
+ [unused56]
59
+ [unused57]
60
+ [unused58]
61
+ [unused59]
62
+ [unused60]
63
+ [unused61]
64
+ [unused62]
65
+ [unused63]
66
+ [unused64]
67
+ [unused65]
68
+ [unused66]
69
+ [unused67]
70
+ [unused68]
71
+ [unused69]
72
+ [unused70]
73
+ [unused71]
74
+ [unused72]
75
+ [unused73]
76
+ [unused74]
77
+ [unused75]
78
+ [unused76]
79
+ [unused77]
80
+ [unused78]
81
+ [unused79]
82
+ [unused80]
83
+ [unused81]
84
+ [unused82]
85
+ [unused83]
86
+ [unused84]
87
+ [unused85]
88
+ [unused86]
89
+ [unused87]
90
+ [unused88]
91
+ [unused89]
92
+ [unused90]
93
+ [unused91]
94
+ [unused92]
95
+ [unused93]
96
+ [unused94]
97
+ [unused95]
98
+ [unused96]
99
+ [unused97]
100
+ [unused98]
101
+ [unused99]
102
+ [unused100]
103
+ [unused101]
104
+ [unused102]
105
+ [unused103]
106
+ [unused104]
107
+ [unused105]
108
+ [unused106]
109
+ [unused107]
110
+ [unused108]
111
+ [unused109]
112
+ [unused110]
113
+ [unused111]
114
+ [unused112]
115
+ [unused113]
116
+ [unused114]
117
+ [unused115]
118
+ [unused116]
119
+ [unused117]
120
+ [unused118]
121
+ [unused119]
122
+ [unused120]
123
+ [unused121]
124
+ [unused122]
125
+ [unused123]
126
+ [unused124]
127
+ [unused125]
128
+ [unused126]
129
+ [unused127]
130
+ [unused128]
131
+ [unused129]
132
+ [unused130]
133
+ [unused131]
134
+ [unused132]
135
+ [unused133]
136
+ [unused134]
137
+ [unused135]
138
+ [unused136]
139
+ [unused137]
140
+ [unused138]
141
+ [unused139]
142
+ [unused140]
143
+ [unused141]
144
+ [unused142]
145
+ [unused143]
146
+ [unused144]
147
+ [unused145]
148
+ [unused146]
149
+ [unused147]
150
+ [unused148]
151
+ [unused149]
152
+ [unused150]
153
+ [unused151]
154
+ [unused152]
155
+ [unused153]
156
+ [unused154]
157
+ [unused155]
158
+ [unused156]
159
+ [unused157]
160
+ [unused158]
161
+ [unused159]
162
+ [unused160]
163
+ [unused161]
164
+ [unused162]
165
+ [unused163]
166
+ [unused164]
167
+ [unused165]
168
+ [unused166]
169
+ [unused167]
170
+ [unused168]
171
+ [unused169]
172
+ [unused170]
173
+ [unused171]
174
+ [unused172]
175
+ [unused173]
176
+ [unused174]
177
+ [unused175]
178
+ [unused176]
179
+ [unused177]
180
+ [unused178]
181
+ [unused179]
182
+ [unused180]
183
+ [unused181]
184
+ [unused182]
185
+ [unused183]
186
+ [unused184]
187
+ [unused185]
188
+ [unused186]
189
+ [unused187]
190
+ [unused188]
191
+ [unused189]
192
+ [unused190]
193
+ [unused191]
194
+ [unused192]
195
+ [unused193]
196
+ [unused194]
197
+ [unused195]
198
+ [unused196]
199
+ [unused197]
200
+ [unused198]
201
+ [unused199]
202
+ [unused200]
203
+ [unused201]
204
+ [unused202]
205
+ [unused203]
206
+ [unused204]
207
+ [unused205]
208
+ [unused206]
209
+ [unused207]
210
+ [unused208]
211
+ [unused209]
212
+ [unused210]
213
+ [unused211]
214
+ [unused212]
215
+ [unused213]
216
+ [unused214]
217
+ [unused215]
218
+ [unused216]
219
+ [unused217]
220
+ [unused218]
221
+ [unused219]
222
+ [unused220]
223
+ [unused221]
224
+ [unused222]
225
+ [unused223]
226
+ [unused224]
227
+ [unused225]
228
+ [unused226]
229
+ [unused227]
230
+ [unused228]
231
+ [unused229]
232
+ [unused230]
233
+ [unused231]
234
+ [unused232]
235
+ [unused233]
236
+ [unused234]
237
+ [unused235]
238
+ [unused236]
239
+ [unused237]
240
+ [unused238]
241
+ [unused239]
242
+ [unused240]
243
+ [unused241]
244
+ [unused242]
245
+ [unused243]
246
+ [unused244]
247
+ [unused245]
248
+ [unused246]
249
+ [unused247]
250
+ [unused248]
251
+ [unused249]
252
+ [unused250]
253
+ [unused251]
254
+ [unused252]
255
+ [unused253]
256
+ [unused254]
257
+ [unused255]
258
+ [unused256]
259
+ [unused257]
260
+ [unused258]
261
+ [unused259]
262
+ [unused260]
263
+ [unused261]
264
+ [unused262]
265
+ [unused263]
266
+ [unused264]
267
+ [unused265]
268
+ [unused266]
269
+ [unused267]
270
+ [unused268]
271
+ [unused269]
272
+ [unused270]
273
+ [unused271]
274
+ [unused272]
275
+ [unused273]
276
+ [unused274]
277
+ [unused275]
278
+ [unused276]
279
+ [unused277]
280
+ [unused278]
281
+ [unused279]
282
+ [unused280]
283
+ [unused281]
284
+ [unused282]
285
+ [unused283]
286
+ [unused284]
287
+ [unused285]
288
+ [unused286]
289
+ [unused287]
290
+ [unused288]
291
+ [unused289]
292
+ [unused290]
293
+ [unused291]
294
+ [unused292]
295
+ [unused293]
296
+ [unused294]
297
+ [unused295]
298
+ [unused296]
299
+ [unused297]
300
+ [unused298]
301
+ [unused299]
302
+ [unused300]
303
+ [unused301]
304
+ [unused302]
305
+ [unused303]
306
+ [unused304]
307
+ [unused305]
308
+ [unused306]
309
+ [unused307]
310
+ [unused308]
311
+ [unused309]
312
+ [unused310]
313
+ [unused311]
314
+ [unused312]
315
+ [unused313]
316
+ [unused314]
317
+ [unused315]
318
+ [unused316]
319
+ [unused317]
320
+ [unused318]
321
+ [unused319]
322
+ [unused320]
323
+ [unused321]
324
+ [unused322]
325
+ [unused323]
326
+ [unused324]
327
+ [unused325]
328
+ [unused326]
329
+ [unused327]
330
+ [unused328]
331
+ [unused329]
332
+ [unused330]
333
+ [unused331]
334
+ [unused332]
335
+ [unused333]
336
+ [unused334]
337
+ [unused335]
338
+ [unused336]
339
+ [unused337]
340
+ [unused338]
341
+ [unused339]
342
+ [unused340]
343
+ [unused341]
344
+ [unused342]
345
+ [unused343]
346
+ [unused344]
347
+ [unused345]
348
+ [unused346]
349
+ [unused347]
350
+ [unused348]
351
+ [unused349]
352
+ [unused350]
353
+ [unused351]
354
+ [unused352]
355
+ [unused353]
356
+ [unused354]
357
+ [unused355]
358
+ [unused356]
359
+ [unused357]
360
+ [unused358]
361
+ [unused359]
362
+ [unused360]
363
+ [unused361]
364
+ [unused362]
365
+ [unused363]
366
+ [unused364]
367
+ [unused365]
368
+ [unused366]
369
+ [unused367]
370
+ [unused368]
371
+ [unused369]
372
+ [unused370]
373
+ [unused371]
374
+ [unused372]
375
+ [unused373]
376
+ [unused374]
377
+ [unused375]
378
+ [unused376]
379
+ [unused377]
380
+ [unused378]
381
+ [unused379]
382
+ [unused380]
383
+ [unused381]
384
+ [unused382]
385
+ [unused383]
386
+ [unused384]
387
+ [unused385]
388
+ [unused386]
389
+ [unused387]
390
+ [unused388]
391
+ [unused389]
392
+ [unused390]
393
+ [unused391]
394
+ [unused392]
395
+ [unused393]
396
+ [unused394]
397
+ [unused395]
398
+ [unused396]
399
+ [unused397]
400
+ [unused398]
401
+ [unused399]
402
+ [unused400]
403
+ [unused401]
404
+ [unused402]
405
+ [unused403]
406
+ [unused404]
407
+ [unused405]
408
+ [unused406]
409
+ [unused407]
410
+ [unused408]
411
+ [unused409]
412
+ [unused410]
413
+ [unused411]
414
+ [unused412]
415
+ [unused413]
416
+ [unused414]
417
+ [unused415]
418
+ [unused416]
419
+ [unused417]
420
+ [unused418]
421
+ [unused419]
422
+ [unused420]
423
+ [unused421]
424
+ [unused422]
425
+ [unused423]
426
+ [unused424]
427
+ [unused425]
428
+ [unused426]
429
+ [unused427]
430
+ [unused428]
431
+ [unused429]
432
+ [unused430]
433
+ [unused431]
434
+ [unused432]
435
+ [unused433]
436
+ [unused434]
437
+ [unused435]
438
+ [unused436]
439
+ [unused437]
440
+ [unused438]
441
+ [unused439]
442
+ [unused440]
443
+ [unused441]
444
+ [unused442]
445
+ [unused443]
446
+ [unused444]
447
+ [unused445]
448
+ [unused446]
449
+ [unused447]
450
+ [unused448]
451
+ [unused449]
452
+ [unused450]
453
+ [unused451]
454
+ [unused452]
455
+ [unused453]
456
+ [unused454]
457
+ [unused455]
458
+ [unused456]
459
+ [unused457]
460
+ [unused458]
461
+ [unused459]
462
+ [unused460]
463
+ [unused461]
464
+ [unused462]
465
+ [unused463]
466
+ [unused464]
467
+ [unused465]
468
+ [unused466]
469
+ [unused467]
470
+ [unused468]
471
+ [unused469]
472
+ [unused470]
473
+ [unused471]
474
+ [unused472]
475
+ [unused473]
476
+ [unused474]
477
+ [unused475]
478
+ [unused476]
479
+ [unused477]
480
+ [unused478]
481
+ [unused479]
482
+ [unused480]
483
+ [unused481]
484
+ [unused482]
485
+ [unused483]
486
+ [unused484]
487
+ [unused485]
488
+ [unused486]
489
+ [unused487]
490
+ [unused488]
491
+ [unused489]
492
+ [unused490]
493
+ [unused491]
494
+ [unused492]
495
+ [unused493]
496
+ [unused494]
497
+ [unused495]
498
+ [unused496]
499
+ [unused497]
500
+ [unused498]
501
+ [unused499]
502
+ [unused500]
503
+ [unused501]
504
+ [unused502]
505
+ [unused503]
506
+ [unused504]
507
+ [unused505]
508
+ [unused506]
509
+ [unused507]
510
+ [unused508]
511
+ [unused509]
512
+ [unused510]
513
+ [unused511]
514
+ [unused512]
515
+ [unused513]
516
+ [unused514]
517
+ [unused515]
518
+ [unused516]
519
+ [unused517]
520
+ [unused518]
521
+ [unused519]
522
+ [unused520]
523
+ [unused521]
524
+ [unused522]
525
+ [unused523]
526
+ [unused524]
527
+ [unused525]
528
+ [unused526]
529
+ [unused527]
530
+ [unused528]
531
+ [unused529]
532
+ [unused530]
533
+ [unused531]
534
+ [unused532]
535
+ [unused533]
536
+ [unused534]
537
+ [unused535]
538
+ [unused536]
539
+ [unused537]
540
+ [unused538]
541
+ [unused539]
542
+ [unused540]
543
+ [unused541]
544
+ [unused542]
545
+ [unused543]
546
+ [unused544]
547
+ [unused545]
548
+ [unused546]
549
+ [unused547]
550
+ [unused548]
551
+ [unused549]
552
+ [unused550]
553
+ [unused551]
554
+ [unused552]
555
+ [unused553]
556
+ [unused554]
557
+ [unused555]
558
+ [unused556]
559
+ [unused557]
560
+ [unused558]
561
+ [unused559]
562
+ [unused560]
563
+ [unused561]
564
+ [unused562]
565
+ [unused563]
566
+ [unused564]
567
+ [unused565]
568
+ [unused566]
569
+ [unused567]
570
+ [unused568]
571
+ [unused569]
572
+ [unused570]
573
+ [unused571]
574
+ [unused572]
575
+ [unused573]
576
+ [unused574]
577
+ [unused575]
578
+ [unused576]
579
+ [unused577]
580
+ [unused578]
581
+ [unused579]
582
+ [unused580]
583
+ [unused581]
584
+ [unused582]
585
+ [unused583]
586
+ [unused584]
587
+ [unused585]
588
+ [unused586]
589
+ [unused587]
590
+ [unused588]
591
+ [unused589]
592
+ [unused590]
593
+ [unused591]
594
+ [unused592]
595
+ [unused593]
596
+ [unused594]
597
+ [unused595]
598
+ [unused596]
599
+ [unused597]
600
+ [unused598]
601
+ [unused599]
602
+ [unused600]
603
+ [unused601]
604
+ [unused602]
605
+ [unused603]
606
+ [unused604]
607
+ [unused605]
608
+ [unused606]
609
+ [unused607]
610
+ [unused608]
611
+ [unused609]
612
+ [unused610]
613
+ [unused611]
614
+ [unused612]
615
+ [unused613]
616
+ [unused614]
617
+ [unused615]
618
+ [unused616]
619
+ [unused617]
620
+ [unused618]
621
+ [unused619]
622
+ [unused620]
623
+ [unused621]
624
+ [unused622]
625
+ [unused623]
626
+ [unused624]
627
+ [unused625]
628
+ [unused626]
629
+ [unused627]
630
+ [unused628]
631
+ [unused629]
632
+ [unused630]
633
+ [unused631]
634
+ [unused632]
635
+ [unused633]
636
+ [unused634]
637
+ [unused635]
638
+ [unused636]
639
+ [unused637]
640
+ [unused638]
641
+ [unused639]
642
+ [unused640]
643
+ [unused641]
644
+ [unused642]
645
+ [unused643]
646
+ [unused644]
647
+ [unused645]
648
+ [unused646]
649
+ [unused647]
650
+ [unused648]
651
+ [unused649]
652
+ [unused650]
653
+ [unused651]
654
+ [unused652]
655
+ [unused653]
656
+ [unused654]
657
+ [unused655]
658
+ [unused656]
659
+ [unused657]
660
+ [unused658]
661
+ [unused659]
662
+ [unused660]
663
+ [unused661]
664
+ [unused662]
665
+ [unused663]
666
+ [unused664]
667
+ [unused665]
668
+ [unused666]
669
+ [unused667]
670
+ [unused668]
671
+ [unused669]
672
+ [unused670]
673
+ [unused671]
674
+ [unused672]
675
+ [unused673]
676
+ [unused674]
677
+ [unused675]
678
+ [unused676]
679
+ [unused677]
680
+ [unused678]
681
+ [unused679]
682
+ [unused680]
683
+ [unused681]
684
+ [unused682]
685
+ [unused683]
686
+ [unused684]
687
+ [unused685]
688
+ [unused686]
689
+ [unused687]
690
+ [unused688]
691
+ [unused689]
692
+ [unused690]
693
+ [unused691]
694
+ [unused692]
695
+ [unused693]
696
+ [unused694]
697
+ [unused695]
698
+ [unused696]
699
+ [unused697]
700
+ [unused698]
701
+ [unused699]
702
+ [unused700]
703
+ [unused701]
704
+ [unused702]
705
+ [unused703]
706
+ [unused704]
707
+ [unused705]
708
+ [unused706]
709
+ [unused707]
710
+ [unused708]
711
+ [unused709]
712
+ [unused710]
713
+ [unused711]
714
+ [unused712]
715
+ [unused713]
716
+ [unused714]
717
+ [unused715]
718
+ [unused716]
719
+ [unused717]
720
+ [unused718]
721
+ [unused719]
722
+ [unused720]
723
+ [unused721]
724
+ [unused722]
725
+ [unused723]
726
+ [unused724]
727
+ [unused725]
728
+ [unused726]
729
+ [unused727]
730
+ [unused728]
731
+ [unused729]
732
+ [unused730]
733
+ [unused731]
734
+ [unused732]
735
+ [unused733]
736
+ [unused734]
737
+ [unused735]
738
+ [unused736]
739
+ [unused737]
740
+ [unused738]
741
+ [unused739]
742
+ [unused740]
743
+ [unused741]
744
+ [unused742]
745
+ [unused743]
746
+ [unused744]
747
+ [unused745]
748
+ [unused746]
749
+ [unused747]
750
+ [unused748]
751
+ [unused749]
752
+ [unused750]
753
+ [unused751]
754
+ [unused752]
755
+ [unused753]
756
+ [unused754]
757
+ [unused755]
758
+ [unused756]
759
+ [unused757]
760
+ [unused758]
761
+ [unused759]
762
+ [unused760]
763
+ [unused761]
764
+ [unused762]
765
+ [unused763]
766
+ [unused764]
767
+ [unused765]
768
+ [unused766]
769
+ [unused767]
770
+ [unused768]
771
+ [unused769]
772
+ [unused770]
773
+ [unused771]
774
+ [unused772]
775
+ [unused773]
776
+ [unused774]
777
+ [unused775]
778
+ [unused776]
779
+ [unused777]
780
+ [unused778]
781
+ [unused779]
782
+ [unused780]
783
+ [unused781]
784
+ [unused782]
785
+ [unused783]
786
+ [unused784]
787
+ [unused785]
788
+ [unused786]
789
+ [unused787]
790
+ [unused788]
791
+ [unused789]
792
+ [unused790]
793
+ [unused791]
794
+ [unused792]
795
+ [unused793]
796
+ [unused794]
797
+ [unused795]
798
+ [unused796]
799
+ [unused797]
800
+ [unused798]
801
+ [unused799]
802
+ [unused800]
803
+ [unused801]
804
+ [unused802]
805
+ [unused803]
806
+ [unused804]
807
+ [unused805]
808
+ [unused806]
809
+ [unused807]
810
+ [unused808]
811
+ [unused809]
812
+ [unused810]
813
+ [unused811]
814
+ [unused812]
815
+ [unused813]
816
+ [unused814]
817
+ [unused815]
818
+ [unused816]
819
+ [unused817]
820
+ [unused818]
821
+ [unused819]
822
+ [unused820]
823
+ [unused821]
824
+ [unused822]
825
+ [unused823]
826
+ [unused824]
827
+ [unused825]
828
+ [unused826]
829
+ [unused827]
830
+ [unused828]
831
+ [unused829]
832
+ [unused830]
833
+ [unused831]
834
+ [unused832]
835
+ [unused833]
836
+ [unused834]
837
+ [unused835]
838
+ [unused836]
839
+ [unused837]
840
+ [unused838]
841
+ [unused839]
842
+ [unused840]
843
+ [unused841]
844
+ [unused842]
845
+ [unused843]
846
+ [unused844]
847
+ [unused845]
848
+ [unused846]
849
+ [unused847]
850
+ [unused848]
851
+ [unused849]
852
+ [unused850]
853
+ [unused851]
854
+ [unused852]
855
+ [unused853]
856
+ [unused854]
857
+ [unused855]
858
+ [unused856]
859
+ [unused857]
860
+ [unused858]
861
+ [unused859]
862
+ [unused860]
863
+ [unused861]
864
+ [unused862]
865
+ [unused863]
866
+ [unused864]
867
+ [unused865]
868
+ [unused866]
869
+ [unused867]
870
+ [unused868]
871
+ [unused869]
872
+ [unused870]
873
+ [unused871]
874
+ [unused872]
875
+ [unused873]
876
+ [unused874]
877
+ [unused875]
878
+ [unused876]
879
+ [unused877]
880
+ [unused878]
881
+ [unused879]
882
+ [unused880]
883
+ [unused881]
884
+ [unused882]
885
+ [unused883]
886
+ [unused884]
887
+ [unused885]
888
+ [unused886]
889
+ [unused887]
890
+ [unused888]
891
+ [unused889]
892
+ [unused890]
893
+ [unused891]
894
+ [unused892]
895
+ [unused893]
896
+ [unused894]
897
+ [unused895]
898
+ [unused896]
899
+ [unused897]
900
+ [unused898]
901
+ [unused899]
902
+ [unused900]
903
+ [unused901]
904
+ [unused902]
905
+ [unused903]
906
+ [unused904]
907
+ [unused905]
908
+ [unused906]
909
+ [unused907]
910
+ [unused908]
911
+ [unused909]
912
+ [unused910]
913
+ [unused911]
914
+ [unused912]
915
+ [unused913]
916
+ [unused914]
917
+ [unused915]
918
+ [unused916]
919
+ [unused917]
920
+ [unused918]
921
+ [unused919]
922
+ [unused920]
923
+ [unused921]
924
+ [unused922]
925
+ [unused923]
926
+ [unused924]
927
+ [unused925]
928
+ [unused926]
929
+ [unused927]
930
+ [unused928]
931
+ [unused929]
932
+ [unused930]
933
+ [unused931]
934
+ [unused932]
935
+ [unused933]
936
+ [unused934]
937
+ [unused935]
938
+ [unused936]
939
+ [unused937]
940
+ [unused938]
941
+ [unused939]
942
+ [unused940]
943
+ [unused941]
944
+ [unused942]
945
+ [unused943]
946
+ [unused944]
947
+ [unused945]
948
+ [unused946]
949
+ [unused947]
950
+ [unused948]
951
+ [unused949]
952
+ [unused950]
953
+ [unused951]
954
+ [unused952]
955
+ [unused953]
956
+ [unused954]
957
+ [unused955]
958
+ [unused956]
959
+ [unused957]
960
+ [unused958]
961
+ [unused959]
962
+ [unused960]
963
+ [unused961]
964
+ [unused962]
965
+ [unused963]
966
+ [unused964]
967
+ [unused965]
968
+ [unused966]
969
+ [unused967]
970
+ [unused968]
971
+ [unused969]
972
+ [unused970]
973
+ [unused971]
974
+ [unused972]
975
+ [unused973]
976
+ [unused974]
977
+ [unused975]
978
+ [unused976]
979
+ [unused977]
980
+ [unused978]
981
+ [unused979]
982
+ [unused980]
983
+ [unused981]
984
+ [unused982]
985
+ [unused983]
986
+ [unused984]
987
+ [unused985]
988
+ [unused986]
989
+ [unused987]
990
+ [unused988]
991
+ [unused989]
992
+ [unused990]
993
+ [unused991]
994
+ [unused992]
995
+ [unused993]
996
+ !
997
+ "
998
+ #
999
+ $
1000
+ %
1001
+ &
1002
+ '
1003
+ (
1004
+ )
1005
+ *
1006
+ +
1007
+ ,
1008
+ -
1009
+ /
1010
+ :
1011
+ ;
1012
+ <
1013
+ =
1014
+ >
1015
+ ?
1016
+ @
1017
+ [
1018
+ \
1019
+ ]
1020
+ ^
1021
+ _
1022
+ `
1023
+ {
1024
+ |
1025
+ }
1026
+ ~
1027
+ ¡
1028
+ ¢
1029
+ £
1030
+ ¤
1031
+ ¥
1032
+ ¦
1033
+ §
1034
+ ¨
1035
+ ©
1036
+ ª
1037
+ «
1038
+ ¬
1039
+ ®
1040
+ °
1041
+ ±
1042
+ ²
1043
+ ³
1044
+ ´
1045
+ µ
1046
+
1047
+ ·
1048
+ ¹
1049
+ º
1050
+ »
1051
+ ¼
1052
+ ½
1053
+ ¾
1054
+ ¿
1055
+ ×
1056
+ ß
1057
+ æ
1058
+ ð
1059
+ ÷
1060
+ ø
1061
+ þ
1062
+ đ
1063
+ ħ
1064
+ ı
1065
+ ł
1066
+ ŋ
1067
+ œ
1068
+ ƒ
1069
+ ɐ
1070
+ ɑ
1071
+ ɒ
1072
+ ɔ
1073
+ ɕ
1074
+ ə
1075
+ ɛ
1076
+ ɡ
1077
+ ɣ
1078
+ ɨ
1079
+ ɪ
1080
+ ɫ
1081
+ ɬ
1082
+ ɯ
1083
+ ɲ
1084
+ ɴ
1085
+ ɹ
1086
+ ɾ
1087
+ ʀ
1088
+ ʁ
1089
+ ʂ
1090
+ ʃ
1091
+ ʉ
1092
+ ʊ
1093
+ ʋ
1094
+ ʌ
1095
+ ʎ
1096
+ ʐ
1097
+ ʑ
1098
+ ʒ
1099
+ ʔ
1100
+ ʰ
1101
+ ʲ
1102
+ ʳ
1103
+ ʷ
1104
+ ʸ
1105
+ ʻ
1106
+ ʼ
1107
+ ʾ
1108
+ ʿ
1109
+ ˈ
1110
+ ː
1111
+ ˡ
1112
+ ˢ
1113
+ ˣ
1114
+ ˤ
1115
+ α
1116
+ β
1117
+ γ
1118
+ δ
1119
+ ε
1120
+ ζ
1121
+ η
1122
+ θ
1123
+ ι
1124
+ κ
1125
+ λ
1126
+ μ
1127
+ ν
1128
+ ξ
1129
+ ο
1130
+ π
1131
+ ρ
1132
+ ς
1133
+ σ
1134
+ τ
1135
+ υ
1136
+ φ
1137
+ χ
1138
+ ψ
1139
+ ω
1140
+ а
1141
+ б
1142
+ в
1143
+ г
1144
+ д
1145
+ е
1146
+ ж
1147
+ з
1148
+ и
1149
+ к
1150
+ л
1151
+ м
1152
+ н
1153
+ о
1154
+ п
1155
+ р
1156
+ с
1157
+ т
1158
+ у
1159
+ ф
1160
+ х
1161
+ ц
1162
+ ч
1163
+ ш
1164
+ щ
1165
+ ъ
1166
+ ы
1167
+ ь
1168
+ э
1169
+ ю
1170
+ я
1171
+ ђ
1172
+ є
1173
+ і
1174
+ ј
1175
+ љ
1176
+ њ
1177
+ ћ
1178
+ ӏ
1179
+ ա
1180
+ բ
1181
+ գ
1182
+ դ
1183
+ ե
1184
+ թ
1185
+ ի
1186
+ լ
1187
+ կ
1188
+ հ
1189
+ մ
1190
+ յ
1191
+ ն
1192
+ ո
1193
+ պ
1194
+ ս
1195
+ վ
1196
+ տ
1197
+ ր
1198
+ ւ
1199
+ ք
1200
+ ־
1201
+ א
1202
+ ב
1203
+ ג
1204
+ ד
1205
+ ה
1206
+ ו
1207
+ ז
1208
+ ח
1209
+ ט
1210
+ י
1211
+ ך
1212
+ כ
1213
+ ל
1214
+ ם
1215
+ מ
1216
+ ן
1217
+ נ
1218
+ ס
1219
+ ע
1220
+ ף
1221
+ פ
1222
+ ץ
1223
+ צ
1224
+ ק
1225
+ ר
1226
+ ש
1227
+ ת
1228
+ ،
1229
+ ء
1230
+ ا
1231
+ ب
1232
+ ة
1233
+ ت
1234
+ ث
1235
+ ج
1236
+ ح
1237
+ خ
1238
+ د
1239
+ ذ
1240
+ ر
1241
+ ز
1242
+ س
1243
+ ش
1244
+ ص
1245
+ ض
1246
+ ط
1247
+ ظ
1248
+ ع
1249
+ غ
1250
+ ـ
1251
+ ف
1252
+ ق
1253
+ ك
1254
+ ل
1255
+ م
1256
+ ن
1257
+ ه
1258
+ و
1259
+ ى
1260
+ ي
1261
+ ٹ
1262
+ پ
1263
+ چ
1264
+ ک
1265
+ گ
1266
+ ں
1267
+ ھ
1268
+ ہ
1269
+ ی
1270
+ ے
1271
+
1272
+
1273
+
1274
+
1275
+
1276
+
1277
+
1278
+
1279
+
1280
+
1281
+
1282
+
1283
+
1284
+
1285
+
1286
+
1287
+
1288
+
1289
+
1290
+
1291
+
1292
+
1293
+
1294
+
1295
+
1296
+
1297
+
1298
+
1299
+
1300
+
1301
+ ि
1302
+
1303
+
1304
+
1305
+
1306
+
1307
+
1308
+
1309
+
1310
+
1311
+
1312
+
1313
+
1314
+
1315
+
1316
+
1317
+
1318
+
1319
+
1320
+
1321
+
1322
+
1323
+
1324
+
1325
+
1326
+
1327
+
1328
+
1329
+
1330
+
1331
+
1332
+
1333
+
1334
+
1335
+
1336
+
1337
+
1338
+
1339
+ ি
1340
+
1341
+
1342
+
1343
+
1344
+
1345
+
1346
+
1347
+
1348
+
1349
+
1350
+
1351
+
1352
+
1353
+
1354
+
1355
+
1356
+ ி
1357
+
1358
+
1359
+
1360
+
1361
+
1362
+
1363
+
1364
+
1365
+
1366
+
1367
+
1368
+
1369
+
1370
+
1371
+
1372
+
1373
+
1374
+
1375
+
1376
+
1377
+
1378
+
1379
+
1380
+
1381
+
1382
+
1383
+
1384
+
1385
+
1386
+
1387
+
1388
+
1389
+
1390
+
1391
+
1392
+
1393
+
1394
+
1395
+
1396
+
1397
+
1398
+
1399
+
1400
+
1401
+
1402
+
1403
+
1404
+
1405
+
1406
+
1407
+
1408
+
1409
+
1410
+
1411
+
1412
+
1413
+
1414
+
1415
+
1416
+
1417
+
1418
+
1419
+
1420
+
1421
+
1422
+
1423
+
1424
+
1425
+
1426
+
1427
+
1428
+
1429
+
1430
+
1431
+
1432
+
1433
+
1434
+
1435
+
1436
+
1437
+
1438
+
1439
+
1440
+
1441
+
1442
+
1443
+
1444
+
1445
+
1446
+
1447
+
1448
+
1449
+
1450
+
1451
+
1452
+
1453
+
1454
+
1455
+
1456
+
1457
+
1458
+
1459
+
1460
+
1461
+
1462
+
1463
+
1464
+
1465
+
1466
+
1467
+
1468
+
1469
+
1470
+
1471
+
1472
+
1473
+
1474
+
1475
+
1476
+
1477
+
1478
+
1479
+
1480
+
1481
+
1482
+
1483
+
1484
+
1485
+
1486
+
1487
+
1488
+
1489
+
1490
+
1491
+
1492
+
1493
+
1494
+
1495
+
1496
+
1497
+
1498
+
1499
+
1500
+
1501
+
1502
+
1503
+
1504
+
1505
+
1506
+
1507
+
1508
+
1509
+
1510
+
1511
+
1512
+
1513
+
1514
+
1515
+
1516
+
1517
+
1518
+
1519
+
1520
+
1521
+
1522
+
1523
+
1524
+
1525
+
1526
+
1527
+
1528
+
1529
+
1530
+
1531
+
1532
+
1533
+
1534
+
1535
+
1536
+
1537
+
1538
+
1539
+
1540
+
1541
+
1542
+
1543
+
1544
+
1545
+
1546
+
1547
+
1548
+
1549
+
1550
+
1551
+
1552
+
1553
+
1554
+
1555
+
1556
+
1557
+
1558
+
1559
+
1560
+
1561
+
1562
+
1563
+
1564
+
1565
+
1566
+
1567
+
1568
+
1569
+
1570
+
1571
+
1572
+
1573
+
1574
+
1575
+
1576
+
1577
+
1578
+
1579
+
1580
+
1581
+
1582
+
1583
+
1584
+
1585
+
1586
+
1587
+
1588
+
1589
+
1590
+
1591
+
1592
+
1593
+
1594
+
1595
+
1596
+
1597
+
1598
+
1599
+
1600
+
1601
+
1602
+
1603
+
1604
+
1605
+
1606
+
1607
+
1608
+
1609
+
1610
+
1611
+
1612
+
1613
+
1614
+
1615
+
1616
+
1617
+
1618
+
1619
+
1620
+
1621
+
1622
+
1623
+
1624
+
1625
+
1626
+
1627
+
1628
+
1629
+
1630
+
1631
+
1632
+
1633
+
1634
+
1635
+
1636
+
1637
+
1638
+
1639
+
1640
+
1641
+
1642
+
1643
+
1644
+
1645
+
1646
+
1647
+
1648
+
1649
+
1650
+
1651
+
1652
+
1653
+
1654
+
1655
+
1656
+
1657
+
1658
+
1659
+
1660
+
1661
+
1662
+
1663
+
1664
+
1665
+
1666
+
1667
+
1668
+
1669
+
1670
+
1671
+
1672
+
1673
+
1674
+
1675
+
1676
+
1677
+
1678
+
1679
+
1680
+
1681
+
1682
+
1683
+
1684
+
1685
+
1686
+
1687
+
1688
+
1689
+
1690
+
1691
+
1692
+
1693
+
1694
+
1695
+
1696
+
1697
+
1698
+
1699
+
1700
+
1701
+
1702
+
1703
+
1704
+
1705
+
1706
+
1707
+
1708
+
1709
+
1710
+
1711
+
1712
+
1713
+
1714
+
1715
+
1716
+
1717
+
1718
+
1719
+
1720
+
1721
+
1722
+
1723
+
1724
+
1725
+
1726
+
1727
+
1728
+
1729
+
1730
+
1731
+
1732
+
1733
+
1734
+
1735
+
1736
+
1737
+
1738
+
1739
+
1740
+
1741
+
1742
+
1743
+
1744
+
1745
+
1746
+
1747
+
1748
+
1749
+
1750
+
1751
+
1752
+
1753
+
1754
+
1755
+
1756
+
1757
+
1758
+
1759
+
1760
+
1761
+
1762
+
1763
+
1764
+
1765
+
1766
+
1767
+
1768
+
1769
+
1770
+
1771
+
1772
+
1773
+
1774
+
1775
+
1776
+
1777
+
1778
+
1779
+
1780
+
1781
+
1782
+
1783
+
1784
+
1785
+
1786
+ 宿
1787
+
1788
+
1789
+
1790
+
1791
+
1792
+
1793
+
1794
+
1795
+
1796
+
1797
+ 巿
1798
+
1799
+
1800
+
1801
+
1802
+ 广
1803
+
1804
+
1805
+
1806
+
1807
+
1808
+
1809
+
1810
+
1811
+
1812
+
1813
+
1814
+
1815
+
1816
+
1817
+
1818
+
1819
+
1820
+
1821
+
1822
+
1823
+
1824
+
1825
+
1826
+
1827
+
1828
+
1829
+
1830
+
1831
+
1832
+
1833
+
1834
+
1835
+
1836
+
1837
+
1838
+
1839
+
1840
+
1841
+
1842
+
1843
+
1844
+
1845
+
1846
+
1847
+
1848
+
1849
+
1850
+
1851
+
1852
+
1853
+
1854
+
1855
+
1856
+
1857
+
1858
+
1859
+
1860
+
1861
+
1862
+
1863
+
1864
+
1865
+
1866
+
1867
+
1868
+
1869
+
1870
+
1871
+
1872
+
1873
+
1874
+
1875
+
1876
+
1877
+
1878
+
1879
+
1880
+
1881
+
1882
+
1883
+
1884
+
1885
+
1886
+
1887
+
1888
+
1889
+
1890
+
1891
+
1892
+
1893
+
1894
+
1895
+
1896
+
1897
+
1898
+
1899
+
1900
+
1901
+
1902
+
1903
+
1904
+
1905
+
1906
+
1907
+ 西
1908
+
1909
+
1910
+
1911
+
1912
+
1913
+
1914
+
1915
+
1916
+
1917
+
1918
+
1919
+
1920
+
1921
+
1922
+
1923
+
1924
+
1925
+
1926
+
1927
+
1928
+
1929
+
1930
+
1931
+
1932
+
1933
+
1934
+
1935
+
1936
+
1937
+
1938
+
1939
+
1940
+
1941
+
1942
+
1943
+
1944
+
1945
+
1946
+
1947
+
1948
+
1949
+
1950
+
1951
+
1952
+
1953
+
1954
+
1955
+ 0
1956
+ 1
1957
+ 2
1958
+ 3
1959
+ 4
1960
+ 5
1961
+ 6
1962
+ 7
1963
+ 8
1964
+ 9
1965
+ ²
1966
+ ³
1967
+ ¹
1968
+
1969
+
1970
+
1971
+
1972
+
1973
+
1974
+
1975
+
1976
+
1977
+
1978
+
1979
+
1980
+
1981
+
1982
+
1983
+
1984
+
1985
+ 10
1986
+ 000
1987
+ 2010
1988
+ 2011
1989
+ 12
1990
+ 2012
1991
+ 2008
1992
+ 2009
1993
+ 2013
1994
+ 2007
1995
+ 2006
1996
+ 2014
1997
+ 15
1998
+ 20
1999
+ 18
2000
+ 2015
2001
+ 11
2002
+ 2016
2003
+ 30
2004
+ 2005
2005
+ 16
2006
+ 14
2007
+ 13
2008
+ 2017
2009
+ 25
2010
+ 2004
2011
+ 2000
2012
+ 17
2013
+ 24
2014
+ 2003
2015
+ 2002
2016
+ 100
2017
+ 21
2018
+ 19
2019
+ 2001
2020
+ 22
2021
+ 23
2022
+ 1999
2023
+ 28
2024
+ 26
2025
+ 27
2026
+ 1998
2027
+ 1997
2028
+ 1996
2029
+ 50
2030
+ 29
2031
+ 2018
2032
+ 1995
2033
+ 1994
2034
+ 1992
2035
+ 1993
2036
+ 31
2037
+ 40
2038
+ 1991
2039
+ 1990
2040
+ 1989
2041
+ 1988
2042
+ 1987
2043
+ 1986
2044
+ 1985
2045
+ 1984
2046
+ 1980
2047
+ 500
2048
+ 1983
2049
+ 1982
2050
+ 1979
2051
+ 1981
2052
+ 200
2053
+ 1972
2054
+ 1976
2055
+ 1978
2056
+ 1974
2057
+ 1975
2058
+ 1977
2059
+ 1970
2060
+ 1968
2061
+ 1973
2062
+ 1945
2063
+ 1971
2064
+ 45
2065
+ 60
2066
+ 1969
2067
+ 1967
2068
+ 35
2069
+ 65
2070
+ 1964
2071
+ 1966
2072
+ 1965
2073
+ 32
2074
+ 1960
2075
+ 1944
2076
+ 1963
2077
+ 1962
2078
+ 1942
2079
+ 80
2080
+ 1961
2081
+ 1943
2082
+ 1956
2083
+ 1958
2084
+ 1959
2085
+ 1941
2086
+ 1940
2087
+ 1948
2088
+ 1957
2089
+ 1939
2090
+ 1946
2091
+ 1950
2092
+ 90
2093
+ 33
2094
+ 70
2095
+ 1955
2096
+ 300
2097
+ 1952
2098
+ 00
2099
+ 1947
2100
+ 44
2101
+ 36
2102
+ 1954
2103
+ 1953
2104
+ 1949
2105
+ 34
2106
+ 1951
2107
+ 64
2108
+ 38
2109
+ 1938
2110
+ 37
2111
+ 1936
2112
+ 1918
2113
+ 400
2114
+ 75
2115
+ 1937
2116
+ 42
2117
+ 1935
2118
+ 1920
2119
+ 39
2120
+ 48
2121
+ 1930
2122
+ 1919
2123
+ 1933
2124
+ 1914
2125
+ 1934
2126
+ 55
2127
+ 1917
2128
+ 41
2129
+ 1929
2130
+ 1928
2131
+ 1932
2132
+ 47
2133
+ 52
2134
+ 43
2135
+ 1931
2136
+ 49
2137
+ 1927
2138
+ 1922
2139
+ 46
2140
+ 1924
2141
+ 1925
2142
+ 51
2143
+ 1912
2144
+ 1926
2145
+ 1921
2146
+ 978
2147
+ 1923
2148
+ 1915
2149
+ 1916
2150
+ 1910
2151
+ 150
2152
+ 1913
2153
+ 54
2154
+ 1900
2155
+ 600
2156
+ 56
2157
+ 1911
2158
+ 53
2159
+ 1908
2160
+ 95
2161
+ 59
2162
+ 800
2163
+ 58
2164
+ 57
2165
+ 1905
2166
+ 08
2167
+ 1906
2168
+ 1907
2169
+ 250
2170
+ 1909
2171
+ 99
2172
+ 85
2173
+ 09
2174
+ 1904
2175
+ 05
2176
+ 07
2177
+ 06
2178
+ 66
2179
+ 1902
2180
+ 1901
2181
+ 1903
2182
+ 62
2183
+ 98
2184
+ 72
2185
+ 04
2186
+ 01
2187
+ 96
2188
+ 97
2189
+ 03
2190
+ 120
2191
+ 1898
2192
+ 88
2193
+ 61
2194
+ 93
2195
+ 76
2196
+ 67
2197
+ 1899
2198
+ 02
2199
+ 63
2200
+ 1890
2201
+ 91
2202
+ 92
2203
+ 77
2204
+ 68
2205
+ 78
2206
+ 81
2207
+ 1895
2208
+ 1896
2209
+ 1897
2210
+ 700
2211
+ 69
2212
+ 74
2213
+ 94
2214
+ 71
2215
+ 84
2216
+ 73
2217
+ 82
2218
+ 1889
2219
+ 89
2220
+ 1893
2221
+ 1892
2222
+ 79
2223
+ 1894
2224
+ 86
2225
+ 1885
2226
+ 87
2227
+ 1891
2228
+ 83
2229
+ 1888
2230
+ 1000
2231
+ 1864
2232
+ 1865
2233
+ 1880
2234
+ 1887
2235
+ 1861
2236
+ 1862
2237
+ 1863
2238
+ 1886
2239
+ 1870
2240
+ 1884
2241
+ 1881
2242
+ 1882
2243
+ 1883
2244
+ 1878
2245
+ 110
2246
+ 1860
2247
+ 1876
2248
+ 1871
2249
+ 1879
2250
+ 1875
2251
+ 1867
2252
+ 1877
2253
+ 130
2254
+ 1872
2255
+ 1868
2256
+ 1874
2257
+ 1873
2258
+ 1866
2259
+ 900
2260
+ 1869
2261
+ 101
2262
+ 1850
2263
+ 1848
2264
+ 160
2265
+ 1859
2266
+ 1857
2267
+ 180
2268
+ 1854
2269
+ 1855
2270
+ 1858
2271
+ 140
2272
+ 350
2273
+ 1856
2274
+ 125
2275
+ 105
2276
+ 1852
2277
+ 1851
2278
+ 1840
2279
+ 1853
2280
+ 1849
2281
+ 1847
2282
+ 1846
2283
+ 102
2284
+ 360
2285
+ 1830
2286
+ 1845
2287
+ 104
2288
+ 750
2289
+ 1837
2290
+ 1844
2291
+ 103
2292
+ 1800
2293
+ 1841
2294
+ 1812
2295
+ 1838
2296
+ 1842
2297
+ 1839
2298
+ 1843
2299
+ 1836
2300
+ 106
2301
+ 1835
2302
+ 1832
2303
+ 450
2304
+ 1500
2305
+ 2019
2306
+ 220
2307
+ 107
2308
+ 115
2309
+ 1815
2310
+ 1834
2311
+ 108
2312
+ 170
2313
+ 1831
2314
+ 1814
2315
+ 1833
2316
+ 1820
2317
+ 111
2318
+ 112
2319
+ 240
2320
+ 1825
2321
+ 135
2322
+ 1828
2323
+ 109
2324
+ 1829
2325
+ 1824
2326
+ 1821
2327
+ 1810
2328
+ 230
2329
+ 190
2330
+ 128
2331
+ 3000
2332
+ 1826
2333
+ 1818
2334
+ 113
2335
+ 1813
2336
+ 1822
2337
+ 1827
2338
+ 1816
2339
+ 1793
2340
+ 1801
2341
+ 114
2342
+ 1806
2343
+ 1823
2344
+ 1817
2345
+ 1819
2346
+ 117
2347
+ 121
2348
+ 2020
2349
+ 1803
2350
+ 1809
2351
+ 175
2352
+ 210
2353
+ 116
2354
+ 118
2355
+ 127
2356
+ 1798
2357
+ 1808
2358
+ 1811
2359
+ 122
2360
+ 1805
2361
+ 123
2362
+ 1804
2363
+ 1794
2364
+ 1807
2365
+ 550
2366
+ 119
2367
+ 1790
2368
+ 1795
2369
+ 124
2370
+ 1792
2371
+ 280
2372
+ 5000
2373
+ 1802
2374
+ 260
2375
+ 320
2376
+ 1789
2377
+ 145
2378
+ 270
2379
+ 650
2380
+ 1799
2381
+ 1796
2382
+ 165
2383
+ 1776
2384
+ 126
2385
+ 132
2386
+ 1797
2387
+ 155
2388
+ 330
2389
+ 1775
2390
+ 1791
2391
+ 129
2392
+ 133
2393
+ 131
2394
+ 144
2395
+ 1200
2396
+ 1600
2397
+ 137
2398
+ 225
2399
+ 152
2400
+ 138
2401
+ 1780
2402
+ 134
2403
+ 1783
2404
+ 185
2405
+ 136
2406
+ 141
2407
+ 1788
2408
+ 850
2409
+ 340
2410
+ 1787
2411
+ 143
2412
+ 142
2413
+ 1777
2414
+ 501
2415
+ 205
2416
+ 1778
2417
+ 146
2418
+ 201
2419
+ 370
2420
+ 148
2421
+ 147
2422
+ 1784
2423
+ 151
2424
+ 1700
2425
+ 139
2426
+ 154
2427
+ 153
2428
+ 156
2429
+ 167
2430
+ 1781
2431
+ 202
2432
+ 1758
2433
+ 1782
2434
+ 168
2435
+ 380
2436
+ 310
2437
+ 290
2438
+ 1785
2439
+ 460
2440
+ 256
2441
+ 480
2442
+ 195
2443
+ 149
2444
+ 161
2445
+ 157
2446
+ 215
2447
+ 440
2448
+ 1786
2449
+ 420
2450
+ 1772
2451
+ 275
2452
+ 1774
2453
+ 192
2454
+ 1779
2455
+ 182
2456
+ 158
2457
+ 1770
2458
+ 235
2459
+ 162
2460
+ 163
2461
+ 164
2462
+ 1660
2463
+ 375
2464
+ 177
2465
+ 212
2466
+ 1750
2467
+ 171
2468
+ 172
2469
+ 1763
2470
+ 208
2471
+ 203
2472
+ 176
2473
+ 169
2474
+ 181
2475
+ 166
2476
+ 183
2477
+ 206
2478
+ 159
2479
+ 222
2480
+ 1760
2481
+ 188
2482
+ 301
2483
+ 410
2484
+ 211
2485
+ 178
2486
+ 365
2487
+ 209
2488
+ 173
2489
+ 187
2490
+ 174
2491
+ 1300
2492
+ 430
2493
+ 221
2494
+ 186
2495
+ 520
2496
+ 204
2497
+ 325
2498
+ 184
2499
+ 224
2500
+ 640
2501
+ 1768
2502
+ 610
2503
+ 207
2504
+ 191
2505
+ 213
2506
+ 1773
2507
+ 214
2508
+ 194
2509
+ 197
2510
+ 193
2511
+ 303
2512
+ 911
2513
+ 198
2514
+ 390
2515
+ 196
2516
+ 4000
2517
+ 540
2518
+ 216
2519
+ 231
2520
+ 179
2521
+ 950
2522
+ 217
2523
+ 305
2524
+ 189
2525
+ 265
2526
+ 219
2527
+ 255
2528
+ 1400
2529
+ 1769
2530
+ 232
2531
+ 1771
2532
+ 199
2533
+ 218
2534
+ 1765
2535
+ 223
2536
+ 1762
2537
+ 660
2538
+ 245
2539
+ 226
2540
+ 312
2541
+ 470
2542
+ 333
2543
+ 560
2544
+ 1761
2545
+ 1766
2546
+ 1755
2547
+ 1764
2548
+ 227
2549
+ 1767
2550
+ 1640
2551
+ 264
2552
+ 1759
2553
+ 295
2554
+ 1740
2555
+ 285
2556
+ 1745
2557
+ 1650
2558
+ 262
2559
+ 234
2560
+ 238
2561
+ 302
2562
+ 737
2563
+ 1100
2564
+ 233
2565
+ 254
2566
+ 228
2567
+ 490
2568
+ 241
2569
+ 1756
2570
+ 246
2571
+ 242
2572
+ 1648
2573
+ 251
2574
+ 1754
2575
+ 1715
2576
+ 1757
2577
+ 401
2578
+ 1689
2579
+ 229
2580
+ 625
2581
+ 720
2582
+ 243
2583
+ 252
2584
+ 315
2585
+ 281
2586
+ 313
2587
+ 287
2588
+ 253
2589
+ 1730
2590
+ 425
2591
+ 237
2592
+ 247
2593
+ 510
2594
+ 1644
2595
+ 530
2596
+ 311
2597
+ 1720
2598
+ 236
2599
+ 630
2600
+ 620
2601
+ 249
2602
+ 239
2603
+ 580
2604
+ 322
2605
+ 345
2606
+ 1753
2607
+ 1710
2608
+ 304
2609
+ 802
2610
+ 680
2611
+ 316
2612
+ 405
2613
+ 321
2614
+ 1661
2615
+ 1642
2616
+ 1688
2617
+ 435
2618
+ 244
2619
+ 272
2620
+ 308
2621
+ 1620
2622
+ 257
2623
+ 258
2624
+ 512
2625
+ 335
2626
+ 385
2627
+ 1751
2628
+ 261
2629
+ 1748
2630
+ 1746
2631
+ 1747
2632
+ 307
2633
+ 248
2634
+ 1680
2635
+ 306
2636
+ 760
2637
+ 395
2638
+ 415
2639
+ 1749
2640
+ 278
2641
+ 1752
2642
+ 1690
2643
+ 404
2644
+ 288
2645
+ 570
2646
+ 286
2647
+ 1630
2648
+ 1707
2649
+ 309
2650
+ 1685
2651
+ 271
2652
+ 2500
2653
+ 276
2654
+ 268
2655
+ 266
2656
+ 590
2657
+ 259
2658
+ 980
2659
+ 1714
2660
+ 263
2661
+ 328
2662
+ 1741
2663
+ 1727
2664
+ 273
2665
+ 747
2666
+ 323
2667
+ 267
2668
+ 283
2669
+ 1643
2670
+ 670
2671
+ 277
2672
+ 274
2673
+ 001
2674
+ 1743
2675
+ 525
2676
+ 1603
2677
+ 1725
2678
+ 2021
2679
+ 1641
2680
+ 1742
2681
+ 269
2682
+ 279
2683
+ 292
2684
+ 1610
2685
+ 1739
2686
+ 740
2687
+ 1744
2688
+ 412
2689
+ 999
2690
+ 1662
2691
+ 299
2692
+ 6000
2693
+ 1701
2694
+ 1735
2695
+ 1645
2696
+ 357
2697
+ 1550
2698
+ 1670
2699
+ 314
2700
+ 1625
2701
+ 282
2702
+ 355
2703
+ 1724
2704
+ 319
2705
+ 1649
2706
+ 1723
2707
+ 317
2708
+ 960
2709
+ 820
2710
+ 1722
2711
+ 1737
2712
+ 1702
2713
+ 1728
2714
+ 880
2715
+ 284
2716
+ 293
2717
+ 521
2718
+ 1718
2719
+ 318
2720
+ 1713
2721
+ 1621
2722
+ 289
2723
+ 291
2724
+ 1675
2725
+ 296
2726
+ 1733
2727
+ 324
2728
+ 298
2729
+ 1672
2730
+ 1708
2731
+ 1734
2732
+ 1666
2733
+ 1683
2734
+ 1635
2735
+ 406
2736
+ 1654
2737
+ 1638
2738
+ 297
2739
+ 356
2740
+ 411
2741
+ 417
2742
+ 1717
2743
+ 331
2744
+ 1540
2745
+ 1732
2746
+ 1667
2747
+ 875
2748
+ 710
2749
+ 1665
2750
+ 1721
2751
+ 910
2752
+ 1704
2753
+ 343
2754
+ 354
2755
+ 1629
2756
+ 338
2757
+ 1679
2758
+ 336
2759
+ 730
2760
+ 1738
2761
+ 441
2762
+ 402
2763
+ 1609
2764
+ 690
2765
+ 840
2766
+ 1622
2767
+ 294
2768
+ 451
2769
+ 1719
2770
+ 326
2771
+ 1736
2772
+ 1086
2773
+ 1605
2774
+ 403
2775
+ 1716
2776
+ 1632
2777
+ 475
2778
+ 1580
2779
+ 1659
2780
+ 1726
2781
+ 341
2782
+ 1703
2783
+ 1656
2784
+ 1655
2785
+ 1731
2786
+ 1729
2787
+ 1711
2788
+ 1712
2789
+ 327
2790
+ 351
2791
+ 1664
2792
+ 337
2793
+ 1634
2794
+ 1624
2795
+ 780
2796
+ 1692
2797
+ 1628
2798
+ 1697
2799
+ 1016
2800
+ 050
2801
+ 1699
2802
+ 1604
2803
+ 1611
2804
+ 1646
2805
+ 1626
2806
+ 1652
2807
+ 870
2808
+ 1570
2809
+ 352
2810
+ 407
2811
+ 1658
2812
+ 505
2813
+ 1709
2814
+ 339
2815
+ 1663
2816
+ 1618
2817
+ 1623
2818
+ 770
2819
+ 1651
2820
+ 1695
2821
+ 1560
2822
+ 1612
2823
+ 422
2824
+ 495
2825
+ 1653
2826
+ 1705
2827
+ 332
2828
+ 381
2829
+ 930
2830
+ 344
2831
+ 421
2832
+ 1682
2833
+ 555
2834
+ 334
2835
+ 329
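The entries above mirror BERT's WordPiece vocabulary (the [unused*] slots, bare punctuation, digits, and years), which suggests the file feeds the token mask that bans these entries during generation. A sketch of that reading, under that assumption (build_token_mask is a hypothetical helper, not defined in this diff):

import torch

def build_token_mask(tokenizer, stop_words_path="stop_words.txt"):
    """Hypothetical loader: zero out vocabulary entries listed in stop_words.txt."""
    with open(stop_words_path, encoding="utf-8") as f:
        stop_words = {line.strip() for line in f if line.strip()}
    mask = torch.ones(1, tokenizer.vocab_size)
    for token, idx in tokenizer.vocab.items():
        if token in stop_words:
            mask[0, idx] = 0  # banned during sampling
    return mask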
utils.py ADDED
@@ -0,0 +1,74 @@
import numpy as np
import os
import logging
import colorlog
import random
import torch


def create_logger(folder, filename):
    log_colors = {
        'DEBUG': 'blue',
        'INFO': 'white',
        'WARNING': 'green',
        'ERROR': 'red',
        'CRITICAL': 'yellow',
    }

    logger = logging.getLogger('ConZIC')
    # colored, message-only console format
    LOGFORMAT = "%(log_color)s%(message)s%(reset)s"
    LOG_LEVEL = logging.DEBUG
    logging.root.setLevel(LOG_LEVEL)
    stream = logging.StreamHandler()
    stream.setLevel(LOG_LEVEL)
    stream.setFormatter(colorlog.ColoredFormatter(LOGFORMAT, datefmt='%d %H:%M', log_colors=log_colors))

    # also print to a log file
    hdlr = logging.FileHandler(os.path.join(folder, filename))
    hdlr.setLevel(LOG_LEVEL)
    hdlr.setFormatter(logging.Formatter("%(message)s"))
    logger.addHandler(hdlr)
    logger.addHandler(stream)
    return logger


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def get_init_text(tokenizer, seed_text, max_len, batch_size=1):
    """Build the initial sentence by padding seed_text with max_len [MASK] tokens."""
    text = seed_text + tokenizer.mask_token * max_len
    ids = tokenizer.encode(text)
    batch = [ids] * batch_size
    return batch


def update_token_mask(tokenizer, token_mask, max_len, index):
    """'.' (full stop) is only allowed at the last token position."""
    if index == max_len - 1:
        token_mask[:, tokenizer.vocab['.']] = 1
    else:
        token_mask[:, tokenizer.vocab['.']] = 0
    return token_mask


def format_output(sample_num, FinalCaption, BestCaption):
    """Join up to five final/best captions, one per line."""
    n = min(max(sample_num, 1), 5)
    return "\n".join(FinalCaption[:n]), "\n".join(BestCaption[:n])
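To see the two sentence helpers above in action (the model name is an assumption; any BERT-style tokenizer with a [MASK] token works):

import torch
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed LM
batch = get_init_text(tok, "Image of a", max_len=5, batch_size=2)
print(tok.decode(batch[0]))
# -> [CLS] image of a [MASK] [MASK] [MASK] [MASK] [MASK] [SEP]

mask = torch.ones(1, tok.vocab_size)
mask = update_token_mask(tok, mask, max_len=5, index=4)  # last slot: '.' allowed
mask = update_token_mask(tok, mask, max_len=5, index=2)  # elsewhere: '.' banned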