anonymousauthors committed on
Commit
30c7c90
1 Parent(s): 38b6dfd
pages/0_📙_Dictionary_(Search).py CHANGED
@@ -62,7 +62,7 @@ if title in datas:
62
  # st.markdown(f"## {title}'s meaning in English[¹](#jump)")
63
  colored_header(
64
  label=f"{title}'s meaning in English[¹](#jump)",
65
- description="",
66
  color_name="violet-70",
67
  )
68
  # write the meaning of input word
@@ -114,36 +114,33 @@ if title in datas:
114
  if all_sl[i] != '':
115
  new_all_sl.append(all_sl[i].replace("\n", "/n").strip())
116
  all_sl = sorted(new_all_sl)
117
- st.markdown(
118
- f':red[{len(all_sl)}] secret languages of :blue[{title}] on {task.replace("paraphrase", "Paraphrase")}',
119
- unsafe_allow_html=True)
120
- special = '"'
121
- _title_secret_languages = [
122
- # f'[{i}](#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task})'
123
- f'<a href="#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task}">{i}</a>'
124
- for i in all_sl]
125
- st.markdown(
126
- '>' + ', '.join(_title_secret_languages).replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
127
- unsafe_allow_html=True)
 
 
128
 
129
 
130
  present_sl_task(secret_language_by_task, 'NLI')
131
  present_sl_task(secret_language_by_task, 'QA')
132
  present_sl_task(secret_language_by_task, 'Paraphrase')
133
 
134
- st.markdown(f"\**Hyperlinks only function when the corresponding tab is open. "
135
  f"For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open. However, due to the container property of Hugging Face Space, the hyperlinks might be not able to function.*")
136
- st.markdown('\**Due to the grammatical properties of HTML, the layout of this page may vary.*')
137
- # st.markdown(f"---")
138
- # st.markdown(f"## Examples of replaced sentences")
139
- # st.markdown(f"The number following the tasks represents the number of examples found for a particular task, which may be different from the number of secret languages.")
140
  colored_header(
141
  label=f"Examples of replaced sentences",
142
- description=f'The number following the tasks represents the number of examples found for a particular task, which may be different from the number of secret languages.',
143
  color_name="orange-70",
144
  )
145
- # st.text(','.join(title_secret_languages).replace('<s>', '\<s\>'))
146
- # st.dataframe(dataframe)
147
  _num = Counter(data_title['tasks'])
148
  tab1, tab2, tab3 = st.tabs([f'NLI ({_num["NLI"]})', f'QA ({_num["QA"]})', f'Paraphrase ({_num["Paraphrase"]})'])
149
 
@@ -242,7 +239,7 @@ if title in datas:
242
  _string += 'question**: :'
243
  elif task == 'Paraphrase':
244
  _string += 'sentence 1**: :'
245
- _string += f'red[{_all[_sl]["Replaced hypothesis"][j]}]'.replace('/', '\\').replace(___sl, f"<i><b>{___sl}</b></i>").replace(":", "[colon]")
246
  if task == 'NLI':
247
  _string += '<br> **Premise**: :'
248
  elif task == 'QA':
@@ -273,8 +270,8 @@ if title in datas:
273
  with tab3:
274
  # present(dataframe, 'Paraphrase', title)
275
  present_dict(data_title, 'Paraphrase')
276
- st.markdown(
277
- f'<span id="jump">¹</span>*Enlish meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/).*',
278
- unsafe_allow_html=True)
279
  else:
280
  st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")
 
62
  # st.markdown(f"## {title}'s meaning in English[¹](#jump)")
63
  colored_header(
64
  label=f"{title}'s meaning in English[¹](#jump)",
65
+ description="**Enlish meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/)*",
66
  color_name="violet-70",
67
  )
68
  # write the meaning of input word
 
114
  if all_sl[i] != '':
115
  new_all_sl.append(all_sl[i].replace("\n", "/n").strip())
116
  all_sl = sorted(new_all_sl)
117
+ with st.expander(f'***{len(all_sl)}*** secret languages of ***{title}*** on {task.replace("paraphrase", "Paraphrase")}'):
118
+ # st.markdown(
119
+ # f':red[{len(all_sl)}] secret languages of :blue[{title}] on {task.replace("paraphrase", "Paraphrase")}',
120
+ # unsafe_allow_html=True)
121
+ special = '"'
122
+ _title_secret_languages = [
123
+ # f'[{i}](#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task})'
124
+ f'<a href="#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task}">{i}</a>'
125
+ for i in all_sl]
126
+ st.markdown(
127
+ # '>' + ', '.join(_title_secret_languages).replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
128
+ ', '.join(_title_secret_languages).replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
129
+ unsafe_allow_html=True)
130
 
131
 
132
  present_sl_task(secret_language_by_task, 'NLI')
133
  present_sl_task(secret_language_by_task, 'QA')
134
  present_sl_task(secret_language_by_task, 'Paraphrase')
135
 
136
+ st.caption(f"\**Hyperlinks only function when the corresponding tab is open. "
137
  f"For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open. However, due to the container property of Hugging Face Space, the hyperlinks might be not able to function.*")
138
+ st.caption('\**Due to the grammatical properties of HTML, the layout of this page may vary.*')
 
 
 
139
  colored_header(
140
  label=f"Examples of replaced sentences",
141
+ description=f'**The number following the tasks represents the number of examples found for a particular task, which may be different from the number of secret languages.*',
142
  color_name="orange-70",
143
  )
 
 
144
  _num = Counter(data_title['tasks'])
145
  tab1, tab2, tab3 = st.tabs([f'NLI ({_num["NLI"]})', f'QA ({_num["QA"]})', f'Paraphrase ({_num["Paraphrase"]})'])
146
 
 
239
  _string += 'question**: :'
240
  elif task == 'Paraphrase':
241
  _string += 'sentence 1**: :'
242
+ _string += f'red[{_all[_sl]["Replaced hypothesis"][j]}]'.replace('/', '\\').replace(_sl, f"<i><b>{___sl}</b></i>").replace(":", "[colon]")
243
  if task == 'NLI':
244
  _string += '<br> **Premise**: :'
245
  elif task == 'QA':
 
270
  with tab3:
271
  # present(dataframe, 'Paraphrase', title)
272
  present_dict(data_title, 'Paraphrase')
273
+ # st.markdown(
274
+ # f'<span id="jump">¹</span>*Enlish meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/).*',
275
+ # unsafe_allow_html=True)
276
  else:
277
  st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")
pages/2_😈_Blackbox_Attack.py CHANGED
@@ -1,41 +1,60 @@
1
  import streamlit as st
2
- import os
3
  from streamlit_extras.stateful_button import button
4
-
5
- from transformers import GPT2Tokenizer, GPT2Model
6
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
7
- model = GPT2Model.from_pretrained('gpt2')
8
  import pickle
9
- all_keys = pickle.load(open('keys.pkl', 'rb'))
10
- all_keys = [i.strip() for i in all_keys]
11
  import torch
12
  from copy import deepcopy
13
  from time import time
 
 
14
 
15
- st.title('Blackbox Attack')
16
- st.sidebar.markdown('On this page, we offer a tool for generating replacement words using secret languages.')
 
 
17
 
 
 
 
18
  st.sidebar.markdown('#### Require ')
19
  st.sidebar.markdown('`Input text`: a sentence or paragraph.')
20
  st.sidebar.markdown('`Number of replacements`: the number of secret language samples.')
21
  st.sidebar.markdown('`Steps for searching Secret Langauge`: the steps in the SecretFinding process.')
22
-
23
  st.sidebar.markdown('#### Two methods')
24
- st.sidebar.markdown('1. GPT-2 (Searching secret languages based on GPT-2): this method calculates secret languages using [GPT-2](https://huggingface.co/gpt2).')
25
  st.sidebar.markdown('2. Use the secret language we found on ALBERT, DistillBERT, and Roberta: this method replaces words directly with the secret language dictionary derived from ALBERT, DistillBERT, and Roberta.')
26
 
 
 
 
 
 
 
 
 
27
 
 
 
28
 
29
- def run(model, _bar_text=None, bar=None, text='Which name is also used to describe the Amazon rainforest in English?', loss_funt=torch.nn.MSELoss(), lr=1, noise_mask=[1,2], restarts=10, step=100, device = torch.device('cpu')):
30
- subword_num = model.wte.weight.shape[0]
 
 
 
31
 
32
  _input = tokenizer([text] * restarts, return_tensors="pt")
33
  for k in _input.keys():
34
  _input[k] = _input[k].to(device)
35
 
36
- ori_output = model(**_input)['last_hidden_state']
 
 
 
 
37
 
38
- ori_embedding = model.wte(_input['input_ids']).detach()
39
  ori_embedding.requires_grad = False
40
  ori_word_one_hot = torch.nn.functional.one_hot(_input['input_ids'].detach(), num_classes=subword_num).to(device)
41
 
@@ -52,16 +71,20 @@ def run(model, _bar_text=None, bar=None, text='Which name is also used to descri
52
  for i in range(len(noise_mask)):
53
  _tmp_perturbed_input = ori_word_one_hot[:, noise_mask[i]] + noise[:, i]
54
  _tmp_perturbed_input /= _tmp_perturbed_input.sum(-1, keepdim=True)
55
- perturbed_embedding[:, noise_mask[i]] = torch.matmul(_tmp_perturbed_input, model.wte.weight)
56
 
57
  _input_['inputs_embeds'] = perturbed_embedding
58
- outputs_perturbed = model(**_input_)['last_hidden_state']
 
 
 
 
59
 
60
  loss = loss_funt(ori_output, outputs_perturbed)
61
  loss.backward()
62
  noise.data = (noise.data - lr * noise.grad.detach())
63
  noise.grad.zero_()
64
- _bar_text.text(f'{(time() - start_time) * (step - _i - 1) / (_i + 1):.2f} seconds left')
65
  # validate
66
  with torch.no_grad():
67
  perturbed_inputs = deepcopy(_input)
@@ -73,42 +96,15 @@ def run(model, _bar_text=None, bar=None, text='Which name is also used to descri
73
  perturbed_questions = []
74
  for i in range(restarts):
75
  perturbed_questions.append(tokenizer.decode(perturbed_inputs["input_ids"][i]).split("</s></s>")[0])
76
- for i in range(len(perturbed_questions)):
77
- for j in noise_mask:
78
- _j = tokenizer.decode(perturbed_inputs["input_ids"][i][j])
79
- # print(f'_j {_j}')
80
- perturbed_questions[i] = perturbed_questions[i].replace(_j, f':red[{_j}]')
 
81
  return perturbed_questions
82
 
83
-
84
- from transformers import GPT2Tokenizer, GPT2Model
85
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
86
- model = GPT2Model.from_pretrained('gpt2')
87
- # encoded_input = tokenizer(text, return_tensors='pt')
88
- # output = model(**encoded_input)
89
-
90
- option = st.selectbox(
91
- 'Which method you would like to use?',
92
- ('GPT-2 (Searching secret languages based on GPT-2)', 'Use the secret language we found on ALBERT, DistillBERT, and Roberta.')
93
- )
94
-
95
- def clf_keys():
96
- for key in st.session_state.keys():
97
- if key in ['tokenizer', 'start']:
98
- st.session_state[key] = False
99
- elif 'tokenizer_' in key:
100
- del st.session_state[key]
101
-
102
-
103
- title = st.text_area('Input text.', 'Which name is also used to describe the Amazon rainforest in English?', on_change=clf_keys)
104
-
105
- if option == 'GPT-2 (Searching secret languages based on GPT-2)':
106
- _cols = st.columns(2)
107
- restarts = _cols[0].number_input('Number of replacements.', value=10, min_value=1, step=1, format='%d')
108
- step = _cols[1].number_input('Steps for searching Secret Langauge', value=100, min_value=1, step=1, format='%d')
109
- else:
110
- restarts = st.number_input('Number of replacements.', value=10, min_value=1, step=1, format='%d')
111
-
112
  def get_secret_language(title):
113
  if ord(title[0]) in list(range(48, 57)):
114
  file_name = 'num_dict.pkl'
@@ -129,7 +125,57 @@ def get_secret_language(title):
129
  break
130
  return _sls_id
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  if button('Tokenize', key='tokenizer'):
 
 
 
 
 
 
 
133
  for key in st.session_state.keys():
134
  if key not in ['tokenizer', 'start'] and 'tokenizer_' not in key:
135
  del st.session_state[key]
@@ -164,11 +210,31 @@ if button('Tokenize', key='tokenizer'):
164
  if _index < len(input_ids):
165
  chose_indices.append(_index)
166
  if len(chose_indices):
167
- _bar_text = st.empty()
168
- if option == 'GPT-2 (Searching secret languages based on GPT-2)':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  bar = st.progress(0)
170
- # st.markdown('start')
171
- outputs = run(model, _bar_text=_bar_text, bar=bar, text=title, noise_mask=chose_indices, restarts=restarts, step=step)
 
172
  else:
173
  _new_ids = []
174
  _sl = {}
@@ -184,15 +250,20 @@ if button('Tokenize', key='tokenizer'):
184
  else:
185
  _tmp.append(input_ids[j])
186
  _new_ids.append(_tmp)
187
- # st.markdown(_new_ids)
188
  outputs = [tokenizer.decode(_new_ids[i]).split('</s></s>')[0] for i in range(restarts)]
189
- for i in range(len(outputs)):
190
- for j in _used_sl:
191
- _j = tokenizer.decode(j)
192
- outputs[i] = outputs[i].replace(_j, f':red[{_j}]')
193
-
 
194
  st.success(f'We found {restarts} replacements!', icon="✅")
195
- st.markdown('<br>'.join(outputs), unsafe_allow_html=True)
 
 
 
 
 
196
  else:
197
  st.error('At least choose one subword.')
198
 
 
1
  import streamlit as st
 
2
  from streamlit_extras.stateful_button import button
3
+ import os
4
+ import openai
5
+ from transformers import GPT2Tokenizer, GPT2Model, AutoTokenizer, AutoModelForCausalLM
 
6
  import pickle
 
 
7
  import torch
8
  from copy import deepcopy
9
  from time import time
10
+ from transformers import pipeline, set_seed
11
+ import platform
12
 
13
+ # init
14
+ openai.api_key = os.environ.get('openai_api_key')
15
# Load the list of dictionary keys shipped next to the app and strip
# surrounding whitespace from each entry. The context manager closes the file
# handle, which the previous bare open() call left open.
# NOTE(review): pickle can execute arbitrary code while loading; keys.pkl must
# stay a trusted, repository-controlled file.
with open('keys.pkl', 'rb') as _keys_file:
    all_keys = [i.strip() for i in pickle.load(_keys_file)]
17
 
18
+ set_seed(0)
19
+ # sidebar instructions
20
+ st.sidebar.markdown('On this page, we offer a tool for generating replacement words using secret languages.')
21
  st.sidebar.markdown('#### Require ')
22
  st.sidebar.markdown('`Input text`: a sentence or paragraph.')
23
  st.sidebar.markdown('`Number of replacements`: the number of secret language samples.')
24
  st.sidebar.markdown('`Steps for searching Secret Langauge`: the steps in the SecretFinding process.')
 
25
  st.sidebar.markdown('#### Two methods')
26
+ st.sidebar.markdown('1. Searching secret languages based on models: this method calculates secret languages using [GPT-2](https://huggingface.co/gpt2), [EleutherAI/gpt-neo-1.3B](https://huggingface.co/EleutherAI/gpt-neo-1.3B), [EleutherAI/gpt-neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B), [EleutherAI/gpt-neox-20b](https://huggingface.co/EleutherAI/gpt-neox-20b), or [EleutherAI/gpt-j-6B](https://huggingface.co/EleutherAI/gpt-j-6B).')
27
  st.sidebar.markdown('2. Use the secret language we found on ALBERT, DistillBERT, and Roberta: this method replaces words directly with the secret language dictionary derived from ALBERT, DistillBERT, and Roberta.')
28
 
29
+ st.sidebar.markdown('#### Return')
30
+ st.sidebar.markdown(
31
+ 'To see whether the white attack works on LLMs, we set seed to 0.'
32
+ )
33
+ st.sidebar.markdown(
34
+ 'To see whether the blackbox attack works on LLMs, we also add the response using [Codex](https://openai.com/blog/openai-codex/). '
35
+ 'Specifically, we use the `code-davinci-002` model with 16 max_tokens responses.'
36
+ )
37
 
38
+ # title
39
+ st.title('Blackbox Attack')
40
 
41
+ # online search
42
+ def run(model, tokenizer, embedidng_layer=None, _bar_text=None, bar=None, text='Which name is also used to describe the Amazon rainforest in English?',
43
+ loss_funt=torch.nn.MSELoss(), lr=1, noise_mask=[1,2], restarts=10, step=100, device = torch.device('cpu'),
44
+ sl_paint_red=False, model_choice='GPT-2'):
45
+ subword_num = embedidng_layer.weight.shape[0]
46
 
47
  _input = tokenizer([text] * restarts, return_tensors="pt")
48
  for k in _input.keys():
49
  _input[k] = _input[k].to(device)
50
 
51
+ ori_output = model(**_input)
52
+ if 'last_hidden_state' in ori_output:
53
+ ori_output = ori_output['last_hidden_state']
54
+ else:
55
+ ori_output = ori_output['logits']
56
 
57
+ ori_embedding = embedidng_layer(_input['input_ids']).detach()
58
  ori_embedding.requires_grad = False
59
  ori_word_one_hot = torch.nn.functional.one_hot(_input['input_ids'].detach(), num_classes=subword_num).to(device)
60
 
 
71
  for i in range(len(noise_mask)):
72
  _tmp_perturbed_input = ori_word_one_hot[:, noise_mask[i]] + noise[:, i]
73
  _tmp_perturbed_input /= _tmp_perturbed_input.sum(-1, keepdim=True)
74
+ perturbed_embedding[:, noise_mask[i]] = torch.matmul(_tmp_perturbed_input, embedidng_layer.weight)
75
 
76
  _input_['inputs_embeds'] = perturbed_embedding
77
+ outputs_perturbed = model(**_input_)
78
+ if 'last_hidden_state' in outputs_perturbed:
79
+ outputs_perturbed = outputs_perturbed['last_hidden_state']
80
+ else:
81
+ outputs_perturbed = outputs_perturbed['logits']
82
 
83
  loss = loss_funt(ori_output, outputs_perturbed)
84
  loss.backward()
85
  noise.data = (noise.data - lr * noise.grad.detach())
86
  noise.grad.zero_()
87
+ _bar_text.text(f'Using {model_choice}, {(time() - start_time) * (step - _i - 1) / (_i + 1):.2f} seconds left')
88
  # validate
89
  with torch.no_grad():
90
  perturbed_inputs = deepcopy(_input)
 
96
  perturbed_questions = []
97
  for i in range(restarts):
98
  perturbed_questions.append(tokenizer.decode(perturbed_inputs["input_ids"][i]).split("</s></s>")[0])
99
+ if sl_paint_red:
100
+ for i in range(len(perturbed_questions)):
101
+ for j in noise_mask:
102
+ _j = tokenizer.decode(perturbed_inputs["input_ids"][i][j])
103
+ # print(f'_j {_j}')
104
+ perturbed_questions[i] = perturbed_questions[i].replace(_j, f':red[{_j}]')
105
  return perturbed_questions
106
 
107
+ # get secret language using the found dictionary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  def get_secret_language(title):
109
  if ord(title[0]) in list(range(48, 57)):
110
  file_name = 'num_dict.pkl'
 
125
  break
126
  return _sls_id
127
 
128
+ # openai api
129
def get_codex_response(prompt, engine='code-davinci-002', max_tokens=16):
    """Ask the OpenAI Completion endpoint to continue ``prompt``.

    Args:
        prompt: Text sent to the model as the completion prompt.
        engine: Name of the OpenAI engine to query. Defaults to
            ``'code-davinci-002'``, the value that used to be hard-coded.
        max_tokens: Maximum number of tokens to generate. Defaults to 16,
            the value that used to be hard-coded.

    Returns:
        The generated tokens joined into a single string. If the request or
        the parsing of its reply raises, the exception message is returned
        instead (with any ``org-...`` identifier removed) so the caller can
        display it in place of a completion.
    """
    import re  # local import: only this function needs it

    try:
        response = openai.Completion.create(
            engine=engine,
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=0,
            # The reply text is rebuilt from the logprobs token list below,
            # so logprobs have to be requested.
            logprobs=1,
        )
        output_openai = ''.join(response['choices'][0]['logprobs']['tokens'])
    except Exception as ex:
        # Deliberate best-effort: the message is shown to the user instead of
        # crashing the page. Organization identifiers are scrubbed by pattern
        # rather than by one literal ID embedded in the source.
        output_openai = re.sub(r'org-[A-Za-z0-9]+', '', str(ex))
    return output_openai
142
+
143
+ # help function
144
def clf_keys(state=None):
    """Reset the tokenizer-related entries of the session state.

    The ``'tokenizer'`` and ``'start'`` entries are set to ``False`` and every
    entry whose key contains ``'tokenizer_'`` is deleted; all other entries
    are left untouched.

    Args:
        state: Mutable mapping to clean up. When omitted (as happens when the
            function is used as a zero-argument widget callback),
            ``st.session_state`` is used.
    """
    if state is None:
        state = st.session_state
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, and removing items from a mapping while iterating over its live
    # key view raises RuntimeError for a plain dict.
    for key in list(state.keys()):
        if key in ('tokenizer', 'start'):
            state[key] = False
        elif 'tokenizer_' in key:
            del state[key]
150
+
151
+ # main page
152
+ option = st.selectbox(
153
+ 'Which method you would like to use?',
154
+ ('Searching secret languages based on models', 'Use the secret language we found on ALBERT, DistillBERT, and Roberta.')
155
+ )
156
+
157
+ title = st.text_area('Input text.', 'Which name is also used to describe the Amazon rainforest in English?', on_change=clf_keys)
158
+
159
+ if option == 'Searching secret languages based on models':
160
+ model_choice = st.selectbox(
161
+ 'Which model you would like to use?',
162
+ # ('GPT-2', "EleutherAI/gpt-neo-1.3B", "EleutherAI/gpt-neo-2.7B", "EleutherAI/gpt-neox-20b", "EleutherAI/gpt-j-6B")
163
+ ('GPT-2', "EleutherAI/gpt-neo-1.3B")
164
+ )
165
+ _cols = st.columns(2)
166
+ restarts = _cols[0].number_input('Number of replacements.', value=10, min_value=1, step=1, format='%d')
167
+ step = _cols[1].number_input('Steps for searching Secret Langauge', value=100, min_value=1, step=1, format='%d')
168
+ else:
169
+ restarts = st.number_input('Number of replacements.', value=10, min_value=1, step=1, format='%d')
170
+
171
  if button('Tokenize', key='tokenizer'):
172
+ if option == 'Searching secret languages based on models':
173
+ if model_choice == 'GPT-2':
174
+ tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
175
+ else:
176
+ tokenizer = AutoTokenizer.from_pretrained(model_choice)
177
+ else:
178
+ tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
179
  for key in st.session_state.keys():
180
  if key not in ['tokenizer', 'start'] and 'tokenizer_' not in key:
181
  del st.session_state[key]
 
210
  if _index < len(input_ids):
211
  chose_indices.append(_index)
212
  if len(chose_indices):
213
+ if option == 'Searching secret languages based on models':
214
+ if model_choice == 'GPT-2':
215
+ model = GPT2Model.from_pretrained('gpt2')
216
+ else:
217
+ model = AutoModelForCausalLM.from_pretrained(model_choice)
218
+ generator = pipeline('text-generation', model='gpt2')
219
+ if not platform.system().lower() == 'darwin':
220
+ generator1 = pipeline('text-generation', model='EleutherAI/gpt-neo-1.3B')
221
+ with st.expander('**Original input text**: '+ title):
222
+ output_openai = get_codex_response(title)
223
+ st.markdown(f'The response of GPT-2 with the prompt :blue[{title}]')
224
+ st.markdown('<blockquote>' + generator(title, max_length=30, num_return_sequences=1)[0]['generated_text'].replace(title, '', 1) + '</blockquote>', unsafe_allow_html=True)
225
+ if not platform.system().lower() == 'darwin':
226
+ st.markdown(f'The response of {model_choice} with the prompt :blue[{title}]')
227
+ st.markdown('<blockquote>' + generator1("EleutherAI has", do_sample=True, min_length=50)[0]['generated_text'].replace(title, '', 1) + '</blockquote>', unsafe_allow_html=True)
228
+
229
+ st.markdown(f'The response of [Codex](https://openai.com/blog/openai-codex/) with the prompt :blue[{title}]')
230
+ st.markdown('<blockquote>' + output_openai + '</blockquote>', unsafe_allow_html=True)
231
+
232
+ if option == 'Searching secret languages based on models':
233
+ _bar_text = st.empty()
234
  bar = st.progress(0)
235
+ outputs = run(model, tokenizer, model.wte if model_choice == 'GPT-2' else model.transformer.wte,
236
+ _bar_text=_bar_text, bar=bar, text=title, noise_mask=chose_indices, restarts=restarts, step=step,
237
+ model_choice=model_choice)
238
  else:
239
  _new_ids = []
240
  _sl = {}
 
250
  else:
251
  _tmp.append(input_ids[j])
252
  _new_ids.append(_tmp)
 
253
  outputs = [tokenizer.decode(_new_ids[i]).split('</s></s>')[0] for i in range(restarts)]
254
+ if False:
255
+ original_outputs = outputs
256
+ for i in range(len(outputs)):
257
+ for j in _used_sl:
258
+ _j = tokenizer.decode(j)
259
+ outputs[i] = outputs[i].replace(_j, f':red[{_j}]')
260
  st.success(f'We found {restarts} replacements!', icon="✅")
261
+ # st.markdown('<br>'.join(outputs), unsafe_allow_html=True)
262
+ for i in range(restarts):
263
+ with st.expander(outputs[i]):
264
+ output_openai = get_codex_response(outputs[i])
265
+ st.markdown(f'The response of [Codex](https://openai.com/blog/openai-codex/) with the prompt :blue[{outputs[i]}]')
266
+ st.markdown('<blockquote>' + output_openai + '</blockquote>', unsafe_allow_html=True)
267
  else:
268
  st.error('At least choose one subword.')
269
 
requirements.txt CHANGED
@@ -2,4 +2,5 @@ pandas
2
  PyDictionary
3
  streamlit_extras
4
  transformers
5
- torch
 
 
2
  PyDictionary
3
  streamlit_extras
4
  transformers
5
+ torch
6
+ openai