Spaces:

anonauthors
/

SecretLanguage

Runtime error

App Files Files Community

anonymousauthors committed on Feb 10, 2023

Commit

30c7c90

•

1 Parent(s): 38b6dfd

updates

Browse files

Files changed (3) hide show

pages/0_📙_Dictionary_(Search).py +21 -24
pages/2_😈_Blackbox_Attack.py +134 -63
requirements.txt +2 -1

pages/0_📙_Dictionary_(Search).py CHANGED Viewed

@@ -62,7 +62,7 @@ if title in datas:
     # st.markdown(f"## {title}'s meaning in English[¹](#jump)")
     colored_header(
                 label=f"{title}'s meaning in English[¹](#jump)",
-                description="",
                 color_name="violet-70",
             )
     # write the meaning of input word
@@ -114,36 +114,33 @@ if title in datas:
             if all_sl[i] != '':
                 new_all_sl.append(all_sl[i].replace("\n", "/n").strip())
         all_sl = sorted(new_all_sl)
-        st.markdown(
-            f':red[{len(all_sl)}] secret languages of :blue[{title}] on {task.replace("paraphrase", "Paraphrase")}',
-            unsafe_allow_html=True)
-        special = '"'
-        _title_secret_languages = [
-            # f'[{i}](#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task})'
-            f'<a href="#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task}">{i}</a>'
-            for i in all_sl]
-        st.markdown(
-            '>' + ', '.join(_title_secret_languages).replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
-            unsafe_allow_html=True)
     present_sl_task(secret_language_by_task, 'NLI')
     present_sl_task(secret_language_by_task, 'QA')
     present_sl_task(secret_language_by_task, 'Paraphrase')
-    st.markdown(f"\**Hyperlinks only function when the corresponding tab is open. "
                 f"For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open. However, due to the container property of Hugging Face Space, the hyperlinks might be not able to function.*")
-    st.markdown('\**Due to the grammatical properties of HTML, the layout of this page may vary.*')
-    # st.markdown(f"---")
-    # st.markdown(f"## Examples of replaced sentences")
-    # st.markdown(f"The number following the tasks represents the number of examples found for a particular task, which may be different from the number of secret languages.")
     colored_header(
                 label=f"Examples of replaced sentences",
-                description=f'The number following the tasks represents the number of examples found for a particular task, which may be different from the number of secret languages.',
                 color_name="orange-70",
             )
-    # st.text(','.join(title_secret_languages).replace('<s>', '\<s\>'))
-    # st.dataframe(dataframe)
     _num = Counter(data_title['tasks'])
     tab1, tab2, tab3 = st.tabs([f'NLI ({_num["NLI"]})', f'QA ({_num["QA"]})', f'Paraphrase ({_num["Paraphrase"]})'])
@@ -242,7 +239,7 @@ if title in datas:
                         _string += 'question**: :'
                     elif task == 'Paraphrase':
                         _string += 'sentence 1**: :'
-                    _string += f'red[{_all[_sl]["Replaced hypothesis"][j]}]'.replace('/', '\\').replace(___sl, f"<i><b>{___sl}</b></i>").replace(":", "[colon]")
                     if task == 'NLI':
                         _string += '<br> **Premise**: :'
                     elif task == 'QA':
@@ -273,8 +270,8 @@ if title in datas:
     with tab3:
         # present(dataframe, 'Paraphrase', title)
         present_dict(data_title, 'Paraphrase')
-    st.markdown(
-        f'<span id="jump">¹</span>*Enlish meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/).*',
-        unsafe_allow_html=True)
 else:
     st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")

     # st.markdown(f"## {title}'s meaning in English[¹](#jump)")
     colored_header(
                 label=f"{title}'s meaning in English[¹](#jump)",
+                description="**Enlish meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/)*",
                 color_name="violet-70",
             )
     # write the meaning of input word
             if all_sl[i] != '':
                 new_all_sl.append(all_sl[i].replace("\n", "/n").strip())
         all_sl = sorted(new_all_sl)
+        with st.expander(f'***{len(all_sl)}*** secret languages of ***{title}*** on {task.replace("paraphrase", "Paraphrase")}'):
+        # st.markdown(
+            # f':red[{len(all_sl)}] secret languages of :blue[{title}] on {task.replace("paraphrase", "Paraphrase")}',
+            # unsafe_allow_html=True)
+            special = '"'
+            _title_secret_languages = [
+                # f'[{i}](#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task})'
+                f'<a href="#{i.strip().replace("(", ",,").replace(")", "..").replace("[", ",,,").replace("]", "...").replace(special, "././")}_{task}">{i}</a>'
+                for i in all_sl]
+            st.markdown(
+                # '>' + ', '.join(_title_secret_languages).replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
+                ', '.join(_title_secret_languages).replace('<s>', '\<s\>').replace('$', '\$').replace('~', '\~'),
+                unsafe_allow_html=True)
     present_sl_task(secret_language_by_task, 'NLI')
     present_sl_task(secret_language_by_task, 'QA')
     present_sl_task(secret_language_by_task, 'Paraphrase')
+    st.caption(f"\**Hyperlinks only function when the corresponding tab is open. "
                 f"For example, the hyperlinks in the paraphrase section will only work when the paraphrase tab is open. However, due to the container property of Hugging Face Space, the hyperlinks might be not able to function.*")
+    st.caption('\**Due to the grammatical properties of HTML, the layout of this page may vary.*')
     colored_header(
                 label=f"Examples of replaced sentences",
+                description=f'**The number following the tasks represents the number of examples found for a particular task, which may be different from the number of secret languages.*',
                 color_name="orange-70",
             )
     _num = Counter(data_title['tasks'])
     tab1, tab2, tab3 = st.tabs([f'NLI ({_num["NLI"]})', f'QA ({_num["QA"]})', f'Paraphrase ({_num["Paraphrase"]})'])
                         _string += 'question**: :'
                     elif task == 'Paraphrase':
                         _string += 'sentence 1**: :'
+                    _string += f'red[{_all[_sl]["Replaced hypothesis"][j]}]'.replace('/', '\\').replace(_sl, f"<i><b>{___sl}</b></i>").replace(":", "[colon]")
                     if task == 'NLI':
                         _string += '<br> **Premise**: :'
                     elif task == 'QA':
     with tab3:
         # present(dataframe, 'Paraphrase', title)
         present_dict(data_title, 'Paraphrase')
+    # st.markdown(
+    #     f'<span id="jump">¹</span>*Enlish meaning is supported by [PyDictionary](https://pypi.org/project/PyDictionary/).*',
+    #     unsafe_allow_html=True)
 else:
     st.error(f'{title} is not in the dictionary of Secret Language.', icon="⚠️")

pages/2_😈_Blackbox_Attack.py CHANGED Viewed

@@ -1,41 +1,60 @@
 import streamlit as st
-import os
 from streamlit_extras.stateful_button import button
-from transformers import GPT2Tokenizer, GPT2Model
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-model = GPT2Model.from_pretrained('gpt2')
 import pickle
-all_keys = pickle.load(open('keys.pkl', 'rb'))
-all_keys = [i.strip() for i in all_keys]
 import torch
 from copy import deepcopy
 from time import time
-st.title('Blackbox Attack')
-st.sidebar.markdown('On this page, we offer a tool for generating replacement words using secret languages.')
 st.sidebar.markdown('#### Require ')
 st.sidebar.markdown('`Input text`: a sentence or paragraph.')
 st.sidebar.markdown('`Number of replacements`: the number of secret language samples.')
 st.sidebar.markdown('`Steps for searching Secret Langauge`: the steps in the SecretFinding process.')
 st.sidebar.markdown('#### Two methods')
-st.sidebar.markdown('1. GPT-2 (Searching secret languages based on GPT-2): this method calculates secret languages using [GPT-2](https://huggingface.co/gpt2).')
 st.sidebar.markdown('2. Use the secret language we found on ALBERT, DistillBERT, and Roberta: this method replaces words directly with the secret language dictionary derived from ALBERT, DistillBERT, and Roberta.')
-def run(model, _bar_text=None, bar=None, text='Which name is also used to describe the Amazon rainforest in English?', loss_funt=torch.nn.MSELoss(), lr=1, noise_mask=[1,2], restarts=10, step=100, device = torch.device('cpu')):
-    subword_num = model.wte.weight.shape[0]
     _input = tokenizer([text] * restarts, return_tensors="pt")
     for k in _input.keys():
         _input[k] = _input[k].to(device)
-    ori_output = model(**_input)['last_hidden_state']
-    ori_embedding = model.wte(_input['input_ids']).detach()
     ori_embedding.requires_grad = False
     ori_word_one_hot = torch.nn.functional.one_hot(_input['input_ids'].detach(), num_classes=subword_num).to(device)
@@ -52,16 +71,20 @@ def run(model, _bar_text=None, bar=None, text='Which name is also used to descri
         for i in range(len(noise_mask)):
             _tmp_perturbed_input = ori_word_one_hot[:, noise_mask[i]] + noise[:, i]
             _tmp_perturbed_input /= _tmp_perturbed_input.sum(-1, keepdim=True)
-            perturbed_embedding[:, noise_mask[i]] = torch.matmul(_tmp_perturbed_input, model.wte.weight)
         _input_['inputs_embeds'] = perturbed_embedding
-        outputs_perturbed = model(**_input_)['last_hidden_state']
         loss = loss_funt(ori_output, outputs_perturbed)
         loss.backward()
         noise.data = (noise.data - lr * noise.grad.detach())
         noise.grad.zero_()
-        _bar_text.text(f'{(time() - start_time) * (step - _i - 1) / (_i + 1):.2f} seconds left')
     # validate
     with torch.no_grad():
         perturbed_inputs = deepcopy(_input)
@@ -73,42 +96,15 @@ def run(model, _bar_text=None, bar=None, text='Which name is also used to descri
         perturbed_questions = []
         for i in range(restarts):
             perturbed_questions.append(tokenizer.decode(perturbed_inputs["input_ids"][i]).split("</s></s>")[0])
-    for i in range(len(perturbed_questions)):
-        for j in noise_mask:
-            _j = tokenizer.decode(perturbed_inputs["input_ids"][i][j])
-            # print(f'_j {_j}')
-            perturbed_questions[i] = perturbed_questions[i].replace(_j, f':red[{_j}]')
     return perturbed_questions
-from transformers import GPT2Tokenizer, GPT2Model
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-model = GPT2Model.from_pretrained('gpt2')
-# encoded_input = tokenizer(text, return_tensors='pt')
-# output = model(**encoded_input)
-option = st.selectbox(
-    'Which method you would like to use?',
-    ('GPT-2 (Searching secret languages based on GPT-2)', 'Use the secret language we found on ALBERT, DistillBERT, and Roberta.')
-)
-def clf_keys():
-    for key in st.session_state.keys():
-        if key in ['tokenizer', 'start']:
-            st.session_state[key] = False
-        elif 'tokenizer_' in key:
-            del st.session_state[key]
-title = st.text_area('Input text.', 'Which name is also used to describe the Amazon rainforest in English?', on_change=clf_keys)
-if option == 'GPT-2 (Searching secret languages based on GPT-2)':
-    _cols = st.columns(2)
-    restarts = _cols[0].number_input('Number of replacements.', value=10, min_value=1, step=1, format='%d')
-    step = _cols[1].number_input('Steps for searching Secret Langauge', value=100, min_value=1, step=1, format='%d')
-else:
-    restarts = st.number_input('Number of replacements.', value=10, min_value=1, step=1, format='%d')
 def get_secret_language(title):
     if ord(title[0]) in list(range(48, 57)):
         file_name = 'num_dict.pkl'
@@ -129,7 +125,57 @@ def get_secret_language(title):
                 break
     return _sls_id
 if button('Tokenize', key='tokenizer'):
     for key in st.session_state.keys():
         if key not in ['tokenizer', 'start'] and 'tokenizer_' not in key:
             del st.session_state[key]
@@ -164,11 +210,31 @@ if button('Tokenize', key='tokenizer'):
                     if _index < len(input_ids):
                         chose_indices.append(_index)
         if len(chose_indices):
-            _bar_text = st.empty()
-            if option == 'GPT-2 (Searching secret languages based on GPT-2)':
                 bar = st.progress(0)
-            # st.markdown('start')
-                outputs = run(model, _bar_text=_bar_text, bar=bar, text=title, noise_mask=chose_indices, restarts=restarts, step=step)
             else:
                 _new_ids = []
                 _sl = {}
@@ -184,15 +250,20 @@ if button('Tokenize', key='tokenizer'):
                         else:
                             _tmp.append(input_ids[j])
                     _new_ids.append(_tmp)
-                # st.markdown(_new_ids)
                 outputs = [tokenizer.decode(_new_ids[i]).split('</s></s>')[0] for i in range(restarts)]
-                for i in range(len(outputs)):
-                    for j in _used_sl:
-                        _j = tokenizer.decode(j)
-                        outputs[i] = outputs[i].replace(_j, f':red[{_j}]')
             st.success(f'We found {restarts} replacements!', icon="✅")
-            st.markdown('<br>'.join(outputs), unsafe_allow_html=True)
         else:
             st.error('At least choose one subword.')

 import streamlit as st
 from streamlit_extras.stateful_button import button
+import os
+import openai
+from transformers import GPT2Tokenizer, GPT2Model, AutoTokenizer, AutoModelForCausalLM
 import pickle
 import torch
 from copy import deepcopy
 from time import time
+from transformers import pipeline, set_seed
+import platform
+# init
+# The OpenAI key is read from the environment; it is None when the variable is unset.
+openai.api_key = os.environ.get('openai_api_key')
+# Load the key list through a context manager so the file handle is closed
+# as soon as the pickle has been read.
+# NOTE(review): pickle can execute arbitrary code on load; keys.pkl must be a trusted file.
+with open('keys.pkl', 'rb') as keys_file:
+    all_keys = pickle.load(keys_file)
+all_keys = [i.strip() for i in all_keys]
+# Fix the transformers seed so generation results are repeatable.
+set_seed(0)
+# sidebar instructions
+st.sidebar.markdown('On this page, we offer a tool for generating replacement words using secret languages.')
 st.sidebar.markdown('#### Require ')
 st.sidebar.markdown('`Input text`: a sentence or paragraph.')
 st.sidebar.markdown('`Number of replacements`: the number of secret language samples.')
 st.sidebar.markdown('`Steps for searching Secret Langauge`: the steps in the SecretFinding process.')
 st.sidebar.markdown('#### Two methods')
+st.sidebar.markdown('1. Searching secret languages based on models: this method calculates secret languages using [GPT-2](https://huggingface.co/gpt2), [EleutherAI/gpt-neo-1.3B](https://huggingface.co/EleutherAI/gpt-neo-1.3B), [EleutherAI/gpt-neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B), [EleutherAI/gpt-neox-20b](https://huggingface.co/EleutherAI/gpt-neox-20b), or [EleutherAI/gpt-j-6B](https://huggingface.co/EleutherAI/gpt-j-6B).')
 st.sidebar.markdown('2. Use the secret language we found on ALBERT, DistillBERT, and Roberta: this method replaces words directly with the secret language dictionary derived from ALBERT, DistillBERT, and Roberta.')
+st.sidebar.markdown('#### Return')
+st.sidebar.markdown(
+    'To see whether the white attack works on LLMs, we set seed to 0.'
+    )
+st.sidebar.markdown(
+    'To see whether the blackbox attack works on LLMs, we also add the response using [Codex](https://openai.com/blog/openai-codex/). '
+    'Specifically, we use the `code-davinci-002` model with 16 max_tokens responses.'
+    )
+# title
+st.title('Blackbox Attack')
+# online search
+def run(model, tokenizer, embedidng_layer=None, _bar_text=None, bar=None, text='Which name is also used to describe the Amazon rainforest in English?',
+    loss_funt=torch.nn.MSELoss(), lr=1, noise_mask=[1,2], restarts=10, step=100, device = torch.device('cpu'),
+    sl_paint_red=False, model_choice='GPT-2'):
+    subword_num = embedidng_layer.weight.shape[0]
     _input = tokenizer([text] * restarts, return_tensors="pt")
     for k in _input.keys():
         _input[k] = _input[k].to(device)
+    ori_output = model(**_input)
+    if 'last_hidden_state' in ori_output:
+        ori_output = ori_output['last_hidden_state']
+    else:
+        ori_output = ori_output['logits']
+    ori_embedding = embedidng_layer(_input['input_ids']).detach()
     ori_embedding.requires_grad = False
     ori_word_one_hot = torch.nn.functional.one_hot(_input['input_ids'].detach(), num_classes=subword_num).to(device)
         for i in range(len(noise_mask)):
             _tmp_perturbed_input = ori_word_one_hot[:, noise_mask[i]] + noise[:, i]
             _tmp_perturbed_input /= _tmp_perturbed_input.sum(-1, keepdim=True)
+            perturbed_embedding[:, noise_mask[i]] = torch.matmul(_tmp_perturbed_input, embedidng_layer.weight)
         _input_['inputs_embeds'] = perturbed_embedding
+        outputs_perturbed = model(**_input_)
+        if 'last_hidden_state' in outputs_perturbed:
+            outputs_perturbed = outputs_perturbed['last_hidden_state']
+        else:
+            outputs_perturbed = outputs_perturbed['logits']
         loss = loss_funt(ori_output, outputs_perturbed)
         loss.backward()
         noise.data = (noise.data - lr * noise.grad.detach())
         noise.grad.zero_()
+        _bar_text.text(f'Using {model_choice}, {(time() - start_time) * (step - _i - 1) / (_i + 1):.2f} seconds left')
     # validate
     with torch.no_grad():
         perturbed_inputs = deepcopy(_input)
         perturbed_questions = []
         for i in range(restarts):
             perturbed_questions.append(tokenizer.decode(perturbed_inputs["input_ids"][i]).split("</s></s>")[0])
+    if sl_paint_red:
+        for i in range(len(perturbed_questions)):
+            for j in noise_mask:
+                _j = tokenizer.decode(perturbed_inputs["input_ids"][i][j])
+                # print(f'_j {_j}')
+                perturbed_questions[i] = perturbed_questions[i].replace(_j, f':red[{_j}]')
     return perturbed_questions
+# get secret language using the found dictionary
 def get_secret_language(title):
     if ord(title[0]) in list(range(48, 57)):
         file_name = 'num_dict.pkl'
                 break
     return _sls_id
+# openai api
+def get_codex_response(prompt):
+    """Return the Codex completion for ``prompt``, or the error text on failure.
+
+    Sends ``prompt`` to the ``code-davinci-002`` engine (16 max tokens,
+    temperature 0, logprobs enabled) and joins the returned tokens into one
+    string. If the request raises, the exception message is returned instead,
+    with anything that looks like an OpenAI organization ID removed.
+
+    Args:
+        prompt: Text to complete.
+
+    Returns:
+        The generated text, or the redacted error message.
+    """
+    import re  # local import: only needed to redact error messages
+
+    try:
+        response = openai.Completion.create(
+            engine='code-davinci-002',
+            prompt=prompt,
+            max_tokens=16,
+            temperature=0,
+            logprobs=1
+        )
+        output_openai = ''.join(response['choices'][0]['logprobs']['tokens'])
+    except Exception as ex:
+        # Deliberate best-effort: return the error text rather than raising.
+        # Redact by pattern rather than embedding a specific organization ID
+        # in the source, which would leak the very value being hidden.
+        output_openai = re.sub(r'org-[A-Za-z0-9]{16,}', '', str(ex))
+    return output_openai
+# help function
+def clf_keys():
+    """Reset the tokenizer-related entries of ``st.session_state``.
+
+    The ``tokenizer`` and ``start`` entries are set to ``False``; every other
+    key containing ``tokenizer_`` is deleted. All remaining keys are left
+    untouched.
+    """
+    # Snapshot the keys first: entries are deleted inside the loop, and
+    # removing items from a mapping while iterating over it live is unsafe.
+    for key in list(st.session_state.keys()):
+        if key in ('tokenizer', 'start'):
+            st.session_state[key] = False
+        elif 'tokenizer_' in key:
+            del st.session_state[key]
+# main page
+# Method selector; the chosen string is compared verbatim further down the
+# script, so the option texts must not be altered.
+option = st.selectbox(
+    'Which method you would like to use?',
+    ('Searching secret languages based on models', 'Use the secret language we found on ALBERT, DistillBERT, and Roberta.')
+)
+# Input text; editing it invokes clf_keys through on_change.
+title = st.text_area('Input text.', 'Which name is also used to describe the Amazon rainforest in English?', on_change=clf_keys)
+if option == 'Searching secret languages based on models':
+    # Model-based search: additionally ask for the model and the number of search steps.
+    model_choice = st.selectbox(
+        'Which model you would like to use?',
+        # ('GPT-2', "EleutherAI/gpt-neo-1.3B", "EleutherAI/gpt-neo-2.7B", "EleutherAI/gpt-neox-20b", "EleutherAI/gpt-j-6B")
+        ('GPT-2', "EleutherAI/gpt-neo-1.3B")
+    )
+    # Two side-by-side numeric inputs: replacement count and step count.
+    _cols = st.columns(2)
+    restarts = _cols[0].number_input('Number of replacements.', value=10, min_value=1, step=1, format='%d')
+    step = _cols[1].number_input('Steps for searching Secret Langauge', value=100, min_value=1, step=1, format='%d')
+else:
+    # Dictionary-based method: only the replacement count is requested
+    # (model_choice and step are not defined on this path).
+    restarts = st.number_input('Number of replacements.', value=10, min_value=1, step=1, format='%d')
 if button('Tokenize', key='tokenizer'):
+    if option == 'Searching secret languages based on models':
+        if model_choice == 'GPT-2':
+            tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        else:
+            tokenizer = AutoTokenizer.from_pretrained(model_choice)
+    else:
+        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
     for key in st.session_state.keys():
         if key not in ['tokenizer', 'start'] and 'tokenizer_' not in key:
             del st.session_state[key]
                     if _index < len(input_ids):
                         chose_indices.append(_index)
         if len(chose_indices):
+            if option == 'Searching secret languages based on models':
+                if model_choice == 'GPT-2':
+                    model = GPT2Model.from_pretrained('gpt2')
+                else:
+                    model = AutoModelForCausalLM.from_pretrained(model_choice)
+                generator = pipeline('text-generation', model='gpt2')
+                if not platform.system().lower() == 'darwin':
+                    generator1 = pipeline('text-generation', model='EleutherAI/gpt-neo-1.3B')
+            with st.expander('**Original input text**: '+ title):
+                    output_openai = get_codex_response(title)
+                    st.markdown(f'The response of GPT-2 with the prompt :blue[{title}]')
+                    st.markdown('<blockquote>' + generator(title, max_length=30, num_return_sequences=1)[0]['generated_text'].replace(title, '', 1) + '</blockquote>', unsafe_allow_html=True)
+                    if not platform.system().lower() == 'darwin':
+                        st.markdown(f'The response of {model_choice} with the prompt :blue[{title}]')
+                        st.markdown('<blockquote>' + generator1("EleutherAI has", do_sample=True, min_length=50)[0]['generated_text'].replace(title, '', 1) + '</blockquote>', unsafe_allow_html=True)
+                    st.markdown(f'The response of [Codex](https://openai.com/blog/openai-codex/) with the prompt :blue[{title}]')
+                    st.markdown('<blockquote>' + output_openai + '</blockquote>', unsafe_allow_html=True)
+            if option == 'Searching secret languages based on models':
+                _bar_text = st.empty()
                 bar = st.progress(0)
+                outputs = run(model, tokenizer, model.wte if model_choice == 'GPT-2' else model.transformer.wte,
+                    _bar_text=_bar_text, bar=bar, text=title, noise_mask=chose_indices, restarts=restarts, step=step,
+                    model_choice=model_choice)
             else:
                 _new_ids = []
                 _sl = {}
                         else:
                             _tmp.append(input_ids[j])
                     _new_ids.append(_tmp)
                 outputs = [tokenizer.decode(_new_ids[i]).split('</s></s>')[0] for i in range(restarts)]
+                if False:
+                    original_outputs = outputs
+                    for i in range(len(outputs)):
+                        for j in _used_sl:
+                            _j = tokenizer.decode(j)
+                            outputs[i] = outputs[i].replace(_j, f':red[{_j}]')
             st.success(f'We found {restarts} replacements!', icon="✅")
+            # st.markdown('<br>'.join(outputs), unsafe_allow_html=True)
+            for i in range(restarts):
+                with st.expander(outputs[i]):
+                    output_openai = get_codex_response(outputs[i])
+                    st.markdown(f'The response of [Codex](https://openai.com/blog/openai-codex/) with the prompt :blue[{outputs[i]}]')
+                    st.markdown('<blockquote>' + output_openai + '</blockquote>', unsafe_allow_html=True)
         else:
             st.error('At least choose one subword.')

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ pandas
 PyDictionary
 streamlit_extras
 transformers
-torch

 PyDictionary
 streamlit_extras
 transformers
+torch
+openai