taka-yamakoshi committed on
Commit
4ae941b
1 Parent(s): 0aa4961
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -58,7 +58,7 @@ def clear_data():
58
  del st.session_state[key]
59
 
60
  def annotate_mask(sent_id,sent):
61
- st.write(f'Sentence {sent_id}')
62
  input_sent = tokenizer(sent).input_ids
63
  decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
64
  st.session_state[f'decoded_sent_{sent_id}'] = decoded_sent
@@ -77,7 +77,7 @@ def annotate_mask(sent_id,sent):
77
  mask_locs=st.session_state[f'mask_locs_{sent_id}'])
78
 
79
  def annotate_options(sent_id,sent):
80
- st.write(f'Sentence {sent_id}')
81
  input_sent = tokenizer(sent).input_ids
82
  decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
83
  char_nums = [len(word)+2 for word in decoded_sent]
@@ -177,7 +177,7 @@ if __name__=='__main__':
177
  st.session_state['page_status'] = 'type_in'
178
 
179
  if st.session_state['page_status']=='type_in':
180
- show_instruction('1. Type in the sentences and click "Tokenize"')
181
  sent_1 = st.text_input('Sentence 1',value="Paul tried to call George on the phone, but he wasn't successful.")
182
  sent_2 = st.text_input('Sentence 2',value="Paul tried to call George on the phone, but he wasn't available.")
183
  if st.button('Tokenize'):
@@ -190,7 +190,7 @@ if __name__=='__main__':
190
  sent_1 = st.session_state['sent_1']
191
  sent_2 = st.session_state['sent_2']
192
 
193
- show_instruction('2. Select sites to mask out and click "Confirm"')
194
  annotate_mask(1,sent_1)
195
  annotate_mask(2,sent_2)
196
  if st.button('Confirm',key='mask'):
@@ -212,12 +212,12 @@ if __name__=='__main__':
212
  with main_area.container():
213
  sent_1 = st.session_state['sent_1']
214
  sent_2 = st.session_state['sent_2']
215
- show_annotated_sentence(st.session_state['decoded_sent_1'],
216
- option_locs=st.session_state['option_locs_1'],
217
- mask_locs=st.session_state['mask_locs_1'])
218
- show_annotated_sentence(st.session_state['decoded_sent_2'],
219
- option_locs=st.session_state['option_locs_2'],
220
- mask_locs=st.session_state['mask_locs_2'])
221
 
222
  option_1_locs, option_2_locs = {}, {}
223
  pron_locs = {}
@@ -256,7 +256,10 @@ if __name__=='__main__':
256
  [probs_original[0,1][0],probs_original[1,1][0]]],
257
  columns=[tokenizer.decode(option_1_tokens),tokenizer.decode(option_2_tokens)],
258
  index=['Sentence 1','Sentence 2'])
259
- st.dataframe(df.style.highlight_max(axis=1))
 
 
 
260
 
261
  compare_1 = np.array(masked_ids_option_1['sent_1'])!=np.array(masked_ids_option_1['sent_2'])
262
  compare_2 = np.array(masked_ids_option_2['sent_1'])!=np.array(masked_ids_option_2['sent_2'])
@@ -268,7 +271,7 @@ if __name__=='__main__':
268
  assert np.all(np.array(option_1_locs['sent_1'])==np.array(option_1_locs['sent_2']))
269
  assert np.all(np.array(option_2_locs['sent_1'])==np.array(option_2_locs['sent_2']))
270
  token_id_list = pron_locs['sent_1'] + option_1_locs['sent_1'] + option_2_locs['sent_1'] + context_locs
271
- st.write(token_id_list)
272
 
273
  effect_array = []
274
  for token_id in token_id_list:
 
58
  del st.session_state[key]
59
 
60
  def annotate_mask(sent_id,sent):
61
+ show_instruction(f'Sentence {sent_id}',fontsize=12)
62
  input_sent = tokenizer(sent).input_ids
63
  decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
64
  st.session_state[f'decoded_sent_{sent_id}'] = decoded_sent
 
77
  mask_locs=st.session_state[f'mask_locs_{sent_id}'])
78
 
79
  def annotate_options(sent_id,sent):
80
+ show_instruction(f'Sentence {sent_id}',fontsize=12)
81
  input_sent = tokenizer(sent).input_ids
82
  decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
83
  char_nums = [len(word)+2 for word in decoded_sent]
 
177
  st.session_state['page_status'] = 'type_in'
178
 
179
  if st.session_state['page_status']=='type_in':
180
+ show_instruction('1. Type in the sentences and click "Tokenize"',fontsize=16)
181
  sent_1 = st.text_input('Sentence 1',value="Paul tried to call George on the phone, but he wasn't successful.")
182
  sent_2 = st.text_input('Sentence 2',value="Paul tried to call George on the phone, but he wasn't available.")
183
  if st.button('Tokenize'):
 
190
  sent_1 = st.session_state['sent_1']
191
  sent_2 = st.session_state['sent_2']
192
 
193
+ show_instruction('2. Select sites to mask out and click "Confirm"',fontsize=16)
194
  annotate_mask(1,sent_1)
195
  annotate_mask(2,sent_2)
196
  if st.button('Confirm',key='mask'):
 
212
  with main_area.container():
213
  sent_1 = st.session_state['sent_1']
214
  sent_2 = st.session_state['sent_2']
215
+ #show_annotated_sentence(st.session_state['decoded_sent_1'],
216
+ # option_locs=st.session_state['option_locs_1'],
217
+ # mask_locs=st.session_state['mask_locs_1'])
218
+ #show_annotated_sentence(st.session_state['decoded_sent_2'],
219
+ # option_locs=st.session_state['option_locs_2'],
220
+ # mask_locs=st.session_state['mask_locs_2'])
221
 
222
  option_1_locs, option_2_locs = {}, {}
223
  pron_locs = {}
 
256
  [probs_original[0,1][0],probs_original[1,1][0]]],
257
  columns=[tokenizer.decode(option_1_tokens),tokenizer.decode(option_2_tokens)],
258
  index=['Sentence 1','Sentence 2'])
259
+ cols = st.columns(3)
260
+ with cols[1]:
261
+ show_instruction('Prob. of predicting each option in each sentence',fontsize=12)
262
+ st.dataframe(df.style.highlight_max(axis=1),use_container_width=True)
263
 
264
  compare_1 = np.array(masked_ids_option_1['sent_1'])!=np.array(masked_ids_option_1['sent_2'])
265
  compare_2 = np.array(masked_ids_option_2['sent_1'])!=np.array(masked_ids_option_2['sent_2'])
 
271
  assert np.all(np.array(option_1_locs['sent_1'])==np.array(option_1_locs['sent_2']))
272
  assert np.all(np.array(option_2_locs['sent_1'])==np.array(option_2_locs['sent_2']))
273
  token_id_list = pron_locs['sent_1'] + option_1_locs['sent_1'] + option_2_locs['sent_1'] + context_locs
274
+ #st.write(token_id_list)
275
 
276
  effect_array = []
277
  for token_id in token_id_list: