emmatliu committed on
Commit
66afe1a
1 Parent(s): 694a2f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -20
app.py CHANGED
@@ -5,14 +5,14 @@ import numpy as np
5
  from biases_lexical_content import compute_lexical_content
6
  from ls_classifier import compute_sentiment_and_formality
7
  from agentic_classifier import compute_agentic_communal
8
- from hallucination_detection import detect_hallucinations
9
  from ttest import compute_ttest
10
 
11
  st.header("LLM Reference Letter Biases")
12
 
13
  st.write("**[(Wan et al., 2023)](https://arxiv.org/abs/2310.09219)** explores how gender biases manifest in the LLM generation of reference letters by analyzing the language style and lexical content of reference letters generated for female candidates compared to male candidates. For language style, we test for formality, positivity, and agency, and for lexical content, we identify and compare the most salient words in the body of female and male letters.")
14
  st.write("For analyzing language style and lexical content bias, your uploaded files should have a column called **'text'** which contains the LLM-generated reference letters.")
15
- st.write(" For analysis of hallucination bias, your uploaded files should also include an 'info' column associated with each generated letter which is the \"ground truth\" for that candidate against which hallucinations can be measured. Also, please run the files through the language style bias analysis first and use the resulting files.")
16
 
17
  cols = st.columns(2)
18
 
@@ -25,7 +25,7 @@ with cols[0]:
25
  if ltr_list_2_file is not None:
26
  ltr_list_2 = pd.read_csv(ltr_list_2_file)
27
  #st.write(ltr_list_2)
28
- analysis = st.selectbox("Choose analysis to run", ("Lexical Content Bias","Language Style Bias","Hallucination Bias"))
29
  b = st.button("Run analysis")
30
  with cols[1]:
31
  if b:
@@ -58,19 +58,19 @@ with cols[1]:
58
  st.subheader("T-test Values")
59
  results = compute_ttest(lsb_m, lsb_f)
60
  st.table(results)
61
- elif analysis == "Hallucination Bias":
62
- hal_f = detect_hallucinations(ltr_list_1)
63
- hal_m = detect_hallucinations(ltr_list_2)
64
 
65
- # Once we've detected the hallucinations, we now want to run the language style bias analysis on the results.
66
 
67
- hal_lsb_f = compute_agentic_communal(compute_sentiment_and_formality(hal_f, hallucination=True), hallucination=True)
68
- hal_lsb_m = compute_agentic_communal(compute_sentiment_and_formality(hal_m, hallucination=True), hallucination=True)
69
 
70
- # Finally, ttest
71
 
72
- results = compute_ttest(hal_lsb_m, hal_lsb_f, hallucination=True)
73
- st.table(results)
74
 
75
  st.write('----')
76
 
@@ -93,12 +93,12 @@ lc_alpaca = ['actor, listeners, fellowship, man, entertainer, needs, collection,
93
  'actress, grace, consummate, chops, none, beauty, game, consideration, future, up',
94
  'impeccable, beautiful, inspiring, illustrious, organizational, prepared, responsible, highest, ready, remarkable']
95
 
96
- hal_columns = ['(F) Formality T-test', '(M) Formality T-test', '(F) Positivity T-test', '(M) Positivity T-test',
97
- '(F) Agency T-test', '(M) Agency T-test']
98
- hal_gpt = [1.00, 1.28e-14, 1.00, 8.28e-09, 3.05e-12, 1.00]
99
- hal_alpaca = [4.20e-180, 1.00, 0.99, 6.05e-11, 4.28e-10, 1.00]
100
 
101
- tab_lc, tab_ls, tab_hal = st.tabs(['Lexical Content', 'Language Style', 'Hallucination'])
102
 
103
  with tab_lc:
104
  lc_df = pd.DataFrame([lc_gpt, lc_alpaca], columns=lc_columns, index=['ChatGPT','Alpaca'])
@@ -106,9 +106,9 @@ with tab_lc:
106
  with tab_ls:
107
  ls_df = pd.DataFrame([ls_gpt, ls_alpaca], columns=ls_columns, index=['ChatGPT','Alpaca'])
108
  st.dataframe(ls_df)
109
- with tab_hal:
110
- hal_df = pd.DataFrame([hal_gpt, hal_alpaca], columns = hal_columns, index=['ChatGPT','Alpaca'])
111
- st.dataframe(hal_df)
112
 
113
  st.write('----')
114
 
 
5
  from biases_lexical_content import compute_lexical_content
6
  from ls_classifier import compute_sentiment_and_formality
7
  from agentic_classifier import compute_agentic_communal
8
+ # from hallucination_detection import detect_hallucinations
9
  from ttest import compute_ttest
10
 
11
  st.header("LLM Reference Letter Biases")
12
 
13
  st.write("**[(Wan et al., 2023)](https://arxiv.org/abs/2310.09219)** explores how gender biases manifest in the LLM generation of reference letters by analyzing the language style and lexical content of reference letters generated for female candidates compared to male candidates. For language style, we test for formality, positivity, and agency, and for lexical content, we identify and compare the most salient words in the body of female and male letters.")
14
  st.write("For analyzing language style and lexical content bias, your uploaded files should have a column called **'text'** which contains the LLM-generated reference letters.")
15
+ st.write("It is currently not possible to run hallucination bias analysis due to memory constraints. Please see the [GitHub repository](https://github.com/uclanlp/biases-llm-reference-letters/) which explains how to run the analysis locally.")
16
 
17
  cols = st.columns(2)
18
 
 
25
  if ltr_list_2_file is not None:
26
  ltr_list_2 = pd.read_csv(ltr_list_2_file)
27
  #st.write(ltr_list_2)
28
+ analysis = st.selectbox("Choose analysis to run", ("Lexical Content Bias","Language Style Bias"))
29
  b = st.button("Run analysis")
30
  with cols[1]:
31
  if b:
 
58
  st.subheader("T-test Values")
59
  results = compute_ttest(lsb_m, lsb_f)
60
  st.table(results)
61
+ # elif analysis == "Hallucination Bias":
62
+ # hal_f = detect_hallucinations(ltr_list_1)
63
+ # hal_m = detect_hallucinations(ltr_list_2)
64
 
65
+ # # Once we've detected the hallucinations, we now want to run the language style bias analysis on the results.
66
 
67
+ # hal_lsb_f = compute_agentic_communal(compute_sentiment_and_formality(hal_f, hallucination=True), hallucination=True)
68
+ # hal_lsb_m = compute_agentic_communal(compute_sentiment_and_formality(hal_m, hallucination=True), hallucination=True)
69
 
70
+ # # Finally, ttest
71
 
72
+ # results = compute_ttest(hal_lsb_m, hal_lsb_f, hallucination=True)
73
+ # st.table(results)
74
 
75
  st.write('----')
76
 
 
93
  'actress, grace, consummate, chops, none, beauty, game, consideration, future, up',
94
  'impeccable, beautiful, inspiring, illustrious, organizational, prepared, responsible, highest, ready, remarkable']
95
 
96
+ # hal_columns = ['(F) Formality T-test', '(M) Formality T-test', '(F) Positivity T-test', '(M) Positivity T-test',
97
+ # '(F) Agency T-test', '(M) Agency T-test']
98
+ # hal_gpt = [1.00, 1.28e-14, 1.00, 8.28e-09, 3.05e-12, 1.00]
99
+ # hal_alpaca = [4.20e-180, 1.00, 0.99, 6.05e-11, 4.28e-10, 1.00]
100
 
101
+ tab_lc, tab_ls = st.tabs(['Lexical Content', 'Language Style'])
102
 
103
  with tab_lc:
104
  lc_df = pd.DataFrame([lc_gpt, lc_alpaca], columns=lc_columns, index=['ChatGPT','Alpaca'])
 
106
  with tab_ls:
107
  ls_df = pd.DataFrame([ls_gpt, ls_alpaca], columns=ls_columns, index=['ChatGPT','Alpaca'])
108
  st.dataframe(ls_df)
109
+ # with tab_hal:
110
+ # hal_df = pd.DataFrame([hal_gpt, hal_alpaca], columns = hal_columns, index=['ChatGPT','Alpaca'])
111
+ # st.dataframe(hal_df)
112
 
113
  st.write('----')
114