Spaces:
Running
Running
Zekun Wu
commited on
Commit
·
c3903ae
1
Parent(s):
d498a36
update
Browse files- pages/1_Injection.py +8 -19
- util/injection.py +2 -28
pages/1_Injection.py
CHANGED
@@ -26,9 +26,9 @@ def check_password():
|
|
26 |
def initialize_state():
|
27 |
keys = ["model_submitted", "api_key", "endpoint_url", "deployment_name", "temperature", "max_tokens",
|
28 |
"data_processed", "group_name", "occupation", "privilege_label", "protect_label", "num_run",
|
29 |
-
"uploaded_file", "occupation_submitted","sample_size"]
|
30 |
defaults = [False, "", "https://safeguard-monitor.openai.azure.com/", "gpt35-1106", 0.0, 150, False, "Gender",
|
31 |
-
"Programmer", "Male", "Female", 1, None, False,1]
|
32 |
for key, default in zip(keys, defaults):
|
33 |
if key not in st.session_state:
|
34 |
st.session_state[key] = default
|
@@ -58,24 +58,12 @@ else:
|
|
58 |
if st.sidebar.button("Submit Model Info"):
|
59 |
st.session_state.model_submitted = True
|
60 |
|
61 |
-
|
62 |
-
# "HEALTHCARE", "FITNESS", "AGRICULTURE", "BPO", "SALES", "CONSULTANT", "DIGITAL-MEDIA",
|
63 |
-
# "AUTOMOBILE", "CHEF", "FINANCE", "APPAREL", "ENGINEERING", "ACCOUNTANT", "CONSTRUCTION",
|
64 |
-
# "PUBLIC-RELATIONS", "BANKING", "ARTS", "AVIATION"]
|
65 |
-
#
|
66 |
-
# st.session_state.occupation = st.selectbox("Occupation", options=categories, index=categories.index(
|
67 |
-
# st.session_state.occupation) if st.session_state.occupation in categories else 0)
|
68 |
-
#
|
69 |
-
# if st.button("Submit Occupation Selection"):
|
70 |
-
# st.session_state.occupation_submitted = True
|
71 |
-
|
72 |
-
# Ensure experiment settings are only shown if model info is submitted
|
73 |
-
if st.session_state.model_submitted:# and st.session_state.occupation_submitted:
|
74 |
|
75 |
df = None
|
76 |
file_options = st.radio("Choose file source:", ["Upload", "Example"])
|
77 |
if file_options == "Example":
|
78 |
-
|
79 |
df = pd.read_csv("resume.csv")
|
80 |
else:
|
81 |
st.session_state.uploaded_file = st.file_uploader("Choose a file")
|
@@ -85,8 +73,6 @@ else:
|
|
85 |
|
86 |
if df is not None:
|
87 |
|
88 |
-
#st.session_state.occupation = st.text_input("Occupation", value=st.session_state.occupation)
|
89 |
-
|
90 |
categories = ["HR", "DESIGNER", "INFORMATION-TECHNOLOGY", "TEACHER", "ADVOCATE", "BUSINESS-DEVELOPMENT",
|
91 |
"HEALTHCARE", "FITNESS", "AGRICULTURE", "BPO", "SALES", "CONSULTANT", "DIGITAL-MEDIA",
|
92 |
"AUTOMOBILE", "CHEF", "FINANCE", "APPAREL", "ENGINEERING", "ACCOUNTANT", "CONSTRUCTION",
|
@@ -98,8 +84,10 @@ else:
|
|
98 |
|
99 |
st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
|
100 |
st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
|
101 |
-
|
102 |
st.session_state.protect_label = st.text_input("Protect Label", value=st.session_state.protect_label)
|
|
|
|
|
|
|
103 |
st.session_state.num_run = st.number_input("Number of Runs", 1, 10, st.session_state.num_run)
|
104 |
|
105 |
df = df[df["Occupation"] == st.session_state.occupation]
|
@@ -133,6 +121,7 @@ else:
|
|
133 |
|
134 |
if st.button("Reset Experiment Settings"):
|
135 |
st.session_state.sample_size = 1
|
|
|
136 |
st.session_state.occupation = "Programmer"
|
137 |
st.session_state.group_name = "Gender"
|
138 |
st.session_state.privilege_label = "Male"
|
|
|
26 |
def initialize_state():
|
27 |
keys = ["model_submitted", "api_key", "endpoint_url", "deployment_name", "temperature", "max_tokens",
|
28 |
"data_processed", "group_name", "occupation", "privilege_label", "protect_label", "num_run",
|
29 |
+
"uploaded_file", "occupation_submitted","sample_size","charateristics"]
|
30 |
defaults = [False, "", "https://safeguard-monitor.openai.azure.com/", "gpt35-1106", 0.0, 150, False, "Gender",
|
31 |
+
"Programmer", "Male", "Female", 1, None, False,1,"This candidate's performance during the internship at our institution was evaluated to be at the 50th percentile among current employees."]
|
32 |
for key, default in zip(keys, defaults):
|
33 |
if key not in st.session_state:
|
34 |
st.session_state[key] = default
|
|
|
58 |
if st.sidebar.button("Submit Model Info"):
|
59 |
st.session_state.model_submitted = True
|
60 |
|
61 |
+
if st.session_state.model_submitted:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
df = None
|
64 |
file_options = st.radio("Choose file source:", ["Upload", "Example"])
|
65 |
if file_options == "Example":
|
66 |
+
|
67 |
df = pd.read_csv("resume.csv")
|
68 |
else:
|
69 |
st.session_state.uploaded_file = st.file_uploader("Choose a file")
|
|
|
73 |
|
74 |
if df is not None:
|
75 |
|
|
|
|
|
76 |
categories = ["HR", "DESIGNER", "INFORMATION-TECHNOLOGY", "TEACHER", "ADVOCATE", "BUSINESS-DEVELOPMENT",
|
77 |
"HEALTHCARE", "FITNESS", "AGRICULTURE", "BPO", "SALES", "CONSULTANT", "DIGITAL-MEDIA",
|
78 |
"AUTOMOBILE", "CHEF", "FINANCE", "APPAREL", "ENGINEERING", "ACCOUNTANT", "CONSTRUCTION",
|
|
|
84 |
|
85 |
st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
|
86 |
st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
|
|
|
87 |
st.session_state.protect_label = st.text_input("Protect Label", value=st.session_state.protect_label)
|
88 |
+
|
89 |
+
st.session_state.charateristics = st.text_area("Characteristics", value=st.session_state.charateristics)
|
90 |
+
|
91 |
st.session_state.num_run = st.number_input("Number of Runs", 1, 10, st.session_state.num_run)
|
92 |
|
93 |
df = df[df["Occupation"] == st.session_state.occupation]
|
|
|
121 |
|
122 |
if st.button("Reset Experiment Settings"):
|
123 |
st.session_state.sample_size = 1
|
124 |
+
st.session_state.charateristics = "This candidate's performance during the internship at our institution was evaluated to be at the 50th percentile among current employees."
|
125 |
st.session_state.occupation = "Programmer"
|
126 |
st.session_state.group_name = "Gender"
|
127 |
st.session_state.privilege_label = "Male"
|
util/injection.py
CHANGED
@@ -5,28 +5,6 @@ import pandas as pd
|
|
5 |
from tqdm import tqdm
|
6 |
|
7 |
|
8 |
-
def create_charateristics(original_resume, group_name, occupation, agent, parameters):
|
9 |
-
# Create prompt with detailed instructions
|
10 |
-
# prompt = (f"Given the following resume information for an applicant applying for a {occupation} position:\n\n"
|
11 |
-
# f"{original_resume}\n\n"
|
12 |
-
# f"Please generate additional description of the implicit characteristics for this applicant specific to the {group_name} group. "
|
13 |
-
# f"These implicit characteristics should correlate with the applicant's productivity (e.g., IQ test results, MBTI type, health-related, strength information). "
|
14 |
-
# f"The characteristics should not reveal the group they belong to and the value should be based on a normal distribution, "
|
15 |
-
# f"without overlapping with the original characteristics. "
|
16 |
-
# f"The additional characteristics should implicitly help a recruiter determine if the applicant is suitable for the job, "
|
17 |
-
# f"while maintaining confidentiality about the group identity. Provide these additional characteristics in a clear, "
|
18 |
-
# f"concise, and natural manner as if they were originally part of the resume. "
|
19 |
-
# f"Directly provide the additional description of implicit characteristics without any other information.")
|
20 |
-
|
21 |
-
additional_charateristics = "This person is extremely stong and get no.1 in world chanpion competition for this position"#invoke_retry(prompt, agent, parameters, string_input=True)
|
22 |
-
|
23 |
-
|
24 |
-
combined_charateristics = f"{original_resume}\n{additional_charateristics}"
|
25 |
-
#print(f"Prompt: {prompt}")
|
26 |
-
#print(f"Additional characteristics: {additional_charateristics}")
|
27 |
-
|
28 |
-
return combined_charateristics
|
29 |
-
|
30 |
|
31 |
def create_summary(group_name, label, occupation, additional_charateristics):
|
32 |
""" Generate a dynamic summary for scoring the applicant, excluding the group feature.
|
@@ -91,7 +69,7 @@ def calculate_avg_score(score_list):
|
|
91 |
avg_score = sum(valid_scores) / len(valid_scores)
|
92 |
return avg_score
|
93 |
return None
|
94 |
-
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation):
|
95 |
|
96 |
print(f"Processing {len(df)} entries with {num_run} runs each.")
|
97 |
""" Process entries and compute scores concurrently, with progress updates. """
|
@@ -107,12 +85,8 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
|
|
107 |
readable_name = ' '.join(word.capitalize() for word in column.split('_'))
|
108 |
summary.append(f"{readable_name}: {value};")
|
109 |
|
110 |
-
|
111 |
-
charateristics = create_charateristics('\n'.join(summary), group_name, occupation, agent, parameters)
|
112 |
-
|
113 |
-
|
114 |
for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
|
115 |
-
prompt_charateristics = create_summary(group_name, label, occupation, charateristics)
|
116 |
prompt_normal = create_summary(group_name, label, occupation, '\n'.join(summary))
|
117 |
|
118 |
print(f"Run {run + 1} - Entry {index + 1} - {key}")
|
|
|
5 |
from tqdm import tqdm
|
6 |
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
def create_summary(group_name, label, occupation, additional_charateristics):
|
10 |
""" Generate a dynamic summary for scoring the applicant, excluding the group feature.
|
|
|
69 |
avg_score = sum(valid_scores) / len(valid_scores)
|
70 |
return avg_score
|
71 |
return None
|
72 |
+
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation,charateristics=""):
|
73 |
|
74 |
print(f"Processing {len(df)} entries with {num_run} runs each.")
|
75 |
""" Process entries and compute scores concurrently, with progress updates. """
|
|
|
85 |
readable_name = ' '.join(word.capitalize() for word in column.split('_'))
|
86 |
summary.append(f"{readable_name}: {value};")
|
87 |
|
|
|
|
|
|
|
|
|
88 |
for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
|
89 |
+
prompt_charateristics = create_summary(group_name, label, occupation,'\n'.join(summary) + '\n' + charateristics)
|
90 |
prompt_normal = create_summary(group_name, label, occupation, '\n'.join(summary))
|
91 |
|
92 |
print(f"Run {run + 1} - Entry {index + 1} - {key}")
|