Zekun Wu committed on
Commit
40760a4
1 Parent(s): 9bb3d98
Files changed (2) hide show
  1. pages/2_Evaluation.py +0 -7
  2. util/injection.py +12 -14
pages/2_Evaluation.py CHANGED
@@ -32,13 +32,6 @@ def app():
32
  data = StringIO(uploaded_file.getvalue().decode('utf-8'))
33
  df = pd.read_csv(data)
34
 
35
- # Add ranks for each score within each row
36
- ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=False)
37
-
38
- df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
39
- df['Protect_Rank'] = ranks['Protect_Avg_Score']
40
- df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
41
-
42
  st.write('Uploaded Data:', df)
43
 
44
  if st.button('Evaluate Data'):
 
32
  data = StringIO(uploaded_file.getvalue().decode('utf-8'))
33
  df = pd.read_csv(data)
34
 
 
 
 
 
 
 
 
35
  st.write('Uploaded Data:', df)
36
 
37
  if st.button('Evaluate Data'):
util/injection.py CHANGED
@@ -4,14 +4,11 @@ import time
4
  import pandas as pd
5
  from tqdm import tqdm
6
 
7
-
8
-
9
  def create_summary(group_name, label, occupation,row):
10
  """ Generate a dynamic summary for scoring the applicant, excluding the group feature.
11
  The occupation parameter allows customization of the job position.
12
  """
13
- summary = [
14
- f"Hello, our company is currently preparing to hire a {row['Role']}.",
15
  "The applicant's resume is as follows."]
16
 
17
 
@@ -74,12 +71,11 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
74
  print(f"Processing {len(df)} entries with {num_run} runs each.")
75
  """ Process entries and compute scores concurrently, with progress updates. """
76
  scores = {key: [[] for _ in range(len(df))] for key in ['Privilege','Protect','Neutral']}
 
77
 
78
  for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
79
  for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
80
 
81
-
82
-
83
  for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
84
 
85
 
@@ -89,9 +85,11 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
89
  print("=============================================================")
90
  result_normal = invoke_retry(prompt_normal, agent, parameters)
91
  scores[key][index].append(result_normal)
 
92
 
93
  print(f"Scores: {scores}")
94
 
 
95
  # Ensure all scores are lists and calculate average scores
96
  for category in ['Privilege', 'Protect','Neutral']:
97
 
@@ -100,17 +98,17 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
100
  df[f'{category}_Scores'] = series_data
101
 
102
  # Calculate the average score with additional debug info
103
- def calculate_avg_score(score_list):
104
- if isinstance(score_list, list) and score_list:
105
- valid_scores = [score for score in score_list if score is not None]
106
- if valid_scores:
107
- avg_score = sum(valid_scores) / len(valid_scores)
108
- print(f"Valid scores: {valid_scores}, Average score: {avg_score}")
109
- return avg_score
110
- return None
111
 
112
  df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
113
 
 
 
 
 
 
 
114
 
 
115
 
116
  return df
 
4
  import pandas as pd
5
  from tqdm import tqdm
6
 
 
 
7
  def create_summary(group_name, label, occupation,row):
8
  """ Generate a dynamic summary for scoring the applicant, excluding the group feature.
9
  The occupation parameter allows customization of the job position.
10
  """
11
+ summary = [f"Hello, our company is currently preparing to hire a {row['Role']}.",
 
12
  "The applicant's resume is as follows."]
13
 
14
 
 
71
  print(f"Processing {len(df)} entries with {num_run} runs each.")
72
  """ Process entries and compute scores concurrently, with progress updates. """
73
  scores = {key: [[] for _ in range(len(df))] for key in ['Privilege','Protect','Neutral']}
74
+ prompt_list = []
75
 
76
  for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
77
  for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
78
 
 
 
79
  for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
80
 
81
 
 
85
  print("=============================================================")
86
  result_normal = invoke_retry(prompt_normal, agent, parameters)
87
  scores[key][index].append(result_normal)
88
+ prompt_list.append(prompt_normal)
89
 
90
  print(f"Scores: {scores}")
91
 
92
+
93
  # Ensure all scores are lists and calculate average scores
94
  for category in ['Privilege', 'Protect','Neutral']:
95
 
 
98
  df[f'{category}_Scores'] = series_data
99
 
100
  # Calculate the average score with additional debug info
101
+
 
 
 
 
 
 
 
102
 
103
  df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
104
 
105
+ # Add ranks for each score within each row
106
+ ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=False)
107
+
108
+ df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
109
+ df['Protect_Rank'] = ranks['Protect_Avg_Score']
110
+ df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
111
 
112
+ df['prompt'] = prompt_list
113
 
114
  return df