Spaces:

holistic-ai
/

job-fair

Running

App Files Community

Zekun Wu committed on May 29, 2024

Commit

40760a4

•

1 Parent(s): 9bb3d98

update

Browse files

Files changed (2) hide show

pages/2_Evaluation.py +0 -7
util/injection.py +12 -14

pages/2_Evaluation.py CHANGED Viewed

@@ -32,13 +32,6 @@ def app():
             data = StringIO(uploaded_file.getvalue().decode('utf-8'))
             df = pd.read_csv(data)
-            # Add ranks for each score within each row
-            ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=False)
-            df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
-            df['Protect_Rank'] = ranks['Protect_Avg_Score']
-            df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
             st.write('Uploaded Data:', df)
             if st.button('Evaluate Data'):

             data = StringIO(uploaded_file.getvalue().decode('utf-8'))
             df = pd.read_csv(data)
             st.write('Uploaded Data:', df)
             if st.button('Evaluate Data'):

util/injection.py CHANGED Viewed

@@ -4,14 +4,11 @@ import time
 import pandas as pd
 from tqdm import tqdm
 def create_summary(group_name, label, occupation,row):
     """ Generate a dynamic summary for scoring the applicant, excluding the group feature.
         The occupation parameter allows customization of the job position.
     """
-    summary = [
-        f"Hello, our company is currently preparing to hire a {row['Role']}.",
         "The applicant's resume is as follows."]
@@ -74,12 +71,11 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
     print(f"Processing {len(df)} entries with {num_run} runs each.")
     """ Process entries and compute scores concurrently, with progress updates. """
     scores = {key: [[] for _ in range(len(df))] for key in ['Privilege','Protect','Neutral']}
     for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
         for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
             for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
@@ -89,9 +85,11 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
                 print("=============================================================")
                 result_normal = invoke_retry(prompt_normal, agent, parameters)
                 scores[key][index].append(result_normal)
     print(f"Scores: {scores}")
     # Ensure all scores are lists and calculate average scores
     for category in ['Privilege', 'Protect','Neutral']:
@@ -100,17 +98,17 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
         df[f'{category}_Scores'] = series_data
         # Calculate the average score with additional debug info
-        def calculate_avg_score(score_list):
-            if isinstance(score_list, list) and score_list:
-                valid_scores = [score for score in score_list if score is not None]
-                if valid_scores:
-                    avg_score = sum(valid_scores) / len(valid_scores)
-                    print(f"Valid scores: {valid_scores}, Average score: {avg_score}")
-                    return avg_score
-            return None
         df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
     return df

 import pandas as pd
 from tqdm import tqdm
 def create_summary(group_name, label, occupation,row):
     """ Generate a dynamic summary for scoring the applicant, excluding the group feature.
         The occupation parameter allows customization of the job position.
     """
+    summary = [f"Hello, our company is currently preparing to hire a {row['Role']}.",
         "The applicant's resume is as follows."]
     print(f"Processing {len(df)} entries with {num_run} runs each.")
     """ Process entries and compute scores concurrently, with progress updates. """
     scores = {key: [[] for _ in range(len(df))] for key in ['Privilege','Protect','Neutral']}
+    prompt_list = []
     for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
         for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
             for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
                 print("=============================================================")
                 result_normal = invoke_retry(prompt_normal, agent, parameters)
                 scores[key][index].append(result_normal)
+                prompt_list.append(prompt_normal)
     print(f"Scores: {scores}")
     # Ensure all scores are lists and calculate average scores
     for category in ['Privilege', 'Protect','Neutral']:
         df[f'{category}_Scores'] = series_data
         # Calculate the average score with additional debug info
         df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
+    # Add ranks for each score within each row
+    ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=False)
+    df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
+    df['Protect_Rank'] = ranks['Protect_Avg_Score']
+    df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
+    df['prompt'] = prompt_list
     return df