Spaces:

cogcorp
/

homework

Sleeping

App Files Community

cogcorp committed on Jun 7, 2023

Commit

7002f88

1 Parent(s): 5bd6725

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -6

app.py CHANGED Viewed

@@ -48,13 +48,13 @@ def parse_number(text):
 # Function to process row
-def process_row(row, vendor_data):
     scores = {}
     for vendor_name, vendor_vectors in vendor_data.items():
         cosine_similarities = semantic_search(row[0], vendor_vectors)
         most_similar_index = np.argmax(cosine_similarities)
-        most_similar_score = cosine_similarities[most_similar_index]
-        scores[vendor_name] = most_similar_score * row['score_client']  # Multiply vendor score with client priority
     row_scores = pd.Series(scores)
     combined_row = pd.concat([row, row_scores])
     return combined_row
@@ -83,28 +83,37 @@ def process_file(vendor_name, mode, file):
         vendor_df_path = os.path.join('data', f'{vendor_name}_data.csv')
         vendor_df.to_csv(vendor_df_path, index=False)
         return f"Vendor data file for {vendor_name} has been uploaded and saved.", None
     elif mode == 'Compare with Client File':
         csv_files = [f for f in os.listdir('data') if f.endswith('_data.csv')]
         vector_files = [f for f in os.listdir('data') if f.endswith('_vectors.pkl')]
-        if not csv_files or not vector_files:
             return "No vendor data found. Please upload it first.", None
         vendor_data = {}
-        for csv_file, vector_file in zip(csv_files, vector_files):
             with open(os.path.join('data', vector_file), 'rb') as f:
                 vendor_name = os.path.splitext(vector_file)[0]
                 vendor_vectors = pickle.load(f)
                 vendor_data[vendor_name] = vendor_vectors
         client_df = pd.read_excel(file.name)
         client_df.iloc[:, 2] = client_df.iloc[:, 2].fillna('3 - Medium')
         client_df = client_df[client_df.iloc[:, 1] == 'Yes']  # Only consider rows where the second column is 'Yes'
         client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
         client_df['score_client'] = client_df.iloc[:, 2].apply(parse_number)
-        common_list = Parallel(n_jobs=-1)(delayed(process_row)(row, vendor_data) for index, row in client_df.iterrows())
         common_df = pd.DataFrame(common_list)
         common_df = common_df.drop(common_df.columns[[1, 2, 3, 4]], axis=1)  # Drop the second, third, fourth and fifth columns
         common_df.to_excel(f'client_matches.xlsx', index=False)

 # Function to process row
+def process_row(row, vendor_data, vendor_scores):
     scores = {}
     for vendor_name, vendor_vectors in vendor_data.items():
         cosine_similarities = semantic_search(row[0], vendor_vectors)
         most_similar_index = np.argmax(cosine_similarities)
+        vendor_score = vendor_scores[vendor_name]  # Get the vendor's score
+        scores[vendor_name] = row['score_client'] * vendor_score  # Multiply vendor score with client priority
     row_scores = pd.Series(scores)
     combined_row = pd.concat([row, row_scores])
     return combined_row
         vendor_df_path = os.path.join('data', f'{vendor_name}_data.csv')
         vendor_df.to_csv(vendor_df_path, index=False)
+        # Save vendor scores as pickle file
+        vendor_scores_path = os.path.join('data', f'{vendor_name}_scores.pkl')
+        with open(vendor_scores_path, 'wb') as f:
+            pickle.dump(vendor_df['score_vendor'].to_dict(), f)
         return f"Vendor data file for {vendor_name} has been uploaded and saved.", None
     elif mode == 'Compare with Client File':
         csv_files = [f for f in os.listdir('data') if f.endswith('_data.csv')]
         vector_files = [f for f in os.listdir('data') if f.endswith('_vectors.pkl')]
+        score_files = [f for f in os.listdir('data') if f.endswith('_scores.pkl')]
+        if not csv_files or not vector_files or not score_files:
             return "No vendor data found. Please upload it first.", None
         vendor_data = {}
+        vendor_scores = {}
+        for csv_file, vector_file, score_file in zip(csv_files, vector_files, score_files):
             with open(os.path.join('data', vector_file), 'rb') as f:
                 vendor_name = os.path.splitext(vector_file)[0]
                 vendor_vectors = pickle.load(f)
                 vendor_data[vendor_name] = vendor_vectors
+            with open(os.path.join('data', score_file), 'rb') as f:
+                vendor_scores.update(pickle.load(f))
         client_df = pd.read_excel(file.name)
         client_df.iloc[:, 2] = client_df.iloc[:, 2].fillna('3 - Medium')
         client_df = client_df[client_df.iloc[:, 1] == 'Yes']  # Only consider rows where the second column is 'Yes'
         client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
         client_df['score_client'] = client_df.iloc[:, 2].apply(parse_number)
+        common_list = Parallel(n_jobs=-1)(delayed(process_row)(row, vendor_data, vendor_scores) for index, row in client_df.iterrows())
         common_df = pd.DataFrame(common_list)
         common_df = common_df.drop(common_df.columns[[1, 2, 3, 4]], axis=1)  # Drop the second, third, fourth and fifth columns
         common_df.to_excel(f'client_matches.xlsx', index=False)