cogcorp committed on
Commit
7002f88
·
1 Parent(s): 5bd6725

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -48,13 +48,13 @@ def parse_number(text):
48
 
49
 
50
  # Function to process row
51
- def process_row(row, vendor_data):
52
  scores = {}
53
  for vendor_name, vendor_vectors in vendor_data.items():
54
  cosine_similarities = semantic_search(row[0], vendor_vectors)
55
  most_similar_index = np.argmax(cosine_similarities)
56
- most_similar_score = cosine_similarities[most_similar_index]
57
- scores[vendor_name] = most_similar_score * row['score_client'] # Multiply vendor score with client priority
58
  row_scores = pd.Series(scores)
59
  combined_row = pd.concat([row, row_scores])
60
  return combined_row
@@ -83,28 +83,37 @@ def process_file(vendor_name, mode, file):
83
  vendor_df_path = os.path.join('data', f'{vendor_name}_data.csv')
84
  vendor_df.to_csv(vendor_df_path, index=False)
85
 
 
 
 
 
 
86
  return f"Vendor data file for {vendor_name} has been uploaded and saved.", None
87
 
88
  elif mode == 'Compare with Client File':
89
  csv_files = [f for f in os.listdir('data') if f.endswith('_data.csv')]
90
  vector_files = [f for f in os.listdir('data') if f.endswith('_vectors.pkl')]
 
91
 
92
- if not csv_files or not vector_files:
93
  return "No vendor data found. Please upload it first.", None
94
 
95
  vendor_data = {}
96
- for csv_file, vector_file in zip(csv_files, vector_files):
 
97
  with open(os.path.join('data', vector_file), 'rb') as f:
98
  vendor_name = os.path.splitext(vector_file)[0]
99
  vendor_vectors = pickle.load(f)
100
  vendor_data[vendor_name] = vendor_vectors
 
 
101
 
102
  client_df = pd.read_excel(file.name)
103
  client_df.iloc[:, 2] = client_df.iloc[:, 2].fillna('3 - Medium')
104
  client_df = client_df[client_df.iloc[:, 1] == 'Yes'] # Only consider rows where the second column is 'Yes'
105
  client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
106
  client_df['score_client'] = client_df.iloc[:, 2].apply(parse_number)
107
- common_list = Parallel(n_jobs=-1)(delayed(process_row)(row, vendor_data) for index, row in client_df.iterrows())
108
  common_df = pd.DataFrame(common_list)
109
  common_df = common_df.drop(common_df.columns[[1, 2, 3, 4]], axis=1) # Drop the second, third, fourth and fifth columns
110
  common_df.to_excel(f'client_matches.xlsx', index=False)
 
48
 
49
 
50
  # Function to process row
51
+ def process_row(row, vendor_data, vendor_scores):
52
  scores = {}
53
  for vendor_name, vendor_vectors in vendor_data.items():
54
  cosine_similarities = semantic_search(row[0], vendor_vectors)
55
  most_similar_index = np.argmax(cosine_similarities)
56
+ vendor_score = vendor_scores[vendor_name] # Get the vendor's score
57
+ scores[vendor_name] = row['score_client'] * vendor_score # Multiply vendor score with client priority
58
  row_scores = pd.Series(scores)
59
  combined_row = pd.concat([row, row_scores])
60
  return combined_row
 
83
  vendor_df_path = os.path.join('data', f'{vendor_name}_data.csv')
84
  vendor_df.to_csv(vendor_df_path, index=False)
85
 
86
+ # Save vendor scores as pickle file
87
+ vendor_scores_path = os.path.join('data', f'{vendor_name}_scores.pkl')
88
+ with open(vendor_scores_path, 'wb') as f:
89
+ pickle.dump(vendor_df['score_vendor'].to_dict(), f)
90
+
91
  return f"Vendor data file for {vendor_name} has been uploaded and saved.", None
92
 
93
  elif mode == 'Compare with Client File':
94
  csv_files = [f for f in os.listdir('data') if f.endswith('_data.csv')]
95
  vector_files = [f for f in os.listdir('data') if f.endswith('_vectors.pkl')]
96
+ score_files = [f for f in os.listdir('data') if f.endswith('_scores.pkl')]
97
 
98
+ if not csv_files or not vector_files or not score_files:
99
  return "No vendor data found. Please upload it first.", None
100
 
101
  vendor_data = {}
102
+ vendor_scores = {}
103
+ for csv_file, vector_file, score_file in zip(csv_files, vector_files, score_files):
104
  with open(os.path.join('data', vector_file), 'rb') as f:
105
  vendor_name = os.path.splitext(vector_file)[0]
106
  vendor_vectors = pickle.load(f)
107
  vendor_data[vendor_name] = vendor_vectors
108
+ with open(os.path.join('data', score_file), 'rb') as f:
109
+ vendor_scores.update(pickle.load(f))
110
 
111
  client_df = pd.read_excel(file.name)
112
  client_df.iloc[:, 2] = client_df.iloc[:, 2].fillna('3 - Medium')
113
  client_df = client_df[client_df.iloc[:, 1] == 'Yes'] # Only consider rows where the second column is 'Yes'
114
  client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
115
  client_df['score_client'] = client_df.iloc[:, 2].apply(parse_number)
116
+ common_list = Parallel(n_jobs=-1)(delayed(process_row)(row, vendor_data, vendor_scores) for index, row in client_df.iterrows())
117
  common_df = pd.DataFrame(common_list)
118
  common_df = common_df.drop(common_df.columns[[1, 2, 3, 4]], axis=1) # Drop the second, third, fourth and fifth columns
119
  common_df.to_excel(f'client_matches.xlsx', index=False)