Update app.py
Browse files
app.py
CHANGED
@@ -48,13 +48,13 @@ def parse_number(text):
|
|
48 |
|
49 |
|
50 |
# Function to process row
|
51 |
-
def process_row(row, vendor_data):
|
52 |
scores = {}
|
53 |
for vendor_name, vendor_vectors in vendor_data.items():
|
54 |
cosine_similarities = semantic_search(row[0], vendor_vectors)
|
55 |
most_similar_index = np.argmax(cosine_similarities)
|
56 |
-
|
57 |
-
scores[vendor_name] =
|
58 |
row_scores = pd.Series(scores)
|
59 |
combined_row = pd.concat([row, row_scores])
|
60 |
return combined_row
|
@@ -83,28 +83,37 @@ def process_file(vendor_name, mode, file):
|
|
83 |
vendor_df_path = os.path.join('data', f'{vendor_name}_data.csv')
|
84 |
vendor_df.to_csv(vendor_df_path, index=False)
|
85 |
|
|
|
|
|
|
|
|
|
|
|
86 |
return f"Vendor data file for {vendor_name} has been uploaded and saved.", None
|
87 |
|
88 |
elif mode == 'Compare with Client File':
|
89 |
csv_files = [f for f in os.listdir('data') if f.endswith('_data.csv')]
|
90 |
vector_files = [f for f in os.listdir('data') if f.endswith('_vectors.pkl')]
|
|
|
91 |
|
92 |
-
if not csv_files or not vector_files:
|
93 |
return "No vendor data found. Please upload it first.", None
|
94 |
|
95 |
vendor_data = {}
|
96 |
-
|
|
|
97 |
with open(os.path.join('data', vector_file), 'rb') as f:
|
98 |
vendor_name = os.path.splitext(vector_file)[0]
|
99 |
vendor_vectors = pickle.load(f)
|
100 |
vendor_data[vendor_name] = vendor_vectors
|
|
|
|
|
101 |
|
102 |
client_df = pd.read_excel(file.name)
|
103 |
client_df.iloc[:, 2] = client_df.iloc[:, 2].fillna('3 - Medium')
|
104 |
client_df = client_df[client_df.iloc[:, 1] == 'Yes'] # Only consider rows where the second column is 'Yes'
|
105 |
client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
|
106 |
client_df['score_client'] = client_df.iloc[:, 2].apply(parse_number)
|
107 |
-
common_list = Parallel(n_jobs=-1)(delayed(process_row)(row, vendor_data) for index, row in client_df.iterrows())
|
108 |
common_df = pd.DataFrame(common_list)
|
109 |
common_df = common_df.drop(common_df.columns[[1, 2, 3, 4]], axis=1) # Drop the second, third, fourth and fifth columns
|
110 |
common_df.to_excel(f'client_matches.xlsx', index=False)
|
|
|
48 |
|
49 |
|
50 |
# Function to process row
|
51 |
+
def process_row(row, vendor_data, vendor_scores):
    """Append one weighted score per vendor to a client row.

    For every entry in ``vendor_data`` the client text (first field of
    ``row``) is passed to ``semantic_search`` together with that vendor's
    vectors, and the vendor's score is multiplied by the row's
    ``score_client`` value.

    Args:
        row: pandas Series for one client row; its first positional field is
            the text to compare and it must carry a ``'score_client'`` entry.
        vendor_data: mapping of vendor name -> vendor vectors.
        vendor_scores: mapping looked up by vendor name to get the score that
            is multiplied with the client priority.

    Returns:
        pandas Series: ``row`` followed by one entry per vendor name.

    Raises:
        KeyError: if a vendor name is missing from ``vendor_scores`` or the
            row has no ``'score_client'`` entry.
    """
    scores = {}
    for vendor_name, vendor_vectors in vendor_data.items():
        # Use explicit positional access: ``row[0]`` on a label-indexed
        # Series relies on a positional fallback that pandas has deprecated.
        cosine_similarities = semantic_search(row.iloc[0], vendor_vectors)
        # NOTE(review): the best-match index is computed but not used by the
        # score below -- confirm whether the vendor score was meant to be
        # looked up for this specific match.
        most_similar_index = np.argmax(cosine_similarities)
        # NOTE(review): this lookup assumes ``vendor_scores`` shares its keys
        # with ``vendor_data``; verify against how the caller builds both.
        vendor_score = vendor_scores[vendor_name]  # Get the vendor's score
        # Multiply vendor score with client priority
        scores[vendor_name] = row['score_client'] * vendor_score
    row_scores = pd.Series(scores)
    combined_row = pd.concat([row, row_scores])
    return combined_row
|
|
|
83 |
vendor_df_path = os.path.join('data', f'{vendor_name}_data.csv')
|
84 |
vendor_df.to_csv(vendor_df_path, index=False)
|
85 |
|
86 |
+
# Save vendor scores as pickle file
|
87 |
+
vendor_scores_path = os.path.join('data', f'{vendor_name}_scores.pkl')
|
88 |
+
with open(vendor_scores_path, 'wb') as f:
|
89 |
+
pickle.dump(vendor_df['score_vendor'].to_dict(), f)
|
90 |
+
|
91 |
return f"Vendor data file for {vendor_name} has been uploaded and saved.", None
|
92 |
|
93 |
elif mode == 'Compare with Client File':
|
94 |
csv_files = [f for f in os.listdir('data') if f.endswith('_data.csv')]
|
95 |
vector_files = [f for f in os.listdir('data') if f.endswith('_vectors.pkl')]
|
96 |
+
score_files = [f for f in os.listdir('data') if f.endswith('_scores.pkl')]
|
97 |
|
98 |
+
if not csv_files or not vector_files or not score_files:
|
99 |
return "No vendor data found. Please upload it first.", None
|
100 |
|
101 |
vendor_data = {}
|
102 |
+
vendor_scores = {}
|
103 |
+
for csv_file, vector_file, score_file in zip(csv_files, vector_files, score_files):
|
104 |
with open(os.path.join('data', vector_file), 'rb') as f:
|
105 |
vendor_name = os.path.splitext(vector_file)[0]
|
106 |
vendor_vectors = pickle.load(f)
|
107 |
vendor_data[vendor_name] = vendor_vectors
|
108 |
+
with open(os.path.join('data', score_file), 'rb') as f:
|
109 |
+
vendor_scores.update(pickle.load(f))
|
110 |
|
111 |
client_df = pd.read_excel(file.name)
|
112 |
client_df.iloc[:, 2] = client_df.iloc[:, 2].fillna('3 - Medium')
|
113 |
client_df = client_df[client_df.iloc[:, 1] == 'Yes'] # Only consider rows where the second column is 'Yes'
|
114 |
client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
|
115 |
client_df['score_client'] = client_df.iloc[:, 2].apply(parse_number)
|
116 |
+
common_list = Parallel(n_jobs=-1)(delayed(process_row)(row, vendor_data, vendor_scores) for index, row in client_df.iterrows())
|
117 |
common_df = pd.DataFrame(common_list)
|
118 |
common_df = common_df.drop(common_df.columns[[1, 2, 3, 4]], axis=1) # Drop the second, third, fourth and fifth columns
|
119 |
common_df.to_excel(f'client_matches.xlsx', index=False)
|