Spaces:

Mattral
/

Excel-Match-Analysis

Sleeping

Mattral committed on Apr 26, 2024

Commit

b2d7e3e

verified ·

1 Parent(s): 9bb02cd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -60,13 +60,13 @@ def find_exact_matches(df1, df2, column_name):
     return matches
-def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.8):
     # Find rows with similar texts in the specified column, excluding exact matches
     similar_texts = []
     exact_match_indices = set(exact_matches.index.tolist())
     # Concatenate texts from both dataframes
-    all_texts = df1[column_name].tolist() + df2[column_name].tolist()
     # Compute TF-IDF vectors
     vectorizer = TfidfVectorizer()

     return matches
+def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.7):
     # Find rows with similar texts in the specified column, excluding exact matches
     similar_texts = []
     exact_match_indices = set(exact_matches.index.tolist())
     # Concatenate texts from both dataframes
+    all_texts = df1[column_name].astype(str).tolist() + df2[column_name].astype(str).tolist()
     # Compute TF-IDF vectors
     vectorizer = TfidfVectorizer()