Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -60,13 +60,13 @@ def find_exact_matches(df1, df2, column_name):
|
|
60 |
return matches
|
61 |
|
62 |
|
63 |
-
def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.
|
64 |
# Find rows with similar texts in the specified column, excluding exact matches
|
65 |
similar_texts = []
|
66 |
exact_match_indices = set(exact_matches.index.tolist())
|
67 |
|
68 |
# Concatenate texts from both dataframes
|
69 |
-
all_texts = df1[column_name].tolist() + df2[column_name].tolist()
|
70 |
|
71 |
# Compute TF-IDF vectors
|
72 |
vectorizer = TfidfVectorizer()
|
|
|
60 |
return matches
|
61 |
|
62 |
|
63 |
+
def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.7):
|
64 |
# Find rows with similar texts in the specified column, excluding exact matches
|
65 |
similar_texts = []
|
66 |
exact_match_indices = set(exact_matches.index.tolist())
|
67 |
|
68 |
# Concatenate texts from both dataframes
|
69 |
+
all_texts = df1[column_name].astype(str).tolist() + df2[column_name].astype(str).tolist()
|
70 |
|
71 |
# Compute TF-IDF vectors
|
72 |
vectorizer = TfidfVectorizer()
|