Mattral committed on
Commit
b2d7e3e
·
verified ·
1 Parent(s): 9bb02cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -60,13 +60,13 @@ def find_exact_matches(df1, df2, column_name):
60
  return matches
61
 
62
 
63
- def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.8):
64
  # Find rows with similar texts in the specified column, excluding exact matches
65
  similar_texts = []
66
  exact_match_indices = set(exact_matches.index.tolist())
67
 
68
  # Concatenate texts from both dataframes
69
- all_texts = df1[column_name].tolist() + df2[column_name].tolist()
70
 
71
  # Compute TF-IDF vectors
72
  vectorizer = TfidfVectorizer()
 
60
  return matches
61
 
62
 
63
+ def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.7):
64
  # Find rows with similar texts in the specified column, excluding exact matches
65
  similar_texts = []
66
  exact_match_indices = set(exact_matches.index.tolist())
67
 
68
  # Concatenate texts from both dataframes
69
+ all_texts = df1[column_name].astype(str).tolist() + df2[column_name].astype(str).tolist()
70
 
71
  # Compute TF-IDF vectors
72
  vectorizer = TfidfVectorizer()