fschwartzer committed on
Commit
78633a0
1 Parent(s): 48dc5ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -5
app.py CHANGED
@@ -39,12 +39,33 @@ def refinar_resultados(df, exclude_word="conjunto", include_word=False):
39
  def get_best_match(query, choices, limit=50):
40
  # Using RapidFuzz for improved performance and fuzzy matching
41
  matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
42
- return [match[0] for match in matches if match[1] > 90]
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def filtrar_itens_similares(df, termo_pesquisa, limit=50):
45
- titulos = df['Title'].tolist()
46
- titulos_similares = get_best_match(termo_pesquisa, titulos, limit=limit)
47
- df_filtrado = df[df['Title'].isin(titulos_similares)]
 
 
 
 
 
48
  return df_filtrado
49
 
50
  def calcular_fator_avaliacao(titulo, EC, PU):
@@ -63,7 +84,7 @@ def calcular_fator_avaliacao(titulo, EC, PU):
63
  def select_nearest_items(df, query):
64
  # Lower the title similarity threshold if necessary
65
  df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
66
- df_filtered = df[df['Title_Similarity'] > 90] # Adjusted threshold
67
 
68
  # Calculate mode price in a more inclusive manner
69
  mode_price = df_filtered['Price'].mode()
 
def get_best_match(query, choices, limit=50, score_threshold=85):
    """Return the choices that fuzzily match ``query`` above a score threshold.

    Candidates are ranked with RapidFuzz's ``process.extract`` using the
    ``fuzz.WRatio`` scorer, and at most ``limit`` candidates are examined.

    Args:
        query: Search string compared against every choice.
        choices: Collection of candidate strings.
        limit: Maximum number of ranked candidates to request.
        score_threshold: Exclusive lower bound on the similarity score; only
            candidates scoring strictly above it are kept. Defaults to 85,
            the value that was previously hard-coded.

    Returns:
        A list of the matching choices, in the order ``process.extract``
        returned them.
    """
    matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    # Results are indexed positionally: [0] is the choice, [1] is its score.
    return [match[0] for match in matches if match[1] > score_threshold]
43
 
def match_query_words_in_titles(query, title, match_threshold=80):
    """Check whether every word of the query has a close match in the title.

    The query is lower-cased and split on whitespace; each word is scored
    against the lower-cased title with ``fuzz.partial_ratio``.

    Args:
        query: Search string; it is split into words on whitespace.
        title: Title to test. Non-string values (for example a missing
            value) never match a non-empty query.
        match_threshold: Minimum ``partial_ratio`` score (inclusive) each
            word must reach. Defaults to 80, the value that was previously
            hard-coded.

    Returns:
        True if every query word scores at least ``match_threshold`` against
        the title, False otherwise. A query with no words matches any title.
    """
    query_words = query.lower().split()
    if not query_words:
        # Nothing to check: vacuously true, as with an empty loop.
        return True

    # A missing or non-text title cannot be lower-cased or scored.
    if not isinstance(title, str):
        return False

    # Lower-case the title once instead of once per query word.
    title_lower = title.lower()
    return all(
        fuzz.partial_ratio(word, title_lower) >= match_threshold
        for word in query_words
    )
59
+
def filtrar_itens_similares(df, termo_pesquisa, limit=50):
    """Filter a DataFrame down to the rows whose title best matches a query.

    Rows are first kept only if ``match_query_words_in_titles`` accepts their
    ``'Title'``; the survivors are then scored against the whole query with
    ``fuzz.WRatio`` and the ``limit`` highest-scoring rows are returned.

    Args:
        df: DataFrame with a ``'Title'`` column. It is not modified.
        termo_pesquisa: Search string.
        limit: Maximum number of rows to return.

    Returns:
        A new DataFrame holding the retained rows plus an
        ``'Overall_Similarity'`` column, sorted by that column in
        descending order.
    """
    # Apply the word-level match to each title. The cast to bool keeps the
    # result usable as a boolean mask even when the frame has no rows.
    matches = df['Title'].apply(
        lambda title: match_query_words_in_titles(termo_pesquisa, title)
    ).astype(bool)

    # Work on an explicit copy so adding a column below neither touches the
    # caller's frame nor triggers pandas' chained-assignment warning.
    df_filtrado = df[matches].copy()

    # Rank the survivors by overall similarity to the query and keep the top N.
    df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(
        lambda title: fuzz.WRatio(termo_pesquisa, title)
    )
    df_filtrado = df_filtrado.sort_values(
        'Overall_Similarity', ascending=False
    ).head(limit)

    return df_filtrado
70
 
71
  def calcular_fator_avaliacao(titulo, EC, PU):
 
84
  def select_nearest_items(df, query):
85
  # Lower the title similarity threshold if necessary
86
  df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
87
+ df_filtered = df[df['Title_Similarity'] > 85] # Adjusted threshold
88
 
89
  # Calculate mode price in a more inclusive manner
90
  mode_price = df_filtered['Price'].mode()