Spaces:

fschwartzer
/

bens_moveis

Sleeping

App Files Files Community

fschwartzer committed on Feb 20

Commit

78633a0

•

1 Parent(s): 48dc5ed

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -5

app.py CHANGED Viewed

@@ -39,12 +39,33 @@ def refinar_resultados(df, exclude_word="conjunto", include_word=False):
 def get_best_match(query, choices, limit=50):
     # Using RapidFuzz for improved performance and fuzzy matching
     matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
-    return [match[0] for match in matches if match[1] > 90]
 def filtrar_itens_similares(df, termo_pesquisa, limit=50):
-    titulos = df['Title'].tolist()
-    titulos_similares = get_best_match(termo_pesquisa, titulos, limit=limit)
-    df_filtrado = df[df['Title'].isin(titulos_similares)]
     return df_filtrado
 def calcular_fator_avaliacao(titulo, EC, PU):
@@ -63,7 +84,7 @@ def calcular_fator_avaliacao(titulo, EC, PU):
 def select_nearest_items(df, query):
     # Lower the title similarity threshold if necessary
     df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
-    df_filtered = df[df['Title_Similarity'] > 90]  # Adjusted threshold
     # Calculate mode price in a more inclusive manner
     mode_price = df_filtered['Price'].mode()

 def get_best_match(query, choices, limit=50):
     """Return the entries of ``choices`` that score above 85 against ``query``.

     Candidates are ranked by ``process.extract`` using ``fuzz.WRatio`` as the
     scorer, and ``limit`` is forwarded to that call unchanged. Only the
     matched value (first element of each result tuple) is returned; the
     score (second element) is used solely for the ``> 85`` filter.
     """
     # RapidFuzz does the ranking; we only post-filter on the reported score.
     scored = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
     return [candidate for candidate, score, *_ in scored if score > 85]
def match_query_words_in_titles(query, title, match_threshold=80):
    """Check whether every word of ``query`` has a close match inside ``title``.

    The query is lower-cased and split on whitespace; each word is compared
    against the lower-cased title with ``fuzz.partial_ratio``.

    Args:
        query: Search text typed by the user.
        title: Candidate title. Non-string values (e.g. ``None`` or a NaN
            coming from a DataFrame column) are treated as "no text".
        match_threshold: Minimum ``partial_ratio`` score each word must
            reach. Defaults to 80.

    Returns:
        True if every query word scores at least ``match_threshold`` against
        the title (vacuously True when the query has no words); False
        otherwise.
    """
    query_words = query.lower().split()

    if not isinstance(title, str):
        # There is no text to compare against, so no word can match. An empty
        # query still yields True, as it would for any string title.
        return not query_words

    # Lower-case the title once instead of once per query word.
    title_lower = title.lower()
    return all(
        fuzz.partial_ratio(word, title_lower) >= match_threshold
        for word in query_words
    )
 def filtrar_itens_similares(df, termo_pesquisa, limit=50):
     """Return the rows of ``df`` whose ``Title`` best matches ``termo_pesquisa``.

     Rows are first kept only if ``match_query_words_in_titles`` accepts their
     title, then scored with ``fuzz.WRatio`` against the full search term,
     sorted by that score in descending order and truncated to ``limit`` rows.

     Args:
         df: DataFrame with a ``Title`` column.
         termo_pesquisa: Search term to match titles against.
         limit: Maximum number of rows to return.

     Returns:
         A new DataFrame (``df`` itself is left untouched) holding the kept
         rows plus an ``Overall_Similarity`` column with the WRatio score.
     """
     # Keep only titles in which every query word has a close match.
     word_mask = df['Title'].apply(
         lambda title: match_query_words_in_titles(termo_pesquisa, title)
     ).astype(bool)  # an empty frame yields an object mask; force boolean indexing

     # Explicit copy: adding a column to a filtered slice is chained assignment
     # and triggers pandas' SettingWithCopyWarning.
     df_filtrado = df[word_mask].copy()

     # Rank the survivors by overall similarity to the whole search term.
     df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(
         lambda title: fuzz.WRatio(termo_pesquisa, title)
     )
     return df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)
 def calcular_fator_avaliacao(titulo, EC, PU):
 def select_nearest_items(df, query):
     # Lower the title similarity threshold if necessary
     df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
+    df_filtered = df[df['Title_Similarity'] > 85]  # Adjusted threshold
     # Calculate mode price in a more inclusive manner
     mode_price = df_filtered['Price'].mode()