Spaces:
Running
Running
fschwartzer
committed on
Commit
•
c1badbd
1
Parent(s):
3b9e0c5
Update app.py
Browse files
app.py
CHANGED
@@ -44,6 +44,7 @@ def get_best_match(query, choices, limit=15):
|
|
44 |
|
45 |
def filtrar_itens_similares(df, termo_pesquisa, limit=15):
    """Return the rows of ``df`` whose 'Title' is among the best matches.

    Args:
        df: DataFrame with a 'Title' column.
        termo_pesquisa: Search term forwarded to ``get_best_match``.
        limit: Forwarded to ``get_best_match`` as its ``limit`` argument.

    Returns:
        The subset of ``df`` whose 'Title' value is contained in the
        result of ``get_best_match``.
    """
    # get_best_match is defined elsewhere in this file; its result is used
    # directly as the membership set for the filter below.
    melhores = get_best_match(termo_pesquisa, df['Title'].tolist(), limit=limit)
    mascara = df['Title'].isin(melhores)
    return df[mascara]
|
@@ -91,17 +92,36 @@ def select_nearest_items(df):
|
|
91 |
break
|
92 |
|
93 |
return pd.DataFrame(nearest_items)
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
def integrated_app(query, titulo, EC, PU):
|
96 |
df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
|
97 |
df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
|
98 |
-
|
99 |
if df_combined.empty:
|
100 |
return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
|
101 |
|
102 |
df_refined = refinar_resultados(df_combined)
|
103 |
-
|
104 |
-
|
|
|
105 |
if df_similares.empty:
|
106 |
return "Nenhum item similar encontrado.", pd.DataFrame()
|
107 |
else:
|
|
|
44 |
|
45 |
def filtrar_itens_similares(df, termo_pesquisa, limit=15):
    """Return the rows of ``df`` whose 'Title' is among the best matches.

    Args:
        df: DataFrame with a 'Title' column.
        termo_pesquisa: Search term forwarded to ``get_best_match``.
        limit: Forwarded to ``get_best_match`` as its ``limit`` argument.

    Returns:
        The subset of ``df`` whose 'Title' value is contained in the
        result of ``get_best_match``.
    """
    # NOTE(review): the original comment credits RapidFuzz for the fuzzy
    # matching; get_best_match is defined outside this view -- confirm.
    melhores = get_best_match(termo_pesquisa, df['Title'].tolist(), limit=limit)
    mascara = df['Title'].isin(melhores)
    return df[mascara]
|
|
|
92 |
break
|
93 |
|
94 |
return pd.DataFrame(nearest_items)
|
95 |
+
|
96 |
+
def search_with_fallback(query, df, limit=15):
    """Search ``df`` for similar titles, relaxing the query until something matches.

    The query is split on whitespace and tried as progressively shorter
    suffixes: the full query first, then the query without its first word,
    and so on down to the last word alone. The first attempt for which
    ``filtrar_itens_similares`` returns a non-empty frame is returned.

    Args:
        query: Free-text search string. ``None`` is treated like an empty
            string.
        df: DataFrame to search; forwarded unchanged to
            ``filtrar_itens_similares``.
        limit: Forwarded to ``filtrar_itens_similares`` as its ``limit``.

    Returns:
        The first non-empty filtered DataFrame. If no attempt matches, or
        the query contains no words, an empty DataFrame that keeps the
        columns of ``df``.
    """
    # A None query has no words to try; avoid AttributeError on .split().
    query_parts = (query or "").split()

    # Index 0 is the full query (most specific); each later index drops one
    # more leading word.
    for start in range(len(query_parts)):
        specificity = " ".join(query_parts[start:])
        df_filtrado = filtrar_itens_similares(df, specificity, limit=limit)
        if not df_filtrado.empty:
            return df_filtrado

    # Nothing matched at any level: return zero rows but keep df's columns,
    # so callers that check `.empty` still work and the schema is not lost.
    return df.iloc[0:0]
|
113 |
+
|
114 |
def integrated_app(query, titulo, EC, PU):
|
115 |
df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
|
116 |
df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
|
117 |
+
|
118 |
if df_combined.empty:
|
119 |
return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
|
120 |
|
121 |
df_refined = refinar_resultados(df_combined)
|
122 |
+
# Use the new search_with_fallback function
|
123 |
+
df_similares = search_with_fallback(query, df_refined)
|
124 |
+
|
125 |
if df_similares.empty:
|
126 |
return "Nenhum item similar encontrado.", pd.DataFrame()
|
127 |
else:
|