Spaces:
Sleeping
Sleeping
fschwartzer
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -23,17 +23,27 @@ def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
|
|
23 |
return pd.DataFrame()
|
24 |
|
25 |
def refinar_resultados(df):
|
|
|
|
|
|
|
|
|
26 |
df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
|
27 |
-
|
|
|
|
|
|
|
28 |
return df_refinado
|
29 |
|
30 |
def get_best_match(query, choices, limit=15):
|
|
|
31 |
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
|
32 |
return [match[0] for match in matches if match[1] > 70]
|
33 |
|
34 |
def filtrar_itens_similares(df, termo_pesquisa, limit=15):
|
35 |
-
|
36 |
-
|
|
|
|
|
37 |
|
38 |
def calcular_fator_avaliacao(titulo, EC, PU):
|
39 |
filtered_df = bens_df[bens_df['TITULO'] == titulo]
|
@@ -48,27 +58,56 @@ def calcular_fator_avaliacao(titulo, EC, PU):
|
|
48 |
fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
|
49 |
return fator_avaliacao
|
50 |
|
51 |
-
def select_nearest_items(df):
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
def integrated_app(query, titulo, EC, PU):
|
57 |
-
df_mercadolibre = fetch_data_to_dataframe(query)
|
58 |
df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
|
|
|
59 |
if df_combined.empty:
|
60 |
return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
|
61 |
|
62 |
df_refined = refinar_resultados(df_combined)
|
63 |
-
df_similares =
|
|
|
64 |
if df_similares.empty:
|
65 |
return "Nenhum item similar encontrado.", pd.DataFrame()
|
66 |
|
67 |
-
|
68 |
-
if
|
69 |
-
return "
|
70 |
|
71 |
-
|
|
|
72 |
valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
|
73 |
return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest
|
74 |
|
|
|
23 |
return pd.DataFrame()
|
24 |
|
25 |
def refinar_resultados(df):
    """Drop listings whose title marks them as a kit or a multi-unit bundle.

    Args:
        df: DataFrame with a 'Title' column; missing titles are allowed.

    Returns:
        A new DataFrame with the rows whose title neither contains "kit"
        (case-insensitive substring) nor matches "<number> unidade(s) /
        pacote(s) / caixa(s)". In the result, 'Title' holds strings and
        missing titles are replaced by ''. The input frame is not modified.
    """
    # Fill missing values *before* casting: astype(str) turns a missing value
    # into literal text such as 'nan', after which fillna has nothing to fill.
    titulos = df['Title'].fillna('').astype(str)

    # Non-capturing group: str.contains emits a UserWarning when the pattern
    # contains capture groups, and the groups were never used.
    padrao_unidades = r'\b\d+\s*(?:unidades?|pacotes?|caixas?)\b'

    contem_kit = titulos.str.contains("kit", case=False)
    contem_unidades = titulos.str.contains(padrao_unidades, case=False, regex=True)

    # assign() builds a new frame, so the caller's DataFrame is left untouched.
    return df.assign(Title=titulos)[~(contem_kit | contem_unidades)]
|
36 |
|
37 |
def get_best_match(query, choices, limit=15):
    """Return the choices that fuzzily match ``query`` with a score above 70.

    Args:
        query: Text to match against.
        choices: Candidate strings handed to ``process.extract``.
        limit: Maximum number of candidates requested from ``process.extract``.

    Returns:
        A list with the first element of every extracted match whose second
        element (the score) is strictly greater than 70.
    """
    # NOTE(review): the original comment says this relies on RapidFuzz;
    # the import is outside this view, so confirm against the file header.
    candidatos = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)

    aceitos = []
    for candidato in candidatos:
        texto, pontuacao = candidato[0], candidato[1]
        if pontuacao > 70:
            aceitos.append(texto)
    return aceitos
|
41 |
|
42 |
def filtrar_itens_similares(df, termo_pesquisa, limit=15):
    """Keep the rows whose 'Title' is among the best fuzzy matches for the search term.

    Args:
        df: DataFrame with a 'Title' column.
        termo_pesquisa: Search text passed to ``get_best_match``.
        limit: Forwarded to ``get_best_match`` as the candidate limit.

    Returns:
        The subset of ``df`` whose 'Title' value appears in the list returned
        by ``get_best_match``.
    """
    melhores_titulos = get_best_match(termo_pesquisa, df['Title'].tolist(), limit=limit)
    mascara = df['Title'].isin(melhores_titulos)
    return df[mascara]
|
47 |
|
48 |
def calcular_fator_avaliacao(titulo, EC, PU):
|
49 |
filtered_df = bens_df[bens_df['TITULO'] == titulo]
|
|
|
58 |
fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
|
59 |
return fator_avaliacao
|
60 |
|
61 |
+
def select_nearest_items(df, query):
    """Pick up to five listings relevant to ``query`` and priced near the target price.

    Args:
        df: DataFrame with 'Title' and 'Price' columns.
        query: Search text each title is scored against with ``fuzz.WRatio``.

    Returns:
        Up to 5 rows, with added 'Title_Similarity' and 'Distance' columns,
        sorted by ascending distance to the target price and then by
        descending similarity. An empty DataFrame is returned when no title
        scores above 70 (there is no broader fallback). The input frame is
        not modified.
    """
    # Build the scored frame with assign() instead of writing a column into
    # ``df``: the caller usually passes a filtered slice, and assigning into
    # it mutates caller data and triggers chained-assignment warnings.
    similaridade = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
    pontuados = df.assign(Title_Similarity=similaridade)

    relevantes = pontuados[pontuados['Title_Similarity'] > 70]
    if relevantes.empty:
        return pd.DataFrame()

    # Keep only items at or below the 75th price percentile.
    teto_preco = relevantes['Price'].quantile(0.75)
    razoaveis = relevantes[relevantes['Price'] <= teto_preco]

    # Target price: the smallest mode, or the median when no mode exists.
    modas = razoaveis['Price'].mode()
    target_price = modas.min() if not modas.empty else razoaveis['Price'].median()

    distancia = (razoaveis['Price'] - target_price).abs()
    return (
        razoaveis.assign(Distance=distancia)
        .sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])
        .head(5)
    )
|
78 |
+
|
79 |
+
def search_with_fallback(query, df, limit=15):
    """Search for similar items, shortening the query from the right until something matches.

    Args:
        query: Whitespace-separated search text.
        df: DataFrame passed to ``filtrar_itens_similares``.
        limit: Forwarded to ``filtrar_itens_similares``.

    Returns:
        The first non-empty result from ``filtrar_itens_similares``. The full
        query is tried first, then the query with its last word dropped, and
        so on down to the first word alone. An empty DataFrame is returned
        when no prefix yields results or when the query has no words.
    """
    palavras = query.split()
    while palavras:
        consulta_simplificada = " ".join(palavras)
        encontrados = filtrar_itens_similares(df, consulta_simplificada, limit=limit)
        if not encontrados.empty:
            return encontrados
        # Nothing found: drop the trailing word and retry with a shorter query.
        palavras.pop()
    return pd.DataFrame()
|
91 |
|
92 |
def integrated_app(query, titulo, EC, PU):
    """Run the full pipeline: fetch listings, filter them, and compute the valuation.

    Args:
        query: Search text used both to fetch listings and to rank them.
        titulo: Passed through to ``calcular_fator_avaliacao``.
        EC: Passed through to ``calcular_fator_avaliacao``.
        PU: Passed through to ``calcular_fator_avaliacao``.

    Returns:
        A ``(message, DataFrame)`` tuple. On success the message reports the
        mean price, the valuation factor as a percentage, and their product,
        and the DataFrame holds the selected items. When a stage yields no
        rows, the message explains which one and the DataFrame is empty.
    """
    vazio = pd.DataFrame

    # Merge freshly fetched listings with the module-level ``data_crawler`` frame.
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", vazio()

    df_similares = search_with_fallback(query, refinar_resultados(df_combined))
    if df_similares.empty:
        return "Nenhum item similar encontrado.", vazio()

    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", vazio()

    # The valuation is the mean price of the selected items scaled by the factor.
    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    preco_medio = df_nearest['Price'].mean()
    valor_avaliacao = preco_medio * fator_avaliacao
    mensagem = f"Valor Médio do Bem: R$ {preco_medio:.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}"
    return mensagem, df_nearest
|
113 |
|