Spaces:
Sleeping
Sleeping
File size: 6,931 Bytes
c5a97b3 c038e47 c5a97b3 99aed09 53ae920 1a5aa7c b4cb87b 99aed09 2598bbd 7a6bd46 2598bbd 601b79b 2598bbd 7a6bd46 2c05d82 fb309c7 2237b4d 2c05d82 3992853 f691033 2237b4d 5d4f445 601b79b 07df588 f691033 2237b4d c5a97b3 2fe5b5e d3db32a 601b79b d3db32a 601b79b 99aed09 601b79b aff5c22 f308877 2237b4d f555ad2 2237b4d f555ad2 2237b4d f555ad2 2237b4d f555ad2 2237b4d f555ad2 2237b4d f555ad2 2237b4d 2c05d82 2237b4d 2c05d82 2237b4d 2c05d82 2237b4d c1badbd 2fe5b5e 2237b4d 074db95 2237b4d 074db95 2598bbd c5a97b3 2c05d82 2237b4d 78ac961 f2141ad 2c05d82 2237b4d 601b79b 2237b4d 601b79b 99aed09 3bd1e98 f308877 601b79b 4d527e1 601b79b c4f8c41 205d4a0 601b79b 3bd1e98 78ac961 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz
# Asset-class reference table. Elsewhere in this file its rows are matched on
# 'TITULO' and read for 'VIDA_UTIL' and 'VALOR_RESIDUAL'.
bens_df = pd.read_excel('bens_tab.xlsx')

# Pre-collected marketplace listings, narrowed to the same six columns that
# the live Mercado Livre search produces so both sources can be concatenated.
data_crawler = pd.read_csv('data_crawler.csv', index_col=False).loc[
    :, ['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']
]
def fetch_data_to_dataframe(query, limit=50, source="mercadolibre", timeout=10):
    """Search a marketplace API and return the listings as a DataFrame.

    Only ``source="mercadolibre"`` is implemented; any other value yields an
    empty DataFrame.

    Args:
        query: Search text sent as the ``q`` parameter.
        limit: Maximum number of results requested from the API.
        source: Marketplace identifier.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame with the columns ``Title``, ``Price``, ``Currency``,
        ``Condition``, ``Link`` and ``Marketplace``. An empty DataFrame
        (no columns) is returned when the source is unknown, the request
        fails, the response is not HTTP 200, the body is not valid JSON, or
        the search has no results.
    """
    if source != "mercadolibre":
        return pd.DataFrame()

    base_url = "https://api.mercadolibre.com/sites/MLB/search"
    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.get(
            base_url, params={'q': query, 'limit': limit}, timeout=timeout
        )
    except requests.RequestException:
        # A network failure is treated like a non-200 answer: no live data.
        return pd.DataFrame()

    if response.status_code != 200:
        return pd.DataFrame()

    try:
        items = response.json().get('results', [])
    except ValueError:
        return pd.DataFrame()

    if not items:
        # An empty result list has no columns to select, so bail out early
        # instead of raising KeyError on the column selection below.
        return pd.DataFrame()

    column_map = {
        'title': 'Title',
        'price': 'Price',
        'currency_id': 'Currency',
        'condition': 'Condition',
        'permalink': 'Link',
    }
    # reindex keeps the expected columns even if the API omits one of them
    # (the missing column is filled with NaN rather than raising).
    df = (
        pd.DataFrame(items)
        .reindex(columns=list(column_map))
        .rename(columns=column_map)
    )
    df['Marketplace'] = "Mercado Livre"
    return df
def refinar_resultados(df, exclude_word="conjunto", include_word=False):
    """Drop listings that look like kits, multi-unit packs or sets.

    A row is removed when its title (compared case-insensitively):
      * contains the substring "kit";
      * contains a number followed by unidade(s), pacote(s) or caixa(s);
      * contains ``exclude_word``, unless ``include_word`` is true.

    The input frame is left untouched; titles are converted to ``str`` only
    in the returned frame.

    Args:
        df: DataFrame with a ``Title`` column.
        exclude_word: Pattern whose presence in a title excludes the row.
            It is passed to ``Series.str.contains`` with its default regex
            interpretation.
        include_word: When true, rows containing ``exclude_word`` are kept.

    Returns:
        A new DataFrame holding only the rows that passed every filter.
    """
    # Build the string titles separately and attach them to a new frame so
    # the caller's DataFrame is not modified as a side effect.
    titles = df['Title'].astype(str)
    df_refinado = df.assign(Title=titles)

    is_kit = titles.str.contains("kit", case=False, na=False)

    # Non-capturing groups: str.contains only needs a yes/no answer, and
    # capture groups make pandas emit a "match groups" UserWarning.
    padrao_unidades = r'\b\d+\s*(?:unidade|unidades|pacote|pacotes|caixa|caixas)\b'
    is_multi_unit = titles.str.contains(padrao_unidades, case=False, regex=True)

    keep = ~is_kit & ~is_multi_unit
    if not include_word:
        # Exclude results containing the word when it was not asked for.
        keep &= ~titles.str.contains(exclude_word, case=False)

    return df_refinado[keep]
def get_best_match(query, choices, limit=15):
    """Return the choices that fuzzy-match ``query`` with a score above 70.

    At most ``limit`` candidates are requested from RapidFuzz (scored with
    ``fuzz.WRatio``); of those, only the ones scoring strictly above 70 are
    returned, in the order RapidFuzz produced them.
    """
    ranked = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    accepted = []
    for candidate in ranked:
        # Each result is indexable: position 0 is the choice, 1 its score.
        if candidate[1] > 70:
            accepted.append(candidate[0])
    return accepted
def filtrar_itens_similares(df, termo_pesquisa, limit=15):
    """Keep only the rows whose ``Title`` fuzzy-matches the search term.

    The titles are ranked by ``get_best_match`` (up to ``limit`` candidates)
    and every row whose title is among the accepted matches is returned.
    """
    matched_titles = get_best_match(
        termo_pesquisa, df['Title'].tolist(), limit=limit
    )
    keep_mask = df['Title'].isin(matched_titles)
    return df[keep_mask]
def calcular_fator_avaliacao(titulo, EC, PU):
    """Compute the valuation factor for an asset class.

    The asset class is looked up in ``bens_df`` by its ``TITULO``; the
    ``VIDA_UTIL`` (VU) and ``VALOR_RESIDUAL`` (VR) of the first matching row
    feed the formula together with the condition score and the period used.

    Args:
        titulo: Value to match against ``bens_df['TITULO']``.
        EC: Condition label, one of 'Excelente', 'Bom', 'Regular', 'Péssimo'.
        PU: Period used; anything ``float()`` accepts (the UI labels it in
            years).

    Returns:
        ``max((4*EC_score + 6*PVU - 3*PUB) / 100, VR)``, or ``None`` when no
        row of ``bens_df`` has the given ``titulo``.

    Raises:
        KeyError: If ``EC`` is not one of the four known labels.
        TypeError, ValueError: If ``PU`` cannot be converted to ``float``.
    """
    filtered_df = bens_df[bens_df['TITULO'] == titulo]
    if filtered_df.empty:
        return None  # Unknown asset class; the caller decides how to report it.

    bem_info = filtered_df.iloc[0]
    VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
    ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]

    # Convert first: the conversion used to sit inside a tuple assignment,
    # so both scores below were computed with the unconverted value.
    PU = float(PU)
    # Both scores are capped at 10.
    PVU = min(10 - (PU - 1) * (10 / VU), 10)
    PUB = min(10 - ((VU - PU) - 1) * (10 / VU), 10)

    # The factor never drops below the residual value of the asset class.
    return max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
def select_nearest_items(df, query, similarity_threshold=70, max_items=5):
    """Pick listings close to the typical price, one per marketplace.

    Steps:
      1. Score every title against ``query`` with ``fuzz.WRatio`` and keep
         rows scoring strictly above ``similarity_threshold``.
      2. Keep rows priced at or below the 75th percentile of those rows.
      3. Take as target price the smallest mode of the remaining prices
         (falling back to the median when there is no mode).
      4. Sort by distance to the target price, then by similarity
         (descending), and keep the first row of each marketplace, up to
         ``max_items`` rows.

    The input frame is not modified; the result carries the extra
    ``Title_Similarity`` and ``Distance`` columns.

    Args:
        df: DataFrame with ``Title``, ``Price`` and ``Marketplace`` columns.
        query: Search text the titles are compared with.
        similarity_threshold: Minimum (exclusive) WRatio score to keep a row.
        max_items: Maximum number of rows returned.

    Returns:
        The selected rows, or an empty DataFrame when no title is similar
        enough.
    """
    # assign() returns a new frame, so the caller's DataFrame does not gain
    # a Title_Similarity column as a side effect.
    scored = df.assign(
        Title_Similarity=df['Title'].apply(lambda title: fuzz.WRatio(query, title))
    )
    similar = scored[scored['Title_Similarity'] > similarity_threshold]
    if similar.empty:
        # No closely matching titles were found.
        return pd.DataFrame()

    # Discard the most expensive quarter of the similar listings. The
    # explicit copy makes the column assignment below unambiguous.
    price_cap = similar['Price'].quantile(0.75)
    reasonable = similar[similar['Price'] <= price_cap].copy()

    modes = reasonable['Price'].mode()
    target_price = modes.min() if not modes.empty else reasonable['Price'].median()
    reasonable['Distance'] = (reasonable['Price'] - target_price).abs()

    ranked = reasonable.sort_values(
        ['Distance', 'Title_Similarity'], ascending=[True, False]
    )
    # First row per marketplace in ranking order, capped at max_items.
    return ranked.drop_duplicates(subset='Marketplace', keep='first').head(max_items)
def search_with_fallback(query, df, limit=15):
    """Find similar listings, shortening the query until something matches.

    The query is tried in full first; if no title matches, its last word is
    dropped and the search repeated, down to a single word.

    Args:
        query: Search text; split on whitespace into words.
        df: DataFrame of listings with a ``Title`` column.
        limit: Maximum number of fuzzy-match candidates per attempt.

    Returns:
        The rows matched by the longest query prefix that produced any
        match, or an empty DataFrame when nothing matched (or the query has
        no words).
    """
    query_parts = query.split()
    if not query_parts:
        return pd.DataFrame()

    include_conjunto = "conjunto" in query.lower()
    # The refinement does not depend on the shortened query, so it is done
    # once here instead of on every iteration of the loop.
    df_refinado = refinar_resultados(df, include_word=include_conjunto)

    for word_count in range(len(query_parts), 0, -1):
        simplified_query = " ".join(query_parts[:word_count])
        df_filtrado = filtrar_itens_similares(
            df_refinado, simplified_query, limit=limit
        )
        if not df_filtrado.empty:
            return df_filtrado
    return pd.DataFrame()
def integrated_app(query, titulo, EC, PU):
    """Gradio callback: search listings and compute the appraised value.

    Live Mercado Livre results are combined with the pre-collected
    ``data_crawler`` listings, refined, fuzzy-matched against the query and
    reduced to a few representative items. Their mean price is multiplied
    by the valuation factor of the chosen asset class.

    Args:
        query: Free-text search entered by the user.
        titulo: Asset class, matched against ``bens_df['TITULO']``.
        EC: Condition label of the asset.
        PU: Period the asset has been in use.

    Returns:
        A ``(message, DataFrame)`` tuple: the text shown in the calculation
        box and the listings used (empty when the search found nothing).
    """
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()

    # "conjunto" listings are only kept when the user asked for one.
    include_conjunto = "conjunto" in query.lower()
    df_refined = refinar_resultados(df_combined, include_word=include_conjunto)

    df_similares = search_with_fallback(query, df_refined)
    if df_similares.empty:
        return "Nenhum item similar encontrado.", pd.DataFrame()

    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", pd.DataFrame()

    valor_medio = df_nearest['Price'].mean()
    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    if fator_avaliacao is None:
        # The asset class is unknown: report the mean price and the listings
        # instead of failing on arithmetic with None.
        return (
            f"Valor Médio do Bem: R$ {valor_medio:.2f}. "
            "Classificação contábil não encontrada; fator de avaliação indisponível.",
            df_nearest,
        )

    valor_avaliacao = valor_medio * fator_avaliacao
    return (
        f"Valor Médio do Bem: R$ {valor_medio:.2f}, "
        f"Fator de Avaliação: {fator_avaliacao*100:.2f}%, "
        f"Valor de Avaliação: R$ {valor_avaliacao:.2f}",
        df_nearest,
    )
# Gradio UI: four inputs feed integrated_app, which returns the calculation
# text and the table of listings used for it.
iface = gr.Interface(
    fn=integrated_app,
    inputs=[
        gr.Textbox(label="Digite sua consulta"),
        gr.Dropdown(
            label="Classificação Contábil do Bem",
            choices=bens_df['TITULO'].unique().tolist(),
            value="MOBILIÁRIO EM GERAL",
        ),
        gr.Radio(
            label="Estado de Conservação do Bem",
            choices=['Excelente', 'Bom', 'Regular', 'Péssimo'],
            value="Excelente",
        ),
        gr.Number(label="Período utilizado (anos)", value=1),
    ],
    outputs=[
        gr.Textbox(label="Cálculo"),
        gr.Dataframe(label="Resultados da Pesquisa"),
    ],
    theme=gr.themes.Monochrome(),
    title="<span style='color: gray; font-size: 48px;'>Avaliação de Bens Móveis</span>",
    # The last <span> was left unclosed before </b>; it is closed here so the
    # markup is well-formed.
    description="""<p style="text-align: left;"><b><span style='color: gray; font-size: 40px;'>aval</span><span style='color: black; font-size: 40px;'>ia</span><span style='color: gray; font-size: 40px;'>.se</span></b></p>""",
)
iface.launch()
|