bens_moveis / app.py
fschwartzer's picture
Update app.py
517f0a7 verified
import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz
from docx import Document
from datetime import datetime
from docx.oxml import OxmlElement
from docx.shared import Cm
bens_df = pd.read_excel('bens_tab.xlsx')
data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]
def get_report(query, df_nearest):
doc = Document()
# Adding a title
doc.add_heading('RELATÓRIO DE AVALIAÇÃO DE VALOR DE MERCADO – EXP.', level=1)
# Adding some details
doc.add_paragraph('Solicitante:')
doc.add_paragraph('Interessado:')
doc.add_paragraph('Proprietário:')
doc.add_paragraph('Objetivo da Avaliação: Definir o valor de mercado dos bens abaixo discriminados para registro no Patrimônio do Município.')
# Adding section heading and content
doc.add_heading('1. Características do(s) Avaliando(s):', level=2)
doc.add_paragraph('1.1 Avaliando 01:')
doc.add_paragraph(f'Descrição: {query}')
# Add evaluation section
doc.add_heading('2. Avaliação:', level=2)
doc.add_paragraph('2.1 Avaliando 01')
# Add market research section
doc.add_heading('3. Pesquisa de Mercado:', level=2)
# Filter and rename columns in df_nearest
df_nearest_filtered = df_nearest[['ID', 'Title', 'Price', 'Link', 'Marketplace']].copy()
df_nearest_filtered.columns = ['Item', 'Descrição', 'Preço', 'Link', 'Fornecedor']
# Here you'll insert the df_nearest_filtered as a table
if not df_nearest_filtered.empty:
table = doc.add_table(rows=1, cols=len(df_nearest_filtered.columns))
table.style = 'Table Grid'
table.autofit = False
table.allow_autofit = False
table.columns[0].width = Cm(3)
table.rows[0].cells[0].width = Cm(3)
hdr_cells = table.rows[0].cells
for i, column_name in enumerate(df_nearest_filtered.columns):
hdr_cells[i].text = column_name
for index, row in df_nearest_filtered.iterrows():
row_cells = table.add_row().cells
for i, value in enumerate(row):
if i == 3:
row_cells[i].text = str(value)
else:
row_cells[i].text = str(value)
today_date = datetime.now().strftime('%d/%m/%Y')
doc.add_paragraph(f'Lajeado/RS, {today_date}')
doc.add_paragraph('Membros da Comissão:')
# Save the document
doc.save('laudo.docx')
def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
if source == "mercadolibre":
BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
params = {'q': query, 'limit': limit}
response = requests.get(BASE_URL, params=params)
if response.status_code == 200:
data = response.json()
items = data.get('results', [])
df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
df['Marketplace'] = "Mercado Livre"
return df
return pd.DataFrame()
def refinar_resultados(df, include_word=[]):
df['Title'] = df['Title'].astype(str)
# Define a list of keywords to exclude, indicating multiples
exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades", "Kits", " e "]
# Add conditional exclusion for words not included in the query
exclude_patterns = [keyword for keyword in exclude_keywords if keyword not in include_word]
# Combine all exclude patterns into a single regex pattern
exclude_pattern = r'\b(' + '|'.join(exclude_patterns) + r')\b|\b(\d+)\s*(unidade|pacotes|caixas)\b'
# Perform the filtering in one operation
df_refinado = df[~df['Title'].str.contains(exclude_pattern, case=False, regex=True, na=False)]
return df_refinado
def get_best_match(query, choices, limit=50):
# Using RapidFuzz for improved performance and fuzzy matching
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
return [match[0] for match in matches if match[1] > 75]
def match_query_words_in_titles(query, title):
"""
Check if all words in the query have a close match within the title.
Returns True if all words match to a certain degree; False otherwise.
"""
query_words = query.lower().split()
match_threshold = 75 # Adjust this threshold as needed
for word in query_words:
# Find the best match for each word in the query within the title
match_score = fuzz.partial_ratio(word, title.lower())
if match_score < match_threshold:
return False # If any word doesn't match well enough, return False
return True # All words matched well enough
def filtrar_itens_similares(df, termo_pesquisa, limit=50):
# Apply the match function to each title, filtering for those that match the query words
matches = df['Title'].apply(lambda title: match_query_words_in_titles(termo_pesquisa, title))
df_filtrado = df[matches]
# Further refine the list to the top N matches based on overall similarity to the query
df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(lambda title: fuzz.WRatio(termo_pesquisa, title))
df_filtrado = df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)
return df_filtrado
def calcular_fator_avaliacao(titulo, EC, PU):
filtered_df = bens_df[bens_df['TITULO'] == titulo]
if filtered_df.empty:
return None # Or handle the error as needed
bem_info = filtered_df.iloc[0]
VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]
PU, PVU, PUB = float(PU), min(10 - ((PU - 1) * (10 / VU)), 10), min(10 - (((VU - PU) - 1) * (10 / VU)), 10)
fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
return fator_avaliacao
def select_nearest_items(df, query):
# Lower the title similarity threshold if necessary
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
df_filtered = df[df['Title_Similarity'] > 75] # Adjusted threshold
# Calculate mode price in a more inclusive manner
mode_price = df_filtered['Price'].mode()
if mode_price.empty:
target_price = df_filtered['Price'].median()
else:
target_price = mode_price.min()
df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])
# Ensure diversity in marketplaces
marketplaces_selected = set()
results = []
for _, row in df_sorted.iterrows():
if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
results.append(row)
marketplaces_selected.add(row['Marketplace'])
if len(results) >= 5:
break
return pd.DataFrame(results)
def search_with_fallback(query, df, limit=50):
query_parts = query.split()
include_conjunto = "conjunto" in query.lower()
for i in range(len(query_parts), 0, -1):
simplified_query = " ".join(query_parts[:i])
include_word = ["conjunto"] if include_conjunto else [] # Ensure include_word is a list
df_refinado = refinar_resultados(df, include_word=include_word)
df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)
if not df_filtrado.empty:
return df_filtrado
return pd.DataFrame()
def integrated_app(query, titulo, EC, PU, selected_rows):
include_conjunto = "conjunto" in query.lower()
df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
if df_combined.empty:
return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
# Pass whether "conjunto" is part of the original query
include_word = ["conjunto"] if include_conjunto else []
df_refined = refinar_resultados(df_combined, include_word=include_word)
df_similares = search_with_fallback(query, df_refined)
if df_similares.empty:
return "Nenhum item similar encontrado.", pd.DataFrame()
df_nearest = select_nearest_items(df_similares, query)
if df_nearest.empty:
return "Nenhum resultado próximo encontrado.", pd.DataFrame()
if selected_rows.strip():
selected_indices = [int(idx) for idx in selected_rows.split(',') if idx.isdigit()]
df_nearest = df_nearest.iloc[selected_indices]
# After calculating df_nearest and before returning it
df_nearest.reset_index(drop=True, inplace=True)
df_nearest['ID'] = df_nearest.index
# Reorder DataFrame columns to make ID first
column_order = ['ID'] + [col for col in df_nearest.columns if col != 'ID']
df_nearest = df_nearest[column_order]
fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
get_report(query, df_nearest)
return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest, 'laudo.docx'
iface = gr.Interface(fn=integrated_app,
inputs=[gr.Textbox(label="Digite sua consulta"),
gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
gr.Number(label="Período utilizado (anos)", value=1),
gr.Textbox(label="IDs das linhas selecionadas (separadas por vírgula)", placeholder="Exemplo: 0,2,5")], # Using Textbox for IDs
outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa"), gr.File(label="Laudo de Avaliação")],
theme=gr.themes.Monochrome(),
title=f"""
<p style="text-align: left;">
<b><span style='color: grey; font-size: 50px;'>aval</span><span style='color: orange; font-size: 50px;'>ia</span><span style='color: grey; font-size: 50px;'>.se</span></b>
""",
description="""<p style="text-align: left;">Aplicativo para avaliação de Bens Móveis</p>""")
iface.launch()