"""avalia.se — market-value appraisal app for municipal movable assets.

Pipeline: fetch comparable listings (Mercado Livre API + a pre-crawled CSV),
fuzzy-filter them against the user's query, pick the nearest-priced items
across marketplaces, compute an appraisal factor from the asset's accounting
class / condition / age, and render a DOCX report ('laudo.docx').
"""

import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz
from docx import Document
from datetime import datetime
from docx.oxml import OxmlElement
from docx.shared import Cm

# Reference table of asset classes (useful life / residual value) and the
# pre-crawled marketplace dataset, both loaded once at startup.
bens_df = pd.read_excel('bens_tab.xlsx')
data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]


def get_report(query, df_nearest):
    """Render the appraisal report for *query* and save it as 'laudo.docx'.

    Args:
        query: free-text description of the asset being appraised.
        df_nearest: DataFrame with columns ID/Title/Price/Link/Marketplace;
            rendered as the market-research table of the report.
    """
    doc = Document()
    doc.add_heading('RELATÓRIO DE AVALIAÇÃO DE VALOR DE MERCADO – EXP.', level=1)
    doc.add_paragraph('Solicitante:')
    doc.add_paragraph('Interessado:')
    doc.add_paragraph('Proprietário:')
    doc.add_paragraph('Objetivo da Avaliação: Definir o valor de mercado dos bens abaixo discriminados para registro no Patrimônio do Município.')

    doc.add_heading('1. Características do(s) Avaliando(s):', level=2)
    doc.add_paragraph('1.1 Avaliando 01:')
    doc.add_paragraph(f'Descrição: {query}')

    doc.add_heading('2. Avaliação:', level=2)
    doc.add_paragraph('2.1 Avaliando 01')

    doc.add_heading('3. Pesquisa de Mercado:', level=2)

    # Rename columns to the Portuguese labels used in the report table.
    df_nearest_filtered = df_nearest[['ID', 'Title', 'Price', 'Link', 'Marketplace']].copy()
    df_nearest_filtered.columns = ['Item', 'Descrição', 'Preço', 'Link', 'Fornecedor']

    if not df_nearest_filtered.empty:
        table = doc.add_table(rows=1, cols=len(df_nearest_filtered.columns))
        table.style = 'Table Grid'
        table.autofit = False
        table.allow_autofit = False
        table.columns[0].width = Cm(3)
        table.rows[0].cells[0].width = Cm(3)

        hdr_cells = table.rows[0].cells
        for i, column_name in enumerate(df_nearest_filtered.columns):
            hdr_cells[i].text = column_name

        for _, row in df_nearest_filtered.iterrows():
            row_cells = table.add_row().cells
            for i, value in enumerate(row):
                # BUG FIX: the original `if i == 3:` branch was byte-identical
                # to its `else` branch (dead code) — collapsed to one statement.
                row_cells[i].text = str(value)

    today_date = datetime.now().strftime('%d/%m/%Y')
    doc.add_paragraph(f'Lajeado/RS, {today_date}')
    doc.add_paragraph('Membros da Comissão:')

    doc.save('laudo.docx')


def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
    """Fetch marketplace listings for *query* as a normalized DataFrame.

    Returns a DataFrame with columns Title/Price/Currency/Condition/Link/
    Marketplace, or an empty DataFrame on HTTP failure, unknown *source*,
    or no results.
    """
    if source == "mercadolibre":
        BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
        params = {'q': query, 'limit': limit}
        response = requests.get(BASE_URL, params=params)
        if response.status_code == 200:
            items = response.json().get('results', [])
            # BUG FIX: selecting named columns from a DataFrame built from an
            # empty list raises KeyError — guard against zero results.
            if items:
                df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
                df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
                df['Marketplace'] = "Mercado Livre"
                return df
    return pd.DataFrame()


def refinar_resultados(df, include_word=None):
    """Drop listings whose titles suggest multi-unit lots (kits, packs, ...).

    Args:
        df: listings DataFrame with a 'Title' column (coerced to str in place).
        include_word: keywords that appear in the user's own query and must
            therefore NOT be used for exclusion (e.g. ["conjunto"]).
    """
    # BUG FIX: mutable default argument ([]) replaced by None sentinel.
    include_word = include_word if include_word is not None else []
    df['Title'] = df['Title'].astype(str)

    # Keywords that indicate a multiple-item listing, unless the user
    # explicitly asked for them.
    exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades", "Kits", " e "]
    exclude_patterns = [keyword for keyword in exclude_keywords if keyword not in include_word]

    # One combined regex: any exclusion keyword, or "<number> unidade/pacotes/caixas".
    exclude_pattern = r'\b(' + '|'.join(exclude_patterns) + r')\b|\b(\d+)\s*(unidade|pacotes|caixas)\b'
    return df[~df['Title'].str.contains(exclude_pattern, case=False, regex=True, na=False)]


def get_best_match(query, choices, limit=50):
    """Return up to *limit* choices fuzzy-matching *query* with WRatio > 75."""
    matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    return [match[0] for match in matches if match[1] > 75]


def match_query_words_in_titles(query, title):
    """
    Check if all words in the query have a close match within the title.
    Returns True if all words match to a certain degree; False otherwise.
    """
    match_threshold = 75  # minimum partial-ratio score per query word
    title_lower = title.lower()
    for word in query.lower().split():
        if fuzz.partial_ratio(word, title_lower) < match_threshold:
            return False  # any poorly-matching word disqualifies the title
    return True


def filtrar_itens_similares(df, termo_pesquisa, limit=50):
    """Keep titles where every query word matches; rank by overall similarity.

    Returns the top *limit* rows sorted by WRatio similarity to the query,
    with an added 'Overall_Similarity' column.
    """
    matches = df['Title'].apply(lambda title: match_query_words_in_titles(termo_pesquisa, title))
    # BUG FIX: .copy() — assigning a column on a boolean-mask slice triggers
    # pandas SettingWithCopyWarning and may silently fail to write.
    df_filtrado = df[matches].copy()
    df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(
        lambda title: fuzz.WRatio(termo_pesquisa, title))
    return df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)


def calcular_fator_avaliacao(titulo, EC, PU):
    """Compute the appraisal factor for an asset class.

    Args:
        titulo: accounting class name, looked up in bens_df['TITULO'].
        EC: conservation state ('Excelente'/'Bom'/'Regular'/'Péssimo').
        PU: years of use so far.

    Returns:
        The appraisal factor (floored at the class's residual value), or
        None when *titulo* is not found in the reference table.
    """
    filtered_df = bens_df[bens_df['TITULO'] == titulo]
    if filtered_df.empty:
        return None  # caller must handle unknown accounting class

    bem_info = filtered_df.iloc[0]
    VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
    ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]

    PU = float(PU)
    # PVU: score for period already used; PUB: score for remaining life.
    # Both clamped to 10; derived from the class's useful life (VU).
    PVU = min(10 - ((PU - 1) * (10 / VU)), 10)
    PUB = min(10 - (((VU - PU) - 1) * (10 / VU)), 10)

    # Weighted combination, never below the residual-value floor (VR).
    return max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)


def select_nearest_items(df, query):
    """Pick up to 5 listings near the modal price, one per marketplace.

    Filters to titles with WRatio > 75 against *query*, targets the modal
    (or median) price, and greedily selects the closest-priced item from
    each distinct marketplace.
    """
    # BUG FIX: .copy() before adding columns — avoids chained-assignment
    # on a slice of the caller's frame.
    df = df.copy()
    df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
    df_filtered = df[df['Title_Similarity'] > 75].copy()

    # Target price: smallest mode if one exists, otherwise the median.
    mode_price = df_filtered['Price'].mode()
    target_price = df_filtered['Price'].median() if mode_price.empty else mode_price.min()

    df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
    df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])

    # Greedy pick: at most one listing per marketplace, at most 5 in total.
    marketplaces_selected = set()
    results = []
    for _, row in df_sorted.iterrows():
        if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
            results.append(row)
            marketplaces_selected.add(row['Marketplace'])
        if len(results) >= 5:
            break
    return pd.DataFrame(results)


def search_with_fallback(query, df, limit=50):
    """Search with progressively shorter query prefixes until items match.

    Tries the full query first, then drops trailing words one at a time,
    returning the first non-empty similar-items result (or an empty frame).
    """
    query_parts = query.split()
    include_conjunto = "conjunto" in query.lower()
    include_word = ["conjunto"] if include_conjunto else []

    # PERF: refinement does not depend on the prefix length — hoisted out
    # of the fallback loop (was recomputed every iteration).
    df_refinado = refinar_resultados(df, include_word=include_word)

    for i in range(len(query_parts), 0, -1):
        simplified_query = " ".join(query_parts[:i])
        df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)
        if not df_filtrado.empty:
            return df_filtrado
    return pd.DataFrame()


def integrated_app(query, titulo, EC, PU, selected_rows):
    """Gradio entry point: search, select, appraise, and report.

    Returns (summary text, results DataFrame, report file path or None).
    """
    include_conjunto = "conjunto" in query.lower()
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)

    # BUG FIX: every return must match the 3 outputs wired to the interface
    # (the original error paths returned only 2 values).
    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame(), None

    include_word = ["conjunto"] if include_conjunto else []
    df_refined = refinar_resultados(df_combined, include_word=include_word)
    df_similares = search_with_fallback(query, df_refined)
    if df_similares.empty:
        return "Nenhum item similar encontrado.", pd.DataFrame(), None

    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", pd.DataFrame(), None

    # Optional manual row selection ("0,2,5"); non-numeric and out-of-range
    # IDs are ignored (BUG FIX: previously an out-of-range ID raised IndexError).
    if selected_rows.strip():
        selected_indices = [int(idx) for idx in selected_rows.split(',') if idx.isdigit()]
        selected_indices = [i for i in selected_indices if i < len(df_nearest)]
        if selected_indices:
            df_nearest = df_nearest.iloc[selected_indices]

    # Expose a stable 'ID' as the first column for the report / UI.
    df_nearest.reset_index(drop=True, inplace=True)
    df_nearest['ID'] = df_nearest.index
    column_order = ['ID'] + [col for col in df_nearest.columns if col != 'ID']
    df_nearest = df_nearest[column_order]

    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    # BUG FIX: unknown accounting class returned None and crashed the
    # multiplication / formatting below — report it instead.
    if fator_avaliacao is None:
        return "Classificação contábil não encontrada.", df_nearest, None

    valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
    get_report(query, df_nearest)
    return (f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, "
            f"Fator de Avaliação: {fator_avaliacao*100:.2f}%, "
            f"Valor de Avaliação: R$ {valor_avaliacao:.2f}"), df_nearest, 'laudo.docx'


iface = gr.Interface(
    fn=integrated_app,
    inputs=[
        gr.Textbox(label="Digite sua consulta"),
        gr.Dropdown(label="Classificação Contábil do Bem",
                    choices=bens_df['TITULO'].unique().tolist(),
                    value="MOBILIÁRIO EM GERAL"),
        gr.Radio(label="Estado de Conservação do Bem",
                 choices=['Excelente', 'Bom', 'Regular', 'Péssimo'],
                 value="Excelente"),
        gr.Number(label="Período utilizado (anos)", value=1),
        # Free-text row IDs, comma-separated (e.g. "0,2,5").
        gr.Textbox(label="IDs das linhas selecionadas (separadas por vírgula)",
                   placeholder="Exemplo: 0,2,5"),
    ],
    outputs=[
        gr.Textbox(label="Cálculo"),
        gr.Dataframe(label="Resultados da Pesquisa"),
        gr.File(label="Laudo de Avaliação"),
    ],
    theme=gr.themes.Monochrome(),
    title="""

avalia.se """,
    description="""

Aplicativo para avaliação de Bens Móveis

""",
)

iface.launch()