Spaces:

fschwartzer
/

bens_moveis

Sleeping

App Files Files Community

bens_moveis / app.py

fschwartzer

Update app.py

517f0a7 verified 5 months ago

raw

history blame contribute delete

No virus

10.7 kB

	import gradio as gr
	import requests
	import pandas as pd
	from rapidfuzz import process, fuzz
	from docx import Document
	from datetime import datetime
	from docx.oxml import OxmlElement
	from docx.shared import Cm

	bens_df = pd.read_excel('bens_tab.xlsx')

	data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
	data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]

	def get_report(query, df_nearest):
	doc = Document()

	# Adding a title
	doc.add_heading('RELATÓRIO DE AVALIAÇÃO DE VALOR DE MERCADO – EXP.', level=1)

	# Adding some details
	doc.add_paragraph('Solicitante:')
	doc.add_paragraph('Interessado:')
	doc.add_paragraph('Proprietário:')
	doc.add_paragraph('Objetivo da Avaliação: Definir o valor de mercado dos bens abaixo discriminados para registro no Patrimônio do Município.')

	# Adding section heading and content
	doc.add_heading('1. Características do(s) Avaliando(s):', level=2)
	doc.add_paragraph('1.1 Avaliando 01:')
	doc.add_paragraph(f'Descrição: {query}')

	# Add evaluation section
	doc.add_heading('2. Avaliação:', level=2)
	doc.add_paragraph('2.1 Avaliando 01')

	# Add market research section
	doc.add_heading('3. Pesquisa de Mercado:', level=2)

	# Filter and rename columns in df_nearest
	df_nearest_filtered = df_nearest[['ID', 'Title', 'Price', 'Link', 'Marketplace']].copy()
	df_nearest_filtered.columns = ['Item', 'Descrição', 'Preço', 'Link', 'Fornecedor']


	# Here you'll insert the df_nearest_filtered as a table
	if not df_nearest_filtered.empty:
	table = doc.add_table(rows=1, cols=len(df_nearest_filtered.columns))
	table.style = 'Table Grid'
	table.autofit = False
	table.allow_autofit = False
	table.columns[0].width = Cm(3)
	table.rows[0].cells[0].width = Cm(3)
	hdr_cells = table.rows[0].cells
	for i, column_name in enumerate(df_nearest_filtered.columns):
	hdr_cells[i].text = column_name

	for index, row in df_nearest_filtered.iterrows():
	row_cells = table.add_row().cells
	for i, value in enumerate(row):
	if i == 3:
	row_cells[i].text = str(value)
	else:
	row_cells[i].text = str(value)


	today_date = datetime.now().strftime('%d/%m/%Y')
	doc.add_paragraph(f'Lajeado/RS, {today_date}')

	doc.add_paragraph('Membros da Comissão:')

	# Save the document
	doc.save('laudo.docx')


	def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
	if source == "mercadolibre":
	BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
	params = {'q': query, 'limit': limit}
	response = requests.get(BASE_URL, params=params)
	if response.status_code == 200:
	data = response.json()
	items = data.get('results', [])
	df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
	df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
	df['Marketplace'] = "Mercado Livre"
	return df
	return pd.DataFrame()

	def refinar_resultados(df, include_word=[]):
	df['Title'] = df['Title'].astype(str)

	# Define a list of keywords to exclude, indicating multiples
	exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades", "Kits", " e "]

	# Add conditional exclusion for words not included in the query
	exclude_patterns = [keyword for keyword in exclude_keywords if keyword not in include_word]

	# Combine all exclude patterns into a single regex pattern
	exclude_pattern = r'\b(' + '\|'.join(exclude_patterns) + r')\b\|\b(\d+)\s*(unidade\|pacotes\|caixas)\b'

	# Perform the filtering in one operation
	df_refinado = df[~df['Title'].str.contains(exclude_pattern, case=False, regex=True, na=False)]

	return df_refinado


	def get_best_match(query, choices, limit=50):
	# Using RapidFuzz for improved performance and fuzzy matching
	matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
	return [match[0] for match in matches if match[1] > 75]

	def match_query_words_in_titles(query, title):
	"""
	Check if all words in the query have a close match within the title.
	Returns True if all words match to a certain degree; False otherwise.
	"""
	query_words = query.lower().split()
	match_threshold = 75 # Adjust this threshold as needed

	for word in query_words:
	# Find the best match for each word in the query within the title
	match_score = fuzz.partial_ratio(word, title.lower())
	if match_score < match_threshold:
	return False # If any word doesn't match well enough, return False

	return True # All words matched well enough

	def filtrar_itens_similares(df, termo_pesquisa, limit=50):
	# Apply the match function to each title, filtering for those that match the query words
	matches = df['Title'].apply(lambda title: match_query_words_in_titles(termo_pesquisa, title))
	df_filtrado = df[matches]

	# Further refine the list to the top N matches based on overall similarity to the query
	df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(lambda title: fuzz.WRatio(termo_pesquisa, title))
	df_filtrado = df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)

	return df_filtrado

	def calcular_fator_avaliacao(titulo, EC, PU):
	filtered_df = bens_df[bens_df['TITULO'] == titulo]
	if filtered_df.empty:
	return None # Or handle the error as needed

	bem_info = filtered_df.iloc[0]
	VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
	ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]

	PU, PVU, PUB = float(PU), min(10 - ((PU - 1) * (10 / VU)), 10), min(10 - (((VU - PU) - 1) * (10 / VU)), 10)
	fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
	return fator_avaliacao

	def select_nearest_items(df, query):
	# Lower the title similarity threshold if necessary
	df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
	df_filtered = df[df['Title_Similarity'] > 75] # Adjusted threshold

	# Calculate mode price in a more inclusive manner
	mode_price = df_filtered['Price'].mode()
	if mode_price.empty:
	target_price = df_filtered['Price'].median()
	else:
	target_price = mode_price.min()

	df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
	df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])

	# Ensure diversity in marketplaces
	marketplaces_selected = set()
	results = []

	for _, row in df_sorted.iterrows():
	if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
	results.append(row)
	marketplaces_selected.add(row['Marketplace'])

	if len(results) >= 5:
	break

	return pd.DataFrame(results)


	def search_with_fallback(query, df, limit=50):
	query_parts = query.split()
	include_conjunto = "conjunto" in query.lower()

	for i in range(len(query_parts), 0, -1):
	simplified_query = " ".join(query_parts[:i])
	include_word = ["conjunto"] if include_conjunto else [] # Ensure include_word is a list
	df_refinado = refinar_resultados(df, include_word=include_word)
	df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)

	if not df_filtrado.empty:
	return df_filtrado

	return pd.DataFrame()


	def integrated_app(query, titulo, EC, PU, selected_rows):
	include_conjunto = "conjunto" in query.lower()
	df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
	df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)

	if df_combined.empty:
	return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()

	# Pass whether "conjunto" is part of the original query
	include_word = ["conjunto"] if include_conjunto else []
	df_refined = refinar_resultados(df_combined, include_word=include_word)

	df_similares = search_with_fallback(query, df_refined)

	if df_similares.empty:
	return "Nenhum item similar encontrado.", pd.DataFrame()

	df_nearest = select_nearest_items(df_similares, query)
	if df_nearest.empty:
	return "Nenhum resultado próximo encontrado.", pd.DataFrame()

	if selected_rows.strip():
	selected_indices = [int(idx) for idx in selected_rows.split(',') if idx.isdigit()]
	df_nearest = df_nearest.iloc[selected_indices]

	# After calculating df_nearest and before returning it
	df_nearest.reset_index(drop=True, inplace=True)
	df_nearest['ID'] = df_nearest.index

	# Reorder DataFrame columns to make ID first
	column_order = ['ID'] + [col for col in df_nearest.columns if col != 'ID']
	df_nearest = df_nearest[column_order]


	fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
	valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
	get_report(query, df_nearest)
	return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest, 'laudo.docx'


	iface = gr.Interface(fn=integrated_app,
	inputs=[gr.Textbox(label="Digite sua consulta"),
	gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
	gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
	gr.Number(label="Período utilizado (anos)", value=1),
	gr.Textbox(label="IDs das linhas selecionadas (separadas por vírgula)", placeholder="Exemplo: 0,2,5")], # Using Textbox for IDs
	outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa"), gr.File(label="Laudo de Avaliação")],
	theme=gr.themes.Monochrome(),
	title=f"""
	<p style="text-align: left;">
	<b><span style='color: grey; font-size: 50px;'>aval</span><span style='color: orange; font-size: 50px;'>ia</span><span style='color: grey; font-size: 50px;'>.se</span></b>
	""",
	description="""<p style="text-align: left;">Aplicativo para avaliação de Bens Móveis</p>""")


	iface.launch()