Spaces:

fschwartzer
/

bens_moveis

Running

File size: 10,729 Bytes

c5a97b3
 
 
c038e47
3c5c245
d4e0320
923fcc9
 
c5a97b3
99aed09
53ae920
1a5aa7c
b4cb87b
99aed09
6003014
3c5c245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2df76af
 
 
e7525b4
 
 
 
 
6dec4eb
e7525b4
 
27de34f
f906a7c
 
517f0a7
 
27de34f
e7525b4
27de34f
 
e7525b4
27de34f
 
6dec4eb
 
 
 
 
38df220
3c5c245
 
 
 
 
 
 
 
 
2598bbd
7a6bd46
 
 
 
2598bbd
 
601b79b
 
 
 
 
2598bbd
7a6bd46
1efcf05
2c05d82
905b603
 
489461c
905b603
 
1efcf05
905b603
 
 
 
 
 
 
3992853
 
905b603
5ef4720
2237b4d
5d4f445
94b2524
07df588
78633a0
 
 
 
 
 
94b2524
78633a0
 
 
 
 
 
 
 
 
5ef4720
78633a0
 
 
 
 
 
 
 
2237b4d
c5a97b3
2fe5b5e
d3db32a
 
601b79b
d3db32a
 
601b79b
 
99aed09
601b79b
aff5c22
f308877
 
f3ff845
6baa204
2237b4d
94b2524
6baa204
 
 
 
 
 
 
f555ad2
6baa204
 
f555ad2
6baa204
 
 
f555ad2
 
5ef4720
6baa204
 
 
5ef4720
f555ad2
 
6baa204
 
2237b4d
5ef4720
2237b4d
2c05d82
 
2237b4d
 
7c67564
 
2c05d82
 
2237b4d
 
2c05d82
2237b4d
c1badbd
7c67564
ea98fbe
59e4db6
2237b4d
074db95
2237b4d
074db95
2598bbd
c5a97b3
2c05d82
ae59a9a
59e4db6
e5b70df
2237b4d
 
78ac961
 
f2141ad
2c05d82
2237b4d
 
601b79b
fbf4f69
 
 
 
550146d
94b2524
 
 
550146d
 
 
 
 
2237b4d
601b79b
6003014
3c5c245
99aed09
59e4db6
3bd1e98
f308877
601b79b
4d527e1
fbf4f69
550146d
3c5c245
205d4a0
8352a3e
 
baa4db4
8352a3e
 
3bd1e98
550146d
a4241c5

import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz
from docx import Document
from datetime import datetime
from docx.oxml import OxmlElement
from docx.shared import Cm

bens_df = pd.read_excel('bens_tab.xlsx')

data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]

def get_report(query, df_nearest):
    doc = Document()

    # Adding a title
    doc.add_heading('RELATÓRIO DE AVALIAÇÃO DE VALOR DE MERCADO – EXP.', level=1)

    # Adding some details
    doc.add_paragraph('Solicitante:')
    doc.add_paragraph('Interessado:')
    doc.add_paragraph('Proprietário:')
    doc.add_paragraph('Objetivo da Avaliação: Definir o valor de mercado dos bens abaixo discriminados para registro no Patrimônio do Município.')

    # Adding section heading and content
    doc.add_heading('1. Características do(s) Avaliando(s):', level=2)
    doc.add_paragraph('1.1 Avaliando 01:')
    doc.add_paragraph(f'Descrição: {query}')

    # Add evaluation section
    doc.add_heading('2. Avaliação:', level=2)
    doc.add_paragraph('2.1 Avaliando 01')

    # Add market research section
    doc.add_heading('3. Pesquisa de Mercado:', level=2)

    # Filter and rename columns in df_nearest
    df_nearest_filtered = df_nearest[['ID', 'Title', 'Price', 'Link', 'Marketplace']].copy()
    df_nearest_filtered.columns = ['Item', 'Descrição', 'Preço', 'Link', 'Fornecedor']


    # Here you'll insert the df_nearest_filtered as a table
    if not df_nearest_filtered.empty:
        table = doc.add_table(rows=1, cols=len(df_nearest_filtered.columns))
        table.style = 'Table Grid'
        table.autofit = False 
        table.allow_autofit = False
        table.columns[0].width = Cm(3)
        table.rows[0].cells[0].width = Cm(3)
        hdr_cells = table.rows[0].cells
        for i, column_name in enumerate(df_nearest_filtered.columns):
            hdr_cells[i].text = column_name

        for index, row in df_nearest_filtered.iterrows():
            row_cells = table.add_row().cells
            for i, value in enumerate(row):
                if i == 3:
                    row_cells[i].text = str(value)
                else:
                    row_cells[i].text = str(value)


    today_date = datetime.now().strftime('%d/%m/%Y')
    doc.add_paragraph(f'Lajeado/RS, {today_date}')

    doc.add_paragraph('Membros da Comissão:')

    # Save the document
    doc.save('laudo.docx')
    

def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
    if source == "mercadolibre":
        BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
        params = {'q': query, 'limit': limit}
        response = requests.get(BASE_URL, params=params)
        if response.status_code == 200:
            data = response.json()
            items = data.get('results', [])
            df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
            df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
            df['Marketplace'] = "Mercado Livre"
            return df
    return pd.DataFrame()

def refinar_resultados(df, include_word=[]):
    df['Title'] = df['Title'].astype(str)

    # Define a list of keywords to exclude, indicating multiples
    exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades", "Kits", " e "]

    # Add conditional exclusion for words not included in the query
    exclude_patterns = [keyword for keyword in exclude_keywords if keyword not in include_word]

    # Combine all exclude patterns into a single regex pattern
    exclude_pattern = r'\b(' + '|'.join(exclude_patterns) + r')\b|\b(\d+)\s*(unidade|pacotes|caixas)\b'

    # Perform the filtering in one operation
    df_refinado = df[~df['Title'].str.contains(exclude_pattern, case=False, regex=True, na=False)]

    return df_refinado


def get_best_match(query, choices, limit=50):
    # Using RapidFuzz for improved performance and fuzzy matching
    matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    return [match[0] for match in matches if match[1] > 75]
    
def match_query_words_in_titles(query, title):
    """
    Check if all words in the query have a close match within the title.
    Returns True if all words match to a certain degree; False otherwise.
    """
    query_words = query.lower().split()
    match_threshold = 75  # Adjust this threshold as needed

    for word in query_words:
        # Find the best match for each word in the query within the title
        match_score = fuzz.partial_ratio(word, title.lower())
        if match_score < match_threshold:
            return False  # If any word doesn't match well enough, return False

    return True  # All words matched well enough

def filtrar_itens_similares(df, termo_pesquisa, limit=50):
    # Apply the match function to each title, filtering for those that match the query words
    matches = df['Title'].apply(lambda title: match_query_words_in_titles(termo_pesquisa, title))
    df_filtrado = df[matches]

    # Further refine the list to the top N matches based on overall similarity to the query
    df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(lambda title: fuzz.WRatio(termo_pesquisa, title))
    df_filtrado = df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)

    return df_filtrado

def calcular_fator_avaliacao(titulo, EC, PU):
    filtered_df = bens_df[bens_df['TITULO'] == titulo]
    if filtered_df.empty:
        return None  # Or handle the error as needed

    bem_info = filtered_df.iloc[0]
    VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
    ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]

    PU, PVU, PUB = float(PU), min(10 - ((PU - 1) * (10 / VU)), 10), min(10 - (((VU - PU) - 1) * (10 / VU)), 10)
    fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
    return fator_avaliacao

def select_nearest_items(df, query):
    # Lower the title similarity threshold if necessary
    df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
    df_filtered = df[df['Title_Similarity'] > 75]  # Adjusted threshold

    # Calculate mode price in a more inclusive manner
    mode_price = df_filtered['Price'].mode()
    if mode_price.empty:
        target_price = df_filtered['Price'].median()
    else:
        target_price = mode_price.min()

    df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
    df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])

    # Ensure diversity in marketplaces
    marketplaces_selected = set()
    results = []

    for _, row in df_sorted.iterrows():
        if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
            results.append(row)
            marketplaces_selected.add(row['Marketplace'])

        if len(results) >= 5:
            break

    return pd.DataFrame(results)


def search_with_fallback(query, df, limit=50):
    query_parts = query.split()
    include_conjunto = "conjunto" in query.lower()
    
    for i in range(len(query_parts), 0, -1):
        simplified_query = " ".join(query_parts[:i])
        include_word = ["conjunto"] if include_conjunto else []  # Ensure include_word is a list
        df_refinado = refinar_resultados(df, include_word=include_word)
        df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)
        
        if not df_filtrado.empty:
            return df_filtrado

    return pd.DataFrame()


def integrated_app(query, titulo, EC, PU, selected_rows):
    include_conjunto = "conjunto" in query.lower()
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)

    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()

    # Pass whether "conjunto" is part of the original query
    include_word = ["conjunto"] if include_conjunto else []
    df_refined = refinar_resultados(df_combined, include_word=include_word)

    df_similares = search_with_fallback(query, df_refined)

    if df_similares.empty:
        return "Nenhum item similar encontrado.", pd.DataFrame()

    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", pd.DataFrame()

    if selected_rows.strip():
        selected_indices = [int(idx) for idx in selected_rows.split(',') if idx.isdigit()]
        df_nearest = df_nearest.iloc[selected_indices]

    # After calculating df_nearest and before returning it
    df_nearest.reset_index(drop=True, inplace=True)
    df_nearest['ID'] = df_nearest.index

    # Reorder DataFrame columns to make ID first
    column_order = ['ID'] + [col for col in df_nearest.columns if col != 'ID']
    df_nearest = df_nearest[column_order]


    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
    get_report(query, df_nearest)
    return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest, 'laudo.docx'


iface = gr.Interface(fn=integrated_app,
                     inputs=[gr.Textbox(label="Digite sua consulta"),
                             gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
                             gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
                             gr.Number(label="Período utilizado (anos)", value=1),
                             gr.Textbox(label="IDs das linhas selecionadas (separadas por vírgula)", placeholder="Exemplo: 0,2,5")],  # Using Textbox for IDs
                     outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa"), gr.File(label="Laudo de Avaliação")],
                     theme=gr.themes.Monochrome(),
                     title=f"""
                     <p style="text-align: left;">
                     <b><span style='color: grey; font-size: 50px;'>aval</span><span style='color: orange; font-size: 50px;'>ia</span><span style='color: grey; font-size: 50px;'>.se</span></b>
                     """,
                     description="""<p style="text-align: left;">Aplicativo para avaliação de Bens Móveis</p>""")


iface.launch()