Spaces:

estrng
/

csv-agent

Sleeping

File size: 4,058 Bytes

import pandas
import os


def load_csvs_from_folder(folder_path="data"):
    csv_data = {}
    try:
        for filename in os.listdir(folder_path):
            if filename.endswith(".csv"):
                path = os.path.join(folder_path, filename)
                df = pandas.read_csv(path)
                csv_data[filename] = df
        return csv_data
    except Exception as e:
        print("Erro ao carregar CSVs:", e)
        return "ERROR"


def build_metadata2(csv_data: dict) -> dict:
    meta = {}
    first_nfe = next(iter(csv_data.values()), None)

    if first_nfe:
        head = first_nfe.get("head", {})
        items = first_nfe.get("items", [])
        head_fields_with_type = [(k, type(v).__name__) for k, v in head.items()]
        if items:
            item_fields_with_type = [(k, type(v).__name__) for k, v in items[0].items()]
        else:
            item_fields_with_type = []
    else:
        head_fields_with_type = []
        item_fields_with_type = []

    num_nfes = len(csv_data)
    items_counts = [len(nfe.get("items", [])) for nfe in csv_data.values()]
    avg_items_per_nfe = sum(items_counts) / num_nfes if num_nfes > 0 else 0

    from datetime import datetime

    dates = []
    for nfe in csv_data.values():
        date_str = nfe.get("head", {}).get("DATA EMISSÃO")
        if date_str:
            for fmt in (
                "%Y-%m-%d",
                "%d/%m/%Y",
                "%m/%d/%Y",
                "%m/%d/%Y %I:%M:%S %p",
                "%m/%d/%Y %H:%M:%S",
                "%m/%d/%Y %I:%M:%S %p",
            ):
                try:
                    dates.append(datetime.strptime(date_str.strip(), fmt))
                    break
                except Exception:
                    continue
            else:
                try:
                    import pandas as pd

                    dt = pd.to_datetime(date_str, errors="coerce")
                    if pd.notnull(dt):
                        dates.append(dt.to_pydatetime())
                except Exception:
                    pass
    date_range = (
        (min(dates).strftime("%Y-%m-%d"), max(dates).strftime("%Y-%m-%d"))
        if dates
        else (None, None)
    )

    max_invoice_value = None
    max_invoice_chave = None

    for chave, nfe in csv_data.items():
        head = nfe.get("head", {})
        valor = head.get("VALOR NOTA FISCAL")

        if valor is not None:
            try:
                valor_num = float(valor)
            except Exception:
                continue
            if (max_invoice_value is None) or (valor_num > max_invoice_value):
                max_invoice_value = valor_num
                max_invoice_chave = chave

    meta["head_fields_with_type"] = head_fields_with_type
    meta["item_fields_with_type"] = item_fields_with_type
    meta["avg_items_per_nfe"] = avg_items_per_nfe
    meta["max_invoice_value"] = max_invoice_value
    meta["max_invoice_chave"] = max_invoice_chave
    meta["date_range"] = date_range
    meta["num_nfes"] = num_nfes

    return meta


def mount_nfe(csv_data: dict) -> dict:
    nfes = {}
    heads = None
    items = None

    for name, df in csv_data.items():
        if name == "202401_NFs_Cabecalho.csv":
            heads = df
        if name == "202401_NFs_Itens.csv":
            items = df

    if heads is not None:
        head_records = heads.to_dict(orient="records")
        for record in head_records:
            chave = record.get("CHAVE DE ACESSO")
            if chave is not None:
                nfes[chave] = {
                    "head": record,
                    "items": [],
                }

    if items is not None:
        from collections import defaultdict

        items_by_chave = defaultdict(list)

        for record in items.to_dict(orient="records"):
            chave = record.get("CHAVE DE ACESSO")
            if chave in nfes:
                items_by_chave[chave].append(record)

        for chave, item_list in items_by_chave.items():
            nfes[chave]["items"] = item_list

    return nfes