# Commit f239de8 — José Ivan R. de Oliveira (estrng)
# fix: remove unnecessary os import and simplify model name assignment in CsvAgent
from langchain_openai import ChatOpenAI
from prompt_builder import build_automat_prompt, build_role_task_input
from utils import clean_input
import tiktoken
from pydantic import SecretStr
class CsvAgent:
    """LLM-backed agent that answers questions about NF-e (invoice) CSV data.

    Wraps a ``ChatOpenAI`` client and assembles prompts from three parts:
    the cleaned user question, CSV-level metadata, and a sample of
    retrieved documents.
    """

    def __init__(
        self,
        api_key: str,
        model: str = "gpt-4.1-nano",
        temperature: float = 0,
        timeout: int = 30,
    ):
        """Configure the underlying ChatOpenAI client.

        Args:
            api_key: OpenAI API key (wrapped in ``SecretStr`` before use).
            model: Chat model name passed through to ``ChatOpenAI``.
            temperature: Sampling temperature (0 = most deterministic).
            timeout: Request timeout in seconds.
        """
        self.model_name = model
        self.temperature = temperature
        self.api_key = api_key
        self.timeout = timeout
        self.llm = ChatOpenAI(
            temperature=self.temperature,
            api_key=SecretStr(self.api_key),
            model=self.model_name,
            timeout=self.timeout,
        )

    def count_tokens(self, prompt: str, model: str = "o200k_base") -> int:
        """Return the number of tokens ``prompt`` encodes to.

        NOTE: despite its name, ``model`` is a tiktoken *encoding* name
        (e.g. ``"o200k_base"``), not a chat-model name. The parameter name
        is kept as-is for backward compatibility with existing callers.
        """
        return len(tiktoken.get_encoding(model).encode(prompt))

    @staticmethod
    def _format_fields(pairs) -> str:
        """Render an iterable of (name, type) pairs as ``"name (type), ..."``."""
        return ", ".join(f"{k} ({v})" for k, v in pairs)

    def _build_metadata_text(self, csv_meta: dict) -> str:
        """Assemble the METADATA prompt section from ``csv_meta``.

        Every key is optional; only entries that are present are rendered,
        in a fixed order. An empty/None ``csv_meta`` yields just the header.
        """
        parts = ["METADATA:\n"]
        if csv_meta:
            if "head_fields_with_type" in csv_meta:
                parts.append(
                    "- Header fields: "
                    + self._format_fields(csv_meta["head_fields_with_type"])
                    + "\n"
                )
            if "item_fields_with_type" in csv_meta:
                parts.append(
                    "- Item fields: "
                    + self._format_fields(csv_meta["item_fields_with_type"])
                    + "\n"
                )
            if "num_nfes" in csv_meta:
                parts.append(f"- Number of invoices: {csv_meta['num_nfes']}\n")
            if "avg_items_per_nfe" in csv_meta:
                parts.append(
                    f"- Average items per invoice: {csv_meta['avg_items_per_nfe']:.2f}\n"
                )
            if "date_range" in csv_meta:
                parts.append(
                    f"- Date range: {csv_meta['date_range'][0]} to {csv_meta['date_range'][1]}\n"
                )
            # Both the value and its access key must be present to report the max.
            if "max_invoice_value" in csv_meta and "max_invoice_chave" in csv_meta:
                parts.append(
                    f"- Max invoice value: {csv_meta['max_invoice_value']} (Access Key: {csv_meta['max_invoice_chave']})\n"
                )
        return "".join(parts)

    def _build_sample_text(self, docs: list) -> str:
        """Assemble the RELEVANT DATA SAMPLE prompt section from ``docs``.

        Each doc is expected to expose ``.metadata`` (dict) and
        ``.page_content`` (str), as LangChain ``Document`` objects do.
        """
        parts = ["\n\nRELEVANT DATA SAMPLE:\n"]
        for doc in docs:
            meta = doc.metadata
            doc_type = meta.get("type")
            if doc_type == "head":
                parts.append(f"Invoice Access Key: {meta.get('chave')} | Type: Header\n")
            elif doc_type == "item":
                parts.append(
                    f"Invoice Access Key: {meta.get('chave')} | Type: Item | Item Index: {meta.get('item_idx')}\n"
                )
            else:
                # Unknown doc type: dump raw metadata so nothing is silently lost.
                parts.append(f"Metadata: {meta}\n")
            parts.append(doc.page_content + "\n\n")
        return "".join(parts)

    def _invoke(self, prompt, error_label: str) -> str:
        """Send ``prompt`` to the LLM and coerce the reply to ``str``.

        Shared by ``ask`` and ``format_summary``. Any failure (network,
        auth, rate limit, ...) is logged with ``error_label`` and mapped to
        the best-effort fallback string, preserving the original behavior
        of never raising to the caller.
        """
        try:
            response = self.llm.invoke(prompt)
            content = response.content if hasattr(response, "content") else response
            if isinstance(content, str):
                return content
            return str(content)
        except Exception as e:
            print(f"Error in {error_label}:", e)
            return "Try again later"

    def ask(self, user_input: str, docs: list, csv_meta: dict) -> str:
        """Answer ``user_input`` using retrieved ``docs`` and ``csv_meta`` context.

        Returns the model's textual answer, or ``"Try again later"`` on error.
        """
        clean_question = clean_input(user_input)
        context = f"{self._build_metadata_text(csv_meta)}\n\n{self._build_sample_text(docs)}"
        prompt = build_automat_prompt(clean_question, context=context)
        # Log token usage so oversized prompts are visible during development.
        token_count = self.count_tokens(prompt, model="o200k_base")
        print(f"Token count for prompt: {token_count}")
        return self._invoke(prompt, "ask_agent")

    def format_summary(self, summary: dict, lang: str = "pt", query: str = "") -> str:
        """Render a structured ``summary`` dict as prose in ``lang``.

        Returns the model's formatted text, or ``"Try again later"`` on error.
        """
        prompt = build_role_task_input(summary, lang=lang, query=query)
        return self._invoke(prompt, "format_summary")