Spaces:
Running
Running
import tempfile, os | |
from typing import List | |
from langchain_core.documents import Document as LangchainDocument | |
from llama_index import Document | |
from llama_parse import LlamaParse, ResultType | |
# LlamaParse API keys read from the environment at import time. The list keeps
# one entry per variable, in this order; a variable that is not set yields None.
llama_parser_keys = [
    os.getenv(env_name)
    for env_name in ("LLAMA_CLOUD_API_KEY_POPS", "LLAMA_CLOUD_API_KEY_PEIXE")
]
def handle_pdf_files_from_serializer(files):
    """Copy each uploaded file into its own temporary ``.pdf`` file on disk.

    Args:
        files: Iterable of upload objects exposing ``seek(offset)`` and
            ``chunks()`` (an iterable of byte strings).

    Returns:
        A list with the path of the temporary file written for each upload,
        in input order. The files are created with ``delete=False``, so the
        caller is responsible for removing them.

    Raises:
        Any exception raised while reading an upload or writing its temporary
        file is re-raised after the temporary files created so far have been
        removed.
    """
    temp_paths = []
    try:
        for file in files:
            file.seek(0)  # Rewind in case the upload was already read.
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=".pdf"
            ) as temp_file:
                # Record the path before writing so that a failed write is
                # cleaned up together with the earlier files.
                temp_paths.append(temp_file.name)
                for chunk in file.chunks():
                    temp_file.write(chunk)
    except BaseException:
        # The caller never receives the paths when this function fails, so
        # nobody else could delete these files; remove them here.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass  # Best effort: the original error is what matters.
        raise
    print("\n\nlistaPDFs: ", temp_paths)
    return temp_paths
def remove_pdf_temp_files(listaPDFs):
    """Delete every file whose path is listed in ``listaPDFs``.

    Args:
        listaPDFs: Iterable of file paths to delete.

    Raises:
        OSError: If a file exists but cannot be removed. A path that no
            longer exists is skipped instead of aborting the cleanup.
    """
    for path in listaPDFs:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone; keep going so the remaining files are still removed.
            continue
async def return_document_list_with_llama_parser(
    file: str,
) -> "List[LangchainDocument]":
    """Parse ``file`` with LlamaParse and return one LangChain document per page.

    Each non-empty key in ``llama_parser_keys`` is tried in order. The first
    key whose request succeeds with a non-empty result determines the return
    value; keys that fail or return nothing are skipped.

    Args:
        file: Passed unchanged to ``LlamaParse.aget_json`` (presumably the
            path of the PDF to parse -- confirm against callers).

    Returns:
        A list of ``LangchainDocument`` built from the ``"pages"`` entry of
        the first parsed result. ``page_content`` is the page's ``"md"``
        value and ``metadata`` holds its ``"page"`` value.

    Raises:
        ValueError: If no key produced a result. When a parser call failed,
            the most recent failure is attached as ``__cause__``.
    """
    last_error = None
    for key in llama_parser_keys:
        if not key:
            continue  # Environment variable not set; nothing to try.
        parser = LlamaParse(
            api_key=key,
            result_type=ResultType.JSON,  # Options: 'text', 'markdown', 'json', 'structured'
            language="pt",
            verbose=True,
        )
        try:
            parsed_document = await parser.aget_json(file)
        except Exception as exc:
            # Deliberately not a bare ``except``: task cancellation and
            # interpreter-exit signals must propagate instead of being
            # treated as a bad key.
            print(f"Error with llama parser key ending with {key[-4:]}")
            last_error = exc
            continue  # Move on to the next key.
        if not parsed_document:
            continue
        return [
            LangchainDocument(
                page_content=page.get("md"),  # type: ignore
                metadata={"page": page.get("page")},  # Page number for later reference
            )
            for page in parsed_document[0].get("pages")  # type: ignore
        ]
    # Reached only when the loop ended without any key returning documents.
    raise ValueError("ALGO DEU ERRADO NO PARSER DO LLAMA PARSE:") from last_error