pvanand committed on
Commit
005cbc2
1 Parent(s): 6dc1cf4

Update document_generator_v2.py

Browse files
Files changed (1) hide show
  1. document_generator_v2.py +40 -0
document_generator_v2.py CHANGED
@@ -172,6 +172,7 @@ import psycopg2
172
  from datetime import datetime
173
  import base64
174
  from fastapi import Form
 
175
 
176
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
177
  logger = logging.getLogger(__name__)
@@ -448,6 +449,45 @@ class MarkdownConverter:
448
  markdown += "</div>"
449
  return markdown
450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  router = APIRouter()
452
 
453
  class JsonDocumentResponse(BaseModel):
 
172
  from datetime import datetime
173
  import base64
174
  from fastapi import Form
175
+ from llama_parse import LlamaParse
176
 
177
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
178
  logger = logging.getLogger(__name__)
 
449
  markdown += "</div>"
450
  return markdown
451
 
452
async def load_documents(documents: List[UploadFile]) -> List[str]:
    """
    Load and parse uploaded documents using LlamaParse.

    Each upload is spooled to a uniquely named temporary file, the files are
    handed to LlamaParse, and every temporary file is removed afterwards,
    whether parsing succeeded or not.

    Args:
        documents (List[UploadFile]): List of uploaded document files.

    Returns:
        List[str]: The ``text`` of each parsed document, as returned by
        ``LlamaParse.aload_data``.
    """
    # Local import keeps this block self-contained; the module-level imports
    # are not all visible from here.
    import tempfile

    parser = LlamaParse(
        api_key=os.getenv("LLAMA_PARSE_API_KEY"),
        result_type="markdown",
        num_workers=4,
        verbose=True,
        language="en",
    )

    temp_files = []
    try:
        # Spool uploads to disk inside the try block so that a failure midway
        # still cleans up the files that were already written.
        for doc in documents:
            # The client-supplied filename is untrusted: never use it as a
            # path. Keep only its extension so the parser can still see the
            # file type. (NOTE(review): assumes LlamaParse relies on the file
            # extension; confirm against its documentation.)
            original_name = os.path.basename(doc.filename or "")
            suffix = os.path.splitext(original_name)[1]

            content = await doc.read()
            # delete=False: the file must outlive this `with` so the parser
            # can open it by path; it is removed in the `finally` below.
            with tempfile.NamedTemporaryFile(
                mode="wb", suffix=suffix, delete=False
            ) as buffer:
                temp_files.append(buffer.name)
                buffer.write(content)

        # Use LlamaParse to extract content
        parsed_documents = await parser.aload_data(temp_files)
        return [parsed.text for parsed in parsed_documents]
    finally:
        # Best-effort cleanup: one missing file must not stop the removal of
        # the rest or mask an exception raised above.
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                pass
488
+
489
+
490
+
491
  router = APIRouter()
492
 
493
  class JsonDocumentResponse(BaseModel):