import os

import streamlit as st
from dotenv import load_dotenv
from llama_parse import LlamaParse

# Load variables from a local .env file (if any) before the API key is read.
load_dotenv()
LLAMA_PARSE = os.getenv('LLAMA_PARSE')

# Shared parser instance used by extract_text().
parser = LlamaParse(
    api_key=LLAMA_PARSE,
    result_type="text",  # "markdown" and "text" are available
    num_workers=4,  # if multiple files passed, split in `num_workers` API calls
    verbose=True,
    language="en",  # Optionally you can define a language, default=en
)


@st.cache_data
def extract_text(pdf_path):
    """Parse a document with LlamaParse and return its text as one string.

    The text of every document returned by ``parser.load_data`` is joined
    with newlines, and leading/trailing whitespace is stripped from the
    combined result.

    The function is wrapped in ``st.cache_data``.
    NOTE(review): Streamlit presumably keys the cache on the argument value,
    so a file modified in place at the same path may return a stale result --
    confirm against the Streamlit caching docs.

    Args:
        pdf_path: Value forwarded unchanged to ``parser.load_data``
            (presumably a path to a PDF file -- confirm against callers).

    Returns:
        The combined, whitespace-stripped text; an empty string when the
        parser returns no documents.
    """
    documents = parser.load_data(pdf_path)
    # A single join avoids repeated string concatenation in a loop; strip()
    # removes all leading and trailing whitespace of the combined text.
    return "\n".join(document.text for document in documents).strip()


# Example usage:
# combined_text = extract_text("/app/Non_form_pdfs/chapter-17-web-designing2.pdf")
# print(combined_text)