import base64
import os
import uuid

from IPython import display
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.schema.document import Document
from langchain.schema.messages import HumanMessage, SystemMessage
from langchain.vectorstores import FAISS
from unstructured.partition.pdf import partition_pdf

# Prefer the OPENAI_API_KEY environment variable so a real credential never
# has to be written into this file. The original placeholder value is kept as
# the fallback, so behaviour is unchanged when the variable is not set.
# On Google Colab the key can instead be loaded with
# `google.colab.userdata.get('OPENAI_API_KEY')`.
openai_api_key = os.environ.get("OPENAI_API_KEY", "sdhof")

# Directory handed to partition_pdf as the image-block output location.
output_path = "./images"

# Partition the PDF into elements (text chunks, tables, images).
# NOTE(review): the per-argument notes below follow the `unstructured`
# documentation; confirm them against the installed library version.
raw_pdf_elements = partition_pdf(
    filename="Bosso.pdf",
    # Ask for embedded images to be extracted alongside the text.
    extract_images_in_pdf=True,
    # Ask for table structure to be inferred for table elements.
    infer_table_structure=True,
    # Group elements into chunks along title/section boundaries.
    chunking_strategy="by_title",
    # Chunk sizing: 4000 is passed as the maximum, 3800 as the point after
    # which a new chunk is started, and 2000 as the threshold under which
    # short text is combined.
    max_characters=4000,
    new_after_n_chars=3800,
    combine_text_under_n_chars=2000,
    extract_image_block_output_dir=output_path,
)