ChatPDF

Sleeping

File size: 2,382 Bytes

e949dee
a70f40e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de1ba4b
 
 
 
 
4a019f2
a70f40e
1024a1a
a70f40e
e949dee
4a019f2
 
 
 
 
 
 
 
 
 
a70f40e
4a019f2
 
a70f40e
4a019f2
 
 
 
 
 
 
 
 
 
 
 
 
fef8c53
a70f40e
4a019f2
a70f40e

import logging
import os

import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
from dotenv import load_dotenv

# Load environment variables from a .env file into the process environment
# before the app starts. Presumably this is where the OpenAI credentials used
# by the embedding/LLM clients below come from -- confirm against deployment.
load_dotenv()

def _extract_text(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    A page for which the extractor yields nothing contributes an empty
    string, so a single text-less page cannot break the concatenation.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def _build_knowledge_base(text):
    """Split *text* into overlapping chunks and index them in a FAISS store."""
    text_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
    )
    chunks = text_splitter.split_text(text)
    return FAISS.from_texts(chunks, OpenAIEmbeddings())


def _answer_question(knowledge_base, question):
    """Answer *question* using the chunks of *knowledge_base* most similar to it."""
    docs = knowledge_base.similarity_search(question)
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=question)
        # Emit the callback's summary of the OpenAI call to stdout.
        print(cb)
    return response


def main():
    """Render the PDF chat page.

    For each uploaded PDF the page extracts its text, shows a question box,
    and, once a question is entered, indexes the text and writes the
    model's answer. Failures are logged with their traceback and reported
    to the user per file, so one bad file does not stop the others.
    """
    logger = logging.getLogger(__name__)

    st.set_page_config(page_title="PDF Chat")
    st.header("Chat with your PDFs 💬")
    # Credit
    st.write(" ")
    st.write("Visit us [here](https://ai-solutions.ai) for more AI Solutions.")
    st.write(" ")
    st.write(" ")

    # Upload PDF files
    pdf_files = st.file_uploader("Upload your PDF files (please do not upload anything confidential for this demo)", type="pdf", accept_multiple_files=True)
    if not pdf_files:
        return

    for idx, pdf_file in enumerate(pdf_files):
        unreadable_message = f"An error occurred while processing '{pdf_file.name}'. This file may be protected by the author, or contain scanned text which this basic demo is not set up to process."

        # Step 1: read the PDF. Only failures here are reported as a problem
        # with the file itself.
        try:
            text = _extract_text(pdf_file)
        except Exception:
            logger.exception("Failed to extract text from %r", pdf_file.name)
            st.error(unreadable_message)
            continue

        # A PDF with no extractable text cannot be indexed; say so up front
        # instead of failing later inside the embedding step.
        if not text.strip():
            st.error(unreadable_message)
            continue

        user_question = st.text_input(f"Ask a question about '{pdf_file.name}':", key=f"question_{idx}")
        if not user_question:
            # Nothing to answer yet, so skip the embedding calls entirely.
            continue

        # Step 2: index and answer. Failures here come from the embedding or
        # LLM calls, so they get their own message rather than blaming the PDF.
        try:
            knowledge_base = _build_knowledge_base(text)
            response = _answer_question(knowledge_base, user_question)
        except Exception:
            logger.exception("Failed to answer a question about %r", pdf_file.name)
            st.error(f"An error occurred while answering your question about '{pdf_file.name}'. Please try again later.")
            continue

        st.write(response)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()