File size: 3,494 Bytes
6cc068f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1264943
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import streamlit as st
from dotenv import load_dotenv
import os
from htmlTemplate import css, bot_template, user_template
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
from langchain_community.llms import LlamaCpp
from langchain.embeddings import HuggingFaceEmbeddings 
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer, util
from langchain_openai import AzureOpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_openai import ChatOpenAI



def main():
    """Render the "PDF Insights AI" Streamlit page and wire up its controls.

    The page consists of:

    * a welcome header,
    * a sidebar where PDFs are uploaded, size-checked and, on button press,
      run through ``prepare_docs`` -> ``get_text_chunks`` ->
      ``ingest_into_vectordb`` -> ``get_conversation_chain``,
    * a question box whose input is forwarded to ``handle_userinput`` once a
      conversation chain has been stored in ``st.session_state``.

    NOTE(review): the pipeline helpers and ``handle_userinput`` are not
    defined in the part of the file visible here; they are assumed to exist
    at module level.
    """
    load_dotenv()

    st.set_page_config(
        page_title="PDF Insights AI",
        page_icon=":books:",
        layout="wide",
    )
    st.write(css, unsafe_allow_html=True)

    # Welcome section
    st.title("📚 PDF Insights AI")
    st.markdown("""
    ### Unlock the Knowledge in Your PDFs
    - 🤖 AI-powered document analysis
    - 💬 Ask questions about your uploaded documents
    - 📄 Support for multiple PDF files
    """)

    # Session state survives Streamlit reruns; seed it only on first load.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    max_file_bytes = 200 * 1024 * 1024  # 200 MB

    # File upload section
    with st.sidebar:
        st.header("📤 Upload Documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here",
            type=['pdf'],
            accept_multiple_files=True,
            help="Upload PDF files to analyze. Max file size: 200MB",
        )

        # File validation.
        # Build a new list rather than removing from ``pdf_docs`` while
        # iterating it: in-place removal shifts the remaining items and makes
        # the loop skip the element that follows each rejected file.
        if pdf_docs:
            accepted_docs = []
            for doc in pdf_docs:
                if doc.size > max_file_bytes:
                    st.error(f"File {doc.name} is too large. Maximum file size is 200MB.")
                else:
                    accepted_docs.append(doc)
            pdf_docs = accepted_docs

        if st.button("Process Documents", type="primary"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF file.")
            else:
                with st.spinner("Processing your documents..."):
                    # Broad catch is deliberate: this is the UI boundary, and
                    # any pipeline failure should be shown to the user rather
                    # than crash the page.
                    try:
                        # get pdf text
                        content, metadata = prepare_docs(pdf_docs)

                        # get the text chunks
                        split_docs = get_text_chunks(content, metadata)

                        # create vector store
                        vectorstore = ingest_into_vectordb(split_docs)

                        # create conversation chain
                        st.session_state.conversation = get_conversation_chain(vectorstore)

                        st.success("Documents processed successfully! You can now ask questions.")
                    except Exception as e:
                        st.error(f"An error occurred while processing documents: {e}")

    # Question input section
    user_question = st.text_input(
        "📝 Ask a question about your documents",
        placeholder="What insights can you provide from these documents?",
    )

    if user_question:
        if st.session_state.conversation is None:
            st.warning("Please upload and process documents first.")
        else:
            handle_userinput(user_question)