import os

import streamlit as st
from dotenv import load_dotenv

import PyPDF2
from htmlTemplate import css, bot_template, user_template
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
from langchain_community.llms import LlamaCpp
from langchain_community.vectorstores import FAISS
from langchain_openai import AzureOpenAIEmbeddings, ChatOpenAI, OpenAIEmbeddings
from sentence_transformers import SentenceTransformer, util
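

# ---------------------------------------------------------------------------
# NOTE: main() below calls prepare_docs, get_text_chunks, ingest_into_vectordb,
# get_conversation_chain and handle_userinput, whose definitions do not appear
# in this section. The helpers that follow are minimal sketches of what they
# could look like using only the libraries imported above (PyPDF2,
# RecursiveCharacterTextSplitter, HuggingFaceEmbeddings, FAISS,
# ConversationalRetrievalChain, ChatOpenAI). The chunk sizes, the embedding
# model name, and the "{{MSG}}" placeholder expected in htmlTemplate's
# user_template/bot_template are assumptions, not the original implementation.
# ---------------------------------------------------------------------------


def prepare_docs(pdf_docs):
    """Sketch: extract per-page text and metadata from the uploaded PDFs."""
    content, metadata = [], []
    for pdf in pdf_docs:
        reader = PyPDF2.PdfReader(pdf)
        for page_num, page in enumerate(reader.pages):
            content.append(page.extract_text() or "")
            metadata.append({"source": pdf.name, "page": page_num + 1})
    return content, metadata


def get_text_chunks(content, metadata):
    """Sketch: split page texts into overlapping chunks (sizes are assumed)."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.create_documents(content, metadatas=metadata)


def ingest_into_vectordb(split_docs):
    """Sketch: embed the chunks and index them in FAISS (model name is assumed)."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(split_docs, embeddings)


def get_conversation_chain(vectorstore):
    """Sketch: build a conversational retrieval chain over the vector store."""
    llm = ChatOpenAI(temperature=0)  # expects OPENAI_API_KEY from the .env loaded in main()
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True, output_key="answer"
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
        return_source_documents=False,
    )


def handle_userinput(user_question):
    """Sketch: answer the question and render the chat history with the HTML templates."""
    response = st.session_state.conversation.invoke({"question": user_question})
    st.session_state.chat_history = response["chat_history"]
    for i, message in enumerate(st.session_state.chat_history):
        # Even indices are user turns, odd indices are bot turns; "{{MSG}}" is the
        # placeholder assumed to exist in htmlTemplate's templates.
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
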
def main():
    load_dotenv()
    st.set_page_config(
        page_title="PDF Insights AI",
        page_icon=":books:",
        layout="wide"
    )
    st.write(css, unsafe_allow_html=True)

    # Welcome section
    st.title("📚 PDF Insights AI")
    st.markdown("""
    ### Unlock the Knowledge in Your PDFs
    - 🤖 AI-powered document analysis
    - 💬 Ask questions about your uploaded documents
    - 📄 Support for multiple PDF files
    """)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    # File upload section
    with st.sidebar:
        st.header("📤 Upload Documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here",
            type=['pdf'],
            accept_multiple_files=True,
            help="Upload PDF files to analyze. Max file size: 200MB"
        )

        # File validation: keep only files within the 200 MB limit
        if pdf_docs:
            valid_docs = []
            for doc in pdf_docs:
                if doc.size > 200 * 1024 * 1024:  # 200 MB
                    st.error(f"File {doc.name} is too large. Maximum file size is 200MB.")
                else:
                    valid_docs.append(doc)
            pdf_docs = valid_docs
        if st.button("Process Documents", type="primary"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF file.")
            else:
                with st.spinner("Processing your documents..."):
                    try:
                        # get pdf text
                        content, metadata = prepare_docs(pdf_docs)
                        # get the text chunks
                        split_docs = get_text_chunks(content, metadata)
                        # create vector store
                        vectorstore = ingest_into_vectordb(split_docs)
                        # create conversation chain
                        st.session_state.conversation = get_conversation_chain(vectorstore)
                        st.success("Documents processed successfully! You can now ask questions.")
                    except Exception as e:
                        st.error(f"An error occurred while processing documents: {str(e)}")
    # Question input section
    user_question = st.text_input(
        "📝 Ask a question about your documents",
        placeholder="What insights can you provide from these documents?"
    )

    if user_question:
        if st.session_state.conversation is None:
            st.warning("Please upload and process documents first.")
        else:
            handle_userinput(user_question)
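

# Assumed standard entry-point guard so Streamlit actually runs main() when the
# script is executed; the section above defines main() but never calls it.
if __name__ == "__main__":
    main()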