import streamlit as st
from dotenv import load_dotenv
import os
from htmlTemplate import css, bot_template, user_template
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
from langchain_community.llms import LlamaCpp
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer, util
from langchain_openai import AzureOpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_openai import ChatOpenAI
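
# NOTE: prepare_docs() and get_text_chunks() are called in main() below but are
# not defined in this part of the file. The two functions here are minimal
# sketches of what they are assumed to do, based on how main() uses them; the
# chunk sizes and metadata fields are illustrative assumptions, not the app's
# confirmed values.
def prepare_docs(pdf_docs):
    """Extract the text of every uploaded PDF page (assumed behaviour)."""
    content = []
    metadata = []
    for pdf in pdf_docs:
        reader = PyPDF2.PdfReader(pdf)
        for page_number, page in enumerate(reader.pages, start=1):
            content.append(page.extract_text() or "")
            metadata.append({"source": pdf.name, "page": page_number})
    return content, metadata

def get_text_chunks(content, metadata):
    """Split page texts into overlapping LangChain Documents (assumed behaviour)."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.create_documents(content, metadatas=metadata)
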
def main():
    load_dotenv()
    st.set_page_config(
        page_title="PDF Insights AI",
        page_icon=":books:",
        layout="wide"
    )
    st.write(css, unsafe_allow_html=True)

    # Welcome section
    st.title("📚 PDF Insights AI")
    st.markdown("""
### Unlock the Knowledge in Your PDFs
- 🤖 AI-powered document analysis
- 💬 Ask questions about your uploaded documents
- 📄 Support for multiple PDF files
""")

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    # File upload section
    with st.sidebar:
        st.header("📤 Upload Documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here",
            type=['pdf'],
            accept_multiple_files=True,
            help="Upload PDF files to analyze. Max file size: 200MB"
        )

        # File validation: collect valid files into a new list rather than
        # removing items from pdf_docs while iterating over it, which skips elements
        if pdf_docs:
            valid_docs = []
            for doc in pdf_docs:
                if doc.size > 200 * 1024 * 1024:  # 200 MB
                    st.error(f"File {doc.name} is too large. Maximum file size is 200MB.")
                else:
                    valid_docs.append(doc)
            pdf_docs = valid_docs
        if st.button("Process Documents", type="primary"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF file.")
            else:
                with st.spinner("Processing your documents..."):
                    try:
                        # get pdf text
                        content, metadata = prepare_docs(pdf_docs)
                        # get the text chunks
                        split_docs = get_text_chunks(content, metadata)
                        # create vector store
                        vectorstore = ingest_into_vectordb(split_docs)
                        # create conversation chain
                        st.session_state.conversation = get_conversation_chain(vectorstore)
                        st.success("Documents processed successfully! You can now ask questions.")
                    except Exception as e:
                        st.error(f"An error occurred while processing documents: {str(e)}")
    # Question input section
    user_question = st.text_input(
        "🔍 Ask a question about your documents",
        placeholder="What insights can you provide from these documents?"
    )

    if user_question:
        if st.session_state.conversation is None:
            st.warning("Please upload and process documents first.")
        else:
            handle_userinput(user_question)
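
# NOTE: ingest_into_vectordb(), get_conversation_chain() and handle_userinput()
# are referenced in main() but not shown in this section of the file. The
# sketches below are assumptions about their behaviour: the embedding model,
# the ChatOpenAI LLM and the "{{MSG}}" placeholder in the HTML templates are
# illustrative guesses, not confirmed details of this repository.
def ingest_into_vectordb(split_docs):
    """Embed the chunks and index them in FAISS (assumed behaviour)."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(split_docs, embeddings)

def get_conversation_chain(vectorstore):
    """Wire an LLM, buffer memory and the FAISS retriever into a chat chain (assumed)."""
    llm = ChatOpenAI(temperature=0)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )

def handle_userinput(user_question):
    """Run the question through the chain and render the chat history (assumed)."""
    response = st.session_state.conversation({"question": user_question})
    st.session_state.chat_history = response["chat_history"]
    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)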