Spaces:

sango07
/

Chat_with_multiple_PDFs

Sleeping

App Files Files Community

Chat_with_multiple_PDFs / app.py

sango07

Update app.py

6cc068f verified 7 months ago

raw

history blame

3.49 kB

	import streamlit as st
	from dotenv import load_dotenv
	import os
	from htmlTemplate import css, bot_template, user_template
	import PyPDF2
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
	from langchain_community.llms import LlamaCpp
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.memory import ConversationBufferMemory
	from langchain.chains import ConversationalRetrievalChain
	from langchain.prompts import PromptTemplate
	from sentence_transformers import SentenceTransformer, util
	from langchain_openai import AzureOpenAIEmbeddings
	from langchain_openai import OpenAIEmbeddings
	from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
	from langchain_openai import ChatOpenAI



	def main():
	    """Render the "PDF Insights AI" Streamlit page and wire up its interactions.

	    The page has three parts:

	    * a welcome banner;
	    * a sidebar where PDFs are uploaded, size-checked and, when the
	      "Process Documents" button is pressed, turned into a conversation
	      chain stored in ``st.session_state.conversation``;
	    * a question box whose text is forwarded to ``handle_userinput`` once a
	      conversation chain exists.

	    NOTE(review): this function was reconstructed from a copy whose
	    indentation had been stripped. Placing the file validation and the
	    "Process Documents" button inside ``with st.sidebar`` is the most
	    plausible reading -- confirm against the running app.
	    """
	    # Per-file upload ceiling; mirrors the "200MB" quoted in the
	    # user-facing strings below.
	    max_upload_bytes = 200 * 1024 * 1024

	    load_dotenv()

	    st.set_page_config(
	        page_title="PDF Insights AI",
	        page_icon=":books:",
	        layout="wide",
	    )
	    st.write(css, unsafe_allow_html=True)

	    # Welcome section
	    st.title("📚 PDF Insights AI")
	    st.markdown("""
	    ### Unlock the Knowledge in Your PDFs
	    - 🤖 AI-powered document analysis
	    - 💬 Ask questions about your uploaded documents
	    - 📄 Support for multiple PDF files
	    """)

	    # Create the session keys only when they are absent, so values already
	    # stored in the session are not overwritten.
	    if "conversation" not in st.session_state:
	        st.session_state.conversation = None
	    if "chat_history" not in st.session_state:
	        st.session_state.chat_history = []

	    # File upload section
	    with st.sidebar:
	        st.header("📤 Upload Documents")
	        pdf_docs = st.file_uploader(
	            "Upload your PDFs here",
	            type=['pdf'],
	            accept_multiple_files=True,
	            help="Upload PDF files to analyze. Max file size: 200MB",
	        )

	        # File validation. Collect the accepted files in a new list instead
	        # of removing from ``pdf_docs`` while looping over it: removing
	        # during iteration skips the element that follows each removed one,
	        # which let consecutive oversized files slip through.
	        accepted_docs = []
	        for doc in pdf_docs or []:
	            if doc.size > max_upload_bytes:
	                st.error(f"File {doc.name} is too large. Maximum file size is 200MB.")
	            else:
	                accepted_docs.append(doc)
	        pdf_docs = accepted_docs

	        if st.button("Process Documents", type="primary"):
	            if not pdf_docs:
	                st.warning("Please upload at least one PDF file.")
	            else:
	                with st.spinner("Processing your documents..."):
	                    # Broad catch on purpose: this is the UI boundary, and any
	                    # failure in the pipeline is reported to the user rather
	                    # than aborting the page.
	                    try:
	                        # get pdf text
	                        content, metadata = prepare_docs(pdf_docs)

	                        # get the text chunks
	                        split_docs = get_text_chunks(content, metadata)

	                        # create vector store
	                        vectorstore = ingest_into_vectordb(split_docs)

	                        # create conversation chain
	                        st.session_state.conversation = get_conversation_chain(vectorstore)

	                        st.success("Documents processed successfully! You can now ask questions.")
	                    except Exception as e:
	                        st.error(f"An error occurred while processing documents: {e}")

	    # Question input section
	    user_question = st.text_input(
	        "📝 Ask a question about your documents",
	        placeholder="What insights can you provide from these documents?",
	    )

	    if user_question:
	        # A question can only be answered once a conversation chain has been
	        # stored by the processing step above.
	        if st.session_state.conversation is None:
	            st.warning("Please upload and process documents first.")
	        else:
	            handle_userinput(user_question)