pdfchat

Sleeping

App Files Files Community

pdfchat / app.py

ogegadavis254

Update app.py

399202c verified over 1 year ago

raw

history blame contribute delete

4.36 kB

	import streamlit as st
	import requests
	import os
	import json
	from dotenv import load_dotenv
	import PyPDF2
	import io
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS

	# Load environment variables from a .env file, if one is present; the API
	# key is read from the environment later on.
	load_dotenv()

	# Initialize session state variables. Streamlit re-executes this script on
	# every interaction, so a key is only seeded when it is not there yet.
	for _state_key, _initial_value in (("vectorstore", None), ("chat_history", [])):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	def reset_conversation():
	    """Forget the indexed documents and the chat transcript of this session.

	    Both session-state entries go back to their initial values, so the user
	    has to process PDFs again before asking further questions.
	    """
	    st.session_state["vectorstore"] = None
	    st.session_state["chat_history"] = []

	def get_pdf_text(pdf_docs):
	    """Extract the text of every page of the given PDFs as one string.

	    Args:
	        pdf_docs: Iterable of PDF sources that ``PyPDF2.PdfReader`` can
	            open (the app passes the uploaded files). ``None`` or an empty
	            iterable is treated as "no documents".

	    Returns:
	        The page texts in document and page order, joined with newlines.
	        An empty string when there are no documents.
	    """
	    page_texts = []
	    for pdf in pdf_docs or ():
	        pdf_reader = PyPDF2.PdfReader(pdf)
	        # ``or ""`` keeps a page that yields no text from breaking the join.
	        page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)
	    # A newline between pages stops the last word of one page from fusing
	    # with the first word of the next, and gives the newline-based text
	    # splitter a boundary to split on.
	    return "\n".join(page_texts)

	def get_text_chunks(text):
	    """Split ``text`` into overlapping chunks ready for embedding.

	    The splitter breaks on newlines and is configured with a chunk size of
	    1000 and an overlap of 200, both measured with ``len``.

	    Args:
	        text: The full document text.

	    Returns:
	        Whatever ``CharacterTextSplitter.split_text`` produces for ``text``.
	    """
	    splitter_options = {
	        "separator": "\n",
	        "chunk_size": 1000,
	        "chunk_overlap": 200,
	        "length_function": len,
	    }
	    return CharacterTextSplitter(**splitter_options).split_text(text)

	def get_vectorstore(text_chunks):
	    """Embed ``text_chunks`` and index them in a FAISS vector store.

	    Args:
	        text_chunks: The text pieces to embed and index.

	    Returns:
	        The store built by ``FAISS.from_texts`` using a default-configured
	        ``HuggingFaceEmbeddings`` instance.
	    """
	    embedder = HuggingFaceEmbeddings()
	    return FAISS.from_texts(texts=text_chunks, embedding=embedder)

	def get_together_response(prompt, history, *, timeout=60):
	    """Ask the Together chat-completions endpoint for a reply to ``prompt``.

	    Args:
	        prompt: Text of the new user message.
	        history: Iterable of ``(user_text, assistant_text)`` pairs from
	            earlier turns; they are replayed, in order, before ``prompt``.
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        The assistant's reply text. On any failure (missing API key, HTTP or
	        network error, unexpected response body) a string starting with
	        ``"Error: "`` is returned instead of raising.
	    """
	    api_key = os.getenv("TOGETHER_API_KEY")
	    if not api_key:
	        # Fail fast with a clear message instead of sending "Bearer None".
	        return "Error: TOGETHER_API_KEY is not set."

	    url = "https://api.together.xyz/v1/chat/completions"
	    model_link = "NousResearch/Nous-Hermes-2-Yi-34B"

	    messages = [{"role": "system", "content": "You are an AI assistant that helps users understand the content of their PDFs. Provide concise and relevant answers based on the information in the documents."}]

	    for human, ai in history:
	        messages.append({"role": "user", "content": human})
	        messages.append({"role": "assistant", "content": ai})

	    messages.append({"role": "user", "content": prompt})

	    payload = {
	        "model": model_link,
	        "messages": messages,
	        "temperature": 0.7,
	        "top_p": 0.95,
	        "top_k": 50,
	        "repetition_penalty": 1,
	        "max_tokens": 1024,
	    }

	    headers = {
	        "accept": "application/json",
	        "content-type": "application/json",
	        "Authorization": f"Bearer {api_key}",
	    }

	    try:
	        # Without a timeout a stalled connection would block the app forever.
	        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
	        response.raise_for_status()
	        data = response.json()
	    except requests.exceptions.RequestException as e:
	        return f"Error: {str(e)}"
	    except ValueError as e:
	        # A body that is not valid JSON may surface as a plain ValueError.
	        return f"Error: {str(e)}"

	    try:
	        return data["choices"][0]["message"]["content"]
	    except (KeyError, IndexError, TypeError):
	        # The body was JSON but not shaped like a chat completion.
	        return "Error: unexpected response format from the Together API."

	def handle_userinput(user_question):
	    """Answer ``user_question`` using the processed PDFs as context.

	    The vector store is searched for passages similar to the question, the
	    passages are put into the prompt, and the model's reply is returned.
	    The question/reply pair is appended to the session's chat history.

	    Args:
	        user_question: The question typed by the user.

	    Returns:
	        The reply from ``get_together_response``, or a hint to process
	        PDFs first when no vector store is available.
	    """
	    store = st.session_state.vectorstore
	    if not store:
	        return "Please upload and process PDF documents first."

	    matches = store.similarity_search(user_question)
	    context = "\n".join(match.page_content for match in matches)
	    prompt = f"Context from PDFs:\n{context}\n\nQuestion: {user_question}\nAnswer:"

	    # The history holds the bare question, not the context-stuffed prompt.
	    history = st.session_state.chat_history
	    response = get_together_response(prompt, history)
	    history.append((user_question, response))
	    return response

	# Streamlit application
	st.set_page_config(page_title="Chat with your PDFs", page_icon=":books:")

	st.header("Chat with your PDFs :books:")

	# Sidebar: upload, indexing and reset controls
	with st.sidebar:
	    st.subheader("Your documents")
	    pdf_docs = st.file_uploader(
	        "Upload your PDFs here and click on 'Process'",
	        type="pdf",  # other file types cannot be read by the PDF reader
	        accept_multiple_files=True,
	    )
	    if st.button("Process"):
	        if not pdf_docs:
	            # Nothing was uploaded, so there is nothing to index.
	            st.warning("Please upload at least one PDF before clicking 'Process'.")
	        else:
	            with st.spinner("Processing"):
	                # Get PDF text
	                raw_text = get_pdf_text(pdf_docs)

	                # Get the text chunks
	                text_chunks = get_text_chunks(raw_text)

	                # Create vector store; skipped for an empty chunk list
	                # (e.g. PDFs with no extractable text) so that no index
	                # is built from nothing.
	                if text_chunks:
	                    st.session_state.vectorstore = get_vectorstore(text_chunks)

	            if text_chunks:
	                st.success("PDFs processed successfully!")
	            else:
	                st.error("No extractable text was found in the uploaded PDFs.")

	    # NOTE(review): the button is assumed to belong to the sidebar; confirm
	    # against the deployed layout.
	    st.button('Reset Chat', on_click=reset_conversation)

	# Main chat interface
	if st.session_state.vectorstore is None:
	    st.write("Please upload PDF documents and click 'Process' to start chatting.")
	else:
	    # A form submits the question exactly once. A bare text_input keeps its
	    # value across reruns, so any other widget interaction would re-send
	    # the same question and duplicate it in the chat history.
	    with st.form("question_form", clear_on_submit=True):
	        user_question = st.text_input("Ask a question about your documents:")
	        submitted = st.form_submit_button("Ask")

	    if submitted and user_question.strip():
	        response = handle_userinput(user_question)

	        st.write("Human: " + user_question)
	        st.write("AI: " + response)

	    # Display chat history
	    st.subheader("Chat History")
	    for human, ai in st.session_state.chat_history:
	        st.write("Human: " + human)
	        st.write("AI: " + ai)
	        st.write("---")