import streamlit as st
from PIL import Image
import time
from dotenv import load_dotenv
import pickle
from huggingface_hub import Repository
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
import os
import traceback
import pandas as pd
import pydeck as pdk
from urllib.error import URLError
# Per-session state: the running chat transcript shown on page 1.
if 'chat_history_page1' not in st.session_state:
    st.session_state['chat_history_page1'] = []

# Step 1: clone the dataset repository that contains the PDF.
# HUB_TOKEN must be present in the environment; if it is missing, the lookup
# below raises KeyError.
repo = Repository(
    clone_from="Anne31415/Private_Book",  # repository to clone from
    repo_type="dataset",  # this is a dataset repository
    local_dir="Private_Book",  # directory the clone is placed in
    token=os.environ["HUB_TOKEN"],  # secret token used to authenticate
)
repo.git_pull()  # fetch the latest changes, if any

# Step 2: path of the PDF inside the local clone, and the OpenAI key taken
# from the environment.
pdf_path = "Private_Book/grunddaten-krankenhaeuser-2016.pdf"
api_key = os.getenv("OPENAI_API_KEY")
# NOTE: the API key above is read from the OPENAI_API_KEY environment variable, not from st.secrets.
# Disk caching via st.cache_data is currently disabled (the decorator below is left commented out).
#@st.cache_data(persist="disk")
def load_vector_store(file_path, store_name, force_reload=False):
    """Return the FAISS vector store for a PDF, building and caching it if needed.

    The store is cached on disk as ``{store_name}.pkl``. It is rebuilt from
    the PDF when *force_reload* is true or when no cache file exists;
    otherwise the cached pickle is loaded.

    Args:
        file_path: Path of the PDF whose text is split and embedded.
        store_name: Base name (without extension) of the pickle cache file.
        force_reload: Rebuild the store even if a cache file is present.

    Returns:
        The vector store, or ``None`` if building or loading failed. In that
        case the error is shown with ``st.error`` and the traceback printed.
    """
    cache_path = f"{store_name}.pkl"
    try:
        if force_reload or not os.path.exists(cache_path):
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
                length_function=len,
            )
            text = load_pdf_text(file_path)
            chunks = text_splitter.split_text(text=text)
            if not chunks:
                # Fail with a clear message instead of an opaque error from
                # the embedding / indexing step.
                raise ValueError(f"No text could be extracted from {file_path!r}")
            embeddings = OpenAIEmbeddings()
            VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
            # Write to a temporary file and move it into place, so a failed
            # dump never leaves a truncated cache that breaks later loads.
            tmp_path = f"{cache_path}.tmp"
            with open(tmp_path, "wb") as f:
                pickle.dump(VectorStore, f)
            os.replace(tmp_path, cache_path)
        else:
            # SECURITY: pickle.load can execute arbitrary code. Only load
            # cache files this application wrote itself.
            with open(cache_path, "rb") as f:
                VectorStore = pickle.load(f)
        return VectorStore
    except Exception as e:
        # Boundary handler: report in the UI, log the traceback, and signal
        # failure to the caller with None.
        st.error(f"An error occurred: {e}")
        traceback.print_exc()
        return None
# Helper: extract the full text of a PDF file.
def load_pdf_text(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    A page whose ``extract_text()`` result is empty or ``None`` contributes
    an empty string.
    """
    reader = PdfReader(file_path)
    page_texts = (page.extract_text() or "" for page in reader.pages)
    return "".join(page_texts)
def load_chatbot():
    """Build and return a "stuff"-type question-answering chain using an OpenAI LLM."""
    llm = OpenAI()
    qa_chain = load_qa_chain(llm=llm, chain_type="stuff")
    return qa_chain
def display_chat_history(chat_history):
    """Render every chat entry as a coloured HTML block.

    Args:
        chat_history: Iterable of ``(sender, message, status)`` tuples. Only
            the sender and the message are rendered.
    """
    import html  # local import: only used to escape chat text

    # New/old and user/bot messages currently all share one colour.
    background_color = "#ffeecf"
    for chat in chat_history:
        # The text is embedded in raw HTML (unsafe_allow_html=True), so it is
        # escaped to keep user input and model output from injecting markup.
        sender = html.escape(str(chat[0]))
        message = html.escape(str(chat[1]))
        # NOTE(review): the wrapper markup was missing from this file; this
        # <div> is a reconstruction built around background_color. Confirm
        # the intended styling.
        st.markdown(
            f"<div style='background-color: {background_color}; padding: 10px; "
            f"border-radius: 10px; margin: 10px;'>{sender}: {message}</div>",
            unsafe_allow_html=True,
        )
def _quick_question_buttons(questions, query):
    """Render one button per question; return the clicked one, else *query*."""
    for question in questions:
        if st.button(question):
            query = question
    return query


def page1():
    """Render the document question-answering page.

    Shows the header and logo, the stored chat history, a free-text input and
    a set of predefined question buttons. When a question is present, the
    three most similar document chunks are retrieved from the vector store,
    the QA chain is run on them, and the new exchange is rendered and stored
    in ``st.session_state['chat_history_page1']``.

    Any unexpected exception is reported with ``st.error`` and its traceback
    is printed; nothing is raised to the caller.
    """
    import html  # local import: only used to escape chat text

    try:
        # NOTE(review): the style block is empty in this file; the CSS it
        # once carried appears to be missing.
        hide_streamlit_style = "\n"
        st.markdown(hide_streamlit_style, unsafe_allow_html=True)

        title_col, logo_col = st.columns([3, 1])
        with title_col:
            st.title("Welcome to BinDocs ChatBot!")
        with logo_col:
            image = Image.open('BinDoc Logo (Quadratisch).png')
            st.image(image, use_column_width='always')

        if not os.path.exists(pdf_path):
            st.error("File not found. Please check the file path.")
            return

        VectorStore = load_vector_store(pdf_path, "vector_store_page1", force_reload=False)
        if VectorStore is None:
            # load_vector_store reports its own failure and returns None;
            # without a store no question can be answered.
            return

        display_chat_history(st.session_state['chat_history_page1'])

        # Spacer writes (their content is empty in this file).
        st.write("", unsafe_allow_html=True)
        st.write("", unsafe_allow_html=True)
        st.write("", unsafe_allow_html=True)

        query = st.text_input("Ask questions about your PDF file (in any preferred language):")
        add_vertical_space(2)

        # Predefined questions; clicking one overrides the typed query.
        left_col, right_col = st.columns(2)
        with left_col:
            query = _quick_question_buttons(
                (
                    "Was kann ich mit dem Prognose-Analyse-Tool machen?",
                    "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?",
                    "Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?",
                ),
                query,
            )
        with right_col:
            query = _quick_question_buttons(
                (
                    "Dies ist eine reine Test Frage, welche aber eine ausreichende Länge hat.",
                    "Was sagt mir denn generell die wundervolle Bevölkerungsentwicklung?",
                    "Ob ich hier wohl viel schreibe, dass die Fragen vom Layout her passen?",
                ),
                query,
            )

        if query:
            history = st.session_state['chat_history_page1']
            history.append(("User", query, "new"))

            start_time = time.time()
            with st.spinner('Bot is thinking...'):
                chain = load_chatbot()
                docs = VectorStore.similarity_search(query=query, k=3)
                with get_openai_callback():
                    response = chain.run(input_documents=docs, question=query)
            duration = time.time() - start_time
            st.text(f"Response time: {duration:.2f} seconds")

            history.append(("Bot", response, "new"))

            # Render the exchange that was just added (question + answer).
            background_color = "#ffeecf"
            for chat in history[-2:]:
                # Escaped because the text is embedded in raw HTML.
                sender = html.escape(str(chat[0]))
                message = html.escape(str(chat[1]))
                # NOTE(review): the wrapper markup was missing from this
                # file; this <div> is a reconstruction built around
                # background_color. Confirm the intended styling.
                st.markdown(
                    f"<div style='background-color: {background_color}; padding: 10px; "
                    f"border-radius: 10px; margin: 10px;'>{sender}: {message}</div>",
                    unsafe_allow_html=True,
                )

        # Everything has been displayed now, so mark all entries as old.
        st.session_state['chat_history_page1'] = [
            (sender, msg, "old")
            for sender, msg, _ in st.session_state['chat_history_page1']
        ]
    except Exception as e:
        # UI boundary: show the error to the user and log the traceback.
        st.error(f"Upsi, an unexpected error occurred: {e}")
        traceback.print_exc()
def page2():
    """Render the second page, which currently shows only a title."""
    heading = 'BinDoc GmbH'
    st.title(heading)
def main():
    """Draw the sidebar and render whichever page is selected there."""
    # Page label -> render function; insertion order is the selectbox order.
    pages = {
        "Document Analysis Bot": page1,
        "Coding Assistance Bot": page2,
    }

    # Sidebar: branding, page selector and footer.
    with st.sidebar:
        st.title('BinDoc GmbH')
        st.markdown("Experience revolutionary interaction with BinDocs Chat App, leveraging state-of-the-art AI technology.")
        add_vertical_space(1)
        page = st.sidebar.selectbox("Choose a page", list(pages))
        add_vertical_space(1)
        st.write('Made with ❤️ by BinDoc GmbH')

    # Main area: run the renderer for the chosen page, if it is a known one.
    render = pages.get(page)
    if render is not None:
        render()


if __name__ == "__main__":
    main()