RAG_SOC_BOT / app.py
testcolab2's picture
Update app.py
4cfa3e0 verified
raw
history blame
6.43 kB
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.chains import (
StuffDocumentsChain, LLMChain, ConversationalRetrievalChain
)
from langchain_core.prompts import PromptTemplate
import streamlit as st
from PyPDF2 import PdfReader
# Stylesheet injected into the page (via st.write with unsafe_allow_html) to
# style the chat bubbles produced from bot_template / user_template.
# The <style> element is explicitly closed so the markup is well-formed and
# cannot swallow content rendered after it.
css = '''
<style>
.chat-message {
    padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
    background-color: #2b313e
}
.chat-message.bot {
    background-color: #475063
}
.chat-message .avatar {
    width: 20%;
}
.chat-message .avatar img {
    max-width: 78px;
    max-height: 78px;
    border-radius: 50%;
    object-fit: cover;
}
.chat-message .message {
    width: 80%;
    padding: 0 1.5rem;
    color: #fff;
}
</style>
'''
# HTML snippet for one assistant message. The literal "{{MSG}}" token is a
# placeholder that callers substitute with the message text via str.replace.
bot_template = """
<div class="chat-message bot">
<div class="avatar">
<img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png">
</div>
<div class="message">{{MSG}}</div>
</div>
"""
# HTML snippet for one user message. The literal "{{MSG}}" token is a
# placeholder that callers substitute with the message text via str.replace.
user_template = """
<div class="chat-message user">
<div class="avatar">
<img src="https://i.ibb.co/rdZC7LZ/Photo-logo-1.png">
</div>
<div class="message">{{MSG}}</div>
</div>
"""
def get_pdf_text(pdf_files):
    """Return the concatenated text of every page of every given PDF.

    Args:
        pdf_files: Iterable of objects accepted by ``PdfReader`` (the caller
            in this file passes the files returned by the Streamlit uploader).

    Returns:
        A single string with the page texts joined in order, without any
        separator. An empty iterable yields ``""``.
    """
    page_texts = []
    for pdf_file in pdf_files:
        reader = PdfReader(pdf_file)
        # Defensive: treat a page that yields no text (e.g. an image-only
        # page, or a reader version that returns None) as an empty string so
        # the final join can never fail on a non-string value.
        page_texts.extend(page.extract_text() or "" for page in reader.pages)
    # One join instead of repeated `+=` avoids quadratic string building.
    return "".join(page_texts)
def get_chunk_text(text):
    """Split ``text`` into overlapping chunks for embedding.

    The text is split on newline characters into chunks of up to 1000
    characters (measured with ``len``), with a 200-character overlap between
    consecutive chunks.

    Args:
        text: The raw text to split.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_text``.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
def get_vector_store(text_chunks):
    """Embed ``text_chunks`` and index them in a FAISS vector store.

    Embeddings come from the HuggingFace model
    ``sentence-transformers/all-MiniLM-L6-v2``, configured to run on CPU.

    Args:
        text_chunks: The text chunks to embed and index.

    Returns:
        The FAISS vector store built from the chunks.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    return FAISS.from_texts(texts=text_chunks, embedding=embedding_model)
def get_conversation_chain(vector_store):
    """Build a conversational retrieval chain backed by a local Llama-2 model.

    Args:
        vector_store: A vector store exposing ``as_retriever()``; it supplies
            the documents the chain retrieves for each question.

    Returns:
        A ``ConversationalRetrievalChain`` whose buffer memory stores the
        dialogue under the ``chat_history`` key as message objects.
    """
    # CTransformers is not among the module-level imports, so calling it
    # raised NameError. Import it here so this function works on its own.
    from langchain_community.llms import CTransformers

    # Model file available here:
    # https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main
    # NOTE(review): context_length (700) is only slightly larger than
    # max_new_tokens (600), which leaves little room for the retrieved
    # context and the question in the prompt -- confirm these values.
    llm = CTransformers(
        model='llama-2-7b-chat.ggmlv3.q2_K.bin',
        model_type='llama',
        config={
            'max_new_tokens': 600,
            'context_length': 700,
            'temperature': 0.01,
        },
    )
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
        memory=memory,
    )
def handle_user_input(question):
    """Send ``question`` to the conversation chain and render the chat history.

    The chain stored in ``st.session_state.conversation`` is called with the
    question; the returned ``chat_history`` is saved in session state and each
    message is rendered with the user template (even positions) or the bot
    template (odd positions).

    Args:
        question: The text the user typed.
    """
    # Standard-library import kept local so this function is self-contained.
    import html

    conversation = st.session_state.conversation
    if conversation is None:
        # No chain has been built yet; calling None would raise TypeError.
        st.warning("Please upload and process your PDFs before asking a question.")
        return

    response = conversation({'question': question})
    st.session_state.chat_history = response['chat_history']
    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The templates are rendered with unsafe_allow_html=True, so message
        # text (user input and model output) must be escaped to keep it from
        # being interpreted as markup.
        safe_content = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)
# def main():
# st.set_page_config(page_title='Chat with Your own PDFs', page_icon=':books:')
# st.write(css, unsafe_allow_html=True)
# if "conversation" not in st.session_state:
# st.session_state.conversation = None
# if "chat_history" not in st.session_state:
# st.session_state.chat_history = None
# st.header('Chat with Your own PDFs :books:')
# question = st.text_input("Ask anything to your PDF: ")
# if question:
# handle_user_input(question)
# with st.sidebar:
# st.subheader("Upload your Documents Here: ")
# pdf_files = st.file_uploader("Choose your PDF Files and Press OK", type=['pdf'], accept_multiple_files=True)
# if st.button("OK"):
# with st.spinner("Processing your PDFs..."):
# # Get PDF Text
# raw_text = get_pdf_text(pdf_files)
# # Get Text Chunks
# text_chunks = get_chunk_text(raw_text)
# # Create Vector Store
# vector_store = get_vector_store(text_chunks)
# st.write("DONE")
# # Create conversation chain
# st.session_state.conversation = get_conversation_chain(vector_store)
def _process_uploaded_pdfs(pdf_files):
    """Build the conversation chain from the uploaded PDFs and store it in session state."""
    if not pdf_files:
        # Nothing uploaded: an empty chunk list cannot be indexed.
        st.warning("Please upload at least one PDF file first.")
        return
    with st.spinner("Processing your PDFs..."):
        # Get PDF Text
        raw_text = get_pdf_text(pdf_files)
        # Get Text Chunks
        text_chunks = get_chunk_text(raw_text)
        if not text_chunks:
            # E.g. scanned PDFs with no text layer; stop before indexing.
            st.error("No extractable text was found in the uploaded PDFs.")
            return
        # Create Vector Store
        vector_store = get_vector_store(text_chunks)
        st.write("DONE")
        # Create conversation chain
        st.session_state.conversation = get_conversation_chain(vector_store)


def main():
    """Render the Streamlit page: question box, chat history and PDF upload sidebar.

    Session state keys ``conversation`` and ``chat_history`` are created (as
    ``None``) on first run. A question is only forwarded to the chain once a
    conversation chain exists; otherwise the user is asked to upload PDFs.
    """
    st.set_page_config(page_title='Chat with Your own PDFs', page_icon=':books:')
    st.write(css, unsafe_allow_html=True)
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None
    st.header('Chat with Your own PDFs :books:')
    question = st.text_input("Ask anything to your PDF: ")
    if question:
        if st.session_state.conversation is None:
            # Give the initialization hook a chance to supply a chain.
            st.session_state.conversation = initialize_conversation_chain()
        if st.session_state.conversation is None:
            # Still no chain (the hook may return None): do not call it.
            st.warning("Please upload and process your PDFs before asking a question.")
        else:
            handle_user_input(question)
    with st.sidebar:
        st.subheader("Upload your Documents Here: ")
        pdf_files = st.file_uploader("Choose your PDF Files and Press OK", type=['pdf'], accept_multiple_files=True)
        if st.button("OK"):
            _process_uploaded_pdfs(pdf_files)
def initialize_conversation_chain():
    """Placeholder hook for creating a default conversation chain.

    Returns:
        Always ``None``; replace with the actual initialization.
    """
    return None
# Run the app exactly once. Calling main() twice repeats st.set_page_config
# and recreates every widget in the same script run.
if __name__ == "__main__":
    main()