# Hugging Face Spaces page header captured with this file: "Spaces: Sleeping".
import html

import streamlit as st
from langchain.chains import ConversationChain
from langchain.chains import (
    StuffDocumentsChain, LLMChain, ConversationalRetrievalChain
)
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.llms import CTransformers
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from PyPDF2 import PdfReader
# Stylesheet injected once per page render via st.write(..., unsafe_allow_html=True).
# The content is kept flush-left on purpose: it is passed through Streamlit's
# markdown renderer, so the literal should stay a plain HTML block.
# The closing </style> tag is required so the element is explicitly terminated
# instead of relying on the browser to auto-close it.
css = '''
<style>
.chat-message {
padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
background-color: #2b313e
}
.chat-message.bot {
background-color: #475063
}
.chat-message .avatar {
width: 20%;
}
.chat-message .avatar img {
max-width: 78px;
max-height: 78px;
border-radius: 50%;
object-fit: cover;
}
.chat-message .message {
width: 80%;
padding: 0 1.5rem;
color: #fff;
}
</style>
'''
# HTML snippet for one assistant message. The {{MSG}} placeholder is filled in
# with str.replace() by the rendering code; it is not a str.format() field.
bot_template = '''
<div class="chat-message bot">
<div class="avatar">
<img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png">
</div>
<div class="message">{{MSG}}</div>
</div>
'''
# HTML snippet for one user message. The {{MSG}} placeholder is filled in
# with str.replace() by the rendering code; it is not a str.format() field.
user_template = '''
<div class="chat-message user">
<div class="avatar">
<img src="https://i.ibb.co/rdZC7LZ/Photo-logo-1.png">
</div>
<div class="message">{{MSG}}</div>
</div>
'''
def get_pdf_text(pdf_files):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_files: Iterable of objects accepted by ``PdfReader`` (for example
            the uploads returned by ``st.file_uploader``). ``None`` or an
            empty iterable yields an empty string.

    Returns:
        A single string with the page texts joined in reading order, with no
        separator added between pages.
    """
    page_texts = []
    for pdf_file in pdf_files or ():
        reader = PdfReader(pdf_file)
        # A page may yield no text (e.g. scanned/image-only pages); coerce a
        # falsy result to "" so one such page cannot break the concatenation.
        page_texts.extend(page.extract_text() or "" for page in reader.pages)
    # Join once at the end instead of growing a string page by page.
    return "".join(page_texts)
def get_chunk_text(text, chunk_size=1000, chunk_overlap=200):
    """Split raw text into overlapping chunks on newline boundaries.

    Args:
        text: The text to split. ``None`` or an empty string yields ``[]``.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared between consecutive chunks.

    Returns:
        A list of text chunks produced by ``CharacterTextSplitter``.
    """
    if not text:
        # Nothing to split; skip building the splitter altogether.
        return []

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)
def get_vector_store(
    text_chunks,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    device="cpu",
):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: Non-empty list of strings to embed.
        model_name: Hugging Face sentence-embedding model identifier.
        device: Device passed to the embedding model (``'cpu'`` by default).

    Returns:
        The ``FAISS`` vector store built from ``text_chunks``.

    Raises:
        ValueError: If ``text_chunks`` is empty or ``None``.
    """
    if not text_chunks:
        # Fail fast with an actionable message rather than letting the
        # embedding/indexing layers fail on empty input.
        raise ValueError(
            "text_chunks is empty; cannot build a vector store without text"
        )

    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={'device': device},
    )
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
def get_conversation_chain(vector_store):
    """Build a retrieval-augmented chat chain over the given vector store.

    Args:
        vector_store: A vector store exposing ``as_retriever()``.

    Returns:
        A ``ConversationalRetrievalChain`` that uses a local Llama-2 chat
        model and keeps the dialogue in a ``ConversationBufferMemory`` under
        the ``'chat_history'`` key.
    """
    # Model file available here:
    # https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main
    # NOTE(review): context_length (700) is only slightly larger than
    # max_new_tokens (600), which leaves little room for the question and the
    # retrieved chunks -- confirm these values are intentional.
    llm = CTransformers(
        model='llama-2-7b-chat.ggmlv3.q2_K.bin',
        model_type='llama',
        config={
            'max_new_tokens': 600,
            'context_length': 700,
            'temperature': 0.01,
        },
    )
    # return_messages=True stores message objects (with a .content attribute)
    # rather than one flattened string.
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
        memory=memory,
    )
def handle_user_input(question):
    """Send a question to the conversation chain and render the chat history.

    The chain stored in ``st.session_state.conversation`` is called with the
    question, the returned ``'chat_history'`` is saved back into the session
    state, and every message is rendered with the chat HTML templates:
    even-indexed messages use ``user_template``, odd-indexed ones use
    ``bot_template``.

    Args:
        question: The text typed by the user.
    """
    conversation = st.session_state.get("conversation")
    if conversation is None:
        # No chain has been built yet (no PDFs processed); calling None would
        # raise a TypeError, so tell the user what to do instead.
        st.warning("Please upload and process your PDFs before asking a question.")
        return

    response = conversation({'question': question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The text comes from the user or the model and is rendered with
        # unsafe_allow_html=True, so escape it to prevent HTML injection.
        safe_content = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)
# def main(): | |
# st.set_page_config(page_title='Chat with Your own PDFs', page_icon=':books:') | |
# st.write(css, unsafe_allow_html=True) | |
# if "conversation" not in st.session_state: | |
# st.session_state.conversation = None | |
# if "chat_history" not in st.session_state: | |
# st.session_state.chat_history = None | |
# st.header('Chat with Your own PDFs :books:') | |
# question = st.text_input("Ask anything to your PDF: ") | |
# if question: | |
# handle_user_input(question) | |
# with st.sidebar: | |
# st.subheader("Upload your Documents Here: ") | |
# pdf_files = st.file_uploader("Choose your PDF Files and Press OK", type=['pdf'], accept_multiple_files=True) | |
# if st.button("OK"): | |
# with st.spinner("Processing your PDFs..."): | |
# # Get PDF Text | |
# raw_text = get_pdf_text(pdf_files) | |
# # Get Text Chunks | |
# text_chunks = get_chunk_text(raw_text) | |
# # Create Vector Store | |
# vector_store = get_vector_store(text_chunks) | |
# st.write("DONE") | |
# # Create conversation chain | |
# st.session_state.conversation = get_conversation_chain(vector_store) | |
def _process_uploaded_pdfs(pdf_files):
    """Build the conversation chain from the uploaded PDFs and store it in session state."""
    if not pdf_files:
        st.warning("Please upload at least one PDF file first.")
        return

    with st.spinner("Processing your PDFs..."):
        # Get PDF Text
        raw_text = get_pdf_text(pdf_files)
        # Get Text Chunks
        text_chunks = get_chunk_text(raw_text)
        if not text_chunks:
            # Nothing to index (e.g. image-only PDFs); stop before the vector
            # store is built from an empty list.
            st.error("No extractable text was found in the uploaded PDFs.")
            return
        # Create Vector Store
        vector_store = get_vector_store(text_chunks)
        # Create conversation chain
        st.session_state.conversation = get_conversation_chain(vector_store)
        # Report success only once the chain is actually ready to use.
        st.write("DONE")


def main():
    """Render the Streamlit page: question box, chat history and PDF upload sidebar.

    Session state keys used:
        conversation: The active conversation chain, or ``None`` until PDFs
            have been processed.
        chat_history: The messages returned by the last chain call, or ``None``.
    """
    st.set_page_config(page_title='Chat with Your own PDFs', page_icon=':books:')
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header('Chat with Your own PDFs :books:')
    question = st.text_input("Ask anything to your PDF: ")
    if question:
        if st.session_state.conversation is None:
            # Give the initialization hook a chance to supply a chain.
            st.session_state.conversation = initialize_conversation_chain()
        if st.session_state.conversation is None:
            # Still no chain: answering is impossible until PDFs are processed.
            st.warning("Please upload your PDFs and press OK before asking a question.")
        else:
            handle_user_input(question)

    with st.sidebar:
        st.subheader("Upload your Documents Here: ")
        pdf_files = st.file_uploader(
            "Choose your PDF Files and Press OK",
            type=['pdf'],
            accept_multiple_files=True,
        )
        if st.button("OK"):
            _process_uploaded_pdfs(pdf_files)
def initialize_conversation_chain():
    """Return a default conversation chain, or ``None`` when none is available.

    This is a placeholder hook: it currently always returns ``None``, so
    callers must handle the ``None`` result.
    """
    # TODO: replace with the actual initialization.
    return None
# Run the app exactly once per script execution. Calling main() twice would
# render every widget twice and call st.set_page_config() a second time.
if __name__ == "__main__":
    main()