import os

import streamlit as st
import torch
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, UnstructuredPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from PIL import Image
from streamlit_extras.add_vertical_space import add_vertical_space
st.set_page_config(page_title="Welcome to our AI Question Answering Bot")
with st.sidebar:
    st.title('🤗💬 QA App')
    st.markdown('''
## About
This app is an LLM-powered, document question-answering chatbot built with:
- [Streamlit](https://streamlit.io/)
- [LangChain](https://python.langchain.com/)
- Chat model: Llama-2-13b-chat-hf
- Retriever model: all-MiniLM-L6-v2

💡 Note: No API key required!
''')
    add_vertical_space(5)
    st.write('Made with ❤️ by us')
    # logo = Image.open('logo.png')
    # st.image(logo, use_column_width=True)
# Introduction
st.markdown("""
Welcome! This is not just any bot, it's a special one equipped with state-of-the-art natural language processing capabilities, and ready to answer your queries.
Ready to explore? Let's get started!
* Step 1: Upload a PDF document.
* Step 2: Type in a question related to your document's content.
* Step 3: Get your answer!
Push clear cache before uploading a new doc !
""")
def write_text_file(content, file_path):
    """Write raw bytes to file_path; return True on success, False otherwise."""
    try:
        with open(file_path, 'wb') as file:
            file.write(content)
        return True
    except Exception as e:
        print(f"Error occurred while writing the file: {e}")
        return False
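# Illustrative usage (values are examples only, not part of the app flow):
# ok = write_text_file(b"%PDF-1.4 ...", "./temp/file.pdf")  # ok is False if the folder is missing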
# Wrap the prompt template in a PromptTemplate object
def set_qa_prompt():
    # Llama-2 chat-style prompt: the context is enclosed between $ signs, the question between | signs
    prompt_template = """<s>[INST] <<SYS>> Use the following pieces of context closed between $ to answer the question closed between |. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
${context}$ <</SYS>>
Question: |{question}|
Answer:[/INST]</s>"""
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    return prompt
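# Minimal sketch (hypothetical values) of how the chain fills the template above:
# the retrieved chunks become {context} and the user's text becomes {question}.
# rendered = set_qa_prompt().format(
#     context="Paris is the capital and largest city of France.",
#     question="What is the capital of France?",
# )
# print(rendered)  # full Llama-2 [INST]/<<SYS>> prompt with $context$ and |question|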
# Build RetrievalQA object
def build_retrieval_qa(_llm, _prompt, _vectorstore):
    dbqa = RetrievalQA.from_chain_type(
        llm=_llm,
        chain_type='stuff',
        retriever=_vectorstore.as_retriever(search_kwargs={'k': 3}),
        return_source_documents=True,
        chain_type_kwargs={'prompt': _prompt},
    )
    return dbqa
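# Illustrative call, assuming llm, prompt and vectorstore are already built:
# dbqa = build_retrieval_qa(llm, prompt, vectorstore)
# out = dbqa({'query': 'What is this document about?'})
# # out is a dict with 'query', 'result' and the 3 retrieved 'source_documents'
# # that were stuffed into the {context} slot of the prompt.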
# Instantiate QA object. Cached with st.cache_resource so the chain is only built once;
# the leading underscore on _texts tells Streamlit not to hash that argument.
@st.cache_resource()
def setup_dbqa(_texts):
    print("setup_dbqa ...")
    llm = HuggingFacePipeline.from_model_id(
        model_id="NousResearch/Llama-2-13b-chat-hf",
        task="text-generation",
        model_kwargs={"max_length": 1500, "load_in_8bit": True},
    )
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )
    vectorstore = Chroma.from_documents(_texts, embeddings, persist_directory="vectorstore")
    prompt = set_qa_prompt()
    return build_retrieval_qa(llm, prompt, vectorstore)
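# End-to-end sketch: uploaded PDF -> load_docs() chunks -> MiniLM embeddings in Chroma
# -> top-3 similar chunks retrieved per question -> stuffed into the Llama-2 prompt above.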
def load_docs(uploaded_file):
    print("loading docs ...")
    content = uploaded_file.read()
    # Persist the uploaded bytes to a temp folder so DirectoryLoader can pick the file up
    os.makedirs("./temp", exist_ok=True)
    file_path_aux = "./temp/file.pdf"
    write_text_file(content, file_path_aux)
    file_path = "./temp/"
    loader = DirectoryLoader(file_path,
                             glob="*.pdf",
                             loader_cls=UnstructuredPDFLoader)
    documents = loader.load()
    # Split text from the PDF into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                   chunk_overlap=0,
                                                   length_function=len)
    texts = text_splitter.split_documents(documents)
    return texts
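# Each returned chunk is a langchain Document: page_content holds at most 1000 characters
# of text and metadata records the source path, e.g. (illustrative):
# texts[0].metadata  ->  {'source': 'temp/file.pdf'}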
# Load a PDF file
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if uploaded_file is not None:
    st.write('Loading file')
    texts = load_docs(uploaded_file)
    # Build (or fetch from cache) the retrieval-QA chain over the Chroma vector store
    model = setup_dbqa(texts)
    question = st.text_input('Ask a question:')
    if question:
        # Run the RetrievalQA chain and display the answer with its source chunks
        answer = model({'query': question})
        print(question)
        print(answer)
        st.write('Question: ', answer["query"])
        st.write('Answer: ', answer["result"])
        st.write('Source documents: ', answer["source_documents"])
if st.button("Clear cache before loading a new document"):
    # Clears all st.cache_resource caches so setup_dbqa rebuilds for the new file
    st.cache_resource.clear()