import os

import streamlit as st
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, UnstructuredPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from streamlit_extras.add_vertical_space import add_vertical_space

st.set_page_config(page_title="Welcome to our AI Question Answering Bot")

with st.sidebar:
    st.title('🤖💬 QA App')
    st.markdown('''
    ## About
    This app is an LLM-powered chatbot built using:
    - [Streamlit](https://streamlit.io/)
    - [LangChain](https://python.langchain.com/)
    - Chat model = Llama-2-13b-chat-hf
    - Retriever model = all-MiniLM-L6-v2

    💡 Note: No API key required!
    ''')
    add_vertical_space(5)
    st.write('Made with ❤️ by us')
    # logo = Image.open('logo.png')
    # st.image(logo, use_column_width=True)

# Introduction
st.markdown("""
Welcome! This is not just any bot: it is equipped with state-of-the-art
natural language processing capabilities and ready to answer your queries.
Ready to explore? Let's get started!

* Step 1: Upload a PDF document.
* Step 2: Type in a question related to your document's content.
* Step 3: Get your answer!

Press the "Clear cache" button before uploading a new document!
""")

def write_text_file(content, file_path):
    try:
        with open(file_path, 'wb') as file:
            file.write(content)
        return True
    except Exception as e:
        print(f"Error occurred while writing the file: {e}")
        return False

# Wrap the prompt template in a PromptTemplate object
def set_qa_prompt():
    # Prompt template: the context is enclosed between $, the question between |
    prompt_template = """<s>[INST] <<SYS>> Use the following pieces of context enclosed between $ to answer the question enclosed between |. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
${context}$ <</SYS>>
Question: |{question}|
Answer:[/INST]</s>"""
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    return prompt
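
# Note: <s>[INST] ... [/INST] and <<SYS>> ... <</SYS>> are the delimiters that
# Llama-2 chat models were fine-tuned on; the $...$ and |...| fences are this
# app's own convention for marking the retrieved context and the user question.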

# Build a RetrievalQA object
def build_retrieval_qa(_llm, _prompt, _vectorstore):
    dbqa = RetrievalQA.from_chain_type(llm=_llm,
                                       chain_type='stuff',
                                       retriever=_vectorstore.as_retriever(search_kwargs={'k': 3}),
                                       return_source_documents=True,
                                       chain_type_kwargs={'prompt': _prompt})
    return dbqa
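
# With chain_type='stuff', the k=3 chunks returned by the retriever are
# concatenated ("stuffed") verbatim into the prompt's single {context} slot,
# so the combined chunks must fit inside the model's context window.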

# Instantiate the QA object. Caching avoids reloading the 13B model on every
# Streamlit rerun.
@st.cache_resource
def setup_dbqa(_texts):
    print("setup_dbqa ...")
    llm = HuggingFacePipeline.from_model_id(
        model_id="NousResearch/Llama-2-13b-chat-hf",
        task="text-generation",
        model_kwargs={"max_length": 1500, "load_in_8bit": True},
    )
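    # Note: load_in_8bit relies on the bitsandbytes library and a CUDA GPU;
    # on CPU-only hardware this model load will fail, so the flag may need to
    # be dropped (or the model swapped for a smaller one) there.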
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                       model_kwargs={'device': 'cpu'})
    # Build and persist a Chroma vector store over the document chunks
    vectorstore = Chroma.from_documents(_texts, embeddings, persist_directory="vectorstore")
    prompt = set_qa_prompt()
    return build_retrieval_qa(llm, prompt, vectorstore)
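
# The leading underscores on _texts, _llm, _prompt, and _vectorstore follow a
# Streamlit convention: st.cache_resource skips hashing any parameter whose
# name starts with "_". The cache therefore does not invalidate automatically
# when a new document is uploaded, which is why the app asks you to press the
# "Clear cache" button first.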

# Load the uploaded PDF and split it into chunks
def load_docs(uploaded_file):
    print("loading docs ...")
    content = uploaded_file.read()
    os.makedirs("./temp", exist_ok=True)
    file_path_aux = "./temp/file.pdf"
    write_text_file(content, file_path_aux)
    file_path = "./temp/"
    loader = DirectoryLoader(file_path,
                             glob="*.pdf",
                             loader_cls=UnstructuredPDFLoader)
    documents = loader.load()
    # Split text from the PDF into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                   chunk_overlap=0,
                                                   length_function=len)
    texts = text_splitter.split_documents(documents)
    return texts
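
# RecursiveCharacterTextSplitter tries separators in order ("\n\n", "\n", " ",
# "") until every piece fits within chunk_size characters, as measured by
# length_function. chunk_overlap=0 means adjacent chunks share no text; a
# small overlap (e.g. 100-200 characters) is a common alternative so that
# facts are less likely to be cut in half at chunk boundaries.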

# Load a PDF file
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    st.write('Loading file')
    texts = load_docs(uploaded_file)
    model = setup_dbqa(texts)
    question = st.text_input('Ask a question:')
    if question:
        # Generate an answer from the retrieval chain
        answer = model({'query': question})
        print(question)
        print(answer)
        st.write('Question: ', answer["query"])
        st.write('Answer: ', answer["result"])
        st.write('Source documents: ', answer["source_documents"])
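        # A more readable alternative (sketch): RetrievalQA returns LangChain
        # Document objects, so each chunk's text and metadata can be shown
        # individually instead of dumping the raw list:
        # for i, doc in enumerate(answer["source_documents"], start=1):
        #     st.write(f'Source {i}: ', doc.page_content[:300])
        #     st.write(doc.metadata)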

if st.button("Clear cache before loading new document"):
    # Clears all st.cache_resource caches:
    st.cache_resource.clear()