# Document-QandA / app.py
# raseel-zymr's picture
# Finalised TXT and PDF files
# 37c7e44
import os
import tempfile
from io import StringIO
from pathlib import Path

import streamlit as st
#for textfiles
from langchain.document_loaders import TextLoader
#text splitter
from langchain.text_splitter import CharacterTextSplitter
#for using HugginFace models & embeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
from langchain.vectorstores import FAISS
#facebook vectorization
from langchain.chains.question_answering import load_qa_chain
#load pdf
#vectorize db index with chromadb
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader
# Copy the Hugging Face API key from Streamlit's secrets store into the
# process environment under the HUGGINGFACEHUB_API_TOKEN variable.
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": st.secrets["hf_api_key"]})
def init():
    """Create the shared embedding model, LLM and QA chain.

    Populates three module-level globals used by the file handlers:

    * ``embeddings`` -- a ``HuggingFaceEmbeddings`` instance built with its
      default settings.
    * ``llm`` -- a ``HuggingFaceHub`` client for the
      ``declare-lab/flan-alpaca-large`` repository.
    * ``chain`` -- a "stuff" question-answering chain built on ``llm``.
    """
    # Only names that are actually assigned below are declared global.
    global embeddings, llm, chain

    embeddings = HuggingFaceEmbeddings()
    llm = HuggingFaceHub(
        repo_id="declare-lab/flan-alpaca-large",
        model_kwargs={"temperature": 0, "max_length": 512},
    )
    chain = load_qa_chain(llm, chain_type="stuff")
def pdf_file(txtFileObj):
    """Index an uploaded PDF and answer a question about its contents.

    Renders the file name, builds a vector index over the PDF, then shows a
    text input; when the user enters a query the answer is written to the
    page.

    Args:
        txtFileObj: The uploaded file object. Despite the parameter name it
            holds a PDF here. Only its ``name`` attribute and
            ``getbuffer()`` method are used.
    """
    st.subheader('Uploaded PDF File:')
    st.write(txtFileObj.name)

    # The PDF loader takes a path on disk. Stage the upload in a throwaway
    # directory instead of the working directory so that an upload can never
    # overwrite one of the application's own files, and so nothing is left
    # behind afterwards. Only the base name of the upload is used.
    with tempfile.TemporaryDirectory() as staging_dir:
        staged_name = Path(txtFileObj.name).name or "upload.pdf"
        pdf_path = Path(staging_dir) / staged_name
        pdf_path.write_bytes(txtFileObj.getbuffer())

        loaders = [UnstructuredPDFLoader(str(pdf_path))]
        # Build the index while the staged file still exists.
        index = VectorstoreIndexCreator(
            embedding=embeddings,
            text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
        ).from_loaders(loaders)

    # Named qa_chain so it is not confused with the module-level ``chain``.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.vectorstore.as_retriever(),
        input_key="question",
    )

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        answer = qa_chain.run(question=query)
        st.subheader('Answer')
        st.write(answer)
def text_file(txtFileObj):
    """Index an uploaded text file and answer a question about its contents.

    Renders the file name, splits the text into chunks, stores them in a
    FAISS vector store, then shows a text input; when the user enters a
    query the most similar chunks are passed to the module-level ``chain``
    and the answer is written to the page.

    Args:
        txtFileObj: The uploaded file object. Only its ``name`` attribute
            and ``getbuffer()`` method are used.
    """
    st.subheader('Uploaded Text File:')
    st.write(txtFileObj.name)

    # The text loader takes a path on disk. Stage the upload in a throwaway
    # directory instead of the working directory so that an upload (for
    # example one named "requirements.txt") can never overwrite one of the
    # application's own files, and so nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as staging_dir:
        staged_name = Path(txtFileObj.name).name or "upload.txt"
        text_path = Path(staging_dir) / staged_name
        text_path.write_bytes(txtFileObj.getbuffer())
        # Load while the staged file still exists.
        documents = TextLoader(str(text_path)).load()

    # Text Splitter
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunks = text_splitter.split_documents(documents)
    db = FAISS.from_documents(chunks, embeddings)

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        matches = db.similarity_search(query)
        answer = chain.run(input_documents=matches, question=query)
        st.subheader('Answer')
        st.write(answer)
# Page header.
st.title('Document Q&A - Ask anything in your Document')
st.subheader('This application can be used to upload text(.txt) and PDF(.pdf) files and ask questions about their contents.')

# Build the shared embeddings / LLM / QA chain used by the handlers below.
init()

# Sidebar upload widget, restricted to the two supported file types.
st.sidebar.subheader('Upload document')
uploaded_file = st.sidebar.file_uploader("Upload File",type=['txt','pdf'])

if uploaded_file:
    # Compare the extension case-insensitively so that files such as
    # "REPORT.PDF" or "notes.TXT" are dispatched instead of silently ignored.
    extension = Path(uploaded_file.name).suffix.lower()
    if extension == '.txt':
        st.sidebar.info(Path(uploaded_file.name))
        text_file(uploaded_file)
    elif extension == '.pdf':
        pdf_file(uploaded_file)

# Sidebar panel showing the uploaded file name and, when the directory
# exists, the contents of /content/.
with st.sidebar.expander('File'):
    if uploaded_file:
        st.info(uploaded_file.name)
    if os.path.exists('/content/'):
        st.info(os.listdir('/content/'))