"""Streamlit app: upload a .txt or .pdf document and ask questions about it.

Pipeline: uploaded file -> saved to working dir -> LangChain loader ->
CharacterTextSplitter -> embeddings (HuggingFace) -> vector store
(FAISS for text, VectorstoreIndexCreator for PDF) -> QA chain over a
HuggingFaceHub LLM.
"""

import os
from io import StringIO  # for text files
from pathlib import Path

import streamlit as st
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import TextLoader, UnstructuredPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
from langchain.vectorstores import FAISS  # facebook vectorization

# HuggingFace Hub auth comes from Streamlit secrets, never hard-coded.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["hf_api_key"]


def init():
    """Initialise the shared embeddings, LLM and QA chain as module globals.

    Must be called once before pdf_file()/text_file(), which read these
    globals. (The original also declared a global ``llm2`` that was never
    assigned; it has been removed.)
    """
    global embeddings, llm, chain
    embeddings = HuggingFaceEmbeddings()
    llm = HuggingFaceHub(
        repo_id="declare-lab/flan-alpaca-large",
        model_kwargs={"temperature": 0, "max_length": 512},
    )
    # "stuff" chain: concatenates retrieved docs into a single prompt.
    chain = load_qa_chain(llm, chain_type="stuff")


def _save_upload(file_obj):
    """Write an uploaded file to the working directory and return its path.

    The client-supplied filename is reduced to its basename so a crafted
    name like ``../../etc/passwd`` cannot escape the working directory.
    """
    safe_name = Path(file_obj.name).name
    with open(safe_name, "wb") as f:
        f.write(file_obj.getbuffer())
    return safe_name


def pdf_file(txtFileObj):
    """Index an uploaded PDF and answer a user query against it.

    Builds a vector index over the PDF and wires it into a RetrievalQA
    chain (separate from the global ``chain`` used for text files).
    """
    st.subheader('Uploaded PDF File:')
    st.write(txtFileObj.name)

    saved_path = _save_upload(txtFileObj)
    loaders = [UnstructuredPDFLoader(saved_path)]
    index = VectorstoreIndexCreator(
        embedding=embeddings,
        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
    ).from_loaders(loaders)

    # Local QA chain over the freshly built index; ``input_key`` makes
    # ``run(question=...)`` valid below. Deliberately not the global chain.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.vectorstore.as_retriever(),
        input_key="question",
    )

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        answer = qa_chain.run(question=query)
        st.subheader('Answer')
        st.write(answer)


def text_file(txtFileObj):
    """Index an uploaded text file and answer a user query against it.

    Splits the document, embeds the chunks into a FAISS store, then runs
    a similarity search and feeds the hits to the global QA ``chain``.
    """
    st.subheader('Uploaded Text File:')
    st.write(txtFileObj.name)

    saved_path = _save_upload(txtFileObj)
    documents = TextLoader(saved_path).load()

    # Chunk the document so each piece fits the LLM context window.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    docs = text_splitter.split_documents(documents)
    db = FAISS.from_documents(docs, embeddings)

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        hits = db.similarity_search(query)
        answer = chain.run(input_documents=hits, question=query)
        st.subheader('Answer')
        st.write(answer)


st.title('Document Q&A - Ask anything in your Document')
st.subheader('This application can be used to upload text(.txt) and PDF(.pdf) files and ask questions about their contents.')

init()

st.sidebar.subheader('Upload document')
uploaded_file = st.sidebar.file_uploader("Upload File", type=['txt', 'pdf'])

if uploaded_file:
    suffix = Path(uploaded_file.name).suffix
    if suffix == '.txt':
        st.sidebar.info(Path(uploaded_file.name))
        text_file(uploaded_file)
    elif suffix == '.pdf':
        pdf_file(uploaded_file)

with st.sidebar.expander('File'):
    if uploaded_file:
        st.info(uploaded_file.name)
    # '/content/' only exists on Colab-style hosts; listing is best-effort.
    if os.path.exists('/content/'):
        st.info(os.listdir('/content/'))