"""AskYourBooks — a Gradio app that answers questions about an uploaded book.

A .txt or .epub book is split into chunks, embedded with a HuggingFace
embedding model and indexed in a local FAISS store; questions are then
answered by a HuggingFace-hosted LLM over the most relevant chunks (RAG).
"""

import io
import os
import time
from getpass import getpass

import gradio as gr
import requests
import transformers
from langchain import HuggingFaceHub, LLMChain, PromptTemplate
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import TextLoader, UnstructuredEPubLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter
from langchain.vectorstores import FAISS
from pypandoc.pandoc_download import download_pandoc

# Example questions (about "20,000 Leagues Under the Sea") shown in the UI.
question_examples = [
    "What is the name of the captain of the Nautilus?",
    "What are the names of the crew members of the Nautilus?",
    "What is the name of the Canadian?",
    "What is Ned Land's profession?",
    "What musical instrument does Captain Nemo play?",
    "What is the name of Professor Aronnax's butler?",
    "In which areas was the Nautilus traveling?",
    "Why doesn't Captain Nemo hate the society?",
]

# BUG FIX: the original `os.environ.get("HUGGINGFACEHUB_API_TOKEN")` discarded
# its result and set nothing. Prompt for the token only when it is absent
# (getpass was imported but never used — this was evidently the intent).
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("HUGGINGFACEHUB_API_TOKEN: ")

# used repository for llm
REPO_ID = "declare-lab/flan-alpaca-large"

transformers.utils.move_cache()

llm = HuggingFaceHub(
    repo_id=REPO_ID, model_kwargs={"temperature": 0, "max_length": 512}
)
embeddings = HuggingFaceEmbeddings()
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# pandoc is required by UnstructuredEPubLoader to convert epub files.
download_pandoc()

# Location of the persisted FAISS index (shared by indexing and querying).
INDEX_FOLDER = "."
INDEX_NAME = "faiss_index"


def _index_documents(documents):
    """Split *documents* into chunks, embed them, and persist a FAISS index.

    Shared pipeline for both file formats (was duplicated in read_epub and
    read_textfile).
    """
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    docs = text_splitter.split_documents(documents)
    db = FAISS.from_documents(docs, embeddings)
    db.save_local(folder_path=INDEX_FOLDER, index_name=INDEX_NAME)


def upload_file(File):
    """Dispatch an uploaded file to the matching reader by extension.

    Files with any other extension are silently ignored.
    """
    if File.name.endswith(".txt"):
        read_textfile(File)
    elif File.name.endswith(".epub"):
        read_epub(File)


def read_epub(File):
    """Load an epub book and build the local FAISS index for it."""
    loader = UnstructuredEPubLoader(File.name)
    _index_documents(loader.load())


def read_textfile(File):
    """Load a plain-text book and build the local FAISS index for it."""
    loader = TextLoader(File.name)
    _index_documents(loader.load())


def ask_question(question, chat_history):
    """Answer *question* from the indexed book and append to *chat_history*.

    Returns ("", chat_history) so Gradio clears the question textbox and
    refreshes the chatbot widget.
    """
    chain = load_qa_chain(llm, chain_type="stuff", verbose=False)
    db = FAISS.load_local(
        folder_path=INDEX_FOLDER, embeddings=embeddings, index_name=INDEX_NAME
    )
    relevant_docs = db.similarity_search(question)
    answer = chain.run(
        input_documents=relevant_docs, question=question, callbacks=callback_manager
    )
    chat_history.append((question, answer))
    # Brief pause so the streamed stdout output finishes before the UI update.
    time.sleep(1)
    return "", chat_history


with gr.Blocks() as demo:
    gr.Markdown("## đź“šAskYourBooks")
    gr.Markdown(
        """ Ask questions to your books in txt or epub format. After a short indexing time, ask questions about the content of the book. 
"""
    )
    textfile = gr.File(label="Drag your book here")
    ask_textbox = gr.Textbox(label="Question")
    sample_questions = gr.Examples(examples=question_examples, inputs=[ask_textbox])
    ask_button = gr.Button("Ask your book")
    clear_button = gr.Button("Clear conversation")
    chatbot = gr.Chatbot(label="Conversation")

    # Re-index whenever a new book is dropped in.
    textfile.change(
        fn=upload_file, inputs=[textfile], outputs=[], api_name="book_upload"
    )
    ask_button.click(
        fn=ask_question,
        inputs=[ask_textbox, chatbot],
        outputs=[ask_textbox, chatbot],
        api_name="ask_book",
    )
    clear_button.click(
        lambda: None, None, chatbot, queue=False, api_name="reset_conversation"
    )

demo.launch()