# ramwar — latest version (commit 513b7bc)
import os
import requests
import io
import time
import transformers
from getpass import getpass
from langchain import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import TokenTextSplitter, CharacterTextSplitter
from langchain import PromptTemplate, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader, UnstructuredEPubLoader
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import gradio as gr
from pypandoc.pandoc_download import download_pandoc
# Sample questions shown in the gradio Examples widget (see the UI wiring
# below); they assume the uploaded book is "Twenty Thousand Leagues Under
# the Seas", so they only make sense after indexing that novel.
question_examples = [
"What is the name of the captain of the Nautilus?",
"What are the names of the crew members of the Nautilus?",
"What is the name of the Canadian?",
"What is Ned Land's profession?",
"What musical instrument does Captain Nemo play?",
"What is the name of Professor Aronnax's butler?",
"In which areas was the Nautilus traveling?",
"Why doesn't Captain Nemo hate the society?"
]
# --- one-time application setup ---

# Ensure the Hugging Face Hub token is available to langchain. The original
# code called os.environ.get() and discarded the result, which had no effect;
# prompt for the token only when it is missing from the environment.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("HuggingFace Hub API token: ")

# Hosted instruction-tuned model used for question answering.
REPO_ID = "declare-lab/flan-alpaca-large"

# Migrate any old-format transformers cache before models are fetched.
transformers.utils.move_cache()

# LLM endpoint: temperature 0 for deterministic answers, output capped at 512 tokens.
llm = HuggingFaceHub(
    repo_id=REPO_ID,
    model_kwargs={"temperature": 0, "max_length": 512}
)

# Default sentence-transformers embeddings used to build/query the FAISS index.
embeddings = HuggingFaceEmbeddings()

# Streams generated tokens to stdout while the QA chain runs.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# pypandoc needs a pandoc binary to parse EPUB files; download it if absent.
download_pandoc()
def upload_file(File):
    """Dispatch an uploaded book to the matching indexer based on its extension.

    Fix: the extension comparison is now case-insensitive, so files named
    e.g. BOOK.TXT or Book.EPUB are indexed instead of being silently skipped.
    Files with any other extension are still ignored without error, matching
    the original behavior.

    Args:
        File: gradio file object; only its ``.name`` (path) attribute is used.
    """
    filename = File.name.lower()
    if filename.endswith(".txt"):
        read_textfile(File)
    elif filename.endswith(".epub"):
        read_epub(File)
def read_epub(File):
    """Index an EPUB book into a local FAISS vector store.

    Loads the EPUB, splits it into ~1000-character chunks (10-char overlap),
    embeds the chunks, and persists the index as ./faiss_index so that
    ask_question() can reload it later.

    Args:
        File: gradio file object; only its ``.name`` (path) attribute is used.
    """
    epub_documents = UnstructuredEPubLoader(File.name).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunks = splitter.split_documents(epub_documents)
    vector_store = FAISS.from_documents(chunks, embeddings)
    vector_store.save_local(folder_path=".", index_name="faiss_index")
def read_textfile(File):
    """Index a plain-text book into a local FAISS vector store.

    Loads the text file, splits it into ~1000-character chunks (10-char
    overlap), embeds the chunks, and persists the index as ./faiss_index so
    that ask_question() can reload it later.

    Args:
        File: gradio file object; only its ``.name`` (path) attribute is used.
    """
    text_documents = TextLoader(File.name).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunks = splitter.split_documents(text_documents)
    vector_store = FAISS.from_documents(chunks, embeddings)
    vector_store.save_local(folder_path=".", index_name="faiss_index")
def ask_question(question, chat_history):
    """Answer a question against the indexed book and extend the chat history.

    Reloads the FAISS index saved by the upload step, retrieves the chunks
    most similar to the question, and runs a "stuff" QA chain over them.

    Args:
        question: the user's question text.
        chat_history: list of (question, answer) tuples from the Chatbot widget.

    Returns:
        A pair of (empty string, updated history) — the empty string clears
        the question textbox in the gradio UI.
    """
    qa_chain = load_qa_chain(llm, chain_type="stuff", verbose=False)
    index = FAISS.load_local(folder_path=".", embeddings=embeddings, index_name="faiss_index")
    context_docs = index.similarity_search(question)
    response = qa_chain.run(
        input_documents=context_docs,
        question=question,
        callbacks=callback_manager
    )
    chat_history.append((question, response))
    # Brief pause so the streamed output settles before the UI refreshes.
    time.sleep(1)
    return "", chat_history
# --- gradio UI ---
# NOTE: inside gr.Blocks(), the order in which components are created defines
# the on-screen layout, so these statements must not be reordered.
with gr.Blocks() as demo:
    gr.Markdown("## 📚AskYourBooks")
    gr.Markdown("""
                Ask questions to your books in txt or epub format. After a short indexing time,
                ask questions about the content of the book.
                """
    )
    # Drag-and-drop upload target; indexing starts on change (see below).
    textfile = gr.File(label="Drag your book here")
    ask_textbox = gr.Textbox(label="Question")
    # Clicking an example fills the question textbox.
    sample_questions = gr.Examples(examples=question_examples, inputs=[ask_textbox])
    ask_button = gr.Button("Ask your book")
    clear_button = gr.Button("Clear conversation")
    chatbot = gr.Chatbot(label="Conversation")
    # Index the book as soon as a file is uploaded/changed.
    textfile.change(fn=upload_file, inputs=[textfile], outputs=[], api_name="book_upload")
    # ask_question returns ("", history): clears the textbox and updates the chat.
    ask_button.click(
        fn=ask_question,
        inputs=[ask_textbox, chatbot],
        outputs=[ask_textbox, chatbot],
        api_name="ask_book"
    )
    # Returning None resets the Chatbot component.
    clear_button.click(lambda: None, None, chatbot, queue=False, api_name="reset_conversation")
demo.launch()