"""Ask-Shakespeare demo: retrieval-augmented QA over Shakespeare's plays.

Running this script clones the play corpus, flattens the HTML files into
``./data``, splits them into chunks, stores their embeddings in a persisted
Chroma collection, and serves a Gradio UI that answers questions using a
BLOOMZ text-generation pipeline over the retrieved chunks.
"""
import os
import pickle
import shutil
import subprocess
from glob import glob

from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader

# --- 1. Fetch the corpus -----------------------------------------------------
# The notebook version used the IPython shell escape `!git clone ...`, which is
# a syntax error in a plain Python file. Skip the clone when the checkout is
# already present so the script can be re-run.
if not os.path.isdir("./shakespeare"):
    subprocess.run(
        ["git", "clone", "https://github.com/TheMITTech/shakespeare"],
        check=True,
    )

# Without recursive=True, `**` behaves like `*`, so this matches HTML files
# exactly one directory below ./shakespeare.
files = glob("./shakespeare/**/*.html")

# --- 2. Flatten every HTML file into ./data -----------------------------------
destination_folder = './data/'
# exist_ok avoids FileExistsError on a second run (os.mkdir raised there).
os.makedirs(destination_folder, exist_ok=True)
for html_file in files:
    # Fix: the original wrote html_file.split("/"[-1]), which yields a list and
    # made the string concatenation raise TypeError. Only the file name is
    # wanted here.
    shutil.move(
        html_file,
        os.path.join(destination_folder, os.path.basename(html_file)),
    )

# --- 3. Load the documents and cache them on disk -----------------------------
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

with open("shakespeare.pkl", "wb") as fp:
    pickle.dump(data, fp)

# NOTE: pickle.load is only acceptable here because the file was written by
# this script a moment ago; never unpickle a file from an untrusted source.
with open('shakespeare.pkl', 'rb') as fp:
    data = pickle.load(fp)

# --- 4. Split into chunks measured with the BLOOMZ tokenizer ------------------
bloomz_tokenizer = AutoTokenizer.from_pretrained('bigscience/bloomz-1b7')
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
    bloomz_tokenizer,
    chunk_size=100,
    chunk_overlap=0,
    separator='\n',
)
documents = text_splitter.split_documents(data)

# --- 5. Embed the chunks and persist the vector store -------------------------
embeddings = HuggingFaceEmbeddings()
persist_directory = "vector_db"
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory=persist_directory,
)
vectordb.persist()
vectordb = None  # drop the in-memory handle; the store is reopened below

vectordb_persist = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)

# --- 6. Build the retrieval QA chain ------------------------------------------
llm = HuggingFacePipeline.from_model_id(
    model_id="bigscience/bloomz-1b7",
    task="text-generation",
    model_kwargs={"temperature": 0, "max_length": 500},
)
doc_retriever = vectordb_persist.as_retriever()
shakespeare_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
)


def make_inference(query):
    """Run ``query`` through the module-level ``shakespeare_qa`` chain.

    Args:
        query: The question text entered by the user.

    Returns:
        Whatever ``shakespeare_qa.run(query)`` returns; it is passed to the
        Gradio output textbox unchanged.
    """
    inference = shakespeare_qa.run(query)
    return inference


if __name__ == "__main__":
    # make a gradio interface
    import gradio as gr

    gr.Interface(
        make_inference,
        gr.inputs.Textbox(lines=2, label="Query"),
        gr.outputs.Textbox(label="Response"),
        title="Ask_Shakespeare",
        description="️building_w_llms_qa_Shakespeare allows you to inquire about the Shakespeare's plays.",
    ).launch()