from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.retrievers import WikipediaRetriever
from transformers import AutoTokenizer

# Fetch source documents: Wikipedia articles relevant to "Economics".
retriever = WikipediaRetriever(lang="en")
data = retriever.get_relevant_documents(query="Economics")

# Split the articles into ~100-token chunks, measured with the BLOOMZ tokenizer
# so chunk boundaries line up with what the generation model will see.
bloomz_tokenizer = AutoTokenizer.from_pretrained("bigscience/bloomz-1b7")
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
    bloomz_tokenizer, chunk_size=100, chunk_overlap=0, separator="\n"
)
documents = text_splitter.split_documents(data)

# Embed the chunks and persist them to a local Chroma vector store.
embeddings = HuggingFaceEmbeddings()
persist_directory = "vector_db"
vectordb = Chroma.from_documents(
    documents=documents, embedding=embeddings, persist_directory=persist_directory
)
vectordb.persist()

# Drop the in-memory handle and reload from disk to confirm persistence works.
vectordb = None
vectordb_persist = Chroma(
    persist_directory=persist_directory, embedding_function=embeddings
)

# Wrap BLOOMZ-1b7 in a local text-generation pipeline to serve as the QA model.
llm = HuggingFacePipeline.from_model_id(
    model_id="bigscience/bloomz-1b7",
    task="text-generation",
    model_kwargs={"temperature": 0, "max_length": 500},
)

# "stuff" chain type: retrieved chunks are stuffed directly into the prompt.
doc_retriever = vectordb_persist.as_retriever()
wikipedia_qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=doc_retriever
)


def make_inference(query):
    return wikipedia_qa.run(query)


if __name__ == "__main__":
    # Serve the QA chain behind a simple Gradio interface.
    import gradio as gr

    gr.Interface(
        fn=make_inference,
        inputs=gr.Textbox(lines=2, label="Query"),
        outputs=gr.Textbox(label="Response"),
        title="Ask Wikipedia about Economics",
        description="A QA application over Wikipedia",
    ).launch()
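# A minimal smoke test, assuming the models and vector store above load
# successfully. Calling make_inference directly (bypassing Gradio) checks
# retrieval + generation end to end; the example question and the shape of
# the output below are illustrative assumptions, not from this script.
#
#   >>> make_inference("What is opportunity cost?")
#   '...'  # answer text varies with the retrieved chunks and model sampling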