from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
import os

# Load every file in the data directory with the Unstructured loader.
destination_folder = './data/'
txt_dir_loader = DirectoryLoader(destination_folder, loader_cls=UnstructuredFileLoader)
data = txt_dir_loader.load()

# Split the documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
documents = text_splitter.split_documents(data)

# Embed the chunks and persist them to a local Chroma vector store.
embeddings = OpenAIEmbeddings()
persist_directory = "vector_db"
vectordb = Chroma.from_documents(documents=documents,
                                 embedding=embeddings,
                                 persist_directory=persist_directory)
vectordb.persist()

# Reload the store from disk so later runs can skip re-embedding.
vectordb = None
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

# Build a RetrievalQA chain that stuffs the retrieved chunks into the prompt.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
doc_retriever = vectordb.as_retriever()
hp_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=doc_retriever)

def answer_question(query):
    return hp_qa.run(query)

if __name__ == "__main__":
    import gradio as gr

    gr.Interface(
        answer_question,
        [
            gr.inputs.Textbox(lines=2, label="Query"),
        ],
        gr.outputs.Textbox(label="Response"),
        title="Ask Harry Potter",
        description="""Ask Harry Potter is a tool that lets you ask a question
        with the books' text as reference""",
    ).launch()
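
As a quick sanity check (a sketch, not part of the original script), the persisted index can be queried from a separate Python session before launching the Gradio app. This assumes OPENAI_API_KEY is exported and ./data/ already holds the book text; the question used here is only an illustrative example.

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Reopen the vector store persisted by the script above.
embeddings = OpenAIEmbeddings()
vectordb = Chroma(persist_directory="vector_db", embedding_function=embeddings)

# Print the top chunks retrieved for a sample question to confirm the index works.
for i, doc in enumerate(vectordb.similarity_search("Who gave Harry the Marauder's Map?", k=3), start=1):
    print(f"--- hit {i} ---")
    print(doc.page_content[:300])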