from langchain.document_loaders import BSHTMLLoader, DirectoryLoader

# Load every HTML file in ./data/ with the BeautifulSoup-backed loader
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into ~1000-character chunks with a small overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
    length_function=len,
)
documents = text_splitter.split_documents(data)

import os

# Set your OpenAI API key; never hard-code a real key in source
os.environ["OPENAI_API_KEY"] = "sk-..."

from langchain.embeddings.openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

from langchain.vectorstores import Chroma

# Build the vector store, persist it to disk, then reload it from the same directory
persist_directory = "vector_db"
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory=persist_directory,
)
vectordb.persist()
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

from langchain.chat_models import ChatOpenAI

# llm = ChatOpenAI(temperature=0, model="gpt-4")
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

doc_retriever = vectordb.as_retriever()

from langchain.chains import RetrievalQA

shakespeare_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
)


def answer_question(question: str) -> str:
    """Run the RetrievalQA chain on a single question and return the answer text."""
    return shakespeare_qa.run(question)


if __name__ == "__main__":
    # Serve the chain behind a simple Gradio interface
    import gradio as gr

    gr.Interface(
        fn=answer_question,
        inputs=gr.Textbox(lines=2, label="Question"),
        outputs=gr.Textbox(label="Response"),
        title="ShakesQA",
        description="ShakesQA",
    ).launch()
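
# Usage note (a minimal sketch): the RetrievalQA chain can also be queried
# directly, without the Gradio UI. The question below is only an illustrative
# placeholder:
#
#   answer = shakespeare_qa.run("Which play features the character Ophelia?")
#   print(answer)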