"""ShakesQA: retrieval-based question answering over local HTML documents.

At import time this module:

1. loads files under ``./data/`` through ``DirectoryLoader`` using
   ``BSHTMLLoader``,
2. splits them into chunks of up to 1000 characters with a 20-character
   overlap,
3. embeds the chunks with ``OpenAIEmbeddings`` into a Chroma store kept in
   the ``vector_db`` directory, and
4. builds the ``shakespeare_qa`` ``RetrievalQA`` chain on top of that store.

Running the file as a script additionally serves the chain through a Gradio
text interface.

The OpenAI credential is read from the ``OPENAI_API_KEY`` environment
variable and must be provided by the caller's environment.
"""
import os

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# SECURITY: an API key used to be hard-coded here. Secrets must not live in
# source control; the previously committed key should be revoked. Fail fast
# with an actionable message instead of a late error from the OpenAI client.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "running or importing this module."
    )

# --- Load and chunk the source documents -----------------------------------
bshtml_dir_loader = DirectoryLoader("./data/", loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
    length_function=len,
)
documents = text_splitter.split_documents(data)

# --- Embed the chunks into a persisted Chroma index ------------------------
embeddings = OpenAIEmbeddings()

persist_directory = "vector_db"

vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory=persist_directory,
)
# Flush the freshly built index before reopening it from disk below.
# NOTE(review): older chromadb releases only write on an explicit persist();
# newer ones persist automatically and this call is expected to be a no-op.
# Confirm against the pinned chromadb version.
vectordb.persist()

# Reopen the store from its on-disk location with the same embedding function.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)

# --- Question-answering chain ----------------------------------------------
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

doc_retriever = vectordb.as_retriever()

shakespeare_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
)


def _answer_question(question):
    """Return only the answer text produced by ``shakespeare_qa``.

    Calling the chain yields a mapping that carries the inputs alongside the
    outputs; the UI should display just the answer, so the ``"result"`` entry
    is extracted here.

    Args:
        question: The user's question as entered in the text box.

    Returns:
        The chain's answer string, or a short prompt when the question is
        empty or whitespace-only (avoids a pointless API call).
    """
    if not question or not question.strip():
        return "Please enter a question."
    return shakespeare_qa({"query": question})["result"]


if __name__ == "__main__":
    # Imported lazily so that importing this module for `shakespeare_qa`
    # does not require gradio to be installed.
    import gradio as gr

    gr.Interface(
        fn=_answer_question,
        inputs=[gr.Textbox(lines=2, label="Question")],
        outputs=gr.Textbox(label="Response"),
        title="ShakesQA",
        description="ShakesQA",
    ).launch()
|