"""Streamlit app that answers questions about Shakespeare's plays.

The app asks for an OpenAI API key, indexes the HTML files found under
``./shakespeare/html/`` into a Chroma vector store, and answers the user's
query with a LangChain ``RetrievalQA`` chain.
"""
import os

import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, BSHTMLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA


def _build_qa_chain(api_key: str) -> RetrievalQA:
    """Build the retrieval QA chain over the Shakespeare HTML corpus.

    Args:
        api_key: OpenAI API key used for both the chat model and the
            embedding model.

    Returns:
        A ``RetrievalQA`` chain (chain type ``'stuff'``) whose retriever is
        backed by a Chroma store built from the split documents.
    """
    # Load model
    llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo', openai_api_key=api_key)
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)

    # Load data; every file in the directory is parsed with BeautifulSoup's
    # built-in 'html.parser' backend.
    loader = DirectoryLoader(
        './shakespeare/html/',
        loader_cls=lambda path: BSHTMLLoader(path, bs_kwargs={'features': 'html.parser'}),
    )
    data = loader.load()

    # Chunk text
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator='\n')
    documents = text_splitter.split_documents(data)

    # Vectorize
    db = Chroma.from_documents(documents, embeddings)

    # QA Chain
    return RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=db.as_retriever())


# NOTE(review): the 'π…' glyphs in the UI strings below look like mis-decoded
# emoji (the leading group is probably the "woman technologist" sequence).
# They are kept byte-for-byte here; restore them from the original file.
st.title('π©π»βπ» Query Shakespeare\'s Plays π')

placeholder = st.empty()
openai_api_key = placeholder.text_input(
    'OpenAI API Key',
    type='password',
    help='This app requires a functioning OpenAI API key in order to work.',
    placeholder='Your OpenAI API Key'
)

# Nothing below can work without a key, so halt this script run until the
# user has entered one; afterwards hide the key prompt.
if not openai_api_key:
    st.stop()
placeholder.empty()

# Streamlit re-executes the whole script on every widget interaction.
# Loading, splitting and embedding the corpus each time would be slow and
# would re-bill the embedding calls, so the chain is kept in session state
# and rebuilt only when the API key changes.
if (
    'qa_chain' not in st.session_state
    or st.session_state['qa_api_key'] != openai_api_key
):
    st.session_state['qa_chain'] = _build_qa_chain(openai_api_key)
    st.session_state['qa_api_key'] = openai_api_key
shakespeare_qa = st.session_state['qa_chain']

# Query
col1, col2 = st.columns(2)
with col1:
    # NOTE(review): the original statement was truncated to `response =`
    # followed by a dangling `))`; it is reconstructed here as the QA chain
    # run on the text input's value -- confirm against the upstream file.
    query = st.text_input(
        'Query',
        help='Ask a question pertaining to Shakespeare\'s plays.',
        placeholder='What is the name of Hamlet\'s mother?'
    )
    # The input is '' until the user types something; skip the (billed)
    # model call in that case.
    response = shakespeare_qa.run(query) if query.strip() else ''
with col2:
    if response:
        st.write(f'π {response}')