# Streamlit app that answers questions about Shakespeare's plays.
#
# Flow: ask for an OpenAI API key, index the HTML files under
# ./shakespeare/html/ into a Chroma vector store, then answer the user's
# query with a RetrievalQA chain backed by gpt-3.5-turbo.
#
# NOTE: this header is deliberately written as `#` comments rather than a
# module docstring, so that it can never be picked up by Streamlit "magic"
# (which renders bare top-level expressions into the page).
import os

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, BSHTMLLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

st.title('👩🏻‍💻 Query Shakespeare\'s Plays 🎭')

# The key prompt is rendered inside a placeholder so that it can be removed
# from the page once a key has been entered.
placeholder = st.empty()
openai_api_key = placeholder.text_input(
    'OpenAI API Key',
    type='password',
    help='This app requires a functioning OpenAI API key in order to work.',
    placeholder='Your OpenAI API Key'
)

# Nothing below can work without a key; st.stop() ends this script run here.
if not openai_api_key:
    st.stop()
placeholder.empty()


@st.cache_resource
def _build_vector_store(openai_api_key):
    """Load, chunk and embed the Shakespeare HTML files into a Chroma store.

    Streamlit re-executes the whole script on every widget interaction.
    Without caching, each new query would reload the corpus and re-embed
    every chunk through the OpenAI embeddings API. The result is cached
    per distinct ``openai_api_key`` value, so the work is done once per key.

    Args:
        openai_api_key: OpenAI API key used to compute the embeddings.

    Returns:
        The Chroma vector store built from the chunked documents.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Load data
    loader = DirectoryLoader(
        './shakespeare/html/',
        loader_cls=lambda path: BSHTMLLoader(path, bs_kwargs={'features': 'html.parser'}),
    )
    data = loader.load()

    # Chunk text
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator='\n')
    documents = text_splitter.split_documents(data)

    # Vectorize
    return Chroma.from_documents(documents, embeddings)


# Load model
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo', openai_api_key=openai_api_key)

# Vector store (built on first use, then served from Streamlit's resource cache)
db = _build_vector_store(openai_api_key)

# QA Chain
shakespeare_qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=db.as_retriever())

# Query
query = st.text_input(
    'Query',
    value='What is the name of Hamlet\'s mother?',
    help='Ask a question pertaining to Shakespeare\'s plays.'
)

# Do not spend an API call on a blank question (e.g. the user cleared the box).
if not query.strip():
    st.stop()

response = shakespeare_qa.run(query)

# Streamlit "magic": a bare string expression is rendered as Markdown, so this
# draws a horizontal rule between the query box and the answer.
'---'
st.write(f'📖 {response}')