import os

import streamlit as st
from langchain.chat_models import AzureChatOpenAI

from knowledge_gpt.components.sidebar import sidebar
from knowledge_gpt.core.caching import bootstrap_caching
from knowledge_gpt.core.chunking import chunk_file
from knowledge_gpt.core.embedding import embed_files
from knowledge_gpt.core.parsing import read_file
from knowledge_gpt.core.qa import query_folder
from knowledge_gpt.ui import display_file_read_error
from knowledge_gpt.ui import is_file_valid
from knowledge_gpt.ui import is_query_valid
from knowledge_gpt.ui import wrap_doc_in_html
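
# ReferenceBot: upload a pdf, docx, or txt file, index it into a vector
# store, then ask questions about it; answers are shown alongside the
# source chunks they were drawn from.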

st.set_page_config(page_title="ReferenceBot", page_icon="📖", layout="wide")

# Copy all secrets into environment variables. The existence check avoids
# the warning Streamlit prints when st.secrets is accessed without a
# secrets.toml file.
if os.path.exists(
    os.path.dirname(os.path.abspath(__file__)) + "/../.streamlit/secrets.toml"
):
    for key, value in st.secrets.items():
        os.environ[key] = str(value)  # os.environ only accepts strings


def main():
    EMBEDDING = "openai"
    VECTOR_STORE = "faiss"
    MODEL_LIST = ["gpt-3.5-turbo", "gpt-4"]
    # Uncomment to enable debug mode
    # MODEL_LIST.insert(0, "debug")

    st.header("📖 ReferenceBot")

    # Enable caching for expensive functions
    bootstrap_caching()

    sidebar()

    uploaded_file = st.file_uploader(
        "Upload a pdf, docx, or txt file",
        type=["pdf", "docx", "txt"],
        help="Scanned documents are not supported yet!",
    )

    model: str = st.selectbox("Model", options=MODEL_LIST)  # type: ignore

    with st.expander("Advanced Options"):
        return_all_chunks = st.checkbox("Show all chunks retrieved from vector search")
        show_full_doc = st.checkbox("Show parsed contents of the document")

    if not uploaded_file:
        st.stop()
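
    # Parse the uploaded file and split it into small chunks for retrieval.
    # chunk_size/chunk_overlap below are passed through to knowledge_gpt's
    # chunker; whether the unit is tokens or characters depends on its
    # implementation.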
    try:
        file = read_file(uploaded_file)
    except Exception as e:
        display_file_read_error(e, file_name=uploaded_file.name)
        st.stop()  # defensive: never fall through with `file` unbound

    chunked_file = chunk_file(file, chunk_size=300, chunk_overlap=0)

    if not is_file_valid(file):
        st.stop()
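
    # Embed the chunks and build the vector index. ENGINE_EMBEDDING and
    # ENGINE are assumed to be Azure OpenAI deployment names (embedding and
    # chat deployments respectively). chunk_size=1 here is the embedding
    # batch size: one text per request, a common workaround for Azure
    # OpenAI endpoints that reject batched embedding inputs.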
    with st.spinner("Indexing document... This may take a while ⏳"):
        folder_index = embed_files(
            files=[chunked_file],
            embedding=EMBEDDING if model != "debug" else "debug",
            vector_store=VECTOR_STORE if model != "debug" else "debug",
            deployment=os.environ["ENGINE_EMBEDDING"],
            model=os.environ["ENGINE"],
            openai_api_key=os.environ["OPENAI_API_KEY"],
            openai_api_base=os.environ["OPENAI_API_BASE"],
            openai_api_type="azure",
            chunk_size=1,
        )
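
    # st.form batches the text area and the Submit button so the script
    # reruns only when the form is submitted, not on every keystroke.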
    with st.form(key="qa_form"):
        query = st.text_area("Ask a question about the document")
        submit = st.form_submit_button("Submit")

    if show_full_doc:
        with st.expander("Document"):
            # Hack to get around st.markdown rendering LaTeX
            st.markdown(f"<p>{wrap_doc_in_html(file.docs)}</p>", unsafe_allow_html=True)

    if submit:
        if not is_query_valid(query):
            st.stop()

        # Output Columns
        answer_col, sources_col = st.columns(2)

        with st.spinner("Setting up AzureChatOpenAI bot..."):
            llm = AzureChatOpenAI(
                openai_api_base=os.environ["OPENAI_API_BASE"],
                openai_api_version=os.environ["OPENAI_API_VERSION"],
                deployment_name=os.environ["ENGINE"],
                openai_api_key=os.environ["OPENAI_API_KEY"],
                openai_api_type="azure",
                temperature=0,
            )
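
        # Run retrieval and answering over the indexed chunks; query_folder
        # is assumed to return a result with .answer plus .sources documents
        # carrying page_content and metadata["source"], as rendered below.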
        with st.spinner("Querying folder to get result..."):
            result = query_folder(
                folder_index=folder_index,
                query=query,
                return_all=return_all_chunks,
                llm=llm,
            )

        with answer_col:
            st.markdown("#### Answer")
            st.markdown(result.answer)

        with sources_col:
            st.markdown("#### Sources")
            for source in result.sources:
                st.markdown(source.page_content)
                st.markdown(source.metadata["source"])
                st.markdown("---")
if __name__ == "__main__":
    main()