import streamlit as st
from openai.error import OpenAIError
from .utils import *
from typing import Text, Union

# Whether the uploader accepts more than one file per submission.
multiple_files = True


def clear_submit():
    """Reset the `file_submitted` session-state flag to False.

    Used as the file uploader's `on_change` callback so that picking a new
    file invalidates the previous submission.
    """
    st.session_state["file_submitted"] = False


def set_openai_api_key(api_key: Text) -> bool:
    """Validate an OpenAI API key and store it in the Streamlit session state.

    Args:
        api_key (Text): Candidate OpenAI API key.

    Returns:
        bool: True if the key passed the sanity check and was stored,
        False otherwise (an error is shown in the UI).
    """
    # NOTE(review): newer OpenAI keys (e.g. project keys) are longer than 51
    # chars, so this check may reject valid keys — confirm before tightening.
    if not (api_key.startswith('sk-') and len(api_key) == 51):
        st.error("Invalid OpenAI API key! Please provide a valid key.")
        return False
    st.session_state["OPENAI_API_KEY"] = api_key
    st.session_state["api_key_configured"] = True
    return True


def file_to_doc(file: Union[PDFFile, DocxFile, TxtFile, CodeFile]):
    """Parse an uploaded file into document text using the matching parser.

    Args:
        file: Uploaded file object; the parser is chosen from its extension.

    Returns:
        The parsed document, or None (after showing a UI error) when the
        file type is not supported.
    """
    if file.name.endswith(".pdf"):
        doc = parse_pdf(file)
    elif file.name.endswith(".docx"):
        doc = parse_docx(file)
    # FIX: the original subscripted the method (`split["."][1]` — a TypeError
    # at runtime) and compared against dotted extensions that `split(".")`
    # can never produce. Use the last dot-separated token, undotted.
    elif file.name.split(".")[-1] in ["txt", "py", "json", "html", "css", "md"]:
        doc = parse_txt(file)
    else:
        st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
        doc = None
    return doc


# this function can be used to define a single doc processing pipeline
# def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:


def qa_main():
    """Streamlit page: upload documents, embed them, and chat about them.

    Flow: read the OpenAI key from secrets -> accept file uploads ->
    parse and embed them -> run a chat loop answering questions grounded
    in the embedded documents.
    """
    st.markdown(
        """
This app allows to chat with files!
""",
        unsafe_allow_html=True,
    )
    st.write(
        "Just upload something using and start chatting with a version of "
        "GPT4 that has read the file!"
    )

    index = None
    upload_document_greenlight = False
    uploaded_processed_document_greenlight = False

    # OpenAI API Key - TODO: consider adding a key valid for everyone.
    # To ask each user for their own key instead, replace the secrets lookup
    # below with st.text_input(..., type="password") and validate it the
    # same way.
    user_secret = st.secrets["OPENAI_API_KEY"]
    if user_secret:
        if set_openai_api_key(user_secret):
            st.success('OpenAI API key successfully accessed!', icon='✅')
            upload_document_greenlight = True

    if upload_document_greenlight:
        # File(s) that need to be queried.
        st.header("Upload a file")
        uploaded_file = st.file_uploader(
            "Upload a pdf, docx, or txt file (scanned documents not supported)",
            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
            help="Scanned documents are not supported yet 🥲",
            on_change=clear_submit,
            accept_multiple_files=multiple_files,
        )

        # Parse every uploaded file into a flat list of documents.
        text = []
        if len(uploaded_file) != 0:
            # Toggle internal file submission state to True.
            st.session_state["file_submitted"] = True
            for file in uploaded_file:
                file_doc = file_to_doc(file)
                # FIX: file_to_doc returns None for unsupported types; the
                # original passed it straight to tuple() and crashed.
                if file_doc is None:
                    continue
                file_text = text_to_docs(text=tuple(file_doc), file_name=file.name)
                text.extend(file_text)
            # Embed the documents using the OpenAI API.
            try:
                with st.spinner("Indexing the document... This might take a while!"):
                    index = embed_docs(tuple(text))
                    st.session_state["api_key_configured"] = True
                # FIX: only open the chat once embedding actually succeeded.
                uploaded_processed_document_greenlight = True
            except OpenAIError as e:
                # FIX: st.error takes a single message; the original passed
                # two positional args and read the private `_message` attr.
                st.error(f"OpenAI error encountered: {e}")

    if uploaded_processed_document_greenlight:
        if "messages" not in st.session_state:
            st.session_state["messages"] = []

        # Replay the conversation so far.
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

        if prompt := st.chat_input("Ask the document something..."):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                # Retrieve the most relevant sources for the question.
                sources = search_docs(index, prompt)
                # Produce the answer "live", one character at a time, with a
                # cursor glyph while streaming.
                full_response = ""
                for answer_bit in get_answer(sources, prompt)["output_text"]:
                    full_response += answer_bit
                    message_placeholder.markdown(full_response + "▌")
                message_placeholder.markdown(full_response)
            st.session_state.messages.append({"role": "assistant", "content": full_response})

    # NOTE: to add conversational memory with a lower-level approach, a
    # langchain ConversationalRetrievalChain (ChatOpenAI + a
    # ConversationBufferMemory + a vector-store retriever) could replace the
    # manual loop above.