File size: 6,129 Bytes
51fe9d2
 
0489db2
 
 
0e17089
51fe9d2
 
0489db2
 
 
 
 
7a7c4d5
0489db2
51fe9d2
0489db2
 
 
7a7c4d5
 
 
 
51fe9d2
0489db2
7a7c4d5
51fe9d2
0489db2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51fe9d2
0489db2
 
 
 
51fe9d2
 
 
d5bd88b
 
0489db2
0e17089
 
 
 
 
 
 
 
 
 
 
 
0489db2
7a7c4d5
0e17089
d5bd88b
 
 
 
 
 
 
 
 
 
0e17089
d5bd88b
 
0e17089
 
 
d5bd88b
 
0e17089
 
 
 
 
 
 
 
d5bd88b
 
 
 
 
 
0489db2
d5bd88b
0489db2
d5bd88b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51fe9d2
d5bd88b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import streamlit as st
from openai.error import OpenAIError
from .utils import *
from typing import Text, Union

# Whether the Streamlit file uploader accepts several files at once
# (passed as accept_multiple_files=...; True makes the widget return a list).
multiple_files = True

def clear_submit() -> None:
    """Reset the ``file_submitted`` flag in the Streamlit session state.

    Registered as the ``on_change`` callback of the file uploader so that
    picking a new file invalidates the previous submission.
    """
    st.session_state.update(file_submitted=False)

def set_openai_api_key(api_key: Text) -> bool:
    """Validate and store the OpenAI API key in the Streamlit session state.

    Args:
        api_key (Text): OpenAI API key; expected to start with ``sk-``.

    Returns:
        bool: True if the key looks valid and was stored, False otherwise
        (an error message is shown in the UI on failure).
    """
    # OpenAI keys no longer have a fixed 51-character length (project-scoped
    # "sk-proj-..." keys are longer), so the old exact-length test rejected
    # valid keys. Check only the prefix and a sane minimum length.
    if not (api_key.startswith("sk-") and len(api_key) >= 20):
        st.error("Invalid OpenAI API key! Please provide a valid key.")
        return False

    st.session_state["OPENAI_API_KEY"] = api_key
    st.session_state["api_key_configured"] = True
    return True

def file_to_doc(file: Union[PDFFile, DocxFile, TxtFile, CodeFile]):
    """Convert an uploaded file to a document using the matching parser.

    Args:
        file: Uploaded file object exposing a ``name`` attribute.

    Returns:
        The parsed document, or ``None`` (with an error shown in the UI)
        when the file type is unsupported.
    """
    # Extensions routed to the plain-text parser.
    text_extensions = (".txt", ".py", ".json", ".html", ".css", ".md")
    if file.name.endswith(".pdf"):
        doc = parse_pdf(file)
    elif file.name.endswith(".docx"):
        doc = parse_docx(file)
    # BUG FIX: the original used ``file.name.split["."][1]`` — subscripting
    # the bound method raises TypeError — and even as ``split(".")[1]`` the
    # dot-less extension could never match the dotted entries, so text/code
    # files were never parsed. ``endswith`` with a tuple handles all cases.
    elif file.name.endswith(text_extensions):
        doc = parse_txt(file)
    else:
        st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
        doc = None

    return doc

# this function can be used to define a single doc processing pipeline
# def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:  

def qa_main():
    """Render the chat-with-your-documents Streamlit page.

    Flow: read the OpenAI key from ``st.secrets``, let the user upload one
    or more files, parse and embed them into a search index, then run a
    chat loop that answers questions grounded in the indexed documents.
    All state (key, submission flag, chat history) lives in
    ``st.session_state``.
    """
    st.markdown("<h2>This app allows to chat with files!</h2>", unsafe_allow_html=True)
    st.write("Just upload something using and start chatting with a version of GPT4 that has read the file!")
    
    # Search index over the embedded documents; built after upload.
    index = None
    doc = None

    # Gate flags: upload UI is shown only after the key is configured, and
    # the chat UI only after documents were parsed and indexed.
    upload_document_greenlight = False
    uploaded_processed_document_greenlight = False
    # OpenAI API Key - TODO: consider adding a key valid for everyone
    # st.header("Configure OpenAI API Key")
    # st.warning('Please enter your OpenAI API Key!', icon='⚠️')

    # uncomment the following lines to add a user-specific key
    # user_secret = st.text_input(
    #     "Insert your OpenAI API key here ([get your API key](https://platform.openai.com/account/api-keys)).",
    #     type="password",
    #     placeholder="Paste your OpenAI API key here (sk-...)",
    #     help="You can get your API key from https://platform.openai.com/account/api-keys.",
    #     value=st.session_state.get("OPENAI_API_KEY", ""),
    # )
    # NOTE(review): reads the app-wide key from Streamlit secrets; this
    # raises if OPENAI_API_KEY is missing from secrets.toml — confirm that
    # is the intended failure mode rather than st.secrets.get(...).
    user_secret = st.secrets["OPENAI_API_KEY"]
    if user_secret:
        if set_openai_api_key(user_secret):
            st.success('OpenAI API key successfully accessed!', icon='✅')
            upload_document_greenlight = True
    
    if upload_document_greenlight:
        # File that needs to be queried
        st.header("Upload a file")
        # With accept_multiple_files=True the widget returns a (possibly
        # empty) list; the len() check below relies on that.
        uploaded_file = st.file_uploader(
            "Upload a pdf, docx, or txt file (scanned documents not supported)",
            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
            help="Scanned documents are not supported yet 🥲",
            on_change=clear_submit, 
            accept_multiple_files=multiple_files
        )
            
        # reading the uploaded files
        text = []
        if len(uploaded_file) != 0:
            # toggle internal file submission state to True
            st.session_state["file_submitted"] = True
            for file in uploaded_file:
                # parse the file using custom parsers
                file_doc = file_to_doc(file)
                # converts the files into a list of documents
                # NOTE(review): tuple(file_doc) assumes the parser result is
                # iterable and never None (file_to_doc returns None for
                # unsupported types) — TODO confirm upstream filtering.
                file_text = text_to_docs(text=tuple(file_doc), file_name=file.name)
                text.extend(file_text)
        
            # embeds the documents using OpenAI API
            try:
                with st.spinner("Indexing the document... This might take a while!"):
                    index = embed_docs(tuple(text))
                    st.session_state["api_key_configured"] = True
            except OpenAIError as e:
                # NOTE(review): st.error's second positional parameter is
                # `icon`, so e._message is likely not rendered as part of the
                # message here — verify against the Streamlit API.
                st.error("OpenAI error encountered: ", e._message)
        
            uploaded_processed_document_greenlight = True
        
    if uploaded_processed_document_greenlight: 
        # Initialize the chat transcript on first run of the chat UI.
        if "messages" not in st.session_state:
            st.session_state["messages"] = []

        # Replay the stored conversation so it survives Streamlit reruns.
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

        if prompt := st.chat_input("Ask the document something..."):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                # retrieving the most relevant sources
                sources = search_docs(index, prompt)
                # producing the answer, live
                # NOTE(review): iterating get_answer(...)["output_text"] walks
                # the string character by character to fake streaming — each
                # "answer_bit" is a single character, not a token. Confirm this
                # is intentional; it re-renders markdown once per character.
                full_response = ""
                for answer_bit in get_answer(sources, prompt)["output_text"]:
                    full_response += answer_bit
                    message_placeholder.markdown(full_response + "▌")
                
                message_placeholder.markdown(full_response)

                # answer = get_answer(sources, prompt)
                # message_placeholder.markdown(answer["output_text"])
            
            # st.session_state.messages.append({"role": "assistant", "content": answer["output_text"]})
            st.session_state.messages.append({"role": "assistant", "content": full_response})

# This might be useful to add memory to the chatbot harnessing a more low-level approach
# llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
# retriever = your_vector_store.as_retriever()

# # Create the multipurpose chain
# qachat = ConversationalRetrievalChain.from_llm(
#     llm=ChatOpenAI(temperature=0),
#     memory=memory,
#     retriever=retriever, 
#     return_source_documents=True
# )

# qachat("Ask your question here...")