Spaces:

fracapuano
/

AISandbox

Runtime error

App Files Files Community

fracapuano committed on Aug 30, 2023

Commit

0489db2

•

1 Parent(s): 02556c2

fix: qa main function fixed

Browse files

Files changed (1) hide show

qa/qa.py +90 -100

qa/qa.py CHANGED Viewed

@@ -1,119 +1,109 @@
 import streamlit as st
 from streamlit_chat import message
 from openai.error import OpenAIError
-from .utils import (
-    parse_docx,
-    parse_pdf,
-    parse_txt,
-    search_docs,
-    embed_docs,
-    text_to_docs,
-    get_answer,
-)
 from uuid import uuid4
 def clear_submit():
-    st.session_state["submit"] = False
-def set_openai_api_key(api_key: str):
     st.session_state["OPENAI_API_KEY"] = api_key
-def qa_main():
-    st.markdown("<h1>This app allows to chat with files!</h1>", unsafe_allow_html=True)
-    st.markdown(\
-        """
-        Developed using LangChain and OpenAI Embeddings.</p>
-        Before hitting on "Submit", please make sure you have uploaded a file and entered a question.
-        You can upload files using the sidebar on the left.
-        """,
-        unsafe_allow_html=True
-        )
     index = None
     doc = None
-    with st.sidebar:
-        user_secret = st.text_input(
-            "OpenAI API Key",
-            type="password",
-            placeholder="Paste your OpenAI API key here (sk-...)",
-            help="You can get your API key from https://platform.openai.com/account/api-keys.",
-            value=st.session_state.get("OPENAI_API_KEY", ""),
-        )
-        if user_secret:
-            set_openai_api_key(user_secret)
-        uploaded_file = st.file_uploader(
-            "Upload a pdf, docx, or txt file",
-            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
-            help="Scanned documents are not supported yet!",
-            on_change=clear_submit,
-            accept_multiple_files=False,
-        )
-        # reading the files
-        if uploaded_file is not None:
-            if uploaded_file.name.endswith(".pdf"):
-                doc = parse_pdf(uploaded_file)
-            elif uploaded_file.name.endswith(".docx"):
-                doc = parse_docx(uploaded_file)
-            elif uploaded_file.name.endswith(".txt"):
-                doc = parse_txt(uploaded_file)
-            else:
-                st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt]")
-                doc = None
-            text = text_to_docs(text=tuple(doc))
-            st.write(text[:1])
-            try:
-                with st.spinner("Indexing document(s)... This may take some time."):
-                    index = embed_docs(tuple(text))
-                    st.session_state["api_key_configured"] = True
-            except OpenAIError as e:
-                st.error(e._message)
-    tab1, tab2 = st.tabs(["Chat With File", "About the Application"])
-    with tab1:
-        if 'generated' not in st.session_state:
-            st.session_state['generated'] = []
-        if 'past' not in st.session_state:
-            st.session_state['past'] = []
-        def get_text():
-            if user_secret:
-                st.header("Ask me something about the document:")
-                input_text = st.text_area("You:", on_change=clear_submit)
-                return input_text
-        user_input = get_text()
-        button = st.button("Submit")
-        if button or st.session_state.get("submit"):
-            if not user_input:
-                st.error("Please enter a question!")
-            else:
-                st.session_state["submit"] = True
-                sources = search_docs(index, user_input)
-                try:
-                    answer = get_answer(sources, user_input)
-                    st.session_state.past.append(user_input)
-                    st.session_state.generated.append(answer["output_text"])
-                except OpenAIError as e:
-                    st.error(e._message)
-                if st.session_state['past']:
-                    for i in range(len(st.session_state['past'])-1, -1, -1):
-                        message(st.session_state['generated'][i], key=str(uuid4()))
-                        message(st.session_state['past'][i], is_user=True, key=str(uuid4()))
-    with tab2:
-        st.write('See sources')
-        # st.write('Chat with Files enables user to extract all the information from a file. User can obtain the transcription, the embedding of each segment and also ask questions to the file through a chat.')
-        # st.write('Features include- ')
-        # st.write('1. Reading any pdf, docx or plain txt (such as python programs) file')
-        # st.write('2. Embedding texts segments with Langchain and OpenAI')
-        # st.write('3. Chatting with the file using streamlit-chat and LangChain QA with source and the GPT4 model')

 import streamlit as st
 from streamlit_chat import message
 from openai.error import OpenAIError
+from .utils import *
 from uuid import uuid4
+from typing import Text, Union
+# Forwarded to st.file_uploader(accept_multiple_files=...) in qa_main;
+# False restricts the uploader to a single file.
+multiple_files = False
 def clear_submit():
+    """
+    Toggles the file_submitted internal session state variable to False.
+    """
+    st.session_state["file_submitted"] = False
+def set_openai_api_key(api_key:Text):
+    """Sets the internal OpenAI API key to the given value.
+    Args:
+        api_key (Text): OpenAI API key
+    """
     st.session_state["OPENAI_API_KEY"] = api_key
+    st.session_state["api_key_configured"] = True
+def file_to_doc(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:
+    """Converts a file to a document using specialized parsers."""
+    if file.name.endswith(".pdf"):
+        doc = parse_pdf(file)
+    elif file.name.endswith(".docx"):
+        doc = parse_docx(file)
+    elif file.name.split["."][1] in [".txt", ".py", ".json", ".html", ".css", ".md" ]:
+        doc = parse_txt(file)
+    else:
+        st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
+        doc = None
+    return doc
+# this function can be used to define a single doc processing pipeline
+# def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:
+def qa_main():
+    st.markdown("<h2>This app allows to chat with files!</h2>", unsafe_allow_html=True)
+    st.write("Just upload something using and start chatting with a version of GPT4 that has read the file!")
     index = None
     doc = None
+    # OpenAI API Key - TODO: consider adding a key valid for everyone
+    st.header("Configure OpenAI API Key")
+    user_secret = st.text_input(
+        "Insert your OpenAI API key here ([get your API key](https://platform.openai.com/account/api-keys)).",
+        type="password",
+        placeholder="Paste your OpenAI API key here (sk-...)",
+        help="You can get your API key from https://platform.openai.com/account/api-keys.",
+        value=st.session_state.get("OPENAI_API_KEY", ""),
+    )
+    if user_secret:
+        set_openai_api_key(user_secret)
+    # File that needs to be queried
+    st.header("Upload a file")
+    uploaded_file = st.file_uploader(
+        "Upload a pdf, docx, or txt file (scanned documents not supported)",
+        type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
+        help="Scanned documents are not supported yet 🥲",
+        on_change=clear_submit,
+        accept_multiple_files=multiple_files,
+    )
+    # reading the uploaded file
+    if uploaded_file is not None:
+        # toggle internal file submission state to True
+        st.session_state["file_submitted"] = True
+        # parse the file using custom parsers
+        doc = file_to_doc(uploaded_file)
+        # converts the files into a list of documents
+        text = text_to_docs(text=tuple(doc))
+        try:
+            with st.spinner("Indexing the document... This might take a while!"):
+                index = embed_docs(tuple(text))
+                st.session_state["api_key_configured"] = True
+        except OpenAIError as e:
+            st.error("OpenAI error encountered: ", e._message)
+    if "messages" not in st.session_state:
+        st.session_state["messages"] = []
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+    if prompt := st.chat_input("Ask the document something..."):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        with st.chat_message("assistant"):
+            message_placeholder = st.empty()
+            # retrieving the most relevant sources
+            sources = search_docs(index, prompt)
+            # producing the answer, live
+            answer = get_answer(sources, prompt)
+            # retrieving the answer
+            message_placeholder.markdown(answer["output_text"])
+        st.session_state.messages.append({"role": "assistant", "content": answer["output_text"]})