fracapuano committed on
Commit
0489db2
1 Parent(s): 02556c2

fix: qa main function fixed

Browse files
Files changed (1) hide show
  1. qa/qa.py +90 -100
qa/qa.py CHANGED
@@ -1,119 +1,109 @@
1
  import streamlit as st
2
  from streamlit_chat import message
3
  from openai.error import OpenAIError
4
- from .utils import (
5
- parse_docx,
6
- parse_pdf,
7
- parse_txt,
8
- search_docs,
9
- embed_docs,
10
- text_to_docs,
11
- get_answer,
12
- )
13
  from uuid import uuid4
 
 
 
14
 
15
  def clear_submit():
16
- st.session_state["submit"] = False
 
 
 
 
 
 
17
 
18
- def set_openai_api_key(api_key: str):
 
 
19
  st.session_state["OPENAI_API_KEY"] = api_key
 
20
 
21
- def qa_main():
22
- st.markdown("<h1>This app allows to chat with files!</h1>", unsafe_allow_html=True)
23
- st.markdown(\
24
- """
25
- Developed using LangChain and OpenAI Embeddings.</p>
26
- Before hitting on "Submit", please make sure you have uploaded a file and entered a question.
 
 
 
 
 
 
 
 
 
 
27
 
28
- You can upload files using the sidebar on the left.
29
- """,
30
- unsafe_allow_html=True
31
- )
32
  index = None
33
  doc = None
34
-
35
- with st.sidebar:
36
- user_secret = st.text_input(
37
- "OpenAI API Key",
38
- type="password",
39
- placeholder="Paste your OpenAI API key here (sk-...)",
40
- help="You can get your API key from https://platform.openai.com/account/api-keys.",
41
- value=st.session_state.get("OPENAI_API_KEY", ""),
42
- )
43
- if user_secret:
44
- set_openai_api_key(user_secret)
45
 
46
- uploaded_file = st.file_uploader(
47
- "Upload a pdf, docx, or txt file",
48
- type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
49
- help="Scanned documents are not supported yet!",
50
- on_change=clear_submit,
51
- accept_multiple_files=False,
52
- )
53
- # reading the files
54
- if uploaded_file is not None:
55
- if uploaded_file.name.endswith(".pdf"):
56
- doc = parse_pdf(uploaded_file)
57
- elif uploaded_file.name.endswith(".docx"):
58
- doc = parse_docx(uploaded_file)
59
- elif uploaded_file.name.endswith(".txt"):
60
- doc = parse_txt(uploaded_file)
61
- else:
62
- st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt]")
63
- doc = None
64
-
65
- text = text_to_docs(text=tuple(doc))
66
- st.write(text[:1])
67
-
68
- try:
69
- with st.spinner("Indexing document(s)... This may take some time."):
70
- index = embed_docs(tuple(text))
71
- st.session_state["api_key_configured"] = True
72
- except OpenAIError as e:
73
- st.error(e._message)
 
 
74
 
75
- tab1, tab2 = st.tabs(["Chat With File", "About the Application"])
76
- with tab1:
77
- if 'generated' not in st.session_state:
78
- st.session_state['generated'] = []
 
 
79
 
80
- if 'past' not in st.session_state:
81
- st.session_state['past'] = []
82
 
83
- def get_text():
84
- if user_secret:
85
- st.header("Ask me something about the document:")
86
- input_text = st.text_area("You:", on_change=clear_submit)
87
- return input_text
88
-
89
- user_input = get_text()
90
 
91
- button = st.button("Submit")
92
- if button or st.session_state.get("submit"):
93
- if not user_input:
94
- st.error("Please enter a question!")
95
- else:
96
- st.session_state["submit"] = True
97
- sources = search_docs(index, user_input)
98
- try:
99
- answer = get_answer(sources, user_input)
100
-
101
- st.session_state.past.append(user_input)
102
- st.session_state.generated.append(answer["output_text"])
103
-
104
- except OpenAIError as e:
105
- st.error(e._message)
106
-
107
- if st.session_state['past']:
108
- for i in range(len(st.session_state['past'])-1, -1, -1):
109
- message(st.session_state['generated'][i], key=str(uuid4()))
110
- message(st.session_state['past'][i], is_user=True, key=str(uuid4()))
111
 
112
- with tab2:
113
- st.write('See sources')
 
 
 
 
 
 
 
 
114
 
115
- # st.write('Chat with Files enables user to extract all the information from a file. User can obtain the transcription, the embedding of each segment and also ask questions to the file through a chat.')
116
- # st.write('Features include- ')
117
- # st.write('1. Reading any pdf, docx or plain txt (such as python programs) file')
118
- # st.write('2. Embedding texts segments with Langchain and OpenAI')
119
- # st.write('3. Chatting with the file using streamlit-chat and LangChain QA with source and the GPT4 model')
 
1
  import streamlit as st
2
  from streamlit_chat import message
3
  from openai.error import OpenAIError
4
+ from .utils import *
 
 
 
 
 
 
 
 
5
  from uuid import uuid4
6
+ from typing import Text, Union
7
+
8
+ multiple_files = False
9
 
def clear_submit():
    """Mark the current session as having no submitted file.

    Sets ``st.session_state["file_submitted"]`` to ``False``. It takes no
    arguments and returns nothing, so it can be passed directly as a
    widget ``on_change`` callback.
    """
    session = st.session_state
    session["file_submitted"] = False
15
+
16
def set_openai_api_key(api_key: Text):
    """Remember the OpenAI API key for the current Streamlit session.

    Two session-state entries are written: ``"OPENAI_API_KEY"`` receives the
    given key and ``"api_key_configured"`` is set to ``True``.

    Args:
        api_key (Text): OpenAI API key to store.
    """
    session = st.session_state
    # The two writes are independent of each other; flag first, key second.
    session["api_key_configured"] = True
    session["OPENAI_API_KEY"] = api_key
24
 
25
def file_to_doc(file: Union[PDFFile, DocxFile, TxtFile, CodeFile]):
    """Convert an uploaded file to a document using the matching parser.

    The parser is chosen from the file name's extension (case-insensitive):
    ``.pdf`` -> ``parse_pdf``, ``.docx`` -> ``parse_docx``, and the plain-text
    family (``.txt``, ``.py``, ``.json``, ``.html``, ``.css``, ``.md``) ->
    ``parse_txt``.

    Args:
        file: Uploaded file object exposing a ``name`` attribute.

    Returns:
        Whatever the selected parser returns, or ``None`` when the extension
        is not supported (an error is shown through ``st.error`` in that case).
    """
    # Compare on the lower-cased name so "REPORT.PDF" is treated like "report.pdf".
    filename = file.name.lower()

    if filename.endswith(".pdf"):
        return parse_pdf(file)
    if filename.endswith(".docx"):
        return parse_docx(file)
    # str.endswith accepts a tuple of suffixes; this replaces the previous
    # `file.name.split["."][1]` lookup, which subscripted the method object
    # (TypeError) and compared a dot-less extension against dotted entries.
    if filename.endswith((".txt", ".py", ".json", ".html", ".css", ".md")):
        return parse_txt(file)

    st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
    return None
38
+
39
+ # this function can be used to define a single doc processing pipeline
40
+ # def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:
41
 
42
def qa_main():
    """Render the "chat with a file" page.

    The page is built top to bottom on every Streamlit run:

    1. Ask for the OpenAI API key and store it via ``set_openai_api_key``.
    2. Let the user upload a file, parse it with ``file_to_doc``, split it
       with ``text_to_docs`` and index it with ``embed_docs``.
    3. Replay the chat history kept in ``st.session_state["messages"]``.
    4. On a new prompt, look up sources with ``search_docs``, generate a
       reply with ``get_answer`` and append both turns to the history.

    OpenAI errors raised while indexing or answering are reported to the user
    instead of propagating.
    """
    st.markdown("<h2>This app allows to chat with files!</h2>", unsafe_allow_html=True)
    st.write("Just upload something using and start chatting with a version of GPT4 that has read the file!")

    index = None
    doc = None

    # OpenAI API Key - TODO: consider adding a key valid for everyone
    st.header("Configure OpenAI API Key")
    user_secret = st.text_input(
        "Insert your OpenAI API key here ([get your API key](https://platform.openai.com/account/api-keys)).",
        type="password",
        placeholder="Paste your OpenAI API key here (sk-...)",
        help="You can get your API key from https://platform.openai.com/account/api-keys.",
        value=st.session_state.get("OPENAI_API_KEY", ""),
    )
    if user_secret:
        set_openai_api_key(user_secret)

    # File that needs to be queried
    st.header("Upload a file")
    uploaded_file = st.file_uploader(
        "Upload a pdf, docx, or txt file (scanned documents not supported)",
        type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
        help="Scanned documents are not supported yet 🥲",
        on_change=clear_submit,
        accept_multiple_files=multiple_files,
    )

    # reading the uploaded file
    if uploaded_file is not None:
        # toggle internal file submission state to True
        st.session_state["file_submitted"] = True
        # parse the file using custom parsers
        doc = file_to_doc(uploaded_file)
        # file_to_doc returns None for unsupported files (and reports the
        # error itself); skip indexing then instead of crashing on tuple(None).
        if doc is not None:
            # converts the files into a list of documents
            text = text_to_docs(text=tuple(doc))

            try:
                with st.spinner("Indexing the document... This might take a while!"):
                    index = embed_docs(tuple(text))
                    st.session_state["api_key_configured"] = True
            except OpenAIError as e:
                # st.error takes a single body argument, so the detail must be
                # formatted into the message rather than passed positionally.
                st.error(f"OpenAI error encountered: {e._message}")

    if "messages" not in st.session_state:
        st.session_state["messages"] = []

    # Replay the conversation so far. The loop variable is deliberately not
    # called `message`, to avoid shadowing the streamlit_chat import.
    for past_message in st.session_state.messages:
        with st.chat_message(past_message["role"]):
            st.markdown(past_message["content"])

    if prompt := st.chat_input("Ask the document something..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            if index is None:
                # Nothing was indexed (no upload, unsupported file, or indexing
                # failed), so there is nothing to search.
                reply = "Please upload a supported file and make sure it was indexed successfully before asking questions."
            else:
                try:
                    # retrieving the most relevant sources
                    sources = search_docs(index, prompt)
                    # producing the answer
                    answer = get_answer(sources, prompt)
                    reply = answer["output_text"]
                except OpenAIError as e:
                    reply = f"OpenAI error encountered: {e._message}"
            message_placeholder.markdown(reply)

        st.session_state.messages.append({"role": "assistant", "content": reply})
109