king007 committed on
Commit
b72b93d
1 Parent(s): ee4d116

Upload 3 files

Browse files
Files changed (3) hide show
  1. __init__.py +0 -0
  2. main.py +105 -0
  3. ui.py +53 -0
__init__.py ADDED
File without changes
main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from knowledge_gpt.components.sidebar import sidebar
4
+
5
+ from knowledge_gpt.ui import (
6
+ wrap_doc_in_html,
7
+ is_query_valid,
8
+ is_file_valid,
9
+ is_open_ai_key_valid,
10
+ )
11
+
12
+ from knowledge_gpt.core.caching import bootstrap_caching
13
+
14
+ from knowledge_gpt.core.parsing import read_file
15
+ from knowledge_gpt.core.chunking import chunk_file
16
+ from knowledge_gpt.core.embedding import embed_files
17
+ from knowledge_gpt.core.qa import query_folder
18
+
19
# Streamlit entry point: upload a document, index it, and answer questions
# about it. Streamlit re-runs this script top to bottom on every interaction,
# so each st.stop() below simply ends the current run early.
st.set_page_config(page_title="KnowledgeGPT", page_icon="📖", layout="wide")
st.header("📖KnowledgeGPT")

# Enable caching for expensive functions
bootstrap_caching()

sidebar()

openai_api_key = st.session_state.get("OPENAI_API_KEY")


if not openai_api_key:
    st.warning(
        "Enter your OpenAI API key in the sidebar. You can get a key at"
        " https://platform.openai.com/account/api-keys."
    )


uploaded_file = st.file_uploader(
    "Upload a pdf, docx, or txt file",
    type=["pdf", "docx", "txt"],
    help="Scanned documents are not supported yet!",
)

if not uploaded_file:
    st.stop()


file = read_file(uploaded_file)

# Validate the parsed file and the API key *before* doing any further work,
# so an unreadable document or a missing key never reaches chunking/embedding.
if not is_file_valid(file):
    st.stop()

if not is_open_ai_key_valid(openai_api_key):
    st.stop()

chunked_file = chunk_file(file, chunk_size=300, chunk_overlap=0)


with st.spinner("Indexing document... This may take a while⏳"):
    folder_index = embed_files(
        files=[chunked_file],
        embedding="openai",
        vector_store="faiss",
        openai_api_key=openai_api_key,
    )

with st.form(key="qa_form"):
    query = st.text_area("Ask a question about the document")
    submit = st.form_submit_button("Submit")


with st.expander("Advanced Options"):
    return_all_chunks = st.checkbox("Show all chunks retrieved from vector search")
    show_full_doc = st.checkbox("Show parsed contents of the document")


if show_full_doc:
    with st.expander("Document"):
        # Hack to get around st.markdown rendering LaTeX
        st.markdown(f"<p>{wrap_doc_in_html(file.docs)}</p>", unsafe_allow_html=True)


if submit:
    if not is_query_valid(query):
        st.stop()

    # Output Columns
    answer_col, sources_col = st.columns(2)

    result = query_folder(
        folder_index=folder_index,
        query=query,
        return_all=return_all_chunks,
        openai_api_key=openai_api_key,
        temperature=0,
    )

    with answer_col:
        st.markdown("#### Answer")
        st.markdown(result.answer)

    with sources_col:
        st.markdown("#### Sources")
        # Each source is shown as its text, then its source label, then a rule.
        for source in result.sources:
            st.markdown(source.page_content)
            st.markdown(source.metadata["source"])
            st.markdown("---")
ui.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ import streamlit as st
3
+ from langchain.docstore.document import Document
4
+ from knowledge_gpt.core.parsing import File
5
+ import openai
6
+ from streamlit.logger import get_logger
7
+
8
+ logger = get_logger(__name__)
9
+
10
+
11
def wrap_doc_in_html(docs: List[Document]) -> str:
    """Render the pages of a document as a string of HTML paragraphs.

    The ``page_content`` of each entry in ``docs`` is treated as one page.
    Pages are joined with an ``<hr/>`` separator line, and every line of the
    joined text is wrapped in its own ``<p>`` element.

    Page text is HTML-escaped first, so markup characters inside the document
    are displayed literally instead of being interpreted as HTML; only the
    ``<hr/>`` separators and ``<p>`` wrappers added here are real markup.

    Args:
        docs: Objects exposing a ``page_content`` string, one per page.

    Returns:
        The concatenated ``<p>...</p>`` elements. An empty ``docs`` list
        yields a single empty paragraph (``"<p></p>"``).
    """
    # Local import keeps this block self-contained.
    from html import escape

    # quote=False: the text lands in element content, never in an attribute,
    # so only &, < and > need escaping.
    pages = [escape(doc.page_content, quote=False) for doc in docs]
    # Add horizontal rules between pages
    joined = "\n<hr/>\n".join(pages)
    return "".join(f"<p>{line}</p>" for line in joined.split("\n"))
18
+
19
+
20
def is_query_valid(query: str) -> bool:
    """Return True when ``query`` contains something other than whitespace.

    When the query is empty (or only whitespace) an error is reported through
    ``st.error`` and False is returned.
    """
    # A query made only of spaces/newlines carries no question either.
    if not query or not query.strip():
        st.error("Please enter a question!")
        return False
    return True
25
+
26
+
27
def is_file_valid(file: File) -> bool:
    """Return True when at least one page of ``file`` has non-blank text.

    Every entry of ``file.docs`` is inspected, so a document whose first page
    is blank but whose later pages contain text is still accepted. When no
    page has text (including an empty ``file.docs``), an error is shown with
    ``st.error``, logged, and False is returned.
    """
    has_text = any(doc.page_content.strip() for doc in file.docs)
    if not has_text:
        st.error(
            "Cannot read document! Make sure the document has"
            " selectable text or is not password protected."
        )
        logger.error("Cannot read document")
        return False
    return True
36
+
37
+
38
@st.cache_data(show_spinner=False)
def is_open_ai_key_valid(openai_api_key) -> bool:
    """Check that an OpenAI API key is present and accepted by the API.

    A missing/empty key is rejected immediately. Otherwise a minimal chat
    completion request is sent with the key; any exception raised by that
    request is shown with ``st.error``, logged, and reported as an invalid key.

    Returns:
        True if the probe request succeeded, False otherwise.

    NOTE(review): the function is wrapped in ``st.cache_data``, so a False
    caused by a transient failure (e.g. a network error) is presumably reused
    for the same key until the cache is cleared -- confirm this is acceptable.
    """
    if not openai_api_key:
        st.error("Please enter your OpenAI API key in the sidebar!")
        return False
    try:
        # Only the success/failure of the call matters, so cap the reply at a
        # single token to keep the probe cheap and fast.
        openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "test"}],
            api_key=openai_api_key,
            max_tokens=1,
        )
    except Exception as e:
        # Broad catch is deliberate: any failure means the key cannot be used.
        message = f"{e.__class__.__name__}: {e}"
        st.error(message)
        logger.error(message)
        return False
    return True