Spaces:
Runtime error
Runtime error
Merge pull request #1 from andreped/linting
Browse files
Added linting; refactored code; changed environ-import order
- .github/workflows/linting.yml +26 -0
- README.md +1 -0
- knowledge_gpt/components/sidebar.py +0 -1
- knowledge_gpt/core/caching.py +4 -10
- knowledge_gpt/core/chunking.py +2 -3
- knowledge_gpt/core/debug.py +10 -14
- knowledge_gpt/core/embedding.py +13 -15
- knowledge_gpt/core/parsing.py +10 -11
- knowledge_gpt/core/prompts.py +1 -3
- knowledge_gpt/core/qa.py +6 -6
- knowledge_gpt/core/utils.py +4 -5
- knowledge_gpt/main.py +16 -20
- knowledge_gpt/ui.py +5 -6
- setup.cfg +14 -0
- shell/format.sh +4 -0
- shell/lint.sh +23 -0
.github/workflows/linting.yml
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Linting
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches:
|
6 |
+
- '*'
|
7 |
+
pull_request:
|
8 |
+
branches:
|
9 |
+
- '*'
|
10 |
+
workflow_dispatch:
|
11 |
+
|
12 |
+
jobs:
|
13 |
+
build:
|
14 |
+
runs-on: ubuntu-20.04
|
15 |
+
steps:
|
16 |
+
- uses: actions/checkout@v1
|
17 |
+
- name: Set up Python 3.7
|
18 |
+
uses: actions/setup-python@v2
|
19 |
+
with:
|
20 |
+
python-version: 3.7
|
21 |
+
|
22 |
+
- name: Install lint dependencies
|
23 |
+
run: pip install wheel setuptools black==22.3.0 isort==5.10.1 flake8==4.0.1
|
24 |
+
|
25 |
+
- name: Lint the code
|
26 |
+
run: sh shell/lint.sh
|
README.md
CHANGED
@@ -23,6 +23,7 @@ app_file: knowledge_gpt/main.py
|
|
23 |
| - | - |
|
24 |
| **HF Deploy** | [](https://github.com/andreped/referencebot/actions) |
|
25 |
| **File size check** | [](https://github.com/andreped/referencebot/actions) |
|
|
|
26 |
|
27 |
## [Installation](https://github.com/andreped/referencebot#installation)
|
28 |
|
|
|
23 |
| - | - |
|
24 |
| **HF Deploy** | [](https://github.com/andreped/referencebot/actions) |
|
25 |
| **File size check** | [](https://github.com/andreped/referencebot/actions) |
|
26 |
+
| **Formatting check** | [](https://github.com/andreped/ReferenceBot/actions) |
|
27 |
|
28 |
## [Installation](https://github.com/andreped/referencebot#installation)
|
29 |
|
knowledge_gpt/components/sidebar.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import streamlit as st
|
2 |
-
|
3 |
from dotenv import load_dotenv
|
4 |
|
5 |
load_dotenv()
|
|
|
1 |
import streamlit as st
|
|
|
2 |
from dotenv import load_dotenv
|
3 |
|
4 |
load_dotenv()
|
knowledge_gpt/core/caching.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import streamlit as st
|
2 |
from streamlit.runtime.caching.hashing import HashFuncsDict
|
3 |
|
4 |
-
import knowledge_gpt.core.parsing as parsing
|
5 |
import knowledge_gpt.core.chunking as chunking
|
6 |
import knowledge_gpt.core.embedding as embedding
|
|
|
7 |
from knowledge_gpt.core.parsing import File
|
8 |
|
9 |
|
@@ -18,16 +18,10 @@ def bootstrap_caching():
|
|
18 |
|
19 |
# Get all subtypes of File from module
|
20 |
file_subtypes = [
|
21 |
-
cls
|
22 |
-
for cls in vars(parsing).values()
|
23 |
-
if isinstance(cls, type) and issubclass(cls, File) and cls != File
|
24 |
]
|
25 |
file_hash_funcs: HashFuncsDict = {cls: file_hash_func for cls in file_subtypes}
|
26 |
|
27 |
parsing.read_file = st.cache_data(show_spinner=False)(parsing.read_file)
|
28 |
-
chunking.chunk_file = st.cache_data(show_spinner=False, hash_funcs=file_hash_funcs)(
|
29 |
-
|
30 |
-
)
|
31 |
-
embedding.embed_files = st.cache_data(
|
32 |
-
show_spinner=False, hash_funcs=file_hash_funcs
|
33 |
-
)(embedding.embed_files)
|
|
|
1 |
import streamlit as st
|
2 |
from streamlit.runtime.caching.hashing import HashFuncsDict
|
3 |
|
|
|
4 |
import knowledge_gpt.core.chunking as chunking
|
5 |
import knowledge_gpt.core.embedding as embedding
|
6 |
+
import knowledge_gpt.core.parsing as parsing
|
7 |
from knowledge_gpt.core.parsing import File
|
8 |
|
9 |
|
|
|
18 |
|
19 |
# Get all subtypes of File from module
|
20 |
file_subtypes = [
|
21 |
+
cls for cls in vars(parsing).values() if isinstance(cls, type) and issubclass(cls, File) and cls != File
|
|
|
|
|
22 |
]
|
23 |
file_hash_funcs: HashFuncsDict = {cls: file_hash_func for cls in file_subtypes}
|
24 |
|
25 |
parsing.read_file = st.cache_data(show_spinner=False)(parsing.read_file)
|
26 |
+
chunking.chunk_file = st.cache_data(show_spinner=False, hash_funcs=file_hash_funcs)(chunking.chunk_file)
|
27 |
+
embedding.embed_files = st.cache_data(show_spinner=False, hash_funcs=file_hash_funcs)(embedding.embed_files)
|
|
|
|
|
|
|
|
knowledge_gpt/core/chunking.py
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
from langchain.docstore.document import Document
|
2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
3 |
from knowledge_gpt.core.parsing import File
|
4 |
|
5 |
|
6 |
-
def chunk_file(
|
7 |
-
file: File, chunk_size: int, chunk_overlap: int = 0, model_name="gpt-3.5-turbo"
|
8 |
-
) -> File:
|
9 |
"""Chunks each document in a file into smaller documents
|
10 |
according to the specified chunk size and overlap
|
11 |
where the size is determined by the number of tokens for the specified model.
|
|
|
1 |
from langchain.docstore.document import Document
|
2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
+
|
4 |
from knowledge_gpt.core.parsing import File
|
5 |
|
6 |
|
7 |
+
def chunk_file(file: File, chunk_size: int, chunk_overlap: int = 0, model_name="gpt-3.5-turbo") -> File:
|
|
|
|
|
8 |
"""Chunks each document in a file into smaller documents
|
9 |
according to the specified chunk size and overlap
|
10 |
where the size is determined by the number of tokens for the specified model.
|
knowledge_gpt/core/debug.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1 |
-
from
|
2 |
-
from typing import Iterable
|
|
|
|
|
|
|
|
|
3 |
from langchain.docstore.document import Document
|
4 |
from langchain.embeddings.base import Embeddings
|
5 |
from langchain.embeddings.fake import FakeEmbeddings as FakeEmbeddingsBase
|
6 |
-
from langchain.
|
7 |
-
from typing import Optional
|
8 |
|
9 |
|
10 |
class FakeChatModel(FakeListChatModel):
|
@@ -24,9 +27,7 @@ class FakeVectorStore(VectorStore):
|
|
24 |
def __init__(self, texts: List[str]):
|
25 |
self.texts: List[str] = texts
|
26 |
|
27 |
-
def add_texts(
|
28 |
-
self, texts: Iterable[str], metadatas: List[dict] | None = None, **kwargs: Any
|
29 |
-
) -> List[str]:
|
30 |
self.texts.extend(texts)
|
31 |
return self.texts
|
32 |
|
@@ -40,10 +41,5 @@ class FakeVectorStore(VectorStore):
|
|
40 |
) -> "FakeVectorStore":
|
41 |
return cls(texts=list(texts))
|
42 |
|
43 |
-
def similarity_search(
|
44 |
-
|
45 |
-
) -> List[Document]:
|
46 |
-
return [
|
47 |
-
Document(page_content=text, metadata={"source": f"{i+1}-{1}"})
|
48 |
-
for i, text in enumerate(self.texts)
|
49 |
-
]
|
|
|
1 |
+
from typing import Any
|
2 |
+
from typing import Iterable
|
3 |
+
from typing import List
|
4 |
+
from typing import Optional
|
5 |
+
|
6 |
+
from langchain.chat_models.fake import FakeListChatModel
|
7 |
from langchain.docstore.document import Document
|
8 |
from langchain.embeddings.base import Embeddings
|
9 |
from langchain.embeddings.fake import FakeEmbeddings as FakeEmbeddingsBase
|
10 |
+
from langchain.vectorstores import VectorStore
|
|
|
11 |
|
12 |
|
13 |
class FakeChatModel(FakeListChatModel):
|
|
|
27 |
def __init__(self, texts: List[str]):
|
28 |
self.texts: List[str] = texts
|
29 |
|
30 |
+
def add_texts(self, texts: Iterable[str], metadatas: List[dict] | None = None, **kwargs: Any) -> List[str]:
|
|
|
|
|
31 |
self.texts.extend(texts)
|
32 |
return self.texts
|
33 |
|
|
|
41 |
) -> "FakeVectorStore":
|
42 |
return cls(texts=list(texts))
|
43 |
|
44 |
+
def similarity_search(self, query: str, k: int = 4, **kwargs: Any) -> List[Document]:
|
45 |
+
return [Document(page_content=text, metadata={"source": f"{i+1}-{1}"}) for i, text in enumerate(self.texts)]
|
|
|
|
|
|
|
|
|
|
knowledge_gpt/core/embedding.py
CHANGED
@@ -1,11 +1,15 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
-
|
|
|
4 |
from langchain.embeddings import OpenAIEmbeddings
|
5 |
from langchain.embeddings.base import Embeddings
|
6 |
-
from
|
7 |
-
from langchain.
|
8 |
-
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
class FolderIndex:
|
@@ -30,9 +34,7 @@ class FolderIndex:
|
|
30 |
return all_texts
|
31 |
|
32 |
@classmethod
|
33 |
-
def from_files(
|
34 |
-
cls, files: List[File], embeddings: Embeddings, vector_store: Type[VectorStore]
|
35 |
-
) -> "FolderIndex":
|
36 |
"""Creates an index from files."""
|
37 |
|
38 |
all_docs = cls._combine_files(files)
|
@@ -45,9 +47,7 @@ class FolderIndex:
|
|
45 |
return cls(files=files, index=index)
|
46 |
|
47 |
|
48 |
-
def embed_files(
|
49 |
-
files: List[File], embedding: str, vector_store: str, **kwargs
|
50 |
-
) -> FolderIndex:
|
51 |
"""Embeds a collection of files and stores them in a FolderIndex."""
|
52 |
|
53 |
supported_embeddings: dict[str, Type[Embeddings]] = {
|
@@ -69,6 +69,4 @@ def embed_files(
|
|
69 |
else:
|
70 |
raise NotImplementedError(f"Vector store {vector_store} not supported.")
|
71 |
|
72 |
-
return FolderIndex.from_files(
|
73 |
-
files=files, embeddings=_embeddings, vector_store=_vector_store
|
74 |
-
)
|
|
|
1 |
+
from typing import List
|
2 |
+
from typing import Type
|
3 |
+
|
4 |
+
from langchain.docstore.document import Document
|
5 |
from langchain.embeddings import OpenAIEmbeddings
|
6 |
from langchain.embeddings.base import Embeddings
|
7 |
+
from langchain.vectorstores import VectorStore
|
8 |
+
from langchain.vectorstores.faiss import FAISS
|
9 |
+
|
10 |
+
from knowledge_gpt.core.debug import FakeEmbeddings
|
11 |
+
from knowledge_gpt.core.debug import FakeVectorStore
|
12 |
+
from knowledge_gpt.core.parsing import File
|
13 |
|
14 |
|
15 |
class FolderIndex:
|
|
|
34 |
return all_texts
|
35 |
|
36 |
@classmethod
|
37 |
+
def from_files(cls, files: List[File], embeddings: Embeddings, vector_store: Type[VectorStore]) -> "FolderIndex":
|
|
|
|
|
38 |
"""Creates an index from files."""
|
39 |
|
40 |
all_docs = cls._combine_files(files)
|
|
|
47 |
return cls(files=files, index=index)
|
48 |
|
49 |
|
50 |
+
def embed_files(files: List[File], embedding: str, vector_store: str, **kwargs) -> FolderIndex:
|
|
|
|
|
51 |
"""Embeds a collection of files and stores them in a FolderIndex."""
|
52 |
|
53 |
supported_embeddings: dict[str, Type[Embeddings]] = {
|
|
|
69 |
else:
|
70 |
raise NotImplementedError(f"Vector store {vector_store} not supported.")
|
71 |
|
72 |
+
return FolderIndex.from_files(files=files, embeddings=_embeddings, vector_store=_vector_store)
|
|
|
|
knowledge_gpt/core/parsing.py
CHANGED
@@ -1,14 +1,16 @@
|
|
1 |
-
from io import BytesIO
|
2 |
-
from typing import List, Any, Optional
|
3 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
import docx2txt
|
6 |
-
from langchain.docstore.document import Document
|
7 |
import fitz
|
8 |
-
from
|
9 |
-
|
10 |
-
from abc import abstractmethod, ABC
|
11 |
-
from copy import deepcopy
|
12 |
|
13 |
|
14 |
class File(ABC):
|
@@ -32,10 +34,7 @@ class File(ABC):
|
|
32 |
"""Creates a File from a BytesIO object"""
|
33 |
|
34 |
def __repr__(self) -> str:
|
35 |
-
return (
|
36 |
-
f"File(name={self.name}, id={self.id},"
|
37 |
-
" metadata={self.metadata}, docs={self.docs})"
|
38 |
-
)
|
39 |
|
40 |
def __str__(self) -> str:
|
41 |
return f"File(name={self.name}, id={self.id}, metadata={self.metadata})"
|
|
|
|
|
|
|
1 |
import re
|
2 |
+
from abc import ABC
|
3 |
+
from abc import abstractmethod
|
4 |
+
from copy import deepcopy
|
5 |
+
from hashlib import md5
|
6 |
+
from io import BytesIO
|
7 |
+
from typing import Any
|
8 |
+
from typing import List
|
9 |
+
from typing import Optional
|
10 |
|
11 |
import docx2txt
|
|
|
12 |
import fitz
|
13 |
+
from langchain.docstore.document import Document
|
|
|
|
|
|
|
14 |
|
15 |
|
16 |
class File(ABC):
|
|
|
34 |
"""Creates a File from a BytesIO object"""
|
35 |
|
36 |
def __repr__(self) -> str:
|
37 |
+
return f"File(name={self.name}, id={self.id}," " metadata={self.metadata}, docs={self.docs})"
|
|
|
|
|
|
|
38 |
|
39 |
def __str__(self) -> str:
|
40 |
return f"File(name={self.name}, id={self.id}, metadata={self.metadata})"
|
knowledge_gpt/core/prompts.py
CHANGED
@@ -26,6 +26,4 @@ QUESTION: {question}
|
|
26 |
=========
|
27 |
FINAL ANSWER:"""
|
28 |
|
29 |
-
STUFF_PROMPT = PromptTemplate(
|
30 |
-
template=template, input_variables=["summaries", "question"]
|
31 |
-
)
|
|
|
26 |
=========
|
27 |
FINAL ANSWER:"""
|
28 |
|
29 |
+
STUFF_PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"])
|
|
|
|
knowledge_gpt/core/qa.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
from typing import List
|
|
|
2 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
3 |
-
from
|
4 |
from langchain.docstore.document import Document
|
5 |
-
from knowledge_gpt.core.embedding import FolderIndex
|
6 |
from pydantic import BaseModel
|
7 |
-
|
|
|
|
|
8 |
|
9 |
|
10 |
class AnswerWithSources(BaseModel):
|
@@ -39,9 +41,7 @@ def query_folder(
|
|
39 |
)
|
40 |
|
41 |
relevant_docs = folder_index.index.similarity_search(query, k=5)
|
42 |
-
result = chain(
|
43 |
-
{"input_documents": relevant_docs, "question": query}, return_only_outputs=True
|
44 |
-
)
|
45 |
sources = relevant_docs
|
46 |
|
47 |
if not return_all:
|
|
|
1 |
from typing import List
|
2 |
+
|
3 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
4 |
+
from langchain.chat_models.base import BaseChatModel
|
5 |
from langchain.docstore.document import Document
|
|
|
6 |
from pydantic import BaseModel
|
7 |
+
|
8 |
+
from knowledge_gpt.core.embedding import FolderIndex
|
9 |
+
from knowledge_gpt.core.prompts import STUFF_PROMPT
|
10 |
|
11 |
|
12 |
class AnswerWithSources(BaseModel):
|
|
|
41 |
)
|
42 |
|
43 |
relevant_docs = folder_index.index.similarity_search(query, k=5)
|
44 |
+
result = chain({"input_documents": relevant_docs, "question": query}, return_only_outputs=True)
|
|
|
|
|
45 |
sources = relevant_docs
|
46 |
|
47 |
if not return_all:
|
knowledge_gpt/core/utils.py
CHANGED
@@ -1,15 +1,14 @@
|
|
1 |
from typing import List
|
|
|
2 |
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
|
|
|
|
3 |
from langchain.docstore.document import Document
|
4 |
|
5 |
-
from langchain.chat_models import ChatOpenAI
|
6 |
from knowledge_gpt.core.debug import FakeChatModel
|
7 |
-
from langchain.chat_models.base import BaseChatModel
|
8 |
|
9 |
|
10 |
-
def pop_docs_upto_limit(
|
11 |
-
query: str, chain: StuffDocumentsChain, docs: List[Document], max_len: int
|
12 |
-
) -> List[Document]:
|
13 |
"""Pops documents from a list until the final prompt length is less
|
14 |
than the max length."""
|
15 |
|
|
|
1 |
from typing import List
|
2 |
+
|
3 |
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
4 |
+
from langchain.chat_models import ChatOpenAI
|
5 |
+
from langchain.chat_models.base import BaseChatModel
|
6 |
from langchain.docstore.document import Document
|
7 |
|
|
|
8 |
from knowledge_gpt.core.debug import FakeChatModel
|
|
|
9 |
|
10 |
|
11 |
+
def pop_docs_upto_limit(query: str, chain: StuffDocumentsChain, docs: List[Document], max_len: int) -> List[Document]:
|
|
|
|
|
12 |
"""Pops documents from a list until the final prompt length is less
|
13 |
than the max length."""
|
14 |
|
knowledge_gpt/main.py
CHANGED
@@ -1,31 +1,27 @@
|
|
1 |
import os
|
2 |
-
os.environ["OPENAI_API_TYPE"] = "azure" # configure API to Azure OpenAI
|
3 |
|
4 |
import streamlit as st
|
5 |
-
|
6 |
-
|
7 |
-
# add all secrets into environmental variables
|
8 |
-
if os.path.exists(os.path.dirname(os.path.abspath(__file__)) + "/../.streamlit/secrets.toml"): # to avoid redundant print by calling st.secrets
|
9 |
-
for key, value in st.secrets.items():
|
10 |
-
os.environ[key] = value
|
11 |
|
12 |
from knowledge_gpt.components.sidebar import sidebar
|
13 |
-
|
14 |
-
from knowledge_gpt.ui import (
|
15 |
-
wrap_doc_in_html,
|
16 |
-
is_query_valid,
|
17 |
-
is_file_valid,
|
18 |
-
display_file_read_error,
|
19 |
-
)
|
20 |
-
|
21 |
from knowledge_gpt.core.caching import bootstrap_caching
|
22 |
-
|
23 |
-
from knowledge_gpt.core.parsing import read_file
|
24 |
from knowledge_gpt.core.chunking import chunk_file
|
25 |
from knowledge_gpt.core.embedding import embed_files
|
|
|
26 |
from knowledge_gpt.core.qa import query_folder
|
|
|
|
|
|
|
|
|
27 |
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
|
31 |
def main():
|
@@ -78,7 +74,7 @@ def main():
|
|
78 |
openai_api_key=os.environ["OPENAI_API_KEY"],
|
79 |
openai_api_base=os.environ["OPENAI_API_BASE"],
|
80 |
openai_api_type="azure",
|
81 |
-
chunk_size
|
82 |
)
|
83 |
|
84 |
with st.form(key="qa_form"):
|
@@ -106,7 +102,7 @@ def main():
|
|
106 |
openai_api_type="azure",
|
107 |
temperature=0,
|
108 |
)
|
109 |
-
|
110 |
with st.spinner("Querying folder to get result..."):
|
111 |
result = query_folder(
|
112 |
folder_index=folder_index,
|
|
|
1 |
import os
|
|
|
2 |
|
3 |
import streamlit as st
|
4 |
+
from langchain.chat_models import AzureChatOpenAI
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
from knowledge_gpt.components.sidebar import sidebar
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from knowledge_gpt.core.caching import bootstrap_caching
|
|
|
|
|
8 |
from knowledge_gpt.core.chunking import chunk_file
|
9 |
from knowledge_gpt.core.embedding import embed_files
|
10 |
+
from knowledge_gpt.core.parsing import read_file
|
11 |
from knowledge_gpt.core.qa import query_folder
|
12 |
+
from knowledge_gpt.ui import display_file_read_error
|
13 |
+
from knowledge_gpt.ui import is_file_valid
|
14 |
+
from knowledge_gpt.ui import is_query_valid
|
15 |
+
from knowledge_gpt.ui import wrap_doc_in_html
|
16 |
|
17 |
+
st.set_page_config(page_title="ReferenceBot", page_icon="📖", layout="wide")
|
18 |
+
|
19 |
+
# add all secrets into environmental variables
|
20 |
+
if os.path.exists(
|
21 |
+
os.path.dirname(os.path.abspath(__file__)) + "/../.streamlit/secrets.toml"
|
22 |
+
): # to avoid redundant print by calling st.secrets
|
23 |
+
for key, value in st.secrets.items():
|
24 |
+
os.environ[key] = value
|
25 |
|
26 |
|
27 |
def main():
|
|
|
74 |
openai_api_key=os.environ["OPENAI_API_KEY"],
|
75 |
openai_api_base=os.environ["OPENAI_API_BASE"],
|
76 |
openai_api_type="azure",
|
77 |
+
chunk_size=1,
|
78 |
)
|
79 |
|
80 |
with st.form(key="qa_form"):
|
|
|
102 |
openai_api_type="azure",
|
103 |
temperature=0,
|
104 |
)
|
105 |
+
|
106 |
with st.spinner("Querying folder to get result..."):
|
107 |
result = query_folder(
|
108 |
folder_index=folder_index,
|
knowledge_gpt/ui.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
from typing import List
|
|
|
|
|
2 |
import streamlit as st
|
3 |
from langchain.docstore.document import Document
|
4 |
-
from knowledge_gpt.core.parsing import File
|
5 |
from streamlit.logger import get_logger
|
6 |
-
|
|
|
7 |
|
8 |
logger = get_logger(__name__)
|
9 |
|
@@ -25,10 +27,7 @@ def is_query_valid(query: str) -> bool:
|
|
25 |
|
26 |
|
27 |
def is_file_valid(file: File) -> bool:
|
28 |
-
if (
|
29 |
-
len(file.docs) == 0
|
30 |
-
or "".join([doc.page_content for doc in file.docs]).strip() == ""
|
31 |
-
):
|
32 |
st.error("Cannot read document! Make sure the document has selectable text")
|
33 |
logger.error("Cannot read document")
|
34 |
return False
|
|
|
1 |
from typing import List
|
2 |
+
from typing import NoReturn
|
3 |
+
|
4 |
import streamlit as st
|
5 |
from langchain.docstore.document import Document
|
|
|
6 |
from streamlit.logger import get_logger
|
7 |
+
|
8 |
+
from knowledge_gpt.core.parsing import File
|
9 |
|
10 |
logger = get_logger(__name__)
|
11 |
|
|
|
27 |
|
28 |
|
29 |
def is_file_valid(file: File) -> bool:
|
30 |
+
if len(file.docs) == 0 or "".join([doc.page_content for doc in file.docs]).strip() == "":
|
|
|
|
|
|
|
31 |
st.error("Cannot read document! Make sure the document has selectable text")
|
32 |
logger.error("Cannot read document")
|
33 |
return False
|
setup.cfg
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[metadata]
|
2 |
+
description-file = README.md
|
3 |
+
|
4 |
+
[isort]
|
5 |
+
force_single_line=True
|
6 |
+
known_first_party=aeropath
|
7 |
+
line_length=160
|
8 |
+
profile=black
|
9 |
+
|
10 |
+
[flake8]
|
11 |
+
# imported but unused in __init__.py, that's ok.
|
12 |
+
per-file-ignores=*__init__.py:F401
|
13 |
+
ignore=E203,W503,W605,F632,E266,E731,E712,E741
|
14 |
+
max-line-length=120
|
shell/format.sh
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
isort --sl knowledge_gpt/
|
3 |
+
black --line-length 120 knowledge_gpt/
|
4 |
+
flake8 knowledge_gpt/
|
shell/lint.sh
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
isort --check --sl -c knowledge_gpt/
|
3 |
+
if ! [ $? -eq 0 ]
|
4 |
+
then
|
5 |
+
echo "Please run \"sh shell/format.sh\" to format the code."
|
6 |
+
exit 1
|
7 |
+
fi
|
8 |
+
echo "no issues with isort"
|
9 |
+
flake8 knowledge_gpt/
|
10 |
+
if ! [ $? -eq 0 ]
|
11 |
+
then
|
12 |
+
echo "Please fix the code style issue."
|
13 |
+
exit 1
|
14 |
+
fi
|
15 |
+
echo "no issues with flake8"
|
16 |
+
black --check --line-length 120 knowledge_gpt/
|
17 |
+
if ! [ $? -eq 0 ]
|
18 |
+
then
|
19 |
+
echo "Please run \"sh shell/format.sh\" to format the code."
|
20 |
+
exit 1
|
21 |
+
fi
|
22 |
+
echo "no issues with black"
|
23 |
+
echo "linting success!"
|