import html
from typing import List
from typing import NoReturn

import streamlit as st
from langchain.docstore.document import Document
from streamlit.logger import get_logger

from knowledge_gpt.core.parsing import File

logger = get_logger(__name__)


def wrap_doc_in_html(docs: List[Document]) -> str:
    """Wrap every line of the given pages in <p> tags.

    The ``page_content`` of each document is joined with an ``<hr/>``
    separator line between pages, then the combined text is split on
    newlines and each resulting line is wrapped in its own ``<p>`` element.

    Args:
        docs: Pages to render; only ``page_content`` is read.

    Returns:
        The concatenated ``<p>`` elements as one string. An empty ``docs``
        list yields a single empty paragraph (``"<p></p>"``).
    """
    # Page text comes from parsed documents and is placed inside markup, so
    # escape &, < and > to keep it from being interpreted as HTML.
    pages = [html.escape(doc.page_content, quote=False) for doc in docs]
    # Add horizontal rules between pages
    text = "\n<hr/>\n".join(pages)
    return "".join(f"<p>{line}</p>" for line in text.split("\n"))


def is_query_valid(query: str) -> bool:
    """Return True if ``query`` is non-empty; otherwise show an error.

    Args:
        query: The question text to check.

    Returns:
        False (after calling ``st.error``) when ``query`` is falsy,
        True otherwise.
    """
    if not query:
        st.error("Please enter a question!")
        return False
    return True


def is_file_valid(file: File) -> bool:
    """Return True if ``file`` contains at least some non-whitespace text.

    Args:
        file: Parsed file whose ``docs`` are inspected via ``page_content``.

    Returns:
        False (after reporting through ``st.error`` and the module logger)
        when ``file.docs`` is empty or every page is blank, True otherwise.
    """
    # any() over an empty sequence is False, so this also covers "no docs".
    has_text = any(doc.page_content.strip() for doc in file.docs)
    if not has_text:
        st.error("Cannot read document! Make sure the document has selectable text")
        logger.error("Cannot read document")
        return False
    return True


def display_file_read_error(e: Exception, file_name: str) -> NoReturn:
    """Report a file-reading failure and stop via ``st.stop()``.

    Args:
        e: The exception raised while reading the file.
        file_name: Name of the offending file; the text after its last
            ``.`` is logged as the extension (the whole name if it has
            no dot).
    """
    st.error("Error reading file. Make sure the file is not corrupted or encrypted")
    logger.error(
        "%s: %s. Extension: %s",
        e.__class__.__name__,
        e,
        file_name.split(".")[-1],
    )
    st.stop()