baseline08_beta0.2.1_30Sept25: fix oauth_token. Marker converter now initialised with/without login. - pre-load models #UX. Introduce global singleton (constructor). - update README
- converters/extraction_converter.py +23 -11
- globals.py +13 -0
- llm/llm_login.py +3 -3
- llm/openai_client.py +1 -1
- ui/gradio_ui.py +22 -16
- utils/logger.py +2 -2
converters/extraction_converter.py
CHANGED

@@ -23,6 +23,7 @@ logger = get_logger(__name__)
 
 # create/load models. Called to curtail reloading models at each instance
 def load_models():
+    """ Creates Marker's models dict. Initiates download of models """
     return create_model_dict()
 
 # Full document converter
@@ -66,7 +67,7 @@ class DocumentConverter:
         self.max_workers = max_workers ## pass to config_dict["pdftext_workers"]
         self.max_retries = max_retries ## pass to __call__
         self.output_dir = output_dir ## "output_dir": settings.DEBUG_DATA_FOLDER if debug else output_dir,
-        self.use_llm = use_llm[0] if isinstance(use_llm, tuple) else use_llm, #False, #True,
+        self.use_llm = use_llm if use_llm else False #use_llm[0] if isinstance(use_llm, tuple) else use_llm, #False, #True,
         #self.page_range = page_range[0] if isinstance(page_range, tuple) else page_range ##SMY: iterating twice because self.page casting as hint type tuple!
         self.page_range = page_range if page_range else None
         # self.page_range = page_range[0] if isinstance(page_range, tuple) else page_range if isinstance(page_range, str) else None, ##Example: "0,4-8,16" ##Marker parses as List[int] #]debug #len(pdf_file)
@@ -103,6 +104,7 @@ class DocumentConverter:
 
         ##SMY: if falsely empty tuple () or None, pop the "page_range" key-value pair, else do nothing if truthy tuple value (i.e. keep as-is)
         self.config_dict.pop("page_range", None) if not self.config_dict.get("page_range") else None
+        self.config_dict.pop("use_llm", None) if not self.config_dict.get("use_llm") or self.config_dict.get("use_llm") is False or self.config_dict.get("use_llm") == 'False' else None
 
         logger.log(level=20, msg="✔️ config_dict custom configured:", extra={"service": "openai"}) #, "config": str(self.config_dict)})
 
@@ -135,10 +137,18 @@
             raise RuntimeError(f"✗ Error creating artifact_dict or retrieving LLM service: {exc}\n{tb}") #.with_traceback(tb)
 
         # 4) Load models if not already loaded in reload mode
+        from globals import config_load_models
         try:
-            if 'model_dict' not in globals():
-                #model_dict = self.load_models()
+            if not config_load_models.model_dict or 'model_dict' not in globals():
                 model_dict = load_models()
+                '''if 'model_dict' not in globals():
+                    #model_dict = self.load_models()
+                    model_dict = load_models()'''
+            else: model_dict = config_load_models.model_dict
+        except OSError as exc_ose:
+            tb = traceback.format_exc() #exc.__traceback__
+            logger.warning(f"⚠️ OSError: the paging file is too small (to complete reload): {exc_ose}\n{tb}")
+            pass
         except Exception as exc:
             tb = traceback.format_exc() #exc.__traceback__
             logger.exception(f"✗ Error loading models (reload): {exc}\n{tb}")
@@ -146,12 +156,13 @@
 
 
         # 5) Instantiate Marker's MarkerConverter (PdfConverter) with config managed by config_parser
-        try:
-            llm_service_str = str(self.llm_service).split("'")[1]
+        try: # Assign llm_service if api_token. ##SMY: split and slicing ##Gets the string value
+            llm_service_str = None if api_token == '' or api_token is None or self.use_llm is False else str(self.llm_service).split("'")[1] #
 
-            # sets api_key required by Marker
-            os.environ["OPENAI_API_KEY"] = api_token if api_token !='' or None else self.openai_api_key ##
-
+            # sets api_key required by Marker ## to handle Marker's assertion test on OpenAI
+            #os.environ["OPENAI_API_KEY"] = api_token if api_token !='' or api_token is not None else self.openai_api_key ##SMY: looks lame
+            os.environ["OPENAI_API_KEY"] = api_token if api_token and api_token != '' else os.getenv("OPENAI_API_KEY") or os.getenv("GEMINI_API_KEY") or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
+            #logger.log(level=20, msg="self.converter: instantiating MarkerConverter:", extra={"llm_service_str": llm_service_str, "api_token": api_token}) ##debug
 
             config_dict = config_parser.generate_config_dict()
             #config_dict["pdftext_worker"] = self.max_workers #1 ##SMY: move to get_config_dicts()
@@ -160,7 +171,7 @@
             self.converter = MarkerConverter(
                 ##artifact_dict=self.artifact_dict,
                 #artifact_dict=create_model_dict(),
-                artifact_dict=model_dict,
+                artifact_dict=model_dict if model_dict else create_model_dict(),
                 config=config_dict,
                 #config=config_parser.generate_config_dict(),
                 #llm_service=self.llm_service ##SMY expecting str but self.llm_service, is service object marker.services of type BaseServices
@@ -180,8 +191,9 @@
 
             try:
                 ## Enable higher quality processing with LLMs. ## See MarkerOpenAIService,
-                #
-                llm_service
+                # llm_service disused here
+                ##llm_service = llm_service.removeprefix("<class '").removesuffix("'>") # e.g <class 'marker.services.openai.OpenAIService'>
+                #llm_service = str(llm_service).split("'")[1] ## SMY: split and slicing
                 self.use_llm = self.use_llm[0] if isinstance(self.use_llm, tuple) else self.use_llm
                 self.page_range = self.page_range[0] if isinstance(self.page_range, tuple) else self.page_range #if isinstance(self.page_range, str) else None, ##SMY: passing as hint type tuple!
 
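For reference, the new OPENAI_API_KEY line resolves a key in this order: explicit api_token, then OPENAI_API_KEY, GEMINI_API_KEY, HF_TOKEN, HUGGINGFACEHUB_API_TOKEN from the environment. A standalone sketch of the same chain (resolve_api_key is an illustrative name, not part of this commit):

import os
from typing import Optional

def resolve_api_key(api_token: Optional[str]) -> Optional[str]:
    """Sketch only: mirrors the fallback order used in the diff above."""
    if api_token:  # a non-empty explicit token wins
        return api_token
    for var in ("OPENAI_API_KEY", "GEMINI_API_KEY", "HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
        value = os.getenv(var)
        if value:
            return value
    return None

Note the committed one-liner assigns the chained os.getenv(...) result straight into os.environ; if every variable is unset that value is None, and os.environ[...] = None raises TypeError, so an explicit guard like the one above may be worth folding in.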
globals.py
ADDED

@@ -0,0 +1,13 @@
+# opted for singleton as opposed to global variable
+
+# Create a singleton object to hold all shared states
+# This ensures that only one instance of the Config class is ever created
+class Config:
+    """ Single model_dict used across the app """
+    def __init__(self):
+        self.model_dict = {}
+
+# Create a single, shared instance of the Config class
+# Other modules will import and use this instance.
+config_load_models = Config()
+
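The "singleton" guarantee here comes from Python's module cache: every `from globals import config_load_models` returns the same module-level instance, so the models are loaded once per process. A usage sketch matching the call sites in this commit:

from globals import config_load_models
from converters.extraction_converter import load_models

if not config_load_models.model_dict:   # first caller pays the model-load cost
    config_load_models.model_dict = load_models()
model_dict = config_load_models.model_dict  # later callers reuse the same dict

Nothing prevents a second Config() from being constructed, so this is a shared instance by convention rather than an enforced singleton; that is sufficient for the pre-load use case.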
llm/llm_login.py
CHANGED

@@ -41,17 +41,17 @@ def login_huggingface(token: Optional[str] = None):
             logger.info("✔️ hf_login already: whoami()", extra={"mode": "HF Oauth"})
             #return True
         else:
-            login() ##SMY: Not visible/interactive to users
+            login() ##SMY: Not visible/interactive to users on HF Space. ## ProcessPool limitation
             sleep(5) ##SMY pause for login. Helpful: pool async opex
             logger.info("✔️ hf_login already: login()", extra={"mode": "cli"})
             #return True
     except Exception as exc:
         # Respect common env var names; prefer explicit token arg when provided
-        fallback_token = token if token else get_token() or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
+        fallback_token = token if token else get_token() or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") ##SMY: to revisit
         if fallback_token:
             try:
                 login(token=fallback_token)
-                token = fallback_token
+                #token = fallback_token ##debug
                 logger.info("✔️ hf_login through fallback", extra={"mode": "token"}) ##SMY: This only displays if token is provided
             except Exception as exc_token:
                 logger.warning("❌ hf_login_failed through fallback", extra={"error": str(exc_token)})
llm/openai_client.py
CHANGED

@@ -38,7 +38,7 @@ class OpenAIChatClient:
     ) -> None:
 
         try:
-            openai_api_key_env = dotenv.get_key(".env", "OPENAI_API_KEY")
+            openai_api_key_env = dotenv.get_key(".env", "OPENAI_API_KEY") or dotenv.get_key(".env", "GEMINI_API_KEY")
             self.model_id = f"{model_id}:{hf_provider}" if hf_provider is not None else model_id ##concatenate so HF can pipe to Hf provider
             self.hf_provider = hf_provider
             self.base_url = base_url #"https://router.huggingface.co/v1" #%22" #HF API proxy
ui/gradio_ui.py
CHANGED

@@ -37,9 +37,13 @@ pdf2md_converter = PdfToMarkdownConverter()
 # User eXperience: Load Marker models ahead of time if not already loaded in reload mode
 ## SMY: 29Sept2025 - Came across https://github.com/xiaoyao9184/docker-marker/tree/master/gradio
 from converters.extraction_converter import load_models
+from globals import config_load_models
 try:
-    if 'model_dict' not in globals():
-        model_dict = load_models()
+    if not config_load_models.model_dict:
+        config_load_models.model_dict = load_models()
+    '''if 'model_dict' not in globals():
+        global model_dict
+        model_dict = load_models()'''
 except Exception as exc:
     #tb = traceback.format_exc() #exc.__traceback__
     logger.exception(f"✗ Error loading models (reload): {exc}") #\n{tb}")
@@ -54,7 +58,7 @@ def get_login_token( api_token_arg, oauth_token: gr.OAuthToken | None=None,):
         oauth_token = oauth_token
     else: get_token()
 
-    return oauth_token.token ##token value
+    return oauth_token.token if oauth_token else '' ##token value or empty string
 
 # pool executor to convert files called by Gradio
 ##SMY: TODO: future: refactor to gradio_process.py and
@@ -109,15 +113,18 @@ def convert_batch(
 
         if is_loggedin_huggingface() and (api_token is None or api_token == ""):
             api_token = get_token() ##SMY: might be redundant
+
+        elif login_huggingface(api_token):
+            # login: Update the Gradio UI to improve user-friendly eXperience
+            yield gr.update(interactive=False), f"login to HF: Processing files...", {"process": "Processing files"}, f"__init__.py"
         else:
-
-
-            yield gr.update(interactive=False), f"login to HF: Processing files...", {"process": "Processing files"}, f"__init__.py"
+            # login: Update the Gradio UI to improve user-friendly eXperience
+            yield gr.update(interactive=False), f"Not logged in to HF: Processing files...", {"process": "Processing files"}, f"__init__.py"
 
     except Exception as exc: # Catch all exceptions
         tb = traceback.format_exc()
         logger.exception(f"✗ Error during login_huggingface → {exc}\n{tb}", exc_info=True) # Log the full traceback
-        return gr.update(interactive=True), f"✗ An error occurred during login_huggingface → {exc}\n{tb}", {"Error":f"Error: {exc}"}, f"__init__.py" # return the exception message
+        return [gr.update(interactive=True), f"✗ An error occurred during login_huggingface → {exc}\n{tb}", {"Error":f"Error: {exc}"}, f"__init__.py"] # return the exception message
 
 
     ## debug
@@ -127,7 +134,7 @@ def convert_batch(
     if not pdf_files or pdf_files is None: ## Check if files is None. This handles the case where no files are uploaded.
         logger.log(level=30, msg="Initialising ProcessPool: No files uploaded.", extra={"pdf_files": pdf_files, "files_len": pdf_files_count})
         #outputs=[log_output, files_individual_JSON, files_individual_downloads],
-        return gr.update(interactive=True), "Initialising ProcessPool: No files uploaded.", {"Upload":"No files uploaded"}, f"__init__.py"
+        return [gr.update(interactive=True), "Initialising ProcessPool: No files uploaded.", {"Upload":"No files uploaded"}, f"__init__.py"]
 
     # Get config values if not provided
     config_file = find_file("config.ini") ##from file_handler.file_utils
@@ -232,7 +239,6 @@
         tb = traceback.format_exc()
         logger.exception(f"✗ Error during ProcessPoolExecutor → {exc}\n{tb}" , exc_info=True) # Log the full traceback
         #traceback.print_exc() # Print the exception traceback
-        #return gr.update(interactive=True), f"✗ An error occurred during ProcessPoolExecutor→ {exc}\n{tb}", f"Error: {exc}", f"Error: {exc}" # return the exception message
         yield gr.update(interactive=True), f"✗ An error occurred during ProcessPoolExecutor→ {exc}\n{tb}", {"Error":f"Error: {exc}"}, f"__init__.py" # return the exception message
 
     '''
@@ -245,7 +251,7 @@ def convert_batch(
 
     # Zip Processed md Files and images. Insert to first index
     try: ##from file_handler.file_utils
-        zipped_processed_files = zip_processed_files(root_dir=f"data/{output_dir_string}", file_paths=logs_files_images, tz_hours=tz_hours, date_format='%d%b%Y')
+        zipped_processed_files = zip_processed_files(root_dir=f"data/{output_dir_string}", file_paths=logs_files_images, tz_hours=tz_hours, date_format='%d%b%Y_%H-%M-%S') #date_format='%d%b%Y'
         logs_files_images.insert(0, zipped_processed_files)
         #logs_files_images.insert(1, "====================")
         yield gr.update(interactive=False), f"Processing zip and files: {logs_files_images}", {"process": "Processing files"}, f"__init__.py"
@@ -273,18 +279,18 @@
 
     #outputs=[process_button, log_output, files_individual_JSON, files_individual_downloads],
     #return "\n".join(logs), "\n".join(logs_files_images) #"\n".join(logs_files)
-
-
-
+
+    yield gr.update(interactive=True), gr.update(value=logs_return_formatted_json_string), gr.update(value=logs_return_formatted_json_string, visible=True), gr.update(value=logs_files_images_return, visible=True)
+    return [gr.update(interactive=True), gr.update(value=logs_return_formatted_json_string), gr.update(value=logs_return_formatted_json_string, visible=True), gr.update(value=logs_files_images_return, visible=True)]
     #yield gr.update(interactive=True), logs_return_formatted_json_string, logs_return_formatted_json_string, logs_files_images_return
-    return gr.update(interactive=True), logs_return_formatted_json_string, logs_return_formatted_json_string, logs_files_images_return
+    #return [gr.update(interactive=True), logs_return_formatted_json_string, logs_return_formatted_json_string, logs_files_images_return]
 
 except Exception as exc:
     tb = traceback.format_exc()
     logger.exception(f"✗ Error during returning result logs → {exc}\n{tb}" , exc_info=True) # Log the full traceback
     #traceback.print_exc() # Print the exception traceback
-    return gr.update(interactive=True), f"✗ An error occurred during returning result logs→ {exc}\n{tb}", {"Error":f"Error: {exc}"}, f"__init__.py" # return the exception message
-
+    #return [gr.update(interactive=True), f"✗ An error occurred during returning result logs→ {exc}\n{tb}", {"Error":f"Error: {exc}"}, f"__init__.py"] # return the exception message
+    yield [gr.update(interactive=True), f"✗ An error occurred during returning result logs→ {exc}\n{tb}", {"Error":f"Error: {exc}"}, f"__init__.py"] # return the exception message
 
     #return "\n".join(log for log in logs), "\n".join(str(path) for path in logs_files_images)
     #print(f'logs_files_images: {"\n".join(str(path) for path in logs_files_images)}')
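A note on the yield/return mix in convert_batch: Gradio treats a generator callback's yields as streaming updates to the outputs, and a return inside a generator only stops iteration (its value rides on StopIteration and, to our knowledge, is not surfaced by Gradio), so the last yielded tuple is what the user ends up seeing. A reduced sketch of that shape, with do_conversion as a hypothetical stand-in for the ProcessPoolExecutor block:

import gradio as gr

def convert_batch_sketch(pdf_files):
    """Sketch of the generator-callback shape; do_conversion is hypothetical."""
    if not pdf_files:
        yield gr.update(interactive=True), "No files uploaded.", {"Upload": "No files uploaded"}, None
        return  # ends the generator; the yield above is the final UI state
    yield gr.update(interactive=False), "Processing files...", {"process": "Processing files"}, None
    try:
        results = do_conversion(pdf_files)  # placeholder for the ProcessPoolExecutor block
        yield gr.update(interactive=True), "Done.", {"process": "done"}, results
    except Exception as exc:
        yield gr.update(interactive=True), f"Error: {exc}", {"Error": str(exc)}, None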
utils/logger.py
CHANGED

@@ -30,13 +30,13 @@ class JsonFormatter(logging.Formatter):
         ##SMY: TODO: local time
         self.tz_hours = tz_hours if tz_hours else 0
         self.date_format = date_format
-        self.time = datetime.now(timezone.utc) + timedelta(hours=tz_hours if tz_hours else 0)
+        self.time = datetime.now(timezone.utc) + timedelta(hours=tz_hours if tz_hours else 0) ##SMY: TODO: fix timedelta()
 
     def format(self, record: logging.LogRecord) -> str: #
         payload = {
             #"ts": datetime.now(timezone.utc).isoformat(), ## default to 'YYYY-MM-DD HH:MM:SS.mmmmmm',
             #"ts": datetime.now(timezone.utc).strftime("%Y-%m-%d : %H:%M:%S"), ## SMY: interested in datefmt="%H:%M:%S",
-            "ts": self.time.strftime(self.date_format), ## SMY: interested in datefmt="%H:%M:%S",
+            "ts": f"{self.time.strftime(self.date_format)} (UTC)", ## SMY: interested in datefmt="%H:%M:%S",
             "level": record.levelname,
             "logger": record.name,
             "message": record.getMessage(),
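The `##SMY: TODO: fix timedelta()` marks a real limitation: self.time is computed once in __init__, so every record formatted afterwards carries the formatter's creation time, and the "(UTC)" label is only accurate when tz_hours is 0. A per-record variant as a sketch (not the committed code), deriving the timestamp from record.created:

import json
import logging
from datetime import datetime, timedelta, timezone

class JsonFormatterSketch(logging.Formatter):
    """Sketch only: stamps each record at log time, unlike the committed formatter."""
    def __init__(self, date_format: str = "%d%b%Y %H:%M:%S", tz_hours: int = 0):
        super().__init__()
        self.date_format = date_format
        self.tz_hours = tz_hours or 0

    def format(self, record: logging.LogRecord) -> str:
        # record.created is the epoch time of the log call, not formatter creation
        ts = datetime.fromtimestamp(record.created, tz=timezone.utc) + timedelta(hours=self.tz_hours)
        payload = {
            "ts": ts.strftime(self.date_format),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        return json.dumps(payload)

Usage would be the standard handler.setFormatter(JsonFormatterSketch(tz_hours=1)).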