yangdx committed on
Commit
3a48e39
·
1 Parent(s): dc0c15b

Refactor logging setup and simplify Gunicorn configuration

Browse files

• Move logging setup code to utils.py
• Provide setup_logger for standalone LightRAG logger initialization

lightrag/api/gunicorn_config.py CHANGED
@@ -2,12 +2,15 @@
2
  import os
3
  import logging
4
  from lightrag.kg.shared_storage import finalize_share_data
5
- from lightrag.api.lightrag_server import LightragPathFilter
6
 
7
  # Get log directory path from environment variable
8
  log_dir = os.getenv("LOG_DIR", os.getcwd())
9
  log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
10
 
 
 
 
11
  # Get log file max size and backup count from environment variables
12
  log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
13
  log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
@@ -108,6 +111,9 @@ def on_starting(server):
108
  except ImportError:
109
  print("psutil not installed, skipping memory usage reporting")
110
 
 
 
 
111
  print("Gunicorn initialization complete, forking workers...\n")
112
 
113
 
@@ -134,51 +140,18 @@ def post_fork(server, worker):
134
  Executed after a worker has been forked.
135
  This is a good place to set up worker-specific configurations.
136
  """
137
- # Configure formatters
138
- detailed_formatter = logging.Formatter(
139
- "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
140
- )
141
- simple_formatter = logging.Formatter("%(levelname)s: %(message)s")
142
-
143
- def setup_logger(logger_name: str, level: str = "INFO", add_filter: bool = False):
144
- """Set up a logger with console and file handlers"""
145
- logger_instance = logging.getLogger(logger_name)
146
- logger_instance.setLevel(level)
147
- logger_instance.handlers = [] # Clear existing handlers
148
- logger_instance.propagate = False
149
-
150
- # Add console handler
151
- console_handler = logging.StreamHandler()
152
- console_handler.setFormatter(simple_formatter)
153
- console_handler.setLevel(level)
154
- logger_instance.addHandler(console_handler)
155
-
156
- # Add file handler
157
- file_handler = logging.handlers.RotatingFileHandler(
158
- filename=log_file_path,
159
- maxBytes=log_max_bytes,
160
- backupCount=log_backup_count,
161
- encoding="utf-8",
162
- )
163
- file_handler.setFormatter(detailed_formatter)
164
- file_handler.setLevel(level)
165
- logger_instance.addHandler(file_handler)
166
-
167
- # Add path filter if requested
168
- if add_filter:
169
- path_filter = LightragPathFilter()
170
- logger_instance.addFilter(path_filter)
171
-
172
  # Set up main loggers
173
  log_level = loglevel.upper() if loglevel else "INFO"
174
- setup_logger("uvicorn", log_level)
175
- setup_logger("uvicorn.access", log_level, add_filter=True)
176
- setup_logger("lightrag", log_level, add_filter=True)
 
 
177
 
178
  # Set up lightrag submodule loggers
179
  for name in logging.root.manager.loggerDict:
180
  if name.startswith("lightrag."):
181
- setup_logger(name, log_level, add_filter=True)
182
 
183
  # Disable uvicorn.error logger
184
  uvicorn_error_logger = logging.getLogger("uvicorn.error")
 
2
  import os
3
  import logging
4
  from lightrag.kg.shared_storage import finalize_share_data
5
+ from lightrag.utils import setup_logger
6
 
7
  # Get log directory path from environment variable
8
  log_dir = os.getenv("LOG_DIR", os.getcwd())
9
  log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
10
 
11
+ # Ensure log directory exists
12
+ os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
13
+
14
  # Get log file max size and backup count from environment variables
15
  log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
16
  log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
 
111
  except ImportError:
112
  print("psutil not installed, skipping memory usage reporting")
113
 
114
+ # Log the location of the LightRAG log file
115
+ print(f"LightRAG log file: {log_file_path}\n")
116
+
117
  print("Gunicorn initialization complete, forking workers...\n")
118
 
119
 
 
140
  Executed after a worker has been forked.
141
  This is a good place to set up worker-specific configurations.
142
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  # Set up main loggers
144
  log_level = loglevel.upper() if loglevel else "INFO"
145
+ setup_logger("uvicorn", log_level, add_filter=False, log_file_path=log_file_path)
146
+ setup_logger(
147
+ "uvicorn.access", log_level, add_filter=True, log_file_path=log_file_path
148
+ )
149
+ setup_logger("lightrag", log_level, add_filter=True, log_file_path=log_file_path)
150
 
151
  # Set up lightrag submodule loggers
152
  for name in logging.root.manager.loggerDict:
153
  if name.startswith("lightrag."):
154
+ setup_logger(name, log_level, add_filter=True, log_file_path=log_file_path)
155
 
156
  # Disable uvicorn.error logger
157
  uvicorn_error_logger = logging.getLogger("uvicorn.error")
lightrag/api/lightrag_server.py CHANGED
@@ -437,6 +437,9 @@ def configure_logging():
437
  log_dir = os.getenv("LOG_DIR", os.getcwd())
438
  log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
439
 
 
 
 
440
  # Get log file max size and backup count from environment variables
441
  log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
442
  log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
 
437
  log_dir = os.getenv("LOG_DIR", os.getcwd())
438
  log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
439
 
440
+ print(f"\nLightRAG log file: {log_file_path}\n")
441
+ os.makedirs(os.path.dirname(log_dir), exist_ok=True)
442
+
443
  # Get log file max size and backup count from environment variables
444
  log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
445
  log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
lightrag/lightrag.py CHANGED
@@ -266,9 +266,6 @@ class LightRAG:
266
  _storages_status: StoragesStatus = field(default=StoragesStatus.NOT_CREATED)
267
 
268
  def __post_init__(self):
269
- os.makedirs(os.path.dirname(self.log_file_path), exist_ok=True)
270
- logger.info(f"Logger initialized for working directory: {self.working_dir}")
271
-
272
  from lightrag.kg.shared_storage import (
273
  initialize_share_data,
274
  )
 
266
  _storages_status: StoragesStatus = field(default=StoragesStatus.NOT_CREATED)
267
 
268
  def __post_init__(self):
 
 
 
269
  from lightrag.kg.shared_storage import (
270
  initialize_share_data,
271
  )
lightrag/utils.py CHANGED
@@ -6,6 +6,7 @@ import io
6
  import csv
7
  import json
8
  import logging
 
9
  import os
10
  import re
11
  from dataclasses import dataclass
@@ -68,6 +69,101 @@ logger.setLevel(logging.INFO)
68
  logging.getLogger("httpx").setLevel(logging.WARNING)
69
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  class UnlimitedSemaphore:
72
  """A context manager that allows unlimited access."""
73
 
 
6
  import csv
7
  import json
8
  import logging
9
+ import logging.handlers
10
  import os
11
  import re
12
  from dataclasses import dataclass
 
69
  logging.getLogger("httpx").setLevel(logging.WARNING)
70
 
71
 
72
+ class LightragPathFilter(logging.Filter):
73
+ """Filter for lightrag logger to filter out frequent path access logs"""
74
+
75
+ def __init__(self):
76
+ super().__init__()
77
+ # Define paths to be filtered
78
+ self.filtered_paths = ["/documents", "/health", "/webui/"]
79
+
80
+ def filter(self, record):
81
+ try:
82
+ # Check if record has the required attributes for an access log
83
+ if not hasattr(record, "args") or not isinstance(record.args, tuple):
84
+ return True
85
+ if len(record.args) < 5:
86
+ return True
87
+
88
+ # Extract method, path and status from the record args
89
+ method = record.args[1]
90
+ path = record.args[2]
91
+ status = record.args[4]
92
+
93
+ # Filter out successful GET requests to filtered paths
94
+ if (
95
+ method == "GET"
96
+ and (status == 200 or status == 304)
97
+ and path in self.filtered_paths
98
+ ):
99
+ return False
100
+
101
+ return True
102
+ except Exception:
103
+ # In case of any error, let the message through
104
+ return True
105
+
106
+
107
+ def setup_logger(
108
+ logger_name: str,
109
+ level: str = "INFO",
110
+ add_filter: bool = False,
111
+ log_file_path: str = None,
112
+ ):
113
+ """Set up a logger with console and file handlers
114
+
115
+ Args:
116
+ logger_name: Name of the logger to set up
117
+ level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
118
+ add_filter: Whether to add LightragPathFilter to the logger
119
+ log_file_path: Path to the log file. If None, will use current directory/lightrag.log
120
+ """
121
+ # Configure formatters
122
+ detailed_formatter = logging.Formatter(
123
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
124
+ )
125
+ simple_formatter = logging.Formatter("%(levelname)s: %(message)s")
126
+
127
+ # Get log file path
128
+ if log_file_path is None:
129
+ log_dir = os.getenv("LOG_DIR", os.getcwd())
130
+ log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
131
+
132
+ # Ensure log directory exists
133
+ os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
134
+
135
+ # Get log file max size and backup count from environment variables
136
+ log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
137
+ log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
138
+
139
+ logger_instance = logging.getLogger(logger_name)
140
+ logger_instance.setLevel(level)
141
+ logger_instance.handlers = [] # Clear existing handlers
142
+ logger_instance.propagate = False
143
+
144
+ # Add console handler
145
+ console_handler = logging.StreamHandler()
146
+ console_handler.setFormatter(simple_formatter)
147
+ console_handler.setLevel(level)
148
+ logger_instance.addHandler(console_handler)
149
+
150
+ # Add file handler
151
+ file_handler = logging.handlers.RotatingFileHandler(
152
+ filename=log_file_path,
153
+ maxBytes=log_max_bytes,
154
+ backupCount=log_backup_count,
155
+ encoding="utf-8",
156
+ )
157
+ file_handler.setFormatter(detailed_formatter)
158
+ file_handler.setLevel(level)
159
+ logger_instance.addHandler(file_handler)
160
+
161
+ # Add path filter if requested
162
+ if add_filter:
163
+ path_filter = LightragPathFilter()
164
+ logger_instance.addFilter(path_filter)
165
+
166
+
167
  class UnlimitedSemaphore:
168
  """A context manager that allows unlimited access."""
169
 
run_with_gunicorn.py DELETED
@@ -1,203 +0,0 @@
1
- #!/usr/bin/env python
2
- """
3
- Start LightRAG server with Gunicorn
4
- """
5
-
6
- import os
7
- import sys
8
- import signal
9
- import pipmaster as pm
10
- from lightrag.api.utils_api import parse_args, display_splash_screen
11
- from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
12
-
13
-
14
- def check_and_install_dependencies():
15
- """Check and install required dependencies"""
16
- required_packages = [
17
- "gunicorn",
18
- "tiktoken",
19
- "psutil",
20
- # Add other required packages here
21
- ]
22
-
23
- for package in required_packages:
24
- if not pm.is_installed(package):
25
- print(f"Installing {package}...")
26
- pm.install(package)
27
- print(f"{package} installed successfully")
28
-
29
-
30
- # Signal handler for graceful shutdown
31
- def signal_handler(sig, frame):
32
- print("\n\n" + "=" * 80)
33
- print("RECEIVED TERMINATION SIGNAL")
34
- print(f"Process ID: {os.getpid()}")
35
- print("=" * 80 + "\n")
36
-
37
- # Release shared resources
38
- finalize_share_data()
39
-
40
- # Exit with success status
41
- sys.exit(0)
42
-
43
-
44
- def main():
45
- # Check and install dependencies
46
- check_and_install_dependencies()
47
-
48
- # Register signal handlers for graceful shutdown
49
- signal.signal(signal.SIGINT, signal_handler) # Ctrl+C
50
- signal.signal(signal.SIGTERM, signal_handler) # kill command
51
-
52
- # Parse all arguments using parse_args
53
- args = parse_args(is_uvicorn_mode=False)
54
-
55
- # Display startup information
56
- display_splash_screen(args)
57
-
58
- print("🚀 Starting LightRAG with Gunicorn")
59
- print(f"🔄 Worker management: Gunicorn (workers={args.workers})")
60
- print("🔍 Preloading app: Enabled")
61
- print("📝 Note: Using Gunicorn's preload feature for shared data initialization")
62
- print("\n\n" + "=" * 80)
63
- print("MAIN PROCESS INITIALIZATION")
64
- print(f"Process ID: {os.getpid()}")
65
- print(f"Workers setting: {args.workers}")
66
- print("=" * 80 + "\n")
67
-
68
- # Import Gunicorn's StandaloneApplication
69
- from gunicorn.app.base import BaseApplication
70
-
71
- # Define a custom application class that loads our config
72
- class GunicornApp(BaseApplication):
73
- def __init__(self, app, options=None):
74
- self.options = options or {}
75
- self.application = app
76
- super().__init__()
77
-
78
- def load_config(self):
79
- # Define valid Gunicorn configuration options
80
- valid_options = {
81
- "bind",
82
- "workers",
83
- "worker_class",
84
- "timeout",
85
- "keepalive",
86
- "preload_app",
87
- "errorlog",
88
- "accesslog",
89
- "loglevel",
90
- "certfile",
91
- "keyfile",
92
- "limit_request_line",
93
- "limit_request_fields",
94
- "limit_request_field_size",
95
- "graceful_timeout",
96
- "max_requests",
97
- "max_requests_jitter",
98
- }
99
-
100
- # Special hooks that need to be set separately
101
- special_hooks = {
102
- "on_starting",
103
- "on_reload",
104
- "on_exit",
105
- "pre_fork",
106
- "post_fork",
107
- "pre_exec",
108
- "pre_request",
109
- "post_request",
110
- "worker_init",
111
- "worker_exit",
112
- "nworkers_changed",
113
- "child_exit",
114
- }
115
-
116
- # Import and configure the gunicorn_config module
117
- import gunicorn_config
118
-
119
- # Set configuration variables in gunicorn_config, prioritizing command line arguments
120
- gunicorn_config.workers = (
121
- args.workers if args.workers else int(os.getenv("WORKERS", 1))
122
- )
123
-
124
- # Bind configuration prioritizes command line arguments
125
- host = args.host if args.host != "0.0.0.0" else os.getenv("HOST", "0.0.0.0")
126
- port = args.port if args.port != 9621 else int(os.getenv("PORT", 9621))
127
- gunicorn_config.bind = f"{host}:{port}"
128
-
129
- # Log level configuration prioritizes command line arguments
130
- gunicorn_config.loglevel = (
131
- args.log_level.lower()
132
- if args.log_level
133
- else os.getenv("LOG_LEVEL", "info")
134
- )
135
-
136
- # Timeout configuration prioritizes command line arguments
137
- gunicorn_config.timeout = (
138
- args.timeout if args.timeout else int(os.getenv("TIMEOUT", 150))
139
- )
140
-
141
- # Keepalive configuration
142
- gunicorn_config.keepalive = int(os.getenv("KEEPALIVE", 5))
143
-
144
- # SSL configuration prioritizes command line arguments
145
- if args.ssl or os.getenv("SSL", "").lower() in (
146
- "true",
147
- "1",
148
- "yes",
149
- "t",
150
- "on",
151
- ):
152
- gunicorn_config.certfile = (
153
- args.ssl_certfile
154
- if args.ssl_certfile
155
- else os.getenv("SSL_CERTFILE")
156
- )
157
- gunicorn_config.keyfile = (
158
- args.ssl_keyfile if args.ssl_keyfile else os.getenv("SSL_KEYFILE")
159
- )
160
-
161
- # Set configuration options from the module
162
- for key in dir(gunicorn_config):
163
- if key in valid_options:
164
- value = getattr(gunicorn_config, key)
165
- # Skip functions like on_starting and None values
166
- if not callable(value) and value is not None:
167
- self.cfg.set(key, value)
168
- # Set special hooks
169
- elif key in special_hooks:
170
- value = getattr(gunicorn_config, key)
171
- if callable(value):
172
- self.cfg.set(key, value)
173
-
174
- if hasattr(gunicorn_config, "logconfig_dict"):
175
- self.cfg.set(
176
- "logconfig_dict", getattr(gunicorn_config, "logconfig_dict")
177
- )
178
-
179
- def load(self):
180
- # Import the application
181
- from lightrag.api.lightrag_server import get_application
182
-
183
- return get_application(args)
184
-
185
- # Create the application
186
- app = GunicornApp("")
187
-
188
- # Force workers to be an integer and greater than 1 for multi-process mode
189
- workers_count = int(args.workers)
190
- if workers_count > 1:
191
- # Set a flag to indicate we're in the main process
192
- os.environ["LIGHTRAG_MAIN_PROCESS"] = "1"
193
- initialize_share_data(workers_count)
194
- else:
195
- initialize_share_data(1)
196
-
197
- # Run the application
198
- print("\nStarting Gunicorn with direct Python API...")
199
- app.run()
200
-
201
-
202
- if __name__ == "__main__":
203
- main()