Fix timezone bugs for PostgreSQL vector storage
Browse files- lightrag/kg/postgres_impl.py +70 -11
lightrag/kg/postgres_impl.py
CHANGED
|
@@ -106,7 +106,61 @@ class PostgreSQLDB:
|
|
| 106 |
):
|
| 107 |
pass
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
async def check_tables(self):
|
|
|
|
| 110 |
for k, v in TABLES.items():
|
| 111 |
try:
|
| 112 |
await self.query(f"SELECT 1 FROM {k} LIMIT 1")
|
|
@@ -142,6 +196,13 @@ class PostgreSQLDB:
|
|
| 142 |
f"PostgreSQL, Failed to create index on table {k}, Got: {e}"
|
| 143 |
)
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
async def query(
|
| 146 |
self,
|
| 147 |
sql: str,
|
|
@@ -621,9 +682,7 @@ class PGVectorStorage(BaseVectorStorage):
|
|
| 621 |
return
|
| 622 |
|
| 623 |
# Get current time with UTC timezone
|
| 624 |
-
|
| 625 |
-
# Remove timezone info to avoid timezone mismatch issues
|
| 626 |
-
current_time = current_time_with_tz.replace(tzinfo=None)
|
| 627 |
list_data = [
|
| 628 |
{
|
| 629 |
"__id__": k,
|
|
@@ -1055,8 +1114,8 @@ class PGDocStatusStorage(DocStatusStorage):
|
|
| 1055 |
"chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
|
| 1056 |
"status": v["status"],
|
| 1057 |
"file_path": v["file_path"],
|
| 1058 |
-
"created_at": created_at, #
|
| 1059 |
-
"updated_at": updated_at, #
|
| 1060 |
},
|
| 1061 |
)
|
| 1062 |
|
|
@@ -2251,8 +2310,8 @@ TABLES = {
|
|
| 2251 |
content TEXT,
|
| 2252 |
content_vector VECTOR,
|
| 2253 |
file_path VARCHAR(256),
|
| 2254 |
-
create_time TIMESTAMP(0),
|
| 2255 |
-
update_time TIMESTAMP(0),
|
| 2256 |
CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
|
| 2257 |
)"""
|
| 2258 |
},
|
|
@@ -2263,8 +2322,8 @@ TABLES = {
|
|
| 2263 |
entity_name VARCHAR(255),
|
| 2264 |
content TEXT,
|
| 2265 |
content_vector VECTOR,
|
| 2266 |
-
create_time TIMESTAMP(0),
|
| 2267 |
-
update_time TIMESTAMP(0),
|
| 2268 |
chunk_ids VARCHAR(255)[] NULL,
|
| 2269 |
file_path TEXT NULL,
|
| 2270 |
CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
|
|
@@ -2278,8 +2337,8 @@ TABLES = {
|
|
| 2278 |
target_id VARCHAR(256),
|
| 2279 |
content TEXT,
|
| 2280 |
content_vector VECTOR,
|
| 2281 |
-
create_time TIMESTAMP(0),
|
| 2282 |
-
update_time TIMESTAMP(0),
|
| 2283 |
chunk_ids VARCHAR(255)[] NULL,
|
| 2284 |
file_path TEXT NULL,
|
| 2285 |
CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
|
|
|
|
| 106 |
):
|
| 107 |
pass
|
| 108 |
|
| 109 |
+
async def _migrate_timestamp_columns(self):
|
| 110 |
+
"""Migrate timestamp columns in tables to timezone-aware types, assuming original data is in UTC time"""
|
| 111 |
+
# Tables and columns that need migration
|
| 112 |
+
tables_to_migrate = {
|
| 113 |
+
"LIGHTRAG_VDB_ENTITY": ["create_time", "update_time"],
|
| 114 |
+
"LIGHTRAG_VDB_RELATION": ["create_time", "update_time"],
|
| 115 |
+
"LIGHTRAG_DOC_CHUNKS": ["create_time", "update_time"],
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
for table_name, columns in tables_to_migrate.items():
|
| 119 |
+
for column_name in columns:
|
| 120 |
+
try:
|
| 121 |
+
# Check if column exists
|
| 122 |
+
check_column_sql = f"""
|
| 123 |
+
SELECT column_name, data_type
|
| 124 |
+
FROM information_schema.columns
|
| 125 |
+
WHERE table_name = '{table_name.lower()}'
|
| 126 |
+
AND column_name = '{column_name}'
|
| 127 |
+
"""
|
| 128 |
+
|
| 129 |
+
column_info = await self.query(check_column_sql)
|
| 130 |
+
if not column_info:
|
| 131 |
+
logger.warning(
|
| 132 |
+
f"Column {table_name}.{column_name} does not exist, skipping migration"
|
| 133 |
+
)
|
| 134 |
+
continue
|
| 135 |
+
|
| 136 |
+
# Check column type
|
| 137 |
+
data_type = column_info.get("data_type")
|
| 138 |
+
if data_type == "timestamp with time zone":
|
| 139 |
+
logger.info(
|
| 140 |
+
f"Column {table_name}.{column_name} is already timezone-aware, no migration needed"
|
| 141 |
+
)
|
| 142 |
+
continue
|
| 143 |
+
|
| 144 |
+
# Execute migration, explicitly specifying UTC timezone for interpreting original data
|
| 145 |
+
logger.info(
|
| 146 |
+
f"Migrating {table_name}.{column_name} to timezone-aware type"
|
| 147 |
+
)
|
| 148 |
+
migration_sql = f"""
|
| 149 |
+
ALTER TABLE {table_name}
|
| 150 |
+
ALTER COLUMN {column_name} TYPE TIMESTAMP(0) WITH TIME ZONE
|
| 151 |
+
USING {column_name} AT TIME ZONE 'UTC'
|
| 152 |
+
"""
|
| 153 |
+
|
| 154 |
+
await self.execute(migration_sql)
|
| 155 |
+
logger.info(
|
| 156 |
+
f"Successfully migrated {table_name}.{column_name} to timezone-aware type"
|
| 157 |
+
)
|
| 158 |
+
except Exception as e:
|
| 159 |
+
# Log error but don't interrupt the process
|
| 160 |
+
logger.warning(f"Failed to migrate {table_name}.{column_name}: {e}")
|
| 161 |
+
|
| 162 |
async def check_tables(self):
|
| 163 |
+
# First create all tables
|
| 164 |
for k, v in TABLES.items():
|
| 165 |
try:
|
| 166 |
await self.query(f"SELECT 1 FROM {k} LIMIT 1")
|
|
|
|
| 196 |
f"PostgreSQL, Failed to create index on table {k}, Got: {e}"
|
| 197 |
)
|
| 198 |
|
| 199 |
+
# After all tables are created, attempt to migrate timestamp fields
|
| 200 |
+
try:
|
| 201 |
+
await self._migrate_timestamp_columns()
|
| 202 |
+
except Exception as e:
|
| 203 |
+
logger.error(f"PostgreSQL, Failed to migrate timestamp columns: {e}")
|
| 204 |
+
# Don't throw an exception, allow the initialization process to continue
|
| 205 |
+
|
| 206 |
async def query(
|
| 207 |
self,
|
| 208 |
sql: str,
|
|
|
|
| 682 |
return
|
| 683 |
|
| 684 |
# Get current time with UTC timezone
|
| 685 |
+
current_time = datetime.datetime.now(timezone.utc)
|
|
|
|
|
|
|
| 686 |
list_data = [
|
| 687 |
{
|
| 688 |
"__id__": k,
|
|
|
|
| 1114 |
"chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
|
| 1115 |
"status": v["status"],
|
| 1116 |
"file_path": v["file_path"],
|
| 1117 |
+
"created_at": created_at, # Use the converted datetime object
|
| 1118 |
+
"updated_at": updated_at, # Use the converted datetime object
|
| 1119 |
},
|
| 1120 |
)
|
| 1121 |
|
|
|
|
| 2310 |
content TEXT,
|
| 2311 |
content_vector VECTOR,
|
| 2312 |
file_path VARCHAR(256),
|
| 2313 |
+
create_time TIMESTAMP(0) WITH TIME ZONE,
|
| 2314 |
+
update_time TIMESTAMP(0) WITH TIME ZONE,
|
| 2315 |
CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
|
| 2316 |
)"""
|
| 2317 |
},
|
|
|
|
| 2322 |
entity_name VARCHAR(255),
|
| 2323 |
content TEXT,
|
| 2324 |
content_vector VECTOR,
|
| 2325 |
+
create_time TIMESTAMP(0) WITH TIME ZONE,
|
| 2326 |
+
update_time TIMESTAMP(0) WITH TIME ZONE,
|
| 2327 |
chunk_ids VARCHAR(255)[] NULL,
|
| 2328 |
file_path TEXT NULL,
|
| 2329 |
CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
|
|
|
|
| 2337 |
target_id VARCHAR(256),
|
| 2338 |
content TEXT,
|
| 2339 |
content_vector VECTOR,
|
| 2340 |
+
create_time TIMESTAMP(0) WITH TIME ZONE,
|
| 2341 |
+
update_time TIMESTAMP(0) WITH TIME ZONE,
|
| 2342 |
chunk_ids VARCHAR(255)[] NULL,
|
| 2343 |
file_path TEXT NULL,
|
| 2344 |
CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
|