gzdaniel commited on
Commit
7465cb0
·
1 Parent(s): f3b8c44

Fix timezone bugs for PostgreSQL vector storage

Browse files
Files changed (1) hide show
  1. lightrag/kg/postgres_impl.py +70 -11
lightrag/kg/postgres_impl.py CHANGED
@@ -106,7 +106,61 @@ class PostgreSQLDB:
106
  ):
107
  pass
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  async def check_tables(self):
 
110
  for k, v in TABLES.items():
111
  try:
112
  await self.query(f"SELECT 1 FROM {k} LIMIT 1")
@@ -142,6 +196,13 @@ class PostgreSQLDB:
142
  f"PostgreSQL, Failed to create index on table {k}, Got: {e}"
143
  )
144
 
 
 
 
 
 
 
 
145
  async def query(
146
  self,
147
  sql: str,
@@ -621,9 +682,7 @@ class PGVectorStorage(BaseVectorStorage):
621
  return
622
 
623
  # Get current time with UTC timezone
624
- current_time_with_tz = datetime.datetime.now(timezone.utc)
625
- # Remove timezone info to avoid timezone mismatch issues
626
- current_time = current_time_with_tz.replace(tzinfo=None)
627
  list_data = [
628
  {
629
  "__id__": k,
@@ -1055,8 +1114,8 @@ class PGDocStatusStorage(DocStatusStorage):
1055
  "chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
1056
  "status": v["status"],
1057
  "file_path": v["file_path"],
1058
- "created_at": created_at, # 使用转换后的datetime对象
1059
- "updated_at": updated_at, # 使用转换后的datetime对象
1060
  },
1061
  )
1062
 
@@ -2251,8 +2310,8 @@ TABLES = {
2251
  content TEXT,
2252
  content_vector VECTOR,
2253
  file_path VARCHAR(256),
2254
- create_time TIMESTAMP(0),
2255
- update_time TIMESTAMP(0),
2256
  CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
2257
  )"""
2258
  },
@@ -2263,8 +2322,8 @@ TABLES = {
2263
  entity_name VARCHAR(255),
2264
  content TEXT,
2265
  content_vector VECTOR,
2266
- create_time TIMESTAMP(0),
2267
- update_time TIMESTAMP(0),
2268
  chunk_ids VARCHAR(255)[] NULL,
2269
  file_path TEXT NULL,
2270
  CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
@@ -2278,8 +2337,8 @@ TABLES = {
2278
  target_id VARCHAR(256),
2279
  content TEXT,
2280
  content_vector VECTOR,
2281
- create_time TIMESTAMP(0),
2282
- update_time TIMESTAMP(0),
2283
  chunk_ids VARCHAR(255)[] NULL,
2284
  file_path TEXT NULL,
2285
  CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
 
106
  ):
107
  pass
108
 
109
+ async def _migrate_timestamp_columns(self):
110
+ """Migrate timestamp columns in tables to timezone-aware types, assuming original data is in UTC time"""
111
+ # Tables and columns that need migration
112
+ tables_to_migrate = {
113
+ "LIGHTRAG_VDB_ENTITY": ["create_time", "update_time"],
114
+ "LIGHTRAG_VDB_RELATION": ["create_time", "update_time"],
115
+ "LIGHTRAG_DOC_CHUNKS": ["create_time", "update_time"],
116
+ }
117
+
118
+ for table_name, columns in tables_to_migrate.items():
119
+ for column_name in columns:
120
+ try:
121
+ # Check if column exists
122
+ check_column_sql = f"""
123
+ SELECT column_name, data_type
124
+ FROM information_schema.columns
125
+ WHERE table_name = '{table_name.lower()}'
126
+ AND column_name = '{column_name}'
127
+ """
128
+
129
+ column_info = await self.query(check_column_sql)
130
+ if not column_info:
131
+ logger.warning(
132
+ f"Column {table_name}.{column_name} does not exist, skipping migration"
133
+ )
134
+ continue
135
+
136
+ # Check column type
137
+ data_type = column_info.get("data_type")
138
+ if data_type == "timestamp with time zone":
139
+ logger.info(
140
+ f"Column {table_name}.{column_name} is already timezone-aware, no migration needed"
141
+ )
142
+ continue
143
+
144
+ # Execute migration, explicitly specifying UTC timezone for interpreting original data
145
+ logger.info(
146
+ f"Migrating {table_name}.{column_name} to timezone-aware type"
147
+ )
148
+ migration_sql = f"""
149
+ ALTER TABLE {table_name}
150
+ ALTER COLUMN {column_name} TYPE TIMESTAMP(0) WITH TIME ZONE
151
+ USING {column_name} AT TIME ZONE 'UTC'
152
+ """
153
+
154
+ await self.execute(migration_sql)
155
+ logger.info(
156
+ f"Successfully migrated {table_name}.{column_name} to timezone-aware type"
157
+ )
158
+ except Exception as e:
159
+ # Log error but don't interrupt the process
160
+ logger.warning(f"Failed to migrate {table_name}.{column_name}: {e}")
161
+
162
  async def check_tables(self):
163
+ # First create all tables
164
  for k, v in TABLES.items():
165
  try:
166
  await self.query(f"SELECT 1 FROM {k} LIMIT 1")
 
196
  f"PostgreSQL, Failed to create index on table {k}, Got: {e}"
197
  )
198
 
199
+ # After all tables are created, attempt to migrate timestamp fields
200
+ try:
201
+ await self._migrate_timestamp_columns()
202
+ except Exception as e:
203
+ logger.error(f"PostgreSQL, Failed to migrate timestamp columns: {e}")
204
+ # Don't throw an exception, allow the initialization process to continue
205
+
206
  async def query(
207
  self,
208
  sql: str,
 
682
  return
683
 
684
  # Get current time with UTC timezone
685
+ current_time = datetime.datetime.now(timezone.utc)
 
 
686
  list_data = [
687
  {
688
  "__id__": k,
 
1114
  "chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
1115
  "status": v["status"],
1116
  "file_path": v["file_path"],
1117
+ "created_at": created_at, # Use the converted datetime object
1118
+ "updated_at": updated_at, # Use the converted datetime object
1119
  },
1120
  )
1121
 
 
2310
  content TEXT,
2311
  content_vector VECTOR,
2312
  file_path VARCHAR(256),
2313
+ create_time TIMESTAMP(0) WITH TIME ZONE,
2314
+ update_time TIMESTAMP(0) WITH TIME ZONE,
2315
  CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
2316
  )"""
2317
  },
 
2322
  entity_name VARCHAR(255),
2323
  content TEXT,
2324
  content_vector VECTOR,
2325
+ create_time TIMESTAMP(0) WITH TIME ZONE,
2326
+ update_time TIMESTAMP(0) WITH TIME ZONE,
2327
  chunk_ids VARCHAR(255)[] NULL,
2328
  file_path TEXT NULL,
2329
  CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
 
2337
  target_id VARCHAR(256),
2338
  content TEXT,
2339
  content_vector VECTOR,
2340
+ create_time TIMESTAMP(0) WITH TIME ZONE,
2341
+ update_time TIMESTAMP(0) WITH TIME ZONE,
2342
  chunk_ids VARCHAR(255)[] NULL,
2343
  file_path TEXT NULL,
2344
  CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)