zhimin-z committed on
Commit
2995bb4
·
1 Parent(s): fa6414e
Files changed (1) hide show
  1. msr.py +1 -10
msr.py CHANGED
@@ -306,14 +306,6 @@ def get_duckdb_connection():
306
  conn.execute(f"SET max_memory = '50GB';")
307
  conn.execute("SET temp_directory = '/tmp/duckdb_temp';")
308
 
309
- # GZIP PARALLEL DECOMPRESSION (only needed for .json.gz files)
310
- try:
311
- conn.execute("SET extension_directory = '/tmp/duckdb_ext';")
312
- conn.execute("INSTALL 'gzip';")
313
- conn.execute("LOAD 'gzip';")
314
- except Exception as e:
315
- print(f" ⚠ Warning: Could not load gzip extension: {e}")
316
-
317
  # PERFORMANCE OPTIMIZATIONS
318
  conn.execute("SET preserve_insertion_order = false;") # Disable expensive ordering
319
  conn.execute("SET enable_object_cache = true;") # Cache repeatedly read files
@@ -433,8 +425,7 @@ def fetch_all_metadata_streaming(conn, identifiers, start_date, end_date):
433
  filename=true,
434
  compression='gzip',
435
  format='newline_delimited',
436
- ignore_errors=true,
437
- maximum_object_size=2147483648
438
  )
439
  WHERE
440
  -- PushEvent: Commits by assistants
 
306
  conn.execute(f"SET max_memory = '50GB';")
307
  conn.execute("SET temp_directory = '/tmp/duckdb_temp';")
308
 
 
 
 
 
 
 
 
 
309
  # PERFORMANCE OPTIMIZATIONS
310
  conn.execute("SET preserve_insertion_order = false;") # Disable expensive ordering
311
  conn.execute("SET enable_object_cache = true;") # Cache repeatedly read files
 
425
  filename=true,
426
  compression='gzip',
427
  format='newline_delimited',
428
+ ignore_errors=true
 
429
  )
430
  WHERE
431
  -- PushEvent: Commits by assistants