zhimin-z
committed on
Commit
·
2995bb4
1
Parent(s):
fa6414e
fix
Browse files
msr.py
CHANGED
|
@@ -306,14 +306,6 @@ def get_duckdb_connection():
|
|
| 306 |
conn.execute(f"SET max_memory = '50GB';")
|
| 307 |
conn.execute("SET temp_directory = '/tmp/duckdb_temp';")
|
| 308 |
|
| 309 |
-
# GZIP PARALLEL DECOMPRESSION (only needed for .json.gz files)
|
| 310 |
-
try:
|
| 311 |
-
conn.execute("SET extension_directory = '/tmp/duckdb_ext';")
|
| 312 |
-
conn.execute("INSTALL 'gzip';")
|
| 313 |
-
conn.execute("LOAD 'gzip';")
|
| 314 |
-
except Exception as e:
|
| 315 |
-
print(f" ⚠ Warning: Could not load gzip extension: {e}")
|
| 316 |
-
|
| 317 |
# PERFORMANCE OPTIMIZATIONS
|
| 318 |
conn.execute("SET preserve_insertion_order = false;") # Disable expensive ordering
|
| 319 |
conn.execute("SET enable_object_cache = true;") # Cache repeatedly read files
|
|
@@ -433,8 +425,7 @@ def fetch_all_metadata_streaming(conn, identifiers, start_date, end_date):
|
|
| 433 |
filename=true,
|
| 434 |
compression='gzip',
|
| 435 |
format='newline_delimited',
|
| 436 |
-
ignore_errors=true
|
| 437 |
-
maximum_object_size=2147483648
|
| 438 |
)
|
| 439 |
WHERE
|
| 440 |
-- PushEvent: Commits by assistants
|
|
|
|
| 306 |
conn.execute(f"SET max_memory = '50GB';")
|
| 307 |
conn.execute("SET temp_directory = '/tmp/duckdb_temp';")
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
# PERFORMANCE OPTIMIZATIONS
|
| 310 |
conn.execute("SET preserve_insertion_order = false;") # Disable expensive ordering
|
| 311 |
conn.execute("SET enable_object_cache = true;") # Cache repeatedly read files
|
|
|
|
| 425 |
filename=true,
|
| 426 |
compression='gzip',
|
| 427 |
format='newline_delimited',
|
| 428 |
+
ignore_errors=true
|
|
|
|
| 429 |
)
|
| 430 |
WHERE
|
| 431 |
-- PushEvent: Commits by assistants
|