Spaces:
Running
Running
use @carlopi's trick to fake glob expansion
Browse files
https://github.com/duckdb/duckdb/pull/10051#issuecomment-1865825012
(h/t @severo)
- docs/data/presse.parquet.sh +3 -11
docs/data/presse.parquet.sh
CHANGED
@@ -5,19 +5,11 @@ CREATE TABLE presse AS (
|
|
5 |
SELECT title
|
6 |
, author
|
7 |
, LPAD((REGEXP_EXTRACT(date, '1[0-9][0-9][0-9]') || '-01-01'), 10, '0')::DATE AS year
|
8 |
-
FROM read_parquet(
|
9 |
-
|
10 |
-
for i in $(seq 1 320); do
|
11 |
-
echo " 'https://huggingface.co/datasets/PleIAs/French-PD-Newspapers/resolve/main/gallica_presse_$i.parquet'," >> $TMPDIR/presse.sql
|
12 |
-
done
|
13 |
-
|
14 |
-
echo """ ])
|
15 |
);
|
16 |
-
|
17 |
COPY presse TO '$TMPDIR/presse.parquet' (FORMAT 'parquet', COMPRESSION 'GZIP');
|
18 |
-
"""
|
19 |
-
|
20 |
-
duckdb < $TMPDIR/presse.sql
|
21 |
|
22 |
# isatty
|
23 |
if [ -t 1 ]; then
|
|
|
5 |
SELECT title
|
6 |
, author
|
7 |
, LPAD((REGEXP_EXTRACT(date, '1[0-9][0-9][0-9]') || '-01-01'), 10, '0')::DATE AS year
|
8 |
+
FROM read_parquet(
|
9 |
+
[('https://huggingface.co/datasets/PleIAs/French-PD-Newspapers/resolve/main/gallica_presse_{:d}.parquet').format(n) for n in range(1, 321)])
|
|
|
|
|
|
|
|
|
|
|
10 |
);
|
|
|
11 |
COPY presse TO '$TMPDIR/presse.parquet' (FORMAT 'parquet', COMPRESSION 'GZIP');
|
12 |
+
""" | duckdb
|
|
|
|
|
13 |
|
14 |
# isatty
|
15 |
if [ -t 1 ]; then
|