Spaces:
Running
Running
squirrel cache
Browse files
squirrel cache into node_modules/, which is cached by default across builds
- docs/data/presse.parquet.sh +21 -22
- vercel.json +4 -1
docs/data/presse.parquet.sh
CHANGED
@@ -1,30 +1,29 @@
|
|
1 |
# Use "eleventy" .cache to store our temp files
|
2 |
-
export TMPDIR="
|
3 |
mkdir -p $TMPDIR
|
4 |
|
5 |
-
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
fi
|
28 |
|
29 |
# isatty
|
30 |
if [ -t 1 ]; then
|
@@ -32,5 +31,5 @@ if [ -t 1 ]; then
|
|
32 |
echo "duckdb -csv :memory: \"SELECT * FROM '$TMPDIR/presse.parquet'\""
|
33 |
else
|
34 |
cat $TMPDIR/presse.parquet
|
35 |
-
|
36 |
fi
|
|
|
1 |
# Use "eleventy" .cache to store our temp files
|
2 |
+
export TMPDIR="docs/.observablehq/.cache"
|
3 |
mkdir -p $TMPDIR
|
4 |
|
5 |
+
echo "running loader" >&2
|
6 |
|
7 |
+
# install duckdb if not already present
|
8 |
+
export PATH=$TMPDIR:$PATH
|
9 |
+
command -v duckdb || $(
|
10 |
+
curl --location --output duckdb.zip \
|
11 |
+
https://github.com/duckdb/duckdb/releases/download/v0.10.0/duckdb_cli-linux-amd64.zip && \
|
12 |
+
unzip -qq duckdb.zip && chmod +x duckdb && mv duckdb $TMPDIR/
|
13 |
+
)
|
14 |
|
15 |
+
echo """
|
16 |
+
CREATE TABLE presse AS (
|
17 |
+
SELECT title
|
18 |
+
, author
|
19 |
+
, LPAD((REGEXP_EXTRACT(date, '1[0-9][0-9][0-9]') || '-01-01'), 10, '0')::DATE AS year
|
20 |
+
FROM read_parquet(
|
21 |
+
[('https://huggingface.co/datasets/PleIAs/French-PD-Newspapers/resolve/main/gallica_presse_{:d}.parquet').format(n) for n in range(1, 321)])
|
22 |
+
ORDER BY title, author, year
|
23 |
+
);
|
24 |
|
25 |
+
COPY presse TO '$TMPDIR/presse.parquet' (COMPRESSION 'ZSTD', row_group_size 10000000);
|
26 |
+
""" | duckdb
|
|
|
27 |
|
28 |
# isatty
|
29 |
if [ -t 1 ]; then
|
|
|
31 |
echo "duckdb -csv :memory: \"SELECT * FROM '$TMPDIR/presse.parquet'\""
|
32 |
else
|
33 |
cat $TMPDIR/presse.parquet
|
34 |
+
rm $TMPDIR/presse.parquet
|
35 |
fi
|
vercel.json
CHANGED
@@ -1,3 +1,6 @@
|
|
1 |
{
|
2 |
-
"
|
|
|
|
|
|
|
3 |
}
|
|
|
1 |
{
|
2 |
+
"buildCommand": "mkdir -p node_modules/cache && ln -s ../node_modules/cache docs/.observablehq && rm -rf dist && observable build",
|
3 |
+
"outputDirectory": "dist",
|
4 |
+
"cleanUrls": true,
|
5 |
+
"framework": null
|
6 |
}
|