fil committed on
Commit
f4921cd
·
unverified ·
1 Parent(s): c193f10

test if file exists

Browse files
Files changed (1) hide show
  1. docs/data/presse.parquet.sh +27 -21
docs/data/presse.parquet.sh CHANGED
@@ -1,26 +1,32 @@
1
- # install duckdb if not already present
2
- export PATH=.cache:$PATH
3
- command -v duckdb || $(
4
- curl --location --output duckdb.zip \
5
- https://github.com/duckdb/duckdb/releases/download/v0.10.0/duckdb_cli-linux-amd64.zip && \
6
- unzip -qq duckdb.zip && chmod +x duckdb && mkdir -p .cache && mv duckdb .cache/
7
- )
8
-
9
- export TMPDIR="dist"
10
  mkdir -p $TMPDIR
11
 
12
- echo """
13
- CREATE TABLE presse AS (
14
- SELECT title
15
- , author
16
- , LPAD((REGEXP_EXTRACT(date, '1[0-9][0-9][0-9]') || '-01-01'), 10, '0')::DATE AS year
17
- FROM read_parquet(
18
- [('https://huggingface.co/datasets/PleIAs/French-PD-Newspapers/resolve/main/gallica_presse_{:d}.parquet').format(n) for n in range(1, 321)])
19
- ORDER BY title, author, year
20
- );
21
 
22
- COPY presse TO '$TMPDIR/presse.parquet' (COMPRESSION 'ZSTD', row_group_size 10000000);
23
- """ | duckdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # isatty
26
  if [ -t 1 ]; then
@@ -28,5 +34,5 @@ if [ -t 1 ]; then
28
  echo "duckdb -csv :memory: \"SELECT * FROM '$TMPDIR/presse.parquet'\""
29
  else
30
  cat $TMPDIR/presse.parquet
31
- rm $TMPDIR/presse.parquet
32
  fi
 
1
+ # Use "eleventy" .cache to store our temp files
2
+ export TMPDIR=".cache"
 
 
 
 
 
 
 
3
  mkdir -p $TMPDIR
4
 
 
 
 
 
 
 
 
 
 
5
 
6
+ if [ ! -f "$TMPDIR/presse.parquet" ]; then
7
+
8
+ # install duckdb if not already present
9
+ export PATH=.cache:$PATH
10
+ command -v duckdb || $(
11
+ curl --location --output duckdb.zip \
12
+ https://github.com/duckdb/duckdb/releases/download/v0.10.0/duckdb_cli-linux-amd64.zip && \
13
+ unzip -qq duckdb.zip && chmod +x duckdb && mkdir -p .cache && mv duckdb .cache/
14
+ )
15
+
16
+ echo """
17
+ CREATE TABLE presse AS (
18
+ SELECT title
19
+ , author
20
+ , LPAD((REGEXP_EXTRACT(date, '1[0-9][0-9][0-9]') || '-01-01'), 10, '0')::DATE AS year
21
+ FROM read_parquet(
22
+ [('https://huggingface.co/datasets/PleIAs/French-PD-Newspapers/resolve/main/gallica_presse_{:d}.parquet').format(n) for n in range(1, 321)])
23
+ ORDER BY title, author, year
24
+ );
25
+
26
+ COPY presse TO '$TMPDIR/presse.parquet' (COMPRESSION 'ZSTD', row_group_size 10000000);
27
+ """ | duckdb
28
+
29
+ fi
30
 
31
  # isatty
32
  if [ -t 1 ]; then
 
34
  echo "duckdb -csv :memory: \"SELECT * FROM '$TMPDIR/presse.parquet'\""
35
  else
36
  cat $TMPDIR/presse.parquet
37
+ #rm $TMPDIR/presse.parquet
38
  fi