File size: 595 Bytes
abbf29d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323c8fb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!/bin/bash
#
# Clone only the text files of the INE dataset repository and feed them to
# textdir2sql.loading, which is asked to write INE.db.
#
# Steps:
#   1. Clone the repository without populating the working tree.
#   2. Limit the working tree to data/*.txt via sparse-checkout.
#   3. Check out main so that only the matching files are materialised.
#   4. Run textdir2sql.loading on the checked-out data directory.
#
# xtrace echoes each command before it runs, errexit aborts on the first
# failing command, and pipefail makes a pipeline fail if any stage fails.
set -o xtrace -o errexit -o pipefail

REPO="https://github.com/Sygil-Dev/INE-dataset.git"
IMAGE_HOST="https://raw.githubusercontent.com/Sygil-Dev/INE-dataset/main/data/"

# Directory (relative to the current working directory) that receives the clone.
CLONE_DIR="dataset"

# Keep the clone small so the image files are not downloaded:
#   --no-checkout       leave the working tree empty for now
#   --filter=blob:none  partial clone; file contents are fetched on demand
#   --depth 1           shallow clone, history truncated to one commit
git clone \
  --no-checkout \
  --filter=blob:none \
  --depth 1 \
  "${REPO}" "${CLONE_DIR}"

# The pattern is single-quoted so that git, not the shell, interprets the glob.
#
# Caveat: `--no-cone` is deprecated upstream and may stop working someday; see
# https://git-scm.com/docs/git-sparse-checkout#_internalsnon_cone_problems
git -C "${CLONE_DIR}" sparse-checkout set --no-cone '/data/*.txt'

# Populate the working tree, restricted by the sparse-checkout pattern above.
git -C "${CLONE_DIR}" checkout main

# NOTE(review): --image-host is presumably the base URL under which the
# images can be reached; confirm against the textdir2sql documentation.
python3 -m textdir2sql.loading "${CLONE_DIR}/data" INE.db \
  --image-host="${IMAGE_HOST}"