#!/bin/bash
#
# Fetch the text files of the Sygil-Dev INE dataset and hand them to
# `textdir2sql.loading`.
#
# Steps:
#   1. Clone the dataset repository into ./dataset without downloading blobs.
#   2. Restrict the working tree to /data/*.txt via sparse-checkout, then
#      check out `main` so only those files are materialized.
#   3. Run `python3 -m textdir2sql.loading` on dataset/data with INE.db as the
#      second argument and the raw-file URL prefix as --image-host.
#      NOTE(review): presumably this writes an SQLite database to INE.db and
#      uses --image-host to build image URLs -- confirm against textdir2sql.
#
# Requirements: git (with sparse-checkout support), python3 with the
# `textdir2sql` module importable, and network access to GitHub.
#
# The clone fails if ./dataset already exists and is non-empty; remove it
# before re-running.

# -e: abort on the first failing command; -u: treat unset variables as errors;
# -x: trace each command; pipefail: a pipeline fails if any stage fails.
set -euxo pipefail

readonly REPO="https://github.com/Sygil-Dev/INE-dataset.git"
readonly IMAGE_HOST="https://raw.githubusercontent.com/Sygil-Dev/INE-dataset/main/data/"

# Avoid cloning all the image files: no blobs are fetched up front
# (--filter=blob:none), no history beyond the tip (--depth 1), and no working
# tree is populated yet (--no-checkout).
git clone --no-checkout --filter=blob:none --depth 1 "${REPO}" dataset

# Beware: `--no-cone` is deprecated, so this may stop working someday.
# https://git-scm.com/docs/git-sparse-checkout#_internalsnon_cone_problems
# The pattern is single-quoted so that git, not the shell, interprets the glob.
git -C dataset sparse-checkout set --no-cone '/data/*.txt'

# Populate the working tree; only paths matching the sparse pattern are
# checked out.
git -C dataset checkout main

python3 -m textdir2sql.loading dataset/data INE.db \
  --image-host="${IMAGE_HOST}"