#!/usr/bin/env bash
#
# Fetch the text files of the Sygil-Dev INE dataset and load them with
# `textdir2sql.loading` into INE.db.
#
# Steps:
#   1. Clone the dataset repository into ./dataset without checking out any
#      files and without downloading blobs up front.
#   2. Restrict the working tree to /data/*.txt via sparse-checkout, then
#      check out `main`.
#   3. Run `python3 -m textdir2sql.loading` on dataset/data, passing INE.db
#      and the raw.githubusercontent.com base URL as the image host.
#
# Requirements: git, python3 with the `textdir2sql` module importable.
#
# NOTE(review): `git clone` refuses to clone into an existing non-empty
# directory, so this script is expected to be run from a directory that does
# not yet contain ./dataset.
# NOTE(review): presumably INE.db is an SQLite database produced by
# textdir2sql; confirm against that module's documentation.

# -x: trace every command; -e: stop on the first failing command;
# -u: treat unset variables as errors; pipefail: a pipeline fails if any
# stage fails.
set -x -e -u -o pipefail

readonly REPO="https://github.com/Sygil-Dev/INE-dataset.git"
readonly IMAGE_HOST="https://raw.githubusercontent.com/Sygil-Dev/INE-dataset/main/data/"

# avoid cloning all the image files
git clone --no-checkout --filter=blob:none --depth 1 "${REPO}" dataset

# Beware `--no-cone` is deprecated, so this may stop working someday
# https://git-scm.com/docs/git-sparse-checkout#_internalsnon_cone_problems
git -C dataset sparse-checkout set --no-cone '/data/*.txt'

# Populate the working tree; only paths matching the sparse pattern above
# are materialised.
git -C dataset checkout main

python3 -m textdir2sql.loading dataset/data INE.db \
  --image-host="${IMAGE_HOST}"