# Pipeline lock data: for each stage, the command that was run plus the
# md5/size recorded for every dependency and output.
# NOTE(review): layout matches a DVC `dvc.lock` (schema 2.0) - confirm. Such
# files are normally tool-generated; regenerate rather than hand-editing the
# md5/size values.
schema: '2.0'
stages:
  # subset: runs src/subset_data.py against the raw Wellcome grants CSV and
  # writes data/processed/wellcome_grant_descriptions.csv.
  subset:
    cmd: python src/subset_data.py
    deps:
    - path: data/raw/Wellcome-grants-awarded-1-October-2005-to-04-05-2022.csv
      md5: 5c0d0e532709648b61625e7e130dfaa4
      size: 31028261
    - path: src/subset_data.py
      md5: f4cffd497cb8341cf05728e89cbb0871
      size: 1008
    # Parameter value recorded from params.yaml for this stage.
    params:
      params.yaml:
        n_docs: 500
    outs:
    - path: data/processed/wellcome_grant_descriptions.csv
      md5: 18dd6a7611d7f53b1067def7ba075cba
      size: 644736
  # entities: runs src/process_docs.py; its first dependency is the output of
  # the `subset` stage (same path, md5 and size), and it writes
  # data/processed/entities.jsonl.
  entities:
    cmd: python src/process_docs.py
    deps:
    - path: data/processed/wellcome_grant_descriptions.csv
      md5: 18dd6a7611d7f53b1067def7ba075cba
      size: 644736
    - path: src/process_docs.py
      md5: 54d0e1cf9a85cba745fe80206b7c71d0
      size: 1723
    outs:
    - path: data/processed/entities.jsonl
      md5: ca8a907b4d66d5541bc1b6b508abd7eb
      size: 94862