Safetensors
TEDDY / scripts /preprocess_sample_data.sh
soumyatghosh's picture
Upload folder using huggingface_hub
4527b5f verified
raw
history blame contribute delete
963 Bytes
#!/bin/bash -l
#
# Preprocess the bundled sample dataset.
#
# Steps:
#   1. Write a timestamped JSON preprocessing config under configs/.
#   2. Run preprocess.py on data/sample_data.h5ad with that config.
#
# All paths are relative to the current working directory.
# NOTE(review): presumably meant to be launched from the repository root;
# confirm before relying on it from elsewhere.

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so preprocess.py is never started with a missing config.
set -euo pipefail

# (Optional) Use the Poetry environment when Poetry is installed.
# `poetry shell` is deliberately avoided: it opens an interactive subshell,
# so the rest of a script would only run after that shell exits, and outside
# the environment. `poetry run` executes a single command inside it instead.
if command -v poetry >/dev/null 2>&1; then
  python_cmd=(poetry run python)
else
  python_cmd=(python)
fi

# Generate a timestamp string (e.g., 20230404123056)
TS=$(date '+%Y%m%d%H%M%S')
CONFIG_FILE="configs/preprocessing_config_${TS}.json"

# The redirection below fails if the target directory does not exist yet.
mkdir -p -- "${CONFIG_FILE%/*}"

# 1) Generate a JSON config file on the fly.
# The delimiter is quoted so the JSON is written literally, with no shell
# expansion. The optional settings are left at null / empty / false here.
cat <<'EOF' > "$CONFIG_FILE"
{
"load_dir": "data",
"save_dir": "data/processed",
"min_gene_counts": null,
"remove_assays": [],
"max_mitochondrial_prop": null,
"remove_cell_types": [],
"hvg_method": null,
"normalized_total": null,
"median_dict": "teddy/data_processing/utils/medians/data/teddy_gene_medians.json",
"log1p": false,
"compute_medians": false,
"median_column": "index",
"reference_id_only": false
}
EOF

# 2) Call preprocess.py, explicitly passing data_path, metadata_path, and config_path
"${python_cmd[@]}" teddy/data_processing/preprocessing/preprocess.py \
  --data_path data/sample_data.h5ad \
  --metadata_path data/sample_data_metadata.json \
  --config_path "$CONFIG_FILE"