#!/usr/bin/env bash
#
# Writes a timestamped preprocessing config under configs/ and then runs
# preprocess.py on the sample data with that config.
#
# All paths are relative to the current working directory (presumably the
# repository root -- confirm before running from elsewhere).
set -euo pipefail

# Choose the Python launcher. `poetry shell` must not be used in a script: it
# opens an interactive subshell, so the commands after it would only run once
# that shell exits, and then outside the Poetry environment. `poetry run`
# executes a single command inside the environment instead. Poetry stays
# optional: without it, whatever `python` is on PATH is used.
if command -v poetry >/dev/null 2>&1; then
  python_cmd=(poetry run python)
else
  python_cmd=(python)
fi

# Timestamp string (e.g., 20230404123056) so each run gets its own config file.
TS=$(date '+%Y%m%d%H%M%S')
CONFIG_DIR="configs"
CONFIG_FILE="${CONFIG_DIR}/preprocessing_config_${TS}.json"

# The redirect below fails if the directory does not exist yet.
mkdir -p -- "$CONFIG_DIR"

# 1) Generate the JSON config file on the fly.
# The delimiter is quoted so the JSON is written literally, with no shell
# expansion of its contents.
cat <<'EOF' > "$CONFIG_FILE"
{
  "load_dir": "data",
  "save_dir": "data/processed",
  "min_gene_counts": null,
  "remove_assays": [],
  "max_mitochondrial_prop": null,
  "remove_cell_types": [],
  "hvg_method": null,
  "normalized_total": null,
  "median_dict": "teddy/data_processing/utils/medians/data/teddy_gene_medians.json",
  "log1p": false,
  "compute_medians": false,
  "median_column": "index",
  "reference_id_only": false
}
EOF

# 2) Call preprocess.py, explicitly passing data_path, metadata_path, and
# config_path.
"${python_cmd[@]}" teddy/data_processing/preprocessing/preprocess.py \
  --data_path data/sample_data.h5ad \
  --metadata_path data/sample_data_metadata.json \
  --config_path "$CONFIG_FILE"