#
# run_layoutsam_with_slack.sh
#   Builds the filtered image list, then downloads only those images,
#   posting progress & errors into Slack every 5 min.
# ─────────────────────────────────────────────
# 1) Slack webhook setup
# ─────────────────────────────────────────────
# WEBHOOK is the Slack incoming-webhook URL that every notification is POSTed to.
# NOTE(review): a live webhook URL committed to source is a leaked credential.
# Rotate it and supply the replacement through SLACK_WEBHOOK_URL; the literal
# below is kept only as a fallback so existing invocations keep working.
WEBHOOK="${SLACK_WEBHOOK_URL:-https://hooks.slack.com/services/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}"
# Post one text message to the Slack incoming webhook.
# Globals:   WEBHOOK (read) - URL the JSON payload is POSTed to.
# Arguments: $1 - message text. The two-character sequence \n is passed
#                 through as a JSON newline escape, because callers in this
#                 script use it to request a line break. Every other
#                 backslash, double quote, real newline, carriage return and
#                 tab is JSON-escaped, so arbitrary log text cannot produce
#                 an invalid payload.
# Outputs:   whatever curl prints (the webhook's response body).
# Returns:   curl's exit status.
send_slack_message() {
  local rest="$1"
  local payload='' ch next
  local nl='
'
  local tab cr
  tab=$(printf '\t')
  cr=$(printf '\r')

  # Walk the message one character at a time using only POSIX expansions.
  while [ -n "$rest" ]; do
    next=${rest#?}
    ch=${rest%"$next"}
    if [ -z "$ch" ]; then
      # Could not split off a character; append the remainder untouched
      # rather than loop forever.
      payload=$payload$rest
      break
    fi
    rest=$next
    case "$ch" in
      '"')
        payload=$payload'\"'
        ;;
      '\')
        case "$rest" in
          n*)
            # Caller-supplied \n: keep it as the JSON newline escape.
            payload=$payload'\n'
            rest=${rest#n}
            ;;
          *)
            payload=$payload'\\'
            ;;
        esac
        ;;
      "$nl")
        payload=$payload'\n'
        ;;
      "$cr")
        payload=$payload'\r'
        ;;
      "$tab")
        payload=$payload'\t'
        ;;
      *)
        payload=$payload$ch
        ;;
    esac
  done

  curl -s -X POST -H 'Content-type: application/json' \
    --data "{\"text\":\"$payload\"}" \
    "$WEBHOOK"
}
# ─────────────────────────────────────────────
# 2) Paths & startup
# ─────────────────────────────────────────────
# Helper scripts, given relative to the directory the script is started from.
IMG_LIST_SCRIPT="uni/generate_image_list.py"
DOWNLOAD_SCRIPT="uni/download.py"

# Dataset root and the image / mask directories handed to the downloader.
DATA_DIR="/home/ubuntu/ext-mamba-illinois/UCSD-project/workspace/datasets/SA-1b"
DOWNLOAD_DIR="$DATA_DIR/LayoutImgs"
MASK_DIR="$DATA_DIR/LayoutMasks"

# All helper-script output is appended here; the error check greps this file.
LOG_FILE="./download_progress.log"

# Epoch seconds at startup, used to report elapsed minutes.
START_TIME=$(date +%s)
# ─────────────────────────────────────────────
# 3) Check if JSON file exists, skip generation if it does
# ─────────────────────────────────────────────
# An empty file is treated like a missing one, so a truncated leftover from an
# interrupted run triggers regeneration instead of failing later at the count.
# NOTE(review): presumably $IMG_LIST_SCRIPT writes ./images_to_download.json —
# confirm against the script.
if [ -f "./images_to_download.json" ] && [ -s "./images_to_download.json" ]; then
  send_slack_message "π Using existing image list: images_to_download.json"
else
  send_slack_message "π Generating LayoutSAM image listβ¦"
  if ! python3 "$IMG_LIST_SCRIPT" >> "$LOG_FILE" 2>&1; then
    send_slack_message "β Failed to generate image list β aborting."
    exit 1
  fi
fi
# ─────────────────────────────────────────────
# 4) Compute total images count
# ─────────────────────────────────────────────
# The embedded Python prints len() of the top-level JSON value. Its
# diagnostics are appended to the log so the reason for a failure is kept.
TOTAL_IMAGES=$(python3 - 2>> "$LOG_FILE" << 'PYCODE'
import json, sys
try:
    with open("images_to_download.json") as fh:
        data = json.load(fh)
    print(len(data))
except Exception as exc:
    sys.stderr.write("ERROR reading images_to_download.json: %r\n" % (exc,))
    sys.exit(1)
PYCODE
)
# Anything other than a plain non-negative integer means the count failed.
case "$TOTAL_IMAGES" in
  '' | *[!0-9]*)
    send_slack_message "β Could not determine total images count."
    exit 1
    ;;
esac
send_slack_message "β Image list ready: $TOTAL_IMAGES paths."
# ─────────────────────────────────────────────
# 5) Generate shard links for downloader
# ─────────────────────────────────────────────
send_slack_message "π Generating shard links from image listβ¦"
# Output of the helper goes to the log; any non-zero exit aborts the run.
if ! python3 generate_shard_links.py >> "$LOG_FILE" 2>&1; then
  send_slack_message "β Failed to generate shard links β aborting."
  exit 1
fi
# ─────────────────────────────────────────────
# 6) Launch the downloader in background
# ─────────────────────────────────────────────
send_slack_message "π Starting SA-1B download of LayoutSAM subsetβ¦"

# DOWNLOADER_DIR is not assigned anywhere in the visible script, so when unset
# the raw directory used to resolve to "/raw" at the filesystem root. It is
# still honoured when the caller exports it; otherwise fall back to DATA_DIR.
# NOTE(review): confirm that "$DATA_DIR/raw" is the intended raw-shard location.
RAW_DIR="${DOWNLOADER_DIR:-$DATA_DIR}/raw"

python3 "$DOWNLOAD_SCRIPT" \
  --processes 16 \
  --input_file "./layoutsam_shard_links.txt" \
  --raw_dir "$RAW_DIR" \
  --images_dir "$DOWNLOAD_DIR" \
  --masks_dir "$MASK_DIR" \
  --images_json "./images_to_download.json" \
  --skip_existing >> "$LOG_FILE" 2>&1 &

# PID of the background downloader, used for liveness polling and wait.
SCRIPT_PID=$!
# ─────────────────────────────────────────────
# 7) Progress & error-checking functions
# ─────────────────────────────────────────────
# Print a one-line progress summary.
# Globals:   START_TIME (read)   - epoch seconds at script start
#            DOWNLOAD_DIR (read) - directory whose regular files are counted
#                                  (recursively)
#            TOTAL_IMAGES (read) - expected number of images
# Outputs:   the summary line on stdout
# Returns:   0
get_progress() {
  local now elapsed got usage
  now=$(date +%s)
  elapsed=$(( (now - START_TIME) / 60 ))

  if [ -d "$DOWNLOAD_DIR" ]; then
    got=$(find "$DOWNLOAD_DIR" -type f | wc -l)
    # Arithmetic round-trip strips the padding some wc implementations emit.
    got=$(( got + 0 ))
    # -P keeps each filesystem on one line so the 5th column is always "Use%".
    usage=$(df -hP "$DOWNLOAD_DIR" 2>/dev/null | awk 'NR == 2 { print $5 }')
  else
    # The directory may not exist yet; report zero instead of emitting
    # find/df errors.
    got=0
    usage=''
  fi

  printf '%s\n' "π Progress (${elapsed}m): $got / $TOTAL_IMAGES images downloaded β’ Disk: ${usage:-n/a}"
}
# Print the most recent log line that mentions an error, if there is one.
# Globals:   LOG_FILE (read)
# Outputs:   one warning line with the last line of LOG_FILE matching
#            error|exception|traceback (case-insensitive); nothing when there
#            is no match or the log cannot be read.
# Returns:   0
check_errors() {
  local last
  # The log may not exist yet.
  [ -r "$LOG_FILE" ] || return 0
  last=$(grep -iE "error|exception|traceback" "$LOG_FILE" | tail -n 1)
  if [ -n "$last" ]; then
    printf '%s\n' "β οΈ Last error: $last"
  fi
  return 0
}
# ─────────────────────────────────────────────
# 8) Monitoring loop
# ─────────────────────────────────────────────
REPORT_INTERVAL=300 # seconds between Slack progress posts
POLL_INTERVAL=5     # seconds between liveness checks of the downloader

while kill -0 "$SCRIPT_PID" 2>/dev/null; do
  # Sleep in short slices so the end of the download is noticed promptly
  # instead of after a full report interval.
  waited=0
  while [ "$waited" -lt "$REPORT_INTERVAL" ] && kill -0 "$SCRIPT_PID" 2>/dev/null; do
    sleep "$POLL_INTERVAL"
    waited=$(( waited + POLL_INTERVAL ))
  done

  # Downloader ended mid-interval: skip the progress post and go straight to
  # the final report.
  kill -0 "$SCRIPT_PID" 2>/dev/null || break

  prog=$(get_progress)
  err=$(check_errors)
  # The literal \n is the line-break convention understood by send_slack_message.
  send_slack_message "$prog${err:+\n$err}"
done
# ─────────────────────────────────────────────
# 9) Final report
# ─────────────────────────────────────────────
# Reap the downloader and keep its exit status.
wait "$SCRIPT_PID"
EXIT=$?
if [ "$EXIT" -eq 0 ]; then
  FINAL=$(find "$DOWNLOAD_DIR" -type f | wc -l)
  DURATION=$(( ( $(date +%s) - START_TIME ) / 60 ))
  send_slack_message "β Done! Downloaded $FINAL/$TOTAL_IMAGES images in ${DURATION}m."
  # ───────────────────────────────────────────
  # 10) Cleanup
  # ───────────────────────────────────────────
  # Only a successful run discards the log.
  rm -f "$LOG_FILE"
else
  send_slack_message "β Download failed (exit code $EXIT). Check log: $LOG_FILE"
  # Join the last log lines with the literal two-character \n separator so no
  # raw newline is embedded in the Slack message.
  LAST_LINES=$(tail -n 5 "$LOG_FILE" 2>/dev/null |
    awk 'NR > 1 { printf "%s", "\\n" } { printf "%s", $0 }')
  send_slack_message "π Last log lines:\n$LAST_LINES"
  # The log is kept on failure: the message above tells the reader to check it.
fi