#!/bin/bash
#
# Summarise a batch of Slurm training runs as a Markdown report.
#
# Layout read:  ${PULSE_ROOT}/<table>/row*/seeds/seed<N>/slurm_<jobid>.out
# Report path:  ${PULSE_ROOT}/results/run_<UTC timestamp>_summary.md
#
# Required environment:
#   PULSE_ROOT  root directory holding the table directories listed in TABLES.
#
# External tools: grep, awk (POSIX features only), sort, uniq, cut, and
# squeue (only consulted for logs that show neither a result nor an error).
#
# -e is intentionally not enabled: several pipelines below legitimately
# return non-zero (for example grep finding no match).
set -uo pipefail

ROOT=${PULSE_ROOT:?PULSE_ROOT must be set to the experiment root directory}

# Job-id range of the submission. It is only echoed in the report header;
# logs are not filtered by it.
JID_LO=265051
JID_HI=265185

# Tables and seeds to scan, in report order.
TABLES=(
  table1_main_comparison
  table3_horizon_curve
  table4_modality_ablation
  table5_component_ablation
  table7_missing_modality
)
SEEDS=(42 123 456 789 1024)

# Patterns applied to the log files.
DONE_RE='^\[done\] best'
ACC_RE='action@1 = ([0-9.]+)'
EPOCH_RE='epoch ([0-9]+)'
LAST_EPOCH_RE='^ E +[0-9]+'
TIMEOUT_RE='DUE TO TIME LIMIT|CANCELLED.*TIME'
FAIL_RE='Traceback|RuntimeError|invalid choice|CUDA error'

TS=$(date -u +%Y%m%d_%H%M)
OUT="${ROOT}/results/run_${TS}_summary.md"
mkdir -p "${ROOT}/results" || exit 1

TMP=$(mktemp -d) || exit 1
trap 'rm -rf "$TMP"' EXIT

# Intermediate table, one tab-separated record per (table, row, seed):
#   1 table  2 row  3 seed  4 jobid  5 status  6 acc  7 last_epoch
#   8 best_epoch  9 error excerpt (FAIL records only)
ORDER_FILE="$TMP/order.tsv"
: > "$ORDER_FILE"

# Unmatched globs must expand to nothing so that "no rows" and "no log" can
# be detected without parsing ls output.
shopt -s nullglob

#######################################
# Append one record to ORDER_FILE.
# Arguments: the record fields, in column order.
#######################################
emit() {
  local IFS=$'\t' # "$*" joins the arguments with the first IFS character
  printf '%s\n' "$*" >> "$ORDER_FILE"
}

#######################################
# Classify one seed directory and append its record to ORDER_FILE.
# Arguments: $1 table, $2 row, $3 seed, $4 seed directory
# Statuses:  MISSING_DIR, NO_LOG, OK, NO_ACC, TIMEOUT, FAIL, RUNNING,
#            EXITED_NO_DONE
#######################################
record_seed() {
  local table=$1 row=$2 seed=$3 seed_dir=$4
  local -a logs=()
  local log jid done_line acc best_epoch last_epoch err

  if [[ ! -d $seed_dir ]]; then
    emit "$table" "$row" "$seed" - MISSING_DIR - - -
    return
  fi

  # When a seed directory holds several logs, the first one in glob
  # (sorted) order is used.
  logs=("$seed_dir"/slurm_*.out)
  if ((${#logs[@]} == 0)); then
    emit "$table" "$row" "$seed" - NO_LOG - - -
    return
  fi
  log=${logs[0]}

  # slurm_<jobid>.out -> <jobid>
  jid=${log##*/}
  jid=${jid#slurm_}
  jid=${jid%.out}

  done_line=$(grep -m1 -- "$DONE_RE" "$log")
  if [[ -n $done_line ]]; then
    acc=''
    best_epoch=''
    [[ $done_line =~ $ACC_RE ]] && acc=${BASH_REMATCH[1]}
    [[ $done_line =~ $EPOCH_RE ]] && best_epoch=${BASH_REMATCH[1]}
    # Second field of the last epoch line in the log.
    last_epoch=$(grep -E -- "$LAST_EPOCH_RE" "$log" | tail -1 | awk '{print $2}')
    if [[ -n $acc ]]; then
      emit "$table" "$row" "$seed" "$jid" OK "$acc" \
        "${last_epoch:-?}" "${best_epoch:-?}"
    else
      # A result line without a parseable accuracy must not be averaged
      # in as 0, so it gets its own non-OK status.
      emit "$table" "$row" "$seed" "$jid" NO_ACC - \
        "${last_epoch:-?}" "${best_epoch:-?}"
    fi
  elif grep -qE -- "$TIMEOUT_RE" "$log"; then
    emit "$table" "$row" "$seed" "$jid" TIMEOUT - - -
  elif grep -qE -- "$FAIL_RE" "$log"; then
    err=$(grep -E -- "$FAIL_RE" "$log" | tail -1)
    err=${err//$'\t'/ } # a tab inside the excerpt would add TSV columns
    emit "$table" "$row" "$seed" "$jid" FAIL - - - "${err:0:120}"
  elif squeue -j "$jid" -h 2>/dev/null | grep -q .; then
    emit "$table" "$row" "$seed" "$jid" RUNNING - - -
  else
    # The log has no result, timeout or error marker and the job is not
    # listed by squeue.
    emit "$table" "$row" "$seed" "$jid" EXITED_NO_DONE - - -
  fi
}

#######################################
# Print the Markdown report built from ORDER_FILE.
# Outputs: the report on stdout.
#######################################
write_report() {
  local tt seed header rule seed_list

  echo "# Run summary — $(date '+%Y-%m-%d %H:%M %Z')"
  echo
  echo "Job range: \`${JID_LO}-${JID_HI}\` ($((JID_HI - JID_LO + 1)) expected)"
  echo
  echo "## Overall status"
  echo
  echo "| status | count |"
  echo "|---|---|"
  cut -f5 "$ORDER_FILE" | sort | uniq -c \
    | awk '{printf "| %s | %d |\n", $2, $1}'
  echo
  echo "## Per-row mean ± std (action@1)"
  echo
  echo "| table | row | n_ok | n_fail | mean | std | best_seed | best_acc | epochs (median) | best_epoch (median) |"
  echo "|---|---|---:|---:|---:|---:|---|---:|---:|---:|"
  # std is the population standard deviation (divides by n_ok).
  # Rows without any OK seed are still listed, with "-" placeholders.
  awk -F'\t' '
    # Lower median of the tokens in the space-separated list s that start
    # with a digit; "-" when there are none. vals is rebuilt on every call,
    # so nothing leaks from one row to the next.
    function median(s,    tok, vals, nt, cnt, i, j, t) {
      cnt = 0
      nt = split(s, tok, " ")
      for (i = 1; i <= nt; i++) {
        if (tok[i] !~ /^[0-9]/) continue   # skips the "?" placeholder
        t = tok[i] + 0
        # insertion sort: vals[1..cnt] stays ascending
        for (j = cnt; j >= 1 && vals[j] > t; j--) vals[j + 1] = vals[j]
        vals[j + 1] = t
        cnt++
      }
      return cnt ? vals[int((cnt + 1) / 2)] : "-"
    }
    {
      key = $1 "\t" $2
      seen[key] = 1
      if ($5 == "OK") {
        acc = $6 + 0
        n[key]++
        sum[key] += acc
        sumsq[key] += acc * acc
        # The first OK seed always seeds the maximum, so an accuracy of 0
        # still yields a best seed.
        if (n[key] == 1 || acc > best[key]) {
          best[key] = acc
          bestseed[key] = $3
        }
        last_eps[key] = last_eps[key] " " $7
        best_eps[key] = best_eps[key] " " $8
      } else {
        fail[key]++
      }
    }
    END {
      for (k in seen) {
        split(k, part, "\t")
        if (n[k] > 0) {
          m = sum[k] / n[k]
          v = sumsq[k] / n[k] - m * m
          if (v < 0) v = 0   # guard against rounding below zero
          printf "| %s | %s | %d | %d | %.4f | %.4f | seed%s | %.4f | %s | %s |\n", \
            part[1], part[2], n[k], fail[k] + 0, m, sqrt(v), \
            bestseed[k], best[k], median(last_eps[k]), median(best_eps[k])
        } else {
          printf "| %s | %s | 0 | %d | - | - | - | - | - | - |\n", \
            part[1], part[2], fail[k] + 0
        }
      }
    }
  ' "$ORDER_FILE" | sort
  echo
  echo "## Failed / non-OK jobs"
  echo
  awk -F'\t' '
    $5 != "OK" {
      printf "- **%s/%s seed%s** jid=%s status=%s %s\n", $1, $2, $3, $4, $5, $9
      found = 1
    }
    END { if (!found) print "_None._" }
  ' "$ORDER_FILE"
  echo
  cat <<'EOF'
## Notes / known operational concerns

- These are operational results only. Most jobs trigger early-stop (patience=12) at epoch 1–18 instead of running the full 40 epochs, because validation metric saturates very early.
- `best action@1` observed in spot-check ranged 0.6%–3.4% (17 verb × 34 noun = 578 action classes; random ≈ 0.17%). This is a model/hyperparameter issue, not an infra issue.
- If you want to revisit hparams: try larger patience, lower lr, or warmup. The data loader and GPU stack are confirmed working (cu121 / A800).

EOF
  echo "## Per-table seed-level details"
  echo

  # One column per seed, generated from SEEDS so header and cells agree.
  header="| row |"
  rule="|---|"
  for seed in "${SEEDS[@]}"; do
    header+=" seed${seed} |"
    rule+="---|"
  done
  seed_list=${SEEDS[*]}

  for tt in "${TABLES[@]}"; do
    echo "### ${tt}"
    echo
    echo "$header"
    echo "$rule"
    awk -F'\t' -v tt="$tt" -v seeds="$seed_list" '
      $1 == tt {
        cell[$2, $3] = ($5 == "OK") ? sprintf("%.4f", $6) : ("·" $5)
        rows[$2] = 1
      }
      END {
        ns = split(seeds, s, " ")
        for (r in rows) {
          line = "| " r " |"
          for (i = 1; i <= ns; i++)
            line = line " " (((r, s[i]) in cell) ? cell[r, s[i]] : "-") " |"
          print line
        }
      }
    ' "$ORDER_FILE" | sort
    echo
  done
}

# Collect one record per (table, row, seed).
for tt in "${TABLES[@]}"; do
  for row_dir in "${ROOT}/${tt}"/row*; do
    [[ -d $row_dir ]] || continue
    row=${row_dir##*/}
    for seed in "${SEEDS[@]}"; do
      record_seed "$tt" "$row" "$seed" "${row_dir}/seeds/seed${seed}"
    done
  done
done

write_report > "$OUT" || {
  echo "failed to write ${OUT}" >&2
  exit 1
}

echo "Wrote $OUT"
ls -la "$OUT"