#!/bin/bash
#
# Print a summary of one results directory produced by an eval-service run.
#
# Usage: <this script> [results_directory]
#
# With no argument, the script searches examples/circle_packing/results for
# directories whose name contains "eval_service" and picks the one that sorts
# last lexicographically. It reports that one as "most recent", which is only
# true if the directory names embed a sortable timestamp -- TODO confirm.
#
# Sections printed (each is skipped or degraded gracefully when data is absent):
#   - best program metrics        (best/results/metrics.json, via jq)
#   - eval agent memory files     (eval_agent_memory/)
#   - generation count            (gen_* directories directly under the root)
#   - metrics of the last three generations
#   - program counts              (evolution_db.sqlite, via sqlite3)
#   - whether aux_ metrics and text_feedback reached the newest generation
#
# Exit status: 0 on completion; 1 when no results directory can be located or
# the given directory does not exist.

# NOTE: pipefail is intentionally NOT enabled. The auto-discovery pipeline
# relies on `find` failing quietly when the default root is missing so the
# friendly "No results found" message can be printed instead of aborting.
set -e

readonly DEFAULT_RESULTS_ROOT="examples/circle_packing/results"
readonly RULE_WIDTH=80

# Directory being analysed; set by resolve_results_dir.
RESULTS_DIR=""
# gen_* directories under RESULTS_DIR in version order; set by
# collect_generation_dirs.
GEN_DIRS=()

#######################################
# Print a horizontal rule made of RULE_WIDTH '=' characters.
#######################################
print_rule() {
  local rule
  printf -v rule '%*s' "$RULE_WIDTH" ''
  printf '%s\n' "${rule// /=}"
}

#######################################
# Evaluate a jq filter against a file, falling back to a default.
# Arguments: $1 - jq filter, $2 - JSON file, $3 - value printed when jq
#            fails (missing binary, malformed JSON, filter error) or prints
#            nothing.
# Outputs:   the raw jq result, or the fallback, on stdout.
#######################################
json_query() {
  local filter=$1 file=$2 fallback=$3
  local value
  if value=$(jq -r "$filter" "$file" 2>/dev/null) && [[ -n "$value" ]]; then
    printf '%s\n' "$value"
  else
    printf '%s\n' "$fallback"
  fi
}

#######################################
# Run a single SQL statement with sqlite3, falling back to "N/A".
# Arguments: $1 - database file, $2 - SQL statement.
# Outputs:   the query result, or "N/A" if sqlite3 fails or prints nothing.
#######################################
sql_scalar() {
  local db=$1 sql=$2
  local value
  if value=$(sqlite3 "$db" "$sql" 2>/dev/null) && [[ -n "$value" ]]; then
    printf '%s\n' "$value"
  else
    printf '%s\n' "N/A"
  fi
}

#######################################
# Succeed only when $1 is a non-negative integer greater than zero.
# Guards the numeric comparison against empty or non-integer jq output.
#######################################
is_positive_count() {
  [[ "$1" =~ ^[0-9]+$ ]] && (( 10#$1 > 0 ))
}

#######################################
# Set RESULTS_DIR from the first argument, or auto-discover it.
# Exits with status 1 (after printing usage) when nothing is found.
#######################################
resolve_results_dir() {
  if [[ $# -gt 0 ]]; then
    RESULTS_DIR="$1"
    return
  fi

  RESULTS_DIR=$(find "$DEFAULT_RESULTS_ROOT" -type d -name "*eval_service*" \
    | sort -r \
    | head -1)
  if [[ -z "$RESULTS_DIR" ]]; then
    echo "❌ No results found"
    echo ""
    echo "Usage: $0 [results_directory]"
    exit 1
  fi
  echo "📁 Using most recent results: $RESULTS_DIR"
}

#######################################
# Fill GEN_DIRS with the gen_* directories directly under RESULTS_DIR,
# in version-sort order. Read line by line so paths containing spaces
# stay intact (a `for x in $(find ...)` loop would split them).
#######################################
collect_generation_dirs() {
  local dir
  GEN_DIRS=()
  while IFS= read -r dir; do
    GEN_DIRS+=("$dir")
  done < <(find "$RESULTS_DIR" -maxdepth 1 -type d -name "gen_*" | sort -V)
}

#######################################
# Report score, correctness and generation of the best program, if present.
#######################################
report_best_program() {
  local metrics="$RESULTS_DIR/best/results/metrics.json"

  if [[ ! -f "$metrics" ]]; then
    echo "❌ No best program found"
    echo ""
    return
  fi

  echo "✅ Best Program Found"
  echo "---"
  echo " Score: $(json_query '.combined_score' "$metrics" 'N/A')"
  echo " Correct: $(json_query '.correct' "$metrics" 'N/A')"
  echo " Generation: $(json_query '.generation // "N/A"' "$metrics" 'N/A')"
  echo ""
}

#######################################
# Report which eval-agent memory artifacts exist under RESULTS_DIR.
#######################################
report_eval_agent_memory() {
  local memory_dir="$RESULTS_DIR/eval_agent_memory"
  local agents_doc="$memory_dir/EVAL_AGENTS.md"
  local aux_file="$memory_dir/auxiliary_metrics.py"
  local state_file="$memory_dir/service_state.json"
  local metric_count

  if [[ ! -d "$memory_dir" ]]; then
    echo "⚠️ No eval agent memory found"
    echo ""
    return
  fi

  echo "✅ Eval Agent Memory Found"
  echo "---"

  if [[ -f "$agents_doc" ]]; then
    # Arithmetic expansion strips the padding some wc implementations emit.
    echo " 📄 EVAL_AGENTS.md: $(( $(wc -l < "$agents_doc") )) lines"
  fi

  if [[ -f "$aux_file" ]]; then
    echo " 📄 auxiliary_metrics.py: Found"
    # grep -c already prints "0" when nothing matches but exits 1; only the
    # exit status is neutralised here so the count is not emitted twice.
    metric_count=$(grep -c "^def evaluate_" "$aux_file") || true
    echo " 📊 Auxiliary metrics: ${metric_count:-0}"
  fi

  if [[ -f "$state_file" ]]; then
    echo " 💾 service_state.json: Found"
  fi
  echo ""
}

#######################################
# Report how many gen_* directories were found.
#######################################
report_generations() {
  echo "📈 Generations"
  echo "---"
  echo " Total generations: ${#GEN_DIRS[@]}"
  echo ""
}

#######################################
# Print one summary line for each of the last (up to) three generations
# that have a results/metrics.json file.
#######################################
report_sample_metrics() {
  local total=${#GEN_DIRS[@]}
  # Explicit start index: a negative slice offset larger than the array
  # would expand to nothing when fewer than three generations exist.
  local start=$(( total > 3 ? total - 3 : 0 ))
  local gen_dir metrics gen_num

  echo "📊 Sample Metrics (last 3 generations)"
  echo "---"
  for gen_dir in "${GEN_DIRS[@]:start}"; do
    metrics="$gen_dir/results/metrics.json"
    [[ -f "$metrics" ]] || continue

    gen_num=${gen_dir##*/}
    gen_num=${gen_num/gen_/}
    echo " Gen $gen_num:" \
      "score=$(json_query '.combined_score' "$metrics" 'N/A')," \
      "correct=$(json_query '.correct // "N/A"' "$metrics" 'N/A')," \
      "aux_metrics=$(json_query '.auxiliary | length' "$metrics" '0')," \
      "descriptions=$(json_query '.auxiliary_descriptions | length' "$metrics" '0')"
  done
  echo ""
}

#######################################
# Report program counts from evolution_db.sqlite when the file exists.
#######################################
report_database() {
  local db="$RESULTS_DIR/evolution_db.sqlite"

  [[ -f "$db" ]] || return 0

  echo "💾 Database Statistics"
  echo "---"
  echo " Total programs: $(sql_scalar "$db" "SELECT COUNT(*) FROM programs;")"
  echo " Correct programs: $(sql_scalar "$db" "SELECT COUNT(*) FROM programs WHERE correct=1;")"
  echo ""
}

#######################################
# Check the newest generation's metrics for aux_-prefixed keys in
# primary.public and for a non-empty primary.text_feedback.
#######################################
report_aux_integration() {
  local total=${#GEN_DIRS[@]}
  local metrics aux_in_primary feedback_len

  echo "🔍 Auxiliary Metrics Integration Check"
  echo "---"

  # Without this guard an empty generation list would make the path below
  # resolve to the absolute "/results/metrics.json".
  if (( total > 0 )); then
    metrics="${GEN_DIRS[total-1]}/results/metrics.json"
    if [[ -f "$metrics" ]]; then
      aux_in_primary=$(json_query \
        '.primary.public | keys | map(select(startswith("aux_"))) | length' \
        "$metrics" '0')
      if is_positive_count "$aux_in_primary"; then
        echo " ✅ Auxiliary metrics merged into primary.public"
        echo " 📊 Found $aux_in_primary aux_ metrics"
      else
        echo " ⚠️ No aux_ metrics found in primary.public"
      fi

      feedback_len=$(json_query '.primary.text_feedback // "" | length' \
        "$metrics" '0')
      if is_positive_count "$feedback_len"; then
        echo " ✅ text_feedback present (likely contains metric descriptions)"
      else
        echo " ⚠️ No text_feedback found"
      fi
    fi
  fi
  echo ""
}

#######################################
# Entry point: resolve the results directory and print every section.
# Arguments: optional results directory.
#######################################
main() {
  echo "=================================="
  echo "📊 Results Analysis"
  echo "=================================="

  resolve_results_dir "$@"
  echo ""

  if [[ ! -d "$RESULTS_DIR" ]]; then
    echo "❌ Directory not found: $RESULTS_DIR"
    exit 1
  fi

  print_rule
  echo "📁 Results Directory: $RESULTS_DIR"
  print_rule
  echo ""

  collect_generation_dirs

  report_best_program
  report_eval_agent_memory
  report_generations
  report_sample_metrics
  report_database
  report_aux_integration

  print_rule
  echo "Analysis complete!"
  print_rule
  echo ""
  echo "To view detailed results:"
  echo " - Best program: $RESULTS_DIR/best/main.py"
  echo " - Agent documentation: $RESULTS_DIR/eval_agent_memory/EVAL_AGENTS.md"
  echo " - All metrics: find $RESULTS_DIR -name 'metrics.json'"
  echo ""
}

main "$@"