Arnwald84 committed on
Commit
cded1f5
·
1 Parent(s): 7a352c6

fix: restart Hindsight after pg_restore to reload data

Browse files

pg_restore rewrites the PostgreSQL data underneath the running
API, which still holds cached state. Restarting Hindsight forces
it to reconnect to the restored database.

restore.py now returns exit codes:
0 = data restored (restart needed)
2 = no backup found (skip restart)
1 = error

Files changed (2) hide show
  1. scripts/entrypoint.sh +35 -3
  2. scripts/restore.py +16 -7
scripts/entrypoint.sh CHANGED
@@ -53,12 +53,44 @@ done
53
 
54
  # ============================================================
55
  # STEP 4: Restore from backup (PG is now running)
 
 
 
 
 
 
56
  # ============================================================
57
  if [ -n "${HF_TOKEN:-}" ]; then
58
  log "Attempting restore from HF Dataset..."
59
- python3 /opt/backup/restore.py || {
60
- log "Restore failed or no backup found — continuing with fresh database"
61
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  else
63
  log "HF_TOKEN not set — skipping restore"
64
  fi
 
53
 
54
  # ============================================================
55
  # STEP 4: Restore from backup (PG is now running)
56
+ # After pg_restore, we must restart Hindsight so the API
57
+ # reconnects to PG and sees the restored data.
58
+ # Exit codes from restore.py:
59
+ # 0 = data restored successfully
60
+ # 2 = no backup found (skip restart)
61
+ # 1 = error
62
  # ============================================================
63
  if [ -n "${HF_TOKEN:-}" ]; then
64
  log "Attempting restore from HF Dataset..."
65
+ python3 /opt/backup/restore.py
66
+ restore_exit=$?
67
+
68
+ if [ "$restore_exit" -eq 0 ]; then
69
+ log "Restore succeeded — restarting Hindsight to load restored data..."
70
+ kill "$HINDSIGHT_PID" 2>/dev/null || true
71
+ wait "$HINDSIGHT_PID" 2>/dev/null || true
72
+ sleep 2
73
+
74
+ /app/start-all.sh &
75
+ HINDSIGHT_PID=$!
76
+ log "Hindsight restarted (PID $HINDSIGHT_PID)"
77
+
78
+ for i in $(seq 1 60); do
79
+ if curl -sf http://localhost:${HINDSIGHT_API_PORT:-7860}/health > /dev/null 2>&1; then
80
+ log "Hindsight is healthy after restore"
81
+ break
82
+ fi
83
+ if ! kill -0 "$HINDSIGHT_PID" 2>/dev/null; then
84
+ log "Hindsight died after restore restart"
85
+ exit 1
86
+ fi
87
+ sleep 5
88
+ done
89
+ elif [ "$restore_exit" -eq 2 ]; then
90
+ log "No backup found — continuing with fresh database"
91
+ else
92
+ log "Restore failed — continuing with fresh database"
93
+ fi
94
  else
95
  log "HF_TOKEN not set — skipping restore"
96
  fi
scripts/restore.py CHANGED
@@ -41,16 +41,22 @@ def find_pg_bin(name: str) -> str:
41
  raise FileNotFoundError(f"{name} not found in ~/.pg0/installation/")
42
 
43
 
44
- def main() -> None:
 
 
 
 
 
 
45
  if not HF_TOKEN:
46
  log("HF_TOKEN not set — skipping restore")
47
- return
48
 
49
  try:
50
  from huggingface_hub import HfApi, hf_hub_download
51
  except ImportError:
52
  log("huggingface_hub not installed — skipping restore")
53
- return
54
 
55
  api = HfApi(token=HF_TOKEN)
56
 
@@ -59,11 +65,11 @@ def main() -> None:
59
  files = list(api.list_repo_files(repo_id=HF_REPO, repo_type="dataset"))
60
  except Exception as e:
61
  log(f"Cannot access repo {HF_REPO}: {e}")
62
- return
63
 
64
  if "snapshots/latest.pgdump" not in files:
65
  log("No pg_dump backup found in HF Dataset — starting fresh")
66
- return
67
 
68
  log(f"Downloading latest backup from {HF_REPO}...")
69
 
@@ -114,17 +120,20 @@ def main() -> None:
114
  ]
115
  if real_errors:
116
  log(f"pg_restore had errors: {'; '.join(real_errors[:5])}")
 
117
  else:
118
  log("pg_restore completed (minor warnings only)")
119
  else:
120
  log("pg_restore completed successfully")
121
 
122
- log("Restore complete")
 
123
 
124
 
125
  if __name__ == "__main__":
126
  try:
127
- main()
 
128
  except Exception as e:
129
  log(f"FAILED: {e}")
130
  sys.exit(1)
 
41
  raise FileNotFoundError(f"{name} not found in ~/.pg0/installation/")
42
 
43
 
44
+ EXIT_RESTORED = 0 # Data was restored — caller should restart Hindsight
45
+ EXIT_ERROR = 1 # Restore failed
46
+ EXIT_NO_BACKUP = 2 # No backup found — skip restart
47
+
48
+
49
+ def main() -> int:
50
+ """Returns exit code: 0=restored, 1=error, 2=no backup."""
51
  if not HF_TOKEN:
52
  log("HF_TOKEN not set — skipping restore")
53
+ return EXIT_NO_BACKUP
54
 
55
  try:
56
  from huggingface_hub import HfApi, hf_hub_download
57
  except ImportError:
58
  log("huggingface_hub not installed — skipping restore")
59
+ return EXIT_NO_BACKUP
60
 
61
  api = HfApi(token=HF_TOKEN)
62
 
 
65
  files = list(api.list_repo_files(repo_id=HF_REPO, repo_type="dataset"))
66
  except Exception as e:
67
  log(f"Cannot access repo {HF_REPO}: {e}")
68
+ return EXIT_ERROR
69
 
70
  if "snapshots/latest.pgdump" not in files:
71
  log("No pg_dump backup found in HF Dataset — starting fresh")
72
+ return EXIT_NO_BACKUP
73
 
74
  log(f"Downloading latest backup from {HF_REPO}...")
75
 
 
120
  ]
121
  if real_errors:
122
  log(f"pg_restore had errors: {'; '.join(real_errors[:5])}")
123
+ return EXIT_ERROR
124
  else:
125
  log("pg_restore completed (minor warnings only)")
126
  else:
127
  log("pg_restore completed successfully")
128
 
129
+ log("Restore complete — Hindsight should be restarted to load restored data")
130
+ return EXIT_RESTORED
131
 
132
 
133
  if __name__ == "__main__":
134
  try:
135
+ code = main()
136
+ sys.exit(code)
137
  except Exception as e:
138
  log(f"FAILED: {e}")
139
  sys.exit(1)