Huacheng5240 committed on
Commit
a4e88bf
1 Parent(s): c57de5a

Create vps_monitor.py

Browse files
Files changed (1) hide show
  1. vps_monitor.py +216 -0
vps_monitor.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import paramiko
2
+ import schedule
3
+ import time
4
+ import os
5
+ import sys
6
+ from flask import Flask, jsonify, render_template_string
7
+ from threading import Thread
8
+ import logging
9
+ from datetime import timedelta
10
+
11
# Flask application that serves the status pages.
app = Flask(__name__)

# Latest check result per monitored script, keyed by "<hostname>:<script name>".
vps_status = dict()

# Every record is written to both stdout and stderr.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[logging.StreamHandler(stream) for stream in (sys.stdout, sys.stderr)],
)
logger = logging.getLogger()
24
+
25
def get_vps_configs():
    """Build the list of monitoring targets from environment variables.

    Hosts are read from ``HOSTNAME_1``, ``HOSTNAME_2``, ... and scanning stops
    at the first index that is unset or blank. For host ``i`` the credentials
    come from ``USERNAME_i`` / ``PASSWORD_i`` and the scripts from
    ``SCRIPT_PATHS_i_1``, ``SCRIPT_PATHS_i_2``, ... (again stopping at the
    first unset or blank entry).

    Returns:
        list[dict]: one entry per (host, script) pair with the keys
        ``index``, ``hostname``, ``username``, ``password`` and
        ``script_path``. A host without any script contributes no entries.
    """
    configs = []
    index = 1
    while True:
        # Strip before testing so a whitespace-only value ends the scan
        # instead of producing an empty hostname.
        hostname = (os.environ.get(f'HOSTNAME_{index}') or '').strip()
        if not hostname:
            break

        username = os.environ.get(f'USERNAME_{index}')
        if username is not None:
            username = username.strip()
        # The password is deliberately left untouched: surrounding
        # whitespace may be part of the secret.
        password = os.environ.get(f'PASSWORD_{index}')

        script_index = 1
        while True:
            script_path = (
                os.environ.get(f'SCRIPT_PATHS_{index}_{script_index}') or ''
            ).strip()
            # A blank entry terminates the list; it must never become an
            # empty script path that is later handed to the remote shell.
            if not script_path:
                break
            configs.append({
                'index': index,
                'hostname': hostname,
                'username': username,
                'password': password,
                'script_path': script_path,
            })
            script_index += 1

        index += 1
    return configs
56
+
57
def parse_runtime(etime):
    """Convert a ``ps`` time field into the ``str(timedelta)`` representation.

    The accepted layout is ``[[DD-]HH:]MM:SS``: with three clock fields the
    value is hours:minutes:seconds, with two it is minutes:seconds, and an
    optional ``DD-`` prefix carries whole days.

    Args:
        etime: the raw field text, e.g. ``"05:23"`` or ``"1-02:03:04"``.

    Returns:
        str: e.g. ``"0:05:23"`` or ``"1 day, 2:03:04"``. Anything that cannot
        be parsed yields ``"0:00:00"`` rather than raising.
    """
    text = (etime or '').strip()
    day_text, _, clock = text.rpartition('-')

    try:
        days = int(day_text) if day_text else 0
        fields = [int(field) for field in clock.split(':')]
    except ValueError:
        # Non-numeric input (e.g. a header or fractional seconds) is treated
        # like any other unknown layout.
        return "0:00:00"

    if len(fields) == 3:
        hours, minutes, seconds = fields
    elif len(fields) == 2:
        # Two fields are minutes and seconds, not hours and minutes.
        hours = 0
        minutes, seconds = fields
    else:
        return "0:00:00"

    return str(timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds))
70
+
71
def _remote_output(client, command):
    """Run ``command`` over the SSH ``client`` and return its stripped stdout text."""
    _stdin, stdout, _stderr = client.exec_command(command)
    return stdout.read().decode('utf-8').strip()


def check_and_run_script(config):
    """Ensure the script described by ``config`` is running on its host.

    Connects over SSH with the configured password and looks for the script,
    first by the PID recorded on a previous check and then by searching the
    process list. If neither finds it, the script is started with
    ``nohup /bin/sh``. The outcome is stored in ``vps_status`` under
    ``"<hostname>:<script basename>"``; failures are recorded there as an
    ``Error: ...`` status instead of being raised.

    Args:
        config: dict with the keys ``index``, ``hostname``, ``username``,
            ``password`` and ``script_path``.
    """
    import shlex  # function-scope import: only needed for the remote shell commands

    # Derived before the try block so the error handler can always use them,
    # even when the SSH connection itself fails.
    script_path = config['script_path']
    script_name = os.path.basename(script_path)
    key = f"{config['hostname']}:{script_name}"

    logger.info(f"Checking VPS {config['index']}: {config['hostname']} - {config['script_path']}")
    client = None
    try:
        client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; confirm this is acceptable for the monitored hosts.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(hostname=config['hostname'], username=config['username'], password=config['password'], port=22)

        # Quote the path once; it is interpolated into remote shell commands.
        quoted_path = shlex.quote(script_path)
        pid = runtime = status = None

        # Only a numeric PID from a previous check is worth asking ps about;
        # placeholders such as "N/A" or "Unknown" are skipped.
        last_pid = str(vps_status.get(key, {}).get('pid') or '')
        if last_pid.isdigit():
            output = _remote_output(client, f"ps -p {last_pid} -o pid=,etime=,args=")
            if output:
                fields = output.split()
                pid = last_pid
                runtime = parse_runtime(fields[1]) if len(fields) > 1 else "0:00:00"
                status = "Running"

        # No usable PID, or that process is gone: search the process list
        # before restarting, so an instance running under a different PID is
        # not duplicated.
        if status is None:
            output = _remote_output(client, f"ps aux | grep -F -- {quoted_path} | grep -v grep")
            match = next((line for line in output.splitlines() if script_path in line), None)
            if match is not None:
                fields = match.split()
                pid = fields[1] if len(fields) > 1 else "Unknown"
                # Field 10 of `ps aux` is the TIME column.
                runtime = parse_runtime(fields[9]) if len(fields) > 9 else "0:00:00"
                status = "Running"

        if status is None:
            logger.info(f"Script {script_name} not running. Attempting to restart.")
            # `echo $!` prints the PID of the background job just started.
            new_pid = _remote_output(client, f"nohup /bin/sh {quoted_path} > /dev/null 2>&1 & echo $!")

            if new_pid.isdigit():
                pid, runtime, status = new_pid, "0:00:00", "Restarted"
            else:
                pid, runtime, status = "N/A", "N/A", "Restart Failed"

        vps_status[key] = {
            'index': config['index'],
            'status': status,
            'last_check': time.strftime('%Y-%m-%d %H:%M:%S'),
            'username': config['username'],
            'script_name': script_name,
            'runtime': runtime,
            'pid': pid
        }

    except Exception as e:
        logger.error(f"Error occurred while checking VPS {config['index']} - {config['hostname']} - {script_name}: {str(e)}")
        vps_status[key] = {
            'index': config['index'],
            'status': f"Error: {str(e)}",
            'last_check': time.strftime('%Y-%m-%d %H:%M:%S'),
            'username': config['username'],
            'script_name': script_name,
            'runtime': "N/A",
            'pid': "N/A"
        }
    finally:
        if client:
            client.close()
130
+
131
def check_all_vps():
    """Check every configured script, then log a summary table of all statuses.

    Each target returned by ``get_vps_configs()`` is passed to
    ``check_and_run_script``. Afterwards every entry currently held in
    ``vps_status`` is rendered as a fixed-width text table and logged at
    INFO level.
    """
    logger.info("Starting VPS check")
    for config in get_vps_configs():
        try:
            check_and_run_script(config)
        except Exception:
            # One broken target must not stop the remaining targets from
            # being checked or the summary from being logged.
            logger.exception(f"Unexpected failure while checking VPS {config['index']} - {config['hostname']}")

    # Column widths; the PID column is 7 wide so large PIDs are not cut off.
    widths = (7, 21, 16, 8, 23, 8, 8, 7)
    border = "+" + "+".join("-" * (width + 2) for width in widths) + "+"
    row_format = "| " + " | ".join(f"{{:<{width}}}" for width in widths) + " |"

    lines = [
        border,
        row_format.format("Index", "Hostname", "Script Name", "Status",
                          "Last Check", "Username", "Runtime", "PID"),
        border,
    ]

    for key, status in list(vps_status.items()):
        script_name = str(status['script_name'])
        # The key is "<hostname>:<script name>". Remove the known suffix
        # instead of splitting on ':', which breaks when the hostname (e.g.
        # an IPv6 address) or the script name contains a colon.
        suffix = ":" + script_name
        hostname = key[:-len(suffix)] if key.endswith(suffix) else key
        lines.append(row_format.format(
            status['index'], hostname[:21], script_name[:16], str(status['status'])[:8],
            status['last_check'], str(status['username'] or '')[:8], status['runtime'],
            str(status['pid'])[:7]
        ))
        lines.append(border)

    logger.info("\n" + "\n".join(lines) + "\n")
149
+
150
@app.route('/')
def index():
    """Render an HTML table with one row per entry in ``vps_status``.

    Each hostname links to ``/status/<key>`` for that entry's JSON detail.
    """
    html = '''
    <h1>VPS Status Overview</h1>
    <table border="1">
        <tr>
            <th>Index</th>
            <th>Hostname</th>
            <th>Script Name</th>
            <th>Status</th>
            <th>Last Check</th>
            <th>Username</th>
            <th>Runtime</th>
            <th>PID</th>
        </tr>
        {% for key, hostname, data in rows %}
        <tr>
            <td>{{ data.index }}</td>
            <td><a href="/status/{{ key | urlencode }}">{{ hostname }}</a></td>
            <td>{{ data.script_name }}</td>
            <td>{{ data.status }}</td>
            <td>{{ data.last_check }}</td>
            <td>{{ data.username }}</td>
            <td>{{ data.runtime }}</td>
            <td>{{ data.pid }}</td>
        </tr>
        {% endfor %}
    </table>
    '''
    rows = []
    # Iterate over a snapshot: the monitoring thread may add entries to
    # vps_status while this request is being rendered.
    for key, data in list(vps_status.items()):
        # The key is "<hostname>:<script name>". Remove the known suffix
        # rather than splitting on ':', which mis-handles hostnames that
        # contain colons.
        suffix = ':' + str(data['script_name'])
        hostname = key[:-len(suffix)] if key.endswith(suffix) else key
        rows.append((key, hostname, data))
    return render_template_string(html, rows=rows)
180
+
181
@app.route('/status/<path:key>')
def vps_status_detail(key):
    """Return the stored status entry for ``key`` as JSON, or a JSON error with 404."""
    if key not in vps_status:
        return jsonify({"error": "VPS or script not found"}), 404
    return jsonify(vps_status[key])
184
+
185
@app.route('/health')
def health_check():
    """Report a fixed "healthy" status plus seconds elapsed since ``start_time``."""
    # start_time is the module-level global assigned in main().
    uptime_seconds = time.time() - start_time
    payload = {"status": "healthy", "uptime": uptime_seconds}
    return jsonify(payload), 200
188
+
189
def run_flask():
    """Start the Flask app listening on all interfaces, port 8080."""
    listen_on = {'host': '0.0.0.0', 'port': 8080}
    app.run(**listen_on)
191
+
192
def main():
    """Start the web server thread and run the monitoring loop forever.

    Records the start time in the global ``start_time``, launches Flask in a
    background thread, performs one immediate check of all targets, schedules
    further checks every 15 minutes, and then polls the scheduler once a
    minute, logging a heartbeat every fifth iteration.
    """
    global start_time
    start_time = time.time()

    logger.info("===== VPS monitoring script is starting =====")

    # Daemon thread: the web server must not keep the process alive after
    # the monitoring loop has stopped.
    Thread(target=run_flask, daemon=True).start()
    logger.info("Flask server started in background")

    try:
        check_all_vps()
    except Exception:
        # A failed first pass must not prevent the periodic checks below.
        logger.exception("Initial VPS check failed")
    schedule.every(15).minutes.do(check_all_vps)
    logger.info("Scheduled VPS check every 15 minutes")

    logger.info("===== VPS monitoring script is running =====")

    heartbeat_count = 0
    while True:
        try:
            schedule.run_pending()
        except Exception:
            # Keep the loop alive; otherwise monitoring would stop silently
            # while /health keeps answering.
            logger.exception("Scheduled VPS check failed")
        time.sleep(60)
        heartbeat_count += 1
        if heartbeat_count % 5 == 0:
            # Measure real elapsed time: a loop iteration lasts longer than
            # 60 seconds whenever checks run.
            uptime_minutes = int((time.time() - start_time) // 60)
            logger.info(f"Heartbeat: Script is still running. Uptime: {uptime_minutes} minutes")
214
+
215
# Run the monitor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()