|
|
|
""" |
|
Poll the Mistral Nemo service until its health check passes, then run a chat-completion smoke test; exit 0 on success, 1 on failure.
|
""" |
|
import time |
|
import requests |
|
import json |
|
import sys |
|
|
|
def check_service_health():
    """Return True when the health endpoint reports a healthy service.

    Sends a GET request to ``http://localhost:8001/health`` with a 5-second
    timeout.

    Returns:
        True only if the response is HTTP 200 and its body is a JSON object
        whose ``"status"`` field equals ``"healthy"``. Connection errors,
        timeouts, non-200 responses, and bodies that are not a JSON object
        all yield False so the caller can simply keep polling.
    """
    try:
        response = requests.get("http://localhost:8001/health", timeout=5)
    except requests.exceptions.RequestException:
        # Service not reachable yet (connection refused, timeout, ...).
        return False

    if response.status_code != 200:
        return False

    try:
        data = response.json()
    except ValueError:
        # Body is not valid JSON. Every JSON decode error raised by
        # Response.json() is a ValueError subclass, on old and new
        # versions of requests alike.
        return False

    # Valid JSON is not necessarily an object (could be a list, string, null).
    return isinstance(data, dict) and data.get("status") == "healthy"
|
|
|
def test_chat_completion():
    """Send one chat-completion request and report whether it succeeded.

    Posts a single user message to
    ``http://localhost:8001/v1/chat/completions`` (30-second timeout) and
    prints the outcome.

    Returns:
        True if the service answered with HTTP 200 and the reply text could
        be read from ``choices[0].message.content``; False on a request
        error, a non-200 status, or a response body that does not have the
        expected shape.
    """
    payload = {
        "model": "unsloth/Mistral-Nemo-Instruct-2407",
        "messages": [
            {
                "role": "user",
                "content": "Hello! Please say 'Service is working correctly' if you can read this.",
            }
        ],
        "max_tokens": 50,
        "temperature": 0.7,
    }

    try:
        response = requests.post(
            "http://localhost:8001/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        print(f"❌ Chat completion error: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ Chat completion failed: {response.status_code} - {response.text}")
        return False

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 with a malformed or unexpected body is a test failure,
        # not a reason to crash the monitor.
        print(f"❌ Chat completion returned an unexpected response: {e!r}")
        return False

    print(f"✅ Chat completion successful: {content}")
    return True
|
|
|
def monitor_service(max_checks=300, interval=5):
    """Poll the health endpoint until the service is ready, then smoke-test it.

    Args:
        max_checks: Maximum number of health checks to perform before
            giving up.
        interval: Seconds to sleep after each unsuccessful health check.

    Returns:
        True if the service became healthy and the chat-completion test
        passed. False if the chat-completion test failed, or if the service
        never reported healthy within ``max_checks`` checks.
    """
    print("🔍 Monitoring Mistral Nemo service startup...")
    print("📥 Waiting for model download and loading to complete...")

    for check_count in range(1, max_checks + 1):
        if check_service_health():
            print("\n🎉 Service is healthy! Running tests...")

            if test_chat_completion():
                print("\n✅ All tests passed! Mistral Nemo service is fully operational.")
                return True

            print("\n⚠️ Service health check passed but chat completion failed.")
            return False

        # Animate 0-3 trailing dots; the field is padded to width 3 so shorter
        # runs overwrite longer ones when the line is redrawn via "\r".
        dots = "." * (check_count % 4)
        print(
            f"\r⏳ Waiting for service to be ready{dots:<3} ({check_count}/{max_checks})",
            end="",
            # No newline is written, so flush explicitly or the progress
            # line may stay in the output buffer.
            flush=True,
        )
        time.sleep(interval)

    print(f"\n❌ Service didn't become ready after {max_checks * interval} seconds")
    return False
|
|
|
if __name__ == "__main__":
    # The process exit status mirrors the monitoring outcome:
    # 0 when the service came up and passed its test, 1 otherwise.
    exit_code = 0 if monitor_service() else 1
    sys.exit(exit_code)
|
|