| import requests |
| import json |
| import time |
| import sys |
|
|
# Prefix that every request URL in this script is built from.
BASE_URL: str = "http://localhost:8000/v1"
# Value sent in the "model" field of each chat-completion payload.
MODEL_NAME: str = "RWKV-GLM-4.7-Flash-Preview-v0.1"
|
|
| |
| |
| |
def print_section(title):
    """Print *title* framed above and below by a 60-character ``=`` rule.

    A blank line is written first so that consecutive sections are visually
    separated in the console output.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(title)
    print(rule)
|
|
|
|
def safe_json(resp):
    """Decode the JSON body of *resp*, aborting the script if it is not JSON.

    Args:
        resp: Response-like object exposing a ``json()`` method and a
            ``text`` attribute.

    Returns:
        Whatever ``resp.json()`` returns.

    Raises:
        SystemExit: With status 1 when ``resp.json()`` raises ``ValueError``;
            a failure banner and the raw body are printed first.
    """
    try:
        return resp.json()
    except ValueError:
        # JSON decode errors derive from ValueError. Catching only that lets
        # KeyboardInterrupt and unrelated bugs propagate instead of being
        # misreported as a decode failure, which a bare ``except:`` would do.
        print("❌ JSON decode failed")
        print(resp.text)
        sys.exit(1)
|
|
|
|
| |
| |
| |
def test_models():
    """Check that ``GET {BASE_URL}/models`` returns a non-empty model list.

    Prints the ``id`` of every entry under the response's ``"data"`` key.

    Raises:
        AssertionError: If the status is not 200, the body has no ``"data"``
            key, or the value under ``"data"`` is empty.
        requests.exceptions.Timeout: If the server does not answer within
            30 seconds.
    """
    print_section("TEST: /v1/models")

    # Without a timeout, requests waits indefinitely on an unresponsive
    # server, which would stall the whole script on this first check.
    resp = requests.get(f"{BASE_URL}/models", timeout=30)
    # Include the actual status so a failure can be diagnosed from the log.
    assert resp.status_code == 200, f"Models API failed (HTTP {resp.status_code})"

    data = safe_json(resp)

    assert "data" in data, "No model list returned"
    assert data["data"], "Empty model list"

    print("✅ Models endpoint OK")
    print("Available models:", [m["id"] for m in data["data"]])
|
|
|
|
| |
| |
| |
def test_basic_completion():
    """Send one non-streaming chat completion and check the reply's shape.

    Posts a single user message capped at 30 tokens, asserts that the response
    is HTTP 200 and contains both ``"choices"`` and ``"usage"``, then prints
    the first choice's message content and the usage block.
    """
    print_section("TEST: Basic Non-Streaming Completion")

    request_body = json.dumps(
        {
            "model": MODEL_NAME,
            "messages": [{"role": "user", "content": "Say hello."}],
            "max_tokens": 30,
            "stream": False,
        }
    )
    endpoint = f"{BASE_URL}/chat/completions"
    resp = requests.post(
        endpoint,
        headers={"Content-Type": "application/json"},
        data=request_body,
    )

    assert resp.status_code == 200, "Completion failed"

    body = safe_json(resp)

    # Same two checks as before, in the same order, driven from a table.
    required = (
        ("choices", "No choices returned"),
        ("usage", "No usage returned"),
    )
    for key, failure in required:
        assert key in body, failure

    first_message = body["choices"][0]["message"]
    print("Assistant:", first_message["content"])
    print("Usage:", body["usage"])
    print("✅ Basic completion OK")
|
|
|
|
| |
| |
| |
def test_streaming():
    """Stream a chat completion and check that some text content arrives.

    Posts a request with ``"stream": True``, reads the response line by line,
    and treats each line starting with ``data:`` as one JSON chunk until the
    ``[DONE]`` marker. Text found under ``choices[0].delta.content`` is echoed
    as it arrives and accumulated.

    Raises:
        AssertionError: If the status is not 200 or no text was received.
    """
    print_section("TEST: Streaming Completion")

    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Count from 1 to 5."}],
        "max_tokens": 50,
        "stream": True,
    }

    field_prefix = "data:"
    pieces = []

    with requests.post(
        f"{BASE_URL}/chat/completions",
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload),
        stream=True,
    ) as resp:
        assert resp.status_code == 200, "Streaming failed"

        for raw_line in resp.iter_lines():
            if not raw_line:
                continue  # blank separator between events

            decoded = raw_line.decode("utf-8")
            if not decoded.startswith(field_prefix):
                continue

            # The space after "data:" is optional, so strip instead of
            # requiring the exact "data: " prefix.
            content = decoded[len(field_prefix):].strip()
            if content == "[DONE]":
                break

            chunk = json.loads(content)

            # A chunk with an empty "choices" list or a null/absent "content"
            # would otherwise raise IndexError/TypeError; skip it instead.
            choices = chunk.get("choices") or []
            if not choices:
                continue
            text = (choices[0].get("delta") or {}).get("content")
            if text:
                print(text, end="", flush=True)
                pieces.append(text)

    full_text = "".join(pieces)

    # Finish the echoed line, then verify BEFORE announcing success so a
    # failing run never prints the OK banner.
    print()
    assert full_text, "Streaming returned empty"
    print("\n✅ Streaming OK")
|
|
|
|
| |
| |
| |
def test_sampling_variations():
    """Issue one non-streaming completion per sampling-parameter override.

    Each override is merged on top of a shared payload and posted on its own.
    Every response must be HTTP 200; the first 120 characters of the reply are
    printed next to the override that produced it.
    """
    print_section("TEST: Sampling Variations")

    shared_fields = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "user", "content": "Write a creative sentence about AI."}
        ],
        "max_tokens": 50,
        "stream": False,
    }

    sampling_overrides = (
        {"temperature": 0.0},
        {"temperature": 0.7},
        {"top_p": 0.8},
        {"top_k": 20},
        {"repetition_penalty": 1.2},
        {"presence_penalty": 0.5},
        {"frequency_penalty": 0.5},
    )

    endpoint = f"{BASE_URL}/chat/completions"

    for cfg in sampling_overrides:
        # Shared fields first, override last, so the override wins on clashes.
        merged = {**shared_fields, **cfg}

        resp = requests.post(
            endpoint,
            headers={"Content-Type": "application/json"},
            data=json.dumps(merged),
        )

        assert resp.status_code == 200, f"Sampling failed: {cfg}"

        text = safe_json(resp)["choices"][0]["message"]["content"]

        print(f"\nConfig: {cfg}")
        print("Output:", text[:120], "...")

    print("\n✅ Sampling parameter variations OK")
|
|
|
|
| |
| |
| |
def test_deterministic():
    """Send the same ``temperature=0.0`` request twice and compare the replies.

    Raises:
        AssertionError: If either response is not HTTP 200, or the two
            message contents differ.
    """
    print_section("TEST: Deterministic Mode (temperature=0)")

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "user", "content": "Define gravity in one sentence."}
        ],
        "temperature": 0.0,
        "max_tokens": 50,
        "stream": False,
    }
    # Serialise once so both runs send byte-identical bodies.
    body = json.dumps(payload)

    outputs = []
    for run in (1, 2):
        resp = requests.post(
            f"{BASE_URL}/chat/completions",
            headers={"Content-Type": "application/json"},
            data=body,
        )
        # Check the status first, as the other tests do; otherwise a server
        # error would surface as an opaque KeyError on "choices".
        assert resp.status_code == 200, (
            f"Deterministic run {run} failed (HTTP {resp.status_code})"
        )
        outputs.append(safe_json(resp)["choices"][0]["message"]["content"])

    out1, out2 = outputs

    print("Run1:", out1)
    print("Run2:", out2)

    assert out1 == out2, "❌ Deterministic mode not deterministic"
    print("✅ Deterministic check OK")
|
|
|
|
| |
| |
| |
def test_error_handling():
    """Post a payload that carries only ``model`` and report the reaction.

    This check is advisory: it prints a verdict based on the HTTP status and
    never raises on it.

    * 4xx -> the request was rejected as a client error (reported as OK).
    * 200 -> the request was accepted (warning).
    * anything else -> the server answered with something other than a client
      error, e.g. a 5xx failure (warning).
    """
    print_section("TEST: Error Handling")

    # Unlike every other request in this script, no "messages" field is sent;
    # presumably that omission is what makes the request invalid.
    payload = {"model": MODEL_NAME}

    resp = requests.post(
        f"{BASE_URL}/chat/completions",
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload),
    )

    status = resp.status_code
    if 400 <= status < 500:
        print("✅ Server correctly handled bad request")
    elif status == 200:
        print("⚠️ Warning: server did not reject bad request")
    else:
        # A 5xx means the server failed while handling the request, which is
        # not the same as validating and rejecting it.
        print(
            f"⚠️ Warning: server answered bad request with HTTP {status} "
            "instead of a 4xx client error"
        )
|
|
|
|
| |
| |
| |
if __name__ == "__main__":
    # Run every check in a fixed order. An assertion failure or sys.exit()
    # inside a check stops the script before the summary below is printed.
    start = time.time()

    for run_check in (
        test_models,
        test_basic_completion,
        test_streaming,
        test_sampling_variations,
        test_deterministic,
        test_error_handling,
    ):
        run_check()

    print_section("ALL TESTS PASSED")
    elapsed = round(time.time() - start, 2)
    print(f"Total time: {elapsed} sec")