test_client_api.py · OpenMOSE/RWKV-GLM-4.7-Flash-exp at main

RWKV-GLM-4.7-Flash-exp / test_client_api.py

Upload folder using huggingface_hub

411272a 10 days ago

6.68 kB

	import requests
	import json
	import time
	import sys

	# Root URL of the HTTP API under test; the tests below append endpoint
	# paths such as /models and /chat/completions to it.
	BASE_URL = "http://localhost:8000/v1"
	# Value sent as the "model" field of every request payload in this file.
	MODEL_NAME = "RWKV-GLM-4.7-Flash-Preview-v0.1"

	# ==========================================================
	# Utility
	# ==========================================================
	def print_section(title):
	    """Print ``title`` as a banner framed by two 60-character ``=`` rules.

	    A blank line is written before the top rule so that consecutive
	    sections are separated in the console output.
	    """
	    rule = "=" * 60
	    print("\n" + rule, title, rule, sep="\n")


	def safe_json(resp):
	    """Decode the JSON body of ``resp`` or abort the script.

	    Args:
	        resp: Response-like object exposing ``json()`` and ``text``
	            (in this file, the result of a ``requests`` call).

	    Returns:
	        Whatever ``resp.json()`` returns.

	    Raises:
	        SystemExit: With exit code 1 when the body cannot be decoded as
	            JSON; the raw body text is printed first to aid debugging.
	    """
	    try:
	        return resp.json()
	    except ValueError:
	        # JSON decode errors are ValueError subclasses. A bare ``except``
	        # would also trap KeyboardInterrupt/SystemExit and misreport them
	        # as a JSON failure, so only decoding errors are handled here.
	        print("❌ JSON decode failed")
	        print(resp.text)
	        sys.exit(1)


	# ==========================================================
	# 1️⃣ Models API
	# ==========================================================
	def test_models():
	    """Check that ``GET {BASE_URL}/models`` answers 200 with a non-empty list.

	    The decoded body must contain a ``"data"`` key holding at least one
	    entry; the ``"id"`` of every entry is printed.
	    """
	    print_section("TEST: /v1/models")

	    response = requests.get(f"{BASE_URL}/models")
	    assert response.status_code == 200, "Models API failed"

	    body = safe_json(response)
	    assert "data" in body, "No model list returned"

	    models = body["data"]
	    assert len(models) > 0, "Empty model list"

	    print("✅ Models endpoint OK")
	    model_ids = [entry["id"] for entry in models]
	    print("Available models:", model_ids)


	# ==========================================================
	# 2️⃣ Non-stream basic
	# ==========================================================
	def test_basic_completion():
	    """Send one non-streaming chat completion and print the reply.

	    The request must return HTTP 200 and the decoded body must contain
	    both ``"choices"`` and ``"usage"``; the first choice's message content
	    and the usage object are printed.
	    """
	    print_section("TEST: Basic Non-Streaming Completion")

	    request_body = json.dumps({
	        "model": MODEL_NAME,
	        "messages": [{"role": "user", "content": "Say hello."}],
	        "max_tokens": 30,
	        "stream": False,
	    })
	    response = requests.post(
	        f"{BASE_URL}/chat/completions",
	        headers={"Content-Type": "application/json"},
	        data=request_body,
	    )
	    assert response.status_code == 200, "Completion failed"

	    result = safe_json(response)
	    required = (
	        ("choices", "No choices returned"),
	        ("usage", "No usage returned"),
	    )
	    for field, failure in required:
	        assert field in result, failure

	    first_choice = result["choices"][0]
	    print("Assistant:", first_choice["message"]["content"])
	    print("Usage:", result["usage"])
	    print("✅ Basic completion OK")


	# ==========================================================
	# 3️⃣ Streaming
	# ==========================================================
	def test_streaming():
	    """Exercise ``POST /chat/completions`` with ``stream=True``.

	    The response is read line by line. Every ``data:`` line is parsed as
	    one JSON chunk and its text delta, if any, is echoed as it arrives.
	    Reading stops at the ``[DONE]`` sentinel or at end of stream.

	    Raises:
	        AssertionError: If the request does not return HTTP 200 or no
	            text at all was received.
	    """
	    print_section("TEST: Streaming Completion")

	    payload = {
	        "model": MODEL_NAME,
	        "messages": [{"role": "user", "content": "Count from 1 to 5."}],
	        "max_tokens": 50,
	        "stream": True,
	    }

	    # Collect fragments and join once instead of repeated string "+=".
	    pieces = []

	    with requests.post(
	        f"{BASE_URL}/chat/completions",
	        headers={"Content-Type": "application/json"},
	        data=json.dumps(payload),
	        stream=True,
	    ) as resp:

	        assert resp.status_code == 200, "Streaming failed"

	        for line in resp.iter_lines():
	            if not line:
	                continue  # blank separator between events

	            decoded = line.decode("utf-8")
	            # The space after "data:" is optional in an event stream, so
	            # match on the field name only and trim the value.
	            if not decoded.startswith("data:"):
	                continue
	            content = decoded[len("data:"):].strip()

	            if content == "[DONE]":
	                break

	            chunk = json.loads(content)

	            # Tolerate chunks that carry no choices (e.g. a trailing
	            # usage-only chunk) instead of raising IndexError.
	            choices = chunk.get("choices") or []
	            if not choices:
	                continue

	            # "content" may be absent or null (e.g. a role-only first
	            # delta); only real text is echoed and accumulated.
	            delta = choices[0].get("delta") or {}
	            text = delta.get("content")
	            if text:
	                print(text, end="", flush=True)
	                pieces.append(text)

	    full_text = "".join(pieces)
	    # Verify before announcing success so "OK" is never printed for an
	    # empty stream.
	    assert len(full_text) > 0, "Streaming returned empty"
	    print("\n\n✅ Streaming OK")


	# ==========================================================
	# 4️⃣ Sampling Variations
	# ==========================================================
	def test_sampling_variations():
	    """Send the same prompt once per sampling override and print each reply.

	    Each override is merged on top of a shared non-streaming payload, the
	    request must return HTTP 200, and the first 120 characters of the
	    first choice's message content are printed.
	    """
	    print_section("TEST: Sampling Variations")

	    common_fields = {
	        "model": MODEL_NAME,
	        "messages": [{"role": "user", "content": "Write a creative sentence about AI."}],
	        "max_tokens": 50,
	        "stream": False,
	    }

	    overrides = (
	        {"temperature": 0.0},
	        {"temperature": 0.7},
	        {"top_p": 0.8},
	        {"top_k": 20},
	        {"repetition_penalty": 1.2},
	        {"presence_penalty": 0.5},
	        {"frequency_penalty": 0.5},
	    )

	    for cfg in overrides:
	        # Override keys come last, matching copy-then-update ordering.
	        request_body = {**common_fields, **cfg}

	        response = requests.post(
	            f"{BASE_URL}/chat/completions",
	            headers={"Content-Type": "application/json"},
	            data=json.dumps(request_body),
	        )
	        assert response.status_code == 200, f"Sampling failed: {cfg}"

	        reply = safe_json(response)["choices"][0]["message"]["content"]

	        print(f"\nConfig: {cfg}")
	        print("Output:", reply[:120], "...")

	    print("\n✅ Sampling parameter variations OK")


	# ==========================================================
	# 5️⃣ Deterministic Check (temperature=0)
	# ==========================================================
	def test_deterministic():
	    """Check that two identical ``temperature=0`` requests give equal text.

	    The same non-streaming payload is posted twice; both replies are
	    printed and compared for exact equality.

	    Raises:
	        AssertionError: If either request does not return HTTP 200 or the
	            two replies differ.
	    """
	    print_section("TEST: Deterministic Mode (temperature=0)")

	    payload = {
	        "model": MODEL_NAME,
	        "messages": [{"role": "user", "content": "Define gravity in one sentence."}],
	        "temperature": 0.0,
	        "max_tokens": 50,
	        "stream": False,
	    }

	    outputs = []
	    for run in (1, 2):
	        resp = requests.post(
	            f"{BASE_URL}/chat/completions",
	            headers={"Content-Type": "application/json"},
	            data=json.dumps(payload),
	        )
	        # Fail with a clear message on an error response rather than with
	        # a KeyError when indexing an error body below.
	        assert resp.status_code == 200, (
	            f"Deterministic run {run} failed (HTTP {resp.status_code})"
	        )
	        outputs.append(safe_json(resp)["choices"][0]["message"]["content"])

	    out1, out2 = outputs

	    print("Run1:", out1)
	    print("Run2:", out2)

	    assert out1 == out2, "❌ Deterministic mode not deterministic"
	    print("✅ Deterministic check OK")


	# ==========================================================
	# 6️⃣ Error Handling
	# ==========================================================
	def test_error_handling():
	    """Post a payload without ``messages`` and report how the server reacts.

	    This check is informational only: it prints a verdict and never
	    raises on the status code. A 4xx status counts as a proper rejection;
	    a 200 or any other status is reported as a warning.
	    """
	    print_section("TEST: Error Handling")

	    payload = {
	        "model": MODEL_NAME,
	        # missing messages intentionally
	    }

	    resp = requests.post(
	        f"{BASE_URL}/chat/completions",
	        headers={"Content-Type": "application/json"},
	        data=json.dumps(payload),
	    )

	    status = resp.status_code
	    if 400 <= status < 500:
	        print("✅ Server correctly handled bad request")
	    elif status == 200:
	        print("⚠️ Warning: server did not reject bad request")
	    else:
	        # e.g. a 5xx: the server failed instead of validating the input,
	        # which should not be reported as correct handling.
	        print(f"⚠️ Warning: unexpected status {status} for bad request")


	# ==========================================================
	# Main
	# ==========================================================
	if __name__ == "__main__":
	    # Run every check in order. A failed assertion, or the sys.exit() in
	    # safe_json, stops the script before the summary banner is printed.
	    started_at = time.time()

	    checks = (
	        test_models,
	        test_basic_completion,
	        test_streaming,
	        test_sampling_variations,
	        test_deterministic,
	        test_error_handling,
	    )
	    for check in checks:
	        check()

	    print_section("ALL TESTS PASSED")
	    elapsed = round(time.time() - started_at, 2)
	    print(f"Total time: {elapsed} sec")