# Hanrui/sglang/test/registered/spec/test_constrained_decoding_spec_reasoning.py
# Lekr0's picture
# Add files using upload-large-folder tool
# a402b9b verified
import json
import unittest
import openai
from sglang.srt.environ import envs
from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
# Constrained decoding with EAGLE3 speculative reasoning (tp=2).
# Registers this test file with the CUDA CI in the "stage-b-test-large-2-gpu"
# suite. NOTE(review): est_time is presumably the expected runtime used for
# scheduling (units are not visible here) — confirm against ci_register.
register_cuda_ci(est_time=60, suite="stage-b-test-large-2-gpu")
class ServerWithGrammar(CustomTestCase):
    """JSON-schema constrained decoding with EAGLE3 speculation and reasoning.

    ``setUpClass`` launches one server for ``openai/gpt-oss-120b`` with the
    ``gpt-oss`` reasoning parser and EAGLE3 speculative decoding (``--tp=2``).
    The test sends a schema-constrained chat completion through the
    OpenAI-compatible ``/v1`` endpoint and checks that the reply carries a
    non-empty reasoning trace and a JSON answer matching the schema.
    """

    # The schema is stored as a JSON *string*; use json.loads() where a dict
    # is required and embed it verbatim where text is required.
    json_schema = json.dumps(
        {
            "type": "object",
            "properties": {
                "name": {"type": "string", "pattern": "^[\\w]+$"},
                "population": {"type": "integer"},
                "languages": {
                    "type": "array",
                    "items": {"type": "string"},
                    "minItems": 1,
                },
                "has_held_olympics": {"type": "boolean"},
            },
            "required": ["name", "population", "languages", "has_held_olympics"],
            "additionalProperties": False,
        }
    )

    @classmethod
    def setUpClass(cls):
        """Start the server process shared by all tests in this class."""
        cls.model = "openai/gpt-oss-120b"
        cls.base_url = DEFAULT_URL_FOR_TEST
        launch_args = [
            "--trust-remote-code",
            "--tp=2",
            "--reasoning-parser=gpt-oss",
            "--speculative-algorithm=EAGLE3",
            "--speculative-draft-model-path=lmsys/EAGLE3-gpt-oss-120b-bf16",
            "--speculative-num-steps=5",
            "--speculative-eagle-topk=4",
            "--speculative-num-draft-tokens=8",
        ]
        # Both overrides are active only while the server is being spawned.
        # NOTE(review): presumably the child process inherits them from the
        # environment at launch time — confirm against popen_launch_server.
        with envs.SGLANG_SPEC_NAN_DETECTION.override(
            True
        ), envs.SGLANG_SPEC_OOB_DETECTION.override(True):
            cls.process = popen_launch_server(
                cls.model,
                cls.base_url,
                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                other_args=launch_args,
            )

    @classmethod
    def tearDownClass(cls):
        """Terminate the server process tree started in ``setUpClass``."""
        kill_process_tree(cls.process.pid)

    def test_json_openai(self):
        """Request a schema-constrained completion and validate the reply."""
        client = openai.Client(api_key="EMPTY", base_url=f"{self.base_url}/v1")
        response = client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a helpful AI assistant"},
                {
                    "role": "user",
                    # json_schema is already serialized; dumping it again
                    # would put an escaped string literal into the prompt
                    # instead of the schema itself.
                    "content": "Introduce the capital of France. Return in a JSON format. "
                    "The JSON Schema is: " + self.json_schema,
                },
            ],
            temperature=0,
            max_tokens=1024,
            response_format={
                "type": "json_schema",
                "json_schema": {"name": "foo", "schema": json.loads(self.json_schema)},
            },
        )

        message = response.choices[0].message
        text = message.content

        print("\n=== Reasoning Content ===")
        reasoning_content = message.reasoning_content
        # unittest assertion instead of a bare ``assert``: it survives
        # ``python -O`` and reports a readable failure message.
        self.assertTrue(
            reasoning_content, "reasoning_content is missing or empty"
        )
        print(reasoning_content)

        try:
            js_obj = json.loads(text)
            print("\n=== Parsed JSON Content ===")
            print(json.dumps(js_obj))
        except (TypeError, json.decoder.JSONDecodeError):
            # TypeError covers ``text is None``; show the raw payload, then
            # let the test fail with the original exception.
            print("JSONDecodeError", text)
            raise

        # Every field listed under "required" must be present with the
        # declared type.
        self.assertIsInstance(js_obj["name"], str)
        self.assertIsInstance(js_obj["population"], int)
        self.assertIsInstance(js_obj["languages"], list)
        self.assertGreaterEqual(len(js_obj["languages"]), 1)  # minItems: 1
        self.assertIsInstance(js_obj["has_held_olympics"], bool)
# Run the tests with unittest's runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()