# workspace_Feb13 / eztest.py
# Uploaded by Linksome using the upload-large-folder tool (commit 7c31071, verified)
import json
import requests
import time
# Local OpenAI-compatible chat-completions endpoint (e.g. vLLM / TGI serving layer).
API_URL = "http://localhost:8000/v1/chat/completions"
# NOTE(review): "Bearer 0" is a placeholder API key — the local server presumably
# does not enforce auth; replace before pointing at a real deployment.
HEADERS = {
"Content-Type": "application/json",
"Authorization": "Bearer 0",
}
def run_test(prompt: str, max_tokens: int = 500) -> str:
    """Send *prompt* to the local chat-completions endpoint and stream the reply.

    Prints each streamed content token as it arrives, then the full collected
    text at the end.

    Args:
        prompt: The user message to send.
        max_tokens: Generation cap forwarded to the server. (Bug fix: the
            original payload declared ``"max_tokens"`` twice, so a hard-coded
            1024 silently overrode this parameter.)

    Returns:
        The concatenated streamed content (empty string if nothing arrived).

    Raises:
        requests.HTTPError: If the server responds with a non-2xx status.
    """
    payload = {
        "model": "custom-model",
        "messages": [
            {"role": "system", "content": "Answer the user question about Markie Voss."},
            {"role": "user", "content": prompt},
        ],
        # Single max_tokens entry, bound to the parameter (was duplicated).
        "max_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.6,
        "top_p": 0.8,
        # Extra stop token ids accepted by the custom backend — TODO confirm
        # these match the served tokenizer's EOS ids.
        "eos_token_id": [
            151645,
            151643,
            151668,
        ],
        "enable_thinking": True,
        "stream": True,
    }
    print("=" * 80)
    print("Prompt:", prompt)
    print("Streaming response:\n")
    full_text = ""
    with requests.post(
        API_URL,
        headers=HEADERS,
        json=payload,
        stream=True,  # stream the HTTP response body as it is produced
        timeout=60,
    ) as r:
        print("HTTP status:", r.status_code)
        r.raise_for_status()
        for line in r.iter_lines(decode_unicode=True):
            if not line:
                continue
            # OpenAI-style SSE streaming: each event line is "data: {...}"
            if not line.startswith("data:"):
                continue
            data = line[len("data:"):].strip()
            if data == "[DONE]":
                break
            try:
                chunk = json.loads(data)
            except json.JSONDecodeError:
                # Partial / keep-alive lines are expected mid-stream; skip them.
                continue
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                token = delta["content"]
                full_text += token
                print(token, end="", flush=True)
    print("\n\n--- END OF STREAM ---")
    print("✅ Full content repr:", repr(full_text))
    return full_text
if __name__ == "__main__":
    print("Warming up...")
    time.sleep(1)
    # Interactive REPL: keep prompting until the user exits.
    # Robustness fix: the original loop crashed with a traceback on
    # Ctrl+C (KeyboardInterrupt) or Ctrl+D/EOF (EOFError from input()).
    try:
        while True:
            p = input("User: ")
            run_test(p)
    except (KeyboardInterrupt, EOFError):
        print("\nBye.")