FasterDFlash
/

Hanrui

Model card Files Files and versions

Hanrui / sglang /examples /runtime /multimodal /pixtral_server.py

Lekr0's picture

Add files using upload-large-folder tool

61ba51e verified 29 days ago

history blame contribute delete

3.91 kB

	"""
	Usage:
	# Run a Pixtral model with SGLang:
	# HuggingFace:
	python -m sglang.launch_server --model-path mistral-community/pixtral-12b --port=30000
	# ModelScope:
	python -m sglang.launch_server --model-path AI-ModelScope/pixtral-12b --port=30000

	# Then test it with:
	python pixtral_server.py

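	This script tests a Pixtral model with multiple images by default, or with a
	single image when --single-image is passed. It sends one non-streaming request
	and then, unless --no-stream is given, one streaming request.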
	"""

	import argparse
	import asyncio
	import json

	import aiohttp
	import requests

	from sglang.utils import normalize_base_url

	# Image placeholder appended to the prompt text; the multi-image prompts repeat
	# it once per image URL.
	IMAGE_TOKEN_SEP = "\n[IMG]"
	# Path appended to the normalized base URL to form the request endpoint.
	ROUTE = "/generate"


	async def send_request(url, data, delay=0):
	    """POST ``data`` as JSON to ``url`` and return the decoded JSON reply.

	    Args:
	        url: Full endpoint URL to post to.
	        data: JSON-serializable request body.
	        delay: Seconds to sleep before the request is issued.

	    Returns:
	        The response body parsed as JSON.
	    """
	    await asyncio.sleep(delay)
	    # A single combined ``async with`` opens the session, then the request,
	    # and releases them in reverse order.
	    async with aiohttp.ClientSession() as http, http.post(url, json=data) as reply:
	        parsed = await reply.json()
	    return parsed


	async def test_concurrent(args):
	    """Send one non-streaming generate request and print the server's reply.

	    Args:
	        args: Parsed CLI namespace; reads ``host``, ``port`` and
	            ``single_image``.

	    The prompt carries one ``IMAGE_TOKEN_SEP`` placeholder per image. After
	    printing the whole response, its ``"text"`` entry is printed separately
	    when present.
	    """
	    endpoint = f"{normalize_base_url(args.host, args.port)}{ROUTE}"

	    if not args.single_image:
	        # Multiple images test
	        image_urls = [
	            "https://picsum.photos/id/237/400/300",
	            "https://picsum.photos/id/27/500/500",
	        ]
	        prompt = f"<s>[INST]How many photos are there? Describe each in a very short sentence.{IMAGE_TOKEN_SEP * len(image_urls)}[/INST]"
	        image_payload = image_urls
	        modality = ["multi-images"]
	    else:
	        # Single image test
	        prompt = f"<s>[INST]Describe this image in detail.{IMAGE_TOKEN_SEP}[/INST]"
	        image_payload = "https://picsum.photos/id/237/400/300"
	        modality = ["image"]

	    sampling = {
	        "max_new_tokens": 100,
	        "temperature": 0.7,
	        "top_p": 0.9,
	    }
	    request_body = {
	        "text": prompt,
	        "image_data": image_payload,
	        "sampling_params": sampling,
	        "modalities": modality,
	    }

	    response = await send_request(endpoint, request_body)

	    print(f"Response: {response}")
	    if "text" in response:
	        print("\nOutput text:", response["text"])


	def test_streaming(args):
	    """Send one streaming generate request and print the text as it arrives.

	    Args:
	        args: Parsed CLI namespace; reads ``host``, ``port`` and
	            ``single_image``.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.

	    Each ``data:`` line of the response is parsed as JSON, and only the part
	    of its ``"text"`` field beyond what was already printed is emitted, i.e.
	    every event's text is treated as the cumulative output so far. Reading
	    stops at the ``data: [DONE]`` line.
	    """
	    # Use the shared ROUTE constant so both tests target the same endpoint.
	    url = f"{normalize_base_url(args.host, args.port)}{ROUTE}"

	    # Single image test
	    if args.single_image:
	        prompt = f"<s>[INST]Describe this image in detail.{IMAGE_TOKEN_SEP}[/INST]"
	        image_data = "https://picsum.photos/id/237/400/300"
	        modality = ["image"]
	    # Multiple images test
	    else:
	        image_urls = [
	            "https://picsum.photos/id/237/400/300",
	            "https://picsum.photos/id/27/500/500",
	        ]
	        prompt = f"<s>[INST]How many photos are there? Describe each in a very short sentence.{IMAGE_TOKEN_SEP * len(image_urls)}[/INST]"
	        image_data = image_urls
	        modality = ["multi-images"]

	    pload = {
	        "text": prompt,
	        "image_data": image_data,
	        "sampling_params": {"max_new_tokens": 100, "temperature": 0.7, "top_p": 0.9},
	        "modalities": modality,
	        "stream": True,
	    }

	    # The context manager releases the streamed connection even though the
	    # loop exits early on "[DONE]" without consuming the whole body.
	    with requests.post(url, json=pload, stream=True) as response:
	        # Fail loudly on HTTP errors; an error body has no "data:" lines and
	        # would otherwise produce silent, empty output.
	        response.raise_for_status()

	        print("Streaming response:")
	        prev = 0
	        for chunk in response.iter_lines(decode_unicode=False):
	            chunk = chunk.decode("utf-8")
	            if chunk and chunk.startswith("data:"):
	                if chunk == "data: [DONE]":
	                    break
	                data = json.loads(chunk[5:].strip("\n"))
	                output = data["text"].strip()
	                # Print only the newly generated suffix.
	                print(output[prev:], end="", flush=True)
	                prev = len(output)
	    print("\n")


	if __name__ == "__main__":
	    # Script entry point: parse the CLI flags, run the non-streaming request,
	    # then the streaming one unless --no-stream was given.
	    cli = argparse.ArgumentParser()
	    cli.add_argument("--host", type=str, default="127.0.0.1")
	    cli.add_argument("--port", type=int, default=30000)
	    cli.add_argument(
	        "--single-image",
	        action="store_true",
	        help="Test with single image instead of multiple images",
	    )
	    cli.add_argument(
	        "--no-stream",
	        action="store_true",
	        help="Don't test streaming",
	    )
	    options = cli.parse_args()

	    asyncio.run(test_concurrent(options))

	    streaming_enabled = not options.no_stream
	    if streaming_enabled:
	        test_streaming(options)