| """ |
| Usage: |
| # Run a Pixtral model with SGLang: |
| # HuggingFace: |
| python -m sglang.launch_server --model-path mistral-community/pixtral-12b --port=30000 |
| # ModelScope: |
| python -m sglang.launch_server --model-path AI-ModelScope/pixtral-12b --port=30000 |
| |
| # Then test it with: |
| python pixtral_server.py |
| |
| This script tests Pixtral model with both single and multiple images. |
| """ |
|
|
| import argparse |
| import asyncio |
| import json |
|
|
| import aiohttp |
| import requests |
|
|
| from sglang.utils import normalize_base_url |
|
|
| IMAGE_TOKEN_SEP = "\n[IMG]" |
| ROUTE = "/generate" |
|
|
|
|
| async def send_request(url, data, delay=0): |
| await asyncio.sleep(delay) |
| async with aiohttp.ClientSession() as session: |
| async with session.post(url, json=data) as resp: |
| output = await resp.json() |
| return output |
|
|
|
|
| async def test_concurrent(args): |
| url = f"{normalize_base_url(args.host, args.port)}{ROUTE}" |
|
|
| |
| if args.single_image: |
| prompt = f"<s>[INST]Describe this image in detail.{IMAGE_TOKEN_SEP}[/INST]" |
| image_url = "https://picsum.photos/id/237/400/300" |
| modality = ["image"] |
| |
| else: |
| image_urls = [ |
| "https://picsum.photos/id/237/400/300", |
| "https://picsum.photos/id/27/500/500", |
| ] |
| prompt = f"<s>[INST]How many photos are there? Describe each in a very short sentence.{IMAGE_TOKEN_SEP * len(image_urls)}[/INST]" |
| image_url = image_urls |
| modality = ["multi-images"] |
|
|
| response = await send_request( |
| url, |
| { |
| "text": prompt, |
| "image_data": image_url, |
| "sampling_params": { |
| "max_new_tokens": 100, |
| "temperature": 0.7, |
| "top_p": 0.9, |
| }, |
| "modalities": modality, |
| }, |
| ) |
|
|
| print(f"Response: {response}") |
| if "text" in response: |
| print("\nOutput text:", response["text"]) |
|
|
|
|
| def test_streaming(args): |
| url = f"{normalize_base_url(args.host, args.port)}/generate" |
|
|
| |
| if args.single_image: |
| prompt = f"<s>[INST]Describe this image in detail.{IMAGE_TOKEN_SEP}[/INST]" |
| image_data = "https://picsum.photos/id/237/400/300" |
| modality = ["image"] |
| |
| else: |
| image_urls = [ |
| "https://picsum.photos/id/237/400/300", |
| "https://picsum.photos/id/27/500/500", |
| ] |
| prompt = f"<s>[INST]How many photos are there? Describe each in a very short sentence.{IMAGE_TOKEN_SEP * len(image_urls)}[/INST]" |
| image_data = image_urls |
| modality = ["multi-images"] |
|
|
| pload = { |
| "text": prompt, |
| "image_data": image_data, |
| "sampling_params": {"max_new_tokens": 100, "temperature": 0.7, "top_p": 0.9}, |
| "modalities": modality, |
| "stream": True, |
| } |
|
|
| response = requests.post(url, json=pload, stream=True) |
|
|
| print("Streaming response:") |
| prev = 0 |
| for chunk in response.iter_lines(decode_unicode=False): |
| chunk = chunk.decode("utf-8") |
| if chunk and chunk.startswith("data:"): |
| if chunk == "data: [DONE]": |
| break |
| data = json.loads(chunk[5:].strip("\n")) |
| output = data["text"].strip() |
| print(output[prev:], end="", flush=True) |
| prev = len(output) |
| print("\n") |
|
|
|
|
| if __name__ == "__main__": |
| parser = argparse.ArgumentParser() |
| parser.add_argument("--host", type=str, default="127.0.0.1") |
| parser.add_argument("--port", type=int, default=30000) |
| parser.add_argument( |
| "--single-image", |
| action="store_true", |
| help="Test with single image instead of multiple images", |
| ) |
| parser.add_argument("--no-stream", action="store_true", help="Don't test streaming") |
| args = parser.parse_args() |
|
|
| asyncio.run(test_concurrent(args)) |
| if not args.no_stream: |
| test_streaming(args) |
|
|