import asyncio
import json
import random
import time

import aiohttp


async def fetch(session, url, payload):
    """POST *payload* to *url* and print the time to the first content chunk.

    Streams the response body line by line and stops at the first
    SSE-style line beginning with 'data:{"content"' — the elapsed time
    printed is therefore time-to-first-token, not total response time.

    Args:
        session: an open aiohttp.ClientSession.
        url: endpoint to POST to.
        payload: request body (JSON-encoded string), sent as raw data.
    """
    # perf_counter is monotonic — unlike time.time(), it cannot jump if
    # the system clock is adjusted mid-request.
    start_time = time.perf_counter()
    async with session.post(url=url, data=payload) as response:
        async for raw_line in response.content:
            line = raw_line.decode('utf-8').strip()
            if line.startswith('data:{"content"'):
                response_time = time.perf_counter() - start_time
                print(f"Received data in {response_time:.4f} seconds")
                break


async def main(url, num_requests):
    """Fire *num_requests* concurrent requests at *url* and await them all.

    Each request carries a JSON payload with a query built from a random
    name; all requests share one ClientSession (connection pooling).
    """
    NAME_LIST = ["Lisa Ann", "Mia Malkova", "Ava Addams", "Savanna Bond", "Siri Dahl"]
    async with aiohttp.ClientSession() as session:
        tasks = []
        for _ in range(num_requests):
            name = random.choice(NAME_LIST)
            payload = json.dumps({
                "query": f"Infos of {name}",
                "llmSessionId": "test",
                "isLlmContext": "1",
                "adultMode": "1",
            })
            tasks.append(fetch(session, url, payload))
        # Launch all requests concurrently; gather propagates the first
        # exception from any task.
        await asyncio.gather(*tasks)


if __name__ == '__main__':
    URL = 'http://0.0.0.0:8000/sql_chat/'  # replace with your API endpoint
    NUM_REQUESTS = 30  # desired number of concurrent requests
    asyncio.run(main(URL, NUM_REQUESTS))