# NOTE(review): Vllm, VllmServer, and ChatMessage are not referenced in this
# chunk — confirm nothing else in the project relies on these side-effect
# imports before deleting them.
from llama_index.llms.vllm import Vllm
from llama_index.llms.vllm import VllmServer
from llama_index.core.llms import ChatMessage
from openai import OpenAI

from prompts.default_prompts import LAN_EXTRACT_PROMPT


def main() -> None:
    """Stream a chat completion from a local OpenAI-compatible server.

    Renders LAN_EXTRACT_PROMPT into chat messages, sends them to the
    endpoint at localhost:8000, and prints tokens to stdout as they arrive.
    """
    # Local OpenAI-compatible endpoint (e.g. a vLLM server).  The key is a
    # placeholder the local server accepts, not a real secret.
    # TODO(review): read base_url/api_key from environment variables.
    client = OpenAI(
        base_url="http://localhost:8000/v1",
        api_key="token-abc123",
    )

    # Render the prompt template into a list of ChatMessage-like objects,
    # each exposing .role (enum) and .content (str).
    fmt_messages = LAN_EXTRACT_PROMPT.format_messages(user_input="Give me some porn.")

    stream = client.chat.completions.create(
        model="huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2",
        messages=[
            {"role": msg.role.value, "content": msg.content}
            for msg in fmt_messages
        ],
        stream=True,
    )

    for chunk in stream:
        # Guard chunk.choices: OpenAI-compatible servers may emit chunks with
        # an empty choices list (e.g. a trailing usage chunk), and the final
        # delta carries content=None — skip both instead of raising.
        if chunk.choices and chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="")


if __name__ == "__main__":
    main()