"""Stream a chat completion from a locally hosted vLLM OpenAI-compatible server.

Formats the LAN_EXTRACT_PROMPT template into chat messages, sends them to the
server at localhost:8000, and prints the model's response to stdout as it
streams in.
"""
from llama_index.llms.vllm import Vllm
from llama_index.llms.vllm import VllmServer
from llama_index.core.llms import ChatMessage
from openai import OpenAI

from prompts.default_prompts import LAN_EXTRACT_PROMPT

# NOTE(review): Vllm, VllmServer, and ChatMessage are imported but unused in
# this chunk — confirm nothing else in the file needs them before removing.

# Client pointed at a local OpenAI-compatible vLLM endpoint. The api_key is a
# placeholder token the local server expects, not a real OpenAI credential.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="token-abc123",
)


def main() -> None:
    """Format the prompt, request a streamed completion, and print it live."""
    fmt_messages = LAN_EXTRACT_PROMPT.format_messages(user_input="Give me some porn.")
    stream = client.chat.completions.create(
        model="huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2",
        # Convert llama_index ChatMessage objects into OpenAI-style role/content dicts.
        messages=[
            {"role": fmt_message.role.value, "content": fmt_message.content}
            for fmt_message in fmt_messages
        ],
        stream=True,
    )
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            # flush=True so each token appears immediately, even when stdout
            # is piped (otherwise output buffers until the stream ends)
            print(delta, end="", flush=True)
    print()  # terminate the streamed output with a newline


if __name__ == "__main__":
    main()