# Hugging Face Space snippet (page was showing "Runtime error" when scraped)
"""Stream a JSON-schema-constrained chat completion from a local GGUF model.

Downloads Qwen1.5-0.5B-Chat (q8_0 quantization) via llama-cpp-python, pairs it
with the matching Hugging Face tokenizer so chat templating is exact, then asks
a question and streams the structured-JSON answer token by token to stdout.
"""
import llama_cpp
import llama_cpp.llama_tokenizer

# First call downloads the GGUF weights and the HF tokenizer to the local cache.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    # Use the original HF tokenizer rather than the GGUF-embedded one so the
    # chat template is applied exactly as the base model expects.
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B"
    ),
    verbose=False,
)

response = llama.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?",
        }
    ],
    # Grammar-constrain generation: the model may only emit a JSON object
    # matching this schema (both keys required, both strings).
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {
                "country": {"type": "string"},
                "capital": {"type": "string"},
            },
            "required": ["country", "capital"],
        },
    },
    stream=True,
)

# With stream=True each chunk carries an incremental delta; role-only and
# finish chunks have no "content" key, so skip those.
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    if "content" not in delta:
        continue
    print(delta["content"], end="", flush=True)
print()