# Hugging Face Space snippet (page was showing "Runtime error" when scraped)
"""Stream a JSON-schema-constrained chat completion from a local GGUF model.

Downloads Qwen1.5-0.5B-Chat (q8_0 quantization) via llama-cpp-python, pairs it
with the matching Hugging Face tokenizer so chat templating is exact, then asks
a question and streams the structured-JSON answer token by token to stdout.
"""
import llama_cpp
import llama_cpp.llama_tokenizer

# First call downloads the GGUF weights and the HF tokenizer to the local cache.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    # Use the original HF tokenizer rather than the GGUF-embedded one so the
    # chat template is applied exactly as the base model expects.
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B"
    ),
    verbose=False,
)

response = llama.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?",
        }
    ],
    # Grammar-constrain generation: the model may only emit a JSON object
    # matching this schema (both keys required, both strings).
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {
                "country": {"type": "string"},
                "capital": {"type": "string"},
            },
            "required": ["country", "capital"],
        },
    },
    stream=True,
)

# With stream=True each chunk carries an incremental delta; role-only and
# finish chunks have no "content" key, so skip those.
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    if "content" not in delta:
        continue
    print(delta["content"], end="", flush=True)
print()