How to Use

How to use this Qwen2 model (GGUF) for Italian text generation with `llama-cpp-python`:

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized GGUF weights from the Hugging Face Hub
model_path = hf_hub_download(
    repo_id="DeepMount00/Qwen2-1.5B-Ita-GGUF",
    filename="qwen2-1.5b-instruct-q8_0.gguf"
)

llm = Llama(
    model_path=model_path,
    n_ctx=2048,        # context window size in tokens
    n_threads=8,       # CPU threads to use
    n_gpu_layers=0     # CPU-only; raise to offload layers to the GPU
)

def apply_template(text):
    # Wrap the user message in the ChatML format used by Qwen2 instruct models
    return "<|im_start|>user\n" + text + "<|im_end|>\n<|im_start|>assistant\n"

prompt = "Quanto fa 2+2?"

output = llm(
    apply_template(prompt),
    max_tokens=512,
    stop=["<|im_end|>"],  # stop at the end-of-turn token
    echo=True,            # include the prompt in the returned text
    temperature=0.1,
    top_p=0.95,
)

print(output['choices'][0]['text'])
```
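The `apply_template` helper above covers only a single user turn. As a sketch, assuming the same ChatML tokens, a hypothetical variant (not part of the original card) that also accepts an optional system message could look like:

```python
def build_chatml_prompt(user_text, system_text=None):
    # Assemble a ChatML prompt; the system block is emitted only when provided.
    parts = []
    if system_text:
        parts.append("<|im_start|>system\n" + system_text + "<|im_end|>\n")
    parts.append("<|im_start|>user\n" + user_text + "<|im_end|>\n")
    parts.append("<|im_start|>assistant\n")
    return "".join(parts)

# Example: steer the model toward Italian answers via a system message
print(build_chatml_prompt("Quanto fa 2+2?", system_text="Rispondi in italiano."))
```

Alternatively, `llama-cpp-python` also exposes `llm.create_chat_completion(messages=[...])`, which applies the chat template stored in the GGUF metadata, so manual templating can be skipped.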
Model details: GGUF format, 1.54B params, qwen2 architecture, 8-bit quantization (Q8_0).