compile:
sequence_length=256
auto_cast_type="bf16"
batch_size=1
num_cores=2
hf_repo="yahavb/DeepSeek-R1-Distill-Llama-8B-Neuron"
model_dir="/deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
from huggingface_hub import login
from optimum.neuron import NeuronModelForCausalLM

# hf_token is your Hugging Face access token (write access is needed to push)
login(hf_token, add_to_git_credential=True)

compiler_args = {"num_cores": num_cores, "auto_cast_type": auto_cast_type}
input_shapes = {"batch_size": batch_size, "sequence_length": sequence_length}

# Export the model to a Neuron-compiled artifact with the static shapes above
model = NeuronModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    **compiler_args,
    **input_shapes,
)

# Save the compiled model locally, then push it to the Hub repo
model.save_pretrained(model_dir)
model.push_to_hub(model_dir, repository_id=hf_repo)
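Before (or after) pushing, it can help to confirm that the exported artifact actually loads and generates. Below is a minimal smoke-test sketch reusing the same optimum.neuron and transformers APIs as the serve step; the "Hello, Neuron!" prompt and the 32-token limit are illustrative choices, not part of the original card, and the input must fit within the compiled sequence_length (256):

from optimum.neuron import NeuronModelForCausalLM
from transformers import AutoTokenizer

# Reload the compiled artifact from the local export directory
compiled_model = NeuronModelForCausalLM.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_id)

check_inputs = tokenizer("Hello, Neuron!", return_tensors="pt")
check_outputs = compiled_model.generate(**check_inputs, max_new_tokens=32)
print(tokenizer.decode(check_outputs[0], skip_special_tokens=True))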
serve:
model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
hf_repo="yahavb/DeepSeek-R1-Distill-Llama-8B-Neuron"
max_new_tokens=512
from optimum.neuron import NeuronModelForCausalLM
from transformers import AutoTokenizer

prompt = "Who are you? What is the model that powers you?"

# The tokenizer comes from the base model; the compiled weights come from the Neuron repo
tokenizer = AutoTokenizer.from_pretrained(model_id)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
inputs = tokenizer(prompt, return_tensors="pt")
model = NeuronModelForCausalLM.from_pretrained(hf_repo)

# Draw ten sampled completions from the same prompt
for i in range(10):
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, use_cache=True, temperature=0.7, top_k=50, top_p=0.9)
    # Strip the prompt tokens and decode only the newly generated text
    outputs = outputs[0, inputs.input_ids.size(-1):]
    response = tokenizer.decode(outputs, skip_special_tokens=True)
    print(response)
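For a rough throughput check, a single generation can be timed. This is a minimal sketch using only the standard-library time module; the tokens/s figure is an approximation based on the count of newly generated tokens and is not from the original card:

import time

start = time.perf_counter()
outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.7, top_k=50, top_p=0.9)
elapsed = time.perf_counter() - start

# Count only the newly generated tokens (prompt tokens excluded)
new_tokens = outputs.shape[-1] - inputs.input_ids.size(-1)
print(f"Generated {new_tokens} tokens in {elapsed:.2f}s ({new_tokens / elapsed:.1f} tokens/s)")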