"""Download Meta-Llama-3-8B-Instruct and build a text-generation pipeline on CPU.

Requires the HF_TOKEN environment variable to be set with a Hugging Face
access token that has been granted access to the gated Llama-3 repository.
"""
import os

import torch
import transformers

# Hugging Face access token for the gated Llama-3 repo; read from the environment.
HF_TOKEN = os.getenv("HF_TOKEN")
print("Token loaded")

# Set the device to CPU. The original passed device="cuda" to the pipeline
# while declaring a CPU device here — that contradiction crashes on
# CPU-only machines; use the declared device consistently.
device = torch.device("cpu")

# Plain Hub repo id. The original appended "/tree/main", which is a browser
# URL fragment, not a valid model id, and then never used the variable.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device=device,   # matches the CPU device declared above
    token=HF_TOKEN,  # pass the token so gated-repo authentication actually happens
)
print("llama download successfully")