"""Download Meta-Llama-3-8B-Instruct and build a text-generation pipeline on CPU.

Requires the HF_TOKEN environment variable to be set with a Hugging Face
access token that has been granted access to the gated Llama-3 repository.
"""
import os

import torch
import transformers

# Hugging Face access token for the gated Llama-3 repo; read from the environment.
HF_TOKEN = os.getenv("HF_TOKEN")
print("Token loaded")

# Set the device to CPU. The original passed device="cuda" to the pipeline
# while declaring a CPU device here — that contradiction crashes on
# CPU-only machines; use the declared device consistently.
device = torch.device("cpu")

# Plain Hub repo id. The original appended "/tree/main", which is a browser
# URL fragment, not a valid model id, and then never used the variable.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device=device,   # matches the CPU device declared above
    token=HF_TOKEN,  # pass the token so gated-repo authentication actually happens
)
print("llama download successfully")