"""Convert a locally stored causal language model to float16 and save it.

Loads the checkpoint in ``SOURCE_MODEL_DIR``, casts its floating-point
parameters and buffers to half precision, and writes the result to
``HALF_MODEL_DIR`` in the safetensors format.
"""

import torch
import torch.utils.dlpack  # not referenced below; retained from the original script
from transformers import AutoModelForCausalLM

# Directory holding the original checkpoint.
SOURCE_MODEL_DIR = "./mixed_llm"
# Directory that receives the half-precision checkpoint.
HALF_MODEL_DIR = "./mixed_llm_half"

# Load the original model.
model_name = SOURCE_MODEL_DIR
model = AutoModelForCausalLM.from_pretrained(model_name)

# Convert the model to a different precision (float16).
model = model.half()

# Save the model as safetensors. The keyword is `safe_serialization`;
# the previous `safetensors=True` is not a `save_pretrained` parameter and
# was silently swallowed by **kwargs, so it never selected the format.
model.save_pretrained(HALF_MODEL_DIR, safe_serialization=True)