```python from sparseml.transformers import SparseAutoModelForCausalLM, SparseAutoTokenizer ORIG_MODEL_PATH = "neuralmagic/TinyLlama-1.1B-Chat-v1.0-pruned2.4" OUTPUT_PATH = "./compress_output" HF_MODEL_ID = "mgoin/TinyLlama-1.1B-Chat-v1.0-pruned2.4-compressed" # Compress and export the model model = SparseAutoModelForCausalLM.from_pretrained(ORIG_MODEL_PATH, device_map="auto", torch_dtype="auto") tokenizer = SparseAutoTokenizer.from_pretrained(ORIG_MODEL_PATH) model.save_pretrained(OUTPUT_PATH, save_compressed=True) tokenizer.save_pretrained(OUTPUT_PATH) # Upload the checkpoint to Hugging Face from huggingface_hub import HfApi HfApi().upload_folder( folder_path=OUTPUT_PATH, repo_id=HF_MODEL_ID, ) ```