Spaces:
Running
Running
"""
One-time script that converts the IndoBERT sentiment model to a quantized ONNX model.

Run it once with: python convert_model.py

After conversion, the 'model/onnx/' folder contains the quantized ONNX model,
which can be loaded directly in the browser via ONNX Runtime Web.
"""
| import os | |
| import json | |
| import torch | |
| import shutil | |
| from pathlib import Path | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig | |
# Hugging Face Hub id of the model to convert, and the folders the converted
# artifacts are written to.
MODEL_ID = "mdhugol/indonesia-bert-sentiment-classification"
OUTPUT_DIR = Path("./model")
ONNX_DIR = OUTPUT_DIR.joinpath("onnx")

# Start from an empty ONNX folder: drop whatever an earlier run left behind,
# then recreate the directory (together with OUTPUT_DIR if it is missing).
if ONNX_DIR.exists():
    shutil.rmtree(ONNX_DIR)
ONNX_DIR.mkdir(parents=True, exist_ok=True)
# Step 1: load the tokenizer, the sequence-classification model and the
# configuration for MODEL_ID through the transformers Auto* loaders.
print(f"[1/5] Loading model: {MODEL_ID}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# eval() returns the module itself, so the model is switched to evaluation
# mode as part of the same statement that loads it.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID).eval()
config = AutoConfig.from_pretrained(MODEL_ID)
# Step 2: tokenize one sample sentence to use as the example input for the
# export. It is padded or truncated to exactly 128 tokens and returned as
# PyTorch tensors.
print("[2/5] Creating dummy input for ONNX export...")
dummy_text = "Ini adalah contoh kalimat untuk testing"
inputs = tokenizer(
    dummy_text,
    truncation=True,
    padding="max_length",
    max_length=128,
    return_tensors="pt",
)
# Step 3: export the PyTorch model to an (unquantized) ONNX file.
print("[3/5] Exporting to ONNX (with embedded weights)...")
raw_onnx_path = str(ONNX_DIR / "model_raw.onnx")

# Graph inputs, in the order they are handed to the model as positional
# arguments.
_onnx_input_names = ["input_ids", "attention_mask", "token_type_ids"]

# Batch size and sequence length stay symbolic for every input; the logits
# output only has a symbolic batch dimension.
_onnx_dynamic_axes = {
    name: {0: "batch_size", 1: "sequence"} for name in _onnx_input_names
}
_onnx_dynamic_axes["logits"] = {0: "batch_size"}

# Export with opset 14 and constant folding, with gradient tracking disabled.
with torch.no_grad():
    torch.onnx.export(
        model,
        tuple(inputs[name] for name in _onnx_input_names),
        raw_onnx_path,
        input_names=_onnx_input_names,
        output_names=["logits"],
        dynamic_axes=_onnx_dynamic_axes,
        opset_version=14,
        do_constant_folding=True,
    )

# Remember the size of the raw export; the quantization step reports the
# compression ratio against it.
raw_size = os.path.getsize(raw_onnx_path)
print(f" Raw ONNX size: {raw_size/1024/1024:.1f} MB")
# Step 4: dynamically quantize the exported model, then leave only the
# quantized file in ONNX_DIR under the name model.onnx.
print("[4/5] Quantizing to int8 (dynamic quantization)...")
# Imported at the point of use, so onnxruntime is only required once this
# step is reached.
from onnxruntime.quantization import quantize_dynamic, QuantType

quant_onnx_path = str(ONNX_DIR / "model_quantized.onnx")
quantize_dynamic(raw_onnx_path, quant_onnx_path, weight_type=QuantType.QUInt8)

quant_size = os.path.getsize(quant_onnx_path)
print(f" Quantized ONNX size: {quant_size/1024/1024:.1f} MB")
print(f" Compression ratio: {raw_size/quant_size:.1f}x")

# Delete the raw export first, then move the quantized model to its final name.
Path(raw_onnx_path).unlink()
final_path = str(ONNX_DIR / "model.onnx")
Path(quant_onnx_path).rename(final_path)

# Delete any leftover *.data files (external weight data) from the ONNX folder.
for leftover in ONNX_DIR.glob("*.data"):
    leftover.unlink()
# Step 5: write the tokenizer files and a config.json next to the ONNX folder.
print("[5/5] Saving tokenizer and config files...")

# Save the tokenizer files into OUTPUT_DIR.
tokenizer.save_pretrained(str(OUTPUT_DIR))

# Replace the label mappings in the exported config with the
# Positif / Netral / Negatif class names. label2id is derived from id2label
# so the two mappings can never disagree.
config_data = config.to_dict()
config_data["id2label"] = {"0": "Positif", "1": "Netral", "2": "Negatif"}
config_data["label2id"] = {
    label: int(index) for index, label in config_data["id2label"].items()
}

# An explicit UTF-8 encoding keeps the written file independent of the
# platform's default text encoding.
with open(str(OUTPUT_DIR / "config.json"), "w", encoding="utf-8") as f:
    json.dump(config_data, f, indent=2)
# Final report: list every file under OUTPUT_DIR (sorted by name within each
# folder) with its path relative to OUTPUT_DIR. Sizes above 1 MiB are shown
# in MB, everything else in KB.
print(f"\nDone! Model saved to '{OUTPUT_DIR}/'")
print("\nFiles created:")
for folder, _subfolders, filenames in os.walk(str(OUTPUT_DIR)):
    for filename in sorted(filenames):
        full_path = os.path.join(folder, filename)
        num_bytes = os.path.getsize(full_path)
        shown_path = os.path.relpath(full_path, str(OUTPUT_DIR))
        if num_bytes > 1024*1024:
            print(f" {shown_path} ({num_bytes/1024/1024:.1f} MB)")
        else:
            print(f" {shown_path} ({num_bytes/1024:.1f} KB)")
print("\nYou can now run the website with just: npx serve . -p 7860")