# optimization/benchmark.py
# Convert a fine-tuned model to CTranslate2 (int8) and benchmark latency and size.
import os
import subprocess
import time

import ctranslate2
import transformers
def convert_model():
    """Convert the Hugging Face model in ./final_model to an int8 CTranslate2 model.

    Writes the converted model to ./ct2_model.

    Raises:
        subprocess.CalledProcessError: if the converter exits non-zero
            (os.system previously discarded the exit status entirely).
    """
    print("Converting model to CTranslate2...")
    # Argument list with shell=False: no shell-quoting pitfalls, and
    # check=True surfaces conversion failures instead of ignoring them.
    subprocess.run(
        [
            "ct2-transformers-converter",
            "--model", "./final_model",
            "--output_dir", "./ct2_model",
            "--quantization", "int8",
        ],
        check=True,
    )
def _dir_size(path):
    """Total size in bytes of the regular files directly inside *path* (non-recursive)."""
    return sum(
        os.path.getsize(os.path.join(path, name))
        for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )


def run_benchmark():
    """Benchmark the int8 CTranslate2 model in ./ct2_model on CPU.

    Prints the average translation latency per input sentence and the
    on-disk compression ratio of ./ct2_model versus ./final_model.
    """
    device = "cpu"
    translator = ctranslate2.Translator("./ct2_model", device=device)
    tokenizer = transformers.AutoTokenizer.from_pretrained("./final_model")

    # Language-tagged romanized inputs; presumably this matches the model's
    # training format — confirm against the training pipeline.
    test_inputs = ["<hin> shanti", "<ben> namaskar", "<tam> vanakkam"]
    tokens = [tokenizer.convert_ids_to_tokens(tokenizer.encode(t)) for t in test_inputs]

    # Warm-up call so one-time initialization is excluded from the timing.
    translator.translate_batch(tokens)

    n_runs = 10  # average over several runs to smooth out jitter
    start = time.perf_counter()  # monotonic clock intended for benchmarking
    for _ in range(n_runs):
        translator.translate_batch(tokens)
    elapsed = time.perf_counter() - start

    # Each run translates len(test_inputs) sentences, so this is latency per
    # input — the old "per word" label was incorrect for the same division.
    ct2_time = elapsed / (n_runs * len(test_inputs))
    print(f"CTranslate2 Latency: {ct2_time:.4f}s per input")

    # Size comparison of the two model directories (top-level files only).
    orig_size = _dir_size("./final_model")
    opt_size = _dir_size("./ct2_model")
    if opt_size:  # guard against a failed/empty conversion output
        print(f"Compression Ratio: {orig_size / opt_size:.2f}x")
if __name__ == "__main__":
    # Conversion must run first so ./ct2_model exists before benchmarking.
    convert_model()
    run_benchmark()