| | import argparse |
| | import os |
| | import json |
| | import sys |
| | sys.path.append(os.path.abspath('/home/mshahidul/')) |
| | from gpu_selection import _gpu_selection_ |
| | |
# Command-line interface: input JSON path plus an optional CUDA device pin.
cli = argparse.ArgumentParser(description="Translation Evaluation")
cli.add_argument(
    "--path",
    type=str,
    default="/home/mshahidul/readctrl/generating_data/tik_ache/es_syntheticV3.json",
    help="Path to the JSON file",
)
cli.add_argument(
    "--cuda",
    type=str,
    default="3",
    help="CUDA device id, e.g., '0' or '0,1' for multiple GPUs",
)
args = cli.parse_args()

# Pin this process to the requested GPU(s) before any CUDA-aware library is
# imported; otherwise fall back to the project's automatic GPU picker.
# NOTE(review): --cuda defaults to "3", so args.cuda is never None unless a
# caller strips the default — confirm the _gpu_selection_ branch is reachable.
if args.cuda is None:
    _gpu_selection_()
else:
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
    print(f"🎮🎮 Using CUDA device: {args.cuda}")
| |
|
| | |
# Results live alongside the other experiment outputs, keyed by the input
# file's basename so a rerun on the same dataset resumes the same file.
out_dir = "/home/mshahidul/readctrl/results/"
# Create the output directory itself. The original passed
# os.path.dirname(out_dir), which only resolved to the results directory
# because of the trailing "/" (dirname("/a/b/") == "/a/b"); passing out_dir
# directly is correct regardless of a trailing slash.
os.makedirs(out_dir, exist_ok=True)
file_name = os.path.basename(args.path)
out_path = os.path.join(out_dir, file_name)
| |
|
| | |
# Resume support: reload previously saved results and remember which
# (article, gold_summary) pairs have already been processed.
# NOTE(review): an item counts as done if *any* band for it was saved, so a
# run interrupted mid-item skips that item's remaining bands on resume —
# confirm this granularity is acceptable.
results = []
completed_keys = set()
if os.path.exists(out_path):
    with open(out_path, "r", encoding="utf-8") as f:
        results = json.load(f)
    completed_keys = {(entry["article"], entry["gold_summary"]) for entry in results}
| |
|
| | |
# Load the evaluation dataset, then bring up the fine-tuned model.
with open(args.path, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Imported here, after CUDA_VISIBLE_DEVICES is set above, so unsloth/torch
# only ever see the selected GPU(s).
from unsloth import FastLanguageModel
import torch

_MODEL_DIR = "/home/mshahidul/readctrl/finetuned_models/es_synthetic_data_creation_Qwen3_14B_v1"

# NOTE(review): max_seq_length=4092 looks like a typo for 4096 — confirm.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=_MODEL_DIR,
    max_seq_length=4092,
    load_in_4bit=True,  # 4-bit quantized weights to fit the 14B model
    load_in_8bit=False,
    full_finetuning=False,
)
from prompt_generate import generate_prompt

import tqdm

# Main generation loop: for every dataset item, produce one synthetic summary
# per readability band, appending to `results` and checkpointing to disk
# periodically so an interrupted run can resume from the saved file.
for item in tqdm.tqdm(dataset):
    key = (item["article"], item["gold_summary"])
    if key in completed_keys:
        continue

    for band in ["B1", "B2", "B3"]:
        prompt = generate_prompt(item['article'], item['gold_summary'], band, "es")

        messages = [{"role": "user", "content": prompt + "\n"}]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        inputs = tokenizer(text, return_tensors="pt").to("cuda")
        # NOTE(review): temperature/top_p/top_k have no effect unless
        # do_sample=True is also passed (HF generate defaults to greedy
        # decoding) — confirm whether sampling was actually intended.
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1000,
            temperature=0.1,
            top_p=0.8,
            top_k=5,
        )
        # Decode only the newly generated continuation. The original decoded
        # output_ids[0] in full, which for a decoder-only model prepends the
        # entire chat-formatted prompt to every stored summary.
        prompt_len = inputs["input_ids"].shape[1]
        output_text = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)

        results.append({
            "article": item["article"],
            "gold_summary": item["gold_summary"],
            "band": band,
            "lang": "es",
            "synthetic_summary": output_text,
        })
    completed_keys.add(key)

    # Checkpoint every 10 items (3 bands per item -> 30 results).
    if len(results) % 30 == 0:
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
| |
|
| | |
# Persist the complete result set one final time, covering the tail of the
# run that the periodic in-loop checkpoint may not have flushed.
with open(out_path, "w", encoding="utf-8") as fh:
    json.dump(results, fh, ensure_ascii=False, indent=2)