| import os |
| import json |
| import tqdm |
| from openai import OpenAI |
|
|
| |
# LLM endpoint used for the omission audit: an OpenAI-compatible server
# (presumably vLLM, given the local address) serving a Qwen3 model.
MODEL_PATH = "Qwen/Qwen3-30B-A3B-Instruct-2507"
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"  # placeholder — server presumably does not check auth; confirm


# Input: evaluation results to audit, plus the raw dataset that supplies the
# full source text, gold summary, and generated texts per literacy level.
EVAL_FILE = "/home/mshahidul/readctrl/data/reasoning/REFINED_full_details_evaluation_0_20_qwen3-32B_v2.json"
RAW_DATA_FILE = "/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json"

# Output path reuses the evaluation file's basename so runs are traceable.
file_name=os.path.basename(EVAL_FILE)
UPDATED_FILE = f"/home/mshahidul/readctrl/data/reasoning/reasoned_updated_results_v2_{file_name}"


# Module-level client shared by all audit calls.
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
|
| |
| |
| |
def get_clinical_reasoning(source, gold, generated, subclaim, level):
    """Ask the LLM whether omitting `subclaim` is acceptable at `level`.

    Args:
        source: Full source paper text.
        gold: Expert reference (gold) summary.
        generated: Model-generated summary being audited.
        subclaim: The subclaim currently labeled 'not_supported'.
        level: Target literacy level key (e.g. "low_health_literacy").

    Returns:
        dict with keys "category" ("reasonable" | "unreasonable"), "reason",
        and "explanation". On API or JSON-parsing failure, fails closed with
        an "unreasonable" verdict so omissions are never silently excused.
    """
    level_guidelines = {
        "low_health_literacy": """
- Goal: 'Living room' language; replace jargon (e.g., 'renal' -> 'kidney').
- Density: Focus ONLY on 'need-to-know' info from Gold Summary.
- Strategy: One idea per sentence.
- Reasonable Omission: Technical jargon or details NOT in the Gold Summary.
""",
        "intermediate_health_literacy": """
- Goal: Standard vocabulary; common medical terms are okay.
- Density: Gold Summary as lead + necessary Source Text context.
- Strategy: Remove minor technical details to avoid overload.
- Reasonable Omission: Minor technical nuances or physiological mechanisms.
""",
        "proficient_health_literacy": """
- Goal: Technical/Academic language; prioritize clinical nuance.
- Density: High; include data, mechanisms, and statistics from Full Source.
- Strategy: Retain all original technical terminology.
- Reasonable Omission: Almost none; should adhere closely to Full Source.
"""
    }

    # Unknown levels fall back to a generic instruction instead of failing.
    guideline = level_guidelines.get(level, "Follow standard medical summarization principles.")

    prompt = f"""You are a clinical logic validator auditing medical text simplification.

A subclaim is currently labeled 'not_supported' in the generated text. Your job is to decide whether
its omission is acceptable for the target literacy level.

### Target Level Guidelines: {level}
{guideline}

### Inputs:
1) Source Text (Full Paper): {source}
2) Gold Summary (Expert Reference): {gold}
3) Generated Text (Model Output): {generated}
4) Subclaim to Evaluate: {subclaim}

### Decision rules (MUST follow):
A) First, determine whether the subclaim is present in or required by the Gold Summary.
   - If the Gold Summary includes this subclaim (or an equivalent idea), then omitting it is usually UNREASONABLE
     even for low health literacy, because low literacy still must retain "need-to-know" gold content.
B) Check for outcome-critical content.
   - If the subclaim is about outcomes/prognosis (e.g., recovery, no sequelae, disability, death, major complications),
     treat it as clinically important. Omission is UNREASONABLE unless the Gold Summary clearly omits it and
     the generated text already conveys the same outcome clearly.
C) Check time scope.
   - If the subclaim could apply only to a specific time window (e.g., "no sequelae after initial event"),
     infer whether the generated text covers that window. If the generated text describes later deterioration/death,
     do NOT assume that supports "no sequelae." If the time scope is unclear, err toward UNREASONABLE.
D) Only mark REASONABLE if:
   - The subclaim is NOT in the Gold Summary (or is clearly non-essential there), AND
   - It is mainly anatomical/technical detail, jargon, or minor nuance for this literacy level, AND
   - Omitting it does not change the clinical interpretation.

### Output ONLY JSON:
{{
  "category": "reasonable" | "unreasonable",
  "reason": "jargon_reduction" | "detail_filtering" | "clinical_info_loss",
  "explanation": "One sentence justification referencing Gold Summary importance and (if relevant) time/outcome."
}}
JSON:"""
    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=250,
            temperature=0.1
        )
        content = response.choices[0].message.content.strip()
        # Strip a markdown code fence if the model wrapped its JSON in one.
        if "```json" in content:
            content = content.split("```json")[-1].split("```")[0].strip()
        return json.loads(content)
    except Exception as exc:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        # Fail closed and keep the schema consistent with the success path:
        # the fallback previously lacked the "reason" key.
        return {
            "category": "unreasonable",
            "reason": "clinical_info_loss",
            "explanation": f"API parsing error: {exc}",
        }
|
|
| |
| |
| |
def _audit_and_score(items, claim_key, source_text, gold_summary, gen_text, level):
    """Audit each 'not_supported' item in-place; return the corrected score.

    An item counts toward the score when it is already supported or when the
    LLM audit deems its omission reasonable (its status is then upgraded to
    'reasonable_omission'). The audit verdict is stored on the item either
    way so unreasonable omissions remain inspectable.

    Args:
        items: List of subclaim dicts, each with a 'status' field.
        claim_key: Key holding the subclaim text ('source_fact' or 'source_subclaim').
        source_text / gold_summary / gen_text / level: Passed to the auditor.

    Returns:
        Fraction of items counted as correct (0 for an empty list).
    """
    correct = 0
    for obj in items:
        if obj['status'] == 'not_supported':
            res = get_clinical_reasoning(
                source=source_text,
                gold=gold_summary,
                generated=gen_text,
                subclaim=obj[claim_key],
                level=level,
            )
            obj['reasoning_audit'] = res
            if res.get('category') == 'reasonable':
                obj['status'] = 'reasonable_omission'
                correct += 1
        else:
            # Already supported — counts toward the corrected score.
            correct += 1
    return correct / len(items) if items else 0


def process_and_update_details():
    """Audit all 'not_supported' subclaims and recompute per-level scores.

    Loads the evaluation results and the raw dataset, runs the LLM omission
    audit on every unsupported completeness/source-coverage subclaim, updates
    statuses and scores in-place, and writes the result to UPDATED_FILE.

    Bug fixed: the original loops incremented the "corrected" counter for
    every 'not_supported' item regardless of the audit verdict and never
    counted supported items, so the recomputed score was simply the fraction
    of unsupported claims and the audit had no effect on it.
    """
    with open(EVAL_FILE, 'r') as f:
        eval_data = json.load(f)
    with open(RAW_DATA_FILE, 'r') as f:
        raw_lookup = {item['index']: item for item in json.load(f)}

    for entry in tqdm.tqdm(eval_data, desc="Updating Subclaim Details"):
        raw_item = raw_lookup.get(entry['index'])
        if not raw_item:
            continue  # evaluation entry with no matching raw record

        source_text = raw_item['fulltext']
        gold_summary = raw_item['summary']

        for level, lvl_content in entry['literacy_levels'].items():
            gen_text = raw_item['diff_label_texts'].get(level, "")

            # The two subclaim lists share the same audit logic and differ
            # only in the key that holds the subclaim text.
            lvl_content['scores']['completeness'] = _audit_and_score(
                lvl_content['details']['completeness'], 'source_fact',
                source_text, gold_summary, gen_text, level)
            lvl_content['scores']['source_coverage'] = _audit_and_score(
                lvl_content['details']['source_coverage'], 'source_subclaim',
                source_text, gold_summary, gen_text, level)

    with open(UPDATED_FILE, 'w') as f:
        json.dump(eval_data, f, indent=2)
    print(f"\nUpdate complete. Detailed status and scores saved to: {UPDATED_FILE}")
|
|
# Run the full audit-and-rescore pipeline when executed as a script.
if __name__ == "__main__":
    process_and_update_details()