| import os |
| import json |
| import tqdm |
| from openai import OpenAI |
|
|
| |
# LLM endpoint used for the omission audit: an OpenAI-compatible server
# (presumably vLLM, given the local address) serving a Qwen3 model.
MODEL_PATH = "Qwen/Qwen3-30B-A3B-Instruct-2507"
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"  # placeholder — server presumably does not check auth; confirm


# Input: evaluation results to audit, plus the raw dataset that supplies the
# full source text, gold summary, and generated texts per literacy level.
EVAL_FILE = "/home/mshahidul/readctrl/data/reasoning/REFINED_full_details_evaluation_0_20_qwen3-32B_v2.json"
RAW_DATA_FILE = "/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json"

# Output path reuses the evaluation file's basename so runs are traceable.
file_name=os.path.basename(EVAL_FILE)
UPDATED_FILE = f"/home/mshahidul/readctrl/data/reasoning/reasoned_updated_results_v2_{file_name}"


# Module-level client shared by all audit calls.
client = OpenAI(base_url=API_URL, api_key=API_KEY)
|
|
| |
| |
| |
def get_clinical_reasoning(source, gold, generated, subclaim, level):
    """Ask the LLM whether omitting `subclaim` is acceptable at `level`.

    Args:
        source: Full source paper text.
        gold: Expert reference (gold) summary.
        generated: Model-generated summary being audited.
        subclaim: The subclaim currently labeled 'not_supported'.
        level: Target literacy level key (e.g. "low_health_literacy").

    Returns:
        dict with keys "category" ("reasonable" | "unreasonable"), "reason",
        and "explanation". On API or JSON-parsing failure, fails closed with
        an "unreasonable" verdict so omissions are never silently excused.
    """
    level_guidelines = {
        "low_health_literacy": """
- Goal: 'Living room' language; replace jargon (e.g., 'renal' -> 'kidney').
- Density: Focus ONLY on 'need-to-know' info from Gold Summary.
- Strategy: One idea per sentence.
- Reasonable Omission: Technical jargon or details NOT in the Gold Summary.
""",
        "intermediate_health_literacy": """
- Goal: Standard vocabulary; common medical terms are okay.
- Density: Gold Summary as lead + necessary Source Text context.
- Strategy: Remove minor technical details to avoid overload.
- Reasonable Omission: Minor technical nuances or physiological mechanisms.
""",
        "proficient_health_literacy": """
- Goal: Technical/Academic language; prioritize clinical nuance.
- Density: High; include data, mechanisms, and statistics from Full Source.
- Strategy: Retain all original technical terminology.
- Reasonable Omission: Almost none; should adhere closely to Full Source.
"""
    }

    # Unknown levels fall back to a generic instruction instead of failing.
    guideline = level_guidelines.get(level, "Follow standard medical summarization principles.")

    prompt = f"""You are a clinical logic validator auditing medical text simplification.

A subclaim is currently labeled 'not_supported' in the generated text. Your job is to decide whether
its omission is acceptable for the target literacy level.

### Target Level Guidelines: {level}
{guideline}

### Inputs:
1) Source Text (Full Paper): {source}
2) Gold Summary (Expert Reference): {gold}
3) Generated Text (Model Output): {generated}
4) Subclaim to Evaluate: {subclaim}

### Decision rules (MUST follow):
A) First, determine whether the subclaim is present in or required by the Gold Summary.
   - If the Gold Summary includes this subclaim (or an equivalent idea), then omitting it is usually UNREASONABLE
     even for low health literacy, because low literacy still must retain "need-to-know" gold content.
B) Check for outcome-critical content.
   - If the subclaim is about outcomes/prognosis (e.g., recovery, no sequelae, disability, death, major complications),
     treat it as clinically important. Omission is UNREASONABLE unless the Gold Summary clearly omits it and
     the generated text already conveys the same outcome clearly.
C) Check time scope.
   - If the subclaim could apply only to a specific time window (e.g., "no sequelae after initial event"),
     infer whether the generated text covers that window. If the generated text describes later deterioration/death,
     do NOT assume that supports "no sequelae." If the time scope is unclear, err toward UNREASONABLE.
D) Only mark REASONABLE if:
   - The subclaim is NOT in the Gold Summary (or is clearly non-essential there), AND
   - It is mainly anatomical/technical detail, jargon, or minor nuance for this literacy level, AND
   - Omitting it does not change the clinical interpretation.

### Output ONLY JSON:
{{
  "category": "reasonable" | "unreasonable",
  "reason": "jargon_reduction" | "detail_filtering" | "clinical_info_loss",
  "explanation": "One sentence justification referencing Gold Summary importance and (if relevant) time/outcome."
}}
JSON:"""
    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=250,
            temperature=0.1
        )
        content = response.choices[0].message.content.strip()
        # Strip a markdown code fence if the model wrapped its JSON in one.
        if "```json" in content:
            content = content.split("```json")[-1].split("```")[0].strip()
        return json.loads(content)
    except Exception as exc:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        # Fail closed and keep the schema consistent with the success path:
        # the fallback previously lacked the "reason" key.
        return {
            "category": "unreasonable",
            "reason": "clinical_info_loss",
            "explanation": f"API parsing error: {exc}",
        }
|
|
| |
| |
| |
def _audit_and_score(items, claim_key, source_text, gold_summary, gen_text, level):
    """Audit each 'not_supported' item in-place; return the corrected score.

    An item counts toward the score when it is already supported or when the
    LLM audit deems its omission reasonable (its status is then upgraded to
    'reasonable_omission'). The audit verdict is stored on the item either
    way so unreasonable omissions remain inspectable.

    Args:
        items: List of subclaim dicts, each with a 'status' field.
        claim_key: Key holding the subclaim text ('source_fact' or 'source_subclaim').
        source_text / gold_summary / gen_text / level: Passed to the auditor.

    Returns:
        Fraction of items counted as correct (0 for an empty list).
    """
    correct = 0
    for obj in items:
        if obj['status'] == 'not_supported':
            res = get_clinical_reasoning(
                source=source_text,
                gold=gold_summary,
                generated=gen_text,
                subclaim=obj[claim_key],
                level=level,
            )
            obj['reasoning_audit'] = res
            if res.get('category') == 'reasonable':
                obj['status'] = 'reasonable_omission'
                correct += 1
        else:
            # Already supported — counts toward the corrected score.
            correct += 1
    return correct / len(items) if items else 0


def process_and_update_details():
    """Audit all 'not_supported' subclaims and recompute per-level scores.

    Loads the evaluation results and the raw dataset, runs the LLM omission
    audit on every unsupported completeness/source-coverage subclaim, updates
    statuses and scores in-place, and writes the result to UPDATED_FILE.

    Bug fixed: the original loops incremented the "corrected" counter for
    every 'not_supported' item regardless of the audit verdict and never
    counted supported items, so the recomputed score was simply the fraction
    of unsupported claims and the audit had no effect on it.
    """
    with open(EVAL_FILE, 'r') as f:
        eval_data = json.load(f)
    with open(RAW_DATA_FILE, 'r') as f:
        raw_lookup = {item['index']: item for item in json.load(f)}

    for entry in tqdm.tqdm(eval_data, desc="Updating Subclaim Details"):
        raw_item = raw_lookup.get(entry['index'])
        if not raw_item:
            continue  # evaluation entry with no matching raw record

        source_text = raw_item['fulltext']
        gold_summary = raw_item['summary']

        for level, lvl_content in entry['literacy_levels'].items():
            gen_text = raw_item['diff_label_texts'].get(level, "")

            # The two subclaim lists share the same audit logic and differ
            # only in the key that holds the subclaim text.
            lvl_content['scores']['completeness'] = _audit_and_score(
                lvl_content['details']['completeness'], 'source_fact',
                source_text, gold_summary, gen_text, level)
            lvl_content['scores']['source_coverage'] = _audit_and_score(
                lvl_content['details']['source_coverage'], 'source_subclaim',
                source_text, gold_summary, gen_text, level)

    with open(UPDATED_FILE, 'w') as f:
        json.dump(eval_data, f, indent=2)
    print(f"\nUpdate complete. Detailed status and scores saved to: {UPDATED_FILE}")
|
|
# Run the full audit-and-rescore pipeline when executed as a script.
if __name__ == "__main__":
    process_and_update_details()