| | import os, json |
def return_prompts(reference_summary, generated_summary, subclaims_json, difficulty_level):
    """Build the evaluator prompt for one (summary, readability level) pair.

    The prompt asks the model to judge, for every subclaim, whether its
    inclusion in or omission from the generated summary is reasonable for
    the given readability level, and to answer with structured JSON.

    Args:
        reference_summary: Text of the reference summary.
        generated_summary: Text of the generated summary being evaluated.
        subclaims_json: Subclaims with their support results. A string is
            embedded verbatim; any other object (list/dict) is serialized
            to JSON first so the model does not receive a Python repr.
        difficulty_level: Target readability level, embedded verbatim
            (the caller in this file passes "easy", "intermediate" or "hard").

    Returns:
        The complete prompt string.
    """
    if isinstance(subclaims_json, str):
        subclaims_text = subclaims_json
    else:
        try:
            # ensure_ascii=False keeps accented characters readable in the prompt.
            subclaims_text = json.dumps(subclaims_json, ensure_ascii=False, indent=2)
        except (TypeError, ValueError):
            # Not JSON-serializable: fall back to the plain string form that
            # f-string interpolation would have produced.
            subclaims_text = str(subclaims_json)

    # The literal below is runtime text sent to the model; `{{` / `}}` are
    # escaped braces that render as single braces in the JSON example.
    prompt=f'''
You are a **medical summarization quality evaluator**.
Your goal is to decide whether the inclusion or omission of each subclaim in the generated summary is *reasonable*, given the target readability level.

---

### **Input**

```
Readability Level: {difficulty_level}

Reference Summary:
{reference_summary}

Generated Summary:
{generated_summary}

Subclaims with Support Results:
{subclaims_text}
```

---

### **Task**

For each subclaim:

1. Read `result`:

* `1` = the subclaim is supported or clearly mentioned in the generated summary.
* `0` = the subclaim is missing or not supported.

2. Based on readability level and medical relevance, decide whether this inclusion/omission is **reasonable**, **partially reasonable**, or **unreasonable**.

3. Provide a short justification (1–2 sentences) explaining your reasoning.

---

### **Output Format**

Return structured JSON:

```json
{{
"readability_level": "<easy/intermediate/hard>",
"evaluations": [
{{
"subclaim_id": <id>,
"subclaim_text": "<text>",
"result": <0 or 1>,
"reasonableness": "<reasonable | partially_reasonable | unreasonable>",
"justification": "<short explanation>"
}},
...
]
}}
```

---

### **Evaluation Guidelines**

| Readability Level | Reasonable Omission | Unreasonable Omission |
| ----------------- | ------------------------------------------------------------ | ------------------------------------------------- |
| **Easy** | Technical, anatomical, quantitative, or procedural details. | Key clinical findings, diagnoses, or outcomes. |
| **Intermediate** | Minor imaging details or measurements. | Any main diagnostic finding or cause–effect link. |
| **Hard** | Very few omissions acceptable; mostly stylistic compression. | Any missing clinical or diagnostic information. |

'''
    return prompt
| |
|
| | from openai import OpenAI |
| |
|
# Read the local JSON key store and build the shared OpenAI client from it.
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r") as key_file:
    api_keys = json.loads(key_file.read())

# .get() yields None (rather than raising) when no "openai" entry exists.
openai_api_key = api_keys.get("openai")

# Module-level client reused by every request made in this script.
client = OpenAI(api_key=openai_api_key)
def _parse_json_reply(text):
    """Parse a model reply as JSON, tolerating Markdown fences and stray prose.

    Tries, in order: the stripped text as-is; the span from the first
    opening bracket to the last matching closing bracket; and finally the
    text with Markdown code-fence markers removed.

    Raises:
        json.JSONDecodeError: If none of the attempts yields valid JSON.
    """
    cleaned = text.strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Pick whichever of '{' / '[' opens first so a fenced top-level array is
    # not mistaken for its first object.
    openers = [(cleaned.find(ch), ch) for ch in "{[" if ch in cleaned]
    if openers:
        start, opener = min(openers)
        end = cleaned.rfind("}" if opener == "{" else "]")
        if end > start:
            try:
                return json.loads(cleaned[start:end + 1])
            except json.JSONDecodeError:
                pass

    # Last resort: strip fence markers wholesale; a failure here propagates
    # to the caller as json.JSONDecodeError.
    return json.loads(cleaned.replace("```json", "").replace("```", ""))


def openai_return(prompt):
    """Send *prompt* as a single user message and return the reply parsed as JSON.

    Uses the module-level ``client`` with the "gpt-5" model and a generic
    system message.

    Args:
        prompt: Full user prompt text.

    Returns:
        The decoded JSON value from the model's reply.

    Raises:
        ValueError: If the reply carries no text content.
        json.JSONDecodeError: If the reply text cannot be parsed as JSON
            (a subclass of ValueError).
    """
    response = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    content = response.choices[0].message.content
    if content is None:
        # Fail with a clear message instead of an AttributeError on None.strip().
        raise ValueError("Model reply contained no text content")
    return _parse_json_reply(content)
| |
|
| | import json |
# Input datasets: the synthetic summaries and the subclaim-verifier results.
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"

# JSON is UTF-8 by specification; decode explicitly so non-ASCII text does
# not depend on the machine's locale encoding.
with open(file_path, 'r', encoding="utf-8") as f:
    synthetic_data = json.load(f)

with open(file_path_qwen3_32B, 'r', encoding="utf-8") as f:
    qwen3_32B_results = json.load(f)
| |
|
| | |
| | |
import tqdm

# Upper bound on how many source items are evaluated; each item is checked
# once per readability version.
NUM_SAMPLES = 20
VERSIONS = ["easy", "intermediate", "hard"]

save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/syn_data_resonability_check_20_gpt5.json"


def _save_results(results, path):
    """Checkpoint *results* to *path* as JSON without risking a truncated file.

    The data is written to a sibling temp file and then moved over the
    target, so an interruption mid-write leaves the previous checkpoint intact.
    """
    tmp_path = path + ".tmp"
    with open(tmp_path, 'w') as outfile:
        json.dump(results, outfile, indent=2)
    os.replace(tmp_path, path)


# Resume from an earlier run when a checkpoint exists.
res = []
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
    print(f"Resuming from {len(res)} entries")

# Built unconditionally so a fresh run (no checkpoint) has an empty set
# instead of an undefined name.
exist_check_ids = {(item['id'], item['difficulty_level']) for item in res}

# Never index past the end of either input.
num_items = min(NUM_SAMPLES, len(synthetic_data), len(qwen3_32B_results))
total_expected = num_items * len(VERSIONS)

for ind in tqdm.tqdm(range(num_items)):
    print(f"Processing index: {ind}")
    sample = synthetic_data[ind]
    for version in VERSIONS:
        if (sample['id'], version) in exist_check_ids:
            print(f"Skipping {sample['id']} {version}")
            continue

        ref_summary = str(sample['ref_summary']['text'])
        generated_summary = str(sample['readability_versions'][version]['text'])
        # NOTE(review): the verifier results are looked up by `ind` only, so
        # all three versions receive the same support results — confirm this
        # matches how the results file is organized.
        # Serialized as real JSON (not a Python repr) to match the prompt's
        # "Subclaims with Support Results" input.
        subclaims_results = json.dumps(
            qwen3_32B_results[ind]['completeness']['results'],
            ensure_ascii=False,
        )

        try:
            prompt = return_prompts(ref_summary, generated_summary, subclaims_results, version)
            res.append({
                "id": sample['id'],
                "difficulty_level": version,
                "reasonableness": openai_return(prompt),
            })
        except Exception as e:
            # Best-effort run: log the failure and move on; the pair is
            # retried on the next run because it was never saved.
            print(f"Error at {ind} {version}: {e}")
            continue

        # Checkpoint after every successful call so an interruption loses
        # no completed (and paid-for) results.
        print(f"Completed {len(res)} out of {total_expected}")
        _save_results(res, save_path)


# Final write so the output file exists even if nothing new was processed.
_save_results(res, save_path)