| import os, json |
def return_promptst(reference_summary, generated_summary, subclaims_json, difficulty_level):
    """Build the medical-readability evaluator prompt for an LLM judge.

    Args:
        reference_summary: The gold/reference summary text.
        generated_summary: The generated summary to evaluate.
        subclaims_json: Stringified subclaim-verifier results (entries with
            ``"result": 0`` are the omitted subclaims to be judged).
        difficulty_level: One of "easy", "intermediate", or "hard".

    Returns:
        The fully substituted prompt string requesting strict-JSON output.
    """
    # NOTE: triple-quoted f-string — the literal braces in the OUTPUT FORMAT
    # section are escaped as {{ }} so only the four input fields interpolate.
    # Fixed here: the original prompt showed `"result": 0"` (stray trailing
    # quote), which is invalid JSON and could confuse the evaluator model.
    prompt = f'''
**SYSTEM / ROLE INSTRUCTION:**
You are a **medical readability evaluator**.
Your task is to judge whether omitted subclaims (those with `"result": 0`) from a generated summary are *reasonably omitted* based on the intended **readability level**: *easy*, *intermediate*, or *hard*.
You evaluate this from the standpoint of clarity, faithfulness, and readability goals.

---

### **READABILITY GUIDELINES**

| Level | Target Audience | Content Expectation | Technical Detail Allowed |
| :--------------- | :--------------------------------------- | :-------------------------------------------------------------- | :--------------------------------------------------------------- |
| **Easy** | General public | Focus on main events, outcomes, and diagnoses in plain Spanish. | Minimal β avoid measurements, anatomy, and test results. |
| **Intermediate** | Educated lay readers or medical students | Include key findings and procedures in simplified form. | Moderate β basic terms and causes allowed. |
| **Hard** | Medical professionals | Retain most technical information and precision. | High β measurements, anatomy, and test interpretations expected. |

---

### **INPUT FIELDS**

**Reference summary:**
{reference_summary}

**Generated summary ({difficulty_level}):**
{generated_summary}

**Subclaims and results:**
{subclaims_json}

---

### **TASK INSTRUCTIONS**

1. Focus on subclaims with `"result": 0` (not supported by the generated summary).
2. For each omitted subclaim:

* Decide whether omission is **reasonable** given the readability level.
* Label as: `"yes"`, `"no"`, or `"borderline"`.
* Write a brief justification (1β2 sentences).
3. After individual evaluations, assign a **reasonableness score (0β5)** using this scale:

* **5** = All omissions appropriate for target readability.
* **4** = Minor omissions could improve completeness.
* **3** = Some omissions reduce understanding or medical clarity.
* **2** = Many important omissions harm faithfulness.
* **1** = Major omissions misrepresent case.
* **0** = Summary fails to reflect key medical information.
4. End with an **overall explanation (3β5 sentences)** describing:

* The main reasoning behind the score.
* Whether the summary fits its intended readability level.
* Suggestions for improvement if needed.

---

### **OUTPUT FORMAT (strict JSON)**

```json
{{
"evaluation_table": [
{{
"id": <subclaim_id>,
"subclaim": "<text>",
"reasonable_omission": "<yes | no | borderline>",
"explanation": "<short reason>"
}}
],
"reasonableness_score": <0-5>,
"overall_explanation": "<concise paragraph>"
}}
```
'''
    return prompt
|
|
| from openai import OpenAI |
|
|
| file_path = "/home/mshahidul/api_new.json" |
| with open(file_path, "r") as file: |
| api_keys = json.load(file) |
|
|
| openai_api_key = api_keys.get("openai") |
|
|
| client = OpenAI(api_key=openai_api_key) |
def openai_return(prompt):
    """Send ``prompt`` to gpt-5-mini and return the parsed JSON reply.

    Args:
        prompt: The fully built evaluator prompt.

    Returns:
        The model's reply parsed into a Python object (dict per the prompt's
        strict-JSON output format).

    Raises:
        ValueError: If the reply contains no JSON object.
        json.JSONDecodeError: If the extracted text is not valid JSON.
    """
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    raw = response.choices[0].message.content
    # Models often wrap JSON in ```json fences.  The original blanket
    # .replace("```", "") would also mangle backticks appearing inside JSON
    # string values; slicing the outermost {...} span is robust to both
    # fences and any leading/trailing chatter.
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in model response: {raw!r}")
    return json.loads(raw[start:end + 1])
|
|
| import json |
| file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json" |
|
|
| with open(file_path, 'r') as f: |
| synthetic_data = json.load(f) |
|
|
| file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json" |
|
|
| with open(file_path_qwen3_32B, 'r') as f: |
| qwen3_32B_results = json.load(f) |
|
|
| |
| |
# --- Main evaluation loop --------------------------------------------------
res = []
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/resonability_check_100_gpt5.json"

# Resume support: reload any previously saved results.
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
    print(f"Resuming from {len(res)} entries")

import tqdm

# BUG FIX: each source index appends 3 entries (easy/intermediate/hard), so
# the resume point is len(res) // 3 indices, not len(res).  The original
# range(len(res), 100) skipped unprocessed indices as soon as more than one
# triple had been saved.  Any trailing partial triple is dropped so no
# (index, version) pair is duplicated on resume.
start_index = len(res) // 3
res = res[:start_index * 3]

for ind in tqdm.tqdm(range(start_index, 100)):
    print(f"Processing index: {ind}")
    for version in ["easy", "intermediate", "hard"]:
        ref_summary = str(synthetic_data[ind]['ref_summary']['text'])
        generated_summary = str(synthetic_data[ind]['readability_versions'][version]['text'])
        # The verifier results are a Python structure; stringify for the prompt.
        subclaims_results = str(qwen3_32B_results[ind]['completeness']['results'])
        try:
            prompt = return_promptst(ref_summary, generated_summary, subclaims_results, version)
            res.append({
                "id": synthetic_data[ind]['id'],
                "difficulty_level": version,
                # NOTE(review): key name kept for output-file compatibility,
                # but the value is the parsed model response, not the prompt.
                "prompt": openai_return(prompt),
            })
        except Exception as e:
            # Best effort: log and continue so one bad case doesn't kill the run.
            print(f"Error at {ind} {version}: {e}")
        else:
            # Periodic checkpoint (300 total = 100 indices x 3 levels).
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of 300")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)

# Final save of all accumulated results.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)