import ast
import json
import os
from datetime import datetime

import gradio as gr
| |
|
| | |
| | |
# Input JSON that is flattened into the annotation queue at import time.
DATA_PATH = "/home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_diff_labels_bn_0_80.json"
# Root output folder; each annotator gets a sub-directory beneath it.
SAVE_ROOT = "/home/mshahidul/readctrl/data/annotators_validate_data_Bangla_(0_80)"
# Create the output root up front so per-user folders can be added later.
os.makedirs(SAVE_ROOT, exist_ok=True)
| |
|
| | |
# Static HTML panel describing the 1-5 difficulty scale; rendered inside the
# "Instructions & Calibration" accordion.
GUIDE_HTML = """
<div style="background-color: #f9f9f9; padding: 15px; border-left: 6px solid #4CAF50; border-radius: 4px; margin-bottom: 20px;">
<h3>Rating Guide: Medical Text Difficulty</h3>
<table style="width:100%; border-collapse: collapse; text-align: left;">
<tr style="background-color: #e8f5e9;">
<th style="padding: 8px; border: 1px solid #ddd;">Score</th>
<th style="padding: 8px; border: 1px solid #ddd;">Description</th>
</tr>
<tr><td><b>1</b></td><td><b>Very Easy:</b> Simple words, no medical jargon.</td></tr>
<tr><td><b>2</b></td><td><b>Easy:</b> Conversational medical terms.</td></tr>
<tr><td><b>3</b></td><td><b>Moderate:</b> Standard patient education material.</td></tr>
<tr><td><b>4</b></td><td><b>Hard:</b> Significant technical jargon.</td></tr>
<tr><td><b>5</b></td><td><b>Very Hard:</b> Specialist-level / Academic.</td></tr>
</table>
</div>
"""
| |
|
# Static HTML panel with one easy (level 1-2) and one hard (level 4-5)
# reference sentence, shown next to the rating guide for calibration.
EXAMPLES_HTML = """
<div style="background-color: #ffffff; padding: 15px; border: 1px solid #ddd; border-radius: 4px;">
<h3 style="color: #2e7d32;">Reference Examples</h3>
<div style="display: flex; gap: 15px;">
<div style="flex: 1; background-color: #f1f8e9; padding: 10px; border-radius: 4px;">
<h4>Level 1-2</h4>
<p>"She had a kidney problem... a big blood clot blocked veins in her brain."</p>
</div>
<div style="flex: 1; background-color: #ffebee; padding: 10px; border-radius: 4px;">
<h4>Level 4-5</h4>
<p>"Idiopathic NS inaugurated by cerebral venous thrombosis extended to the right jugular vein."</p>
</div>
</div>
</div>
"""
def parse_diff_label_texts(raw_value):
    """
    Coerce a ``diff_label_texts`` field into a dict.

    Accepted inputs:
    - dict (returned unchanged)
    - JSON object string
    - Python-dict-like string (e.g. single-quoted keys), read with
      ``ast.literal_eval``

    Any other input -- a non-string/non-dict value, a blank string, text that
    cannot be parsed, or text that parses to something other than a dict --
    yields an empty dict. This function never raises for malformed text.
    """
    if isinstance(raw_value, dict):
        return raw_value

    if not isinstance(raw_value, str):
        return {}

    text = raw_value.strip()
    if not text:
        return {}

    # Strict JSON first. If it parses, the result is final even when it is
    # not a dict (a JSON list/number is never a valid label mapping).
    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, RecursionError):
        pass
    else:
        return parsed if isinstance(parsed, dict) else {}

    # Fall back to a Python literal. literal_eval only evaluates literals, but
    # besides ValueError/SyntaxError it can raise TypeError (e.g. an
    # unhashable dict key), MemoryError or RecursionError on malformed input.
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return {}
    return parsed if isinstance(parsed, dict) else {}
| | import ast |
| | |
def normalize_dataset(raw_dataset):
    """
    Flatten the raw dataset into a queue of annotation items.

    Each dict entry of ``raw_dataset`` contributes one output item per
    ``(label, text)`` pair found in its ``diff_label_texts`` field. That field
    may already be a dict or may be a string that
    ``parse_diff_label_texts`` can decode. Every output item has the keys
    ``index``, ``id``, ``label`` and ``generated_summary``; ``index`` and
    ``id`` are copied from the source entry (``None`` when absent).

    Entries that are not dicts are skipped instead of raising, and entries
    whose ``diff_label_texts`` is missing or unparsable contribute nothing.
    """
    normalized = []

    for item in raw_dataset:
        if not isinstance(item, dict):
            # Malformed entry (e.g. null or a bare string): nothing to rate.
            continue

        raw_texts = item.get("diff_label_texts")
        if isinstance(raw_texts, dict):
            label_texts = raw_texts
        else:
            label_texts = parse_diff_label_texts(raw_texts)

        normalized.extend(
            {
                "index": item.get("index"),
                "id": item.get("id"),
                "label": label,
                "generated_summary": text,
            }
            for label, text in label_texts.items()
        )

    return normalized
| |
|
| |
|
# Load and flatten the dataset once at import time, failing fast when it is
# unusable. Explicit exceptions are raised rather than ``assert`` because
# assertions are removed under ``python -O``, which would let the app start
# with FULL_DATASET undefined or empty.
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"Data file not found at {DATA_PATH}")

with open(DATA_PATH, "r", encoding="utf-8") as f:
    RAW_DATASET = json.load(f)

FULL_DATASET = normalize_dataset(RAW_DATASET)
print(len(FULL_DATASET))
if not FULL_DATASET:
    raise ValueError(f"No valid items found in dataset: {DATA_PATH}")
| |
|
| | |
def get_user_dir(username):
    """
    Return the per-annotator directory path under SAVE_ROOT.

    Only alphanumeric characters, spaces, underscores and hyphens of
    ``username`` are kept; surrounding whitespace is stripped afterwards. If
    nothing remains, the folder name falls back to "anonymous". The directory
    itself is not created here.
    """
    allowed_extras = (' ', '_', '-')
    kept_chars = [ch for ch in username if ch.isalnum() or ch in allowed_extras]
    folder_name = "".join(kept_chars).strip()
    if not folder_name:
        folder_name = "anonymous"
    return os.path.join(SAVE_ROOT, folder_name)
| |
|
def save_state(user_dir, state_dict):
    """
    Persist ``state_dict`` as ``state.json`` inside ``user_dir``.

    The JSON is first written to a sibling temporary file and then moved over
    ``state.json`` with ``os.replace``. Opening ``state.json`` directly in
    write mode would truncate it immediately, so a serialization error or an
    interruption mid-write would destroy the annotator's saved progress.

    If writing fails, the temporary file is removed, any existing
    ``state.json`` is left untouched, and the original exception propagates.
    """
    state_path = os.path.join(user_dir, "state.json")
    tmp_path = state_path + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(state_dict, f, indent=4)
        os.replace(tmp_path, state_path)
    except BaseException:
        # Do not leave a half-written temp file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
| |
|
def load_state(user_dir):
    """
    Read the saved session state from ``state.json`` inside ``user_dir``.

    Returns the decoded JSON content, or ``None`` when the file does not
    exist. Errors while reading or decoding an existing file propagate.
    """
    state_path = os.path.join(user_dir, "state.json")
    if not os.path.exists(state_path):
        return None
    with open(state_path, "r") as handle:
        saved_state = json.load(handle)
    return saved_state
| |
|
| | |
def get_current_ui_values(state):
    """
    Build the display values for the item at ``state['current_index']``.

    Returns a tuple ``(text, progress, rating)``:
    - text: the item's ``generated_summary``
    - progress: a label of the form "Item <n> of <total>" (1-based)
    - rating: the rating of the first stored result whose
      ``queue_position`` equals the current index, or 3 if there is none
    """
    position = state['current_index']
    queue = state['queue']
    item = queue[position]

    # Pre-fill the slider with an earlier rating for this position, if any.
    previous_ratings = (
        entry['rating']
        for entry in state['results']
        if entry['queue_position'] == position
    )
    rating = next(previous_ratings, 3)

    progress = f"Item {position + 1} of {len(queue)}"
    return item['generated_summary'], progress, rating
| |
|
def start_session(username):
    """
    Start a new annotation session or resume a saved one for ``username``.

    Returns six values matching the outputs wired to the start button:
    (intro_box update, task_box update, text, progress, rating, state).

    - Empty username: shows a warning and leaves the UI unchanged.
    - Saved ``state.json`` found in the user's directory: that state is
      resumed as-is.
    - Otherwise: a fresh state is created with a copy of FULL_DATASET as the
      queue and saved immediately.
    """
    if not username:
        gr.Warning("Please enter a username!")
        # Must return one value per wired output (six). The five visible
        # components are left untouched; the session state stays empty, which
        # is what it already is before any session has started.
        return [gr.update()] * 5 + [None]

    user_dir = get_user_dir(username)
    os.makedirs(user_dir, exist_ok=True)
    existing_state = load_state(user_dir)

    if existing_state:
        gr.Info(f"Welcome back! Resuming from item {existing_state['current_index'] + 1}.")
        state = existing_state
    else:
        state = {
            "username": username,
            "current_index": 0,
            "queue": list(FULL_DATASET),
            "results": [],
            "completed": False,
        }
        save_state(user_dir, state)

    text, progress, rating = get_current_ui_values(state)
    # Hide the intro column, reveal the task column, and populate the task.
    return (gr.update(visible=False), gr.update(visible=True), text, progress, rating, state)
| |
|
def submit_rating(doc_slider, state):
    """
    Record the slider rating for the current item and advance the queue.

    Returns four values matching the outputs wired to the submit button:
    (text, progress, slider value, state).

    Behavior:
    - ``state`` is None (no session): returns an error placeholder.
    - Any earlier result for the same queue position is replaced, and results
      are kept sorted by ``queue_position``.
    - ``state.json`` and ``annotation_results.json`` are both rewritten on
      every submit, including the final one, so the exported results always
      contain the last rating.
    - On the last item the state is marked completed and the slider keeps the
      value just submitted. Resetting it would make an accidental second
      click overwrite the final rating with the reset value.
    """
    if state is None:
        # One value per wired output (four).
        return "", "Error", 3, None

    user_dir = get_user_dir(state['username'])
    idx = state['current_index']
    current_item = state['queue'][idx]

    new_result = {
        "queue_position": idx,
        "index": current_item.get('index', idx),
        "doc_id": current_item.get('id', current_item.get('index', 'no_id')),
        "label": current_item.get('label', 'no_label'),
        "rating": doc_slider,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    # Re-rating an item (after going back) replaces its previous result.
    state['results'] = [r for r in state['results'] if r['queue_position'] != idx]
    state['results'].append(new_result)
    state['results'].sort(key=lambda x: x['queue_position'])

    is_last_item = idx + 1 >= len(state['queue'])
    if is_last_item:
        state['completed'] = True
    else:
        state['current_index'] += 1
    save_state(user_dir, state)

    # Export the results on every submit so the file never lags the state.
    with open(os.path.join(user_dir, "annotation_results.json"), "w") as f:
        json.dump(state['results'], f, indent=4)

    if is_last_item:
        return "✅ ALL TASKS COMPLETED", "Status: Finished", doc_slider, state

    text, progress, rating = get_current_ui_values(state)
    return text, progress, rating, state
| |
|
def go_back(state):
    """
    Step back to the previous queue item.

    Returns four values (text, progress, rating, state) for the outputs wired
    to the "Previous" button. When there is no session or the current index
    is already 0, a warning is shown and the three display components are
    left unchanged. The decremented index is only changed in the in-memory
    state; nothing is written to disk here.
    """
    cannot_go_back = state is None or state['current_index'] <= 0
    if cannot_go_back:
        gr.Warning("Already at the first item.")
        no_change = gr.update()
        return [no_change, no_change, no_change, state]

    state['current_index'] -= 1
    text, progress, rating = get_current_ui_values(state)
    return text, progress, rating, state
| |
|
| | |
# UI layout: an intro column (username entry) that is swapped for the task
# column once a session starts, plus a collapsible instructions panel.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Per-browser-session state dict produced by start_session.
    session_state = gr.State()

    gr.Markdown("# Medical Text Readability Annotation")

    with gr.Accordion("Instructions & Calibration", open=False):
        gr.HTML(GUIDE_HTML)
        gr.HTML(EXAMPLES_HTML)

    with gr.Column(visible=True) as intro_box:
        username_input = gr.Textbox(label="Enter Your Name/ID", placeholder="e.g., user_101")
        btn_start = gr.Button("Start / Resume Annotation", variant="primary")

    with gr.Column(visible=False) as task_box:
        progress_label = gr.Label(label="Overall Progress")
        doc_display = gr.Textbox(interactive=False, lines=12, label="Medical Text")
        doc_slider = gr.Slider(1, 5, step=1, label="Difficulty (1=Easy, 5=Hard)", value=3)

        with gr.Row():
            btn_prev = gr.Button("⬅️ Previous", variant="secondary")
            btn_submit = gr.Button("Submit & Next ➡️", variant="primary")

    # Components refreshed whenever the displayed item changes.
    item_outputs = [doc_display, progress_label, doc_slider, session_state]

    # Starting also toggles the two columns, hence the two extra outputs.
    btn_start.click(
        fn=start_session,
        inputs=[username_input],
        outputs=[intro_box, task_box, *item_outputs],
    )

    btn_submit.click(
        fn=submit_rating,
        inputs=[doc_slider, session_state],
        outputs=item_outputs,
    )

    btn_prev.click(
        fn=go_back,
        inputs=[session_state],
        outputs=item_outputs,
    )


if __name__ == "__main__":
    # share=True asks Gradio to create a public share link for the app.
    demo.launch(share=True)