Ellie5757575757 committed on
Commit
01de4e1
·
verified ·
1 Parent(s): 1fa5046

Upload 15 files

Browse files
Cha_Json.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ cha2json.py ── 將單一 CLAN .cha 轉成 JSON(強化 %mor/%wor 對齊)
5
+
6
+ 只要:
7
+ $ python3 cha2json.py
8
+ """
9
+
10
+ # ────────── 這兩行改成你的固定路徑 ──────────
11
+ INPUT_CHA = "/workspace/SH001/website/ACWT01a(4).cha"
12
+ OUTPUT_JSON = "/workspace/SH001/website/Output.json"
13
+ # ──────────────────────────────────────────
14
+
15
+ import re, json, sys
16
+ from pathlib import Path
17
+ from collections import defaultdict
18
+
19
TAG_PREFIXES = ("*PAR:", "*INV:", "%mor:", "%gra:", "%wor:", "@")
WORD_RE = re.compile(r"[A-Za-z0-9]+")

# ---------- Synonym sets (speed up alignment) ----------
SYN_SETS = [
    {"be", "am", "is", "are", "was", "were"},
    {"have", "has", "had"},
    {"do", "does", "did"},
    {"go", "going", "went", "gone"},
]


def same_syn(a, b):
    """Return True when *a* and *b* are inflected forms of one SYN_SETS lemma."""
    for group in SYN_SETS:
        if a in group and b in group:
            return True
    return False


def canonical(txt):
    """Reduce a %mor token or %wor word to a lowercase comparison key.

    Everything after the first separator character (~ - & |) is dropped,
    then the first alphanumeric run of the remainder is returned in lower
    case; "" when no alphanumeric run exists.
    """
    head = re.split(r"[~\-\&|]", txt, 1)[0]
    match = WORD_RE.search(head)
    return match.group(0).lower() if match else ""
+
37
+ def merge_multiline(block): # 合併跨行 %mor/%wor/%gra
38
+ merged, buf = [], None
39
+ for raw in block:
40
+ ln = raw.rstrip("\n").replace("\x15", "")
41
+ if ln.lstrip().startswith("%") and ":" in ln:
42
+ if buf: merged.append(buf)
43
+ buf = ln
44
+ else:
45
+ if buf and ln.strip(): buf += " " + ln.strip()
46
+ else: merged.append(ln)
47
+ if buf: merged.append(buf)
48
+ return "\n".join(merged)
49
+
50
+ # ────────── 主轉換 ──────────
51
+ def cha_to_json(lines):
52
+ pos_map = defaultdict(lambda: len(pos_map) + 1)
53
+ gra_map = defaultdict(lambda: len(gra_map) + 1)
54
+ aphasia_map = defaultdict(lambda: len(aphasia_map))
55
+
56
+ data, sent, i = [], None, 0
57
+ while i < len(lines):
58
+ line = lines[i]
59
+
60
+ # --- 標頭 / 結尾 ---
61
+ if line.startswith("@UTF8"):
62
+ sent = {"sentence_id": f"S{len(data)+1}",
63
+ "sentence_pid": None,
64
+ "aphasia_type": None,
65
+ "dialogues": []}
66
+ i += 1; continue
67
+ if line.startswith("@End"):
68
+ if sent and sent["aphasia_type"] and sent["dialogues"]:
69
+ data.append(sent)
70
+ sent = None; i += 1; continue
71
+
72
+ # --- 句子屬性 ---
73
+ if sent and line.startswith("@PID:"):
74
+ parts = line.split("\t")
75
+ if len(parts) > 1:
76
+ sent["sentence_pid"] = parts[1].strip()
77
+ i += 1; continue
78
+ if sent and line.startswith("@ID:") and "|PAR|" in line:
79
+ aph = line.split("|")[5].strip().upper()
80
+ aphasia_map[aph]
81
+ sent["aphasia_type"] = aph
82
+ i += 1; continue
83
+
84
+ # --- 對話行 ---
85
+ if sent and (line.startswith("*INV:") or line.startswith("*PAR:")):
86
+ role = "INV" if line.startswith("*INV:") else "PAR"
87
+ if not sent["dialogues"]:
88
+ sent["dialogues"].append({"INV": [], "PAR": []})
89
+ if role == "INV" and sent["dialogues"][-1]["PAR"]:
90
+ sent["dialogues"].append({"INV": [], "PAR": []})
91
+ sent["dialogues"][-1][role].append(
92
+ {"tokens": [], "word_pos_ids": [], "word_grammar_ids": [], "word_durations": []})
93
+ i += 1; continue
94
+
95
+ # --- %mor ---
96
+ if sent and line.startswith("%mor:"):
97
+ blk = [line]; i += 1
98
+ while i < len(lines) and not lines[i].lstrip().startswith(TAG_PREFIXES):
99
+ blk.append(lines[i]); i += 1
100
+ units = merge_multiline(blk).replace("%mor:", "").strip().split()
101
+
102
+ toks, pos_ids = [], []
103
+ for u in units:
104
+ if "|" in u:
105
+ pos, rest = u.split("|", 1)
106
+ toks.append(rest.split("|", 1)[0])
107
+ pos_ids.append(pos_map[pos])
108
+
109
+ dlg = sent["dialogues"][-1]
110
+ tgt = dlg["PAR"][-1] if dlg["PAR"] else dlg["INV"][-1]
111
+ tgt["tokens"], tgt["word_pos_ids"] = toks, pos_ids
112
+ continue
113
+
114
+ # --- %wor ---
115
+ if sent and line.startswith("%wor:"):
116
+ blk = [line]; i += 1
117
+ while i < len(lines) and not lines[i].lstrip().startswith(TAG_PREFIXES):
118
+ blk.append(lines[i]); i += 1
119
+ merged = merge_multiline(blk).replace("%wor:", "").strip()
120
+ raw = re.findall(r"(\S+)\s+(\d+)\D+(\d+)", merged)
121
+ wor = [(w, int(e)-int(s)) for w,s,e in raw]
122
+
123
+ dlg = sent["dialogues"][-1]
124
+ tgt = dlg["PAR"][-1] if dlg["PAR"] else dlg["INV"][-1]
125
+
126
+ aligned, j = [], 0
127
+ for tok in tgt["tokens"]:
128
+ c_tok = canonical(tok); match = None
129
+ for k in range(j, len(wor)):
130
+ c_w = canonical(wor[k][0])
131
+ if (c_tok == c_w or c_w.startswith(c_tok) or c_tok.startswith(c_w)
132
+ or same_syn(c_tok, c_w)):
133
+ match = wor[k]; j = k+1; break
134
+ aligned.append([tok, match[1] if match else 0])
135
+ tgt["word_durations"] = aligned
136
+ continue
137
+
138
+ # --- %gra ---
139
+ if sent and line.startswith("%gra:"):
140
+ blk = [line]; i += 1
141
+ while i < len(lines) and not lines[i].lstrip().startswith(TAG_PREFIXES):
142
+ blk.append(lines[i]); i += 1
143
+ units = merge_multiline(blk).replace("%gra:", "").strip().split()
144
+
145
+ triples = []
146
+ for u in units:
147
+ a,b,r = u.split("|")
148
+ if a.isdigit() and b.isdigit():
149
+ triples.append([int(a), int(b), gra_map[r]])
150
+
151
+ dlg = sent["dialogues"][-1]
152
+ (dlg["PAR"][-1] if dlg["PAR"] else dlg["INV"][-1])["word_grammar_ids"] = triples
153
+ continue
154
+
155
+ i += 1 # 其他行
156
+
157
+ return {"sentences": data,
158
+ "pos_mapping": dict(pos_map),
159
+ "grammar_mapping": dict(gra_map),
160
+ "aphasia_types": dict(aphasia_map)}
161
+
162
+ # ────────── 執行 ──────────
163
+ def main():
164
+ in_path = Path(INPUT_CHA)
165
+ out_path = Path(OUTPUT_JSON)
166
+
167
+ if not in_path.exists():
168
+ sys.exit(f"❌ 找不到檔案: {in_path}")
169
+
170
+ with in_path.open("r", encoding="utf-8") as fh:
171
+ lines = fh.readlines()
172
+
173
+ dataset = cha_to_json(lines)
174
+ out_path.parent.mkdir(parents=True, exist_ok=True)
175
+ with out_path.open("w", encoding="utf-8") as fh:
176
+ json.dump(dataset, fh, ensure_ascii=False, indent=4)
177
+
178
+ print(f"✅ 轉換完成 → {out_path}")
179
+
180
+ if __name__ == "__main__":
181
+ main()
Json__Output.py ADDED
@@ -0,0 +1,896 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 失語症分類推理系統
4
+ 用於載入訓練好的模型並對新的語音數據進行分類預測
5
+ """
6
+
7
+ import json
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+ import numpy as np
12
+ import os
13
+ import math
14
+ from typing import Dict, List, Optional, Tuple
15
+ from dataclasses import dataclass
16
+ import pandas as pd
17
+ from transformers import AutoTokenizer, AutoModel
18
+ from collections import defaultdict
19
+
20
+ # 重新定義模型結構(與訓練程式碼一致)
21
@dataclass
class ModelConfig:
    """Hyper-parameters for StableAphasiaClassifier.

    Values must match those used during training, otherwise the saved
    state dict will not load.
    """
    model_name: str = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
    max_length: int = 512            # BERT input length (tokens)
    hidden_size: int = 768
    pos_vocab_size: int = 150        # size of the POS-id embedding table
    pos_emb_dim: int = 64
    grammar_dim: int = 3             # each grammar item is a (head, dep, rel) triple
    grammar_hidden_dim: int = 64
    duration_hidden_dim: int = 128
    prosody_dim: int = 32
    num_attention_heads: int = 8
    attention_dropout: float = 0.3
    classifier_hidden_dims: List[int] = None  # filled in __post_init__ (mutable default)
    dropout_rate: float = 0.3

    def __post_init__(self):
        # Default MLP head sizes; set here because a list cannot be a
        # dataclass field default.
        if self.classifier_hidden_dims is None:
            self.classifier_hidden_dims = [512, 256]
40
+
41
class StablePositionalEncoding(nn.Module):
    """Additive positional encoding: fixed sinusoids plus a small learnable term.

    Both components are scaled by 0.1 before being added to the input, so
    the encoding perturbs rather than dominates the BERT hidden states.
    """

    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()
        self.d_model = d_model

        # Standard transformer sinusoidal table (max_len, d_model).
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             (-math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        # Buffer (not a parameter): moves with the module but is not trained.
        self.register_buffer('pe', pe.unsqueeze(0))
        # Small random learnable offset per position.
        self.learnable_pe = nn.Parameter(torch.randn(max_len, d_model) * 0.01)

    def forward(self, x):
        # x: (batch, seq_len, d_model) — assumed; TODO confirm at call sites.
        seq_len = x.size(1)
        sinusoidal = self.pe[:, :seq_len, :].to(x.device)
        learnable = self.learnable_pe[:seq_len, :].unsqueeze(0).expand(x.size(0), -1, -1)
        return x + 0.1 * (sinusoidal + learnable)
62
+
63
class StableMultiHeadAttention(nn.Module):
    """Multi-head self-attention block with residual connection and LayerNorm.

    Output is layer_norm(attention(x) + x); feature_dim must be divisible
    by num_heads.
    """

    def __init__(self, feature_dim: int, num_heads: int = 4, dropout: float = 0.3):
        super().__init__()
        self.num_heads = num_heads
        self.feature_dim = feature_dim
        self.head_dim = feature_dim // num_heads

        assert feature_dim % num_heads == 0

        self.query = nn.Linear(feature_dim, feature_dim)
        self.key = nn.Linear(feature_dim, feature_dim)
        self.value = nn.Linear(feature_dim, feature_dim)
        self.dropout = nn.Dropout(dropout)
        self.output_proj = nn.Linear(feature_dim, feature_dim)
        self.layer_norm = nn.LayerNorm(feature_dim)

    def forward(self, x, mask=None):
        # x: (batch, seq_len, feature_dim); mask: optional padding mask where
        # 0 marks positions to ignore (2-D masks are broadcast over heads).
        batch_size, seq_len, _ = x.size()

        # Project and reshape to (batch, heads, seq_len, head_dim).
        Q = self.query(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = self.key(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = self.value(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)

        # Scaled dot-product attention scores.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.head_dim)

        if mask is not None:
            if mask.dim() == 2:
                mask = mask.unsqueeze(1).unsqueeze(1)
            # In-place fill: masked positions get a large negative logit.
            scores.masked_fill_(mask == 0, -1e9)

        attn_weights = F.softmax(scores, dim=-1)
        attn_weights = self.dropout(attn_weights)

        context = torch.matmul(attn_weights, V)
        # Merge heads back into (batch, seq_len, feature_dim).
        context = context.transpose(1, 2).contiguous().view(batch_size, seq_len, self.feature_dim)

        output = self.output_proj(context)
        # Residual + LayerNorm.
        return self.layer_norm(output + x)
101
+
102
class StableLinguisticFeatureExtractor(nn.Module):
    """Encode POS, grammar, duration and prosody streams into one pooled vector.

    Each stream is projected independently, concatenated along the feature
    axis, fused by a small MLP, then mean-pooled over valid (masked) steps.
    """

    def __init__(self, config: ModelConfig):
        super().__init__()
        self.config = config

        # POS ids -> embeddings (+ self-attention over the sequence).
        self.pos_embedding = nn.Embedding(config.pos_vocab_size, config.pos_emb_dim, padding_idx=0)
        self.pos_attention = StableMultiHeadAttention(config.pos_emb_dim, num_heads=4)

        # Grammar triples (3 ints) -> hidden vector.
        self.grammar_projection = nn.Sequential(
            nn.Linear(config.grammar_dim, config.grammar_hidden_dim),
            nn.Tanh(),
            nn.LayerNorm(config.grammar_hidden_dim),
            nn.Dropout(config.dropout_rate * 0.3)
        )

        # Scalar duration -> hidden vector.
        self.duration_projection = nn.Sequential(
            nn.Linear(1, config.duration_hidden_dim),
            nn.Tanh(),
            nn.LayerNorm(config.duration_hidden_dim)
        )

        # Prosody vector -> same-size projection.
        self.prosody_projection = nn.Sequential(
            nn.Linear(config.prosody_dim, config.prosody_dim),
            nn.ReLU(),
            nn.LayerNorm(config.prosody_dim)
        )

        total_feature_dim = (config.pos_emb_dim + config.grammar_hidden_dim +
                             config.duration_hidden_dim + config.prosody_dim)
        # Fuse the concatenated streams and halve the dimensionality.
        self.feature_fusion = nn.Sequential(
            nn.Linear(total_feature_dim, total_feature_dim // 2),
            nn.Tanh(),
            nn.LayerNorm(total_feature_dim // 2),
            nn.Dropout(config.dropout_rate)
        )

    def forward(self, pos_ids, grammar_ids, durations, prosody_features, attention_mask):
        # All sequence inputs are assumed (batch, seq_len, ...) aligned with
        # attention_mask — TODO confirm against the data pipeline.
        batch_size, seq_len = pos_ids.size()

        # Clamp out-of-vocabulary POS ids to keep the embedding lookup safe.
        pos_ids_clamped = pos_ids.clamp(0, self.config.pos_vocab_size - 1)
        pos_embeds = self.pos_embedding(pos_ids_clamped)
        pos_features = self.pos_attention(pos_embeds, attention_mask)

        grammar_features = self.grammar_projection(grammar_ids.float())
        duration_features = self.duration_projection(durations.unsqueeze(-1).float())
        prosody_features = self.prosody_projection(prosody_features.float())

        combined_features = torch.cat([
            pos_features, grammar_features, duration_features, prosody_features
        ], dim=-1)

        fused_features = self.feature_fusion(combined_features)

        # Masked mean pooling over the sequence dimension.
        mask_expanded = attention_mask.unsqueeze(-1).float()
        pooled_features = torch.sum(fused_features * mask_expanded, dim=1) / torch.sum(mask_expanded, dim=1)

        return pooled_features
159
+
160
class StableAphasiaClassifier(nn.Module):
    """BERT + linguistic-feature classifier for aphasia type.

    Produces class logits plus two auxiliary heads: a 4-way severity
    distribution and a scalar fluency score in [0, 1].
    """

    def __init__(self, config: ModelConfig, num_labels: int):
        super().__init__()
        self.config = config
        self.num_labels = num_labels

        self.bert = AutoModel.from_pretrained(config.model_name)
        self.bert_config = self.bert.config

        self.positional_encoder = StablePositionalEncoding(
            d_model=self.bert_config.hidden_size,
            max_len=config.max_length
        )

        self.linguistic_extractor = StableLinguisticFeatureExtractor(config)

        bert_dim = self.bert_config.hidden_size
        # Must equal the output size of StableLinguisticFeatureExtractor
        # (its fusion layer halves the concatenated dimension).
        linguistic_dim = (config.pos_emb_dim + config.grammar_hidden_dim +
                          config.duration_hidden_dim + config.prosody_dim) // 2

        self.feature_fusion = nn.Sequential(
            nn.Linear(bert_dim + linguistic_dim, bert_dim),
            nn.LayerNorm(bert_dim),
            nn.Tanh(),
            nn.Dropout(config.dropout_rate)
        )

        self.classifier = self._build_classifier(bert_dim, num_labels)

        # Auxiliary head: probability over 4 severity levels.
        self.severity_head = nn.Sequential(
            nn.Linear(bert_dim, 4),
            nn.Softmax(dim=-1)
        )

        # Auxiliary head: fluency score in [0, 1].
        self.fluency_head = nn.Sequential(
            nn.Linear(bert_dim, 1),
            nn.Sigmoid()
        )

    def _build_classifier(self, input_dim: int, num_labels: int):
        """Stack Linear/LayerNorm/Tanh/Dropout blocks per classifier_hidden_dims."""
        layers = []
        current_dim = input_dim

        for hidden_dim in self.config.classifier_hidden_dims:
            layers.extend([
                nn.Linear(current_dim, hidden_dim),
                nn.LayerNorm(hidden_dim),
                nn.Tanh(),
                nn.Dropout(self.config.dropout_rate)
            ])
            current_dim = hidden_dim

        layers.append(nn.Linear(current_dim, num_labels))
        return nn.Sequential(*layers)

    def forward(self, input_ids, attention_mask, labels=None,
                word_pos_ids=None, word_grammar_ids=None, word_durations=None,
                prosody_features=None, **kwargs):
        # labels is accepted for API compatibility but no loss is computed
        # here ("loss" is always None in the returned dict).

        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = bert_outputs.last_hidden_state

        position_enhanced = self.positional_encoder(sequence_output)
        pooled_output = self._attention_pooling(position_enhanced, attention_mask)

        if all(x is not None for x in [word_pos_ids, word_grammar_ids, word_durations]):
            # Default prosody to zeros when the caller provides none.
            if prosody_features is None:
                batch_size, seq_len = input_ids.size()
                prosody_features = torch.zeros(
                    batch_size, seq_len, self.config.prosody_dim,
                    device=input_ids.device
                )

            linguistic_features = self.linguistic_extractor(
                word_pos_ids, word_grammar_ids, word_durations,
                prosody_features, attention_mask
            )
        else:
            # No linguistic inputs: substitute a zero vector of matching size.
            linguistic_features = torch.zeros(
                input_ids.size(0),
                (self.config.pos_emb_dim + self.config.grammar_hidden_dim +
                 self.config.duration_hidden_dim + self.config.prosody_dim) // 2,
                device=input_ids.device
            )

        combined_features = torch.cat([pooled_output, linguistic_features], dim=1)
        fused_features = self.feature_fusion(combined_features)

        logits = self.classifier(fused_features)
        severity_pred = self.severity_head(fused_features)
        fluency_pred = self.fluency_head(fused_features)

        return {
            "logits": logits,
            "severity_pred": severity_pred,
            "fluency_pred": fluency_pred,
            "loss": None
        }

    def _attention_pooling(self, sequence_output, attention_mask):
        """Pool (batch, seq, dim) to (batch, dim) with content-derived weights.

        Weights come from a softmax over the per-position feature sums,
        re-masked and re-normalized so padding contributes nothing.
        """
        attention_weights = torch.softmax(
            torch.sum(sequence_output, dim=-1, keepdim=True), dim=1
        )
        attention_weights = attention_weights * attention_mask.unsqueeze(-1).float()
        # +1e-9 guards against division by zero on an all-masked row.
        attention_weights = attention_weights / (torch.sum(attention_weights, dim=1, keepdim=True) + 1e-9)
        pooled = torch.sum(sequence_output * attention_weights, dim=1)
        return pooled
267
+
268
+
269
+ class AphasiaInferenceSystem:
270
+ """失語症分類推理系統"""
271
+
272
+ def __init__(self, model_dir: str):
273
+ """
274
+ 初始化推理系統
275
+ Args:
276
+ model_dir: 訓練好的模型目錄路徑
277
+ """
278
+ self.model_dir = '/workspace/SH001/adaptive_aphasia_model'
279
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
280
+
281
+ # 失語症類型描述
282
+ self.aphasia_descriptions = {
283
+ "BROCA": {
284
+ "name": "Broca's Aphasia (Non-fluent)",
285
+ "description": "Characterized by limited speech output, difficulty with grammar and sentence formation, but relatively preserved comprehension. Speech is typically effortful and halting.",
286
+ "features": ["Non-fluent speech", "Preserved comprehension", "Grammar difficulties", "Word-finding problems"]
287
+ },
288
+ "TRANSMOTOR": {
289
+ "name": "Trans-cortical Motor Aphasia",
290
+ "description": "Similar to Broca's aphasia but with preserved repetition abilities. Speech is non-fluent with good comprehension.",
291
+ "features": ["Non-fluent speech", "Good repetition", "Preserved comprehension", "Grammar difficulties"]
292
+ },
293
+ "NOTAPHASICBYWAB": {
294
+ "name": "Not Aphasic by WAB",
295
+ "description": "Individuals who do not meet the criteria for aphasia according to the Western Aphasia Battery assessment.",
296
+ "features": ["Normal language function", "No significant language impairment", "Good comprehension", "Fluent speech"]
297
+ },
298
+ "CONDUCTION": {
299
+ "name": "Conduction Aphasia",
300
+ "description": "Characterized by fluent speech with good comprehension but severely impaired repetition. Often involves phonemic paraphasias.",
301
+ "features": ["Fluent speech", "Good comprehension", "Poor repetition", "Phonemic errors"]
302
+ },
303
+ "WERNICKE": {
304
+ "name": "Wernicke's Aphasia (Fluent)",
305
+ "description": "Fluent but often meaningless speech with poor comprehension. Speech may contain neologisms and jargon.",
306
+ "features": ["Fluent speech", "Poor comprehension", "Jargon speech", "Neologisms"]
307
+ },
308
+ "ANOMIC": {
309
+ "name": "Anomic Aphasia",
310
+ "description": "Primarily characterized by word-finding difficulties with otherwise relatively preserved language abilities.",
311
+ "features": ["Word-finding difficulties", "Good comprehension", "Fluent speech", "Circumlocution"]
312
+ },
313
+ "GLOBAL": {
314
+ "name": "Global Aphasia",
315
+ "description": "Severe impairment in all language modalities - comprehension, production, repetition, and naming.",
316
+ "features": ["Severe comprehension deficit", "Non-fluent speech", "Poor repetition", "Severe naming difficulties"]
317
+ },
318
+ "ISOLATION": {
319
+ "name": "Isolation Syndrome",
320
+ "description": "Rare condition with preserved repetition but severely impaired comprehension and spontaneous speech.",
321
+ "features": ["Good repetition", "Poor comprehension", "Limited spontaneous speech", "Echolalia"]
322
+ },
323
+ "TRANSSENSORY": {
324
+ "name": "Trans-cortical Sensory Aphasia",
325
+ "description": "Fluent speech with good repetition but impaired comprehension, similar to Wernicke's but with preserved repetition.",
326
+ "features": ["Fluent speech", "Good repetition", "Poor comprehension", "Semantic errors"]
327
+ }
328
+ }
329
+
330
+ # 載入模型配置和映射
331
+ self.load_configuration()
332
+
333
+ # 載入模型
334
+ self.load_model()
335
+
336
+ print(f"推理系統初始化完成,使用設備: {self.device}")
337
+
338
+ def load_configuration(self):
339
+ """載入模型配置"""
340
+ config_path = os.path.join(self.model_dir, "config.json")
341
+ if os.path.exists(config_path):
342
+ with open(config_path, "r", encoding="utf-8") as f:
343
+ config_data = json.load(f)
344
+
345
+ self.aphasia_types_mapping = config_data.get("aphasia_types_mapping", {
346
+ "BROCA": 0, "TRANSMOTOR": 1, "NOTAPHASICBYWAB": 2,
347
+ "CONDUCTION": 3, "WERNICKE": 4, "ANOMIC": 5,
348
+ "GLOBAL": 6, "ISOLATION": 7, "TRANSSENSORY": 8
349
+ })
350
+ self.num_labels = config_data.get("num_labels", 9)
351
+ self.model_name = config_data.get("model_name", "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
352
+ else:
353
+ # 預設配置
354
+ self.aphasia_types_mapping = {
355
+ "BROCA": 0, "TRANSMOTOR": 1, "NOTAPHASICBYWAB": 2,
356
+ "CONDUCTION": 3, "WERNICKE": 4, "ANOMIC": 5,
357
+ "GLOBAL": 6, "ISOLATION": 7, "TRANSSENSORY": 8
358
+ }
359
+ self.num_labels = 9
360
+ self.model_name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
361
+
362
+ # 建立反向映射
363
+ self.id_to_aphasia_type = {v: k for k, v in self.aphasia_types_mapping.items()}
364
+
365
+ def load_model(self):
366
+ """載入訓練好的模型"""
367
+ # 載入tokenizer
368
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)
369
+ if self.tokenizer.pad_token is None:
370
+ self.tokenizer.pad_token = self.tokenizer.eos_token
371
+ added_tokens_path = os.path.join(self.model_dir, "added_tokens.json")
372
+ if os.path.exists(added_tokens_path):
373
+ with open(added_tokens_path, "r", encoding="utf-8") as f:
374
+ data = json.load(f)
375
+ # 如果是 dict,就取出所有 key 當作要新增的 token 清單
376
+ if isinstance(data, dict):
377
+ tokens = list(data.keys())
378
+ else:
379
+ tokens = data # 萬一已經是 list,就直接用
380
+ num_added = self.tokenizer.add_tokens(tokens)
381
+ print(f"新增到 tokenizer 的 token 數量: {num_added}")
382
+ # 建立模型配置
383
+ self.config = ModelConfig()
384
+ self.config.model_name = self.model_name
385
+
386
+ # 建立模型
387
+ self.model = StableAphasiaClassifier(self.config, self.num_labels)
388
+ self.model.bert.resize_token_embeddings(len(self.tokenizer))
389
+ # 載入模型權重
390
+ model_path = os.path.join(self.model_dir, "pytorch_model.bin")
391
+ if os.path.exists(model_path):
392
+ state_dict = torch.load(model_path, map_location=self.device)
393
+ self.model.load_state_dict(state_dict)
394
+ self.model.load_state_dict(state_dict)
395
+ print("模型權重載入成功")
396
+ else:
397
+ raise FileNotFoundError(f"模型權重文件不存在: {model_path}")
398
+
399
+ # 調整tokenizer尺寸
400
+ self.model.bert.resize_token_embeddings(len(self.tokenizer))
401
+
402
+ # 移動到設備並設置為評估模式
403
+ self.model.to(self.device)
404
+ self.model.eval()
405
+
406
    def preprocess_sentence(self, sentence_data: dict) -> Optional[dict]:
        """Preprocess one sentence record into model-ready tensors.

        Flattens all PAR utterances (dialogues separated by a "[DIALOGUE]"
        marker token), tokenizes the joined text, aligns the word-level
        features to BERT sub-tokens, and builds the prosody tensor.

        Returns:
            A dict of tensors plus bookkeeping fields, or None when the
            record contains no PAR tokens.
        """
        all_tokens, all_pos, all_grammar, all_durations = [], [], [], []

        # Walk the dialogue turns and flatten them into parallel lists.
        for dialogue_idx, dialogue in enumerate(sentence_data.get("dialogues", [])):
            if dialogue_idx > 0:
                # Separator between consecutive dialogues, with neutral features.
                all_tokens.append("[DIALOGUE]")
                all_pos.append(0)
                all_grammar.append([0, 0, 0])
                all_durations.append(0.0)

            # Only the participant's (PAR) speech is used; INV is ignored here.
            for par in dialogue.get("PAR", []):
                if "tokens" in par and par["tokens"]:
                    tokens = par["tokens"]
                    # Missing feature lists default to neutral values.
                    pos_ids = par.get("word_pos_ids", [0] * len(tokens))
                    grammar_ids = par.get("word_grammar_ids", [[0, 0, 0]] * len(tokens))
                    durations = par.get("word_durations", [0.0] * len(tokens))

                    all_tokens.extend(tokens)
                    all_pos.extend(pos_ids)
                    all_grammar.extend(grammar_ids)
                    all_durations.extend(durations)

        if not all_tokens:
            return None

        # Text tokenization (padded/truncated to max_length).
        text = " ".join(all_tokens)
        encoded = self.tokenizer(
            text,
            max_length=self.config.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        # Align word-level features to BERT sub-token positions.
        aligned_pos, aligned_grammar, aligned_durations = self._align_features(
            all_tokens, all_pos, all_grammar, all_durations, encoded
        )

        # Build the prosody feature vector and tile it over every position.
        prosody_features = self._extract_prosodic_features(all_durations, all_tokens)
        prosody_tensor = torch.tensor(prosody_features).unsqueeze(0).repeat(
            self.config.max_length, 1
        )

        return {
            "input_ids": encoded["input_ids"].squeeze(0),
            "attention_mask": encoded["attention_mask"].squeeze(0),
            "word_pos_ids": torch.tensor(aligned_pos, dtype=torch.long),
            "word_grammar_ids": torch.tensor(aligned_grammar, dtype=torch.long),
            "word_durations": torch.tensor(aligned_durations, dtype=torch.float),
            "prosody_features": prosody_tensor.float(),
            "sentence_id": sentence_data.get("sentence_id", "unknown"),
            "original_tokens": all_tokens,
            "text": text
        }
466
+
467
    def _align_features(self, tokens, pos_ids, grammar_ids, durations, encoded):
        """Align word-level features to BERT sub-token positions.

        Each word's features are repeated once per sub-token it produces.
        Positions 0 ([CLS]) and max_length-1 ([SEP]), and any slack beyond
        the real sub-tokens, get neutral values (0 / [0,0,0] / 0.0).

        Returns:
            (aligned_pos, aligned_grammar, aligned_durations), each of
            length self.config.max_length.
        """
        # Map every sub-token index back to the index of its source word.
        subtoken_to_token = []

        for token_idx, token in enumerate(tokens):
            subtokens = self.tokenizer.tokenize(token)
            subtoken_to_token.extend([token_idx] * len(subtokens))

        aligned_pos = [0]  # [CLS]
        aligned_grammar = [[0, 0, 0]]  # [CLS]
        aligned_durations = [0.0]  # [CLS]

        for subtoken_idx in range(1, self.config.max_length - 1):
            if subtoken_idx - 1 < len(subtoken_to_token):
                original_idx = subtoken_to_token[subtoken_idx - 1]
                aligned_pos.append(pos_ids[original_idx] if original_idx < len(pos_ids) else 0)
                aligned_grammar.append(grammar_ids[original_idx] if original_idx < len(grammar_ids) else [0, 0, 0])

                # Duration entries may be scalars or [start, end] pairs
                # (the .cha converter emits [token, duration] pairs, whose
                # difference is taken the same way — TODO confirm intent).
                raw_duration = durations[original_idx] if original_idx < len(durations) else 0.0
                if isinstance(raw_duration, list) and len(raw_duration) >= 2:
                    try:
                        duration_val = float(raw_duration[1]) - float(raw_duration[0])
                    except (ValueError, TypeError):
                        duration_val = 0.0
                elif isinstance(raw_duration, (int, float)):
                    duration_val = float(raw_duration)
                else:
                    duration_val = 0.0

                aligned_durations.append(duration_val)
            else:
                # Padding region beyond the real sub-tokens.
                aligned_pos.append(0)
                aligned_grammar.append([0, 0, 0])
                aligned_durations.append(0.0)

        aligned_pos.append(0)  # [SEP]
        aligned_grammar.append([0, 0, 0])  # [SEP]
        aligned_durations.append(0.0)  # [SEP]

        return aligned_pos, aligned_grammar, aligned_durations
508
+
509
+ def _extract_prosodic_features(self, durations, tokens):
510
+ """提取韻律特徵"""
511
+ if not durations:
512
+ return [0.0] * self.config.prosody_dim
513
+
514
+ # 處理duration數據並提取數值
515
+ processed_durations = []
516
+ for d in durations:
517
+ if isinstance(d, list) and len(d) >= 2:
518
+ try:
519
+ processed_durations.append(float(d[1]) - float(d[0]))
520
+ except (ValueError, TypeError):
521
+ continue
522
+ elif isinstance(d, (int, float)):
523
+ processed_durations.append(float(d))
524
+
525
+ if not processed_durations:
526
+ return [0.0] * self.config.prosody_dim
527
+
528
+ # 計算基本統計特徵
529
+ features = [
530
+ np.mean(processed_durations),
531
+ np.std(processed_durations),
532
+ np.median(processed_durations),
533
+ len([d for d in processed_durations if d > np.mean(processed_durations) * 1.5])
534
+ ]
535
+
536
+ # 填充至所需維度
537
+ while len(features) < self.config.prosody_dim:
538
+ features.append(0.0)
539
+
540
+ return features[:self.config.prosody_dim]
541
+
542
    def predict_single(self, sentence_data: dict) -> dict:
        """Run inference on one sentence record and build a result report.

        Returns:
            A dict with the predicted class, per-class probabilities,
            class description, and auxiliary severity/fluency outputs;
            or {"error": ..., "sentence_id": ...} when preprocessing fails.
        """
        # Preprocess into model-ready tensors.
        processed_data = self.preprocess_sentence(sentence_data)
        if processed_data is None:
            return {
                "error": "無法處理輸入數據",
                "sentence_id": sentence_data.get("sentence_id", "unknown")
            }

        # Add the batch dimension and move everything to the model's device.
        input_data = {
            "input_ids": processed_data["input_ids"].unsqueeze(0).to(self.device),
            "attention_mask": processed_data["attention_mask"].unsqueeze(0).to(self.device),
            "word_pos_ids": processed_data["word_pos_ids"].unsqueeze(0).to(self.device),
            "word_grammar_ids": processed_data["word_grammar_ids"].unsqueeze(0).to(self.device),
            "word_durations": processed_data["word_durations"].unsqueeze(0).to(self.device),
            "prosody_features": processed_data["prosody_features"].unsqueeze(0).to(self.device)
        }

        # Forward pass without gradients.
        with torch.no_grad():
            outputs = self.model(**input_data)

        logits = outputs["logits"]
        probabilities = F.softmax(logits, dim=1).cpu().numpy()[0]
        predicted_class_id = np.argmax(probabilities)

        severity_pred = outputs["severity_pred"].cpu().numpy()[0]
        fluency_pred = outputs["fluency_pred"].cpu().numpy()[0][0]

        # Decode the winning class and its confidence.
        predicted_type = self.id_to_aphasia_type[predicted_class_id]
        confidence = float(probabilities[predicted_class_id])

        # Per-class probability breakdown.
        probability_distribution = {}
        for aphasia_type, type_id in self.aphasia_types_mapping.items():
            probability_distribution[aphasia_type] = {
                "probability": float(probabilities[type_id]),
                "percentage": f"{probabilities[type_id]*100:.2f}%"
            }

        # Sort classes by descending probability for readability.
        sorted_probabilities = sorted(
            probability_distribution.items(),
            key=lambda x: x[1]["probability"],
            reverse=True
        )

        result = {
            "sentence_id": processed_data["sentence_id"],
            "input_text": processed_data["text"],
            "original_tokens": processed_data["original_tokens"],
            "prediction": {
                "predicted_class": predicted_type,
                "confidence": confidence,
                "confidence_percentage": f"{confidence*100:.2f}%"
            },
            "class_description": self.aphasia_descriptions.get(predicted_type, {
                "name": predicted_type,
                "description": "Description not available",
                "features": []
            }),
            "probability_distribution": dict(sorted_probabilities),
            "additional_predictions": {
                "severity_distribution": {
                    "level_0": float(severity_pred[0]),
                    "level_1": float(severity_pred[1]),
                    "level_2": float(severity_pred[2]),
                    "level_3": float(severity_pred[3])
                },
                "predicted_severity_level": int(np.argmax(severity_pred)),
                "fluency_score": float(fluency_pred),
                # Thresholds 0.7 / 0.4 split the sigmoid output into three bands.
                "fluency_rating": "High" if fluency_pred > 0.7 else "Medium" if fluency_pred > 0.4 else "Low"
            }
        }

        return result
621
+
622
    def predict_batch(self, input_file: str, output_file: str = None) -> dict:
        """Predict every sentence in a JSON file and optionally save the results.

        Args:
            input_file: Path to a JSON file with a top-level "sentences" list.
            output_file: Optional path to write the aggregated results to.

        Returns:
            A dict with "summary", "total_sentences" and "predictions"
            (note: a dict, not a list — the original annotation was wrong).
        """
        # Load the input file.
        with open(input_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        sentences = data.get("sentences", [])
        results = []

        print(f"開始處理 {len(sentences)} 個句子...")

        for i, sentence in enumerate(sentences):
            print(f"處理第 {i+1}/{len(sentences)} 個句子...")
            result = self.predict_single(sentence)
            results.append(result)

        # Aggregate statistics over all predictions.
        summary = self._generate_summary(results)

        final_output = {
            "summary": summary,
            "total_sentences": len(results),
            "predictions": results
        }

        # Persist if an output path was given.
        if output_file:
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(final_output, f, ensure_ascii=False, indent=2)
            print(f"結果已保存到: {output_file}")

        return final_output
654
+
655
+ def _generate_summary(self, results: List[dict]) -> dict:
656
+ """生成預測結果摘要"""
657
+ if not results:
658
+ return {}
659
+
660
+ # 統計各類別預測數量
661
+ class_counts = defaultdict(int)
662
+ confidence_scores = []
663
+ fluency_scores = []
664
+ severity_levels = defaultdict(int)
665
+
666
+ for result in results:
667
+ if "error" not in result:
668
+ predicted_class = result["prediction"]["predicted_class"]
669
+ confidence = result["prediction"]["confidence"]
670
+ fluency = result["additional_predictions"]["fluency_score"]
671
+ severity = result["additional_predictions"]["predicted_severity_level"]
672
+
673
+ class_counts[predicted_class] += 1
674
+ confidence_scores.append(confidence)
675
+ fluency_scores.append(fluency)
676
+ severity_levels[severity] += 1
677
+
678
+ # 計算統計數據
679
+ avg_confidence = np.mean(confidence_scores) if confidence_scores else 0
680
+ avg_fluency = np.mean(fluency_scores) if fluency_scores else 0
681
+
682
+ summary = {
683
+ "classification_distribution": dict(class_counts),
684
+ "classification_percentages": {
685
+ k: f"{v/len(results)*100:.1f}%"
686
+ for k, v in class_counts.items()
687
+ },
688
+ "average_confidence": f"{avg_confidence:.3f}",
689
+ "average_fluency_score": f"{avg_fluency:.3f}",
690
+ "severity_distribution": dict(severity_levels),
691
+ "confidence_statistics": {
692
+ "mean": f"{np.mean(confidence_scores):.3f}",
693
+ "std": f"{np.std(confidence_scores):.3f}",
694
+ "min": f"{np.min(confidence_scores):.3f}",
695
+ "max": f"{np.max(confidence_scores):.3f}"
696
+ } if confidence_scores else {},
697
+ "most_common_prediction": max(class_counts.items(), key=lambda x: x[1])[0] if class_counts else "None"
698
+ }
699
+
700
+ return summary
701
+
702
+ def generate_detailed_report(self, results: List[dict], output_dir: str = "./inference_results"):
703
+ """生成詳細的分析報告"""
704
+ os.makedirs(output_dir, exist_ok=True)
705
+
706
+ # 建立詳細的CSV報告
707
+ report_data = []
708
+ for result in results:
709
+ if "error" not in result:
710
+ row = {
711
+ "sentence_id": result["sentence_id"],
712
+ "predicted_class": result["prediction"]["predicted_class"],
713
+ "confidence": result["prediction"]["confidence"],
714
+ "class_name": result["class_description"]["name"],
715
+ "severity_level": result["additional_predictions"]["predicted_severity_level"],
716
+ "fluency_score": result["additional_predictions"]["fluency_score"],
717
+ "fluency_rating": result["additional_predictions"]["fluency_rating"],
718
+ "input_text": result["input_text"]
719
+ }
720
+
721
+ # 添加各類別機率
722
+ for aphasia_type in self.aphasia_types_mapping.keys():
723
+ row[f"prob_{aphasia_type}"] = result["probability_distribution"][aphasia_type]["probability"]
724
+
725
+ report_data.append(row)
726
+
727
+ # 保存CSV
728
+ if report_data:
729
+ df = pd.DataFrame(report_data)
730
+ df.to_csv(os.path.join(output_dir, "detailed_predictions.csv"), index=False, encoding='utf-8')
731
+
732
+ # 生成統計摘要
733
+ summary_stats = {
734
+ "total_predictions": len(report_data),
735
+ "class_distribution": df["predicted_class"].value_counts().to_dict(),
736
+ "average_confidence": df["confidence"].mean(),
737
+ "confidence_std": df["confidence"].std(),
738
+ "average_fluency": df["fluency_score"].mean(),
739
+ "fluency_std": df["fluency_score"].std(),
740
+ "severity_distribution": df["severity_level"].value_counts().to_dict()
741
+ }
742
+
743
+ with open(os.path.join(output_dir, "summary_statistics.json"), "w", encoding="utf-8") as f:
744
+ json.dump(summary_stats, f, ensure_ascii=False, indent=2)
745
+
746
+ print(f"詳細報告已生成並保存到: {output_dir}")
747
+ return df
748
+
749
+ return None
750
+
751
+
752
def main():
    """Command-line entry point.

    Loads the trained model, runs batch inference over an input JSON file,
    optionally emits a detailed CSV report, and prints a summary to stdout.
    """
    import argparse

    parser = argparse.ArgumentParser(description="失語症分類推理系統")
    parser.add_argument("--model_dir", type=str, default='/workspace/SH001/adaptive_aphasia_model',
                        help="訓練好的模型目錄路徑")
    parser.add_argument("--input_file", type=str, default='/workspace/SH001/website/sample.input.json',
                        help="輸入JSON文件路徑")
    parser.add_argument("--output_file", type=str, default="./aphasia_predictions.json",
                        help="輸出JSON文件路徑")
    parser.add_argument("--report_dir", type=str, default="./inference_results",
                        help="詳細報告輸出目錄")
    parser.add_argument("--generate_report", action="store_true",
                        help="是否生成詳細的CSV報告")
    args = parser.parse_args()

    try:
        # Build the inference system from the trained model directory.
        print("正在初始化推理系統...")
        system = AphasiaInferenceSystem(args.model_dir)

        # Predict every sentence in the input file.
        print("開始執行批次預測...")
        batch_output = system.predict_batch(args.input_file, args.output_file)

        # Optional per-sentence CSV report.
        if args.generate_report:
            print("生成詳細報告...")
            system.generate_detailed_report(batch_output["predictions"], args.report_dir)

        summary = batch_output["summary"]
        print("\n=== 預測摘要 ===")
        print(f"總句子數: {batch_output['total_sentences']}")
        print(f"平均信心度: {summary.get('average_confidence', 'N/A')}")
        print(f"平均流利度: {summary.get('average_fluency_score', 'N/A')}")
        print(f"最常見預測: {summary.get('most_common_prediction', 'N/A')}")

        print("\n類別分佈:")
        distribution = summary.get("classification_distribution", {})
        percentages = summary.get("classification_percentages", {})
        for class_name, count in distribution.items():
            print(f" {class_name}: {count} ({percentages.get(class_name, '0%')})")

        print(f"\n結果已保存到: {args.output_file}")

    except Exception as e:
        print(f"錯誤: {str(e)}")
        import traceback
        traceback.print_exc()
803
+
804
+
805
+ # 使用範例
806
def example_usage():
    """Usage example: writes sample_input.json and prints CLI/API instructions."""

    # 1. Basic usage banner
    print("=== 失語症分類推理系統使用範例 ===\n")

    # Example input payload in the format expected by predict_batch.
    sample_input = {
        "sentences": [
            {
                "sentence_id": "S1",
                "aphasia_type": "BROCA",  # ignored at inference time
                "dialogues": [
                    {
                        "INV": [
                            {
                                "tokens": ["how", "are", "you", "feeling"],
                                "word_pos_ids": [9, 10, 5, 6],
                                "word_grammar_ids": [[1, 4, 11], [2, 4, 2], [3, 4, 1], [4, 0, 3]],
                                "word_durations": [["how", 300], ["are", 200], ["you", 150], ["feeling", 500]]
                            }
                        ],
                        "PAR": [
                            {
                                "tokens": ["I", "feel", "good"],
                                "word_pos_ids": [1, 6, 8],
                                "word_grammar_ids": [[1, 2, 1], [2, 3, 2], [3, 4, 8]],
                                "word_durations": [["I", 200], ["feel", 400], ["good", 600]]
                            }
                        ]
                    }
                ]
            }
        ]
    }

    # Save the example input next to the current working directory.
    with open("sample_input.json", "w", encoding="utf-8") as f:
        json.dump(sample_input, f, ensure_ascii=False, indent=2)

    print("範例輸入文件已創建: sample_input.json")

    # Show usage instructions (printed verbatim).
    usage_instructions = """
    使用方法:

    1. 命令行使用:
    python aphasia_inference.py \\
    --model_dir ./adaptive_aphasia_model \\
    --input_file sample_input.json \\
    --output_file predictions.json \\
    --generate_report \\
    --report_dir ./results

    2. Python代碼使用:
    from aphasia_inference import AphasiaInferenceSystem

    # 初始化系統
    system = AphasiaInferenceSystem("./adaptive_aphasia_model")

    # 單個預測
    with open("sample_input.json", "r") as f:
        data = json.load(f)
    result = system.predict_single(data["sentences"][0])

    # 批次預測
    results = system.predict_batch("sample_input.json", "output.json")

    3. 輸出格式:
    - JSON格式包含詳細的預測結果和機率分佈
    - CSV格式包含表格化的預測數據
    - 統計摘要包含整體分析結果

    4. 支援的失語症類型:
    - BROCA: 布若卡失語症
    - WERNICKE: 韋尼克失語症
    - ANOMIC: 命名性失語症
    - CONDUCTION: 傳導性失語症
    - GLOBAL: 全面性失語症
    - 以及其他類型...
    """

    print(usage_instructions)
889
+
890
+
891
if __name__ == "__main__":
    # Run the CLI entry point when executed as a script.
    main()

    # To see the usage example instead, uncomment the line below.
    # example_usage()
Output.json ADDED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -1,12 +1 @@
1
- ---
2
- title: Aphasia Classification
3
- emoji: 💬
4
- colorFrom: yellow
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.0.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
1
+ # Aphasia-Classifier
 
 
 
 
 
 
 
 
 
 
 
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "[DIALOGUE]": 30522,
3
+ "[HESITATION]": 30526,
4
+ "[PAUSE]": 30524,
5
+ "[REPEAT]": 30525,
6
+ "[TURN]": 30523
7
+ }
aphasia_class_2025_8_5--testing.py ADDED
@@ -0,0 +1,1712 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Advanced Multi-Modal Aphasia Classification System
4
+ With Adaptive Learning Rate and Comprehensive Reporting
5
+ """
6
+
7
+ import re
8
+ import json
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ import time
13
+ import datetime
14
+ import numpy as np
15
+ import os
16
+ import random
17
+ import csv
18
+ import math
19
+ from collections import Counter, defaultdict
20
+ from typing import Dict, List, Optional, Tuple, Union
21
+ from dataclasses import dataclass
22
+
23
+ import torch.optim as optim
24
+ from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler, Subset
25
+ from transformers import (
26
+ AutoTokenizer, AutoModel, AutoConfig,
27
+ TrainingArguments, Trainer, TrainerCallback,
28
+ EarlyStoppingCallback, get_cosine_schedule_with_warmup,
29
+ default_data_collator, set_seed
30
+ )
31
+
32
+ import seaborn as sns
33
+ import matplotlib.pyplot as plt
34
+ import pandas as pd
35
+ from sklearn.metrics import (
36
+ accuracy_score, f1_score, precision_score, recall_score,
37
+ confusion_matrix, classification_report, roc_auc_score
38
+ )
39
+ from sklearn.model_selection import StratifiedKFold
40
+ import gc
41
+ from scipy import stats
42
+
43
+ # Environment setup for stability
44
+ os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
45
+ os.environ["TORCH_USE_CUDA_DSA"] = "1"
46
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
47
+ json_file = '/workspace/SH001/aphasia_data_augmented.json'
48
+
49
+ # Set seeds for reproducibility
50
def set_all_seeds(seed=42):
    """Seed every RNG source used by the pipeline for reproducibility.

    Covers the Python hash seed, the `random` module, NumPy, and PyTorch
    (CPU and all CUDA devices).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
56
+
57
+ set_all_seeds(42)
58
+
59
+ # Configuration
60
@dataclass
class ModelConfig:
    """Hyper-parameters for the aphasia classifier and its training loop.

    Field order defines the generated __init__ signature; do not reorder.
    """
    # Model architecture
    model_name: str = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
    max_length: int = 512
    hidden_size: int = 768

    # Feature dimensions
    pos_vocab_size: int = 150
    pos_emb_dim: int = 64
    grammar_dim: int = 3
    grammar_hidden_dim: int = 64
    duration_hidden_dim: int = 128
    prosody_dim: int = 32

    # Multi-head attention
    num_attention_heads: int = 8
    attention_dropout: float = 0.3

    # Classification head
    classifier_hidden_dims: List[int] = None  # defaulted in __post_init__
    dropout_rate: float = 0.3
    activation_fn: str = "tanh"

    # Training
    learning_rate: float = 5e-4
    weight_decay: float = 0.01
    warmup_ratio: float = 0.1
    batch_size: int = 10
    num_epochs: int = 500
    gradient_accumulation_steps: int = 4

    # Adaptive learning-rate parameters
    adaptive_lr: bool = True
    lr_patience: int = 3  # Patience for learning rate adjustment
    lr_factor: float = 0.8  # Factor to multiply learning rate
    lr_increase_factor: float = 1.2  # Factor to increase learning rate
    min_lr: float = 1e-6
    max_lr: float = 1e-3
    oscillation_amplitude: float = 0.1  # For sinusoidal oscillation

    # Advanced techniques
    use_focal_loss: bool = True
    focal_alpha: float = 1.0
    focal_gamma: float = 2.0
    use_mixup: bool = False
    mixup_alpha: float = 0.2
    use_label_smoothing: bool = True
    label_smoothing: float = 0.1

    def __post_init__(self):
        # Default classifier MLP: two hidden layers.
        if self.classifier_hidden_dims is None:
            self.classifier_hidden_dims = [512, 256]
113
+
114
+ # Utility functions
115
def log_message(message):
    """Append a timestamped line to ./training_log.txt and echo it to stdout."""
    stamp = datetime.datetime.now().isoformat()
    line = f"{stamp}: {message}"
    with open("./training_log.txt", "a", encoding="utf-8") as fh:
        fh.write(line + "\n")
    print(line, flush=True)
122
+
123
def clear_memory():
    """Force a garbage-collection pass and, when CUDA is present, release
    PyTorch's cached GPU memory back to the driver."""
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
127
+
128
def normalize_type(t):
    """Canonicalize an aphasia-type label.

    Strings are stripped and upper-cased; any non-string value (e.g. None)
    is returned unchanged.
    """
    if not isinstance(t, str):
        return t
    return t.strip().upper()
130
+
131
+ # Adaptive Learning Rate Scheduler
132
class AdaptiveLearningRateScheduler:
    """Adaptive learning-rate scheduler combining several strategies.

    Mixes plateau detection (via recent-metric slopes), exponential and
    logarithmic adjustment factors, a sinusoidal oscillation term and a
    cosine decay term to compute a new learning rate each step, clamped to
    [config.min_lr, config.max_lr].
    """
    def __init__(self, optimizer, config: ModelConfig, total_steps: int):
        self.optimizer = optimizer
        self.config = config
        self.total_steps = total_steps

        # Metric histories, appended once per adaptive_lr_calculation call.
        self.loss_history = []
        self.f1_history = []
        self.accuracy_history = []
        self.lr_history = []

        # State tracking. NOTE(review): plateau_counter / best_f1 / best_loss
        # are initialized but never updated in this class — confirm intent.
        self.plateau_counter = 0
        self.best_f1 = 0.0
        self.best_loss = float('inf')
        self.step_count = 0

        # Starting learning rate.
        self.base_lr = config.learning_rate
        self.current_lr = self.base_lr

        log_message(f"Adaptive LR Scheduler initialized with base_lr={self.base_lr}")

    def calculate_slope(self, values, window=3):
        """Return the linear-regression slope of the last `window` values
        (0.0 when fewer than `window` values exist)."""
        if len(values) < window:
            return 0.0

        recent_values = values[-window:]
        x = np.arange(len(recent_values))
        slope, _, _, _, _ = stats.linregress(x, recent_values)
        return slope

    def exponential_adjustment(self, current_value, target_value, base_factor=1.1):
        """Exponential adjustment factor: base_factor * exp(-current/target)."""
        ratio = current_value / target_value if target_value != 0 else 1.0
        factor = math.exp(-ratio) * base_factor
        return factor

    def logarithmic_adjustment(self, current_value, threshold=0.1):
        """Logarithmic adjustment factor, clamped to [0.5, 2.0]."""
        if current_value <= 0:
            return 1.0
        factor = math.log(1 + current_value / threshold)
        return max(0.5, min(2.0, factor))

    def sinusoidal_oscillation(self, step, amplitude=None):
        """Sinusoidal multiplier around 1.0 with 4 cycles over total_steps."""
        if amplitude is None:
            amplitude = self.config.oscillation_amplitude

        # Step-based phase: 4 full periods across the whole training run.
        phase = 2 * math.pi * step / (self.total_steps / 4)
        oscillation = 1 + amplitude * math.sin(phase)
        return oscillation

    def cosine_decay(self, step):
        """Cosine decay from 1.0 down to 0.0 over total_steps."""
        progress = step / self.total_steps
        decay = 0.5 * (1 + math.cos(math.pi * progress))
        return decay

    def adaptive_lr_calculation(self, current_loss, current_f1, current_acc):
        """Compute, apply and return a new learning rate from latest metrics."""
        # Record history.
        self.loss_history.append(current_loss)
        self.f1_history.append(current_f1)
        self.accuracy_history.append(current_acc)

        # Recent slope of each metric. NOTE(review): acc_slope is computed
        # but not used in the adjustment below — confirm intent.
        loss_slope = self.calculate_slope(self.loss_history)
        f1_slope = self.calculate_slope(self.f1_history)
        acc_slope = self.calculate_slope(self.accuracy_history)

        # Base adjustment factor, multiplied by each triggered rule.
        adjustment_factor = 1.0

        # 1. Adjustment based on the loss slope.
        if abs(loss_slope) < 0.001:  # Loss plateau
            log_message(f"Loss plateau detected (slope: {loss_slope:.6f})")
            # Exponentially increase the learning rate.
            exp_factor = self.exponential_adjustment(abs(loss_slope), 0.01, 1.15)
            adjustment_factor *= exp_factor

        elif current_loss > 2.0:  # Loss too high
            log_message(f"High loss detected: {current_loss:.4f}")
            # Logarithmic adjustment.
            log_factor = self.logarithmic_adjustment(current_loss, 1.0)
            adjustment_factor *= log_factor

        # 2. Adjustment based on the F1 score.
        if current_f1 < 0.3:  # F1 too low
            log_message(f"Low F1 detected: {current_f1:.4f}")
            # Exponentially increase the learning rate.
            exp_factor = self.exponential_adjustment(0.3, current_f1, 1.2)
            adjustment_factor *= exp_factor

        elif abs(f1_slope) < 0.001:  # F1 plateau
            log_message(f"F1 plateau detected (slope: {f1_slope:.6f})")
            adjustment_factor *= 1.1

        # 3. Sinusoidal oscillation term.
        sin_factor = self.sinusoidal_oscillation(self.step_count)

        # 4. Cosine decay term.
        cos_factor = self.cosine_decay(self.step_count)

        # Combine: rule factor * oscillation * (partially decayed baseline).
        final_factor = adjustment_factor * sin_factor * (0.3 + 0.7 * cos_factor)

        # New learning rate, clamped to the configured range.
        new_lr = self.current_lr * final_factor
        new_lr = max(self.config.min_lr, min(self.config.max_lr, new_lr))

        # Apply only when the change is large enough to matter.
        if abs(new_lr - self.current_lr) > 1e-7:
            self.current_lr = new_lr
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = new_lr

            log_message(f"Learning rate adjusted: {new_lr:.2e} (factor: {final_factor:.3f})")
            log_message(f" - Loss slope: {loss_slope:.6f}, F1 slope: {f1_slope:.6f}")
            log_message(f" - Sin factor: {sin_factor:.3f}, Cos factor: {cos_factor:.3f}")

        self.lr_history.append(self.current_lr)
        self.step_count += 1

        return self.current_lr
264
+
265
+ # Training History Tracker
266
class TrainingHistoryTracker:
    """Collects per-epoch training metrics and renders them as CSV and plots."""
    def __init__(self):
        # One parallel list per tracked metric, indexed by epoch.
        self.history = {
            'epoch': [],
            'train_loss': [],
            'eval_loss': [],
            'train_accuracy': [],
            'eval_accuracy': [],
            'train_f1': [],
            'eval_f1': [],
            'learning_rate': [],
            'train_precision': [],
            'eval_precision': [],
            'train_recall': [],
            'eval_recall': []
        }

    def update(self, epoch, metrics):
        """Append this epoch's metrics; keys absent from self.history are ignored."""
        self.history['epoch'].append(epoch)
        for key, value in metrics.items():
            if key in self.history:
                self.history[key].append(value)

    def save_history(self, output_dir):
        """Write the history to <output_dir>/training_history.csv and return it as a DataFrame.

        Note: pd.DataFrame requires all metric lists to have equal length.
        """
        df = pd.DataFrame(self.history)
        df.to_csv(os.path.join(output_dir, "training_history.csv"), index=False)
        return df

    def plot_training_curves(self, output_dir):
        """Render a 2x3 grid of loss/accuracy/F1/LR/precision/recall curves
        to <output_dir>/training_curves.png. No-op when no epochs recorded."""
        if not self.history['epoch']:
            return

        # Chart styling.
        plt.style.use('seaborn-v0_8')
        fig, axes = plt.subplots(2, 3, figsize=(18, 12))

        epochs = self.history['epoch']

        # 1. Loss curves
        axes[0, 0].plot(epochs, self.history['train_loss'], 'b-', label='Train Loss', linewidth=2)
        axes[0, 0].plot(epochs, self.history['eval_loss'], 'r-', label='Eval Loss', linewidth=2)
        axes[0, 0].set_title('Loss Over Time', fontsize=14, fontweight='bold')
        axes[0, 0].set_xlabel('Epoch')
        axes[0, 0].set_ylabel('Loss')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # 2. Accuracy curves
        axes[0, 1].plot(epochs, self.history['train_accuracy'], 'b-', label='Train Accuracy', linewidth=2)
        axes[0, 1].plot(epochs, self.history['eval_accuracy'], 'r-', label='Eval Accuracy', linewidth=2)
        axes[0, 1].set_title('Accuracy Over Time', fontsize=14, fontweight='bold')
        axes[0, 1].set_xlabel('Epoch')
        axes[0, 1].set_ylabel('Accuracy')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)

        # 3. F1 curves
        axes[0, 2].plot(epochs, self.history['train_f1'], 'b-', label='Train F1', linewidth=2)
        axes[0, 2].plot(epochs, self.history['eval_f1'], 'r-', label='Eval F1', linewidth=2)
        axes[0, 2].set_title('F1 Score Over Time', fontsize=14, fontweight='bold')
        axes[0, 2].set_xlabel('Epoch')
        axes[0, 2].set_ylabel('F1 Score')
        axes[0, 2].legend()
        axes[0, 2].grid(True, alpha=0.3)

        # 4. Learning-rate curve (log scale)
        axes[1, 0].plot(epochs, self.history['learning_rate'], 'g-', linewidth=2)
        axes[1, 0].set_title('Learning Rate Over Time', fontsize=14, fontweight='bold')
        axes[1, 0].set_xlabel('Epoch')
        axes[1, 0].set_ylabel('Learning Rate')
        axes[1, 0].set_yscale('log')
        axes[1, 0].grid(True, alpha=0.3)

        # 5. Precision curves
        axes[1, 1].plot(epochs, self.history['train_precision'], 'b-', label='Train Precision', linewidth=2)
        axes[1, 1].plot(epochs, self.history['eval_precision'], 'r-', label='Eval Precision', linewidth=2)
        axes[1, 1].set_title('Precision Over Time', fontsize=14, fontweight='bold')
        axes[1, 1].set_xlabel('Epoch')
        axes[1, 1].set_ylabel('Precision')
        axes[1, 1].legend()
        axes[1, 1].grid(True, alpha=0.3)

        # 6. Recall curves
        axes[1, 2].plot(epochs, self.history['train_recall'], 'b-', label='Train Recall', linewidth=2)
        axes[1, 2].plot(epochs, self.history['eval_recall'], 'r-', label='Eval Recall', linewidth=2)
        axes[1, 2].set_title('Recall Over Time', fontsize=14, fontweight='bold')
        axes[1, 2].set_xlabel('Epoch')
        axes[1, 2].set_ylabel('Recall')
        axes[1, 2].legend()
        axes[1, 2].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "training_curves.png"), dpi=300, bbox_inches='tight')
        plt.close()
364
+
365
+ # Focal loss implementation
366
class FocalLoss(nn.Module):
    """Focal loss for class-imbalanced classification.

    Computes alpha * (1 - p_t)^gamma * CE, where p_t is the model's
    probability of the true class. With gamma=0 and alpha=1 it reduces to
    plain cross-entropy.
    """
    def __init__(self, alpha=1.0, gamma=2.0, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        # Per-sample cross-entropy; p_t recovered as exp(-CE).
        per_sample_ce = F.cross_entropy(inputs, targets, reduction='none')
        prob_true = torch.exp(-per_sample_ce)
        loss = self.alpha * (1 - prob_true) ** self.gamma * per_sample_ce

        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss
384
+
385
+ # Stable positional encoding
386
class StablePositionalEncoding(nn.Module):
    """Positional encoding mixing a fixed sinusoidal table with a small
    learnable component, added to the input at 0.1 scale."""
    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()
        self.d_model = d_model

        # Classic sinusoidal table: sin on even dims, cos on odd dims.
        positions = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        scale = torch.exp(torch.arange(0, d_model, 2).float() *
                          (-math.log(10000.0) / d_model))
        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(positions * scale)
        table[:, 1::2] = torch.cos(positions * scale)
        self.register_buffer('pe', table.unsqueeze(0))

        # Small learnable offset (initialized near zero).
        self.learnable_pe = nn.Parameter(torch.randn(max_len, d_model) * 0.01)

    def forward(self, x):
        # x: (batch, seq, d_model); add both encodings truncated to seq length.
        n = x.size(1)
        fixed = self.pe[:, :n, :].to(x.device)
        learned = self.learnable_pe[:n, :].unsqueeze(0).expand(x.size(0), -1, -1)
        return x + 0.1 * (fixed + learned)
411
+
412
+ # Stable multi-head attention
413
class StableMultiHeadAttention(nn.Module):
    """Stable multi-head self-attention with a residual connection and
    post-LayerNorm, used for feature fusion."""
    def __init__(self, feature_dim: int, num_heads: int = 4, dropout: float = 0.3):
        super().__init__()
        self.num_heads = num_heads
        self.feature_dim = feature_dim
        self.head_dim = feature_dim // num_heads

        # feature_dim must divide evenly across heads.
        assert feature_dim % num_heads == 0

        self.query = nn.Linear(feature_dim, feature_dim)
        self.key = nn.Linear(feature_dim, feature_dim)
        self.value = nn.Linear(feature_dim, feature_dim)
        self.dropout = nn.Dropout(dropout)
        self.output_proj = nn.Linear(feature_dim, feature_dim)
        self.layer_norm = nn.LayerNorm(feature_dim)

    def forward(self, x, mask=None):
        """Self-attend over x of shape (batch, seq, feature_dim).

        mask: optional (batch, seq) padding mask; positions where mask == 0
        are excluded from attention. Returns a tensor of the same shape as x.
        """
        batch_size, seq_len, _ = x.size()

        # Project and reshape to (batch, heads, seq, head_dim).
        Q = self.query(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = self.key(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = self.value(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)

        # Scaled dot-product attention scores.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.head_dim)

        if mask is not None:
            if mask.dim() == 2:
                # Broadcast (batch, seq) -> (batch, 1, 1, seq).
                mask = mask.unsqueeze(1).unsqueeze(1)
            # Large negative fill so masked positions vanish after softmax.
            scores.masked_fill_(mask == 0, -1e9)

        attn_weights = F.softmax(scores, dim=-1)
        attn_weights = self.dropout(attn_weights)

        context = torch.matmul(attn_weights, V)
        # Merge heads back to (batch, seq, feature_dim).
        context = context.transpose(1, 2).contiguous().view(batch_size, seq_len, self.feature_dim)

        output = self.output_proj(context)
        # Residual connection + LayerNorm.
        return self.layer_norm(output + x)
452
+
453
+ # Stable linguistic feature extractor
454
class StableLinguisticFeatureExtractor(nn.Module):
    """Encodes POS, grammar, duration and prosody inputs into one pooled
    feature vector per sequence (dimension: total feature dim // 2)."""
    def __init__(self, config: ModelConfig):
        super().__init__()
        self.config = config

        # POS embeddings refined by self-attention (index 0 is padding).
        self.pos_embedding = nn.Embedding(config.pos_vocab_size, config.pos_emb_dim, padding_idx=0)
        self.pos_attention = StableMultiHeadAttention(config.pos_emb_dim, num_heads=4)

        # Grammar feature projection.
        self.grammar_projection = nn.Sequential(
            nn.Linear(config.grammar_dim, config.grammar_hidden_dim),
            nn.Tanh(),
            nn.LayerNorm(config.grammar_hidden_dim),
            nn.Dropout(config.dropout_rate * 0.3)
        )

        # Scalar word-duration projection.
        self.duration_projection = nn.Sequential(
            nn.Linear(1, config.duration_hidden_dim),
            nn.Tanh(),
            nn.LayerNorm(config.duration_hidden_dim)
        )

        # Prosody projection (dimension-preserving).
        self.prosody_projection = nn.Sequential(
            nn.Linear(config.prosody_dim, config.prosody_dim),
            nn.ReLU(),
            nn.LayerNorm(config.prosody_dim)
        )

        # Fuse the concatenated features down to half their total width.
        total_feature_dim = (config.pos_emb_dim + config.grammar_hidden_dim +
                             config.duration_hidden_dim + config.prosody_dim)
        self.feature_fusion = nn.Sequential(
            nn.Linear(total_feature_dim, total_feature_dim // 2),
            nn.Tanh(),
            nn.LayerNorm(total_feature_dim // 2),
            nn.Dropout(config.dropout_rate)
        )

    def forward(self, pos_ids, grammar_ids, durations, prosody_features, attention_mask):
        """Return masked-mean-pooled fused features of shape
        (batch, total_feature_dim // 2).

        Assumes pos_ids/durations are (batch, seq), grammar_ids is
        (batch, seq, grammar_dim), prosody_features is
        (batch, seq, prosody_dim) — TODO confirm against callers.
        """
        batch_size, seq_len = pos_ids.size()

        # POS: clamp out-of-vocabulary ids, embed, then self-attend.
        pos_ids_clamped = pos_ids.clamp(0, self.config.pos_vocab_size - 1)
        pos_embeds = self.pos_embedding(pos_ids_clamped)
        pos_features = self.pos_attention(pos_embeds, attention_mask)

        # Grammar features.
        grammar_features = self.grammar_projection(grammar_ids.float())

        # Durations: expand scalar per token to a vector.
        duration_features = self.duration_projection(durations.unsqueeze(-1).float())

        # Prosodic features.
        prosody_features = self.prosody_projection(prosody_features.float())

        # Concatenate all modalities along the feature axis.
        combined_features = torch.cat([
            pos_features, grammar_features, duration_features, prosody_features
        ], dim=-1)

        # Fuse to half width.
        fused_features = self.feature_fusion(combined_features)

        # Mean-pool over valid (unmasked) tokens only.
        mask_expanded = attention_mask.unsqueeze(-1).float()
        pooled_features = torch.sum(fused_features * mask_expanded, dim=1) / torch.sum(mask_expanded, dim=1)

        return pooled_features
526
+
527
+ # Main classifier with stability improvements
528
class StableAphasiaClassifier(nn.Module):
    """Stable aphasia classification model.

    Fuses a pre-trained transformer encoder with hand-crafted linguistic
    features (POS ids, grammar ids, per-word durations, prosody) and emits
    three predictions from the fused representation:

      * ``logits``        -- class scores for ``num_labels`` aphasia types
      * ``severity_pred`` -- 4-way softmax distribution (severity levels)
      * ``fluency_pred``  -- scalar in (0, 1) via sigmoid

    "Stable" refers to the choices made for training stability: frozen
    BERT embeddings, LayerNorm + Tanh in the fusion/classifier stacks.
    """
    def __init__(self, config: ModelConfig, num_labels: int):
        """
        Args:
            config: Hyper-parameter container (model name, feature dims,
                dropout, loss options, ...).
            num_labels: Number of aphasia-type classes to predict.
        """
        super().__init__()
        self.config = config
        self.num_labels = num_labels

        # Pre-trained model
        self.bert = AutoModel.from_pretrained(config.model_name)
        self.bert_config = self.bert.config

        # Freeze embeddings for stability (only the embedding table; the
        # encoder layers above it remain trainable)
        for param in self.bert.embeddings.parameters():
            param.requires_grad = False

        # Positional encoding applied on top of BERT's hidden states
        self.positional_encoder = StablePositionalEncoding(
            d_model=self.bert_config.hidden_size,
            max_len=config.max_length
        )

        # Linguistic feature extractor (POS / grammar / duration / prosody)
        self.linguistic_extractor = StableLinguisticFeatureExtractor(config)

        # Calculate dimensions.
        # NOTE: linguistic_dim must equal the output width of
        # StableLinguisticFeatureExtractor — the `// 2` here mirrors the
        # halving done by that module's fusion layer; keep in sync.
        bert_dim = self.bert_config.hidden_size
        linguistic_dim = (config.pos_emb_dim + config.grammar_hidden_dim +
                          config.duration_hidden_dim + config.prosody_dim) // 2

        # Feature fusion: project [bert ; linguistic] back to bert_dim
        self.feature_fusion = nn.Sequential(
            nn.Linear(bert_dim + linguistic_dim, bert_dim),
            nn.LayerNorm(bert_dim),
            nn.Tanh(),
            nn.Dropout(config.dropout_rate)
        )

        # Main classifier head
        self.classifier = self._build_classifier(bert_dim, num_labels)

        # Multi-task heads (simplified).
        # NOTE(review): these heads are predicted in forward() but no loss
        # term is attached to them here — they are auxiliary outputs only.
        self.severity_head = nn.Sequential(
            nn.Linear(bert_dim, 4),
            nn.Softmax(dim=-1)
        )

        self.fluency_head = nn.Sequential(
            nn.Linear(bert_dim, 1),
            nn.Sigmoid()
        )

    def _build_classifier(self, input_dim: int, num_labels: int):
        """Build the MLP classifier: Linear→LayerNorm→Tanh→Dropout per
        hidden layer from ``config.classifier_hidden_dims``, then a final
        Linear projection to ``num_labels``."""
        layers = []
        current_dim = input_dim

        for hidden_dim in self.config.classifier_hidden_dims:
            layers.extend([
                nn.Linear(current_dim, hidden_dim),
                nn.LayerNorm(hidden_dim),
                nn.Tanh(),
                nn.Dropout(self.config.dropout_rate)
            ])
            current_dim = hidden_dim

        layers.append(nn.Linear(current_dim, num_labels))
        return nn.Sequential(*layers)

    def forward(self, input_ids, attention_mask, labels=None,
                word_pos_ids=None, word_grammar_ids=None, word_durations=None,
                prosody_features=None, **kwargs):
        """Run the full model.

        Extra keyword arguments (e.g. ``sentence_ids`` from the collator)
        are accepted and ignored via ``**kwargs``.

        Returns:
            dict with keys ``logits``, ``severity_pred``, ``fluency_pred``
            and ``loss`` (None when ``labels`` is not given).
        """
        # BERT encoding
        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = bert_outputs.last_hidden_state

        # Apply positional encoding
        position_enhanced = self.positional_encoder(sequence_output)

        # Attention pooling over the token dimension
        pooled_output = self._attention_pooling(position_enhanced, attention_mask)

        # Process linguistic features; all three word-level inputs must be
        # present, otherwise fall back to a zero linguistic vector.
        if all(x is not None for x in [word_pos_ids, word_grammar_ids, word_durations]):
            if prosody_features is None:
                # Missing prosody is tolerated: substitute zeros
                batch_size, seq_len = input_ids.size()
                prosody_features = torch.zeros(
                    batch_size, seq_len, self.config.prosody_dim,
                    device=input_ids.device
                )

            linguistic_features = self.linguistic_extractor(
                word_pos_ids, word_grammar_ids, word_durations,
                prosody_features, attention_mask
            )
        else:
            # Zero vector with the same width the extractor would produce
            linguistic_features = torch.zeros(
                input_ids.size(0),
                (self.config.pos_emb_dim + self.config.grammar_hidden_dim +
                 self.config.duration_hidden_dim + self.config.prosody_dim) // 2,
                device=input_ids.device
            )

        # Feature fusion
        combined_features = torch.cat([pooled_output, linguistic_features], dim=1)
        fused_features = self.feature_fusion(combined_features)

        # Predictions
        logits = self.classifier(fused_features)
        severity_pred = self.severity_head(fused_features)
        fluency_pred = self.fluency_head(fused_features)

        # Loss computation (main classification task only)
        loss = None
        if labels is not None:
            loss = self._compute_loss(logits, labels)

        return {
            "logits": logits,
            "severity_pred": severity_pred,
            "fluency_pred": fluency_pred,
            "loss": loss
        }

    def _attention_pooling(self, sequence_output, attention_mask):
        """Attention-based pooling.

        Weights come from a softmax over the per-token sum of hidden
        units. Masking *after* the softmax and renormalizing is
        mathematically equivalent to a softmax restricted to unmasked
        positions (relative weights among valid tokens are preserved).
        """
        attention_weights = torch.softmax(
            torch.sum(sequence_output, dim=-1, keepdim=True), dim=1
        )
        attention_weights = attention_weights * attention_mask.unsqueeze(-1).float()
        # +1e-9 guards against an all-masked row producing a 0/0
        attention_weights = attention_weights / (torch.sum(attention_weights, dim=1, keepdim=True) + 1e-9)
        pooled = torch.sum(sequence_output * attention_weights, dim=1)
        return pooled

    def _compute_loss(self, logits, labels):
        """Classification loss: focal loss when configured (helps with
        class imbalance), otherwise cross-entropy with optional label
        smoothing."""
        if self.config.use_focal_loss:
            focal_loss = FocalLoss(
                alpha=self.config.focal_alpha,
                gamma=self.config.focal_gamma,
                reduction='mean'
            )
            return focal_loss(logits, labels)
        else:
            if self.config.use_label_smoothing:
                return F.cross_entropy(
                    logits, labels,
                    label_smoothing=self.config.label_smoothing
                )
            else:
                return F.cross_entropy(logits, labels)
677
+
678
+ # Stable dataset class
679
+ class StableAphasiaDataset(Dataset):
680
+ """Stable dataset with simplified processing"""
681
+ def __init__(self, sentences, tokenizer, aphasia_types_mapping, config: ModelConfig):
682
+ self.samples = []
683
+ self.tokenizer = tokenizer
684
+ self.config = config
685
+ self.aphasia_types_mapping = aphasia_types_mapping
686
+
687
+ # Add special tokens
688
+ special_tokens = ["[DIALOGUE]", "[TURN]", "[PAUSE]", "[REPEAT]", "[HESITATION]"]
689
+ tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})
690
+
691
+ for idx, item in enumerate(sentences):
692
+ sentence_id = item.get("sentence_id", f"S{idx}")
693
+ aphasia_type = normalize_type(item.get("aphasia_type", ""))
694
+
695
+ if aphasia_type not in aphasia_types_mapping:
696
+ log_message(f"Skipping Sentence {sentence_id}: Invalid aphasia type '{aphasia_type}'")
697
+ continue
698
+
699
+ self._process_sentence(item, sentence_id, aphasia_type)
700
+
701
+ if not self.samples:
702
+ raise ValueError("No valid samples found in dataset!")
703
+
704
+ log_message(f"Dataset created with {len(self.samples)} samples")
705
+ self._print_class_distribution()
706
+
707
+ def _process_sentence(self, item, sentence_id, aphasia_type):
708
+ """Process sentence with stable approach"""
709
+ all_tokens, all_pos, all_grammar, all_durations = [], [], [], []
710
+
711
+ for dialogue_idx, dialogue in enumerate(item.get("dialogues", [])):
712
+ if dialogue_idx > 0:
713
+ all_tokens.append("[DIALOGUE]")
714
+ all_pos.append(0)
715
+ all_grammar.append([0, 0, 0])
716
+ all_durations.append(0.0)
717
+
718
+ for par in dialogue.get("PAR", []):
719
+ if "tokens" in par and par["tokens"]:
720
+ tokens = par["tokens"]
721
+ pos_ids = par.get("word_pos_ids", [0] * len(tokens))
722
+ grammar_ids = par.get("word_grammar_ids", [[0, 0, 0]] * len(tokens))
723
+ durations = par.get("word_durations", [0.0] * len(tokens))
724
+
725
+ all_tokens.extend(tokens)
726
+ all_pos.extend(pos_ids)
727
+ all_grammar.extend(grammar_ids)
728
+ all_durations.extend(durations)
729
+
730
+ if not all_tokens:
731
+ return
732
+
733
+ # Create sample
734
+ self._create_sample(all_tokens, all_pos, all_grammar, all_durations,
735
+ sentence_id, aphasia_type)
736
+
737
+ def _create_sample(self, tokens, pos_ids, grammar_ids, durations,
738
+ sentence_id, aphasia_type):
739
+ """Create training sample"""
740
+ # Tokenize
741
+ text = " ".join(tokens)
742
+ encoded = self.tokenizer(
743
+ text,
744
+ max_length=self.config.max_length,
745
+ padding="max_length",
746
+ truncation=True,
747
+ return_tensors="pt"
748
+ )
749
+
750
+ # Align features
751
+ aligned_pos, aligned_grammar, aligned_durations = self._align_features(
752
+ tokens, pos_ids, grammar_ids, durations, encoded
753
+ )
754
+
755
+ # Create prosody features
756
+ prosody_features = self._extract_prosodic_features(durations, tokens)
757
+ prosody_tensor = torch.tensor(prosody_features).unsqueeze(0).repeat(
758
+ self.config.max_length, 1
759
+ )
760
+
761
+ label = self.aphasia_types_mapping[aphasia_type]
762
+
763
+ sample = {
764
+ "input_ids": encoded["input_ids"].squeeze(0),
765
+ "attention_mask": encoded["attention_mask"].squeeze(0),
766
+ "labels": torch.tensor(label, dtype=torch.long),
767
+ "word_pos_ids": torch.tensor(aligned_pos, dtype=torch.long),
768
+ "word_grammar_ids": torch.tensor(aligned_grammar, dtype=torch.long),
769
+ "word_durations": torch.tensor(aligned_durations, dtype=torch.float),
770
+ "prosody_features": prosody_tensor.float(),
771
+ "sentence_id": sentence_id
772
+ }
773
+ self.samples.append(sample)
774
+
775
+ def _align_features(self, tokens, pos_ids, grammar_ids, durations, encoded):
776
+ """Align features with BERT subtokens"""
777
+ subtoken_to_token = []
778
+
779
+ for token_idx, token in enumerate(tokens):
780
+ subtokens = self.tokenizer.tokenize(token)
781
+ subtoken_to_token.extend([token_idx] * len(subtokens))
782
+
783
+ aligned_pos = [0] # [CLS]
784
+ aligned_grammar = [[0, 0, 0]] # [CLS]
785
+ aligned_durations = [0.0] # [CLS]
786
+
787
+ for subtoken_idx in range(1, self.config.max_length - 1):
788
+ if subtoken_idx - 1 < len(subtoken_to_token):
789
+ original_idx = subtoken_to_token[subtoken_idx - 1]
790
+ aligned_pos.append(pos_ids[original_idx] if original_idx < len(pos_ids) else 0)
791
+ aligned_grammar.append(grammar_ids[original_idx] if original_idx < len(grammar_ids) else [0, 0, 0])
792
+ raw = durations[original_idx] if original_idx < len(durations) else 0.0
793
+ if isinstance(raw, list) and (isinstance(raw[1], int) and isinstance(raw[0], int)):
794
+ if len(raw) >= 2:
795
+ duration_val = int(raw[1]) - int(raw[0])
796
+ else:
797
+ duration_val = raw[0]
798
+ else:
799
+ duration_val = 0.0
800
+ aligned_durations.append(duration_val)
801
+ else:
802
+ aligned_pos.append(0)
803
+ aligned_grammar.append([0, 0, 0])
804
+ aligned_durations.append(0.0)
805
+
806
+ aligned_pos.append(0) # [SEP]
807
+ aligned_grammar.append([0, 0, 0]) # [SEP]
808
+ aligned_durations.append(0.0) # [SEP]
809
+
810
+ return aligned_pos, aligned_grammar, aligned_durations
811
+
812
+ def _extract_prosodic_features(self, durations, tokens):
813
+ """Extract prosodic features"""
814
+ if not durations:
815
+ return [0.0] * self.config.prosody_dim
816
+
817
+ valid_durations = [d for d in durations if isinstance(d, (int, float)) and d > 0]
818
+ if not valid_durations:
819
+ return [0.0] * self.config.prosody_dim
820
+
821
+ features = [
822
+ np.mean(valid_durations),
823
+ np.std(valid_durations),
824
+ np.median(valid_durations),
825
+ len([d for d in valid_durations if d > np.mean(valid_durations) * 1.5])
826
+ ]
827
+
828
+ # Pad to prosody_dim
829
+ while len(features) < self.config.prosody_dim:
830
+ features.append(0.0)
831
+
832
+ return features[:self.config.prosody_dim]
833
+
834
+ def _print_class_distribution(self):
835
+ """Print class distribution"""
836
+ label_counts = Counter(sample["labels"].item() for sample in self.samples)
837
+ reverse_mapping = {v: k for k, v in self.aphasia_types_mapping.items()}
838
+
839
+ log_message("\nClass Distribution:")
840
+ for label_id, count in sorted(label_counts.items()):
841
+ class_name = reverse_mapping.get(label_id, f"Unknown_{label_id}")
842
+ log_message(f" {class_name}: {count} samples")
843
+
844
+ def __len__(self):
845
+ return len(self.samples)
846
+
847
+ def __getitem__(self, idx):
848
+ return self.samples[idx]
849
+
850
+ # Stable data collator
851
def stable_collate_fn(batch):
    """Collate a list of sample dicts into a batch of stacked tensors.

    Returns None for empty/invalid batches or on any collation failure,
    so the training loop can skip the batch instead of crashing. Optional
    per-word feature keys that are missing from a sample are replaced by
    zero tensors of the appropriate shape.
    """
    if not batch or batch[0] is None:
        return None

    try:
        seq_len = batch[0]["input_ids"].size(0)

        # Zero-tensor factories for optional fields, keyed by field name.
        fallbacks = {
            "word_pos_ids": lambda: torch.zeros(seq_len, dtype=torch.long),
            "word_grammar_ids": lambda: torch.zeros(seq_len, 3, dtype=torch.long),
            "word_durations": lambda: torch.zeros(seq_len, dtype=torch.float),
            "prosody_features": lambda: torch.zeros(seq_len, 32, dtype=torch.float),
        }

        out = {
            field: torch.stack([sample[field] for sample in batch])
            for field in ("input_ids", "attention_mask", "labels")
        }
        out["sentence_ids"] = [sample.get("sentence_id", "N/A") for sample in batch]
        for field, make_default in fallbacks.items():
            out[field] = torch.stack(
                [sample.get(field, make_default()) for sample in batch]
            )
        return out
    except Exception as err:
        log_message(f"Collation error: {err}")
        return None
873
+
874
+ # Enhanced Training callback with adaptive learning rate
875
class AdaptiveTrainingCallback(TrainerCallback):
    """Enhanced training callback with adaptive learning rate and
    comprehensive tracking.

    Responsibilities per HF Trainer hook:
      * on_train_begin -- build the AdaptiveLearningRateScheduler
      * on_log         -- cache the latest train loss / learning rate
      * on_evaluate    -- record history, adjust LR, early-stop on
                          patience or on eval accuracy > 0.84
      * on_train_end   -- persist training history and curve plots
    """
    def __init__(self, config: ModelConfig, patience=5, min_delta=0.8):
        """
        Args:
            config: Hyper-parameter container (adaptive LR settings).
            patience: Evaluations without improvement before stopping.
            min_delta: Minimum F1 improvement to count as "new best".
                NOTE(review): 0.8 is huge on a [0, 1] F1 scale — with this
                default the "new best" branch can fire at most once, so
                early stopping degenerates to a fixed evaluation budget.
                Confirm this is intentional.
        """
        self.config = config
        self.patience = patience
        self.min_delta = min_delta
        self.best_metric = float('-inf')
        self.patience_counter = 0

        # Learning rate scheduler (created lazily in on_train_begin)
        self.lr_scheduler = None

        # History tracker
        self.history_tracker = TrainingHistoryTracker()

        # Metrics cached for the current epoch
        self.current_train_metrics = {}
        self.current_eval_metrics = {}

    def on_train_begin(self, args, state, control, **kwargs):
        """Initialize learning rate scheduler (only when adaptive LR is
        enabled and the Trainer passed both model and optimizer)."""
        if self.config.adaptive_lr:
            model = kwargs.get('model')
            optimizer = kwargs.get('optimizer')
            if optimizer and model:
                # Fall back to dataloader-length * epochs when max_steps
                # was not set explicitly
                total_steps = state.max_steps if state.max_steps > 0 else len(kwargs.get('train_dataloader', [])) * args.num_train_epochs
                self.lr_scheduler = AdaptiveLearningRateScheduler(optimizer, self.config, total_steps)
                log_message("Adaptive learning rate scheduler initialized")

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Capture training metrics from the Trainer's logging stream."""
        if logs:
            # Store training metrics for later pairing with eval metrics
            if 'train_loss' in logs:
                self.current_train_metrics['loss'] = logs['train_loss']
            if 'learning_rate' in logs:
                self.current_train_metrics['lr'] = logs['learning_rate']

    def on_evaluate(self, args, state, control, logs=None, **kwargs):
        """Handle evaluation: update history, adapt the LR, and apply the
        target-accuracy / patience-based stopping rules."""
        if logs is not None:
            # 'eval_f1' is the weighted F1 from compute_comprehensive_metrics
            current_metric = logs.get('eval_f1', 0)
            current_loss = logs.get('eval_loss', float('inf'))
            current_acc = logs.get('eval_accuracy', 0)

            # Store evaluation metrics
            self.current_eval_metrics = {
                'loss': current_loss,
                'f1': current_metric,
                'accuracy': current_acc,
                'precision': logs.get('eval_precision_macro', 0),
                'recall': logs.get('eval_recall_macro', 0)
            }

            # Update history (train-side accuracy/F1/precision/recall are
            # not computed by the Trainer, hence the 0 placeholders)
            epoch_metrics = {
                'train_loss': self.current_train_metrics.get('loss', 0),
                'eval_loss': current_loss,
                'train_accuracy': 0,  # Will be computed separately if needed
                'eval_accuracy': current_acc,
                'train_f1': 0,  # Will be computed separately if needed
                'eval_f1': current_metric,
                'learning_rate': self.current_train_metrics.get('lr', self.config.learning_rate),
                'train_precision': 0,
                'eval_precision': logs.get('eval_precision_macro', 0),
                'train_recall': 0,
                'eval_recall': logs.get('eval_recall_macro', 0)
            }

            self.history_tracker.update(state.epoch, epoch_metrics)

            # Adaptive learning rate adjustment (scheduler mutates the
            # optimizer in place; the returned value is informational)
            if self.lr_scheduler and self.config.adaptive_lr:
                new_lr = self.lr_scheduler.adaptive_lr_calculation(current_loss, current_metric, current_acc)

            # Hard stop once the target accuracy is hit; save first
            if current_acc > 0.84:
                log_message(f"Target accuracy reached ({current_acc:.2%}) → stopping and saving model")
                control.should_save = True
                control.should_training_stop = True
                return control

            # Early stopping logic (see NOTE(review) on min_delta above)
            if current_metric > self.best_metric + self.min_delta:
                self.best_metric = current_metric
                self.patience_counter = 0
                log_message(f"New best F1 score: {current_metric:.4f}")
            else:
                self.patience_counter += 1
                log_message(f"No improvement for {self.patience_counter} evaluations")

            if self.patience_counter >= self.patience:
                log_message("Early stopping triggered")
                control.should_training_stop = True

        clear_memory()

    def on_train_end(self, args, state, control, **kwargs):
        """Save training history and learning curves at the end."""
        output_dir = args.output_dir
        self.history_tracker.save_history(output_dir)
        self.history_tracker.plot_training_curves(output_dir)
        log_message("Training history and curves saved")
975
+
976
+ # Metrics computation
977
def compute_comprehensive_metrics(pred):
    """Compute comprehensive evaluation metrics for the HF Trainer.

    Returns accuracy, weighted F1 (under the key ``f1``, which the
    Trainer surfaces as ``eval_f1``), macro F1/precision/recall, and the
    standard deviation of each metric across classes (a balance
    indicator: high std means performance is uneven between classes).
    """
    raw = pred.predictions
    logits = raw[0] if isinstance(raw, tuple) else raw
    y_true = pred.label_ids
    y_pred = np.argmax(logits, axis=1)

    # Per-class vectors, used only for the spread statistics below
    per_class = {
        "f1": f1_score(y_true, y_pred, average=None, zero_division=0),
        "precision": precision_score(y_true, y_pred, average=None, zero_division=0),
        "recall": recall_score(y_true, y_pred, average=None, zero_division=0),
    }

    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, average='weighted', zero_division=0),
        "f1_macro": f1_score(y_true, y_pred, average='macro', zero_division=0),
        "precision_macro": precision_score(y_true, y_pred, average='macro', zero_division=0),
        "recall_macro": recall_score(y_true, y_pred, average='macro', zero_division=0),
        "f1_std": np.std(per_class["f1"]),
        "precision_std": np.std(per_class["precision"]),
        "recall_std": np.std(per_class["recall"]),
    }
1005
+
1006
+ # Enhanced analysis and visualization
1007
def generate_comprehensive_reports(trainer, eval_dataset, aphasia_types_mapping, tokenizer, output_dir):
    """Generate comprehensive analysis reports and visualizations.

    Runs the trained model over ``eval_dataset`` and writes to ``output_dir``:
      * enhanced_confusion_matrix.png           (counts + row percentages)
      * comprehensive_classification_report.csv
      * per_class_metrics.csv / per_class_performance.png
      * confidence_distribution.png
      * comprehensive_results.csv               (per-sample predictions)
      * summary_statistics.json

    Returns:
        (results_df, df_report, summary_stats)
    """
    log_message("Generating comprehensive reports...")

    # Unwrap DataParallel/DDP if present
    model = trainer.model
    if hasattr(model, 'module'):
        model = model.module

    model.eval()
    device = next(model.parameters()).device

    predictions = []
    true_labels = []
    sentence_ids = []
    severity_preds = []
    fluency_preds = []
    prediction_probs = []

    # Evaluation pass (no grad, fixed batch size)
    dataloader = DataLoader(eval_dataset, batch_size=8, collate_fn=stable_collate_fn)

    with torch.no_grad():
        for batch_idx, batch in enumerate(dataloader):
            if batch is None:
                # stable_collate_fn returns None for broken batches
                continue

            # Move tensor fields to the model's device
            for key in ['input_ids', 'attention_mask', 'word_pos_ids',
                        'word_grammar_ids', 'word_durations', 'labels', 'prosody_features']:
                if key in batch:
                    batch[key] = batch[key].to(device)

            # Extra keys like sentence_ids are absorbed by **kwargs
            outputs = model(**batch)

            logits = outputs["logits"]
            probs = F.softmax(logits, dim=1)
            preds = torch.argmax(logits, dim=1).cpu().numpy()

            predictions.extend(preds)
            true_labels.extend(batch["labels"].cpu().numpy())
            sentence_ids.extend(batch["sentence_ids"])
            severity_preds.extend(outputs["severity_pred"].cpu().numpy())
            fluency_preds.extend(outputs["fluency_pred"].cpu().numpy())
            prediction_probs.extend(probs.cpu().numpy())

    # Analysis
    reverse_mapping = {v: k for k, v in aphasia_types_mapping.items()}

    # 1. Detailed per-sample predictions (first 20 only, for the log)
    log_message("=== DETAILED PREDICTIONS (First 20) ===")
    for i in range(min(20, len(predictions))):
        true_type = reverse_mapping.get(true_labels[i], 'Unknown')
        pred_type = reverse_mapping.get(predictions[i], 'Unknown')
        severity_level = np.argmax(severity_preds[i])
        fluency_score = fluency_preds[i][0] if isinstance(fluency_preds[i], np.ndarray) else fluency_preds[i]
        confidence = np.max(prediction_probs[i])

        log_message(f"ID: {sentence_ids[i]} | True: {true_type} | Pred: {pred_type} | "
                    f"Confidence: {confidence:.3f} | Severity: {severity_level} | Fluency: {fluency_score:.3f}")

    # 2. Confusion matrix
    cm = confusion_matrix(true_labels, predictions)

    # Enhanced confusion matrix plot
    plt.figure(figsize=(14, 12))

    # Row-normalized percentages.
    # NOTE(review): a class with zero true samples makes the row sum 0 and
    # this division emits NaNs — confirm every class is represented.
    cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100

    # Annotate each cell with "count\n(percent)"
    annotations = np.empty_like(cm, dtype=object)
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            annotations[i, j] = f'{cm[i, j]}\n({cm_percentage[i, j]:.1f}%)'

    sns.heatmap(cm, annot=annotations, fmt='', cmap="Blues",
                xticklabels=list(aphasia_types_mapping.keys()),
                yticklabels=list(aphasia_types_mapping.keys()),
                cbar_kws={'label': 'Count'})

    plt.xlabel("Predicted Label", fontsize=12, fontweight='bold')
    plt.ylabel("True Label", fontsize=12, fontweight='bold')
    plt.title("Enhanced Confusion Matrix\n(Count and Percentage)", fontsize=14, fontweight='bold')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "enhanced_confusion_matrix.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # 3. Classification report (all classes forced via labels=, so absent
    # classes still appear with zero scores)
    all_label_ids = list(aphasia_types_mapping.values())
    report_dict = classification_report(
        true_labels,
        predictions,
        labels=all_label_ids,
        target_names=list(aphasia_types_mapping.keys()),
        output_dict=True,
        zero_division=0
    )

    df_report = pd.DataFrame(report_dict).transpose()
    df_report.to_csv(os.path.join(output_dir, "comprehensive_classification_report.csv"))

    # 4. Per-class performance visualization
    class_names = list(aphasia_types_mapping.keys())
    metrics_data = []

    for i, class_name in enumerate(class_names):
        if class_name in report_dict:
            metrics_data.append({
                'Class': class_name,
                'Precision': report_dict[class_name]['precision'],
                'Recall': report_dict[class_name]['recall'],
                'F1-Score': report_dict[class_name]['f1-score'],
                'Support': report_dict[class_name]['support']
            })

    df_metrics = pd.DataFrame(metrics_data)
    df_metrics.to_csv(os.path.join(output_dir, "per_class_metrics.csv"), index=False)

    # Plot per-class performance: 2x2 grid of bar charts
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # Precision
    axes[0, 0].bar(df_metrics['Class'], df_metrics['Precision'], color='skyblue', alpha=0.8)
    axes[0, 0].set_title('Precision by Class', fontweight='bold')
    axes[0, 0].set_ylabel('Precision')
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 0].grid(True, alpha=0.3)

    # Recall
    axes[0, 1].bar(df_metrics['Class'], df_metrics['Recall'], color='lightcoral', alpha=0.8)
    axes[0, 1].set_title('Recall by Class', fontweight='bold')
    axes[0, 1].set_ylabel('Recall')
    axes[0, 1].tick_params(axis='x', rotation=45)
    axes[0, 1].grid(True, alpha=0.3)

    # F1-Score
    axes[1, 0].bar(df_metrics['Class'], df_metrics['F1-Score'], color='lightgreen', alpha=0.8)
    axes[1, 0].set_title('F1-Score by Class', fontweight='bold')
    axes[1, 0].set_ylabel('F1-Score')
    axes[1, 0].tick_params(axis='x', rotation=45)
    axes[1, 0].grid(True, alpha=0.3)

    # Support
    axes[1, 1].bar(df_metrics['Class'], df_metrics['Support'], color='gold', alpha=0.8)
    axes[1, 1].set_title('Support by Class', fontweight='bold')
    axes[1, 1].set_ylabel('Support (Number of Samples)')
    axes[1, 1].tick_params(axis='x', rotation=45)
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "per_class_performance.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # 5. Prediction confidence distribution
    confidences = [np.max(prob) for prob in prediction_probs]
    correct_predictions = [pred == true for pred, true in zip(predictions, true_labels)]

    plt.figure(figsize=(12, 8))

    # Separate correct and incorrect predictions
    correct_confidences = [conf for conf, correct in zip(confidences, correct_predictions) if correct]
    incorrect_confidences = [conf for conf, correct in zip(confidences, correct_predictions) if not correct]

    plt.hist(correct_confidences, bins=30, alpha=0.7, label='Correct Predictions', color='green', density=True)
    plt.hist(incorrect_confidences, bins=30, alpha=0.7, label='Incorrect Predictions', color='red', density=True)

    plt.xlabel('Prediction Confidence', fontsize=12)
    plt.ylabel('Density', fontsize=12)
    plt.title('Distribution of Prediction Confidence', fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "confidence_distribution.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # 6. Auxiliary-head feature analysis (severity / fluency)
    log_message("=== FEATURE ANALYSIS ===")
    avg_severity = np.mean(severity_preds, axis=0)
    avg_fluency = np.mean(fluency_preds)
    std_fluency = np.std(fluency_preds)

    log_message(f"Average Severity Distribution: {avg_severity}")
    log_message(f"Average Fluency Score: {avg_fluency:.3f} ± {std_fluency:.3f}")

    # 7. Save detailed per-sample results
    results_df = pd.DataFrame({
        'sentence_id': sentence_ids,
        'true_label': [reverse_mapping[label] for label in true_labels],
        'predicted_label': [reverse_mapping[pred] for pred in predictions],
        'prediction_confidence': confidences,
        'correct_prediction': correct_predictions,
        'severity_level': [np.argmax(severity) for severity in severity_preds],
        'fluency_score': [fluency[0] if isinstance(fluency, np.ndarray) else fluency for fluency in fluency_preds]
    })

    # Add probability columns for each class
    for i, class_name in enumerate(aphasia_types_mapping.keys()):
        results_df[f'prob_{class_name}'] = [prob[i] for prob in prediction_probs]

    results_df.to_csv(os.path.join(output_dir, "comprehensive_results.csv"), index=False)

    # 8. Summary statistics
    summary_stats = {
        'Overall Accuracy': accuracy_score(true_labels, predictions),
        'Macro F1': f1_score(true_labels, predictions, average='macro'),
        'Weighted F1': f1_score(true_labels, predictions, average='weighted'),
        'Macro Precision': precision_score(true_labels, predictions, average='macro'),
        'Macro Recall': recall_score(true_labels, predictions, average='macro'),
        'Average Confidence': np.mean(confidences),
        'Confidence Std': np.std(confidences),
        'Average Severity': avg_severity.tolist(),
        'Average Fluency': avg_fluency,
        'Fluency Std': std_fluency
    }

    # Cast numpy scalars to native types so json.dump doesn't choke
    serializable_summary = {
        k: float(v) if isinstance(v, (np.floating, np.integer)) else v
        for k, v in summary_stats.items()
    }
    with open(os.path.join(output_dir, "summary_statistics.json"), "w") as f:
        json.dump(serializable_summary, f, indent=2)

    log_message("Comprehensive Classification Report:")
    log_message(df_report.to_string())
    log_message(f"Comprehensive results saved to {output_dir}")

    return results_df, df_report, summary_stats
1236
+
1237
+ # Main training function with adaptive learning rate
1238
def train_adaptive_model(json_file: str, output_dir: str = "./adaptive_aphasia_model"):
    """Main training function with adaptive learning rate.

    Pipeline: load JSON dataset → normalize/filter aphasia types →
    build StableAphasiaDataset → 80/20 random split → train a
    StableAphasiaClassifier via the HF Trainer with
    AdaptiveTrainingCallback → final evaluation, comprehensive reports,
    and model/config export to ``output_dir``.

    Args:
        json_file: Path to the dataset JSON (expects a top-level
            ``sentences`` list).
        output_dir: Directory for checkpoints, reports and the exported
            model/config.

    Returns:
        (trainer, eval_results, results_df)
    """

    log_message("Starting Adaptive Aphasia Classification Training")
    log_message("=" * 60)

    # Setup
    config = ModelConfig()
    os.makedirs(output_dir, exist_ok=True)

    # Device setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log_message(f"Using device: {device}")

    # Load data
    log_message("Loading dataset...")
    with open(json_file, "r", encoding="utf-8") as f:
        dataset_json = json.load(f)

    sentences = dataset_json.get("sentences", [])

    # Normalize aphasia type strings in place
    for item in sentences:
        if "aphasia_type" in item:
            item["aphasia_type"] = normalize_type(item["aphasia_type"])

    # Fixed label mapping for the 9 supported aphasia types
    aphasia_types_mapping = {
        "BROCA": 0,
        "TRANSMOTOR": 1,
        "NOTAPHASICBYWAB": 2,
        "CONDUCTION": 3,
        "WERNICKE": 4,
        "ANOMIC": 5,
        "GLOBAL": 6,
        "ISOLATION": 7,
        "TRANSSENSORY": 8
    }

    log_message(f"Aphasia Types Mapping: {aphasia_types_mapping}")

    num_labels = len(aphasia_types_mapping)
    log_message(f"Number of labels: {num_labels}")

    # Drop sentences whose (normalized) type is not in the mapping
    filtered_sentences = []
    for item in sentences:
        aphasia_type = item.get("aphasia_type", "")
        if aphasia_type in aphasia_types_mapping:
            filtered_sentences.append(item)
        else:
            log_message(f"Excluding sentence with invalid type: {aphasia_type}")

    log_message(f"Filtered dataset: {len(filtered_sentences)} sentences")

    # Initialize tokenizer (GPT-style tokenizers have no pad token)
    tokenizer = AutoTokenizer.from_pretrained(config.model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Create dataset (shuffle first so the later index-based split is
    # effectively random at the sentence level too)
    random.shuffle(filtered_sentences)
    dataset_all = StableAphasiaDataset(
        filtered_sentences, tokenizer, aphasia_types_mapping, config
    )

    # Split dataset 80/20
    total_samples = len(dataset_all)
    train_size = int(0.8 * total_samples)
    eval_size = total_samples - train_size

    train_dataset, eval_dataset = torch.utils.data.random_split(
        dataset_all, [train_size, eval_size]
    )

    log_message(f"Train size: {train_size}, Eval size: {eval_size}")

    # Setup weighted sampling for class imbalance.
    # NOTE(review): `sampler` is constructed here but never passed to the
    # Trainer below, so weighted sampling is effectively disabled — the
    # Trainer builds its own dataloader. Confirm whether this was meant
    # to be wired in (e.g. via a custom get_train_dataloader).
    train_labels = [dataset_all.samples[idx]["labels"].item() for idx in train_dataset.indices]
    label_counts = Counter(train_labels)
    sample_weights = [1.0 / label_counts[label] for label in train_labels]
    sampler = WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(sample_weights),
        replacement=True
    )

    # Model initialization (fresh model per Trainer init; embeddings
    # resized to account for the special tokens added by the dataset)
    def model_init():
        model = StableAphasiaClassifier(config, num_labels)
        model.bert.resize_token_embeddings(len(tokenizer))
        return model.to(device)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=config.learning_rate,
        per_device_train_batch_size=config.batch_size,
        per_device_eval_batch_size=config.batch_size,
        num_train_epochs=config.num_epochs,
        weight_decay=config.weight_decay,
        warmup_ratio=config.warmup_ratio,
        logging_strategy="steps",
        logging_steps=50,
        seed=42,
        dataloader_num_workers=0,
        gradient_accumulation_steps=config.gradient_accumulation_steps,
        max_grad_norm=1.0,
        fp16=False,
        dataloader_drop_last=True,
        report_to=None,
        load_best_model_at_end=True,
        # 'eval_f1' is the weighted F1 emitted by compute_comprehensive_metrics
        metric_for_best_model="eval_f1",
        greater_is_better=True,
        save_total_limit=3,
        # Keep custom keys (word_pos_ids, prosody_features, ...) in batches
        remove_unused_columns=False,
    )

    # Initialize trainer with adaptive callback.
    # NOTE(review): min_delta=0.8 on an F1 scale of [0, 1] effectively
    # disables the "new best" reset — see AdaptiveTrainingCallback.
    trainer = Trainer(
        model_init=model_init,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_comprehensive_metrics,
        data_collator=stable_collate_fn,
        callbacks=[AdaptiveTrainingCallback(config, patience=5, min_delta=0.8)]
    )

    # Start training
    log_message("Starting adaptive training...")
    try:
        trainer.train()
        log_message("Training completed successfully!")
    except Exception as e:
        # Log the full traceback before propagating so failures in long
        # runs are diagnosable from the log file
        log_message(f"Training error: {str(e)}")
        import traceback
        log_message(traceback.format_exc())
        raise

    # Final evaluation
    log_message("Starting final evaluation...")
    eval_results = trainer.evaluate()
    log_message(f"Final evaluation results: {eval_results}")

    # Generate comprehensive reports
    results_df, report_df, summary_stats = generate_comprehensive_reports(
        trainer, eval_dataset, aphasia_types_mapping, tokenizer, output_dir
    )

    # Save model (unwrap DataParallel/DDP if present)
    model_to_save = trainer.model
    if hasattr(model_to_save, 'module'):
        model_to_save = model_to_save.module

    torch.save(model_to_save.state_dict(), os.path.join(output_dir, "pytorch_model.bin"))
    tokenizer.save_pretrained(output_dir)

    # Save configuration alongside the weights for reproducibility
    config_dict = {
        "model_name": config.model_name,
        "num_labels": num_labels,
        "aphasia_types_mapping": aphasia_types_mapping,
        "training_args": training_args.to_dict(),
        "adaptive_lr_config": {
            "adaptive_lr": config.adaptive_lr,
            "lr_patience": config.lr_patience,
            "lr_factor": config.lr_factor,
            "lr_increase_factor": config.lr_increase_factor,
            "min_lr": config.min_lr,
            "max_lr": config.max_lr,
            "oscillation_amplitude": config.oscillation_amplitude
        }
    }

    with open(os.path.join(output_dir, "config.json"), "w") as f:
        json.dump(config_dict, f, indent=2)

    log_message(f"Adaptive model and comprehensive reports saved to {output_dir}")
    clear_memory()

    return trainer, eval_results, results_df
1422
+
1423
# Cross-validation with adaptive learning rate
def train_adaptive_cross_validation(json_file: str, output_dir: str = "./adaptive_cv_results", n_folds: int = 5):
    """Run stratified k-fold cross-validation training with adaptive learning rate.

    Loads a sentence-level dataset from ``json_file`` (top-level ``"sentences"``
    list), normalizes and filters aphasia-type labels against a fixed 9-class
    mapping, trains one model per fold via ``train_adaptive_single_fold``, then
    aggregates per-fold metrics into a CSV/JSON summary and saves an overall
    confusion matrix plus per-metric bar charts into ``output_dir``.

    Args:
        json_file: Path to the JSON dataset; each sentence item is expected to
            carry an ``"aphasia_type"`` field (normalized via ``normalize_type``).
        output_dir: Directory for per-fold checkpoints, summaries, and plots
            (created if missing).
        n_folds: Number of stratified folds.

    Returns:
        Tuple ``(results_df, cv_summary)``: a pandas DataFrame with one row of
        metrics per fold, and a dict of mean/std statistics across folds.
    """
    log_message("Starting Adaptive Cross-Validation Training")

    config = ModelConfig()
    os.makedirs(output_dir, exist_ok=True)

    # Load and prepare data
    with open(json_file, "r", encoding="utf-8") as f:
        dataset_json = json.load(f)

    sentences = dataset_json.get("sentences", [])

    # Normalize labels in place so they match the canonical mapping keys below.
    for item in sentences:
        if "aphasia_type" in item:
            item["aphasia_type"] = normalize_type(item["aphasia_type"])

    # Fixed label-to-id mapping (9 aphasia classes); kept identical to the
    # mapping used by the single-model training path for consistent reports.
    aphasia_types_mapping = {
        "BROCA": 0, "TRANSMOTOR": 1, "NOTAPHASICBYWAB": 2,
        "CONDUCTION": 3, "WERNICKE": 4, "ANOMIC": 5,
        "GLOBAL": 6, "ISOLATION": 7, "TRANSSENSORY": 8
    }

    # Drop sentences whose (normalized) type is not one of the known classes.
    filtered_sentences = [s for s in sentences if s.get("aphasia_type") in aphasia_types_mapping]

    # Initialize tokenizer; fall back to EOS as pad token if the model has none.
    tokenizer = AutoTokenizer.from_pretrained(config.model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Create the full dataset once; folds are realized as index Subsets below.
    full_dataset = StableAphasiaDataset(
        filtered_sentences, tokenizer, aphasia_types_mapping, config
    )

    # Extract labels for stratification (one label id per prepared sample).
    sample_labels = [sample["labels"].item() for sample in full_dataset.samples]

    # Cross-validation: fixed seed so fold assignment is reproducible.
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    fold_results = []
    all_predictions = []
    all_true_labels = []

    # StratifiedKFold only needs y for splitting, so a zero placeholder is
    # passed as X.
    for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(sample_labels)), sample_labels)):
        log_message(f"\n=== Fold {fold + 1}/{n_folds} ===")

        train_subset = Subset(full_dataset, train_idx)
        val_subset = Subset(full_dataset, val_idx)

        # Train single fold.
        # NOTE(review): fold_trainer is not used after this call; only the
        # metrics dict and predictions are consumed.
        fold_trainer, fold_results_dict, fold_predictions = train_adaptive_single_fold(
            train_subset, val_subset, config, aphasia_types_mapping,
            tokenizer, fold, output_dir
        )

        fold_results.append({
            'fold': fold + 1,
            **fold_results_dict
        })

        # Collect predictions across folds for the pooled confusion matrix.
        all_predictions.extend(fold_predictions['predictions'])
        all_true_labels.extend(fold_predictions['true_labels'])

        clear_memory()

    # Aggregate per-fold results into one table and persist as CSV.
    results_df = pd.DataFrame(fold_results)
    results_df.to_csv(os.path.join(output_dir, "adaptive_cv_summary.csv"), index=False)

    # Cross-validation summary statistics (mean/std over folds).
    # NOTE(review): column names here assume train_adaptive_single_fold's
    # eval dict uses the "accuracy"/"f1"/... keys (without the "eval_" prefix
    # the HF Trainer normally adds) — confirm against compute_comprehensive_metrics.
    cv_summary = {
        'mean_accuracy': results_df['accuracy'].mean(),
        'std_accuracy': results_df['accuracy'].std(),
        'mean_f1': results_df['f1'].mean(),
        'std_f1': results_df['f1'].std(),
        'mean_f1_macro': results_df['f1_macro'].mean(),
        'std_f1_macro': results_df['f1_macro'].std(),
        'mean_precision': results_df['precision_macro'].mean(),
        'std_precision': results_df['precision_macro'].std(),
        'mean_recall': results_df['recall_macro'].mean(),
        'std_recall': results_df['recall_macro'].std()
    }

    with open(os.path.join(output_dir, "cv_statistics.json"), "w") as f:
        json.dump(cv_summary, f, indent=2)

    # Overall confusion matrix pooled across all folds' validation predictions.
    overall_cm = confusion_matrix(all_true_labels, all_predictions)

    plt.figure(figsize=(12, 10))
    sns.heatmap(overall_cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=list(aphasia_types_mapping.keys()),
                yticklabels=list(aphasia_types_mapping.keys()))
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title("Overall Confusion Matrix (All Folds)")
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "overall_confusion_matrix.png"), dpi=300, bbox_inches='tight')
    plt.close()

    # 2x2 grid of per-fold bar charts: accuracy, F1, precision, recall — each
    # with a dashed line at the cross-fold mean.
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Accuracy across folds
    axes[0, 0].bar(range(1, n_folds + 1), results_df['accuracy'], color='skyblue', alpha=0.8)
    axes[0, 0].axhline(y=results_df['accuracy'].mean(), color='red', linestyle='--',
                       label=f'Mean: {results_df["accuracy"].mean():.3f}')
    axes[0, 0].set_title('Accuracy Across Folds')
    axes[0, 0].set_xlabel('Fold')
    axes[0, 0].set_ylabel('Accuracy')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)

    # F1 Score across folds
    axes[0, 1].bar(range(1, n_folds + 1), results_df['f1'], color='lightgreen', alpha=0.8)
    axes[0, 1].axhline(y=results_df['f1'].mean(), color='red', linestyle='--',
                       label=f'Mean: {results_df["f1"].mean():.3f}')
    axes[0, 1].set_title('F1 Score Across Folds')
    axes[0, 1].set_xlabel('Fold')
    axes[0, 1].set_ylabel('F1 Score')
    axes[0, 1].legend()
    axes[0, 1].grid(True, alpha=0.3)

    # Precision across folds
    axes[1, 0].bar(range(1, n_folds + 1), results_df['precision_macro'], color='coral', alpha=0.8)
    axes[1, 0].axhline(y=results_df['precision_macro'].mean(), color='red', linestyle='--',
                       label=f'Mean: {results_df["precision_macro"].mean():.3f}')
    axes[1, 0].set_title('Precision Across Folds')
    axes[1, 0].set_xlabel('Fold')
    axes[1, 0].set_ylabel('Precision')
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)

    # Recall across folds
    axes[1, 1].bar(range(1, n_folds + 1), results_df['recall_macro'], color='gold', alpha=0.8)
    axes[1, 1].axhline(y=results_df['recall_macro'].mean(), color='red', linestyle='--',
                       label=f'Mean: {results_df["recall_macro"].mean():.3f}')
    axes[1, 1].set_title('Recall Across Folds')
    axes[1, 1].set_xlabel('Fold')
    axes[1, 1].set_ylabel('Recall')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "cv_performance_comparison.png"), dpi=300, bbox_inches='tight')
    plt.close()

    log_message("\n=== Adaptive Cross-Validation Summary ===")
    log_message(results_df.to_string(index=False))

    # Statistics
    log_message(f"\nMean F1: {results_df['f1'].mean():.4f} ± {results_df['f1'].std():.4f}")
    log_message(f"Mean Accuracy: {results_df['accuracy'].mean():.4f} ± {results_df['accuracy'].std():.4f}")
    log_message(f"Mean F1 Macro: {results_df['f1_macro'].mean():.4f} ± {results_df['f1_macro'].std():.4f}")

    return results_df, cv_summary

1586
def train_adaptive_single_fold(train_dataset, val_dataset, config, aphasia_types_mapping,
                               tokenizer, fold, output_dir):
    """Train a single cross-validation fold with the adaptive-LR callback.

    Builds a fresh ``StableAphasiaClassifier`` via ``model_init``, trains it on
    ``train_dataset`` with HF ``Trainer``, evaluates on ``val_dataset``, saves
    the fold's weights under ``output_dir/fold_{fold}``, and returns the
    predictions needed for the pooled confusion matrix.

    Args:
        train_dataset: Training split for this fold (torch ``Subset``).
        val_dataset: Validation split for this fold (torch ``Subset``).
        config: ModelConfig with learning-rate/batch/epoch hyperparameters.
        aphasia_types_mapping: Label-name -> id mapping (defines num_labels).
        tokenizer: Tokenizer whose vocabulary size the model embedding is
            resized to (special tokens may have been added).
        fold: Zero-based fold index, used to name the output subdirectory.
        output_dir: Root directory for per-fold outputs.

    Returns:
        Tuple ``(trainer, eval_results, fold_predictions)`` where
        ``fold_predictions`` is ``{'predictions': [...], 'true_labels': [...]}``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_labels = len(aphasia_types_mapping)

    # Setup weighted sampling (inverse class frequency).
    # NOTE(review): this sampler is constructed but never handed to the
    # Trainer, so it currently has no effect on training — confirm whether a
    # custom get_train_dataloader was intended.
    train_labels = [train_dataset[i]["labels"].item() for i in range(len(train_dataset))]
    label_counts = Counter(train_labels)
    sample_weights = [1.0 / label_counts[label] for label in train_labels]
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    # Model initialization: a fresh model per fold; embeddings are resized to
    # cover any special tokens added to the tokenizer.
    def model_init():
        model = StableAphasiaClassifier(config, num_labels)
        model.bert.resize_token_embeddings(len(tokenizer))
        return model.to(device)

    # Per-fold output directory for checkpoints and the saved state dict.
    fold_output_dir = os.path.join(output_dir, f"fold_{fold}")
    os.makedirs(fold_output_dir, exist_ok=True)

    training_args = TrainingArguments(
        output_dir=fold_output_dir,
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=config.learning_rate,
        per_device_train_batch_size=config.batch_size,
        per_device_eval_batch_size=config.batch_size,
        num_train_epochs=config.num_epochs,
        weight_decay=config.weight_decay,
        warmup_ratio=config.warmup_ratio,
        logging_steps=50,
        seed=42,
        dataloader_num_workers=0,
        gradient_accumulation_steps=config.gradient_accumulation_steps,
        max_grad_norm=1.0,
        fp16=False,
        dataloader_drop_last=True,
        # NOTE(review): the documented way to disable integrations is
        # report_to="none"; passing None may fall back to the default set —
        # verify against the installed transformers version.
        report_to=None,
        load_best_model_at_end=True,
        metric_for_best_model="eval_f1",
        greater_is_better=True,
        save_total_limit=1,  # keep only the best checkpoint per fold
        remove_unused_columns=False,
    )

    # Trainer with adaptive callback (adjusts LR based on eval progress).
    trainer = Trainer(
        model_init=model_init,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_comprehensive_metrics,
        data_collator=stable_collate_fn,
        callbacks=[AdaptiveTrainingCallback(config, patience=5, min_delta=0.8)]
    )

    # Train
    trainer.train()

    # Evaluate
    eval_results = trainer.evaluate()

    # Get predictions for ensemble analysis. The model may return a tuple of
    # outputs; the first element is taken as the classification logits.
    predictions = trainer.predict(val_dataset)
    pred_labels = np.argmax(predictions.predictions[0] if isinstance(predictions.predictions, tuple) else predictions.predictions, axis=1)
    true_labels = predictions.label_ids

    fold_predictions = {
        'predictions': pred_labels.tolist(),
        'true_labels': true_labels.tolist()
    }

    # Save fold model; unwrap DataParallel/DDP wrapper if present.
    model_to_save = trainer.model
    if hasattr(model_to_save, 'module'):
        model_to_save = model_to_save.module

    torch.save(model_to_save.state_dict(), os.path.join(fold_output_dir, "pytorch_model.bin"))

    return trainer, eval_results, fold_predictions

1669
# Main execution
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Adaptive Learning Rate Aphasia Classification Training")
    parser.add_argument("--output_dir", type=str, default="./adaptive_aphasia_model", help="Output directory")
    parser.add_argument("--cross_validation", action="store_true", help="Use cross-validation")
    parser.add_argument("--n_folds", type=int, default=5, help="Number of CV folds")
    # NOTE(review): `json_file` must be a module-level variable defined earlier
    # in this file for this default to resolve — confirm it exists.
    parser.add_argument("--json_file", type=str, default=json_file, help="Path to JSON dataset file")
    parser.add_argument("--learning_rate", type=float, default=5e-4, help="Initial learning rate")
    parser.add_argument("--batch_size", type=int, default=24, help="Batch size")
    parser.add_argument("--num_epochs", type=int, default=3, help="Number of epochs")
    # Fix: the original declared --adaptive_lr with action="store_true" AND
    # default=True, which made the flag a no-op (adaptive LR could never be
    # disabled from the CLI). Keep --adaptive_lr accepted for backward
    # compatibility and add --no_adaptive_lr to turn it off; default stays True.
    parser.add_argument("--adaptive_lr", dest="adaptive_lr", action="store_true",
                        help="Use adaptive learning rate (default)")
    parser.add_argument("--no_adaptive_lr", dest="adaptive_lr", action="store_false",
                        help="Disable the adaptive learning rate")
    parser.set_defaults(adaptive_lr=True)

    args = parser.parse_args()

    # Update config with command line arguments.
    config = ModelConfig()
    config.learning_rate = args.learning_rate
    config.batch_size = args.batch_size
    config.num_epochs = args.num_epochs
    config.adaptive_lr = args.adaptive_lr

    try:
        clear_memory()

        log_message(f"Starting training with adaptive_lr={config.adaptive_lr}")
        log_message(f"Config: lr={config.learning_rate}, batch_size={config.batch_size}, epochs={config.num_epochs}")

        # Dispatch to k-fold CV or a single train/eval run.
        if args.cross_validation:
            results_df, cv_summary = train_adaptive_cross_validation(args.json_file, args.output_dir, args.n_folds)
            log_message("Cross-validation training completed!")
        else:
            trainer, eval_results, results_df = train_adaptive_model(args.json_file, args.output_dir)
            log_message("Single model training completed!")

        log_message("All adaptive training completed successfully!")

    except Exception as e:
        # Best-effort top-level boundary: log the failure with its traceback
        # instead of crashing silently (original behavior preserved).
        log_message(f"Training failed: {str(e)}")
        import traceback
        log_message(traceback.format_exc())
    finally:
        clear_memory()
aphasia_predictions.json ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "summary": {
3
+ "classification_distribution": {
4
+ "BROCA": 1
5
+ },
6
+ "classification_percentages": {
7
+ "BROCA": "100.0%"
8
+ },
9
+ "average_confidence": "0.995",
10
+ "average_fluency_score": "0.571",
11
+ "severity_distribution": {
12
+ "3": 1
13
+ },
14
+ "confidence_statistics": {
15
+ "mean": "0.995",
16
+ "std": "0.000",
17
+ "min": "0.995",
18
+ "max": "0.995"
19
+ },
20
+ "most_common_prediction": "BROCA"
21
+ },
22
+ "total_sentences": 1,
23
+ "predictions": [
24
+ {
25
+ "sentence_id": "S1",
26
+ "input_text": "yeah well [DIALOGUE] I yeah you_know dada dada [DIALOGUE] [DIALOGUE] [DIALOGUE] yes beg it~cop two thousand two day-PL no after New_Year's_Day two thousand [DIALOGUE] [DIALOGUE] [DIALOGUE] I do~neg remember I do~neg remember [DIALOGUE] [DIALOGUE] [DIALOGUE] oh beg yeah beg x I aphasia oh beg yes [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] yeah [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] oh beg yes Turkey to China China oh beg yes up at Beijing yes oh and walk on the wall yes beg oh beg god beg I love-PAST it yes beg oh beg just amaze-PRESP oh beg just amaze-PRESP oh beg I just oh yeah [DIALOGUE] oh beg yes [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] kick-PRESP the ball window accident window break&PASTP and it~cop all big end yeah and the window break&PASTP and a ball end yeah [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] oh beg I no I do~neg want it no and rain yes beg rain rain rain yes beg oh_no no no no yes beg mother look-PRESP at son and son get-3S a umbrella [DIALOGUE] [DIALOGUE] [DIALOGUE] cat up the tree darling get cat out the tree ladder break&PASTP there~cop a up the tree tree bark-PRESP oh beg bark-PRESP end yeah and x get-3S mother to down the tree [DIALOGUE] [DIALOGUE] [DIALOGUE] yep [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] oh [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] well beg Cinderella be&PAST&13S a poor child in in oh_god Cinderella poor child in to do many thing-PL in and oh_god [DIALOGUE] oh and you troll child in child&PL oh_god child&PL want-PAST to go to dance and beautiful dadada dadadada and Cinderella be&PAST~neg sure about go-PRESP to dance and oh_god I just [DIALOGUE] she get&PAST to go to the dance in shoe-PL and oh_god oh_god [DIALOGUE] dance and yeah and she be&PAST&13S dance-PRESP around night and she suppose-PAST to be somewhere else and she put&ZERO her foot in the so and she ride&PAST off with the prince [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] [DIALOGUE] oh beg bread two piece-PL of bread and 
jelly and peanut butter and turn them over and make a peanut butter sandwich [DIALOGUE]",
27
+ "original_tokens": [
28
+ "yeah",
29
+ "well",
30
+ "[DIALOGUE]",
31
+ "I",
32
+ "yeah",
33
+ "you_know",
34
+ "dada",
35
+ "dada",
36
+ "[DIALOGUE]",
37
+ "[DIALOGUE]",
38
+ "[DIALOGUE]",
39
+ "yes",
40
+ "beg",
41
+ "it~cop",
42
+ "two",
43
+ "thousand",
44
+ "two",
45
+ "day-PL",
46
+ "no",
47
+ "after",
48
+ "New_Year's_Day",
49
+ "two",
50
+ "thousand",
51
+ "[DIALOGUE]",
52
+ "[DIALOGUE]",
53
+ "[DIALOGUE]",
54
+ "I",
55
+ "do~neg",
56
+ "remember",
57
+ "I",
58
+ "do~neg",
59
+ "remember",
60
+ "[DIALOGUE]",
61
+ "[DIALOGUE]",
62
+ "[DIALOGUE]",
63
+ "oh",
64
+ "beg",
65
+ "yeah",
66
+ "beg",
67
+ "x",
68
+ "I",
69
+ "aphasia",
70
+ "oh",
71
+ "beg",
72
+ "yes",
73
+ "[DIALOGUE]",
74
+ "[DIALOGUE]",
75
+ "[DIALOGUE]",
76
+ "[DIALOGUE]",
77
+ "yeah",
78
+ "[DIALOGUE]",
79
+ "[DIALOGUE]",
80
+ "[DIALOGUE]",
81
+ "[DIALOGUE]",
82
+ "oh",
83
+ "beg",
84
+ "yes",
85
+ "Turkey",
86
+ "to",
87
+ "China",
88
+ "China",
89
+ "oh",
90
+ "beg",
91
+ "yes",
92
+ "up",
93
+ "at",
94
+ "Beijing",
95
+ "yes",
96
+ "oh",
97
+ "and",
98
+ "walk",
99
+ "on",
100
+ "the",
101
+ "wall",
102
+ "yes",
103
+ "beg",
104
+ "oh",
105
+ "beg",
106
+ "god",
107
+ "beg",
108
+ "I",
109
+ "love-PAST",
110
+ "it",
111
+ "yes",
112
+ "beg",
113
+ "oh",
114
+ "beg",
115
+ "just",
116
+ "amaze-PRESP",
117
+ "oh",
118
+ "beg",
119
+ "just",
120
+ "amaze-PRESP",
121
+ "oh",
122
+ "beg",
123
+ "I",
124
+ "just",
125
+ "oh",
126
+ "yeah",
127
+ "[DIALOGUE]",
128
+ "oh",
129
+ "beg",
130
+ "yes",
131
+ "[DIALOGUE]",
132
+ "[DIALOGUE]",
133
+ "[DIALOGUE]",
134
+ "[DIALOGUE]",
135
+ "[DIALOGUE]",
136
+ "kick-PRESP",
137
+ "the",
138
+ "ball",
139
+ "window",
140
+ "accident",
141
+ "window",
142
+ "break&PASTP",
143
+ "and",
144
+ "it~cop",
145
+ "all",
146
+ "big",
147
+ "end",
148
+ "yeah",
149
+ "and",
150
+ "the",
151
+ "window",
152
+ "break&PASTP",
153
+ "and",
154
+ "a",
155
+ "ball",
156
+ "end",
157
+ "yeah",
158
+ "[DIALOGUE]",
159
+ "[DIALOGUE]",
160
+ "[DIALOGUE]",
161
+ "[DIALOGUE]",
162
+ "oh",
163
+ "beg",
164
+ "I",
165
+ "no",
166
+ "I",
167
+ "do~neg",
168
+ "want",
169
+ "it",
170
+ "no",
171
+ "and",
172
+ "rain",
173
+ "yes",
174
+ "beg",
175
+ "rain",
176
+ "rain",
177
+ "rain",
178
+ "yes",
179
+ "beg",
180
+ "oh_no",
181
+ "no",
182
+ "no",
183
+ "no",
184
+ "yes",
185
+ "beg",
186
+ "mother",
187
+ "look-PRESP",
188
+ "at",
189
+ "son",
190
+ "and",
191
+ "son",
192
+ "get-3S",
193
+ "a",
194
+ "umbrella",
195
+ "[DIALOGUE]",
196
+ "[DIALOGUE]",
197
+ "[DIALOGUE]",
198
+ "cat",
199
+ "up",
200
+ "the",
201
+ "tree",
202
+ "darling",
203
+ "get",
204
+ "cat",
205
+ "out",
206
+ "the",
207
+ "tree",
208
+ "ladder",
209
+ "break&PASTP",
210
+ "there~cop",
211
+ "a",
212
+ "up",
213
+ "the",
214
+ "tree",
215
+ "tree",
216
+ "bark-PRESP",
217
+ "oh",
218
+ "beg",
219
+ "bark-PRESP",
220
+ "end",
221
+ "yeah",
222
+ "and",
223
+ "x",
224
+ "get-3S",
225
+ "mother",
226
+ "to",
227
+ "down",
228
+ "the",
229
+ "tree",
230
+ "[DIALOGUE]",
231
+ "[DIALOGUE]",
232
+ "[DIALOGUE]",
233
+ "yep",
234
+ "[DIALOGUE]",
235
+ "[DIALOGUE]",
236
+ "[DIALOGUE]",
237
+ "[DIALOGUE]",
238
+ "oh",
239
+ "[DIALOGUE]",
240
+ "[DIALOGUE]",
241
+ "[DIALOGUE]",
242
+ "[DIALOGUE]",
243
+ "well",
244
+ "beg",
245
+ "Cinderella",
246
+ "be&PAST&13S",
247
+ "a",
248
+ "poor",
249
+ "child",
250
+ "in",
251
+ "in",
252
+ "oh_god",
253
+ "Cinderella",
254
+ "poor",
255
+ "child",
256
+ "in",
257
+ "to",
258
+ "do",
259
+ "many",
260
+ "thing-PL",
261
+ "in",
262
+ "and",
263
+ "oh_god",
264
+ "[DIALOGUE]",
265
+ "oh",
266
+ "and",
267
+ "you",
268
+ "troll",
269
+ "child",
270
+ "in",
271
+ "child&PL",
272
+ "oh_god",
273
+ "child&PL",
274
+ "want-PAST",
275
+ "to",
276
+ "go",
277
+ "to",
278
+ "dance",
279
+ "and",
280
+ "beautiful",
281
+ "dadada",
282
+ "dadadada",
283
+ "and",
284
+ "Cinderella",
285
+ "be&PAST~neg",
286
+ "sure",
287
+ "about",
288
+ "go-PRESP",
289
+ "to",
290
+ "dance",
291
+ "and",
292
+ "oh_god",
293
+ "I",
294
+ "just",
295
+ "[DIALOGUE]",
296
+ "she",
297
+ "get&PAST",
298
+ "to",
299
+ "go",
300
+ "to",
301
+ "the",
302
+ "dance",
303
+ "in",
304
+ "shoe-PL",
305
+ "and",
306
+ "oh_god",
307
+ "oh_god",
308
+ "[DIALOGUE]",
309
+ "dance",
310
+ "and",
311
+ "yeah",
312
+ "and",
313
+ "she",
314
+ "be&PAST&13S",
315
+ "dance-PRESP",
316
+ "around",
317
+ "night",
318
+ "and",
319
+ "she",
320
+ "suppose-PAST",
321
+ "to",
322
+ "be",
323
+ "somewhere",
324
+ "else",
325
+ "and",
326
+ "she",
327
+ "put&ZERO",
328
+ "her",
329
+ "foot",
330
+ "in",
331
+ "the",
332
+ "so",
333
+ "and",
334
+ "she",
335
+ "ride&PAST",
336
+ "off",
337
+ "with",
338
+ "the",
339
+ "prince",
340
+ "[DIALOGUE]",
341
+ "[DIALOGUE]",
342
+ "[DIALOGUE]",
343
+ "[DIALOGUE]",
344
+ "[DIALOGUE]",
345
+ "oh",
346
+ "beg",
347
+ "bread",
348
+ "two",
349
+ "piece-PL",
350
+ "of",
351
+ "bread",
352
+ "and",
353
+ "jelly",
354
+ "and",
355
+ "peanut",
356
+ "butter",
357
+ "and",
358
+ "turn",
359
+ "them",
360
+ "over",
361
+ "and",
362
+ "make",
363
+ "a",
364
+ "peanut",
365
+ "butter",
366
+ "sandwich",
367
+ "[DIALOGUE]"
368
+ ],
369
+ "prediction": {
370
+ "predicted_class": "BROCA",
371
+ "confidence": 0.994691789150238,
372
+ "confidence_percentage": "99.47%"
373
+ },
374
+ "class_description": {
375
+ "name": "Broca's Aphasia (Non-fluent)",
376
+ "description": "Characterized by limited speech output, difficulty with grammar and sentence formation, but relatively preserved comprehension. Speech is typically effortful and halting.",
377
+ "features": [
378
+ "Non-fluent speech",
379
+ "Preserved comprehension",
380
+ "Grammar difficulties",
381
+ "Word-finding problems"
382
+ ]
383
+ },
384
+ "probability_distribution": {
385
+ "BROCA": {
386
+ "probability": 0.994691789150238,
387
+ "percentage": "99.47%"
388
+ },
389
+ "CONDUCTION": {
390
+ "probability": 0.001859842101112008,
391
+ "percentage": "0.19%"
392
+ },
393
+ "GLOBAL": {
394
+ "probability": 0.0015279082581400871,
395
+ "percentage": "0.15%"
396
+ },
397
+ "ANOMIC": {
398
+ "probability": 0.0014873514883220196,
399
+ "percentage": "0.15%"
400
+ },
401
+ "TRANSMOTOR": {
402
+ "probability": 0.00028855769778601825,
403
+ "percentage": "0.03%"
404
+ },
405
+ "NOTAPHASICBYWAB": {
406
+ "probability": 9.208399569615722e-05,
407
+ "percentage": "0.01%"
408
+ },
409
+ "WERNICKE": {
410
+ "probability": 4.5590277295559645e-05,
411
+ "percentage": "0.00%"
412
+ },
413
+ "ISOLATION": {
414
+ "probability": 6.9648622229578905e-06,
415
+ "percentage": "0.00%"
416
+ },
417
+ "TRANSSENSORY": {
418
+ "probability": 8.662294881389698e-09,
419
+ "percentage": "0.00%"
420
+ }
421
+ },
422
+ "additional_predictions": {
423
+ "severity_distribution": {
424
+ "level_0": 0.22366976737976074,
425
+ "level_1": 0.1340962052345276,
426
+ "level_2": 0.2849337160587311,
427
+ "level_3": 0.3573003113269806
428
+ },
429
+ "predicted_severity_level": 3,
430
+ "fluency_score": 0.571057915687561,
431
+ "fluency_rating": "Medium"
432
+ }
433
+ }
434
+ ]
435
+ }
config.json ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
3
+ "num_labels": 9,
4
+ "aphasia_types_mapping": {
5
+ "BROCA": 0,
6
+ "TRANSMOTOR": 1,
7
+ "NOTAPHASICBYWAB": 2,
8
+ "CONDUCTION": 3,
9
+ "WERNICKE": 4,
10
+ "ANOMIC": 5,
11
+ "GLOBAL": 6,
12
+ "ISOLATION": 7,
13
+ "TRANSSENSORY": 8
14
+ },
15
+ "training_args": {
16
+ "output_dir": "./adaptive_aphasia_model",
17
+ "overwrite_output_dir": false,
18
+ "do_train": false,
19
+ "do_eval": true,
20
+ "do_predict": false,
21
+ "eval_strategy": "epoch",
22
+ "prediction_loss_only": false,
23
+ "per_device_train_batch_size": 10,
24
+ "per_device_eval_batch_size": 10,
25
+ "per_gpu_train_batch_size": null,
26
+ "per_gpu_eval_batch_size": null,
27
+ "gradient_accumulation_steps": 4,
28
+ "eval_accumulation_steps": null,
29
+ "eval_delay": 0,
30
+ "torch_empty_cache_steps": null,
31
+ "learning_rate": 0.0005,
32
+ "weight_decay": 0.01,
33
+ "adam_beta1": 0.9,
34
+ "adam_beta2": 0.999,
35
+ "adam_epsilon": 1e-08,
36
+ "max_grad_norm": 1.0,
37
+ "num_train_epochs": 500,
38
+ "max_steps": -1,
39
+ "lr_scheduler_type": "linear",
40
+ "lr_scheduler_kwargs": {},
41
+ "warmup_ratio": 0.1,
42
+ "warmup_steps": 0,
43
+ "log_level": "passive",
44
+ "log_level_replica": "warning",
45
+ "log_on_each_node": true,
46
+ "logging_dir": "./adaptive_aphasia_model/runs/Aug06_00-31-47_ikm-gpu-9104",
47
+ "logging_strategy": "steps",
48
+ "logging_first_step": false,
49
+ "logging_steps": 50,
50
+ "logging_nan_inf_filter": true,
51
+ "save_strategy": "epoch",
52
+ "save_steps": 500,
53
+ "save_total_limit": 3,
54
+ "save_safetensors": true,
55
+ "save_on_each_node": false,
56
+ "save_only_model": false,
57
+ "restore_callback_states_from_checkpoint": false,
58
+ "no_cuda": false,
59
+ "use_cpu": false,
60
+ "use_mps_device": false,
61
+ "seed": 42,
62
+ "data_seed": null,
63
+ "jit_mode_eval": false,
64
+ "use_ipex": false,
65
+ "bf16": false,
66
+ "fp16": false,
67
+ "fp16_opt_level": "O1",
68
+ "half_precision_backend": "auto",
69
+ "bf16_full_eval": false,
70
+ "fp16_full_eval": false,
71
+ "tf32": null,
72
+ "local_rank": 1,
73
+ "ddp_backend": null,
74
+ "tpu_num_cores": null,
75
+ "tpu_metrics_debug": false,
76
+ "debug": [],
77
+ "dataloader_drop_last": true,
78
+ "eval_steps": null,
79
+ "dataloader_num_workers": 0,
80
+ "dataloader_prefetch_factor": null,
81
+ "past_index": -1,
82
+ "run_name": "./adaptive_aphasia_model",
83
+ "disable_tqdm": false,
84
+ "remove_unused_columns": false,
85
+ "label_names": null,
86
+ "load_best_model_at_end": true,
87
+ "metric_for_best_model": "eval_f1",
88
+ "greater_is_better": true,
89
+ "ignore_data_skip": false,
90
+ "fsdp": [],
91
+ "fsdp_min_num_params": 0,
92
+ "fsdp_config": {
93
+ "min_num_params": 0,
94
+ "xla": false,
95
+ "xla_fsdp_v2": false,
96
+ "xla_fsdp_grad_ckpt": false
97
+ },
98
+ "fsdp_transformer_layer_cls_to_wrap": null,
99
+ "accelerator_config": {
100
+ "split_batches": false,
101
+ "dispatch_batches": null,
102
+ "even_batches": true,
103
+ "use_seedable_sampler": true,
104
+ "non_blocking": false,
105
+ "gradient_accumulation_kwargs": null
106
+ },
107
+ "deepspeed": null,
108
+ "label_smoothing_factor": 0.0,
109
+ "optim": "adamw_torch",
110
+ "optim_args": null,
111
+ "adafactor": false,
112
+ "group_by_length": false,
113
+ "length_column_name": "length",
114
+ "report_to": [],
115
+ "ddp_find_unused_parameters": null,
116
+ "ddp_bucket_cap_mb": null,
117
+ "ddp_broadcast_buffers": null,
118
+ "dataloader_pin_memory": true,
119
+ "dataloader_persistent_workers": false,
120
+ "skip_memory_metrics": true,
121
+ "use_legacy_prediction_loop": false,
122
+ "push_to_hub": false,
123
+ "resume_from_checkpoint": null,
124
+ "hub_model_id": null,
125
+ "hub_strategy": "every_save",
126
+ "hub_token": "<HUB_TOKEN>",
127
+ "hub_private_repo": null,
128
+ "hub_always_push": false,
129
+ "gradient_checkpointing": false,
130
+ "gradient_checkpointing_kwargs": null,
131
+ "include_inputs_for_metrics": false,
132
+ "include_for_metrics": [],
133
+ "eval_do_concat_batches": true,
134
+ "fp16_backend": "auto",
135
+ "push_to_hub_model_id": null,
136
+ "push_to_hub_organization": null,
137
+ "push_to_hub_token": "<PUSH_TO_HUB_TOKEN>",
138
+ "mp_parameters": "",
139
+ "auto_find_batch_size": false,
140
+ "full_determinism": false,
141
+ "torchdynamo": null,
142
+ "ray_scope": "last",
143
+ "ddp_timeout": 1800,
144
+ "torch_compile": false,
145
+ "torch_compile_backend": null,
146
+ "torch_compile_mode": null,
147
+ "include_tokens_per_second": false,
148
+ "include_num_input_tokens_seen": false,
149
+ "neftune_noise_alpha": null,
150
+ "optim_target_modules": null,
151
+ "batch_eval_metrics": false,
152
+ "eval_on_start": false,
153
+ "use_liger_kernel": false,
154
+ "eval_use_gather_object": false,
155
+ "average_tokens_across_devices": false
156
+ },
157
+ "adaptive_lr_config": {
158
+ "adaptive_lr": true,
159
+ "lr_patience": 3,
160
+ "lr_factor": 0.8,
161
+ "lr_increase_factor": 1.2,
162
+ "min_lr": 1e-06,
163
+ "max_lr": 0.001,
164
+ "oscillation_amplitude": 0.1
165
+ }
166
+ }
sample.input.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "[DIALOGUE]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "[TURN]",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "[PAUSE]",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "[REPEAT]",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "[HESITATION]",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ }
38
+ ],
39
+ "cls_token": "[CLS]",
40
+ "mask_token": "[MASK]",
41
+ "pad_token": "[PAD]",
42
+ "sep_token": "[SEP]",
43
+ "unk_token": "[UNK]"
44
+ }
summary_statistics.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Overall Accuracy": 0.8802153432032301,
3
+ "Macro F1": 0.8909792764791806,
4
+ "Weighted F1": 0.8772149647566893,
5
+ "Macro Precision": 0.8990448362732847,
6
+ "Macro Recall": 0.8876134036897266,
7
+ "Average Confidence": 0.9344870448112488,
8
+ "Confidence Std": 0.13039512932300568,
9
+ "Average Severity": [
10
+ 0.23586010932922363,
11
+ 0.2251170426607132,
12
+ 0.29972559213638306,
13
+ 0.2392973005771637
14
+ ],
15
+ "Average Fluency": 0.5604473352432251,
16
+ "Fluency Std": 0.08302813023328781
17
+ }
to_cha.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# to_cha.py — run a Batchalign pipeline over one WAV recording and write the
# result as a CLAN .cha transcript.
import batchalign as ba

# Pipeline stages as named in the string: ASR, speaker assignment, and
# morphosyntactic tagging; English audio with two speakers expected.
nlp = ba.BatchalignPipeline.new("asr,speaker,morphosyntax", lang="eng", num_speakers=2)
# Wrap the media file in a Batchalign document and run the full pipeline on it.
doc = ba.Document.new(media_path="/workspace/SH001/videos/ACWT07a.wav", lang="eng")
doc = nlp(doc)
chat = ba.CHATFile(doc=doc)
# NOTE(review): write_wor=True presumably adds word-level %wor alignment
# tiers to the output — confirm against the batchalign documentation.
chat.write("/workspace/SH001/vid_output/output.cha", write_wor=True)
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30522": {
44
+ "content": "[DIALOGUE]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "30523": {
52
+ "content": "[TURN]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "30524": {
60
+ "content": "[PAUSE]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "30525": {
68
+ "content": "[REPEAT]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "30526": {
76
+ "content": "[HESITATION]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ }
83
+ },
84
+ "additional_special_tokens": [
85
+ "[DIALOGUE]",
86
+ "[TURN]",
87
+ "[PAUSE]",
88
+ "[REPEAT]",
89
+ "[HESITATION]"
90
+ ],
91
+ "clean_up_tokenization_spaces": true,
92
+ "cls_token": "[CLS]",
93
+ "do_basic_tokenize": true,
94
+ "do_lower_case": true,
95
+ "extra_special_tokens": {},
96
+ "mask_token": "[MASK]",
97
+ "model_max_length": 1000000000000000019884624838656,
98
+ "never_split": null,
99
+ "pad_token": "[PAD]",
100
+ "sep_token": "[SEP]",
101
+ "strip_accents": null,
102
+ "tokenize_chinese_chars": true,
103
+ "tokenizer_class": "BertTokenizer",
104
+ "unk_token": "[UNK]"
105
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff