"""
Compute baselines for action prediction and recognition tasks:
1. Majority class baseline
2. Transition matrix baseline (for prediction: P(next|prev), for recognition: P(current|prev)),
   reported both as an argmax prediction and as a sample drawn from the matrix row
3. Class frequency baseline (weighted random)
"""

import json
import os
import pickle
import re
import sys
from collections import Counter, defaultdict

import numpy as np
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Put the parent of this file's directory on sys.path so that the project-local
# `data` package below can be imported when this file is run as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.dataset import DATASET_DIR, TRAIN_VOLS, VAL_VOLS, TEST_VOLS


# Directory that contains one sub-directory of annotation JSON files per volume
# (see load_annotations). The "${PULSE_ROOT}" placeholder is expanded from the
# environment; when PULSE_ROOT is not set the literal string is kept unchanged.
ANNOTATION_DIR = os.path.expandvars("${PULSE_ROOT}")


# Ordered (regex pattern, verb class) rules used by classify_verb.
# Patterns are matched with re.search and the FIRST matching rule wins, so the
# order of this list is part of its meaning.
#
# NOTE(review): because of first-match-wins, some rules can never fire:
#   * ('按压', '按压') is shadowed by the earlier ('按', '操作'), so a text
#     containing "按压" is labelled '操作'.
#   * ('打开.*展', '展开') is shadowed by the earlier ('打开', '操作').
#   * ('卷起', '折叠') and ('盖上', '盖合') are redundant with the shorter
#     ('卷', ...) / ('盖', ...) rules directly before them (same class).
# The order is intentionally left untouched here: re-ordering changes the labels
# and must be done together with any other code that shares this rule set.
VERB_MAP_RULES = [
    # 抓取 (grasp / take)
    ('抓取', '抓取'), ('拿起', '抓取'), ('拿出', '抓取'),
    ('从.*取出', '抓取'), ('从.*抓取', '抓取'), ('从.*提取', '抓取'),
    ('从.*取下', '抓取'), ('从.*抽出', '抓取'), ('从.*拔出', '抓取'),
    ('双手抓', '抓取'), ('双手协.*抓', '抓取'), ('分别抓', '抓取'),
    ('伸手', '抓取'),
    # 放置 (place / put back)
    ('放置', '放置'), ('放回', '放置'), ('放入', '放置'),
    ('丢弃', '放置'), ('归还', '放置'),
    # 移动 (move)
    ('移动', '移动'), ('搬运', '移动'), ('移开', '移动'),
    ('递给', '移动'), ('拉', '移动'), ('推', '移动'),
    ('端', '移动'), ('挪', '移动'), ('传', '移动'),
    # 调整 (adjust)
    ('调整', '调整'), ('调节', '调整'), ('对齐', '调整'),
    ('理顺', '调整'), ('整平', '调整'),
    # 擦拭 (wipe / clean)
    ('擦拭', '擦拭'), ('清洁', '擦拭'), ('清除', '擦拭'),
    ('清理', '擦拭'), ('擦干', '擦拭'),
    # 折叠 (fold)
    ('折叠', '折叠'), ('对折', '折叠'), ('弯折', '折叠'),
    ('卷', '折叠'), ('卷起', '折叠'),
    # 旋转 (rotate)
    ('旋转', '旋转'), ('拧', '旋转'), ('转动', '旋转'),
    ('扭', '旋转'), ('翻转', '旋转'), ('翻开', '旋转'),
    ('掀', '旋转'),
    # 操作 (operate / use)
    ('操作', '操作'), ('使用', '操作'), ('打开', '操作'),
    ('关闭', '操作'), ('开启', '操作'), ('启动', '操作'),
    ('切割', '操作'), ('切', '操作'), ('剪', '操作'),
    ('按', '操作'), ('点', '操作'), ('敲', '操作'),
    # 盖合 (cover / close)
    ('盖', '盖合'), ('盖上', '盖合'), ('合上', '盖合'),
    ('扣上', '盖合'), ('密封', '盖合'),
    # 整理 (tidy / arrange)
    ('整理', '整理'), ('收纳', '整理'), ('归类', '整理'),
    ('排列', '整理'), ('堆叠', '整理'), ('叠放', '整理'),
    # 展开 (unfold / spread)
    ('展开', '展开'), ('铺', '展开'), ('摊', '展开'),
    ('撑开', '展开'), ('打开.*展', '展开'),
    # 倾倒 (pour)
    ('倾倒', '倾倒'), ('倒', '倾倒'), ('注入', '倾倒'),
    ('浇', '倾倒'), ('淋', '倾倒'),
    # 检查 (inspect)
    ('检查', '检查'), ('查看', '检查'), ('观察', '检查'),
    ('确认', '检查'), ('审视', '检查'),
    # 提起 (lift)
    ('提起', '提起'), ('举起', '提起'), ('抬起', '提起'),
    ('提', '提起'), ('举', '提起'),
    # 释放 (release)
    ('释放', '释放'), ('松开', '释放'), ('松手', '释放'),
    ('放开', '释放'), ('脱手', '释放'),
    # 粘贴 (stick / attach)
    ('粘贴', '粘贴'), ('贴', '粘贴'), ('粘', '粘贴'),
    ('缠', '粘贴'), ('绑', '粘贴'), ('系', '粘贴'),
    ('连接', '粘贴'), ('固定', '粘贴'),
    # 分离 (separate / detach)
    ('分离', '分离'), ('拆', '分离'), ('撕', '分离'),
    ('剥', '分离'), ('解开', '分离'), ('拔', '分离'),
    # 按压 (press)
    ('按压', '按压'), ('压', '按压'), ('挤', '按压'),
    ('捏', '按压'),
]


# Fine-grained label set; a label's index in this list is its integer class id.
# NOTE(review): '翻转' is listed here, but VERB_MAP_RULES maps the pattern
# '翻转' to '旋转', so classify_verb never produces the '翻转' class.
ACTION_CLASSES = [
    '抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转',
    '操作', '盖合', '整理', '展开', '倾倒', '检查', '提起',
    '释放', '粘贴', '分离', '按压', '翻转', '其他',
]


# Fine class -> coarse class. The first seven fine classes map to themselves;
# every remaining fine class is collapsed into '其他'.
COARSE_MAP = {
    '抓取': '抓取', '放置': '放置', '移动': '移动', '调整': '调整',
    '擦拭': '擦拭', '折叠': '折叠', '旋转': '旋转',
    '操作': '其他', '盖合': '其他', '整理': '其他', '展开': '其他',
    '倾倒': '其他', '检查': '其他', '提起': '其他', '释放': '其他',
    '粘贴': '其他', '分离': '其他', '按压': '其他', '翻转': '其他',
    '其他': '其他',
}


# Coarse label set; a label's index in this list is its integer class id.
COARSE_CLASSES = ['抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转', '其他']


def classify_verb(text):
    """Map a free-text action description to a verb class name.

    The rules in VERB_MAP_RULES are tried in list order; the first rule whose
    pattern is found anywhere in ``text`` (``re.search``) decides the class.

    Args:
        text: The description to classify.

    Returns:
        The verb class of the first matching rule, or '其他' when no rule
        matches or when ``text`` is empty or not a string (e.g. ``None`` from a
        null JSON field), instead of raising ``TypeError`` inside ``re.search``.
    """
    if not isinstance(text, str) or not text:
        return '其他'
    for pattern, verb in VERB_MAP_RULES:
        if re.search(pattern, text):
            return verb
    return '其他'


def load_annotations(vols, coarse=True):
    """Load (previous, current) action-label pairs from annotation JSON files.

    For every name in ``vols`` the directory ``ANNOTATION_DIR/<vol>`` is scanned
    (missing directories are skipped) and each ``*.json`` file in it is read in
    sorted filename order. The segment list is taken from the ``'segments'`` key,
    falling back to ``'annotations'``; each segment's text is taken from
    ``'task'``, falling back to ``'description'``, and classified with
    ``classify_verb``.

    Args:
        vols: Iterable of volume sub-directory names under ANNOTATION_DIR.
        coarse: If true, labels are collapsed through COARSE_MAP and indexed
            against COARSE_CLASSES; otherwise they are indexed against
            ACTION_CLASSES.

    Returns:
        ``(segments, classes)`` where ``segments`` is a list of
        ``(prev_idx, current_idx)`` integer pairs, one per labelled segment, and
        ``classes`` is the class-name list the indices refer to.

    NOTE(review): the first segment of each file has no predecessor and is
    paired with itself, so for those pairs ``prev`` equals the ground-truth
    label. This inflates the diagonal of any transition matrix built from the
    pairs and hands a transition baseline the answer for those test items.
    Behaviour is kept as-is so existing numbers stay comparable.
    """
    classes = COARSE_CLASSES if coarse else ACTION_CLASSES
    class2idx = {name: idx for idx, name in enumerate(classes)}

    segments = []
    for vol in vols:
        ann_dir = os.path.join(ANNOTATION_DIR, vol)
        if not os.path.isdir(ann_dir):
            continue
        for fn in sorted(os.listdir(ann_dir)):
            if not fn.endswith('.json'):
                continue
            # The annotation text is Chinese; do not rely on the locale's
            # default encoding to decode it.
            with open(os.path.join(ann_dir, fn), encoding='utf-8') as f:
                data = json.load(f)

            # `or []` tolerates an explicit null segment list in the JSON.
            anns = data.get('segments', data.get('annotations', [])) or []
            scene_labels = []
            for ann in anns:
                text = ann.get('task', ann.get('description', ''))
                if not isinstance(text, str):
                    # A null / non-text description is treated as "no match".
                    text = ''
                verb = classify_verb(text)
                if coarse:
                    verb = COARSE_MAP.get(verb, '其他')
                if verb in class2idx:
                    scene_labels.append(class2idx[verb])

            if scene_labels:
                # Shift by one to get each segment's predecessor; the first
                # segment is paired with itself (see NOTE in the docstring).
                prev_labels = [scene_labels[0]] + scene_labels[:-1]
                segments.extend(zip(prev_labels, scene_labels))

    return segments, classes


def compute_transition_matrix(segments, num_classes):
    """Estimate the row-normalised transition matrix from label pairs.

    Args:
        segments: Iterable of ``(prev, current)`` integer class-index pairs.
        num_classes: Number of classes; the result is ``num_classes`` square.

    Returns:
        A float array ``m`` where ``m[prev, current]`` is the fraction of pairs
        starting in ``prev`` that end in ``current``. Rows for ``prev`` values
        that never occur in ``segments`` are all zeros (not a distribution), so
        callers must handle them before sampling from a row.
    """
    counts = np.zeros((num_classes, num_classes))
    for prev, current in segments:
        counts[prev, current] += 1

    row_sums = counts.sum(axis=1, keepdims=True)
    # Avoid 0/0 for unseen `prev` classes; their rows stay all-zero.
    row_sums[row_sums == 0] = 1
    return counts / row_sums


def _score(y_true, y_pred):
    """Return (accuracy, weighted F1, macro F1) for one set of predictions."""
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='weighted', zero_division=0),
        f1_score(y_true, y_pred, average='macro', zero_division=0),
    )


def _print_scores(scores):
    """Print one (accuracy, weighted F1, macro F1) triple on a single line."""
    acc, f1w, f1m = scores
    print(f" acc={acc:.3f} f1w={f1w:.3f} f1m={f1m:.3f}")


def main():
    """Print baseline scores on the test volumes for coarse and fine labels.

    For each label granularity the training pairs are used to fit, and the
    test pairs to evaluate, four baselines: majority class, random draw from
    the training label distribution, argmax of the transition-matrix row of
    the previous label, and a random draw from that row. The transition matrix
    and a per-class report for the argmax baseline are printed as well.
    """
    for coarse in [True, False]:
        tag = "8 coarse" if coarse else "20 fine"
        print(f"\n{'='*60}")
        print(f"Baselines — {tag} classes")
        print(f"{'='*60}")

        train_segs, classes = load_annotations(TRAIN_VOLS, coarse=coarse)
        test_segs, _ = load_annotations(TEST_VOLS, coarse=coarse)
        num_classes = len(classes)

        test_prev = [prev for prev, _ in test_segs]
        test_true = [current for _, current in test_segs]
        train_labels = [current for _, current in train_segs]

        print(f"Train segments: {len(train_segs)}")
        print(f"Test segments: {len(test_segs)}")

        # Without data there is no majority class and no distribution to
        # sample from; report it instead of crashing with an IndexError.
        if not train_segs or not test_segs:
            print("No annotation segments found; skipping baselines "
                  f"(ANNOTATION_DIR={ANNOTATION_DIR!r}).")
            continue

        # 1. Majority class.
        majority_class = Counter(train_labels).most_common(1)[0][0]
        majority_preds = [majority_class] * len(test_true)
        print(f"\n1. Majority class baseline (always predict '{classes[majority_class]}'):")
        _print_scores(_score(test_true, majority_preds))

        # 2. Random draw from the training label distribution.
        freq = np.bincount(train_labels, minlength=num_classes) / len(train_labels)
        # A local RandomState seeded with 42 yields the same stream as
        # np.random.seed(42) without touching NumPy's global random state.
        rng = np.random.RandomState(42)
        freq_preds = rng.choice(num_classes, size=len(test_true), p=freq)
        print("\n2. Random (train distribution) baseline:")
        _print_scores(_score(test_true, freq_preds))

        # 3. Most likely successor of the previous label.
        trans_matrix = compute_transition_matrix(train_segs, num_classes)
        # Rows of previous labels never seen in training are all-zero; argmax
        # would silently return class 0 for them, so fall back to the majority
        # class exactly like the sampling baseline below.
        seen_prev = trans_matrix.sum(axis=1) > 0
        trans_preds = [
            int(np.argmax(trans_matrix[prev])) if seen_prev[prev] else majority_class
            for prev in test_prev
        ]
        print("\n3. Transition matrix baseline (argmax P(next|prev)):")
        _print_scores(_score(test_true, trans_preds))

        print("\n Transition matrix (rows=prev, cols=next):")
        header = " " + "".join(f"{c[:2]:>6}" for c in classes)
        print(header)
        for i, row in enumerate(trans_matrix):
            vals = "".join(f"{v:6.2f}" for v in row)
            print(f" {classes[i][:2]}{vals}")

        # 4. Random draw from the transition row of the previous label.
        rng = np.random.RandomState(42)
        trans_sample_preds = [
            rng.choice(num_classes, p=trans_matrix[prev]) if seen_prev[prev]
            else majority_class
            for prev in test_prev
        ]
        print("\n4. Transition matrix + sampling baseline:")
        _print_scores(_score(test_true, trans_sample_preds))

        print("\n Per-class report (transition argmax):")
        # `labels` must be given explicitly: with only `target_names`,
        # classification_report raises ValueError as soon as some class is
        # absent from both the test labels and the predictions.
        report = classification_report(
            test_true, trans_preds,
            labels=list(range(num_classes)),
            target_names=classes, zero_division=0,
        )
        print(report)


# Run the baselines only when executed as a script, not when imported.
if __name__ == '__main__':
    main()