# PULSE-code / experiments / tasks / eval_baselines.py
# Uploaded by velvet-pine-22 via huggingface_hub (commit b4b2877, verified)
#!/usr/bin/env python3
"""
Compute baselines for action prediction and recognition tasks:
1. Majority class baseline
2. Transition matrix baseline (for prediction: P(next|prev), for recognition: P(current|prev))
3. Class frequency baseline (weighted random)
"""
import os
import sys
import json
import pickle
import re
import numpy as np
from collections import Counter, defaultdict
from sklearn.metrics import accuracy_score, f1_score, classification_report
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.dataset import DATASET_DIR, TRAIN_VOLS, VAL_VOLS, TEST_VOLS
# Root directory holding per-volume annotation folders.  The checked-in value
# was the literal placeholder "${PULSE_ROOT}"; expand it from the environment
# so the script works when PULSE_ROOT is set.  When the variable is unset,
# os.path.expandvars leaves the string unchanged (backward compatible).
ANNOTATION_DIR = os.path.expandvars("${PULSE_ROOT}")
# Copy verb taxonomy from train_pred_cls.py
# Ordered (regex pattern, canonical verb) rules.  classify_verb() returns the
# verb of the FIRST pattern that re.search() matches, so order matters:
# more specific patterns must precede generic substrings of themselves.
VERB_MAP_RULES = [
    # 抓取 — grasp / pick up / take out
    ('抓取', '抓取'), ('拿起', '抓取'), ('拿出', '抓取'),
    ('从.*取出', '抓取'), ('从.*抓取', '抓取'), ('从.*提取', '抓取'),
    ('从.*取下', '抓取'), ('从.*抽出', '抓取'), ('从.*拔出', '抓取'),
    ('双手抓', '抓取'), ('双手协.*抓', '抓取'), ('分别抓', '抓取'),
    ('伸手', '抓取'),
    # 放置 — place / put back / put in / discard
    ('放置', '放置'), ('放回', '放置'), ('放入', '放置'),
    ('丢弃', '放置'), ('归还', '放置'),
    # 移动 — move / carry / hand over / push / pull
    ('移动', '移动'), ('搬运', '移动'), ('移开', '移动'),
    ('递给', '移动'), ('拉', '移动'), ('推', '移动'),
    ('端', '移动'), ('挪', '移动'), ('传', '移动'),
    # 调整 — adjust / align / straighten / flatten
    ('调整', '调整'), ('调节', '调整'), ('对齐', '调整'),
    ('理顺', '调整'), ('整平', '调整'),
    # 擦拭 — wipe / clean / clear
    ('擦拭', '擦拭'), ('清洁', '擦拭'), ('清除', '擦拭'),
    ('清理', '擦拭'), ('擦干', '擦拭'),
    # 折叠 — fold / roll up
    ('折叠', '折叠'), ('对折', '折叠'), ('弯折', '折叠'),
    ('卷', '折叠'), ('卷起', '折叠'),
    # 旋转 — rotate / twist / flip (NOTE: '翻转' is folded into 旋转 here)
    ('旋转', '旋转'), ('拧', '旋转'), ('转动', '旋转'),
    ('扭', '旋转'), ('翻转', '旋转'), ('翻开', '旋转'),
    ('掀', '旋转'),
    # 操作 — operate / use / open / close / cut / press buttons
    ('操作', '操作'), ('使用', '操作'), ('打开', '操作'),
    ('关闭', '操作'), ('开启', '操作'), ('启动', '操作'),
    ('切割', '操作'), ('切', '操作'), ('剪', '操作'),
    ('按', '操作'), ('点', '操作'), ('敲', '操作'),
    # 盖合 — cover / close a lid / seal
    ('盖', '盖合'), ('盖上', '盖合'), ('合上', '盖合'),
    ('扣上', '盖合'), ('密封', '盖合'),
    # 整理 — tidy / sort / stack
    ('整理', '整理'), ('收纳', '整理'), ('归类', '整理'),
    ('排列', '整理'), ('堆叠', '整理'), ('叠放', '整理'),
    # 展开 — unfold / spread out
    ('展开', '展开'), ('铺', '展开'), ('摊', '展开'),
    ('撑开', '展开'), ('打开.*展', '展开'),
    # 倾倒 — pour / inject / sprinkle
    ('倾倒', '倾倒'), ('倒', '倾倒'), ('注入', '倾倒'),
    ('浇', '倾倒'), ('淋', '倾倒'),
    # 检查 — inspect / look at / confirm
    ('检查', '检查'), ('查看', '检查'), ('观察', '检查'),
    ('确认', '检查'), ('审视', '检查'),
    # 提起 — lift / raise
    ('提起', '提起'), ('举起', '提起'), ('抬起', '提起'),
    ('提', '提起'), ('举', '提起'),
    # 释放 — release / let go
    ('释放', '释放'), ('松开', '释放'), ('松手', '释放'),
    ('放开', '释放'), ('脱手', '释放'),
    # 粘贴 — stick / tie / bind / connect / fasten
    ('粘贴', '粘贴'), ('贴', '粘贴'), ('粘', '粘贴'),
    ('缠', '粘贴'), ('绑', '粘贴'), ('系', '粘贴'),
    ('连接', '粘贴'), ('固定', '粘贴'),
    # 分离 — separate / detach / tear / peel / unplug
    ('分离', '分离'), ('拆', '分离'), ('撕', '分离'),
    ('剥', '分离'), ('解开', '分离'), ('拔', '分离'),
    # 按压 — press / squeeze / pinch
    ('按压', '按压'), ('压', '按压'), ('挤', '按压'),
    ('捏', '按压'),
]
# Fine-grained (20-way) action taxonomy.
# NOTE(review): '翻转' (flip) is listed here, but VERB_MAP_RULES maps the
# pattern '翻转' to '旋转', so classify_verb() never returns '翻转' — the
# class appears unreachable from this file; confirm against train_pred_cls.py.
ACTION_CLASSES = [
    '抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转',
    '操作', '盖合', '整理', '展开', '倾倒', '检查', '提起',
    '释放', '粘贴', '分离', '按压', '翻转', '其他'
]
# Collapse the 20-way fine taxonomy into 8 coarse classes: seven verbs keep
# their identity, everything else folds into '其他' (other).
COARSE_MAP = {
    '抓取': '抓取', '放置': '放置', '移动': '移动', '调整': '调整',
    '擦拭': '擦拭', '折叠': '折叠', '旋转': '旋转',
    '操作': '其他', '盖合': '其他', '整理': '其他', '展开': '其他',
    '倾倒': '其他', '检查': '其他', '提起': '其他', '释放': '其他',
    '粘贴': '其他', '分离': '其他', '按压': '其他', '翻转': '其他',
    '其他': '其他',
}
# Coarse (8-way) class list; index order defines the label ids used everywhere.
COARSE_CLASSES = ['抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转', '其他']
def classify_verb(text):
    """Map a free-form action description to its canonical verb class.

    Scans VERB_MAP_RULES in order and returns the verb paired with the first
    regex that matches anywhere in *text*; falls back to '其他' (other) when
    no rule matches.
    """
    return next(
        (verb for pattern, verb in VERB_MAP_RULES if re.search(pattern, text)),
        '其他',
    )
def load_annotations(vols, coarse=True):
    """Load annotation segments as (prev_label_idx, current_label_idx) pairs.

    Walks ANNOTATION_DIR/<vol>/*.json for every volume in *vols*, classifies
    each segment's text ('task', falling back to 'description') into a verb
    class, and emits one (previous, current) index pair per segment.  The
    first segment of each scene is paired with itself.  The same pairs serve
    both tasks: prediction uses P(next|prev), recognition P(current|prev).

    Args:
        vols: iterable of volume directory names under ANNOTATION_DIR;
            missing directories are skipped silently (best-effort).
        coarse: if True, collapse labels through COARSE_MAP to the 8-way
            taxonomy; otherwise keep the 20-way fine taxonomy.

    Returns:
        (segments, classes): list of (prev_idx, current_idx) tuples, and the
        class-name list that the indices refer to.
    """
    segments = []
    classes = COARSE_CLASSES if coarse else ACTION_CLASSES
    class2idx = {c: i for i, c in enumerate(classes)}
    for vol in vols:
        ann_dir = os.path.join(ANNOTATION_DIR, vol)
        if not os.path.isdir(ann_dir):
            continue
        for fn in sorted(os.listdir(ann_dir)):
            if not fn.endswith('.json'):
                continue
            # Annotations contain Chinese text: read as UTF-8 explicitly so
            # decoding does not depend on the platform's locale encoding.
            with open(os.path.join(ann_dir, fn), encoding='utf-8') as f:
                data = json.load(f)
            anns = data.get('segments', data.get('annotations', []))
            scene_segs = []
            for ann in anns:
                text = ann.get('task', ann.get('description', ''))
                verb = classify_verb(text)
                if coarse:
                    verb = COARSE_MAP.get(verb, '其他')
                if verb in class2idx:
                    scene_segs.append(class2idx[verb])
            # For prediction: pairs of (prev, next).
            # For recognition: pairs of (prev, current) — same construction.
            for i, current in enumerate(scene_segs):
                prev = scene_segs[i - 1] if i > 0 else current
                segments.append((prev, current))
    return segments, classes
def compute_transition_matrix(segments, num_classes):
    """Estimate the row-stochastic transition matrix P(next | prev).

    Counts (prev, next) co-occurrences from *segments* and normalizes each
    row to sum to 1; rows with no observations are left as all zeros.
    """
    counts = np.zeros((num_classes, num_classes))
    for source, target in segments:
        counts[source][target] += 1
    totals = counts.sum(axis=1, keepdims=True)
    # Avoid division by zero: unobserved rows divide by 1 and stay all-zero.
    return counts / np.where(totals == 0, 1.0, totals)
def _print_metrics(y_true, y_pred):
    """Print accuracy, weighted F1, and macro F1 for one baseline on one line."""
    acc = accuracy_score(y_true, y_pred)
    f1w = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    f1m = f1_score(y_true, y_pred, average='macro', zero_division=0)
    print(f" acc={acc:.3f} f1w={f1w:.3f} f1m={f1m:.3f}")

def _print_transition_matrix(trans_matrix, classes):
    """Pretty-print the transition matrix (rows=prev, cols=next), 2-char labels."""
    print(f"\n Transition matrix (rows=prev, cols=next):")
    header = " " + "".join(f"{c[:2]:>6}" for c in classes)
    print(header)
    for i, row in enumerate(trans_matrix):
        vals = "".join(f"{v:6.2f}" for v in row)
        print(f" {classes[i][:2]}{vals}")

def main():
    """Evaluate all four baselines on both the 8-way coarse and 20-way fine taxonomy."""
    for coarse in [True, False]:
        tag = "8 coarse" if coarse else "20 fine"
        print(f"\n{'='*60}")
        print(f"Baselines — {tag} classes")
        print(f"{'='*60}")
        train_segs, classes = load_annotations(TRAIN_VOLS, coarse=coarse)
        test_segs, _ = load_annotations(TEST_VOLS, coarse=coarse)
        num_classes = len(classes)
        # Segments are (prev, current) pairs: prev feeds the transition
        # baselines, current is the ground-truth label being scored.
        test_prev = [s[0] for s in test_segs]
        test_true = [s[1] for s in test_segs]
        train_labels = [s[1] for s in train_segs]
        print(f"Train segments: {len(train_segs)}")
        print(f"Test segments: {len(test_segs)}")
        # 1. Majority class baseline: always predict the most common train label.
        majority_class = Counter(train_labels).most_common(1)[0][0]
        majority_preds = [majority_class] * len(test_true)
        print(f"\n1. Majority class baseline (always predict '{classes[majority_class]}'):")
        _print_metrics(test_true, majority_preds)
        # 2. Class frequency baseline: sample labels from the train distribution.
        freq = np.bincount(train_labels, minlength=num_classes).astype(float)
        freq = freq / freq.sum()
        np.random.seed(42)
        freq_preds = np.random.choice(num_classes, size=len(test_true), p=freq)
        print(f"\n2. Random (train distribution) baseline:")
        _print_metrics(test_true, freq_preds)
        # 3. Transition matrix baseline: most likely next class given the previous.
        trans_matrix = compute_transition_matrix(train_segs, num_classes)
        trans_preds = [np.argmax(trans_matrix[prev]) for prev in test_prev]
        print(f"\n3. Transition matrix baseline (argmax P(next|prev)):")
        _print_metrics(test_true, trans_preds)
        _print_transition_matrix(trans_matrix, classes)
        # 4. Transition + sampling: sample from P(next|prev) instead of argmax.
        np.random.seed(42)
        trans_sample_preds = []
        for prev in test_prev:
            p = trans_matrix[prev]
            if p.sum() == 0:
                # Unobserved prev class: no distribution to sample, fall back
                # to the majority class.
                trans_sample_preds.append(majority_class)
            else:
                trans_sample_preds.append(np.random.choice(num_classes, p=p))
        print(f"\n4. Transition matrix + sampling baseline:")
        _print_metrics(test_true, trans_sample_preds)
        # Per-class breakdown for the strongest deterministic baseline.
        print(f"\n Per-class report (transition argmax):")
        report = classification_report(test_true, trans_preds,
                                       target_names=classes, zero_division=0)
        print(report)

if __name__ == '__main__':
    main()