# PULSE-code / experiments / tasks / eval_baselines.py
# Uploaded by velvet-pine-22 via huggingface_hub (commit b4b2877, verified)
#!/usr/bin/env python3
"""
Compute baselines for action prediction and recognition tasks:
1. Majority class baseline
2. Transition matrix baseline (for prediction: P(next|prev), for recognition: P(current|prev))
3. Class frequency baseline (weighted random)
"""
import os
import sys
import json
import pickle
import re
import numpy as np
from collections import Counter, defaultdict
from sklearn.metrics import accuracy_score, f1_score, classification_report
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.dataset import DATASET_DIR, TRAIN_VOLS, VAL_VOLS, TEST_VOLS
# Root directory holding per-volume annotation folders.  The checked-in value
# was the literal placeholder "${PULSE_ROOT}"; expand it from the environment
# so the script works when PULSE_ROOT is set.  When the variable is unset,
# os.path.expandvars leaves the string unchanged (backward compatible).
ANNOTATION_DIR = os.path.expandvars("${PULSE_ROOT}")
# Copy verb taxonomy from train_pred_cls.py
# Ordered (regex pattern, canonical verb) rules.  classify_verb() returns the
# verb of the FIRST pattern that re.search() matches, so order matters:
# more specific patterns must precede generic substrings of themselves.
VERB_MAP_RULES = [
    # 抓取 — grasp / pick up / take out
    ('抓取', '抓取'), ('拿起', '抓取'), ('拿出', '抓取'),
    ('从.*取出', '抓取'), ('从.*抓取', '抓取'), ('从.*提取', '抓取'),
    ('从.*取下', '抓取'), ('从.*抽出', '抓取'), ('从.*拔出', '抓取'),
    ('双手抓', '抓取'), ('双手协.*抓', '抓取'), ('分别抓', '抓取'),
    ('伸手', '抓取'),
    # 放置 — place / put back / put in / discard
    ('放置', '放置'), ('放回', '放置'), ('放入', '放置'),
    ('丢弃', '放置'), ('归还', '放置'),
    # 移动 — move / carry / hand over / push / pull
    ('移动', '移动'), ('搬运', '移动'), ('移开', '移动'),
    ('递给', '移动'), ('拉', '移动'), ('推', '移动'),
    ('端', '移动'), ('挪', '移动'), ('传', '移动'),
    # 调整 — adjust / align / straighten / flatten
    ('调整', '调整'), ('调节', '调整'), ('对齐', '调整'),
    ('理顺', '调整'), ('整平', '调整'),
    # 擦拭 — wipe / clean / clear
    ('擦拭', '擦拭'), ('清洁', '擦拭'), ('清除', '擦拭'),
    ('清理', '擦拭'), ('擦干', '擦拭'),
    # 折叠 — fold / roll up
    ('折叠', '折叠'), ('对折', '折叠'), ('弯折', '折叠'),
    ('卷', '折叠'), ('卷起', '折叠'),
    # 旋转 — rotate / twist / flip (NOTE: '翻转' is folded into 旋转 here)
    ('旋转', '旋转'), ('拧', '旋转'), ('转动', '旋转'),
    ('扭', '旋转'), ('翻转', '旋转'), ('翻开', '旋转'),
    ('掀', '旋转'),
    # 操作 — operate / use / open / close / cut / press buttons
    ('操作', '操作'), ('使用', '操作'), ('打开', '操作'),
    ('关闭', '操作'), ('开启', '操作'), ('启动', '操作'),
    ('切割', '操作'), ('切', '操作'), ('剪', '操作'),
    ('按', '操作'), ('点', '操作'), ('敲', '操作'),
    # 盖合 — cover / close a lid / seal
    ('盖', '盖合'), ('盖上', '盖合'), ('合上', '盖合'),
    ('扣上', '盖合'), ('密封', '盖合'),
    # 整理 — tidy / sort / stack
    ('整理', '整理'), ('收纳', '整理'), ('归类', '整理'),
    ('排列', '整理'), ('堆叠', '整理'), ('叠放', '整理'),
    # 展开 — unfold / spread out
    ('展开', '展开'), ('铺', '展开'), ('摊', '展开'),
    ('撑开', '展开'), ('打开.*展', '展开'),
    # 倾倒 — pour / inject / sprinkle
    ('倾倒', '倾倒'), ('倒', '倾倒'), ('注入', '倾倒'),
    ('浇', '倾倒'), ('淋', '倾倒'),
    # 检查 — inspect / look at / confirm
    ('检查', '检查'), ('查看', '检查'), ('观察', '检查'),
    ('确认', '检查'), ('审视', '检查'),
    # 提起 — lift / raise
    ('提起', '提起'), ('举起', '提起'), ('抬起', '提起'),
    ('提', '提起'), ('举', '提起'),
    # 释放 — release / let go
    ('释放', '释放'), ('松开', '释放'), ('松手', '释放'),
    ('放开', '释放'), ('脱手', '释放'),
    # 粘贴 — stick / tie / bind / connect / fasten
    ('粘贴', '粘贴'), ('贴', '粘贴'), ('粘', '粘贴'),
    ('缠', '粘贴'), ('绑', '粘贴'), ('系', '粘贴'),
    ('连接', '粘贴'), ('固定', '粘贴'),
    # 分离 — separate / detach / tear / peel / unplug
    ('分离', '分离'), ('拆', '分离'), ('撕', '分离'),
    ('剥', '分离'), ('解开', '分离'), ('拔', '分离'),
    # 按压 — press / squeeze / pinch
    ('按压', '按压'), ('压', '按压'), ('挤', '按压'),
    ('捏', '按压'),
]
# Fine-grained (20-way) action taxonomy.
# NOTE(review): '翻转' (flip) is listed here, but VERB_MAP_RULES maps the
# pattern '翻转' to '旋转', so classify_verb() never returns '翻转' — the
# class appears unreachable from this file; confirm against train_pred_cls.py.
ACTION_CLASSES = [
    '抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转',
    '操作', '盖合', '整理', '展开', '倾倒', '检查', '提起',
    '释放', '粘贴', '分离', '按压', '翻转', '其他'
]
# Collapse the 20-way fine taxonomy into 8 coarse classes: seven verbs keep
# their identity, everything else folds into '其他' (other).
COARSE_MAP = {
    '抓取': '抓取', '放置': '放置', '移动': '移动', '调整': '调整',
    '擦拭': '擦拭', '折叠': '折叠', '旋转': '旋转',
    '操作': '其他', '盖合': '其他', '整理': '其他', '展开': '其他',
    '倾倒': '其他', '检查': '其他', '提起': '其他', '释放': '其他',
    '粘贴': '其他', '分离': '其他', '按压': '其他', '翻转': '其他',
    '其他': '其他',
}
# Coarse (8-way) class list; index order defines the label ids used everywhere.
COARSE_CLASSES = ['抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转', '其他']
def classify_verb(text):
    """Map a free-form action description to its canonical verb class.

    Scans VERB_MAP_RULES in order and returns the verb paired with the first
    regex that matches anywhere in *text*; falls back to '其他' (other) when
    no rule matches.
    """
    return next(
        (verb for pattern, verb in VERB_MAP_RULES if re.search(pattern, text)),
        '其他',
    )
def load_annotations(vols, coarse=True):
    """Load annotation segments as (prev_label_idx, current_label_idx) pairs.

    Walks ANNOTATION_DIR/<vol>/*.json for every volume in *vols*, classifies
    each segment's text ('task', falling back to 'description') into a verb
    class, and emits one (previous, current) index pair per segment.  The
    first segment of each scene is paired with itself.  The same pairs serve
    both tasks: prediction uses P(next|prev), recognition P(current|prev).

    Args:
        vols: iterable of volume directory names under ANNOTATION_DIR;
            missing directories are skipped silently (best-effort).
        coarse: if True, collapse labels through COARSE_MAP to the 8-way
            taxonomy; otherwise keep the 20-way fine taxonomy.

    Returns:
        (segments, classes): list of (prev_idx, current_idx) tuples, and the
        class-name list that the indices refer to.
    """
    segments = []
    classes = COARSE_CLASSES if coarse else ACTION_CLASSES
    class2idx = {c: i for i, c in enumerate(classes)}
    for vol in vols:
        ann_dir = os.path.join(ANNOTATION_DIR, vol)
        if not os.path.isdir(ann_dir):
            continue
        for fn in sorted(os.listdir(ann_dir)):
            if not fn.endswith('.json'):
                continue
            # Annotations contain Chinese text: read as UTF-8 explicitly so
            # decoding does not depend on the platform's locale encoding.
            with open(os.path.join(ann_dir, fn), encoding='utf-8') as f:
                data = json.load(f)
            anns = data.get('segments', data.get('annotations', []))
            scene_segs = []
            for ann in anns:
                text = ann.get('task', ann.get('description', ''))
                verb = classify_verb(text)
                if coarse:
                    verb = COARSE_MAP.get(verb, '其他')
                if verb in class2idx:
                    scene_segs.append(class2idx[verb])
            # For prediction: pairs of (prev, next).
            # For recognition: pairs of (prev, current) — same construction.
            for i, current in enumerate(scene_segs):
                prev = scene_segs[i - 1] if i > 0 else current
                segments.append((prev, current))
    return segments, classes
def compute_transition_matrix(segments, num_classes):
    """Estimate the row-stochastic transition matrix P(next | prev).

    Counts (prev, next) co-occurrences from *segments* and normalizes each
    row to sum to 1; rows with no observations are left as all zeros.
    """
    counts = np.zeros((num_classes, num_classes))
    for source, target in segments:
        counts[source][target] += 1
    totals = counts.sum(axis=1, keepdims=True)
    # Avoid division by zero: unobserved rows divide by 1 and stay all-zero.
    return counts / np.where(totals == 0, 1.0, totals)
def _print_metrics(y_true, y_pred):
    """Print accuracy, weighted F1, and macro F1 for one baseline on one line."""
    acc = accuracy_score(y_true, y_pred)
    f1w = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    f1m = f1_score(y_true, y_pred, average='macro', zero_division=0)
    print(f" acc={acc:.3f} f1w={f1w:.3f} f1m={f1m:.3f}")

def _print_transition_matrix(trans_matrix, classes):
    """Pretty-print the transition matrix (rows=prev, cols=next), 2-char labels."""
    print(f"\n Transition matrix (rows=prev, cols=next):")
    header = " " + "".join(f"{c[:2]:>6}" for c in classes)
    print(header)
    for i, row in enumerate(trans_matrix):
        vals = "".join(f"{v:6.2f}" for v in row)
        print(f" {classes[i][:2]}{vals}")

def main():
    """Evaluate all four baselines on both the 8-way coarse and 20-way fine taxonomy."""
    for coarse in [True, False]:
        tag = "8 coarse" if coarse else "20 fine"
        print(f"\n{'='*60}")
        print(f"Baselines — {tag} classes")
        print(f"{'='*60}")
        train_segs, classes = load_annotations(TRAIN_VOLS, coarse=coarse)
        test_segs, _ = load_annotations(TEST_VOLS, coarse=coarse)
        num_classes = len(classes)
        # Segments are (prev, current) pairs: prev feeds the transition
        # baselines, current is the ground-truth label being scored.
        test_prev = [s[0] for s in test_segs]
        test_true = [s[1] for s in test_segs]
        train_labels = [s[1] for s in train_segs]
        print(f"Train segments: {len(train_segs)}")
        print(f"Test segments: {len(test_segs)}")
        # 1. Majority class baseline: always predict the most common train label.
        majority_class = Counter(train_labels).most_common(1)[0][0]
        majority_preds = [majority_class] * len(test_true)
        print(f"\n1. Majority class baseline (always predict '{classes[majority_class]}'):")
        _print_metrics(test_true, majority_preds)
        # 2. Class frequency baseline: sample labels from the train distribution.
        freq = np.bincount(train_labels, minlength=num_classes).astype(float)
        freq = freq / freq.sum()
        np.random.seed(42)
        freq_preds = np.random.choice(num_classes, size=len(test_true), p=freq)
        print(f"\n2. Random (train distribution) baseline:")
        _print_metrics(test_true, freq_preds)
        # 3. Transition matrix baseline: most likely next class given the previous.
        trans_matrix = compute_transition_matrix(train_segs, num_classes)
        trans_preds = [np.argmax(trans_matrix[prev]) for prev in test_prev]
        print(f"\n3. Transition matrix baseline (argmax P(next|prev)):")
        _print_metrics(test_true, trans_preds)
        _print_transition_matrix(trans_matrix, classes)
        # 4. Transition + sampling: sample from P(next|prev) instead of argmax.
        np.random.seed(42)
        trans_sample_preds = []
        for prev in test_prev:
            p = trans_matrix[prev]
            if p.sum() == 0:
                # Unobserved prev class: no distribution to sample, fall back
                # to the majority class.
                trans_sample_preds.append(majority_class)
            else:
                trans_sample_preds.append(np.random.choice(num_classes, p=p))
        print(f"\n4. Transition matrix + sampling baseline:")
        _print_metrics(test_true, trans_sample_preds)
        # Per-class breakdown for the strongest deterministic baseline.
        print(f"\n Per-class report (transition argmax):")
        report = classification_report(test_true, trans_preds,
                                       target_names=classes, zero_division=0)
        print(report)

if __name__ == '__main__':
    main()