import os
from typing import Optional, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

from libs.class_id_map import get_id2class_map
from libs.metric import AverageMeter, BoundaryScoreMeter, ScoreMeter
from libs.postprocess import PostProcessor
from prompt.tools import (segment_video_labels, gen_label, gen_label_split,
                          generate_segment_features, generate_split_features,
                          create_logits, split_feature, split_gt, split_gt_feature,
                          split_mixed_class)
from prompt.text_prompt import text_prompt_for_clip


def train(
    train_loader: DataLoader,
    model: nn.Module,
    model_text: nn.Module,
    class_text_list,
    joint_text_list,
    criterion_cls: nn.Module,
    criterion_bound: nn.Module,
    criterion_contrast: nn.Module,
    lambda_bound_loss: float,
    optimizer: optim.Optimizer,
    dataset_name,
    device,
    output_device,
) -> float:
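    """Train the model for one epoch.

    The total loss combines the frame-wise classification loss, the
    boundary regression loss (weighted by lambda_bound_loss) and
    CLIP-style contrastive losses between pooled visual segment
    features and text embeddings.

    Returns the average training loss over the epoch.
    """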
    losses = AverageMeter("Loss", ":.4e")

    model.train()

    for sample in tqdm(train_loader):
        x = sample["feature"]
        t = sample["label"]
        b = sample["boundary"]
        mask = sample["mask"]

        x = x.to(output_device)
        t = t.to(output_device)
        b = b.to(output_device)
        mask = mask.to(output_device)
        joint_text_list = joint_text_list.to(output_device)

        optimizer.zero_grad()

        batch_size = x.shape[0]

        # Encode the tokenized joint prompts with the text encoder.
        joint_text_embedding = model_text(joint_text_list).float()

        output_cls, output_bound, output_feature, output_feature_split, logit_scale = model(
            x, mask, joint_text_embedding
        )
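
        # Collapse the frame-wise labels into segments and gather one
        # class-text prompt per ground-truth segment.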
        t_segment = segment_video_labels(t)
        # The first element of each segment entry is its class id.
        label = [i[0] for seg in t_segment for i in seg]

        label_g = gen_label(label)

        texts = []
        for single_label in label:
            text_item = class_text_list[single_label].unsqueeze(dim=0)
            texts.append(text_item)

        texts = torch.cat(texts).to(output_device)
        text_embedding = model_text(texts).float()

        # Pool frame features into one embedding per ground-truth segment
        # (one set per stage when the model returns a list of feature maps).
        action_embeddings = []
        if isinstance(output_feature, list):
            for i in range(len(output_feature)):
                action_embedding = generate_segment_features(output_feature[i], t_segment, output_device)
                action_embeddings.append(action_embedding)

        # Build mixed-class splits (size 2) from the segment sequence; the
        # split-level embeddings are only needed when any split exists.
        gt_split, feature_split = split_mixed_class(t_segment, 2)
        has_split = any(len(split) > 0 for split in feature_split)
        if has_split:
            feature_split_embedding = generate_split_features(output_feature_split, feature_split, output_device)
            text_split = text_prompt_for_clip(gt_split, dataset_name, "simple").to(output_device)
            text_split_embedding = model_text(text_split).float()
            label_split_g = gen_label_split(gt_split)
        else:
            feature_split_embedding = None
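
        # Total loss: classification + weighted boundary regression, with
        # the contrastive alignment terms added below.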
        loss = 0.0

        # Classification loss, averaged over stages when the model
        # returns per-stage outputs.
        if isinstance(output_cls, list):
            n = len(output_cls)
            for out in output_cls:
                loss += criterion_cls(out, t, x) / n
        else:
            loss += criterion_cls(output_cls, t, x)

        # Boundary regression loss, weighted by lambda_bound_loss.
        if isinstance(output_bound, list):
            n = len(output_bound)
            for out in output_bound:
                loss += lambda_bound_loss * criterion_bound(out, b, mask) / n
        else:
            loss += lambda_bound_loss * criterion_bound(output_bound, b, mask)
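
        # Contrastive alignment: symmetric image-to-text / text-to-image
        # losses between pooled segment features and class-text embeddings,
        # plus a second term for mixed-class splits when any exist.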
        for action_embedding in action_embeddings:
            logits_per_image, logits_per_text = create_logits(action_embedding, text_embedding, logit_scale[0])
            ground_truth = torch.tensor(label_g, dtype=action_embedding.dtype, device=output_device)

            loss_imgs = criterion_contrast(logits_per_image, ground_truth)
            loss_texts = criterion_contrast(logits_per_text, ground_truth)

            loss += 0.8 * ((loss_imgs + loss_texts) / 2)

        if has_split:
            logits_per_image, logits_per_text = create_logits(
                feature_split_embedding, text_split_embedding, logit_scale[1]
            )
            ground_truth = torch.tensor(label_split_g, dtype=feature_split_embedding.dtype, device=output_device)

            loss_imgs = criterion_contrast(logits_per_image, ground_truth)
            loss_texts = criterion_contrast(logits_per_text, ground_truth)

            loss += 0.5 * ((loss_imgs + loss_texts) / 2)

        losses.update(loss.item(), batch_size)

        loss.backward()
        optimizer.step()

    return losses.avg


def validate(
    val_loader: DataLoader,
    model: nn.Module,
    model_text: nn.Module,
    joint_text_list,
    criterion_cls: nn.Module,
    criterion_bound: nn.Module,
    lambda_bound_loss: float,
    device,
    output_device,
    dataset: str,
    dataset_dir: str,
    iou_thresholds: Tuple[float],
    boundary_th: float,
    tolerance: int,
    refinement_method: Optional[str] = None,
) -> Tuple[float, float, float, float, float, float, float, float, str]:
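    """Evaluate the model on the validation set.

    Computes the validation loss, classification and segment metrics
    before and after boundary-based refinement, and boundary detection
    scores.
    """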
    losses = AverageMeter("Loss", ":.4e")
    postprocessor = PostProcessor(refinement_method, boundary_th)
    scores_cls = ScoreMeter(
        id2class_map=get_id2class_map(dataset, dataset_dir=dataset_dir),
        iou_thresholds=iou_thresholds,
    )
    scores_bound = BoundaryScoreMeter(
        tolerance=tolerance, boundary_threshold=boundary_th
    )
    scores_after_refinement = ScoreMeter(
        id2class_map=get_id2class_map(dataset, dataset_dir=dataset_dir),
        iou_thresholds=iou_thresholds,
    )

    model.eval()

    with torch.no_grad():
        for sample in tqdm(val_loader):
            x = sample["feature"]
            t = sample["label"]
            b = sample["boundary"]
            mask = sample["mask"]

            x = x.to(output_device)
            t = t.to(output_device)
            b = b.to(output_device)
            mask = mask.to(output_device)
            joint_text_list = joint_text_list.to(output_device)

            batch_size = x.shape[0]

            joint_text_embedding = model_text(joint_text_list).float()

            output_cls, output_bound = model(x, mask, joint_text_embedding)

            loss = 0.0
            loss += criterion_cls(output_cls, t, x)
            loss += lambda_bound_loss * criterion_bound(output_bound, b, mask)

            losses.update(loss.item(), batch_size)
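
            # Move predictions and targets to CPU for metric computation.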
            output_cls = output_cls.to("cpu").data.numpy()
            output_bound = output_bound.to("cpu").data.numpy()

            t = t.to("cpu").data.numpy()
            b = b.to("cpu").data.numpy()
            mask = mask.to("cpu").data.numpy()

            refined_output_cls = postprocessor(
                output_cls, boundaries=output_bound, masks=mask
            )

            scores_cls.update(output_cls, t, output_bound, mask)
            scores_bound.update(output_bound, b, mask)
            scores_after_refinement.update(refined_output_cls, t)

    # mAP is taken from the raw predictions; accuracy, edit score and
    # segmental F1 are reported after boundary refinement.
    _, _, _, maps = scores_cls.get_scores()
    cls_acc, edit_score, segment_f1s, _ = scores_after_refinement.get_scores()
    bound_acc, precision, recall, bound_f1s = scores_bound.get_scores()

    return (
        losses.avg,
        cls_acc,
        edit_score,
        segment_f1s,
        maps,
        bound_acc,
        precision,
        recall,
        bound_f1s,
    )


def evaluate(
    val_loader: DataLoader,
    model: nn.Module,
    model_text,
    joint_text_list,
    device: str,
    boundary_th: float,
    dataset: str,
    dataset_dir: str,
    iou_thresholds: Tuple[float],
    tolerance: int,
    result_path: str,
    config: str,
    refinement_method: Optional[str] = None,
) -> None:
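    """Run the final evaluation and save scores to CSV files.

    Reports classification and segment metrics before and after
    boundary-based refinement, along with boundary detection scores,
    and writes the results and confusion matrices under result_path.
    """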
    postprocessor = PostProcessor(refinement_method, boundary_th)

    scores_before_refinement = ScoreMeter(
        id2class_map=get_id2class_map(dataset, dataset_dir=dataset_dir),
        iou_thresholds=iou_thresholds,
    )
    scores_bound = BoundaryScoreMeter(
        tolerance=tolerance, boundary_threshold=boundary_th
    )
    scores_after_refinement = ScoreMeter(
        id2class_map=get_id2class_map(dataset, dataset_dir=dataset_dir),
        iou_thresholds=iou_thresholds,
    )

    model.eval()

    with torch.no_grad():
        for sample in tqdm(val_loader):
            x = sample["feature"]
            t = sample["label"]
            b = sample["boundary"]
            mask = sample["mask"]

            x = x.to(device)
            t = t.to(device)
            b = b.to(device)
            mask = mask.to(device)
            joint_text_list = joint_text_list.to(device)

            joint_text_embedding = model_text(joint_text_list).float()

            output_cls, output_bound = model(x, mask, joint_text_embedding)

            output_cls = output_cls.to("cpu").data.numpy()
            output_bound = output_bound.to("cpu").data.numpy()

            x = x.to("cpu").data.numpy()
            t = t.to("cpu").data.numpy()
            b = b.to("cpu").data.numpy()
            mask = mask.to("cpu").data.numpy()

            # Refine frame-wise predictions with the predicted boundaries.
            refined_output_cls = postprocessor(
                output_cls, boundaries=output_bound, masks=mask
            )

            scores_before_refinement.update(output_cls, t)
            scores_bound.update(output_bound, b, mask)
            scores_after_refinement.update(refined_output_cls, t)

    print("Before refinement:", scores_before_refinement.get_scores())
    print("Boundary scores:", scores_bound.get_scores())
    print("After refinement:", scores_after_refinement.get_scores())

    scores_before_refinement.save_scores(
        os.path.join(result_path, "test_as_before_refine.csv")
    )
    scores_before_refinement.save_confusion_matrix(
        os.path.join(result_path, "test_c_matrix_before_refinement.csv")
    )
    scores_bound.save_scores(os.path.join(result_path, "test_br.csv"))
    scores_after_refinement.save_scores(
        os.path.join(result_path, "test_as_after_majority_vote.csv")
    )
    scores_after_refinement.save_confusion_matrix(
        os.path.join(result_path, "test_c_matrix_after_majority_vote.csv")
    )