import os
import numpy as np
from sklearn import metrics
# Keys (as emitted by trainer.evaluate()) that are surfaced in the model card's
# "Validation Metrics" section; anything else in the eval dict is dropped.
SINGLE_COLUMN_REGRESSION_EVAL_METRICS = (
    "eval_loss",
    "eval_mse",
    "eval_mae",
    "eval_r2",
    "eval_rmse",
    "eval_explained_variance",
)

# Markdown/YAML template for the Hugging Face model card.
# Placeholders filled by create_model_card():
#   {base_model}         - "\nbase_model: <id>" or "" (YAML front matter)
#   {dataset_tag}        - "\ndatasets:\n- <path>" or "" (YAML front matter)
#   {validation_metrics} - newline-joined "name: value" lines, or a fallback note
MODEL_CARD = """
---
tags:
- autotrain
- text-regression{base_model}
widget:
- text: "I love AutoTrain"{dataset_tag}
---

# Model Trained Using AutoTrain

- Problem type: Text Regression

## Validation Metrics
{validation_metrics}
"""
def single_column_regression_metrics(pred):
    """
    Compute regression metrics for a single column of predictions.

    Args:
        pred (tuple): A tuple ``(raw_predictions, labels)`` where the first
            element is an array-like of raw predictions and the second is an
            array-like of true labels.

    Returns:
        dict: Computed regression metrics, each as a plain ``float``:
            - "mse": Mean Squared Error
            - "mae": Mean Absolute Error
            - "r2": R-squared Score
            - "rmse": Root Mean Squared Error
            - "explained_variance": Explained Variance Score

    Notes:
        If any metric computation fails, a default value of -999 is returned
        for that metric instead of raising.
    """
    raw_predictions, labels = pred

    def _safe_compute(metric_func, default=-999):
        # Best-effort: a metric that raises (e.g. on NaN/inf predictions)
        # yields the sentinel instead of aborting the whole evaluation.
        try:
            return metric_func(labels, raw_predictions)
        except Exception:
            return default

    # Pass the sklearn metric callables directly; the original lambda wrappers
    # were redundant and their parameter names shadowed the enclosing `labels`.
    pred_dict = {
        "mse": _safe_compute(metrics.mean_squared_error),
        "mae": _safe_compute(metrics.mean_absolute_error),
        "r2": _safe_compute(metrics.r2_score),
        "rmse": _safe_compute(lambda y_true, y_pred: np.sqrt(metrics.mean_squared_error(y_true, y_pred))),
        "explained_variance": _safe_compute(metrics.explained_variance_score),
    }
    # Cast to builtin float so the dict is JSON-serializable (numpy scalars are not).
    return {key: float(value) for key, value in pred_dict.items()}
def create_model_card(config, trainer):
    """
    Build a model card string from the run configuration and trainer.

    Args:
        config (object): Configuration object providing:
            - valid_split: validation split, or None when there is no eval set.
            - data_path (str): path or hub id of the dataset.
            - project_name (str): name of the project.
            - model (str): local path or hub identifier of the base model.
        trainer (object): Trainer used to evaluate the model (only called when
            ``config.valid_split`` is not None).

    Returns:
        str: The formatted model card with dataset tag, validation metrics,
        and base-model front matter filled in.
    """
    if config.valid_split is None:
        eval_scores = "No validation metrics available"
    else:
        raw_scores = trainer.evaluate()
        # Keep only the whitelisted regression metrics, dropping the "eval_" prefix.
        kept = [
            f"{name[len('eval_'):]}: {score}"
            for name, score in raw_scores.items()
            if name in SINGLE_COLUMN_REGRESSION_EVAL_METRICS
        ]
        eval_scores = "\n\n".join(kept)

    # Auto-generated or local datasets get no dataset tag in the front matter.
    is_internal_data = config.data_path == f"{config.project_name}/autotrain-data"
    if is_internal_data or os.path.isdir(config.data_path):
        dataset_tag = ""
    else:
        dataset_tag = f"\ndatasets:\n- {config.data_path}"

    # A local model directory has no hub id to advertise as base_model.
    base_model = "" if os.path.isdir(config.model) else f"\nbase_model: {config.model}"

    return MODEL_CARD.format(
        dataset_tag=dataset_tag,
        validation_metrics=eval_scores,
        base_model=base_model,
    )