# Uploaded by hardiktiwari: "Upload 244 files" (commit 33d4721, verified)
import os
import albumentations as A
import numpy as np
from sklearn import metrics
from autotrain.trainers.image_regression.dataset import ImageRegressionDataset
# Evaluation-score keys that are allowed to appear in the generated model card.
# Each entry is the bare metric name carrying the "eval_" prefix.
VALID_METRICS = [
    f"eval_{metric_name}"
    for metric_name in ("loss", "mse", "mae", "r2", "rmse", "explained_variance")
]
# Template for the generated model card: YAML front matter followed by a short
# Markdown body. It is filled in with ``str.format`` using three placeholders:
#   {base_model}         - appended directly after the last tag entry
#   {dataset_tag}        - appended directly after the last widget entry
#   {validation_metrics} - the body of the "Validation Metrics" section
# The two front-matter placeholders have no line of their own, so any non-empty
# value must start with a newline.
# Each ``example_title`` is indented so that it belongs to the same widget list
# item as the ``src`` above it; at column 0 it would be read as a top-level key
# and the front matter would no longer be valid YAML.
MODEL_CARD = """
---
library_name: transformers
tags:
- autotrain
- vision
- image-classification
- image-regression{base_model}
widget:
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
  example_title: Tiger
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
  example_title: Teapot
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
  example_title: Palace{dataset_tag}
---
# Model Trained Using AutoTrain
- Problem type: Image Regression
## Validation Metrics
{validation_metrics}
"""
def image_regression_metrics(pred):
    """
    Calculate various regression metrics for image regression tasks.

    Args:
        pred (tuple): A ``(raw_predictions, labels)`` pair.
            raw_predictions may be a sequence of per-sample sequences (for example one
            single-element row per sample), which is flattened by one level, or an
            already flat sequence of scalar values, which is used as is.
            labels should hold the true values.

    Returns:
        dict: A dictionary containing the calculated metrics:
            - 'mse': Mean Squared Error
            - 'mae': Mean Absolute Error
            - 'r2': R^2 Score
            - 'rmse': Root Mean Squared Error
            - 'explained_variance': Explained Variance Score
        If an error occurs during the calculation of a metric, the value for that metric will be -999.
    """
    raw_predictions, labels = pred

    # Flatten by one level. Whether an entry is a scalar is decided from its
    # dimensionality rather than from the text of a TypeError, so flat inputs of
    # any scalar type (numpy.float32, numpy.float64, Python float, ...) are accepted.
    flat_predictions = []
    for item in raw_predictions:
        if np.ndim(item) == 0:
            flat_predictions.append(item)
        else:
            flat_predictions.extend(item)

    metric_functions = {
        "mse": metrics.mean_squared_error,
        "mae": metrics.mean_absolute_error,
        "r2": metrics.r2_score,
        "rmse": lambda y_true, y_pred: np.sqrt(metrics.mean_squared_error(y_true, y_pred)),
        "explained_variance": metrics.explained_variance_score,
    }

    scores = {}
    for name, metric_fn in metric_functions.items():
        try:
            scores[name] = float(metric_fn(labels, flat_predictions))
        except Exception:
            # Deliberate best-effort: one failing metric must not discard the
            # others, so it is reported with the documented -999 sentinel.
            scores[name] = -999
    return scores
def process_data(train_data, valid_data, image_processor, config):
    """
    Wrap the training and validation data in ImageRegressionDataset objects with image transforms.

    The target size is read from ``image_processor.size``: the ``"shortest_edge"`` entry when
    present, otherwise the ``"height"``/``"width"`` pair. A single (non-unpackable) value is
    used for both height and width.

    Training images get a random resized crop, random 90-degree rotation, horizontal flip
    (p=0.5), random brightness/contrast (p=0.2) and normalization; validation images are only
    resized and normalized. Normalization uses ``image_processor.image_mean`` and
    ``image_processor.image_std``.

    Args:
        train_data (Dataset): The training dataset.
        valid_data (Dataset or None): The validation dataset, or None to skip it.
        image_processor (ImageProcessor): Provides ``size``, ``image_mean`` and ``image_std``.
        config (dict): Configuration forwarded to ImageRegressionDataset.

    Returns:
        tuple: ``(train_dataset, valid_dataset)``, where ``valid_dataset`` is None when
        valid_data is None.
    """
    size_spec = image_processor.size
    if "shortest_edge" in size_spec:
        target_size = size_spec["shortest_edge"]
    else:
        target_size = (size_spec["height"], size_spec["width"])

    try:
        height, width = target_size
    except TypeError:
        # A scalar size means a square target.
        height = width = target_size

    train_transforms = A.Compose(
        [
            A.RandomResizedCrop(height=height, width=width),
            A.RandomRotate90(),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.2),
            A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
        ]
    )
    val_transforms = A.Compose(
        [
            A.Resize(height=height, width=width),
            A.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
        ]
    )

    train_dataset = ImageRegressionDataset(train_data, train_transforms, config)
    valid_dataset = None
    if valid_data is not None:
        valid_dataset = ImageRegressionDataset(valid_data, val_transforms, config)
    return train_dataset, valid_dataset
def create_model_card(config, trainer):
    """
    Build the model card text for a finished training run.

    Args:
        config (object): Configuration object; ``valid_split``, ``data_path``,
            ``project_name`` and ``model`` are read from it.
        trainer (object): Trainer whose ``evaluate()`` is called when a validation
            split is configured.

    Returns:
        str: MODEL_CARD filled in with the validation metrics, plus a dataset tag and
        a base model entry where applicable. The dataset tag is omitted when the data
        path is the project's own ``autotrain-data`` or a local directory; the base
        model entry is omitted when the model is a local directory.
    """
    if config.valid_split is None:
        validation_metrics = "No validation metrics available"
    else:
        prefix_length = len("eval_")
        metric_lines = []
        for metric_name, metric_value in trainer.evaluate().items():
            # Only whitelisted metrics are reported, without their "eval_" prefix.
            if metric_name in VALID_METRICS:
                metric_lines.append(f"{metric_name[prefix_length:]}: {metric_value}")
        validation_metrics = "\n\n".join(metric_lines)

    data_is_local = config.data_path == f"{config.project_name}/autotrain-data" or os.path.isdir(
        config.data_path
    )
    dataset_tag = "" if data_is_local else f"\ndatasets:\n- {config.data_path}"

    base_model = "" if os.path.isdir(config.model) else f"\nbase_model: {config.model}"

    return MODEL_CARD.format(
        dataset_tag=dataset_tag,
        validation_metrics=validation_metrics,
        base_model=base_model,
    )