# Install

In [2]:
%pip install uv

Note: you may need to restart the kernel to use updated packages.


In [None]:
!uv pip install dagshub setuptools accelerate toml torch torchvision transformers mlflow datasets ipywidgets python-dotenv evaluate

# Setup

In [1]:
import os
import toml
import torch
import mlflow
import dagshub
import datasets
import evaluate
from dotenv import load_dotenv
from torchvision.transforms import v2
from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer

ENV_PATH = "/Users/andrewmayes/Openclassroom/CanineNet/.env"
CONFIG_PATH = "/Users/andrewmayes/Openclassroom/CanineNet/code/config.toml"
CONFIG = toml.load(CONFIG_PATH)

load_dotenv(ENV_PATH)

dagshub.init(repo_name=os.environ['MLFLOW_TRACKING_PROJECTNAME'], repo_owner=os.environ['MLFLOW_TRACKING_USERNAME'], mlflow=True, dvc=True)

os.environ['MLFLOW_TRACKING_USERNAME'] = "amaye15"

mlflow.set_tracking_uri(f'https://dagshub.com/' + os.environ['MLFLOW_TRACKING_USERNAME']
                         + '/' + os.environ['MLFLOW_TRACKING_PROJECTNAME'] + '.mlflow')

CREATE_DATASET = True
ORIGINAL_DATASET = "Alanox/stanford-dogs"
MODIFIED_DATASET = "amaye15/stanford-dogs"
REMOVE_COLUMNS = ["name", "annotations"]
RENAME_COLUMNS = {"image":"pixel_values", "target":"label"}
SPLIT = 0.2

METRICS = ["accuracy", "f1", "precision", "recall"]
# MODELS = 'google/vit-base-patch16-224'
# MODELS = "google/siglip-base-patch16-224"



# Dataset

In [2]:
if CREATE_DATASET:
    ds = datasets.load_dataset(ORIGINAL_DATASET, token=os.getenv("HF_TOKEN"), split="full", trust_remote_code=True)
    ds = ds.remove_columns(REMOVE_COLUMNS).rename_columns(RENAME_COLUMNS)

    labels = ds.select_columns("label").to_pandas().sort_values("label").get("label").unique().tolist()
    numbers = range(len(labels))
    label2int = dict(zip(labels, numbers))
    int2label = dict(zip(numbers, labels))

    for key, val in label2int.items():
        print(f"{key}: {val}")

    ds = ds.class_encode_column("label")
    ds = ds.align_labels_with_mapping(label2int, "label")

    ds = ds.train_test_split(test_size=SPLIT, stratify_by_column = "label")
    #ds.push_to_hub(MODIFIED_DATASET, token=os.getenv("HF_TOKEN"))

    CONFIG["label2int"] = str(label2int)
    CONFIG["int2label"] = str(int2label)

    # with open("output.toml", "w") as toml_file:
    #     toml.dump(toml.dumps(CONFIG), toml_file)

    #ds = datasets.load_dataset(MODIFIED_DATASET, token=os.getenv("HF_TOKEN"), trust_remote_code=True, streaming=True)

Affenpinscher: 0
Afghan Hound: 1
African Hunting Dog: 2
Airedale: 3
American Staffordshire Terrier: 4
Appenzeller: 5
Australian Terrier: 6
Basenji: 7
Basset: 8
Beagle: 9
Bedlington Terrier: 10
Bernese Mountain Dog: 11
Black And Tan Coonhound: 12
Blenheim Spaniel: 13
Bloodhound: 14
Bluetick: 15
Border Collie: 16
Border Terrier: 17
Borzoi: 18
Boston Bull: 19
Bouvier Des Flandres: 20
Boxer: 21
Brabancon Griffon: 22
Briard: 23
Brittany Spaniel: 24
Bull Mastiff: 25
Cairn: 26
Cardigan: 27
Chesapeake Bay Retriever: 28
Chihuahua: 29
Chow: 30
Clumber: 31
Cocker Spaniel: 32
Collie: 33
Curly Coated Retriever: 34
Dandie Dinmont: 35
Dhole: 36
Dingo: 37
Doberman: 38
English Foxhound: 39
English Setter: 40
English Springer: 41
Entlebucher: 42
Eskimo Dog: 43
Flat Coated Retriever: 44
French Bulldog: 45
German Shepherd: 46
German Short Haired Pointer: 47
Giant Schnauzer: 48
Golden Retriever: 49
Gordon Setter: 50
Great Dane: 51
Great Pyrenees: 52
Greater Swiss Mountain Dog: 53
Groenendael: 54
Ibizan Hou

In [3]:
metrics = {metric: evaluate.load(metric) for metric in METRICS}
for lr in [5e-3, 5e-4, 5e-5]:
    for batch in [32, 64, 128]:
        for model_name in ["google/vit-base-patch16-224", "microsoft/swinv2-base-patch4-window16-256", "google/siglip-base-patch16-224", "facebook/dinov2-base"]:

            image_processor = AutoImageProcessor.from_pretrained(model_name)
            model = AutoModelForImageClassification.from_pretrained(
            model_name,
            num_labels=len(label2int),
            id2label=int2label,
            label2id=label2int,
            ignore_mismatched_sizes=True,
            )

            # Then, in your transformations:
            def train_transform(examples, num_ops=10, magnitude=9, num_magnitude_bins=31):

                transformation = v2.Compose(
                    [
                        v2.RandAugment(
                            num_ops=num_ops,
                            magnitude=magnitude,
                            num_magnitude_bins=num_magnitude_bins,
                        )
                    ]
                )
                # Ensure each image has three dimensions (in this case, ensure it's RGB)
                examples["pixel_values"] = [
                    image.convert("RGB") for image in examples["pixel_values"]
                ]
                # Apply transformations
                examples["pixel_values"] = [
                    image_processor(transformation(image), return_tensors="pt")[
                        "pixel_values"
                    ].squeeze()
                    for image in examples["pixel_values"]
                ]
                return examples


            def test_transform(examples):
                # Ensure each image is RGB
                examples["pixel_values"] = [
                    image.convert("RGB") for image in examples["pixel_values"]
                ]
                # Apply processing
                examples["pixel_values"] = [
                    image_processor(image, return_tensors="pt")["pixel_values"].squeeze()
                    for image in examples["pixel_values"]
                ]
                return examples


            def compute_metrics(eval_pred):
                predictions, labels = eval_pred
                # predictions = np.argmax(logits, axis=-1)
                results = {}
                for key, val in metrics.items():
                    if "accuracy" == key:
                        result = next(
                            iter(val.compute(predictions=predictions, references=labels).items())
                        )
                    if "accuracy" != key:
                        result = next(
                            iter(
                                val.compute(
                                    predictions=predictions, references=labels, average="macro"
                                ).items()
                            )
                        )
                    results[result[0]] = result[1]
                return results


            def collate_fn(examples):
                pixel_values = torch.stack([example["pixel_values"] for example in examples])
                labels = torch.tensor([example["label"] for example in examples])
                return {"pixel_values": pixel_values, "labels": labels}


            def preprocess_logits_for_metrics(logits, labels):
                """
                Original Trainer may have a memory leak.
                This is a workaround to avoid storing too many tensors that are not needed.
                """
                pred_ids = torch.argmax(logits, dim=-1)
                return pred_ids

            ds["train"].set_transform(train_transform)
            ds["test"].set_transform(test_transform)

            training_args = TrainingArguments(**CONFIG["training_args"])
            training_args.per_device_train_batch_size = batch
            training_args.per_device_eval_batch_size = batch
            training_args.hub_model_id = f"amaye15/{model_name.replace('/','-')}-batch{batch}-lr{lr}-standford-dogs"

            mlflow.start_run(run_name=f"{model_name.replace('/','-')}-batch{batch}-lr{lr}")

            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=ds["train"],
                eval_dataset=ds["test"],
                tokenizer=image_processor,
                data_collator=collate_fn,
                compute_metrics=compute_metrics,
                # callbacks=[early_stopping_callback],
                preprocess_logits_for_metrics=preprocess_logits_for_metrics,
            )

            # Train the model
            trainer.train()

            trainer.push_to_hub()

            mlflow.end_run()

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([120]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([120, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
max_steps is given, it will override any value given in num_train_epochs


  0%|          | 0/1000 [00:00<?, ?it/s]



{'loss': 4.8453, 'grad_norm': 3.2187986373901367, 'learning_rate': 4.9500000000000004e-05, 'epoch': 0.08}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 4.63408899307251, 'eval_accuracy': 0.03547133138969873, 'eval_f1': 0.03035870635022939, 'eval_precision': 0.031125096822983367, 'eval_recall': 0.03642496088050857, 'eval_runtime': 51.1034, 'eval_samples_per_second': 80.543, 'eval_steps_per_second': 2.524, 'epoch': 0.08}




{'loss': 4.5433, 'grad_norm': 3.1309454441070557, 'learning_rate': 4.9e-05, 'epoch': 0.16}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 4.31070613861084, 'eval_accuracy': 0.12463556851311954, 'eval_f1': 0.09815189378488118, 'eval_precision': 0.126318491286059, 'eval_recall': 0.12250067902004853, 'eval_runtime': 50.7554, 'eval_samples_per_second': 81.095, 'eval_steps_per_second': 2.542, 'epoch': 0.16}




{'loss': 4.2752, 'grad_norm': 3.1881861686706543, 'learning_rate': 4.85e-05, 'epoch': 0.23}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 3.9696836471557617, 'eval_accuracy': 0.271865889212828, 'eval_f1': 0.217632428246865, 'eval_precision': 0.2517716495124851, 'eval_recall': 0.26318141054054833, 'eval_runtime': 50.0122, 'eval_samples_per_second': 82.3, 'eval_steps_per_second': 2.579, 'epoch': 0.23}




{'loss': 3.9872, 'grad_norm': 3.366978406906128, 'learning_rate': 4.8e-05, 'epoch': 0.31}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 3.6401774883270264, 'eval_accuracy': 0.4273566569484937, 'eval_f1': 0.3660895297115911, 'eval_precision': 0.42644619182693877, 'eval_recall': 0.4167238077350393, 'eval_runtime': 49.7203, 'eval_samples_per_second': 82.783, 'eval_steps_per_second': 2.595, 'epoch': 0.31}




{'loss': 3.7182, 'grad_norm': 3.2500784397125244, 'learning_rate': 4.75e-05, 'epoch': 0.39}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 3.3251121044158936, 'eval_accuracy': 0.5362001943634597, 'eval_f1': 0.4888484189687165, 'eval_precision': 0.5816706888112184, 'eval_recall': 0.5247186429593306, 'eval_runtime': 49.5066, 'eval_samples_per_second': 83.14, 'eval_steps_per_second': 2.606, 'epoch': 0.39}




{'loss': 3.473, 'grad_norm': 3.3794054985046387, 'learning_rate': 4.7e-05, 'epoch': 0.47}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 3.0453436374664307, 'eval_accuracy': 0.6219630709426628, 'eval_f1': 0.5814523710729809, 'eval_precision': 0.6516315278477566, 'eval_recall': 0.611536920880998, 'eval_runtime': 49.5824, 'eval_samples_per_second': 83.013, 'eval_steps_per_second': 2.602, 'epoch': 0.47}




{'loss': 3.2252, 'grad_norm': 3.5271167755126953, 'learning_rate': 4.6500000000000005e-05, 'epoch': 0.54}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 2.773921489715576, 'eval_accuracy': 0.6817298347910593, 'eval_f1': 0.6505739550950606, 'eval_precision': 0.7193505945909296, 'eval_recall': 0.6712955927826942, 'eval_runtime': 49.4649, 'eval_samples_per_second': 83.21, 'eval_steps_per_second': 2.608, 'epoch': 0.54}




{'loss': 2.9976, 'grad_norm': 3.7335126399993896, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.62}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 2.5391345024108887, 'eval_accuracy': 0.7045675413022352, 'eval_f1': 0.6756150478428508, 'eval_precision': 0.7285806531908359, 'eval_recall': 0.6953532664643869, 'eval_runtime': 49.5802, 'eval_samples_per_second': 83.017, 'eval_steps_per_second': 2.602, 'epoch': 0.62}




{'loss': 2.762, 'grad_norm': 3.7279648780822754, 'learning_rate': 4.55e-05, 'epoch': 0.7}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 2.2989985942840576, 'eval_accuracy': 0.750485908649174, 'eval_f1': 0.725800496636549, 'eval_precision': 0.7646354734126305, 'eval_recall': 0.7420594975688783, 'eval_runtime': 48.8518, 'eval_samples_per_second': 84.255, 'eval_steps_per_second': 2.641, 'epoch': 0.7}




{'loss': 2.5763, 'grad_norm': 3.7486424446105957, 'learning_rate': 4.5e-05, 'epoch': 0.78}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 2.107466459274292, 'eval_accuracy': 0.7645772594752187, 'eval_f1': 0.7433831500294813, 'eval_precision': 0.7792831030046864, 'eval_recall': 0.75564353300614, 'eval_runtime': 49.0856, 'eval_samples_per_second': 83.854, 'eval_steps_per_second': 2.628, 'epoch': 0.78}




{'loss': 2.4357, 'grad_norm': 3.8511111736297607, 'learning_rate': 4.4500000000000004e-05, 'epoch': 0.85}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.9225727319717407, 'eval_accuracy': 0.7849854227405247, 'eval_f1': 0.7652021489489449, 'eval_precision': 0.8027206093648191, 'eval_recall': 0.7768373145126328, 'eval_runtime': 49.2931, 'eval_samples_per_second': 83.501, 'eval_steps_per_second': 2.617, 'epoch': 0.85}




{'loss': 2.2669, 'grad_norm': 3.930382490158081, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.93}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.7672972679138184, 'eval_accuracy': 0.8007774538386784, 'eval_f1': 0.7837747422166718, 'eval_precision': 0.8149458770309199, 'eval_recall': 0.7937617881931025, 'eval_runtime': 48.9548, 'eval_samples_per_second': 84.078, 'eval_steps_per_second': 2.635, 'epoch': 0.93}




{'loss': 2.1459, 'grad_norm': 3.9222986698150635, 'learning_rate': 4.35e-05, 'epoch': 1.01}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.6338553428649902, 'eval_accuracy': 0.8175413022351797, 'eval_f1': 0.8058247212224944, 'eval_precision': 0.8291157786332387, 'eval_recall': 0.8109660231401558, 'eval_runtime': 48.8096, 'eval_samples_per_second': 84.328, 'eval_steps_per_second': 2.643, 'epoch': 1.01}




{'loss': 1.9822, 'grad_norm': 3.697866678237915, 'learning_rate': 4.3e-05, 'epoch': 1.09}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.5203596353530884, 'eval_accuracy': 0.8214285714285714, 'eval_f1': 0.811394046732835, 'eval_precision': 0.8365573111749929, 'eval_recall': 0.8150928251002161, 'eval_runtime': 49.0932, 'eval_samples_per_second': 83.841, 'eval_steps_per_second': 2.628, 'epoch': 1.09}




{'loss': 1.8701, 'grad_norm': 3.714578151702881, 'learning_rate': 4.25e-05, 'epoch': 1.17}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.4219428300857544, 'eval_accuracy': 0.8172983479105929, 'eval_f1': 0.8090784041709039, 'eval_precision': 0.8330024657306648, 'eval_recall': 0.8116546751922475, 'eval_runtime': 49.3628, 'eval_samples_per_second': 83.383, 'eval_steps_per_second': 2.613, 'epoch': 1.17}




{'loss': 1.8007, 'grad_norm': 3.973750591278076, 'learning_rate': 4.2e-05, 'epoch': 1.24}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.3224109411239624, 'eval_accuracy': 0.8292031098153547, 'eval_f1': 0.8205404948387138, 'eval_precision': 0.8390297018418361, 'eval_recall': 0.8233368890551767, 'eval_runtime': 48.8834, 'eval_samples_per_second': 84.2, 'eval_steps_per_second': 2.639, 'epoch': 1.24}




{'loss': 1.8004, 'grad_norm': 4.3374433517456055, 'learning_rate': 4.15e-05, 'epoch': 1.32}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.255250334739685, 'eval_accuracy': 0.8323615160349854, 'eval_f1': 0.8243100776010878, 'eval_precision': 0.8413215716962761, 'eval_recall': 0.827061188971052, 'eval_runtime': 48.8143, 'eval_samples_per_second': 84.32, 'eval_steps_per_second': 2.643, 'epoch': 1.32}




{'loss': 1.6511, 'grad_norm': 4.235231876373291, 'learning_rate': 4.1e-05, 'epoch': 1.4}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 1.1728312969207764, 'eval_accuracy': 0.8372206025267249, 'eval_f1': 0.8281689029224051, 'eval_precision': 0.8466546065690131, 'eval_recall': 0.8313627834890125, 'eval_runtime': 48.8413, 'eval_samples_per_second': 84.273, 'eval_steps_per_second': 2.641, 'epoch': 1.4}




{'loss': 1.548, 'grad_norm': 4.652594089508057, 'learning_rate': 4.05e-05, 'epoch': 1.48}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.109053373336792, 'eval_accuracy': 0.8394071914480078, 'eval_f1': 0.8300341562938096, 'eval_precision': 0.8499928349581373, 'eval_recall': 0.8339710868607537, 'eval_runtime': 50.0745, 'eval_samples_per_second': 82.197, 'eval_steps_per_second': 2.576, 'epoch': 1.48}




{'loss': 1.5634, 'grad_norm': 4.48358678817749, 'learning_rate': 4e-05, 'epoch': 1.55}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.0561015605926514, 'eval_accuracy': 0.8345481049562682, 'eval_f1': 0.826287548167397, 'eval_precision': 0.8444342550131346, 'eval_recall': 0.8287289494101967, 'eval_runtime': 50.026, 'eval_samples_per_second': 82.277, 'eval_steps_per_second': 2.579, 'epoch': 1.55}




{'loss': 1.5163, 'grad_norm': 4.588488578796387, 'learning_rate': 3.9500000000000005e-05, 'epoch': 1.63}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.9983496069908142, 'eval_accuracy': 0.8457240038872692, 'eval_f1': 0.8382174768545753, 'eval_precision': 0.8511547753297591, 'eval_recall': 0.8409440776548832, 'eval_runtime': 52.3351, 'eval_samples_per_second': 78.647, 'eval_steps_per_second': 2.465, 'epoch': 1.63}




{'loss': 1.3883, 'grad_norm': 4.083033084869385, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.71}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.9574113488197327, 'eval_accuracy': 0.8498542274052479, 'eval_f1': 0.8424611801873888, 'eval_precision': 0.8545480234681159, 'eval_recall': 0.8452103929509444, 'eval_runtime': 54.0583, 'eval_samples_per_second': 76.14, 'eval_steps_per_second': 2.386, 'epoch': 1.71}




{'loss': 1.3161, 'grad_norm': 3.867316484451294, 'learning_rate': 3.85e-05, 'epoch': 1.79}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.9129320979118347, 'eval_accuracy': 0.8510689990281827, 'eval_f1': 0.842499864625979, 'eval_precision': 0.8563830616439544, 'eval_recall': 0.8457464024015375, 'eval_runtime': 50.3958, 'eval_samples_per_second': 81.673, 'eval_steps_per_second': 2.56, 'epoch': 1.79}




{'loss': 1.304, 'grad_norm': 4.2902374267578125, 'learning_rate': 3.8e-05, 'epoch': 1.86}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.8726516962051392, 'eval_accuracy': 0.8534985422740525, 'eval_f1': 0.8454465001707215, 'eval_precision': 0.8569985990010426, 'eval_recall': 0.8487166838712229, 'eval_runtime': 50.6061, 'eval_samples_per_second': 81.334, 'eval_steps_per_second': 2.549, 'epoch': 1.86}




{'loss': 1.3268, 'grad_norm': 4.475445747375488, 'learning_rate': 3.7500000000000003e-05, 'epoch': 1.94}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.8411638736724854, 'eval_accuracy': 0.8510689990281827, 'eval_f1': 0.8440658519317673, 'eval_precision': 0.8572039674559415, 'eval_recall': 0.8472696555447555, 'eval_runtime': 50.1137, 'eval_samples_per_second': 82.133, 'eval_steps_per_second': 2.574, 'epoch': 1.94}




{'loss': 1.2388, 'grad_norm': 4.488099575042725, 'learning_rate': 3.7e-05, 'epoch': 2.02}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.810386598110199, 'eval_accuracy': 0.8568999028182702, 'eval_f1': 0.8482250401180048, 'eval_precision': 0.8608009175717143, 'eval_recall': 0.8522420627356607, 'eval_runtime': 50.7442, 'eval_samples_per_second': 81.113, 'eval_steps_per_second': 2.542, 'epoch': 2.02}




{'loss': 1.1333, 'grad_norm': 4.516467094421387, 'learning_rate': 3.65e-05, 'epoch': 2.1}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.7920050024986267, 'eval_accuracy': 0.8556851311953353, 'eval_f1': 0.8486107665476558, 'eval_precision': 0.859555442251791, 'eval_recall': 0.851584767005035, 'eval_runtime': 50.4707, 'eval_samples_per_second': 81.552, 'eval_steps_per_second': 2.556, 'epoch': 2.1}




{'loss': 1.1305, 'grad_norm': 3.9769482612609863, 'learning_rate': 3.6e-05, 'epoch': 2.17}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.7564593553543091, 'eval_accuracy': 0.857871720116618, 'eval_f1': 0.850534421437704, 'eval_precision': 0.8629770792513701, 'eval_recall': 0.8533865540611241, 'eval_runtime': 50.4855, 'eval_samples_per_second': 81.528, 'eval_steps_per_second': 2.555, 'epoch': 2.17}




{'loss': 1.1849, 'grad_norm': 5.161864757537842, 'learning_rate': 3.55e-05, 'epoch': 2.25}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.7497532367706299, 'eval_accuracy': 0.85932944606414, 'eval_f1': 0.8535738637582512, 'eval_precision': 0.8646493754082539, 'eval_recall': 0.8548654745509918, 'eval_runtime': 51.6133, 'eval_samples_per_second': 79.747, 'eval_steps_per_second': 2.499, 'epoch': 2.25}




{'loss': 1.1287, 'grad_norm': 4.14560079574585, 'learning_rate': 3.5e-05, 'epoch': 2.33}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.7347991466522217, 'eval_accuracy': 0.85932944606414, 'eval_f1': 0.853286935591957, 'eval_precision': 0.865349657662883, 'eval_recall': 0.8552408374882046, 'eval_runtime': 51.0727, 'eval_samples_per_second': 80.591, 'eval_steps_per_second': 2.526, 'epoch': 2.33}




{'loss': 1.0537, 'grad_norm': 4.066341876983643, 'learning_rate': 3.45e-05, 'epoch': 2.41}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.7120196223258972, 'eval_accuracy': 0.8554421768707483, 'eval_f1': 0.8495897159090885, 'eval_precision': 0.8585552651791398, 'eval_recall': 0.8515328760686888, 'eval_runtime': 51.0985, 'eval_samples_per_second': 80.55, 'eval_steps_per_second': 2.525, 'epoch': 2.41}




{'loss': 1.1157, 'grad_norm': 4.041014671325684, 'learning_rate': 3.4000000000000007e-05, 'epoch': 2.49}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.6832228302955627, 'eval_accuracy': 0.8622448979591837, 'eval_f1': 0.855151527873783, 'eval_precision': 0.86621670758505, 'eval_recall': 0.8579446769808802, 'eval_runtime': 49.7698, 'eval_samples_per_second': 82.701, 'eval_steps_per_second': 2.592, 'epoch': 2.49}




{'loss': 1.1008, 'grad_norm': 5.150105953216553, 'learning_rate': 3.35e-05, 'epoch': 2.56}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.6704536080360413, 'eval_accuracy': 0.8617589893100097, 'eval_f1': 0.8546440118880293, 'eval_precision': 0.8640243651018866, 'eval_recall': 0.8573904668097079, 'eval_runtime': 49.6951, 'eval_samples_per_second': 82.825, 'eval_steps_per_second': 2.596, 'epoch': 2.56}




{'loss': 1.0512, 'grad_norm': 5.102594375610352, 'learning_rate': 3.3e-05, 'epoch': 2.64}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.655726432800293, 'eval_accuracy': 0.8629737609329446, 'eval_f1': 0.8563127076721974, 'eval_precision': 0.8635521819478348, 'eval_recall': 0.8592849145660356, 'eval_runtime': 50.4309, 'eval_samples_per_second': 81.617, 'eval_steps_per_second': 2.558, 'epoch': 2.64}




{'loss': 1.0641, 'grad_norm': 3.8377113342285156, 'learning_rate': 3.2500000000000004e-05, 'epoch': 2.72}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.6490115523338318, 'eval_accuracy': 0.8632167152575316, 'eval_f1': 0.8581037098670572, 'eval_precision': 0.8690863260667425, 'eval_recall': 0.8595770895727156, 'eval_runtime': 53.4616, 'eval_samples_per_second': 76.99, 'eval_steps_per_second': 2.413, 'epoch': 2.72}




{'loss': 1.0446, 'grad_norm': 4.310233116149902, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.8}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.6300677061080933, 'eval_accuracy': 0.8651603498542274, 'eval_f1': 0.8597453620586883, 'eval_precision': 0.8692247694896753, 'eval_recall': 0.8611769340746278, 'eval_runtime': 51.2965, 'eval_samples_per_second': 80.239, 'eval_steps_per_second': 2.515, 'epoch': 2.8}




{'loss': 1.0104, 'grad_norm': 4.522639274597168, 'learning_rate': 3.15e-05, 'epoch': 2.87}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.6286835074424744, 'eval_accuracy': 0.8632167152575316, 'eval_f1': 0.8562195534431664, 'eval_precision': 0.8667545101362343, 'eval_recall': 0.8588131278764602, 'eval_runtime': 51.7032, 'eval_samples_per_second': 79.608, 'eval_steps_per_second': 2.495, 'epoch': 2.87}




{'loss': 1.0544, 'grad_norm': 4.106919765472412, 'learning_rate': 3.1e-05, 'epoch': 2.95}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.6149628162384033, 'eval_accuracy': 0.8644314868804664, 'eval_f1': 0.857927074632293, 'eval_precision': 0.8657466184998583, 'eval_recall': 0.8601794684156818, 'eval_runtime': 52.1426, 'eval_samples_per_second': 78.937, 'eval_steps_per_second': 2.474, 'epoch': 2.95}




{'loss': 1.0074, 'grad_norm': 4.786281108856201, 'learning_rate': 3.05e-05, 'epoch': 3.03}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.6061375737190247, 'eval_accuracy': 0.8683187560738581, 'eval_f1': 0.8617445445380302, 'eval_precision': 0.8711997430081487, 'eval_recall': 0.8640854766935886, 'eval_runtime': 51.9411, 'eval_samples_per_second': 79.244, 'eval_steps_per_second': 2.484, 'epoch': 3.03}




{'loss': 0.9329, 'grad_norm': 4.053191184997559, 'learning_rate': 3e-05, 'epoch': 3.11}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.6000697016716003, 'eval_accuracy': 0.8661321671525753, 'eval_f1': 0.8591313660290419, 'eval_precision': 0.8750348135359067, 'eval_recall': 0.861981456704839, 'eval_runtime': 49.9761, 'eval_samples_per_second': 82.359, 'eval_steps_per_second': 2.581, 'epoch': 3.11}




{'loss': 0.9049, 'grad_norm': 3.7903292179107666, 'learning_rate': 2.95e-05, 'epoch': 3.18}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5924625992774963, 'eval_accuracy': 0.8685617103984451, 'eval_f1': 0.8617161924024251, 'eval_precision': 0.8731252723397493, 'eval_recall': 0.8646873775472712, 'eval_runtime': 50.4365, 'eval_samples_per_second': 81.608, 'eval_steps_per_second': 2.558, 'epoch': 3.18}




{'loss': 0.9815, 'grad_norm': 4.7535929679870605, 'learning_rate': 2.9e-05, 'epoch': 3.26}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5806066393852234, 'eval_accuracy': 0.8685617103984451, 'eval_f1': 0.8622182587119455, 'eval_precision': 0.8717482848256731, 'eval_recall': 0.8644384448185363, 'eval_runtime': 49.3023, 'eval_samples_per_second': 83.485, 'eval_steps_per_second': 2.617, 'epoch': 3.26}




{'loss': 0.9507, 'grad_norm': 4.06498384475708, 'learning_rate': 2.8499999999999998e-05, 'epoch': 3.34}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5792554020881653, 'eval_accuracy': 0.8673469387755102, 'eval_f1': 0.8613137189565682, 'eval_precision': 0.8691317661017358, 'eval_recall': 0.863785050860545, 'eval_runtime': 49.2511, 'eval_samples_per_second': 83.572, 'eval_steps_per_second': 2.619, 'epoch': 3.34}




{'loss': 0.9608, 'grad_norm': 4.010519981384277, 'learning_rate': 2.8000000000000003e-05, 'epoch': 3.42}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5720817446708679, 'eval_accuracy': 0.8671039844509232, 'eval_f1': 0.8614434918253173, 'eval_precision': 0.8683467456488219, 'eval_recall': 0.8635546513196746, 'eval_runtime': 49.32, 'eval_samples_per_second': 83.455, 'eval_steps_per_second': 2.616, 'epoch': 3.42}




{'loss': 0.9409, 'grad_norm': 4.6165385246276855, 'learning_rate': 2.7500000000000004e-05, 'epoch': 3.5}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5688398480415344, 'eval_accuracy': 0.8651603498542274, 'eval_f1': 0.8591215620506901, 'eval_precision': 0.8658442906831004, 'eval_recall': 0.8612273702740664, 'eval_runtime': 49.46, 'eval_samples_per_second': 83.219, 'eval_steps_per_second': 2.608, 'epoch': 3.5}




{'loss': 0.8856, 'grad_norm': 3.7885537147521973, 'learning_rate': 2.7000000000000002e-05, 'epoch': 3.57}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5563119649887085, 'eval_accuracy': 0.870019436345967, 'eval_f1': 0.8649784624757803, 'eval_precision': 0.8714328398178769, 'eval_recall': 0.8667391442108526, 'eval_runtime': 49.4538, 'eval_samples_per_second': 83.229, 'eval_steps_per_second': 2.608, 'epoch': 3.57}




{'loss': 0.9099, 'grad_norm': 5.023146629333496, 'learning_rate': 2.6500000000000004e-05, 'epoch': 3.65}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5557062029838562, 'eval_accuracy': 0.8661321671525753, 'eval_f1': 0.8612849221676474, 'eval_precision': 0.8681216404872331, 'eval_recall': 0.8622434834974765, 'eval_runtime': 49.2588, 'eval_samples_per_second': 83.559, 'eval_steps_per_second': 2.619, 'epoch': 3.65}




{'loss': 0.9167, 'grad_norm': 4.845632076263428, 'learning_rate': 2.6000000000000002e-05, 'epoch': 3.73}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5526946783065796, 'eval_accuracy': 0.8685617103984451, 'eval_f1': 0.8638929665689471, 'eval_precision': 0.8701328643411136, 'eval_recall': 0.8647950070672704, 'eval_runtime': 48.692, 'eval_samples_per_second': 84.531, 'eval_steps_per_second': 2.649, 'epoch': 3.73}




{'loss': 0.9077, 'grad_norm': 4.908811569213867, 'learning_rate': 2.5500000000000003e-05, 'epoch': 3.81}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5431388020515442, 'eval_accuracy': 0.8705053449951409, 'eval_f1': 0.8669220309230944, 'eval_precision': 0.8721780266203637, 'eval_recall': 0.8673898965351654, 'eval_runtime': 49.1531, 'eval_samples_per_second': 83.738, 'eval_steps_per_second': 2.624, 'epoch': 3.81}




{'loss': 0.9005, 'grad_norm': 5.006350994110107, 'learning_rate': 2.5e-05, 'epoch': 3.88}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5389750003814697, 'eval_accuracy': 0.8731778425655977, 'eval_f1': 0.8697069550253945, 'eval_precision': 0.8748570776000245, 'eval_recall': 0.8700614515732695, 'eval_runtime': 48.8894, 'eval_samples_per_second': 84.19, 'eval_steps_per_second': 2.639, 'epoch': 3.88}




{'loss': 0.8596, 'grad_norm': 4.478883743286133, 'learning_rate': 2.45e-05, 'epoch': 3.96}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5375447273254395, 'eval_accuracy': 0.8707482993197279, 'eval_f1': 0.8655144499982467, 'eval_precision': 0.8731854105567156, 'eval_recall': 0.8667916588503376, 'eval_runtime': 48.919, 'eval_samples_per_second': 84.139, 'eval_steps_per_second': 2.637, 'epoch': 3.96}




{'loss': 0.8856, 'grad_norm': 5.626810073852539, 'learning_rate': 2.4e-05, 'epoch': 4.04}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5254101753234863, 'eval_accuracy': 0.8705053449951409, 'eval_f1': 0.8651370337427908, 'eval_precision': 0.8740741181870556, 'eval_recall': 0.8663241246770017, 'eval_runtime': 48.9016, 'eval_samples_per_second': 84.169, 'eval_steps_per_second': 2.638, 'epoch': 4.04}




{'loss': 0.8869, 'grad_norm': 5.352138996124268, 'learning_rate': 2.35e-05, 'epoch': 4.12}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5237516760826111, 'eval_accuracy': 0.8717201166180758, 'eval_f1': 0.865671971284358, 'eval_precision': 0.8731240454316355, 'eval_recall': 0.8679943344822763, 'eval_runtime': 48.7844, 'eval_samples_per_second': 84.371, 'eval_steps_per_second': 2.644, 'epoch': 4.12}




{'loss': 0.8069, 'grad_norm': 4.255963325500488, 'learning_rate': 2.3000000000000003e-05, 'epoch': 4.19}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5188306570053101, 'eval_accuracy': 0.8731778425655977, 'eval_f1': 0.867099882007312, 'eval_precision': 0.87435010386749, 'eval_recall': 0.8695338515827772, 'eval_runtime': 48.8651, 'eval_samples_per_second': 84.232, 'eval_steps_per_second': 2.64, 'epoch': 4.19}




{'loss': 0.8474, 'grad_norm': 4.418267250061035, 'learning_rate': 2.25e-05, 'epoch': 4.27}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5188424587249756, 'eval_accuracy': 0.8709912536443148, 'eval_f1': 0.8648806741668069, 'eval_precision': 0.8728845356582566, 'eval_recall': 0.8671342829268437, 'eval_runtime': 50.6662, 'eval_samples_per_second': 81.238, 'eval_steps_per_second': 2.546, 'epoch': 4.27}




{'loss': 0.8243, 'grad_norm': 5.182613372802734, 'learning_rate': 2.2000000000000003e-05, 'epoch': 4.35}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.517697811126709, 'eval_accuracy': 0.8726919339164237, 'eval_f1': 0.8683813511248041, 'eval_precision': 0.8756396608060336, 'eval_recall': 0.8695726524990666, 'eval_runtime': 49.672, 'eval_samples_per_second': 82.864, 'eval_steps_per_second': 2.597, 'epoch': 4.35}




{'loss': 0.8437, 'grad_norm': 5.045173645019531, 'learning_rate': 2.15e-05, 'epoch': 4.43}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5107200741767883, 'eval_accuracy': 0.8726919339164237, 'eval_f1': 0.8681911737274114, 'eval_precision': 0.8742153303246233, 'eval_recall': 0.8693299915121044, 'eval_runtime': 51.4774, 'eval_samples_per_second': 79.957, 'eval_steps_per_second': 2.506, 'epoch': 4.43}




{'loss': 0.7761, 'grad_norm': 4.515903949737549, 'learning_rate': 2.1e-05, 'epoch': 4.5}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5024524331092834, 'eval_accuracy': 0.8739067055393586, 'eval_f1': 0.8699913214103319, 'eval_precision': 0.8750598594391856, 'eval_recall': 0.8708382312909497, 'eval_runtime': 50.3936, 'eval_samples_per_second': 81.677, 'eval_steps_per_second': 2.56, 'epoch': 4.5}




{'loss': 0.784, 'grad_norm': 4.124168872833252, 'learning_rate': 2.05e-05, 'epoch': 4.58}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5016156435012817, 'eval_accuracy': 0.8768221574344023, 'eval_f1': 0.8716643545809573, 'eval_precision': 0.8777660734719462, 'eval_recall': 0.8734492800744772, 'eval_runtime': 50.9735, 'eval_samples_per_second': 80.748, 'eval_steps_per_second': 2.531, 'epoch': 4.58}




{'loss': 0.8055, 'grad_norm': 4.525731563568115, 'learning_rate': 2e-05, 'epoch': 4.66}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5018946528434753, 'eval_accuracy': 0.8739067055393586, 'eval_f1': 0.8700873351301581, 'eval_precision': 0.8771679126025378, 'eval_recall': 0.8709610203860829, 'eval_runtime': 51.3104, 'eval_samples_per_second': 80.218, 'eval_steps_per_second': 2.514, 'epoch': 4.66}




{'loss': 0.8109, 'grad_norm': 4.918236255645752, 'learning_rate': 1.9500000000000003e-05, 'epoch': 4.74}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.49602368474006653, 'eval_accuracy': 0.8770651117589893, 'eval_f1': 0.8724184770886717, 'eval_precision': 0.8785135286746661, 'eval_recall': 0.8739509959414178, 'eval_runtime': 50.8816, 'eval_samples_per_second': 80.894, 'eval_steps_per_second': 2.535, 'epoch': 4.74}




{'loss': 0.8697, 'grad_norm': 5.5949811935424805, 'learning_rate': 1.9e-05, 'epoch': 4.82}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.488719642162323, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.8749420478853339, 'eval_precision': 0.8815954350698849, 'eval_recall': 0.875699865312381, 'eval_runtime': 51.2212, 'eval_samples_per_second': 80.357, 'eval_steps_per_second': 2.518, 'epoch': 4.82}




{'loss': 0.7996, 'grad_norm': 5.343413829803467, 'learning_rate': 1.85e-05, 'epoch': 4.89}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.4877583384513855, 'eval_accuracy': 0.8773080660835763, 'eval_f1': 0.8719325240901645, 'eval_precision': 0.8781732781341105, 'eval_recall': 0.8733867416979888, 'eval_runtime': 50.5742, 'eval_samples_per_second': 81.385, 'eval_steps_per_second': 2.551, 'epoch': 4.89}




{'loss': 0.8002, 'grad_norm': 5.200406551361084, 'learning_rate': 1.8e-05, 'epoch': 4.97}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.48469260334968567, 'eval_accuracy': 0.8785228377065112, 'eval_f1': 0.8737883601515327, 'eval_precision': 0.880730357224163, 'eval_recall': 0.8751595996352247, 'eval_runtime': 49.1632, 'eval_samples_per_second': 83.721, 'eval_steps_per_second': 2.624, 'epoch': 4.97}




{'loss': 0.7404, 'grad_norm': 6.2063446044921875, 'learning_rate': 1.75e-05, 'epoch': 5.05}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.48882460594177246, 'eval_accuracy': 0.8770651117589893, 'eval_f1': 0.8726392692763753, 'eval_precision': 0.8795280651438127, 'eval_recall': 0.8739210392423884, 'eval_runtime': 50.4391, 'eval_samples_per_second': 81.603, 'eval_steps_per_second': 2.558, 'epoch': 5.05}




{'loss': 0.7326, 'grad_norm': 3.9257824420928955, 'learning_rate': 1.7000000000000003e-05, 'epoch': 5.13}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.48825767636299133, 'eval_accuracy': 0.8746355685131195, 'eval_f1': 0.8701209700636171, 'eval_precision': 0.8772238194071659, 'eval_recall': 0.8717747556188732, 'eval_runtime': 50.0124, 'eval_samples_per_second': 82.3, 'eval_steps_per_second': 2.579, 'epoch': 5.13}




{'loss': 0.797, 'grad_norm': 5.060814380645752, 'learning_rate': 1.65e-05, 'epoch': 5.2}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.4892158806324005, 'eval_accuracy': 0.8729348882410107, 'eval_f1': 0.8689396976822885, 'eval_precision': 0.8751671494168254, 'eval_recall': 0.8700644770381991, 'eval_runtime': 49.9051, 'eval_samples_per_second': 82.477, 'eval_steps_per_second': 2.585, 'epoch': 5.2}




{'loss': 0.8084, 'grad_norm': 4.504694938659668, 'learning_rate': 1.6000000000000003e-05, 'epoch': 5.28}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.48001018166542053, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.8752056640329726, 'eval_precision': 0.8816582892567761, 'eval_recall': 0.876272144996767, 'eval_runtime': 49.7474, 'eval_samples_per_second': 82.738, 'eval_steps_per_second': 2.593, 'epoch': 5.28}




{'loss': 0.8025, 'grad_norm': 5.151145935058594, 'learning_rate': 1.55e-05, 'epoch': 5.36}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.47617706656455994, 'eval_accuracy': 0.8768221574344023, 'eval_f1': 0.8726680937092665, 'eval_precision': 0.8771151901740718, 'eval_recall': 0.8736036311111193, 'eval_runtime': 49.5334, 'eval_samples_per_second': 83.095, 'eval_steps_per_second': 2.604, 'epoch': 5.36}




{'loss': 0.7087, 'grad_norm': 5.042810440063477, 'learning_rate': 1.5e-05, 'epoch': 5.44}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.47620466351509094, 'eval_accuracy': 0.8782798833819242, 'eval_f1': 0.87497004051077, 'eval_precision': 0.8806975888010735, 'eval_recall': 0.8755686029577235, 'eval_runtime': 49.9256, 'eval_samples_per_second': 82.443, 'eval_steps_per_second': 2.584, 'epoch': 5.44}




{'loss': 0.7502, 'grad_norm': 5.252780914306641, 'learning_rate': 1.45e-05, 'epoch': 5.51}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.4753693640232086, 'eval_accuracy': 0.8785228377065112, 'eval_f1': 0.8753647515204712, 'eval_precision': 0.8801227365007281, 'eval_recall': 0.8758968338566632, 'eval_runtime': 49.701, 'eval_samples_per_second': 82.815, 'eval_steps_per_second': 2.596, 'epoch': 5.51}




{'loss': 0.7386, 'grad_norm': 4.32672643661499, 'learning_rate': 1.4000000000000001e-05, 'epoch': 5.59}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.47379669547080994, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.8754319224053987, 'eval_precision': 0.8806675702100011, 'eval_recall': 0.8760217928047063, 'eval_runtime': 49.655, 'eval_samples_per_second': 82.892, 'eval_steps_per_second': 2.598, 'epoch': 5.59}




{'loss': 0.8173, 'grad_norm': 4.857478618621826, 'learning_rate': 1.3500000000000001e-05, 'epoch': 5.67}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.4712308645248413, 'eval_accuracy': 0.8792517006802721, 'eval_f1': 0.8749799985940361, 'eval_precision': 0.8801373033918221, 'eval_recall': 0.8762236995104532, 'eval_runtime': 49.9395, 'eval_samples_per_second': 82.42, 'eval_steps_per_second': 2.583, 'epoch': 5.67}




{'loss': 0.8213, 'grad_norm': 4.416097164154053, 'learning_rate': 1.3000000000000001e-05, 'epoch': 5.75}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.46962377429008484, 'eval_accuracy': 0.8790087463556852, 'eval_f1': 0.8750143770029749, 'eval_precision': 0.879517876232382, 'eval_recall': 0.8756484742320205, 'eval_runtime': 49.5319, 'eval_samples_per_second': 83.098, 'eval_steps_per_second': 2.604, 'epoch': 5.75}




{'loss': 0.7184, 'grad_norm': 4.346248149871826, 'learning_rate': 1.25e-05, 'epoch': 5.83}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.47138118743896484, 'eval_accuracy': 0.880466472303207, 'eval_f1': 0.8758928938078699, 'eval_precision': 0.8826157033462411, 'eval_recall': 0.8768000530071781, 'eval_runtime': 50.2999, 'eval_samples_per_second': 81.829, 'eval_steps_per_second': 2.565, 'epoch': 5.83}




{'loss': 0.7168, 'grad_norm': 4.947293281555176, 'learning_rate': 1.2e-05, 'epoch': 5.9}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.46817025542259216, 'eval_accuracy': 0.8748785228377065, 'eval_f1': 0.869518306953471, 'eval_precision': 0.87712624552976, 'eval_recall': 0.8714885684526, 'eval_runtime': 50.2079, 'eval_samples_per_second': 81.979, 'eval_steps_per_second': 2.569, 'epoch': 5.9}




{'loss': 0.7558, 'grad_norm': 4.820581436157227, 'learning_rate': 1.1500000000000002e-05, 'epoch': 5.98}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.4673177897930145, 'eval_accuracy': 0.8760932944606414, 'eval_f1': 0.8711123206151186, 'eval_precision': 0.8786942477792753, 'eval_recall': 0.8728619451532288, 'eval_runtime': 50.633, 'eval_samples_per_second': 81.291, 'eval_steps_per_second': 2.548, 'epoch': 5.98}




{'loss': 0.7169, 'grad_norm': 4.377044677734375, 'learning_rate': 1.1000000000000001e-05, 'epoch': 6.06}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.46775296330451965, 'eval_accuracy': 0.8782798833819242, 'eval_f1': 0.8735617162330748, 'eval_precision': 0.8800886974673685, 'eval_recall': 0.8748617977732224, 'eval_runtime': 49.9977, 'eval_samples_per_second': 82.324, 'eval_steps_per_second': 2.58, 'epoch': 6.06}




{'loss': 0.7042, 'grad_norm': 4.936257839202881, 'learning_rate': 1.05e-05, 'epoch': 6.14}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.46284279227256775, 'eval_accuracy': 0.8758503401360545, 'eval_f1': 0.8709985887620147, 'eval_precision': 0.8772956583762153, 'eval_recall': 0.8723718558867367, 'eval_runtime': 50.6139, 'eval_samples_per_second': 81.322, 'eval_steps_per_second': 2.549, 'epoch': 6.14}




{'loss': 0.7332, 'grad_norm': 5.362953186035156, 'learning_rate': 1e-05, 'epoch': 6.21}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.46724751591682434, 'eval_accuracy': 0.8765792031098154, 'eval_f1': 0.8719893491508406, 'eval_precision': 0.879019819340113, 'eval_recall': 0.8730514618344557, 'eval_runtime': 49.8858, 'eval_samples_per_second': 82.508, 'eval_steps_per_second': 2.586, 'epoch': 6.21}




{'loss': 0.7027, 'grad_norm': 4.152902603149414, 'learning_rate': 9.5e-06, 'epoch': 6.29}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.46439963579177856, 'eval_accuracy': 0.8785228377065112, 'eval_f1': 0.8735523200546538, 'eval_precision': 0.8804909761784245, 'eval_recall': 0.8749045098067426, 'eval_runtime': 50.3797, 'eval_samples_per_second': 81.7, 'eval_steps_per_second': 2.561, 'epoch': 6.29}




{'loss': 0.7283, 'grad_norm': 4.413781642913818, 'learning_rate': 9e-06, 'epoch': 6.37}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.46416357159614563, 'eval_accuracy': 0.8775510204081632, 'eval_f1': 0.8724396776688207, 'eval_precision': 0.8793020021973419, 'eval_recall': 0.8739752415363203, 'eval_runtime': 49.8025, 'eval_samples_per_second': 82.646, 'eval_steps_per_second': 2.59, 'epoch': 6.37}




{'loss': 0.7305, 'grad_norm': 4.686371326446533, 'learning_rate': 8.500000000000002e-06, 'epoch': 6.45}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.4613053500652313, 'eval_accuracy': 0.8780369290573372, 'eval_f1': 0.8728666918594699, 'eval_precision': 0.8784721423973952, 'eval_recall': 0.8741626802482955, 'eval_runtime': 49.8874, 'eval_samples_per_second': 82.506, 'eval_steps_per_second': 2.586, 'epoch': 6.45}




In [None]:
# training_args = TrainingArguments(**CONFIG["training_args"])

# image_processor = AutoImageProcessor.from_pretrained(MODELS)
# model = AutoModelForImageClassification.from_pretrained(
# MODELS,
# num_labels=len(CONFIG["label2int"]),
# id2label=CONFIG["label2int"],
# label2id=CONFIG["int2label"],
# ignore_mismatched_sizes=True,
# )


# training_args = TrainingArguments(**CONFIG["training_args"])

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=ds["train"],
#     eval_dataset=ds["test"],
#     tokenizer=image_processor,
#     data_collator=collate_fn,
#     compute_metrics=compute_metrics,
#     # callbacks=[early_stopping_callback],
#     preprocess_logits_for_metrics=preprocess_logits_for_metrics,
# )

# # Train the model
# trainer.train()

# mlflow.end_run()