File size: 2,163 Bytes
fa64206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import wandb
import yaml
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from data.datasets import load_and_tokenize_data
from utils.monitor import measure_resources

# Read the run settings from the YAML configuration file.
with open('config/config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Start the Weights & Biases run, using the project and entity named in the configuration.
wandb.init(
    project=config['wandb']['project'],
    entity=config['wandb']['entity'],
)

# Obtain the train and test datasets from the project's data helper.
train_dataset, test_dataset = load_and_tokenize_data(config)

def evaluate_model(model_name):
    """Benchmark one pretrained checkpoint and log its results to wandb.

    Reads the module-level ``config``, ``train_dataset`` and ``test_dataset``.
    Both datasets are tokenized with the checkpoint's own tokenizer and the
    results are bound to new local names, so the module-level names are never
    rebound and each call starts from the same datasets.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer. It is also used as the sub-directory
            name under ``./results`` and logged as ``'model_name'``.

    Returns:
        None. The evaluation metrics, together with the peak memory and the
        training time reported by ``measure_resources``, are sent to
        ``wandb.log``.
    """
    # Load the model and its matching tokenizer.
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    def tokenize_batch(batch):
        """Tokenize the ``'text'`` column of a batch, padded and truncated to max length."""
        return tokenizer(batch['text'], padding='max_length', truncation=True)

    # Tokenize the data. The results must NOT be assigned back to
    # `train_dataset` / `test_dataset`: assigning to those names anywhere in
    # this function would make them local, and reading them on the right-hand
    # side would then raise UnboundLocalError before anything runs.
    tokenized_train = train_dataset.map(tokenize_batch, batched=True)
    tokenized_test = test_dataset.map(tokenize_batch, batched=True)

    # Define the training arguments.
    # NOTE(review): recent transformers releases rename `evaluation_strategy`
    # to `eval_strategy` - confirm against the pinned transformers version.
    training_args = TrainingArguments(
        output_dir=f'./results/{model_name}',
        num_train_epochs=config['training']['num_epochs'],
        per_device_train_batch_size=config['training']['batch_size'],
        per_device_eval_batch_size=config['training']['batch_size'],
        evaluation_strategy='epoch',
        save_steps=10_000,
        save_total_limit=2,
        logging_dir='./logs',
        logging_steps=10,
    )

    # Create the Trainer.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
    )

    # Measure resource usage. Presumably `measure_resources` runs the training
    # itself and returns (peak memory in MB, training time in seconds) - verify
    # against utils.monitor.
    peak_memory, training_time = measure_resources(trainer, model_name)

    # Evaluate performance on the evaluation dataset.
    metrics = trainer.evaluate()

    wandb.log({
        'model_name': model_name,
        'peak_memory_MB': peak_memory,
        'training_time_seconds': training_time,
        **metrics
    })

# Run the benchmark once for every model listed in the configuration.
for candidate in config['evaluation']['models']:
    evaluate_model(candidate)