# GenAI_project/evaluations/evaluate_models.py
import wandb
import yaml
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from data.datasets import load_and_tokenize_data
from utils.monitor import measure_resources
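# Note on the project-local helpers (assumptions based on how they are used below):
# load_and_tokenize_data(config) is expected to return (train_dataset, test_dataset)
# with a raw 'text' column, and measure_resources(trainer, model_name) is expected
# to run training while recording peak memory (MB) and wall-clock time (seconds).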
# Load the configuration
with open('config/config.yaml', 'r') as f:
    config = yaml.safe_load(f)
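# For reference, this script reads the following keys from config/config.yaml
# (a sketch inferred from the accesses below; the example values are placeholders):
#
#   wandb:
#     project: my-project        # hypothetical value
#     entity: my-entity          # hypothetical value
#   training:
#     num_epochs: 3
#     batch_size: 16
#   evaluation:
#     models:
#       - distilbert-base-uncased
#       - bert-base-uncased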
# Initialize Weights & Biases
wandb.init(project=config['wandb']['project'], entity=config['wandb']['entity'])
# Load the train and test datasets
train_dataset, test_dataset = load_and_tokenize_data(config)
def evaluate_model(model_name):
    # Load the pretrained model and its tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Tokenize the data with this model's tokenizer (local names avoid shadowing
    # the module-level datasets, which would raise UnboundLocalError)
    tokenized_train = train_dataset.map(lambda x: tokenizer(x['text'], padding='max_length', truncation=True), batched=True)
    tokenized_test = test_dataset.map(lambda x: tokenizer(x['text'], padding='max_length', truncation=True), batched=True)
    # Define the training arguments
    training_args = TrainingArguments(
        output_dir=f'./results/{model_name}',
        num_train_epochs=config['training']['num_epochs'],
        per_device_train_batch_size=config['training']['batch_size'],
        per_device_eval_batch_size=config['training']['batch_size'],
        evaluation_strategy='epoch',
        save_steps=10_000,
        save_total_limit=2,
        logging_dir='./logs',
        logging_steps=10,
    )
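    # Note: recent transformers releases rename `evaluation_strategy` to
    # `eval_strategy`; if your installed version warns about or rejects the old
    # keyword, update the argument above accordingly.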
    # Create the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
    )
    # Measure resources while training the model
    peak_memory, training_time = measure_resources(trainer, model_name)
    # Evaluate performance and log everything to wandb
    metrics = trainer.evaluate()
    wandb.log({
        'model_name': model_name,
        'peak_memory_MB': peak_memory,
        'training_time_seconds': training_time,
        **metrics
    })
# Evaluate each model listed in the configuration
for model_name in config['evaluation']['models']:
    evaluate_model(model_name)
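# Close the wandb run once every model has been evaluated
wandb.finish()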