Spaces:
Sleeping
Sleeping
| import wandb | |
| from datasets import load_metric | |
| from transformers import pipeline | |
| import yaml | |
# Load the run configuration.
# The encoding is explicit so that parsing does not depend on the platform's
# default text encoding.
with open('config/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Fine-tuned model to evaluate.
model_name = "results_student"  # Replace with the path to the student model
tokenizer_name = "distilbert-base-uncased"

# Evaluation metrics.
# NOTE(review): `load_metric` is deprecated in recent `datasets` releases in
# favour of the separate `evaluate` package - confirm the pinned version.
bleu = load_metric("bleu")
rouge = load_metric("rouge")

# Initialise wandb with the project/entity read from the configuration.
wandb.init(project=config['wandb']['project'], entity=config['wandb']['entity'])
def evaluate_model(model_name, tokenizer_name):
    """Score a text-classification model with BLEU and ROUGE and log to wandb.

    Builds a ``text-classification`` pipeline from the given model and
    tokenizer, runs it on two hard-coded example sentences, and scores the
    predicted labels against the reference sentences with the module-level
    ``bleu`` and ``rouge`` metrics.

    Args:
        model_name: Model identifier or path passed to ``pipeline(model=...)``.
        tokenizer_name: Tokenizer identifier passed to
            ``pipeline(tokenizer=...)``.

    Returns:
        A dict with the keys ``"bleu_score"`` and ``"rouge_score"``; the same
        payload that is logged to wandb.
    """
    nlp = pipeline("text-classification", model=model_name, tokenizer=tokenizer_name)

    # Hard-coded stand-in examples for the evaluation.
    examples = [
        {"reference": "This is a great movie.", "candidate": "This is a fantastic movie."},
        {"reference": "I love this film.", "candidate": "I enjoy this movie."},
    ]
    references = [e["reference"] for e in examples]
    # NOTE(review): the predictions are the classifier's *labels*, which are
    # then compared with full reference sentences. That is unlikely to be a
    # meaningful BLEU/ROUGE comparison - confirm the intended evaluation.
    candidates = [nlp(e["candidate"])[0]["label"] for e in examples]

    # The BLEU metric works on tokens: each prediction is a list of tokens and
    # each example carries a *list* of tokenised references. Passing raw
    # strings does not match the metric's expected input format.
    bleu_score = bleu.compute(
        predictions=[candidate.split() for candidate in candidates],
        references=[[reference.split()] for reference in references],
    )
    rouge_score = rouge.compute(predictions=candidates, references=references)

    # Reduce ROUGE aggregate objects to their mid F-measure so wandb receives
    # plain numbers; values that are already scalars are passed through.
    rouge_f1 = {
        name: (score.mid.fmeasure if hasattr(score, "mid") else score)
        for name, score in rouge_score.items()
    }

    # Log the scores to wandb and hand them back to the caller.
    scores = {
        "bleu_score": bleu_score,
        "rouge_score": rouge_f1,
    }
    wandb.log(scores)
    return scores
# Run the evaluation for the configured student model.
evaluate_model(model_name=model_name, tokenizer_name=tokenizer_name)