|
"""Finetuning example. |
|
|
|
Trains the torchMoji model on the SemEval emotion dataset, using the 'last' |
|
finetuning method and the class average F1 metric. |
|
|
|
The 'last' method does the following: |
|
0) Load all weights except for the softmax layer. (The plain 'last' recipe
   adds no tokens to the vocabulary, but this example passes extend_with=10000
   to load_benchmark, so the vocabulary and embedding layer are extended.)
|
1) Freeze all layers except for the softmax layer. |
|
2) Train. |
|
|
|
The class average F1 metric does the following: |
|
1) For each class, relabel the dataset into binary classification |
|
(belongs to/does not belong to this class). |
|
2) Calculate F1 score for each class. |
|
3) Compute the average of all F1 scores. |
|
""" |
|
|
|
from __future__ import print_function |
|
import example_helper |
|
import json |
|
from torchmoji.finetuning import load_benchmark |
|
from torchmoji.class_avg_finetuning import class_avg_finetune |
|
from torchmoji.model_def import torchmoji_transfer |
|
from torchmoji.global_variables import PRETRAINED_PATH |
|
|
|
# Location of the SemEval SE0714 emotion benchmark and the number of
# emotion classes it is labeled with.
DATASET_PATH = '../data/SE0714/raw.pickle'
nb_classes = 3

# Load the vocabulary that the pretrained torchMoji model was trained with.
with open('../model/vocabulary.json', 'r') as vocab_file:
    vocab = json.load(vocab_file)
|
|
|
|
|
|
|
|
|
# Tokenize the benchmark dataset with the pretrained vocabulary, allowing up
# to 10000 new tokens from the dataset to be appended to it; data['added']
# records how many tokens were actually added (used below to size the
# extended embedding layer).
data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the transfer model. The softmax output layer must be sized to the
# dataset's class count (nb_classes) — the original hard-coded `2` here did
# not match the 3-class SE0714 labels passed to class_avg_finetune below.
# extend_embedding grows the embedding layer by the number of tokens that
# load_benchmark added to the vocabulary.
model = torchmoji_transfer(nb_classes, PRETRAINED_PATH,
                           extend_embedding=data['added'])
print(model)

# Finetune with the 'last' method (all layers frozen except the softmax
# layer) and report the class-average F1 score across the nb_classes
# one-vs-rest relabelings.
model, f1 = class_avg_finetune(model, data['texts'], data['labels'],
                               nb_classes, data['batch_size'], method='last')
print('F1: {}'.format(f1))
|
|