""" Finetuning example. """ from __future__ import print_function import sys import numpy as np from os.path import abspath, dirname sys.path.insert(0, dirname(dirname(abspath(__file__)))) import json import math from torchmoji.model_def import torchmoji_transfer from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH from torchmoji.finetuning import ( load_benchmark, finetune) from torchmoji.class_avg_finetuning import class_avg_finetune def roundup(x): return int(math.ceil(x / 10.0)) * 10 # Format: (dataset_name, # path_to_dataset, # nb_classes, # use_f1_score) DATASETS = [ #('SE0714', '../data/SE0714/raw.pickle', 3, True), #('Olympic', '../data/Olympic/raw.pickle', 4, True), #('PsychExp', '../data/PsychExp/raw.pickle', 7, True), #('SS-Twitter', '../data/SS-Twitter/raw.pickle', 2, False), ('SS-Youtube', '../data/SS-Youtube/raw.pickle', 2, False), #('SE1604', '../data/SE1604/raw.pickle', 3, False), # Excluded due to Twitter's ToS #('SCv1', '../data/SCv1/raw.pickle', 2, True), #('SCv2-GEN', '../data/SCv2-GEN/raw.pickle', 2, True) ] RESULTS_DIR = 'results' # 'new' | 'last' | 'full' | 'chain-thaw' FINETUNE_METHOD = 'last' VERBOSE = 1 nb_tokens = 50000 nb_epochs = 1000 epoch_size = 1000 with open(VOCAB_PATH, 'r') as f: vocab = json.load(f) for rerun_iter in range(5): for p in DATASETS: # debugging assert len(vocab) == nb_tokens dset = p[0] path = p[1] nb_classes = p[2] use_f1_score = p[3] if FINETUNE_METHOD == 'last': extend_with = 0 elif FINETUNE_METHOD in ['new', 'full', 'chain-thaw']: extend_with = 10000 else: raise ValueError('Finetuning method not recognised!') # Load dataset. data = load_benchmark(path, vocab, extend_with=extend_with) (X_train, y_train) = (data['texts'][0], data['labels'][0]) (X_val, y_val) = (data['texts'][1], data['labels'][1]) (X_test, y_test) = (data['texts'][2], data['labels'][2]) weight_path = PRETRAINED_PATH if FINETUNE_METHOD != 'new' else None nb_model_classes = 2 if use_f1_score else nb_classes model = torchmoji_transfer( nb_model_classes, weight_path, extend_embedding=data['added']) print(model) # Training print('Training: {}'.format(path)) if use_f1_score: model, result = class_avg_finetune(model, data['texts'], data['labels'], nb_classes, data['batch_size'], FINETUNE_METHOD, verbose=VERBOSE) else: model, result = finetune(model, data['texts'], data['labels'], nb_classes, data['batch_size'], FINETUNE_METHOD, metric='acc', verbose=VERBOSE) # Write results if use_f1_score: print('Overall F1 score (dset = {}): {}'.format(dset, result)) with open('{}/{}_{}_{}_results.txt'. format(RESULTS_DIR, dset, FINETUNE_METHOD, rerun_iter), "w") as f: f.write("F1: {}\n".format(result)) else: print('Test accuracy (dset = {}): {}'.format(dset, result)) with open('{}/{}_{}_{}_results.txt'. format(RESULTS_DIR, dset, FINETUNE_METHOD, rerun_iter), "w") as f: f.write("Acc: {}\n".format(result))