"""Fine-tune a pretrained FastSpeech checkpoint on Najdi-dialect speech data.

Executing this file loads the "m6011/sada2022" dataset, keeps only the
examples whose ``SpeakerDialect`` field equals ``'Najdi'``, loads the
"facebook/fastspeech2-en-ljspeech" checkpoint, and runs a ``Trainer`` for
five epochs, writing its outputs under ``./results``.
"""

from datasets import load_dataset

from transformers import (
    FastSpeechForConditionalGeneration,
    Trainer,
    TrainingArguments,
)


# --- Data -------------------------------------------------------------------

dataset = load_dataset("m6011/sada2022")

# Keep only the examples recorded as Najdi dialect. The result is indexed by
# 'train' and 'test' further down, so the filter is presumably applied to every
# split of a split mapping -- TODO confirm both split names exist for this
# dataset.
najdi_data = dataset.filter(
    lambda example: example['SpeakerDialect'] == 'Najdi'
)


# --- Model ------------------------------------------------------------------

# NOTE(review): confirm that the installed transformers release actually
# exports FastSpeechForConditionalGeneration and that this checkpoint is
# published in a format from_pretrained() can load; neither can be verified
# from this file alone.
model = FastSpeechForConditionalGeneration.from_pretrained(
    "facebook/fastspeech2-en-ljspeech"
)


# --- Training configuration -------------------------------------------------

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    num_train_epochs=5,
)

# NOTE(review): no evaluation schedule is set in training_args, so the eval
# split passed here may only be consumed by an explicit trainer.evaluate()
# call -- confirm whether periodic evaluation during training was intended.
# No preprocessing or data collator is configured either; verify that the raw
# dataset columns match what the model's forward() expects.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=najdi_data['train'],
    eval_dataset=najdi_data['test'],
)


# --- Run --------------------------------------------------------------------

trainer.train()