LabSE training error

#1
by ryjovsky - opened

Hi I'm trying to train Labse for Question Answering using squad dataset. I'm using also huggingface tutorial on Question Answering and i got this error. Could you tell me what I am doing wrong or what should I correct. I'm new and i'm stuck

from datasets import load_dataset
raw_datasets = load_dataset("squad", split='train')

from transformers import BertTokenizerFast, BertModel
from transformers import AutoTokenizer

model_checkpoint = "setu4993/LaBSE"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = BertModel.from_pretrained(model_checkpoint)

max_length = 384
stride = 128

def preprocess_training_examples(examples):
questions = [q.strip() for q in examples["question"]]
inputs = tokenizer(
questions,
examples["context"],
max_length=max_length,
truncation="only_second",
stride=stride,
return_overflowing_tokens=True,
return_offsets_mapping=True,
padding="max_length",
)

   offset_mapping = inputs.pop("offset_mapping")
   sample_map = inputs.pop("overflow_to_sample_mapping")
   answers = examples["answers"]
   start_positions = []
   end_positions = []

   for i, offset in enumerate(offset_mapping):
       sample_idx = sample_map[i]
       answer = answers[sample_idx]
       start_char = answer["answer_start"][0]
       end_char = answer["answer_start"][0] + len(answer["text"][0])
       sequence_ids = inputs.sequence_ids(i)

      # Find the start and end of the context
       idx = 0
       while sequence_ids[idx] != 1:
           idx += 1
       context_start = idx
       while sequence_ids[idx] == 1:
           idx += 1
       context_end = idx - 1

    # If the answer is not fully inside the context, label is (0, 0)
       if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
           start_positions.append(0)
           end_positions.append(0)
       else:
           # Otherwise it's the start and end token positions
           idx = context_start
           while idx <= context_end and offset[idx][0] <= start_char:
               idx += 1
           start_positions.append(idx - 1)

           idx = context_end
           while idx >= context_start and offset[idx][1] >= end_char:
               idx -= 1
           end_positions.append(idx + 1)

   inputs["start_positions"] = start_positions
   inputs["end_positions"] = end_positions
   return inputs

train_dataset = raw_datasets.map(
preprocess_training_examples,
batched=True,
remove_columns=raw_datasets.column_names,
)
len(raw_datasets), len(train_dataset)

from transformers import TrainingArguments

args = TrainingArguments(
"bert-finetuned-squad",
save_strategy="epoch",
learning_rate=2e-5,
num_train_epochs=3,
weight_decay=0.01,
)

from transformers import DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer)

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    tokenizer=tokenizer,

)
trainer.train()

TypeError Traceback (most recent call last)
in ()
10 tokenizer=tokenizer,
11 )
---> 12 trainer.train()

4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []

TypeError: forward() got an unexpected keyword argument 'labels'

This comment has been hidden

Sign up or log in to comment