Short-Answer-Feedback
/

mbart-finetuned-saf-legal-domain

Text2Text Generation

Generated from Trainer

Inference Endpoints

Model card Files Files and versions Community

JohnnyBoy00 committed on Dec 12, 2022

Commit

0aa0322

•

1 Parent(s): 028cc28

Upload preprocessing.py

Files changed (1) hide show

preprocessing.py +24 -0

preprocessing.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# Token length passed as `max_length` when tokenizing the model inputs
+# (inputs are padded to this length and truncated beyond it).
+MAX_INPUT_LENGTH = 256
+# Token length passed as `max_length` when tokenizing the targets
+# (targets are padded to this length and truncated beyond it).
+MAX_TARGET_LENGTH = 128
+def preprocess_function(examples):
+    """
+    Preprocess entries of the given dataset (should be used with a `map` function)
+    Params:
+        examples (Dataset): dataset to be preprocessed
+    Returns:
+        model_inputs (BatchEncoding): tokenized dataset entries
+    """
+    inputs, targets = [], []
+    for i in range(len(examples['question'])):
+        inputs.append(f"Antwort: {examples['provided_answer'][i]} Lösung: {examples['reference_answer'][i]} Frage: {examples['question'][i]}")
+        targets.append(f"{examples['verification_feedback'][i]} Feedback: {examples['answer_feedback'][i]}")
+    # apply tokenization to inputs and labels
+    model_inputs = tokenizer(inputs, max_length=MAX_INPUT_LENGTH, padding='max_length', truncation=True)
+    labels = tokenizer(text_target=targets, max_length=MAX_TARGET_LENGTH, padding='max_length', truncation=True)
+    model_inputs['labels'] = labels['input_ids']
+    return model_inputs