metric / string_operators.py
Elron's picture
Upload string_operators.py with huggingface_hub
c9ad8e1 verified
raw
history blame
No virus
1.1 kB
import re
from typing import List
from .operators import FieldOperator
class Split(FieldOperator):
    """Break a string value into a list of substrings on a literal separator.

    Attributes:
        by: Separator string handed to ``str.split``.
    """

    by: str

    def process_value(self, value: str) -> List[str]:
        """Return the parts of ``value`` obtained by splitting on ``self.by``."""
        separator = self.by
        parts = value.split(separator)
        return parts
class RegexSplit(FieldOperator):
    """Break a string value into a list of substrings using a regex delimiter.

    Attributes:
        by: Regular-expression pattern whose matches delimit the pieces.
    """

    by: str

    def process_value(self, value: str) -> List[str]:
        """Return the parts of ``value`` separated by matches of ``self.by``."""
        # Same semantics as ``re.split(self.by, value)``: default flags and
        # no limit on the number of splits.
        delimiter = re.compile(self.by)
        return delimiter.split(value)
class TokensSplit(FieldOperator):
    """Break a string value into tokens using a pretrained tokenizer.

    Attributes:
        model: Identifier passed to ``AutoTokenizer.from_pretrained``.
    """

    model: str
    # NOTE(review): presumably read by the base class to check that the
    # package is installed -- confirm against FieldOperator.
    _requirements_list = ["transformers"]

    def prepare(self):
        """Run the parent preparation, then load the tokenizer for ``self.model``."""
        super().prepare()
        # Imported here rather than at module level, so importing this module
        # does not itself import ``transformers``.
        from transformers import AutoTokenizer

        loaded_tokenizer = AutoTokenizer.from_pretrained(self.model)
        self.tokenizer = loaded_tokenizer

    def process_value(self, value: str) -> List[str]:
        """Return the result of ``self.tokenizer.tokenize(value)``."""
        tokens = self.tokenizer.tokenize(value)
        return tokens
class Join(FieldOperator):
    """Concatenate a list of strings into one string with a separator.

    Attributes:
        by: String placed between consecutive items.
    """

    by: str

    def process_value(self, value: List[str]) -> str:
        """Return the items of ``value`` joined together with ``self.by``."""
        glue = self.by
        joined = glue.join(value)
        return joined
class Strip(FieldOperator):
    """Remove leading and trailing whitespace from a string value."""

    def process_value(self, value: str) -> str:
        """Return ``value`` with surrounding whitespace removed via ``str.strip``."""
        trimmed = value.strip()
        return trimmed
class Replace(FieldOperator):
    """Substitute every occurrence of one substring with another.

    Attributes:
        old: Substring to search for.
        new: Text that replaces each occurrence of ``old``.
    """

    old: str
    new: str

    def process_value(self, value: str) -> str:
        """Return ``value`` with each ``self.old`` replaced by ``self.new``."""
        needle, replacement = self.old, self.new
        return value.replace(needle, replacement)