metric / processors.py
Elron's picture
Upload processors.py with huggingface_hub
c22c8aa
raw
history blame
1.7 kB
import json
import re
from typing import Any
from .operator import BaseFieldOperator
class ToString(BaseFieldOperator):
    """Field operator that converts a value to its string form."""

    def process(self, instance: Any) -> str:
        """Return ``str(instance)``."""
        as_text = str(instance)
        return as_text
class ToStringStripped(BaseFieldOperator):
    """Field operator that stringifies a value and trims its whitespace."""

    def process(self, instance: Any) -> str:
        """Return ``str(instance)`` without leading/trailing whitespace."""
        as_text = str(instance)
        return as_text.strip()
class ToListByComma(BaseFieldOperator):
    """Field operator that splits a comma-separated value into a list."""

    def process(self, instance):
        """Split ``instance`` on ``","`` and strip each resulting piece.

        Empty pieces are kept, so ``"a,,b"`` yields ``["a", "", "b"]``.
        """
        pieces = []
        for chunk in instance.split(","):
            pieces.append(chunk.strip())
        return pieces
class RegexParser(BaseFieldOperator):
    """
    Field operator that extracts all matches of ``regex`` from a string.

    When ``termination_regex`` is set and matches the entire input, the
    input is treated as containing nothing to extract and an empty list
    is returned.
    """

    # Pattern passed to ``re.findall`` to collect the matches.
    regex: str
    # Optional pattern; a full match against the input short-circuits to [].
    termination_regex: str = None

    def process(self, text):
        """Return the ``re.findall`` matches of ``regex`` in ``text``.

        Returns an empty list when ``termination_regex`` is configured and
        fully matches ``text``.
        """
        if self.termination_regex is not None:
            whole_match = re.fullmatch(self.termination_regex, text)
            if whole_match is not None:
                return []
        return re.findall(self.regex, text)
class LoadJson(BaseFieldOperator):
    """Field operator that parses JSON text, tolerating malformed input."""

    def process(self, text):
        """Return the decoded JSON value of ``text``.

        Text that is not valid JSON yields an empty list instead of raising.
        """
        try:
            decoded = json.loads(text)
        except json.JSONDecodeError:
            return []
        else:
            return decoded
class ListToEmptyEntitiesTuples(BaseFieldOperator):
    """Field operator that turns items into ``(str(item), "")`` tuples."""

    def process(self, lst):
        """Pair the string form of every item in ``lst`` with an empty string.

        Args:
            lst: Any iterable of items.

        Returns:
            A list of ``(str(item), "")`` tuples, or an empty list when
            ``lst`` is not iterable (for example ``None`` or a number).
        """
        # The previous handler caught json.JSONDecodeError, which nothing in
        # this method can raise; the failure that actually occurs for
        # malformed input is TypeError from iterating a non-iterable.
        try:
            items = iter(lst)
        except TypeError:
            return []
        return [(str(item), "") for item in items]
class DictOfListsToPairs(BaseFieldOperator):
    """Field operator that flattens a mapping of lists into a list of pairs."""

    # When True each pair is (key, value); when False it is (value, key).
    position_key_before_value: bool = True

    def process(self, obj):
        """Flatten ``obj`` into one pair per (key, value-in-list) combination.

        Args:
            obj: A mapping whose values are iterables of strings.

        Returns:
            A list of 2-tuples ordered according to
            ``position_key_before_value``. An empty list is returned when
            ``obj`` does not have the expected shape or when any value is
            not a string.
        """
        try:
            key_first = self.position_key_before_value
            pairs = []
            for key, values in obj.items():
                for value in values:
                    # Explicit check rather than ``assert`` so the validation
                    # still runs when Python is started with -O.
                    if not isinstance(value, str):
                        return []
                    pairs.append((key, value) if key_first else (value, key))
            return pairs
        except Exception:
            # Deliberate best-effort: any malformed input maps to "no pairs".
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return []