File size: 2,769 Bytes
e8425dc 2746bef f2336e3 2636a15 f2336e3 2636a15 c22c8aa c60c34e 5fb7e94 c60c34e 2746bef 5fb7e94 2746bef e8425dc 2746bef 5fb7e94 e8425dc 5fb7e94 2746bef e8425dc 5fb7e94 e8425dc 26abcb4 5fb7e94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import json
import re
from .operator import BaseFieldOperator
class ToString(BaseFieldOperator):
    """Field operator that converts the incoming value to its ``str`` form."""

    def process(self, instance) -> str:
        """Return ``str(instance)``.

        Args:
            instance: Any object; it is passed to the built-in ``str``.

        Returns:
            The string representation of ``instance``.
        """
        return str(instance)
class ToStringStripped(BaseFieldOperator):
    """Field operator that stringifies a value and trims surrounding whitespace."""

    def process(self, instance) -> str:
        """Return ``str(instance)`` without leading or trailing whitespace.

        Args:
            instance: Any object; it is converted with the built-in ``str``.

        Returns:
            The string form of ``instance`` after ``str.strip()``.
        """
        as_text = str(instance)
        return as_text.strip()
class ToListByComma(BaseFieldOperator):
    """Field operator that splits a comma-separated string into trimmed items."""

    def process(self, instance) -> list:
        """Split ``instance`` on ``","`` and strip whitespace from every piece.

        Empty pieces are kept, so ``"a,,b"`` yields ``["a", "", "b"]`` and an
        empty string yields ``[""]``.

        Args:
            instance: The string to split; it must provide ``split``.

        Returns:
            A list with one stripped string per comma-separated piece.
        """
        pieces = instance.split(",")
        return [piece.strip() for piece in pieces]
class RegexParser(BaseFieldOperator):
    """A processor that uses regex in order to parse a string.

    Attributes:
        regex: Pattern handed to ``re.findall`` to extract the results.
        termination_regex: Optional pattern. When it is set and matches the
            *entire* text (``re.fullmatch``), parsing is skipped and an empty
            list is returned.
    """

    regex: str
    termination_regex: str = None

    def process(self, text):
        """Extract every non-overlapping match of ``regex`` from ``text``.

        Args:
            text: The string to parse.

        Returns:
            ``[]`` when ``termination_regex`` is set and fully matches
            ``text``; otherwise the result of ``re.findall(self.regex, text)``
            (strings, or tuples when the pattern has several groups).
        """
        stop_pattern = self.termination_regex
        if stop_pattern is None or not re.fullmatch(stop_pattern, text):
            return re.findall(self.regex, text)
        # The whole text matched the termination pattern: nothing to extract.
        return []
class LoadJson(BaseFieldOperator):
    """Field operator that decodes a JSON document, tolerating malformed input."""

    def process(self, text):
        """Parse ``text`` as JSON.

        Args:
            text: The JSON document to decode.

        Returns:
            The decoded Python object, or ``[]`` when ``json.loads`` raises
            ``json.JSONDecodeError``. Other exceptions propagate.
        """
        try:
            decoded = json.loads(text)
        except json.JSONDecodeError:
            # Malformed JSON is treated as "no data" rather than an error.
            return []
        else:
            return decoded
class ListToEmptyEntitiesTuples(BaseFieldOperator):
    """Field operator that turns each item into a ``(str(item), "")`` tuple."""

    def process(self, lst) -> list:
        """Pair the string form of every item in ``lst`` with an empty string.

        Args:
            lst: An iterable of items. A non-iterable value (for example
                ``None`` or a number) is treated as "no items".

        Returns:
            A list of ``(str(item), "")`` tuples in iteration order, or ``[]``
            when ``lst`` cannot be iterated.
        """
        try:
            return [(str(item), "") for item in lst]
        except (TypeError, json.JSONDecodeError):
            # TypeError is what iterating a non-iterable raises; the original
            # handler only named JSONDecodeError, which this body cannot raise
            # by itself. It is kept so previously handled cases stay handled.
            return []
class DictOfListsToPairs(BaseFieldOperator):
    """Field operator that flattens a mapping of string lists into pairs.

    Attributes:
        position_key_before_value: When true (the default) each pair is
            ``(key, value)``; otherwise it is ``(value, key)``.
    """

    position_key_before_value: bool = True

    def process(self, obj) -> list:
        """Flatten ``obj`` into a list of key/value pairs.

        Args:
            obj: A mapping whose values are iterables of strings.

        Returns:
            One pair per (key, value) combination, in iteration order. An
            empty list is returned when ``obj`` does not have that shape,
            including when any value is not a ``str``.
        """
        try:
            pairs = []
            for key, values in obj.items():
                for value in values:
                    # Explicit check instead of `assert`, which is removed
                    # under `python -O` and would let non-strings through.
                    if not isinstance(value, str):
                        return []
                    if self.position_key_before_value:
                        pairs.append((key, value))
                    else:
                        pairs.append((value, key))
            return pairs
        except Exception:
            # Best-effort parsing: any malformed input yields no pairs, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return []
class TakeFirstNonEmptyLine(BaseFieldOperator):
    """Field operator that keeps only the first non-empty line of a value."""

    def process(self, instance) -> str:
        """Return the first non-empty line of ``str(instance)``, stripped.

        Leading and trailing whitespace (including blank lines) is removed
        first, so the first remaining line is non-empty unless the whole text
        is blank.

        Args:
            instance: Any object; it is converted with the built-in ``str``.

        Returns:
            The first line with surrounding whitespace removed, or ``""``
            when the text is empty or whitespace only.
        """
        # partition() always yields a head element, so no emptiness check is
        # needed (the original `len(...) == 0` branch could never be taken).
        first_line, _, _ = str(instance).strip().partition("\n")
        return first_line.strip()
class LowerCaseTillPunc(BaseFieldOperator):
    """Field operator that lower-cases text and cuts it at the first punctuation."""

    def process(self, instance) -> str:
        """Lower-case ``instance`` and keep the part before ``. , ! ? ;``.

        Args:
            instance: The string to process; it must provide ``lower``.

        Returns:
            The lower-cased text up to (not including) the first of the
            characters ``.``, ``,``, ``!``, ``?`` or ``;``. The whole
            lower-cased text is returned when none of them occurs.
        """
        lowered = instance.lower()
        # A single split at the first punctuation mark; element 0 is the
        # prefix, or the entire string when there is no match.
        head = re.split(r"[.,!?;]", lowered, maxsplit=1)[0]
        return head
class FirstCharacter(BaseFieldOperator):
    """Field operator that extracts the first word character of a string."""

    def process(self, instance) -> str:
        """Return the first ``\\w`` character found in ``instance``.

        Args:
            instance: The string to search.

        Returns:
            The first character matching ``\\w`` (searching from the start of
            the string), or ``""`` when there is none.
        """
        found = re.search(r"\s*(\w)", instance)
        return found.group(1) if found else ""
class StringOrNotString(BaseFieldOperator):
    """Field operator that normalises text to ``string`` or ``not string``.

    Attributes:
        string: The target phrase to look for (compared case-insensitively).
    """

    string: str

    def process(self, instance):
        """Map ``instance`` onto the target phrase or its negation.

        The negated form is checked first because any text containing
        ``"not <string>"`` also contains ``"<string>"``.

        Args:
            instance: The text to inspect; it must provide ``lower``.

        Returns:
            ``"not " + string.lower()`` when that appears in the lower-cased
            text, else ``string.lower()`` when that appears, else
            ``instance`` unchanged.
        """
        target = self.string.lower()
        haystack = instance.lower()
        negated = "not " + target

        if negated in haystack:
            return negated
        if target in haystack:
            return target
        return instance
|