Elron committed on
Commit
6489550
·
verified ·
1 Parent(s): 8e5aa19

Upload processors.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. processors.py +17 -4
processors.py CHANGED
@@ -15,9 +15,20 @@ class ToStringStripped(FieldOperator):
15
  return str(text).strip()
16
 
17
 
18
- class ToListByComma(FieldOperator):
 
 
 
19
  def process_value(self, text: Any) -> Any:
20
- return [x.strip() for x in text.split(",")]
 
 
 
 
 
 
 
 
21
 
22
 
23
  class RegexParser(FieldOperator):
@@ -111,7 +122,7 @@ class FirstCharacter(FieldOperator):
111
 
112
  class TakeFirstWord(FieldOperator):
113
  def process_value(self, text: Any) -> Any:
114
- match = re.search(r"[\w]+", text)
115
  if match:
116
  return text[match.start() : match.end()]
117
  return ""
@@ -121,7 +132,9 @@ class YesNoToInt(FieldOperator):
121
  def process_value(self, text: Any) -> Any:
122
  if text == "yes":
123
  return "1"
124
- return "0"
 
 
125
 
126
 
127
  class ToYesOrNone(FieldOperator):
 
15
  return str(text).strip()
16
 
17
 
18
+ class Split(FieldOperator):
19
+ delimiter: str = " "
20
+ strip_every_element: bool = False
21
+
22
  def process_value(self, text: Any) -> Any:
23
+ return [
24
+ x.strip() if self.strip_every_element else x
25
+ for x in text.split(self.delimiter)
26
+ ]
27
+
28
+
29
+ class ToListByComma(Split):
30
+ delimiter = ","
31
+ strip_every_element = True
32
 
33
 
34
  class RegexParser(FieldOperator):
 
122
 
123
  class TakeFirstWord(FieldOperator):
124
  def process_value(self, text: Any) -> Any:
125
+ match = re.search(r"([-]*[0-9]+(\.([0-9]+))*)|([\w]+)", text)
126
  if match:
127
  return text[match.start() : match.end()]
128
  return ""
 
132
  def process_value(self, text: Any) -> Any:
133
  if text == "yes":
134
  return "1"
135
+ if text == "no":
136
+ return "0"
137
+ return text
138
 
139
 
140
  class ToYesOrNone(FieldOperator):