Elron committed on
Commit
c9ad8e1
1 Parent(s): 8afaaba

Upload string_operators.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. string_operators.py +35 -0
string_operators.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from typing import List
2
 
3
  from .operators import FieldOperator
@@ -10,8 +11,42 @@ class Split(FieldOperator):
10
  return value.split(self.by)
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  class Join(FieldOperator):
14
  by: str
15
 
16
  def process_value(self, value: List[str]) -> str:
17
  return self.by.join(value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
  from typing import List
3
 
4
  from .operators import FieldOperator
 
11
  return value.split(self.by)
12
 
13
 
14
class RegexSplit(FieldOperator):
    """Split a string field value into a list using a regular expression.

    Attributes:
        by: Regular-expression pattern used as the separator by ``re.split``.
    """

    by: str

    def process_value(self, value: str) -> List[str]:
        """Return the result of ``re.split(self.by, value)``.

        As with ``re.split`` in general, text matched by capturing groups in
        ``by`` is included in the returned list.
        """
        return re.split(self.by, value)
19
+
20
+
21
class TokensSplit(FieldOperator):
    """Split a string field value into tokens using a pretrained tokenizer.

    Attributes:
        model: Name or path handed to ``AutoTokenizer.from_pretrained``.
    """

    model: str
    # NOTE(review): presumably read by the base class to verify that the
    # listed packages are installed -- confirm against the operators module.
    _requirements_list = ["transformers"]

    def prepare(self):
        """Run the parent preparation, then load the tokenizer for ``self.model``."""
        super().prepare()
        # Imported here rather than at module level, so importing this module
        # does not itself import ``transformers``.
        from transformers import AutoTokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(self.model)

    def process_value(self, value: str) -> List[str]:
        """Return the tokens produced by ``self.tokenizer.tokenize(value)``."""
        return self.tokenizer.tokenize(value)
33
+
34
+
35
  class Join(FieldOperator):
36
  by: str
37
 
38
  def process_value(self, value: List[str]) -> str:
39
  return self.by.join(value)
40
+
41
+
42
class Strip(FieldOperator):
    """Remove leading and trailing whitespace from a string field value."""

    def process_value(self, value: str) -> str:
        """Return ``value.strip()``."""
        return value.strip()
45
+
46
+
47
class Replace(FieldOperator):
    """Replace occurrences of one substring with another in a string field value.

    Attributes:
        old: Substring to search for.
        new: Text substituted for each occurrence of ``old``.
    """

    old: str
    new: str

    def process_value(self, value: str) -> str:
        """Return ``value`` with every occurrence of ``self.old`` replaced by ``self.new``."""
        return value.replace(self.old, self.new)