Elron committed on
Commit
147cebb
·
1 Parent(s): 2fb2317

Upload text_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. text_utils.py +163 -0
text_utils.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
def split_words(s):
    """Split an identifier-like string into its component words.

    Word boundaries are recognised at PascalCase/camelCase transitions,
    at ``_`` and ``-`` separators, between letters and digits, and at
    whitespace.

    Args:
        s: The string to split.

    Returns:
        A list of words in their original casing. An empty string yields
        an empty list.
    """
    # Put a space before every run of capitals, then before every
    # capital-plus-lowercase word, so "HFLoader" becomes "HF Loader".
    s = re.sub(r"([A-Z][a-z]+)", r" \1", re.sub(r"([A-Z]+)", r" \1", s)).strip()
    # snake_case and kebab-case separators become plain spaces.
    s = re.sub(r"[_-]", " ", s)
    # Separate digits from adjacent letters. Raw strings are required here:
    # "\d" in a plain literal is an invalid escape sequence.
    s = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", s)
    s = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", s)
    # str.split() with no argument collapses the repeated spaces added above.
    return s.split()
15
+
16
+
17
def is_camel_case(s):
    """Return True if ``s`` starts with an uppercase letter and is alphanumeric.

    Despite the name, a leading *uppercase* ASCII letter is required
    (``"HFLoader"`` and ``"CAMELCASE"`` pass, ``"camelCase"`` does not).
    Every following character must be an ASCII letter or digit.

    Args:
        s: The string to test.

    Returns:
        True when the string has the shape described above, else False.
    """
    # This accepts exactly the same strings as the previous pattern
    # "^[A-Z]+([a-z0-9]*[A-Z]*[a-z0-9]*)*$", but without nested optional
    # quantifiers, which backtracked exponentially on non-matching input.
    return re.match(r"^[A-Z][A-Za-z0-9]*$", s) is not None
20
+
21
+
22
def is_snake_case(s):
    """Return True if ``s`` is lowercase snake_case.

    The string must consist of one or more groups of lowercase ASCII
    letters and/or digits, joined by single underscores. Leading,
    trailing or doubled underscores are rejected; a leading digit is
    accepted.

    Args:
        s: The string to test.

    Returns:
        True when the string has the shape described above, else False.
    """
    if re.match(r"^[a-z0-9]+(_[a-z0-9]+)*$", s) is None:
        return False
    return True
25
+
26
+
27
def camel_to_snake_case(s):
    """Convert a camelCase or PascalCase string to lowercase snake_case.

    Runs of capitals are kept together as one word, so ``"HFLoader"``
    becomes ``"hf_loader"``. Existing underscores and hyphens are kept
    and no extra underscore is inserted right after them.

    Args:
        s: The string to convert.

    Returns:
        The converted, fully lowercased string.
    """
    # Step 1: break before each capital whose preceding character exists and
    # is not itself a capital, an underscore or a hyphen ("camelTo" -> "camel_To").
    with_word_breaks = re.sub("(?<=[^A-Z_-])([A-Z])", r"_\1", s)

    # Step 2: inside a run of capitals, break before the last capital when it
    # starts a new word, i.e. is followed by a lowercase letter or digit
    # ("HFLoader" -> "HF_Loader").
    with_acronym_breaks = re.sub(
        "([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", with_word_breaks
    )

    return with_acronym_breaks.lower()
36
+
37
+
38
+ import shutil
39
+
40
+
41
def print_dict(d, indent=0, indent_delta=4, max_chars=None):
    """Pretty-print a (possibly nested) dict to stdout, wrapping long values.

    Each key is printed on its own line followed by ``:``. A nested dict is
    printed recursively, one ``indent_delta`` deeper. Any other value is
    converted with ``str()`` and printed below its key, one ``indent_delta``
    deeper. Lines that do not fit are hard-wrapped at character boundaries
    so that no value line exceeds ``max_chars`` columns.

    Args:
        d: The dict to print.
        indent: Number of spaces before the keys at this level.
        indent_delta: Additional spaces added per nesting level.
        max_chars: Maximum output width. When falsy (``None`` or ``0``), the
            terminal width minus a 10-column margin is used.

    Returns:
        None. Output goes to stdout.
    """
    max_chars = max_chars or shutil.get_terminal_size()[0] - 10
    indent_str = " " * indent
    value_prefix = indent_str + " " * indent_delta
    # Columns left for value text after its prefix. Clamp to 1 so deep nesting
    # or a tiny terminal can never produce a zero or negative slicing step.
    line_width = max(1, max_chars - indent - indent_delta)

    for key, value in d.items():
        print(f"{indent_str}{key}:")

        if isinstance(value, dict):
            # Forward indent_delta so nested levels keep the caller's step.
            print_dict(
                value,
                indent=indent + indent_delta,
                indent_delta=indent_delta,
                max_chars=max_chars,
            )
            continue

        # Treat embedded newlines as line breaks, then wrap each line.
        for line in str(value).split("\n"):
            if len(line) <= line_width:
                print(f"{value_prefix}{line}")
                continue
            for start in range(0, len(line), line_width):
                print(f"{value_prefix}{line[start:start + line_width]}")
67
+
68
+
69
def nested_tuple_to_string(nested_tuple: tuple) -> str:
    """Flatten a nested tuple into a single underscore-joined string.

    Tuple items are flattened recursively; every other item is converted
    with ``str()``. An empty tuple contributes an empty string.

    Args:
        nested_tuple: The tuple to flatten; it may contain further tuples.

    Returns:
        The items joined with ``"_"`` in depth-first order.
    """
    return "_".join(
        nested_tuple_to_string(element) if isinstance(element, tuple) else str(element)
        for element in nested_tuple
    )
77
+
78
+
79
if __name__ == "__main__":
    # Ad-hoc self-checks, run only when the module is executed as a script.
    # Nothing is printed when every check passes; each mismatch is reported
    # with the expected and actual value.

    # (input, expected split_words output)
    test_cases = [
        ("example1", ["example", "1"]),
        ("exampleOne", ["example", "One"]),
        ("123example456", ["123", "example", "456"]),
        ("happyDay", ["happy", "Day"]),
        ("thisIsATest", ["this", "Is", "A", "Test"]),
        ("TestAI2023", ["Test", "AI", "2023"]),
        ("stringWith1Number", ["string", "With", "1", "Number"]),
        ("camelCaseExample", ["camel", "Case", "Example"]),
        ("snake_case_example", ["snake", "case", "example"]),
        ("snake_case2example3", ["snake", "case", "2", "example", "3"]),
        ("kebab-case-example", ["kebab", "case", "example"]),
        ("kebab-case2example3", ["kebab", "case", "2", "example", "3"]),
        ("PascalCaseExample", ["Pascal", "Case", "Example"]),
        ("Title Case Example", ["Title", "Case", "Example"]),
        ("Mixed1Example_case", ["Mixed", "1", "Example", "case"]),
        ("Mixed2Example-case", ["Mixed", "2", "Example", "case"]),
        ("Mixed3_Example-case", ["Mixed", "3", "Example", "case"]),
        ("UPPERCASEEXAMPLE", ["UPPERCASEEXAMPLE"]),
        ("lowercaseexample", ["lowercaseexample"]),
        ("mixedUPanddown", ["mixed", "U", "Panddown"]),
    ]

    for i, (input_string, expected_output) in enumerate(test_cases, 1):
        actual_output = split_words(input_string)
        if actual_output != expected_output:
            print(f"Failed on example {i}: {input_string}")
            print(f"Expected: {expected_output}, but got: {actual_output}\n")

    is_camel_case_test_cases = [
        ("isCamelCase", False),
        ("notCamelCase", False),
        ("camelCase", False),
        ("Notcamelcase", True),
        ("camel_Case", False),
        ("camelCase123", False),
        ("camelcase", False),
        ("CAMELCASE", True),
        ("camel-case", False),
        ("HFLoader", True),
    ]

    is_snake_case_test_cases = [
        ("is_snake_case", True),
        ("Not_snake_case", False),
        ("snake_case", True),
        ("snake_Case", False),
        ("Snakecase", False),
        ("snake-case", False),
        ("snake_case123", True),
        ("123snake_case", True),
        ("snakecase", True),
    ]

    camel_to_snake_case_test_cases = [
        ("camelToSnake", "camel_to_snake"),
        ("CamelToSnake", "camel_to_snake"),
        ("CamelToSnakeCase", "camel_to_snake_case"),
        ("camelToSnakeCase123", "camel_to_snake_case123"),
        ("123CamelToSnakeCase", "123_camel_to_snake_case"),
        # No underscore is inserted after an existing "_" or "-", so the
        # separators already present are kept as they are.
        ("camelTo_Snake_Case", "camel_to_snake_case"),
        ("camelTo-Snake-Case", "camel_to-snake-case"),
        ("camelToSnakeCASE", "camel_to_snake_case"),
        ("CAMELToSnakeCase", "camel_to_snake_case"),
        ("camelToSNAKECase", "camel_to_snake_case"),
        ("HFLoader", "hf_loader"),
    ]

    # These three suites share one reporting format, keyed by function name.
    for func, cases in (
        (is_camel_case, is_camel_case_test_cases),
        (is_snake_case, is_snake_case_test_cases),
        (camel_to_snake_case, camel_to_snake_case_test_cases),
    ):
        for input_string, expected_output in cases:
            actual_output = func(input_string)
            if actual_output != expected_output:
                print(f"Failed on {func.__name__}: {input_string}")
                print(f"Expected: {expected_output}, but got: {actual_output}\n")