Upload text_utils.py with huggingface_hub
Browse files- text_utils.py +7 -7
text_utils.py
CHANGED
@@ -12,12 +12,12 @@ def split_words(s):
|
|
12 |
list: The list of words obtained after splitting the string.
|
13 |
"""
|
14 |
# Split PascalCase or camelCase
|
15 |
-
s = re.sub("([A-Z][a-z]+)", r" \1", re.sub("([A-Z]+)", r" \1", s)).strip()
|
16 |
# Split snake_case or kebab-case
|
17 |
-
s = re.sub("[_-]", " ", s)
|
18 |
# Split numbers attached to strings
|
19 |
-
s = re.sub("([a-zA-Z])(\d)", r"\1 \2", s)
|
20 |
-
s = re.sub("(\d)([a-zA-Z])", r"\1 \2", s)
|
21 |
# Split the string into words based on spaces
|
22 |
words = s.split()
|
23 |
return words
|
@@ -60,10 +60,10 @@ def camel_to_snake_case(s):
|
|
60 |
str: The string converted to snake_case.
|
61 |
"""
|
62 |
# Add an underscore before every uppercase letter that is preceded by a character other than an uppercase letter, an underscore or a hyphen (the lookbehind needs a preceding character, so an uppercase letter at the start of the string is left alone)
|
63 |
-
s = re.sub("(?<=[^A-Z_-])([A-Z])", r"_\1", s)
|
64 |
|
65 |
# Ensure there's an underscore before any uppercase letter that's followed by a lowercase letter or digit and comes after a sequence of uppercase letters
|
66 |
-
s = re.sub("([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", s)
|
67 |
|
68 |
s = s.lower()
|
69 |
return s
|
@@ -125,4 +125,4 @@ def nested_tuple_to_string(nested_tuple: tuple) -> str:
|
|
125 |
result.append(nested_tuple_to_string(item))
|
126 |
else:
|
127 |
result.append(str(item))
|
128 |
-
return "_".join(result)
|
|
|
12 |
list: The list of words obtained after splitting the string.
|
13 |
"""
|
14 |
# Split PascalCase or camelCase
|
15 |
+
s = re.sub(r"([A-Z][a-z]+)", r" \1", re.sub(r"([A-Z]+)", r" \1", s)).strip()
|
16 |
# Split snake_case or kebab-case
|
17 |
+
s = re.sub(r"[_-]", " ", s)
|
18 |
# Split numbers attached to strings
|
19 |
+
s = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", s)
|
20 |
+
s = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", s)
|
21 |
# Split the string into words based on spaces
|
22 |
words = s.split()
|
23 |
return words
|
|
|
60 |
str: The string converted to snake_case.
|
61 |
"""
|
62 |
# Add an underscore before every uppercase letter that is preceded by a character other than an uppercase letter, an underscore or a hyphen (the lookbehind needs a preceding character, so an uppercase letter at the start of the string is left alone)
|
63 |
+
s = re.sub(r"(?<=[^A-Z_-])([A-Z])", r"_\1", s)
|
64 |
|
65 |
# Ensure there's an underscore before any uppercase letter that's followed by a lowercase letter or digit and comes after a sequence of uppercase letters
|
66 |
+
s = re.sub(r"([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", s)
|
67 |
|
68 |
s = s.lower()
|
69 |
return s
|
|
|
125 |
result.append(nested_tuple_to_string(item))
|
126 |
else:
|
127 |
result.append(str(item))
|
128 |
+
return "_".join(result)
|