Upload text_utils.py with huggingface_hub
Browse files- text_utils.py +20 -25
text_utils.py
CHANGED
@@ -1,9 +1,10 @@
|
|
|
|
1 |
import re
|
|
|
2 |
|
3 |
|
4 |
def split_words(s):
|
5 |
-
"""
|
6 |
-
Splits a string into words based on PascalCase, camelCase, snake_case, kebab-case, and numbers attached to strings.
|
7 |
|
8 |
Args:
|
9 |
s (str): The string to be split.
|
@@ -19,13 +20,11 @@ def split_words(s):
|
|
19 |
s = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", s)
|
20 |
s = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", s)
|
21 |
# Split the string into words based on spaces
|
22 |
-
|
23 |
-
return words
|
24 |
|
25 |
|
26 |
def is_camel_case(s):
|
27 |
-
"""
|
28 |
-
Checks if a string is in camelCase.
|
29 |
|
30 |
Args:
|
31 |
s (str): The string to be checked.
|
@@ -37,8 +36,7 @@ def is_camel_case(s):
|
|
37 |
|
38 |
|
39 |
def is_snake_case(s):
|
40 |
-
"""
|
41 |
-
Checks if a string is in snake_case.
|
42 |
|
43 |
Args:
|
44 |
s (str): The string to be checked.
|
@@ -50,8 +48,7 @@ def is_snake_case(s):
|
|
50 |
|
51 |
|
52 |
def camel_to_snake_case(s):
|
53 |
-
"""
|
54 |
-
Converts a string from camelCase to snake_case.
|
55 |
|
56 |
Args:
|
57 |
s (str): The string to be converted.
|
@@ -65,16 +62,11 @@ def camel_to_snake_case(s):
|
|
65 |
# Ensure there's an underscore before any uppercase letter that's followed by a lowercase letter or digit and comes after a sequence of uppercase letters
|
66 |
s = re.sub(r"([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", s)
|
67 |
|
68 |
-
|
69 |
-
return s
|
70 |
-
|
71 |
-
|
72 |
-
import shutil
|
73 |
|
74 |
|
75 |
def print_dict(d, indent=0, indent_delta=4, max_chars=None):
|
76 |
-
"""
|
77 |
-
Prints a dictionary in a formatted manner, taking into account the terminal width.
|
78 |
|
79 |
Args:
|
80 |
d (dict): The dictionary to be printed.
|
@@ -82,13 +74,15 @@ def print_dict(d, indent=0, indent_delta=4, max_chars=None):
|
|
82 |
indent_delta (int, optional): The amount of spaces to add for each level of indentation. Defaults to 4.
|
83 |
max_chars (int, optional): The maximum number of characters for each line. Defaults to terminal width - 10.
|
84 |
"""
|
85 |
-
max_chars =
|
|
|
|
|
86 |
indent_str = " " * indent
|
87 |
indent_delta_str = " " * indent_delta
|
88 |
|
89 |
for key, value in d.items():
|
90 |
if isinstance(value, dict):
|
91 |
-
|
92 |
print_dict(value, indent=indent + indent_delta, max_chars=max_chars)
|
93 |
else:
|
94 |
# Value is not a dict, print as a string
|
@@ -97,21 +91,22 @@ def print_dict(d, indent=0, indent_delta=4, max_chars=None):
|
|
97 |
line_width = max_chars - indent
|
98 |
# Split value by newline characters and handle each line separately
|
99 |
lines = str_value.split("\n")
|
100 |
-
|
101 |
for line in lines:
|
102 |
if len(line) + len(indent_str) + indent_delta > line_width:
|
103 |
# Split long lines into multiple lines
|
104 |
-
|
105 |
for i in range(line_width, len(line), line_width):
|
106 |
-
|
|
|
|
|
107 |
else:
|
108 |
-
|
109 |
key = "" # Empty the key for lines after the first one
|
110 |
|
111 |
|
112 |
def nested_tuple_to_string(nested_tuple: tuple) -> str:
|
113 |
-
"""
|
114 |
-
Converts a nested tuple to a string, with elements separated by underscores.
|
115 |
|
116 |
Args:
|
117 |
nested_tuple (tuple): The nested tuple to be converted.
|
|
|
1 |
+
import logging
|
2 |
import re
|
3 |
+
import shutil
|
4 |
|
5 |
|
6 |
def split_words(s):
|
7 |
+
"""Splits a string into words based on PascalCase, camelCase, snake_case, kebab-case, and numbers attached to strings.
|
|
|
8 |
|
9 |
Args:
|
10 |
s (str): The string to be split.
|
|
|
20 |
s = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", s)
|
21 |
s = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", s)
|
22 |
# Split the string into words based on spaces
|
23 |
+
return s.split()
|
|
|
24 |
|
25 |
|
26 |
def is_camel_case(s):
|
27 |
+
"""Checks if a string is in camelCase.
|
|
|
28 |
|
29 |
Args:
|
30 |
s (str): The string to be checked.
|
|
|
36 |
|
37 |
|
38 |
def is_snake_case(s):
|
39 |
+
"""Checks if a string is in snake_case.
|
|
|
40 |
|
41 |
Args:
|
42 |
s (str): The string to be checked.
|
|
|
48 |
|
49 |
|
50 |
def camel_to_snake_case(s):
|
51 |
+
"""Converts a string from camelCase to snake_case.
|
|
|
52 |
|
53 |
Args:
|
54 |
s (str): The string to be converted.
|
|
|
62 |
# Ensure there's an underscore before any uppercase letter that's followed by a lowercase letter or digit and comes after a sequence of uppercase letters
|
63 |
s = re.sub(r"([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", s)
|
64 |
|
65 |
+
return s.lower()
|
|
|
|
|
|
|
|
|
66 |
|
67 |
|
68 |
def print_dict(d, indent=0, indent_delta=4, max_chars=None):
|
69 |
+
"""Prints a dictionary in a formatted manner, taking into account the terminal width.
|
|
|
70 |
|
71 |
Args:
|
72 |
d (dict): The dictionary to be printed.
|
|
|
74 |
indent_delta (int, optional): The amount of spaces to add for each level of indentation. Defaults to 4.
|
75 |
max_chars (int, optional): The maximum number of characters for each line. Defaults to terminal width - 10.
|
76 |
"""
|
77 |
+
max_chars = (
|
78 |
+
max_chars or shutil.get_terminal_size()[0] - 10
|
79 |
+
) # Get terminal size if max_chars not set
|
80 |
indent_str = " " * indent
|
81 |
indent_delta_str = " " * indent_delta
|
82 |
|
83 |
for key, value in d.items():
|
84 |
if isinstance(value, dict):
|
85 |
+
logging.info(f"{indent_str}{key}:")
|
86 |
print_dict(value, indent=indent + indent_delta, max_chars=max_chars)
|
87 |
else:
|
88 |
# Value is not a dict, print as a string
|
|
|
91 |
line_width = max_chars - indent
|
92 |
# Split value by newline characters and handle each line separately
|
93 |
lines = str_value.split("\n")
|
94 |
+
logging.info(f"{indent_str}{key} ({type(value).__name__}):")
|
95 |
for line in lines:
|
96 |
if len(line) + len(indent_str) + indent_delta > line_width:
|
97 |
# Split long lines into multiple lines
|
98 |
+
logging.info(f"{indent_str}{indent_delta_str}{line[:line_width]}")
|
99 |
for i in range(line_width, len(line), line_width):
|
100 |
+
logging.info(
|
101 |
+
f"{indent_str}{indent_delta_str}{line[i:i+line_width]}"
|
102 |
+
)
|
103 |
else:
|
104 |
+
logging.info(f"{indent_str}{indent_delta_str}{line}")
|
105 |
key = "" # Empty the key for lines after the first one
|
106 |
|
107 |
|
108 |
def nested_tuple_to_string(nested_tuple: tuple) -> str:
|
109 |
+
"""Converts a nested tuple to a string, with elements separated by underscores.
|
|
|
110 |
|
111 |
Args:
|
112 |
nested_tuple (tuple): The nested tuple to be converted.
|