Elron committed on
Commit
96fd200
·
1 Parent(s): cb31369

Upload text_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. text_utils.py +7 -7
text_utils.py CHANGED
@@ -12,12 +12,12 @@ def split_words(s):
12
  list: The list of words obtained after splitting the string.
13
  """
14
  # Split PascalCase or camelCase
15
- s = re.sub("([A-Z][a-z]+)", r" \1", re.sub("([A-Z]+)", r" \1", s)).strip()
16
  # Split snake_case or kebab-case
17
- s = re.sub("[_-]", " ", s)
18
  # Split numbers attached to strings
19
- s = re.sub("([a-zA-Z])(\d)", r"\1 \2", s)
20
- s = re.sub("(\d)([a-zA-Z])", r"\1 \2", s)
21
  # Split the string into words based on spaces
22
  words = s.split()
23
  return words
@@ -60,10 +60,10 @@ def camel_to_snake_case(s):
60
  str: The string converted to snake_case.
61
  """
62
  # Add an underscore before every uppercase letter that is followed by a lowercase letter or digit and not preceded by an underscore, a hyphen or an uppercase letter
63
- s = re.sub("(?<=[^A-Z_-])([A-Z])", r"_\1", s)
64
 
65
  # Ensure there's an underscore before any uppercase letter that's followed by a lowercase letter or digit and comes after a sequence of uppercase letters
66
- s = re.sub("([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", s)
67
 
68
  s = s.lower()
69
  return s
@@ -125,4 +125,4 @@ def nested_tuple_to_string(nested_tuple: tuple) -> str:
125
  result.append(nested_tuple_to_string(item))
126
  else:
127
  result.append(str(item))
128
- return "_".join(result)
 
12
  list: The list of words obtained after splitting the string.
13
  """
14
  # Split PascalCase or camelCase
15
+ s = re.sub(r"([A-Z][a-z]+)", r" \1", re.sub(r"([A-Z]+)", r" \1", s)).strip()
16
  # Split snake_case or kebab-case
17
+ s = re.sub(r"[_-]", " ", s)
18
  # Split numbers attached to strings
19
+ s = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", s)
20
+ s = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", s)
21
  # Split the string into words based on spaces
22
  words = s.split()
23
  return words
 
60
  str: The string converted to snake_case.
61
  """
62
  # Add an underscore before every uppercase letter that is followed by a lowercase letter or digit and not preceded by an underscore, a hyphen or an uppercase letter
63
+ s = re.sub(r"(?<=[^A-Z_-])([A-Z])", r"_\1", s)
64
 
65
  # Ensure there's an underscore before any uppercase letter that's followed by a lowercase letter or digit and comes after a sequence of uppercase letters
66
+ s = re.sub(r"([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", s)
67
 
68
  s = s.lower()
69
  return s
 
125
  result.append(nested_tuple_to_string(item))
126
  else:
127
  result.append(str(item))
128
+ return "_".join(result)