peter szemraj committed on
Commit
ea23ea9
1 Parent(s): 7681a60

:bug: attempt to fix empty strings being passed to clean()

Browse files
Files changed (3) hide show
  1. ai_single_response.py +3 -4
  2. app.py +1 -1
  3. utils.py +16 -0
ai_single_response.py CHANGED
@@ -16,8 +16,7 @@ import time
16
  import warnings
17
  from datetime import datetime
18
  from pathlib import Path
19
- from cleantext import clean
20
- from utils import print_spacer
21
 
22
  warnings.filterwarnings(action="ignore", message=".*gradient_checkpointing*")
23
 
@@ -34,8 +33,8 @@ def extract_response(full_resp: list, plist: list, verbose: bool = False):
34
 
35
  verbose (bool, optional): 4 debug. Defaults to False.
36
  """
37
- full_resp = [clean(ele) for ele in full_resp]
38
- plist = [clean(pr) for pr in plist]
39
  p_len = len(plist)
40
  assert (
41
  len(full_resp) >= p_len
 
16
  import warnings
17
  from datetime import datetime
18
  from pathlib import Path
19
+ from utils import print_spacer, cleantxt_wrap
 
20
 
21
  warnings.filterwarnings(action="ignore", message=".*gradient_checkpointing*")
22
 
 
33
 
34
  verbose (bool, optional): 4 debug. Defaults to False.
35
  """
36
+ full_resp = [cleantxt_wrap(ele) for ele in full_resp]
37
+ plist = [cleantxt_wrap(pr) for pr in plist]
38
  p_len = len(plist)
39
  assert (
40
  len(full_resp) >= p_len
app.py CHANGED
@@ -87,7 +87,7 @@ def ask_gpt(message: str):
87
  top_p=0.65,
88
  )
89
  uniques = remove_repeated_words(resp["out_text"])
90
- bot_resp = corr(remove_string_extras(uniques))
91
  rt = round(time.perf_counter() - st, 2)
92
  print(f"took {rt} sec to respond")
93
  return remove_trailing_punctuation(bot_resp)
 
87
  top_p=0.65,
88
  )
89
  uniques = remove_repeated_words(resp["out_text"])
90
+ bot_resp = corr((uniques))
91
  rt = round(time.perf_counter() - st, 2)
92
  print(f"took {rt} sec to respond")
93
  return remove_trailing_punctuation(bot_resp)
utils.py CHANGED
@@ -432,3 +432,19 @@ def split_sentences(text: str):
432
  [list]: [list of sentences]
433
  """
434
  return re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  [list]: [list of sentences]
433
  """
434
  return re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)
435
+
436
+ def cleantxt_wrap(ugly_text):
437
+ """
438
+ cleantxt_wrap - applies the clean function to a string.
439
+
440
+ Args:
441
+ ugly_text (str): [string to be cleaned]
442
+
443
+ Returns:
444
+ [str]: [cleaned string]
445
+ """
446
+ if isinstance(ugly_text, str) and len(ugly_text) > 0:
447
+ return clean(ugly_text)
448
+ else:
449
+ return ugly_text
450
+