m-ric HF staff committed on
Commit
31e3c37
•
1 Parent(s): 83f2644

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -8
app.py CHANGED
@@ -17,9 +17,8 @@ def length_tokens(txt):
17
 
18
 
19
  def extract_separators_from_string(separators_str):
20
- print('Received:', type(separators_str), 'with value', repr(separators_str))
21
  try:
22
- separators_str = separators_str.replace("\\n", "\n").replace("\\t", "\t") # fix special characters
23
  separators = separators_str[1:-1].split(", ")
24
  return [separator.replace('"', "").replace("'", "") for separator in separators]
25
  except Exception as e:
@@ -47,7 +46,6 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
47
  separator=" ",
48
  )
49
  elif splitter_selection == LABEL_RECURSIVE:
50
- print('Splitting with separators:', ',,'.join([repr(el) for el in separators]), f',and chunk length {length} and chunk overlap {chunk_overlap}')
51
  text_splitter = RecursiveCharacterTextSplitter(
52
  chunk_size=length,
53
  chunk_overlap=int(chunk_overlap),
@@ -55,14 +53,9 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
55
  strip_whitespace=False,
56
  separators=separators,
57
  )
58
- print(text_splitter._separators)
59
  splits = text_splitter.create_documents([text])
60
  text_splits = [split.page_content for split in splits]
61
- print('I did splits:')
62
- print(text_splits)
63
-
64
  unoverlapped_text_splits = unoverlap_list(text_splits)
65
-
66
  output = [((split[0], 'Overlap') if split[1] else (split[0], f"Chunk {str(i)}")) for i, split in enumerate(unoverlapped_text_splits)]
67
  return output
68
 
 
17
 
18
 
19
  def extract_separators_from_string(separators_str):
 
20
  try:
21
+ separators_str = separators_str.replace("\\n", "\n").replace("\\t", "\t").replace("\\\\", "\\") # fix special characters
22
  separators = separators_str[1:-1].split(", ")
23
  return [separator.replace('"', "").replace("'", "") for separator in separators]
24
  except Exception as e:
 
46
  separator=" ",
47
  )
48
  elif splitter_selection == LABEL_RECURSIVE:
 
49
  text_splitter = RecursiveCharacterTextSplitter(
50
  chunk_size=length,
51
  chunk_overlap=int(chunk_overlap),
 
53
  strip_whitespace=False,
54
  separators=separators,
55
  )
 
56
  splits = text_splitter.create_documents([text])
57
  text_splits = [split.page_content for split in splits]
 
 
 
58
  unoverlapped_text_splits = unoverlap_list(text_splits)
 
59
  output = [((split[0], 'Overlap') if split[1] else (split[0], f"Chunk {str(i)}")) for i, split in enumerate(unoverlapped_text_splits)]
60
  return output
61