m-ric HF staff committed on
Commit
5de0055
•
1 Parent(s): e8e9bc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -4,6 +4,7 @@ from langchain.text_splitter import (
4
  RecursiveCharacterTextSplitter,
5
  )
6
  from transformers import AutoTokenizer
 
7
 
8
  LABEL_TEXTSPLITTER = "🦜🔗 LangChain's CharacterTextSplitter"
9
  LABEL_RECURSIVE = "🦜🔗 LangChain's RecursiveCharacterTextSplitter"
@@ -19,14 +20,12 @@ def extract_separators_from_string(separators_str):
19
  separators = separators_str[1:-1].split(", ")
20
  return [separator.replace('"', "").replace("'", "") for separator in separators]
21
  except Exception as e:
22
- print(e)
23
  raise gr.Error(f"""
24
- Did not succeed in extracting seperators from string: {separator_str}.
25
  Please type it in the correct format: "['separator_1', 'separator_2', etc]"
26
  """)
27
 
28
  def change_split_selection(text, slider_count, split_selection, separator_selection, length_unit_selection):
29
- print("Updating separator selection interactivity:")
30
  return (
31
  gr.Textbox.update(visible=(split_selection==LABEL_RECURSIVE)),
32
  chunk(text, slider_count, split_selection, separator_selection, length_unit_selection)
@@ -34,7 +33,6 @@ def change_split_selection(text, slider_count, split_selection, separator_select
34
 
35
  def chunk(text, length, splitter_selection, separators_str, length_unit_selection):
36
  separators = extract_separators_from_string(separators_str)
37
- print(splitter_selection, length_unit_selection.lower())
38
  length_function = (length_tokens if "token" in length_unit_selection.lower() else len)
39
  if splitter_selection == LABEL_TEXTSPLITTER:
40
  text_splitter = CharacterTextSplitter(
 
4
  RecursiveCharacterTextSplitter,
5
  )
6
  from transformers import AutoTokenizer
7
+ from overlap import unoverlap_list
8
 
9
  LABEL_TEXTSPLITTER = "🦜🔗 LangChain's CharacterTextSplitter"
10
  LABEL_RECURSIVE = "🦜🔗 LangChain's RecursiveCharacterTextSplitter"
 
20
  separators = separators_str[1:-1].split(", ")
21
  return [separator.replace('"', "").replace("'", "") for separator in separators]
22
  except Exception as e:
 
23
  raise gr.Error(f"""
24
+ Did not succeed in extracting seperators from string: {separator_str} due to: {str(e)}.
25
  Please type it in the correct format: "['separator_1', 'separator_2', etc]"
26
  """)
27
 
28
  def change_split_selection(text, slider_count, split_selection, separator_selection, length_unit_selection):
 
29
  return (
30
  gr.Textbox.update(visible=(split_selection==LABEL_RECURSIVE)),
31
  chunk(text, slider_count, split_selection, separator_selection, length_unit_selection)
 
33
 
34
  def chunk(text, length, splitter_selection, separators_str, length_unit_selection):
35
  separators = extract_separators_from_string(separators_str)
 
36
  length_function = (length_tokens if "token" in length_unit_selection.lower() else len)
37
  if splitter_selection == LABEL_TEXTSPLITTER:
38
  text_splitter = CharacterTextSplitter(