Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ from langchain.text_splitter import (
|
|
4 |
RecursiveCharacterTextSplitter,
|
5 |
)
|
6 |
from transformers import AutoTokenizer
|
|
|
7 |
|
8 |
LABEL_TEXTSPLITTER = "🦜🔗 LangChain's CharacterTextSplitter"
|
9 |
LABEL_RECURSIVE = "🦜🔗 LangChain's RecursiveCharacterTextSplitter"
|
@@ -19,14 +20,12 @@ def extract_separators_from_string(separators_str):
|
|
19 |
separators = separators_str[1:-1].split(", ")
|
20 |
return [separator.replace('"', "").replace("'", "") for separator in separators]
|
21 |
except Exception as e:
|
22 |
-
print(e)
|
23 |
raise gr.Error(f"""
|
24 |
-
Did not succeed in extracting seperators from string: {separator_str}.
|
25 |
Please type it in the correct format: "['separator_1', 'separator_2', etc]"
|
26 |
""")
|
27 |
|
28 |
def change_split_selection(text, slider_count, split_selection, separator_selection, length_unit_selection):
|
29 |
-
print("Updating separator selection interactivity:")
|
30 |
return (
|
31 |
gr.Textbox.update(visible=(split_selection==LABEL_RECURSIVE)),
|
32 |
chunk(text, slider_count, split_selection, separator_selection, length_unit_selection)
|
@@ -34,7 +33,6 @@ def change_split_selection(text, slider_count, split_selection, separator_select
|
|
34 |
|
35 |
def chunk(text, length, splitter_selection, separators_str, length_unit_selection):
|
36 |
separators = extract_separators_from_string(separators_str)
|
37 |
-
print(splitter_selection, length_unit_selection.lower())
|
38 |
length_function = (length_tokens if "token" in length_unit_selection.lower() else len)
|
39 |
if splitter_selection == LABEL_TEXTSPLITTER:
|
40 |
text_splitter = CharacterTextSplitter(
|
|
|
4 |
RecursiveCharacterTextSplitter,
|
5 |
)
|
6 |
from transformers import AutoTokenizer
|
7 |
+
from overlap import unoverlap_list
|
8 |
|
9 |
LABEL_TEXTSPLITTER = "🦜🔗 LangChain's CharacterTextSplitter"
|
10 |
LABEL_RECURSIVE = "🦜🔗 LangChain's RecursiveCharacterTextSplitter"
|
|
|
20 |
separators = separators_str[1:-1].split(", ")
|
21 |
return [separator.replace('"', "").replace("'", "") for separator in separators]
|
22 |
except Exception as e:
|
|
|
23 |
raise gr.Error(f"""
|
24 |
+
Did not succeed in extracting seperators from string: {separator_str} due to: {str(e)}.
|
25 |
Please type it in the correct format: "['separator_1', 'separator_2', etc]"
|
26 |
""")
|
27 |
|
28 |
def change_split_selection(text, slider_count, split_selection, separator_selection, length_unit_selection):
|
|
|
29 |
return (
|
30 |
gr.Textbox.update(visible=(split_selection==LABEL_RECURSIVE)),
|
31 |
chunk(text, slider_count, split_selection, separator_selection, length_unit_selection)
|
|
|
33 |
|
34 |
def chunk(text, length, splitter_selection, separators_str, length_unit_selection):
|
35 |
separators = extract_separators_from_string(separators_str)
|
|
|
36 |
length_function = (length_tokens if "token" in length_unit_selection.lower() else len)
|
37 |
if splitter_selection == LABEL_TEXTSPLITTER:
|
38 |
text_splitter = CharacterTextSplitter(
|