Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -4,6 +4,7 @@ from langchain.text_splitter import ( | |
| 4 | 
             
                RecursiveCharacterTextSplitter,
         | 
| 5 | 
             
            )
         | 
| 6 | 
             
            from transformers import AutoTokenizer
         | 
|  | |
| 7 |  | 
| 8 | 
             
            LABEL_TEXTSPLITTER = "🦜🔗 LangChain's CharacterTextSplitter"
         | 
| 9 | 
             
            LABEL_RECURSIVE = "🦜🔗 LangChain's RecursiveCharacterTextSplitter"
         | 
| @@ -19,14 +20,12 @@ def extract_separators_from_string(separators_str): | |
| 19 | 
             
                    separators = separators_str[1:-1].split(", ")
         | 
| 20 | 
             
                    return [separator.replace('"', "").replace("'", "") for separator in separators]
         | 
| 21 | 
             
                except Exception as e:
         | 
| 22 | 
            -
                    print(e)
         | 
| 23 | 
             
                    raise gr.Error(f"""
         | 
| 24 | 
            -
                    Did not succeed in extracting seperators from string: {separator_str}.
         | 
| 25 | 
             
                    Please type it in the correct format: "['separator_1', 'separator_2', etc]"
         | 
| 26 | 
             
                    """)
         | 
| 27 |  | 
| 28 | 
             
            def change_split_selection(text, slider_count, split_selection, separator_selection, length_unit_selection):
         | 
| 29 | 
            -
                print("Updating separator selection interactivity:")
         | 
| 30 | 
             
                return (
         | 
| 31 | 
             
                    gr.Textbox.update(visible=(split_selection==LABEL_RECURSIVE)),
         | 
| 32 | 
             
                    chunk(text, slider_count, split_selection, separator_selection, length_unit_selection)
         | 
| @@ -34,7 +33,6 @@ def change_split_selection(text, slider_count, split_selection, separator_select | |
| 34 |  | 
| 35 | 
             
            def chunk(text, length, splitter_selection, separators_str, length_unit_selection):
         | 
| 36 | 
             
                separators = extract_separators_from_string(separators_str)
         | 
| 37 | 
            -
                print(splitter_selection, length_unit_selection.lower())
         | 
| 38 | 
             
                length_function = (length_tokens if "token" in length_unit_selection.lower() else len)
         | 
| 39 | 
             
                if splitter_selection == LABEL_TEXTSPLITTER:
         | 
| 40 | 
             
                    text_splitter = CharacterTextSplitter(
         | 
|  | |
| 4 | 
             
                RecursiveCharacterTextSplitter,
         | 
| 5 | 
             
            )
         | 
| 6 | 
             
            from transformers import AutoTokenizer
         | 
| 7 | 
            +
            from overlap import unoverlap_list
         | 
| 8 |  | 
| 9 | 
             
            LABEL_TEXTSPLITTER = "🦜🔗 LangChain's CharacterTextSplitter"
         | 
| 10 | 
             
            LABEL_RECURSIVE = "🦜🔗 LangChain's RecursiveCharacterTextSplitter"
         | 
|  | |
| 20 | 
             
                    separators = separators_str[1:-1].split(", ")
         | 
| 21 | 
             
                    return [separator.replace('"', "").replace("'", "") for separator in separators]
         | 
| 22 | 
             
                except Exception as e:
         | 
|  | |
| 23 | 
             
                    raise gr.Error(f"""
         | 
| 24 | 
            +
                    Did not succeed in extracting seperators from string: {separator_str} due to: {str(e)}.
         | 
| 25 | 
             
                    Please type it in the correct format: "['separator_1', 'separator_2', etc]"
         | 
| 26 | 
             
                    """)
         | 
| 27 |  | 
| 28 | 
             
            def change_split_selection(text, slider_count, split_selection, separator_selection, length_unit_selection):
         | 
|  | |
| 29 | 
             
                return (
         | 
| 30 | 
             
                    gr.Textbox.update(visible=(split_selection==LABEL_RECURSIVE)),
         | 
| 31 | 
             
                    chunk(text, slider_count, split_selection, separator_selection, length_unit_selection)
         | 
|  | |
| 33 |  | 
| 34 | 
             
            def chunk(text, length, splitter_selection, separators_str, length_unit_selection):
         | 
| 35 | 
             
                separators = extract_separators_from_string(separators_str)
         | 
|  | |
| 36 | 
             
                length_function = (length_tokens if "token" in length_unit_selection.lower() else len)
         | 
| 37 | 
             
                if splitter_selection == LABEL_TEXTSPLITTER:
         | 
| 38 | 
             
                    text_splitter = CharacterTextSplitter(
         | 
