Chris4K committed on
Commit
c458050
1 Parent(s): cb430f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -87,9 +87,9 @@ def process_files(model_name, split_strategy, chunk_size, overlap_size, max_toke
87
 
88
  # Split text into chunks
89
  if split_strategy == 'token':
90
- splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap_size)
91
  else:
92
- splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap_size)
93
 
94
  chunks = splitter.split_text(text)
95
 
@@ -168,7 +168,7 @@ iface = gr.Interface(
168
  gr.File(label="Upload File"),
169
  gr.Textbox(label="Search Query"),
170
  gr.Dropdown(choices=list(MODELS.keys()), label="Embedding Model"),
171
- gr.Radio(choices=["sentence", "recursive"], label="Split Strategy"),
172
  gr.Slider(100, 1000, step=100, value=500, label="Chunk Size"), # Ensure type is int
173
  gr.Slider(0, 100, step=10, value=50, label="Overlap Size"), # Ensure type is int
174
  gr.Slider(50, 500, step=50, value=200, label="Max Tokens"), # Ensure type is int
 
87
 
88
  # Split text into chunks
89
  if split_strategy == 'token':
90
+ splitter = TokenTextSplitter(chunk_size=250, chunk_overlap=20)
91
  else:
92
+ splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=20)
93
 
94
  chunks = splitter.split_text(text)
95
 
 
168
  gr.File(label="Upload File"),
169
  gr.Textbox(label="Search Query"),
170
  gr.Dropdown(choices=list(MODELS.keys()), label="Embedding Model"),
171
+ gr.Radio(choices=["token", "recursive"], label="Split Strategy"),
172
  gr.Slider(100, 1000, step=100, value=500, label="Chunk Size"), # Ensure type is int
173
  gr.Slider(0, 100, step=10, value=50, label="Overlap Size"), # Ensure type is int
174
  gr.Slider(50, 500, step=50, value=200, label="Max Tokens"), # Ensure type is int