ffreemt committed on
Commit
055462a
·
1 Parent(s): 3431b72

Update better separator for get_doc_chunks chunk_size 400, chunk_overlap 0

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -138,8 +138,9 @@ MODEL_NAME = "paraphrase-multilingual-mpnet-base-v2" # 1.11G
138
  # openai max 4097
139
  # retriever default k = 4, query length about CHUNK_SIZE
140
  # CHUNK_SIZE = about 4097 / 5: 820, with safety room: 625
141
- CHUNK_SIZE = 625 # 250
142
- CHUNK_OVERLAP = 60 # 50
 
143
 
144
  ns_initial = SimpleNamespace(
145
  db=None,
 
138
  # openai max 4097
139
  # retriever default k = 4, query length about CHUNK_SIZE
140
  # CHUNK_SIZE = about 4097 / 5: 820, with safety room: 625
141
+ # Chinese: ~2 tokens/char, so 820 / 2 = 410
142
+ CHUNK_SIZE = 400 # 250, 625
143
+ CHUNK_OVERLAP = 0 # 50, 60
144
 
145
  ns_initial = SimpleNamespace(
146
  db=None,