clementsan committed on
Commit
aa98840
1 Parent(s): 4ce7fc5

Add ASCII transliteration of unicode text

Browse files
Files changed (1) hide show
  1. app.py +3 -0
app.py CHANGED
@@ -13,6 +13,7 @@ from langchain_community.llms import HuggingFaceEndpoint
13
 
14
  from pathlib import Path
15
  import chromadb
 
16
 
17
  from transformers import AutoTokenizer
18
  import transformers
@@ -188,6 +189,8 @@ def initialize_database(list_file_obj, chunk_size, chunk_overlap, progress=gr.Pr
188
  # Fix potential issues from naming convention
189
  ## Remove space
190
  collection_name = collection_name.replace(" ","-")
 
 
191
  ## Limit length to 50 characters
192
  collection_name = collection_name[:50]
193
  ## Enforce start and end as alphanumeric character
 
13
 
14
  from pathlib import Path
15
  import chromadb
16
+ from unidecode import unidecode
17
 
18
  from transformers import AutoTokenizer
19
  import transformers
 
189
  # Fix potential issues from naming convention
190
  ## Remove space
191
  collection_name = collection_name.replace(" ","-")
192
+ ## ASCII transliterations of Unicode text
193
+ collection_name = unidecode(collection_name)
194
  ## Limit length to 50 characters
195
  collection_name = collection_name[:50]
196
  ## Enforce start and end as alphanumeric character