USE_REMOTE = False  # whether to use a remote tokenizer or a local one

# load_vocab_with_SPECIAL_TOKEN = True  # If special tokens are excluded, the vocabulary size is computed incorrectly and overlap_token counts become inconsistent.

# encoding config
ADD_SPECIAL_TOKEN = False

#
LAZY_IMPORT = True

# DEBUG: set the environment variable RUST_BACKTRACE=full

#
default_user_input = """\
Replace this text in the input field to see how tokenization works.
Buenos días!
华为发布Mate60手机。
ラグビーワールドカップ2023フランス"""
default_tokenizer_type_1 = "llama3"
default_tokenizer_type_2 = "gpt_4"
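
# Illustrative sketch (an assumption, not part of the original project code):
# one way these settings might be consumed when tokenizing the default input
# with a local Hugging Face tokenizer. The checkpoint mapping below is a
# hypothetical placeholder; the project's own mapping from tokenizer_type
# strings to tokenizer implementations may differ.
if __name__ == "__main__":
    from transformers import AutoTokenizer  # assumes `transformers` is installed

    # Hypothetical mapping from the short tokenizer_type name to a loadable checkpoint
    # (gated models such as Llama 3 may additionally require Hugging Face authentication).
    _EXAMPLE_CHECKPOINTS = {
        "llama3": "meta-llama/Meta-Llama-3-8B",
    }

    tokenizer = AutoTokenizer.from_pretrained(_EXAMPLE_CHECKPOINTS[default_tokenizer_type_1])
    # ADD_SPECIAL_TOKEN controls whether BOS/EOS-style markers are added during encoding.
    token_ids = tokenizer.encode(default_user_input, add_special_tokens=ADD_SPECIAL_TOKEN)
    print(f"{default_tokenizer_type_1}: {len(token_ids)} tokens")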