eson committed on
Commit
d2551ad
1 Parent(s): 9dfa838
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -60,10 +60,13 @@ def example_fn(example_idx):
60
 
61
 
62
  """Replace this text in the input field to see how tokenization works
63
- 华为智能音箱发布:华为发布mate60 pro手机"""
 
 
64
 
65
  default_user_input = """Replace this text in the input field to see how tokenization works
66
- 华为发布mate60 pro手机"""
 
67
  default_tokenizer_type_1 = "llama"
68
  default_tokenizer_type_2 = "internlm_chat_7b"
69
  default_stats_vocab_size_1, default_stats_zh_token_size_1 = basic_count(default_tokenizer_type_1)
@@ -84,7 +87,8 @@ with gr.Blocks(css="style.css") as demo:
84
  with gr.Row():
85
  gr.Markdown("## Input Text")
86
  dropdown_examples = gr.Dropdown(
87
- ["空格测试", "标点测试", "符号测试", "数字测试"],
 
88
  value="Examples",
89
  type="index",
90
  show_label=False,
 
60
 
61
 
62
  """Replace this text in the input field to see how tokenization works
63
+
64
+
65
+ """
66
 
67
  default_user_input = """Replace this text in the input field to see how tokenization works
68
+ 华为发布Mate60手机
69
+ ラグビーワールドカップ2023フランス"""
70
  default_tokenizer_type_1 = "llama"
71
  default_tokenizer_type_2 = "internlm_chat_7b"
72
  default_stats_vocab_size_1, default_stats_zh_token_size_1 = basic_count(default_tokenizer_type_1)
 
87
  with gr.Row():
88
  gr.Markdown("## Input Text")
89
  dropdown_examples = gr.Dropdown(
90
+ # ["空格测试", "标点测试", "符号测试", "数字测试"],
91
+ ["spaces", "punctuations", "symbols", "digits"],
92
  value="Examples",
93
  type="index",
94
  show_label=False,