Taranosaurus committed on
Commit
2488d19
•
1 Parent(s): bbb587f

Formatting and legibility changes

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -80,17 +80,23 @@ def de_tokenize_er(pairs):
80
  with gr.Blocks() as frontend:
81
  with gr.Row():
82
  with gr.Column(scale=3):
83
- gr.Markdown("# 🐇 Tokenizaminer\n\n### The Tokenizer Examiner... 🕵️🕳️\n\nThe purpose of this tool is to examine the vocabulary and tokens of a models tokenizer and play with the results.\n\n## Instructions\n\n1. Load a tokenizer\n2. Type and Tokenize a sequence\n3. Manipulate it to see what happens!")
 
 
84
  with gr.Group():
85
- input_checkpoint = gr.Dropdown(label="1. Tokenizer", choices=checkpoints, value=checkpoint, allow_custom_value=True, info="Select from the list or enter any model from 🤗 Hugging Face Models, it will only download the Tokenizer data! Image models won't work here.")
86
  btn_load_tokenizer = gr.Button(value="Load Tokenizer")
87
  with gr.Row():
88
- input_sequence = gr.TextArea(label="2. Sequence", value=sequence, placeholder=placeholder, lines=3, interactive=True)
 
 
89
  with gr.Row():
90
  btn_tokenize = gr.Button(value="Tokenize!")
91
  btn_random_seq = gr.Button(value="Randomize!")
92
  with gr.Row():
93
- token_id_pair = gr.DataFrame(label="3. Decode", col_count=(2,"fixed"), headers=["Token","ID"], type="array", datatype=["str", "number"], height=400, interactive=True)
 
 
94
  with gr.Row():
95
  btn_decode = gr.Button(value="Decode")
96
  with gr.Row():
@@ -101,6 +107,7 @@ with gr.Blocks() as frontend:
101
  output_decoded_ids = gr.TextArea(label="Decoded IDs", interactive=False)
102
  with gr.Column(scale=1):
103
  with gr.Group():
 
104
  output_vocab_count = gr.Number(label="Vocab Size", interactive=False)
105
  output_unknown_token = gr.Textbox(label="Unknown Token", interactive=False)
106
  output_vocab = gr.Code(label="Vocabulary")
 
80
  with gr.Blocks() as frontend:
81
  with gr.Row():
82
  with gr.Column(scale=3):
83
+ gr.Markdown("# 🐇 Tokenizaminer\n### The Tokenizer Examiner... 🕵️🕳️\nThe purpose of this tool is to examine the vocabulary and tokens of a models tokenizer and play with the results.")
84
+ with gr.Row():
85
+ gr.Markdown("\n#### 1. Load Tokenizer\nSelect from the list or enter any model from 🤗 Hugging Face Models, it will only download the Tokenizer data! Image models won't work here.")
86
  with gr.Group():
87
+ input_checkpoint = gr.Dropdown(choices=checkpoints, value=checkpoint, allow_custom_value=True, container=False)
88
  btn_load_tokenizer = gr.Button(value="Load Tokenizer")
89
  with gr.Row():
90
+ gr.Markdown("\n#### 2. Sequence & Tokenize")
91
+ with gr.Row():
92
+ input_sequence = gr.TextArea(value=sequence, placeholder=placeholder, lines=3, interactive=True, container=False)
93
  with gr.Row():
94
  btn_tokenize = gr.Button(value="Tokenize!")
95
  btn_random_seq = gr.Button(value="Randomize!")
96
  with gr.Row():
97
+ gr.Markdown("\n#### 3. Decode\nYou can select and edit each cell individually - then hit Decode!")
98
+ with gr.Row():
99
+ token_id_pair = gr.DataFrame(col_count=(2,"fixed"), headers=["Token","ID"], type="array", datatype=["str", "number"], height=400, interactive=True)
100
  with gr.Row():
101
  btn_decode = gr.Button(value="Decode")
102
  with gr.Row():
 
107
  output_decoded_ids = gr.TextArea(label="Decoded IDs", interactive=False)
108
  with gr.Column(scale=1):
109
  with gr.Group():
110
+ gr.Markdown("\n#### Tokenizer Data")
111
  output_vocab_count = gr.Number(label="Vocab Size", interactive=False)
112
  output_unknown_token = gr.Textbox(label="Unknown Token", interactive=False)
113
  output_vocab = gr.Code(label="Vocabulary")