Commit
ca4f672
1 Parent(s): 15f62f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -1,13 +1,20 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer
3
 
4
- # Define a function to tokenize text with a selected tokenizer
 
 
5
  def tokenize_text(text, tokenizer_name):
6
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
7
  tokenized_text = tokenizer.tokenize(text)
8
  input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
9
- decoded_text = tokenizer.decode(input_ids) # Decode the input IDs
10
- return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}\nDecoded Text: {decoded_text}"
 
 
 
 
 
11
 
12
 
13
  # Define available tokenizers
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer
3
 
4
+ chart_html = gr.HTML(label="Token Frequency Chart")
5
+
6
+ # Define a function to tokenize text and create visualization
7
  def tokenize_text(text, tokenizer_name):
8
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
9
  tokenized_text = tokenizer.tokenize(text)
10
  input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
11
+ decoded_text = tokenizer.decode(input_ids)
12
+
13
+ # Create visualization HTML
14
+ chart_html = create_token_frequency_chart(tokenized_text)
15
+
16
+ return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}\nDecoded Text: {decoded_text}", chart_html
17
+
18
 
19
 
20
  # Define available tokenizers