Omartificial-Intelligence-Space
committed on
Update app.py
app.py CHANGED
@@ -1,13 +1,20 @@
 import gradio as gr
 from transformers import AutoTokenizer
 
-
+chart_html = gr.HTML(label="Token Frequency Chart")
+
+# Define a function to tokenize text and create visualization
 def tokenize_text(text, tokenizer_name):
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
     tokenized_text = tokenizer.tokenize(text)
     input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
-    decoded_text = tokenizer.decode(input_ids)
-
+    decoded_text = tokenizer.decode(input_ids)
+
+    # Create visualization HTML
+    chart_html = create_token_frequency_chart(tokenized_text)
+
+    return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}\nDecoded Text: {decoded_text}", chart_html
+
 
 
 # Define available tokenizers
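The added return path calls a create_token_frequency_chart helper that is not part of this hunk. Below is a minimal, hypothetical sketch of such a helper, assuming it only needs to count tokens and emit inline-styled HTML bars; the function name is taken from the diff, but the body here is an illustrative guess, not the repository's actual implementation.

from collections import Counter
import html

def create_token_frequency_chart(tokens):
    # Hypothetical helper (not shown in this commit): build a simple HTML bar chart
    # of the most frequent tokens for display in the gr.HTML output component.
    counts = Counter(tokens)
    most_common = counts.most_common(10)
    if not most_common:
        return "<p>No tokens to display.</p>"
    max_count = most_common[0][1]

    rows = []
    for token, count in most_common:
        # Scale each bar relative to the most frequent token (0-100 px wide).
        width = int(100 * count / max_count)
        rows.append(
            f"<div style='margin:2px 0'>"
            f"<code>{html.escape(token)}</code> "
            f"<span style='display:inline-block;background:#7aa;height:0.8em;width:{width}px'></span>"
            f" {count}</div>"
        )
    return "<div>" + "".join(rows) + "</div>"

Because tokenize_text now returns two values, the Gradio interface defined later in the file would also need two output components (a text output plus the gr.HTML chart); that wiring is outside this hunk.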