Jordan Legg committed on
Commit
002fb99
·
1 Parent(s): a35c60b
Files changed (2) hide show
  1. README.md +3 -2
  2. app.py +6 -8
README.md CHANGED
@@ -1,6 +1,7 @@
1
  ---
 
2
  title: DiffusionTokenizer
3
- emoji: 🐠
4
  colorFrom: purple
5
  colorTo: indigo
6
  sdk: gradio
@@ -8,7 +9,7 @@ sdk_version: 5.6.0
8
  app_file: app.py
9
  pinned: false
10
  license: creativeml-openrail-m
11
- short_description: Easily count tokens for any HF diffusion model.
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ python_version: 3.11.10
3
  title: DiffusionTokenizer
4
+ emoji: 🔢
5
  colorFrom: purple
6
  colorTo: indigo
7
  sdk: gradio
 
9
  app_file: app.py
10
  pinned: false
11
  license: creativeml-openrail-m
12
+ short_description: Easily visualize tokens for any diffusion model.
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,13 +1,11 @@
1
  import gradio as gr
2
  from transformers import T5TokenizerFast, CLIPTokenizer
3
 
 
 
 
4
 
5
  def count_tokens(text):
6
-
7
- # Load the common tokenizers
8
- t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
9
- clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
10
-
11
  # Get tokens and their IDs
12
  t5_tokens = t5_tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)[0].tolist()
13
  clip_tokens = clip_tokenizer.encode(text, add_special_tokens=True)
@@ -51,9 +49,9 @@ def count_tokens(text):
51
  )
52
 
53
  # Create a Gradio interface with custom layout
54
- with gr.Blocks(title="Common Diffusion Model Token Counter") as iface:
55
- gr.Markdown("# Common Diffusion Model Token Counter")
56
- gr.Markdown("Enter text to count tokens using T5 and CLIP tokenizers, commonly used in diffusion models.")
57
 
58
  with gr.Row():
59
  text_input = gr.Textbox(label="Diffusion Prompt", placeholder="Enter your prompt here...")
 
1
  import gradio as gr
2
  from transformers import T5TokenizerFast, CLIPTokenizer
3
 
4
+ # Load the common tokenizers once
5
+ t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
6
+ clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
7
 
8
  def count_tokens(text):
 
 
 
 
 
9
  # Get tokens and their IDs
10
  t5_tokens = t5_tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)[0].tolist()
11
  clip_tokens = clip_tokenizer.encode(text, add_special_tokens=True)
 
49
  )
50
 
51
  # Create a Gradio interface with custom layout
52
+ with gr.Blocks(title="DiffusionTokenizer") as iface:
53
+ gr.Markdown("# DiffusionTokenizer🔢")
54
+ gr.Markdown("A lightning fast visulization of the tokens used in diffusion models. Use it to understand how your prompt is tokenized.")
55
 
56
  with gr.Row():
57
  text_input = gr.Textbox(label="Diffusion Prompt", placeholder="Enter your prompt here...")