Prgckwb committed on
Commit • 0a485e6
1 Parent(s): d9d3f4b

:tada: init
app.py
CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 import torch
 from diffusers import DiffusionPipeline
 from transformers import AutoTokenizer, CLIPTokenizerFast, T5TokenizerFast
-
+import pandas as pd
 
 def load_tokenizers(model_id: str) -> list[CLIPTokenizerFast | T5TokenizerFast | None]:
     config = DiffusionPipeline.load_config(model_id)
@@ -25,19 +25,20 @@ def load_tokenizers(model_id: str) -> list[CLIPTokenizerFast | T5TokenizerFast | None]:
 
 
 @torch.no_grad()
-def inference(model_id: str, input_text: str):
+def inference(model_id: str, text: str):
     tokenizers = load_tokenizers(model_id)
 
     text_pairs_components = []
     special_tokens_components = []
+    tokenizer_details_components = []
     for i, tokenizer in enumerate(tokenizers):
         if tokenizer:
             label_text = f"Tokenizer {i + 1}: {tokenizer.__class__.__name__}"
 
             # Create pairs of text and token IDs
             input_ids = tokenizer(
-                text=
-                truncation=
+                text=text,
+                truncation=False,
                 return_length=False,
                 return_overflowing_tokens=False,
             ).input_ids
@@ -49,7 +50,6 @@ def inference(model_id: str, input_text: str):
                 label=label_text,
                 value=token_pairs,
                 visible=True,
-                show_legend=True,
             )
 
             # Add the special tokens
@@ -63,16 +63,32 @@ def inference(model_id: str, input_text: str):
                 label=label_text,
                 value=special_tokens,
                 visible=True,
-
+            )
+
+            # Add the tokenizer's detail information
+            tokenizer_details = pd.DataFrame([
+                ("Type", tokenizer.__class__.__name__),
+                ("Vocab Size", tokenizer.vocab_size),
+                ("Model Max Length", tokenizer.model_max_length),
+                ("Padding Side", tokenizer.padding_side),
+                ("Truncation Side", tokenizer.truncation_side),
+            ], columns=["Attribute", "Value"])
+            output_tokenizer_details = gr.Dataframe(
+                headers=["Attribute", "Value"],
+                value=tokenizer_details,
+                label=label_text,
+                visible=True,
             )
         else:
             output_text_pair_component = gr.HighlightedText(visible=False)
             output_special_tokens_component = gr.HighlightedText(visible=False)
+            output_tokenizer_details = gr.Dataframe(visible=False)
 
         text_pairs_components.append(output_text_pair_component)
         special_tokens_components.append(output_special_tokens_component)
+        tokenizer_details_components.append(output_tokenizer_details)
 
-    return
+    return text_pairs_components + special_tokens_components + tokenizer_details_components
 
 
 if __name__ == "__main__":
@@ -110,6 +126,11 @@ if __name__ == "__main__":
                 output_special_tokens_1 = gr.HighlightedText()
                 output_special_tokens_2 = gr.HighlightedText()
                 output_special_tokens_3 = gr.HighlightedText()
+        with gr.Tab(label="Tokenizer Details"):
+            with gr.Column():
+                output_tokenizer_details_1 = gr.Dataframe(headers=["Attribute", "Value"])
+                output_tokenizer_details_2 = gr.Dataframe(headers=["Attribute", "Value"])
+                output_tokenizer_details_3 = gr.Dataframe(headers=["Attribute", "Value"])
 
         with gr.Row():
             clear_button = gr.ClearButton(components=[input_text])
@@ -123,6 +144,9 @@ if __name__ == "__main__":
            output_special_tokens_1,
            output_special_tokens_2,
            output_special_tokens_3,
+           output_tokenizer_details_1,
+           output_tokenizer_details_2,
+           output_tokenizer_details_3,
        ]
        submit_button.click(fn=inference, inputs=all_inputs, outputs=all_output)
 
@@ -141,4 +165,4 @@ if __name__ == "__main__":
        cache_examples=True,
    )
 
-    demo.queue().launch()
+    demo.queue().launch()
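For context, a minimal standalone sketch of the tokenizer-details table this commit adds inside inference(). The attribute list mirrors the pd.DataFrame block in the diff above; the direct use of CLIPTokenizerFast and the model id "openai/clip-vit-base-patch32" are illustrative assumptions only, since the app itself resolves its tokenizers through DiffusionPipeline.load_config in load_tokenizers().

# Illustrative sketch only: builds the same attribute table the commit adds,
# but loads one tokenizer directly instead of going through load_tokenizers().
import pandas as pd
from transformers import CLIPTokenizerFast

# Hypothetical model id for demonstration; any tokenizer repo would work.
tokenizer = CLIPTokenizerFast.from_pretrained("openai/clip-vit-base-patch32")

tokenizer_details = pd.DataFrame(
    [
        ("Type", tokenizer.__class__.__name__),
        ("Vocab Size", tokenizer.vocab_size),
        ("Model Max Length", tokenizer.model_max_length),
        ("Padding Side", tokenizer.padding_side),
        ("Truncation Side", tokenizer.truncation_side),
    ],
    columns=["Attribute", "Value"],
)
print(tokenizer_details)  # 5 rows x 2 columns, e.g. Type = CLIPTokenizerFast, Vocab Size = 49408

Because inference() now returns the detail components appended after the text-pair and special-token components, their order matches all_output, which is what lets submit_button.click wire the three new gr.Dataframe outputs alongside the existing gr.HighlightedText outputs.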