Prgckwb committed
Commit 6c94b18
1 parent: 93013c6

Add app.py

Files changed (2)
  1. app.py +66 -4
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,7 +1,69 @@
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"

- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
- iface.launch()
  import gradio as gr
+ import pandas as pd
+ from gradio.themes import colors
+ from transformers import AutoTokenizer

+ # Function to map tokenized text to IDs
+ def inference(
+     text="",
+     model_id="openai/clip-vit-large-patch14",
+ ) -> tuple[list[tuple[str, str]], pd.DataFrame]:
+     if text == "":
+         return [], pd.DataFrame()
+     tokenizer = AutoTokenizer.from_pretrained(model_id)

+     # Use the tokenizer to tokenize the text
+     text_inputs = tokenizer(text, return_tensors='pt')
+
+     input_ids = text_inputs['input_ids'].tolist()[0]  # Convert tensor to list
+
+     # Create pairs of tokens and IDs
+     tokens = [tokenizer.decode([id_]) for id_ in input_ids]
+     token_pairs = []
+
+     for token, id_ in zip(tokens, input_ids):
+         token_pairs.append((token, str(id_)))
+
+     # Count the number of characters and tokens
+     pos_count = pd.DataFrame({
+         "Char Count": [len(text)],
+         "Token Count": [len(token_pairs)]
+     })
+     return token_pairs, pos_count
+
+
+ if __name__ == '__main__':
+     iface = gr.Interface(
+         fn=inference,
+         inputs=[
+             gr.Textbox(label="Text"),
+             gr.Dropdown(
+                 label="Model",
+                 choices=[
+                     "openai/clip-vit-large-patch14",
+                     "google-bert/bert-base-uncased",
+                     "google/flan-t5-base",
+                     "openai-community/gpt2",
+                 ],
+                 value="openai/clip-vit-large-patch14"
+             ),
+         ],
+         outputs=[
+             gr.HighlightedText(label="Highlighted Text"),
+             gr.Dataframe(label="Position Count"),
+         ],
+         examples=[
+             ["When I told my computer I needed a break, it froze.", "openai/clip-vit-large-patch14"],
+             ["Yesterday, I thought my cat was studying for her degree in philosophy because she sat on my book, "
+              "but turns out she was just trying to hatch a plot to steal my dinner.", "openai/clip-vit-large-patch14"],
+             ["The square root of x is the cube root of y. What is y to the power of 2, if x = 4?",
+              "google/flan-t5-base"]
+         ],
+         cache_examples=True,
+         title="TokenVisor",
+         description="Visualize how the Tokenizer used in Hugging Face's Transformers library tokenizes text.",
+         theme=gr.Theme(primary_hue=colors.green, secondary_hue=colors.yellow),
+         allow_flagging="never",
+     )
+     iface.launch()
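As a quick sanity check, the new inference function can be imported and called outside the Gradio UI. A minimal sketch (the input text is taken from the app's own examples; the tokens and IDs it prints depend on the selected tokenizer, so no specific output is assumed):

    # Sketch: call inference() directly and inspect the (token, id) pairs it returns
    from app import inference

    token_pairs, pos_count = inference(
        text="When I told my computer I needed a break, it froze.",
        model_id="openai/clip-vit-large-patch14",
    )
    for token, token_id in token_pairs:
        print(f"{token!r} -> {token_id}")
    print(pos_count)  # one-row DataFrame with "Char Count" and "Token Count"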
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch
+ transformers
+ safetensors
+ accelerate
+ diffusers
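app.py itself only imports gradio, pandas, and transformers (with torch backing the return_tensors='pt' call); gradio and pandas are presumably supplied by the Space's Gradio SDK. A throwaway sketch to confirm the declared dependencies resolve in the runtime:

    # Sketch: verify the newly declared dependencies import cleanly
    import torch
    import transformers
    import safetensors
    import accelerate
    import diffusers

    print(torch.__version__, transformers.__version__, diffusers.__version__)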