Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import spaces
|
2 |
import gradio as gr
|
3 |
import torch
|
4 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
@@ -7,11 +6,10 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
|
7 |
model = GPT2LMHeadModel.from_pretrained("gpt2")
|
8 |
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
9 |
|
10 |
-
|
11 |
-
def get_next_token_probs(text, top_k=5):
|
12 |
# Handle empty input
|
13 |
if not text.strip():
|
14 |
-
return [""] *
|
15 |
|
16 |
# Tokenize input
|
17 |
input_ids = tokenizer.encode(text, return_tensors="pt")
|
@@ -25,8 +23,8 @@ def get_next_token_probs(text, top_k=5):
|
|
25 |
next_token_logits = logits[0, -1, :]
|
26 |
next_token_probs = torch.softmax(next_token_logits, dim=0)
|
27 |
|
28 |
-
# Get top-
|
29 |
-
topk_probs, topk_indices = torch.topk(next_token_probs,
|
30 |
topk_tokens = [tokenizer.decode([idx]) for idx in topk_indices]
|
31 |
|
32 |
# Format the results as strings
|
@@ -34,35 +32,15 @@ def get_next_token_probs(text, top_k=5):
|
|
34 |
for i, (token, prob) in enumerate(zip(topk_tokens, topk_probs)):
|
35 |
# Format probability as percentage with 1 decimal place
|
36 |
prob_percent = f"{prob.item()*100:.1f}%"
|
37 |
-
# Clean up token display (
|
38 |
-
display_token = token.replace(" ", "␣")
|
39 |
# Format the output string
|
40 |
formatted_results.append(f"{i+1}. \"{display_token}\" ({prob_percent})")
|
41 |
|
42 |
return formatted_results
|
43 |
|
44 |
-
# Create
|
45 |
-
|
46 |
-
.token-box {
|
47 |
-
margin-top: 10px;
|
48 |
-
padding: 15px;
|
49 |
-
border-radius: 8px;
|
50 |
-
background-color: #f7f7f7;
|
51 |
-
font-family: monospace;
|
52 |
-
font-size: 16px;
|
53 |
-
}
|
54 |
-
.token-item {
|
55 |
-
margin: 8px 0;
|
56 |
-
padding: 8px;
|
57 |
-
background-color: white;
|
58 |
-
border-left: 4px solid #2c8ecb;
|
59 |
-
border-radius: 4px;
|
60 |
-
}
|
61 |
-
footer {display: none}
|
62 |
-
"""
|
63 |
-
|
64 |
-
# Create minimal interface
|
65 |
-
with gr.Blocks(css=custom_css) as demo:
|
66 |
gr.Markdown("### GPT-2 Next Token Predictor")
|
67 |
|
68 |
# Input textbox
|
@@ -72,25 +50,28 @@ with gr.Blocks(css=custom_css) as demo:
|
|
72 |
value="The weather tomorrow will be"
|
73 |
)
|
74 |
|
75 |
-
#
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
-
|
81 |
-
def update_tokens(text):
|
82 |
-
return get_next_token_probs(text)
|
83 |
|
84 |
# Set up the live update
|
85 |
input_text.change(
|
86 |
-
fn=
|
87 |
inputs=input_text,
|
88 |
outputs=token_outputs
|
89 |
)
|
90 |
|
91 |
# Initialize with default text
|
92 |
demo.load(
|
93 |
-
fn=
|
94 |
inputs=input_text,
|
95 |
outputs=token_outputs
|
96 |
)
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
|
|
6 |
model = GPT2LMHeadModel.from_pretrained("gpt2")
|
7 |
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
8 |
|
9 |
+
def get_next_token_probs(text):
|
|
|
10 |
# Handle empty input
|
11 |
if not text.strip():
|
12 |
+
return ["No input text"] * 5
|
13 |
|
14 |
# Tokenize input
|
15 |
input_ids = tokenizer.encode(text, return_tensors="pt")
|
|
|
23 |
next_token_logits = logits[0, -1, :]
|
24 |
next_token_probs = torch.softmax(next_token_logits, dim=0)
|
25 |
|
26 |
+
# Get top-5 tokens and their probabilities
|
27 |
+
topk_probs, topk_indices = torch.topk(next_token_probs, 5)
|
28 |
topk_tokens = [tokenizer.decode([idx]) for idx in topk_indices]
|
29 |
|
30 |
# Format the results as strings
|
|
|
32 |
for i, (token, prob) in enumerate(zip(topk_tokens, topk_probs)):
|
33 |
# Format probability as percentage with 1 decimal place
|
34 |
prob_percent = f"{prob.item()*100:.1f}%"
|
35 |
+
# Clean up token display (replace space with visible space symbol)
|
36 |
+
display_token = token.replace(" ", "␣")
|
37 |
# Format the output string
|
38 |
formatted_results.append(f"{i+1}. \"{display_token}\" ({prob_percent})")
|
39 |
|
40 |
return formatted_results
|
41 |
|
42 |
+
# Create minimal interface with simpler components
|
43 |
+
with gr.Blocks(css="footer {display: none}") as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
gr.Markdown("### GPT-2 Next Token Predictor")
|
45 |
|
46 |
# Input textbox
|
|
|
50 |
value="The weather tomorrow will be"
|
51 |
)
|
52 |
|
53 |
+
# Simple header for results
|
54 |
+
gr.Markdown("##### Most likely next tokens:")
|
55 |
+
|
56 |
+
# Individual Markdown output components, one per predicted token
|
57 |
+
token1 = gr.Markdown()
|
58 |
+
token2 = gr.Markdown()
|
59 |
+
token3 = gr.Markdown()
|
60 |
+
token4 = gr.Markdown()
|
61 |
+
token5 = gr.Markdown()
|
62 |
|
63 |
+
token_outputs = [token1, token2, token3, token4, token5]
|
|
|
|
|
64 |
|
65 |
# Set up the live update
|
66 |
input_text.change(
|
67 |
+
fn=get_next_token_probs,
|
68 |
inputs=input_text,
|
69 |
outputs=token_outputs
|
70 |
)
|
71 |
|
72 |
# Initialize with default text
|
73 |
demo.load(
|
74 |
+
fn=get_next_token_probs,
|
75 |
inputs=input_text,
|
76 |
outputs=token_outputs
|
77 |
)
|