davanstrien HF Staff committed on
Commit
b24cb59
·
verified ·
1 Parent(s): c4e9431

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -39
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import spaces
2
  import gradio as gr
3
  import torch
4
  from transformers import GPT2LMHeadModel, GPT2Tokenizer
@@ -7,11 +6,10 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
7
  model = GPT2LMHeadModel.from_pretrained("gpt2")
8
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
9
 
10
- @spaces.GPU
11
- def get_next_token_probs(text, top_k=5):
12
  # Handle empty input
13
  if not text.strip():
14
- return [""] * top_k
15
 
16
  # Tokenize input
17
  input_ids = tokenizer.encode(text, return_tensors="pt")
@@ -25,8 +23,8 @@ def get_next_token_probs(text, top_k=5):
25
  next_token_logits = logits[0, -1, :]
26
  next_token_probs = torch.softmax(next_token_logits, dim=0)
27
 
28
- # Get top-k tokens and their probabilities
29
- topk_probs, topk_indices = torch.topk(next_token_probs, top_k)
30
  topk_tokens = [tokenizer.decode([idx]) for idx in topk_indices]
31
 
32
  # Format the results as strings
@@ -34,35 +32,15 @@ def get_next_token_probs(text, top_k=5):
34
  for i, (token, prob) in enumerate(zip(topk_tokens, topk_probs)):
35
  # Format probability as percentage with 1 decimal place
36
  prob_percent = f"{prob.item()*100:.1f}%"
37
- # Clean up token display (remove leading space if present)
38
- display_token = token.replace(" ", "␣") # Replace space with visible space symbol
39
  # Format the output string
40
  formatted_results.append(f"{i+1}. \"{display_token}\" ({prob_percent})")
41
 
42
  return formatted_results
43
 
44
- # Create custom CSS
45
- custom_css = """
46
- .token-box {
47
- margin-top: 10px;
48
- padding: 15px;
49
- border-radius: 8px;
50
- background-color: #f7f7f7;
51
- font-family: monospace;
52
- font-size: 16px;
53
- }
54
- .token-item {
55
- margin: 8px 0;
56
- padding: 8px;
57
- background-color: white;
58
- border-left: 4px solid #2c8ecb;
59
- border-radius: 4px;
60
- }
61
- footer {display: none}
62
- """
63
-
64
- # Create minimal interface
65
- with gr.Blocks(css=custom_css) as demo:
66
  gr.Markdown("### GPT-2 Next Token Predictor")
67
 
68
  # Input textbox
@@ -72,25 +50,28 @@ with gr.Blocks(css=custom_css) as demo:
72
  value="The weather tomorrow will be"
73
  )
74
 
75
- # Container for token displays
76
- with gr.Box(elem_classes=["token-box"]):
77
- gr.Markdown("##### Most likely next tokens:")
78
- token_outputs = [gr.Markdown(elem_classes=["token-item"]) for _ in range(5)]
 
 
 
 
 
79
 
80
- # Function to update tokens in real-time
81
- def update_tokens(text):
82
- return get_next_token_probs(text)
83
 
84
  # Set up the live update
85
  input_text.change(
86
- fn=update_tokens,
87
  inputs=input_text,
88
  outputs=token_outputs
89
  )
90
 
91
  # Initialize with default text
92
  demo.load(
93
- fn=update_tokens,
94
  inputs=input_text,
95
  outputs=token_outputs
96
  )
 
 
1
  import gradio as gr
2
  import torch
3
  from transformers import GPT2LMHeadModel, GPT2Tokenizer
 
6
  model = GPT2LMHeadModel.from_pretrained("gpt2")
7
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
8
 
9
+ def get_next_token_probs(text):
 
10
  # Handle empty input
11
  if not text.strip():
12
+ return ["No input text"] * 5
13
 
14
  # Tokenize input
15
  input_ids = tokenizer.encode(text, return_tensors="pt")
 
23
  next_token_logits = logits[0, -1, :]
24
  next_token_probs = torch.softmax(next_token_logits, dim=0)
25
 
26
+ # Get top-5 tokens and their probabilities
27
+ topk_probs, topk_indices = torch.topk(next_token_probs, 5)
28
  topk_tokens = [tokenizer.decode([idx]) for idx in topk_indices]
29
 
30
  # Format the results as strings
 
32
  for i, (token, prob) in enumerate(zip(topk_tokens, topk_probs)):
33
  # Format probability as percentage with 1 decimal place
34
  prob_percent = f"{prob.item()*100:.1f}%"
35
+ # Clean up token display (replace space with visible space symbol)
36
+ display_token = token.replace(" ", "␣")
37
  # Format the output string
38
  formatted_results.append(f"{i+1}. \"{display_token}\" ({prob_percent})")
39
 
40
  return formatted_results
41
 
42
+ # Create minimal interface with simpler components
43
+ with gr.Blocks(css="footer {display: none}") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  gr.Markdown("### GPT-2 Next Token Predictor")
45
 
46
  # Input textbox
 
50
  value="The weather tomorrow will be"
51
  )
52
 
53
+ # Simple header for results
54
+ gr.Markdown("##### Most likely next tokens:")
55
+
56
+ # Individual output textboxes for each token
57
+ token1 = gr.Markdown()
58
+ token2 = gr.Markdown()
59
+ token3 = gr.Markdown()
60
+ token4 = gr.Markdown()
61
+ token5 = gr.Markdown()
62
 
63
+ token_outputs = [token1, token2, token3, token4, token5]
 
 
64
 
65
  # Set up the live update
66
  input_text.change(
67
+ fn=get_next_token_probs,
68
  inputs=input_text,
69
  outputs=token_outputs
70
  )
71
 
72
  # Initialize with default text
73
  demo.load(
74
+ fn=get_next_token_probs,
75
  inputs=input_text,
76
  outputs=token_outputs
77
  )