davanstrien (HF Staff) committed · verified
Commit e25871b · 1 Parent(s): 83af74a

Update app.py

Files changed (1): app.py (+9 -15)
app.py CHANGED
@@ -1,15 +1,15 @@
 import gradio as gr
 import torch
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Load model and tokenizer
-model = GPT2LMHeadModel.from_pretrained("gpt2")
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M")
+tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")
 
 def get_next_token_probs(text):
     # Handle empty input
     if not text.strip():
-        return ["No input text"] * 5
+        return ["No input text"] * 20
 
     # Tokenize input
     input_ids = tokenizer.encode(text, return_tensors="pt")
@@ -23,8 +23,8 @@ def get_next_token_probs(text):
     next_token_logits = logits[0, -1, :]
     next_token_probs = torch.softmax(next_token_logits, dim=0)
 
-    # Get top-5 tokens and their probabilities
-    topk_probs, topk_indices = torch.topk(next_token_probs, 5)
+    # Get top-20 tokens and their probabilities
+    topk_probs, topk_indices = torch.topk(next_token_probs, 20)
     topk_tokens = [tokenizer.decode([idx]) for idx in topk_indices]
 
     # Format the results as strings
@@ -41,7 +41,7 @@ def get_next_token_probs(text):
 
 # Create minimal interface with simpler components
 with gr.Blocks(css="footer {display: none}") as demo:
-    gr.Markdown("### GPT-2 Next Token Predictor")
+    gr.Markdown("### SmolLM2 Next Token Predictor")
 
     # Input textbox
     input_text = gr.Textbox(
@@ -53,14 +53,8 @@ with gr.Blocks(css="footer {display: none}") as demo:
     # Simple header for results
     gr.Markdown("##### Most likely next tokens:")
 
-    # Individual output textboxes for each token
-    token1 = gr.Markdown()
-    token2 = gr.Markdown()
-    token3 = gr.Markdown()
-    token4 = gr.Markdown()
-    token5 = gr.Markdown()
-
-    token_outputs = [token1, token2, token3, token4, token5]
+    # Create 20 individual output markdown components
+    token_outputs = [gr.Markdown() for _ in range(20)]
 
     # Set up the live update
     input_text.change(
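
For context (not part of the commit), here is a minimal standalone sketch of the prediction logic after this change, assuming the same model id and top-k value as the new code; the prompt and the print loop are illustrative additions:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Same model id as the updated app.py
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M")
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")

text = "The capital of France is"  # illustrative prompt
input_ids = tokenizer.encode(text, return_tensors="pt")

# Forward pass without gradient tracking; logits shape: (batch, seq_len, vocab_size)
with torch.no_grad():
    logits = model(input_ids).logits

# Softmax over the logits at the last position gives the distribution
# over the next token
next_token_probs = torch.softmax(logits[0, -1, :], dim=0)

# Top-20 candidates, matching the updated torch.topk call in the diff
topk_probs, topk_indices = torch.topk(next_token_probs, 20)
for prob, idx in zip(topk_probs, topk_indices):
    print(f"{tokenizer.decode([idx])!r}  {prob.item():.3f}")

These 20 decoded tokens and their probabilities correspond to the strings the app formats into its 20 gr.Markdown components via the input_text.change() handler.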