kcarnold committed
Commit 6d1408f
1 Parent(s): 325ca0f
Files changed (1)
  1. app.py +22 -8
app.py CHANGED
@@ -63,6 +63,7 @@ example_generations = model.generate(
     input_ids,
     num_beams=4,
     num_return_sequences=4,
+    max_length=100,
 )
 
 col1, col2 = st.columns(2)
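For context on the hunk above: `max_length=100` caps how many tokens beam search will generate, which keeps the example generations from running arbitrarily long. A minimal standalone sketch of an equivalent call, assuming a seq2seq checkpoint such as `t5-small` (the diff does not show which model app.py actually loads):

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Hypothetical checkpoint; a stand-in for whatever app.py loads.
tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

input_ids = tokenizer(
    "translate English to German: The weather is nice today.",
    return_tensors="pt",
).input_ids

# Beam search returning all 4 beams, now capped at 100 tokens.
example_generations = model.generate(
    input_ids,
    num_beams=4,
    num_return_sequences=4,
    max_length=100,
)
for seq in example_generations:
    print(tokenizer.decode(seq, skip_special_tokens=True))
```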
@@ -90,21 +91,34 @@ with torch.no_grad():
         input_ids = input_ids,
         decoder_input_ids = torch.tensor([decoder_input_ids]).to(device))
 
+    with st.expander("Configuration"):
+        top_k = st.slider("Number of tokens to show", min_value=1, max_value=100, value=5)
+        temperature = st.slider("Temperature", min_value=0.0, max_value=2.0, value=1.0, step=0.01)
+        show_token_ids = st.checkbox("Show token IDs", value=False)
+        show_logprobs = st.checkbox("Show log probabilities", value=False)
+        show_cumulative_probs = st.checkbox("Show cumulative probabilities", value=False)
+
     last_token_logits = model_output.logits[0, -1].cpu()
     assert len(last_token_logits.shape) == 1
-    most_likely_tokens = last_token_logits.topk(k=20)
+    # apply temperature
+    last_token_logits_with_temperature = last_token_logits / temperature
+    most_likely_tokens = last_token_logits.topk(k=top_k)
 
-    probs = last_token_logits.softmax(dim=-1)
+    probs = last_token_logits_with_temperature.softmax(dim=-1)
     probs_for_likely_tokens = probs[most_likely_tokens.indices]
 
     with tokenizer.as_target_tokenizer():
-        probs_table = pd.DataFrame({
+        prob_table_data = {
             'token': [tokenizer.decode(token_id) for token_id in most_likely_tokens.indices],
-            'id': most_likely_tokens.indices,
-            'probability': probs_for_likely_tokens,
-            'logprob': probs_for_likely_tokens.log(),
-            'cumulative probability': probs_for_likely_tokens.cumsum(0)
-        })
+        }
+        if show_token_ids:
+            prob_table_data['id'] = most_likely_tokens.indices
+        prob_table_data['probability'] = probs_for_likely_tokens
+        if show_logprobs:
+            prob_table_data['logprob'] = last_token_logits.log_softmax(dim=-1)[most_likely_tokens.indices]
+        if show_cumulative_probs:
+            prob_table_data['cumulative probability'] = probs_for_likely_tokens.cumsum(0)
+        probs_table = pd.DataFrame(prob_table_data)
 
     st.subheader("Most likely next tokens")
     st.table(probs_table.style.hide(axis='index'))
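The second hunk divides the last-token logits by a user-chosen temperature before the softmax. A standalone sketch of that arithmetic with invented logits: temperatures below 1 sharpen the distribution, temperatures above 1 flatten it, and the top-k ranking never changes because dividing by a positive constant is monotonic (which is why taking `topk` from the untempered logits is harmless).

```python
import torch

logits = torch.tensor([4.0, 2.0, 1.0, 0.5])  # invented last-token logits

for temperature in (0.5, 1.0, 2.0):
    probs = (logits / temperature).softmax(dim=-1)
    print(temperature, [round(p, 3) for p in probs.tolist()])

# 0.5 [0.979, 0.018, 0.002, 0.001]   <- sharper than T=1
# 1.0 [0.823, 0.111, 0.041, 0.025]
# 2.0 [0.567, 0.208, 0.126, 0.098]   <- flatter than T=1
```

One caveat in the new configuration block: the temperature slider's `min_value=0.0` permits a zero temperature, which divides the logits by zero and yields NaN probabilities; a small positive minimum would avoid that.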
 
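The table is now assembled column by column so that the optional columns appear only when their checkbox is set. A self-contained sketch of that pattern, with invented logits and placeholder token strings in place of the app's tokenizer:

```python
import pandas as pd
import torch

# Invented stand-ins for the app's runtime values.
top_k = 5
show_token_ids = True
show_logprobs = False
show_cumulative_probs = True
logits = torch.randn(32000)  # pretend vocabulary-sized last-token logits

most_likely_tokens = logits.topk(k=top_k)
probs = logits.softmax(dim=-1)
probs_for_likely_tokens = probs[most_likely_tokens.indices]

prob_table_data = {
    # The real app decodes ids with tokenizer.decode(); placeholders here.
    'token': [f"token_{i}" for i in most_likely_tokens.indices.tolist()],
}
if show_token_ids:
    prob_table_data['id'] = most_likely_tokens.indices.numpy()
prob_table_data['probability'] = probs_for_likely_tokens.numpy()
if show_logprobs:
    prob_table_data['logprob'] = logits.log_softmax(dim=-1)[most_likely_tokens.indices].numpy()
if show_cumulative_probs:
    prob_table_data['cumulative probability'] = probs_for_likely_tokens.cumsum(0).numpy()
probs_table = pd.DataFrame(prob_table_data)

print(probs_table.to_string(index=False))
```

Note that in the committed code the `logprob` column comes from `log_softmax` of the raw logits while `probability` uses the tempered logits, so the two columns agree only at temperature 1.0.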