"""Gradio demo: show per-token likelihoods assigned by a causal language model."""
import functools

import gradio as gr
import numpy as np  # kept: used by the commented-out argmax experiment below
import torch
import torch.nn.functional as F
import transformers


@functools.lru_cache(maxsize=2)
def _load(model_name):
    """Load (and cache) a causal-LM model/tokenizer pair by HuggingFace name.

    Caching avoids re-instantiating the model on every UI interaction.
    """
    model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def generate(model_name="Salesforce/codegen-350M-mono", text="World"):
    """Sample a continuation of `text` (up to 100 total tokens) from the model.

    Returns the decoded string including the prompt. Non-deterministic
    (do_sample=True).
    """
    model, tokenizer = _load(model_name)
    input_ids = tokenizer.encode(text, return_tensors='pt')
    output = model.generate(input_ids, max_length=100, do_sample=True)
    return tokenizer.decode(output[0])


def get_token_likelyhoods(model_name="Salesforce/codegen-350M-mono", text="World"):
    """Return [(token_text, likelihood%), ...] for each token of `text`.

    For a causal LM, the logits at position i parameterize the distribution
    over the token at position i+1. The likelihood of token i therefore
    comes from the model output at position i-1. The first token has no
    conditioning context, so it is paired with a None label (rendered
    unhighlighted by gr.HighlightedText).
    """
    model, tokenizer = _load(model_name)
    input_ids = tokenizer.encode(text, return_tensors='pt')
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        out = model(input_ids)
    # squeeze(0) (not squeeze()) so a single-token input keeps a 2-D shape.
    probs = F.softmax(out.logits, dim=-1).squeeze(0)
    tokens = input_ids.squeeze(0)

    # First token: no preceding context, so no likelihood to report.
    output = [(tokenizer.decode(tokens[0]), None)]
    # BUGFIX: the original paired token i with probs[i]; logits at position i
    # predict token i+1, so token i's likelihood lives at probs[i-1].
    for i in range(1, len(tokens)):
        tok = tokens[i]
        output.append((
            tokenizer.decode(tok),
            str(round(probs[i - 1, tok].item() * 100, 4)) + "%",
            # tokenizer.decode(np.argmax(probs[i - 1].detach()))
        ))
    return output


demo = gr.Interface(
    fn=get_token_likelyhoods,
    title="Per-token likelyhood GUI based on Giant Language model Test Room",
    inputs=[
        gr.Textbox(
            label="Model name",
            lines=1,
            value="Salesforce/codegen-350M-mono",
        ),
        gr.Textbox(
            label="Text",
            lines=3,
            value="def first_n_primes(n):\n    primes = []\n    i = 2\n    while len(primes) < n:\n        if is_prime(i):\n            primes.append(i)\n        i += 1\n    return",
        ),
    ],
    # NOTE(review): .style() is Gradio 3.x API and was removed in Gradio 4
    # (pass color_map directly to HighlightedText there) — confirm the
    # pinned gradio version before upgrading.
    outputs=gr.HighlightedText(
        label="Diff",
        combine_adjacent=True,
    ).style(color_map={"+": "red", "-": "green"}),
)

if __name__ == "__main__":
    demo.launch()

# iface = gr.Interface(fn=generate, inputs=["text", "text"], outputs="text")
# iface.launch()