| | import torch |
| | from transformers import RobertaForMaskedLM, RobertaTokenizer |
| | import gradio as gr |
| |
|
# Load the pretrained RoBERTa masked-language model and its tokenizer once at
# module import time so every scoring call reuses the same weights.
# NOTE(review): downloads ~500 MB from the HuggingFace hub on first run.
model = RobertaForMaskedLM.from_pretrained('roberta-base')
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
| |
|
| |
|
def sentence_perplexity(sentence):
    """Return the pseudo-perplexity of *sentence* under the RoBERTa MLM.

    Each token's probability is read from the model's output at its own
    position (no masking is applied, so this is a cheap approximation of the
    usual mask-one-token-at-a-time pseudo-perplexity).

    Args:
        sentence: Plain-text sentence to score.

    Returns:
        float: exp of the mean negative log-probability of the non-special
        tokens; ``inf`` when the input tokenizes to no content tokens
        (e.g. an empty string), avoiding a NaN from averaging zero values.
    """
    encoded = tokenizer(sentence, return_tensors='pt')
    input_ids = encoded['input_ids']

    with torch.no_grad():
        logits = model(input_ids).logits

    # log_softmax is numerically stable; softmax followed by torch.log can
    # yield -inf for tokens the model assigns near-zero probability.
    log_probs = torch.log_softmax(logits, dim=-1)
    token_log_probs = torch.gather(log_probs, 2, input_ids.unsqueeze(-1)).squeeze(-1)

    # Drop the <s> and </s> special tokens at the sequence boundaries so they
    # do not dilute the score.
    token_log_probs = token_log_probs[:, 1:-1]

    # Guard: an empty/whitespace sentence leaves no tokens, and .mean() over
    # an empty tensor is NaN.
    if token_log_probs.numel() == 0:
        return float('inf')

    return torch.exp(-token_log_probs.mean()).item()
| |
|
def weird_score(sentence):
    """Map a sentence's perplexity onto a human-readable 0-100% "weirdness".

    The mapping ``(p - 1) / (p + 1) * 100`` is 0 when perplexity is 1 (the
    model finds the sentence maximally predictable) and approaches 100 as
    perplexity grows without bound.

    Args:
        sentence: Plain-text sentence to score.

    Returns:
        str: formatted message, e.g. ``"Weird Score: 42.00%"``.
    """
    perplexity = sentence_perplexity(sentence)

    # Use a distinct local name: the original shadowed the function's own
    # name with its result variable.
    score = (perplexity - 1) / (perplexity + 1) * 100

    return f"Weird Score: {score:.2f}%"
| |
|
| | |
# Quick smoke test: an ordinary sentence should score lower than an absurd one.
# NOTE(review): these run at import time, before the Gradio app launches —
# consider guarding this file's script code with `if __name__ == "__main__":`.
sentence = "This is a normal sentence."
print(weird_score(sentence))

sentence = "Giraffes are known to be fluent in six languages."
print(weird_score(sentence))
| |
|
| |
|
# Build and launch the web UI. `gr.inputs.Textbox` belongs to the Gradio 2.x
# namespace that was deprecated in 3.0 and removed entirely in 4.x; input
# components now live at the package top level as `gr.Textbox`.
iface = gr.Interface(
    fn=weird_score,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),
    outputs="text",
    title="RoBERTa Weird Score Calculator",
    description="This app calculates the weird score percentage of a sentence using RoBERTa."
)
iface.launch()
| |
|