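# Streamlit demo: masked-token infilling with two RoBERTa checkpoints.
# (Header comment added for clarity; assuming the usual Spaces layout, this
# file is app.py and is launched with `streamlit run app.py`.)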
import torch
import streamlit as st

from transformers import AutoTokenizer, AutoModelForMaskedLM

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both checkpoints use the RoBERTa vocabulary, so one tokenizer serves both.
tokenizer = AutoTokenizer.from_pretrained("roberta-large")
model = AutoModelForMaskedLM.from_pretrained("BigSalmon/FormalRobertaLincoln").to(device)
#model = AutoModelForMaskedLM.from_pretrained("BigSalmon/MrLincolnBerta")
model2 = AutoModelForMaskedLM.from_pretrained("roberta-base").to(device)


with st.expander('BigSalmon/FormalRobertaLincoln'):
  with st.form(key='my_form'):
    prompt = st.text_area(label='Enter Text. Put <mask> where you want the model to fill in the blank. You can use more than one at a time.')
    submit_button = st.form_submit_button(label='Submit')

    if submit_button:
      token_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
      # Positions of every <mask> token in the input.
      masked_position = (token_ids.squeeze() == tokenizer.mask_token_id).nonzero()
      masked_pos = [mask.item() for mask in masked_position]
      with torch.no_grad():
        output = model(token_ids)
      logits = output.logits.squeeze()
      # For each mask, show the 100 highest-scoring vocabulary items.
      for mask_index in masked_pos:
        mask_logits = logits[mask_index]
        idx = torch.topk(mask_logits, k=100, dim=0)[1]
        words = [tokenizer.decode(i.item()).strip() for i in idx]
        st.text_area(label='Infill:', value=', '.join(words), key=f'formal_{mask_index}')
 
with st.expander('roberta-base result'):
  # Same infill logic as above, using the stock roberta-base checkpoint for
  # comparison. Only runs once the form above has been submitted.
  if submit_button:
    token_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    masked_position = (token_ids.squeeze() == tokenizer.mask_token_id).nonzero()
    masked_pos = [mask.item() for mask in masked_position]
    with torch.no_grad():
      output = model2(token_ids)
    logits = output.logits.squeeze()
    for mask_index in masked_pos:
      mask_logits = logits[mask_index]
      idx = torch.topk(mask_logits, k=100, dim=0)[1]
      words = [tokenizer.decode(i.item()).strip() for i in idx]
      st.text_area(label='Infill:', value=', '.join(words), key=f'base_{mask_index}')
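
# A minimal alternative sketch (not used by the app): the same infill can be
# done with the transformers fill-mask pipeline, which handles tokenization,
# mask lookup, and top-k scoring in one call. The model name and prompt below
# are illustrative only.
#
#   from transformers import pipeline
#   unmasker = pipeline('fill-mask', model='roberta-base', top_k=5)
#   unmasker('Paris is the <mask> of France.')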