File size: 1,918 Bytes
7537bb1
4f2093f
 
 
 
f705779
4f2093f
3da4992
9c43379
4f2093f
 
 
 
348a64a
 
d93b6b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f2093f
d93b6b1
 
 
 
4f2093f
d93b6b1
4f2093f
 
 
 
 
 
 
 
2334258
4f2093f
 
 
46e9ba0
dbf6476
5b69d1c
4f2093f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import streamlit as st
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch.nn as nn
from scipy.special import softmax

# Banner image at the top of the page, emitted as a raw HTML <img> tag
# (hence unsafe_allow_html).
st.markdown(
    "<img width=400px src='https://blog.predanie.ru/wp-content/uploads/2018/10/content_2-1.jpg'>",
    unsafe_allow_html=True,
)

# Human-readable category names, keyed by consecutive integers starting at 1.
labels_articles = dict(enumerate(
    (
        'Computer Science',
        'Economics',
        "Electrical Engineering And Systems Science",
        "Mathematics",
        "Physics",
        "Quantitative Biology",
        "Quantitative Finance",
        "Statistics",
    ),
    start=1,
))
                  
@st.cache(allow_output_mutation=True)
def models():
    """Load the text encoder, the classification head and the tokenizer.

    Wrapped in ``st.cache`` so that Streamlit can reuse the loaded objects
    instead of calling this function again on each script rerun.

    Returns:
        A ``(model_first, model_second, tokenizer)`` tuple where
        ``model_first`` is the pretrained ``bert-base-uncased`` model,
        ``model_second`` is the ``Net`` head with weights read from
        ``model.txt`` and put in eval mode, and ``tokenizer`` is the
        tokenizer loaded for the same pretrained model name.
    """
    class Net(nn.Module):
        """MLP head: 768 -> 256 -> 128 -> 8 with ReLU between the layers."""

        def __init__(self):
            super().__init__()
            self.layer = nn.Sequential(
                nn.Linear(768, 256),
                nn.ReLU(),
                nn.Linear(256, 128),
                nn.ReLU(),
                nn.Linear(128, 8),
            )

        def forward(self, x):
            """Return the 8 raw (un-normalised) output scores for ``x``."""
            return self.layer(x)

    model_second = Net()
    # map_location='cpu' lets the checkpoint load on CPU-only hosts even if
    # it was saved from a CUDA device; the script runs inference on CPU.
    # NOTE(review): torch.load unpickles the file, so 'model.txt' must come
    # from a trusted source.
    state_dict = torch.load('model.txt', map_location='cpu')
    model_second.load_state_dict(state_dict)
    model_second.eval()

    model_name = 'bert-base-uncased'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model_first = AutoModel.from_pretrained(model_name)
    return (model_first, model_second, tokenizer)

# Script body: read the title and abstract, run them through the encoder and
# the classification head, and list the most probable categories.
model_first, model_second, tokenizer = models()

title = st.text_area("Write the title of your article, please")
abstract = st.text_area("Write the abstract")
text = title + '. ' + abstract

# Skip inference until the user has typed something; otherwise the models
# would be run on the bare ". " separator and meaningless labels printed.
if title.strip() or abstract.strip():
    tokens_info = tokenizer(text, padding=True, truncation=True, return_tensors="pt")

    # Inference only: no_grad avoids building an autograd graph, so the
    # result can be converted to NumPy without an explicit detach().
    with torch.no_grad():
        out_first = model_first(**tokens_info).pooler_output
        out_second = model_second(out_first).numpy()

    # Normalise over the class axis explicitly instead of over the whole
    # array, so each row is a probability distribution regardless of the
    # number of rows.
    out_second = softmax(out_second, axis=-1)

    # Class indices of the single input row, most probable first.
    indices = np.argsort(out_second)[0][::-1]

    # Print categories in descending probability until their cumulative
    # probability reaches 95%.
    sum_prob = 0.0
    for i in indices:
        # Model outputs are 0-based, labels_articles keys start at 1.
        st.write(labels_articles[i + 1])
        sum_prob += out_second[0][i]
        if sum_prob >= 0.95:
            break