|
import streamlit as st
import torch
from torch import nn
from transformers import AutoModel, AutoTokenizer, pipeline
|
|
|
# Page heading rendered at the top of the Streamlit app.
st.markdown("### Articles classificator.")
|
|
|
|
|
@st.cache(allow_output_mutation=True)
def get_bert_and_tokenizer():
    """Load the pretrained tokenizer used to encode article text.

    Despite the name, only the tokenizer is returned; no BERT weights are
    loaded by this function. The ``st.cache`` decorator keeps the result
    between script reruns, and ``allow_output_mutation=True`` tells Streamlit
    not to hash the returned object to detect changes.

    Returns:
        The ``AutoTokenizer`` instance for ``bert-base-uncased``.
    """
    # NOTE(review): ``st.cache`` is deprecated in newer Streamlit releases in
    # favour of ``st.cache_resource``; migrate once the pinned version allows.
    model_name = 'bert-base-uncased'
    return AutoTokenizer.from_pretrained(model_name)
|
|
|
# Module-level tokenizer shared by every call to process().
tokenizer = get_bert_and_tokenizer()
|
|
|
class devops_model(nn.Module):
    """Encoder plus a small feed-forward classification head.

    The constructor leaves ``bert`` as ``None``; an encoder whose output
    exposes a ``'pooler_output'`` entry must be attached (or restored from a
    checkpoint) before the model is called. The head maps a 768-dimensional
    embedding to log-probabilities over 5 classes.

    NOTE(review): the lowercase class name is kept on purpose -- presumably
    ``model.pt`` was pickled from a class with this exact name, so renaming it
    would break loading. Confirm before changing.
    """

    def __init__(self):
        super().__init__()
        # Placeholder only: no encoder is built here.
        self.bert = None
        # Layer order is significant: positions inside the Sequential are part
        # of the parameter names stored in checkpoints.
        self.fc = nn.Sequential(
            nn.Linear(768, 768),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.BatchNorm1d(768),
            nn.Linear(768, 5),
            nn.LogSoftmax(dim=-1),
        )

    def forward(self, train_batch):
        """Run the encoder and the classification head on one batch.

        Args:
            train_batch: Mapping of keyword arguments that is unpacked into
                the encoder call (e.g. the tokenizer output).

        Returns:
            Tensor of log-probabilities produced by the head, one row per
            input example and one column per class.

        Raises:
            RuntimeError: If no encoder has been attached to ``self.bert``.
        """
        if self.bert is None:
            # Fail with an actionable message instead of the opaque
            # "'NoneType' object is not callable".
            raise RuntimeError(
                "devops_model.bert is not set; attach an encoder or load a "
                "trained checkpoint before calling the model."
            )
        emb = self.bert(**train_batch)['pooler_output']
        return self.fc(emb)
|
|
|
@st.cache(allow_output_mutation=True)
def LoadModel():
    """Load the trained classifier from ``model.pt`` onto the CPU.

    ``allow_output_mutation=True`` is required here: without it Streamlit
    hashes the returned object on every rerun to detect changes, which is
    costly for a network and conflicts with callers that flip the model into
    eval mode after loading.

    Returns:
        Whatever object was serialized into ``model.pt``, with its tensors
        mapped to the CPU.
    """
    # NOTE(review): torch.load unpickles the file, so only trusted checkpoints
    # should be placed at this path, and any class pickled into it must be
    # defined before this call. PyTorch >= 2.6 defaults to weights_only=True,
    # which rejects fully pickled modules -- confirm the pinned torch version.
    return torch.load('model.pt', map_location=torch.device('cpu'))
|
|
|
# Module-level classifier shared by every call to process().
model = LoadModel()
|
|
|
def process(title, summary):
    """Classify one article given its title and summary.

    Args:
        title: Article title text.
        summary: Article summary text.

    Returns:
        ``torch.exp`` of the first row of the model output. When the model
        emits log-probabilities this is the vector of class probabilities.
    """
    # NOTE(review): title and summary are joined with no separator, so the
    # last word of the title fuses with the first word of the summary. Kept
    # as-is because it presumably matches how training inputs were built --
    # confirm against the training code before changing.
    text = title + summary

    # Inference behaviour for layers such as Dropout/BatchNorm; also needed
    # because the batch holds a single example.
    model.eval()

    batch = tokenizer(
        [text], padding=True, truncation=True, return_tensors="pt"
    )

    # No gradients are needed for prediction: skip building the autograd
    # graph and return a plain tensor without a grad_fn.
    with torch.no_grad():
        log_probs = model(batch)

    return torch.exp(log_probs[0])
|
|
|
# Input widgets for the article to classify.
# NOTE(review): recent Streamlit releases reject text_area heights below
# 68 px -- confirm height=60 against the pinned Streamlit version.
title = st.text_area("Title", height=60)
summary = st.text_area("Summary", height=300)

# Streamlit reruns the whole script on every interaction; skip inference
# until the user has entered some text instead of classifying an empty string.
if title.strip() or summary.strip():
    st.markdown(f"{process(title, summary)}")