"""Streamlit app: classify an article (title + summary) into arXiv-style subject areas."""
import streamlit as st
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModel, pipeline

st.markdown("### Articles classificator.")


@st.cache(allow_output_mutation=True)
def get_tokenizer():
    """Load and cache the DeBERTa tokenizer matching the trained model."""
    model_name = 'microsoft/deberta-v3-small'
    return AutoTokenizer.from_pretrained(model_name)


tokenizer = get_tokenizer()


class devops_model(nn.Module):
    """DeBERTa encoder + MLP classification head over 5 subject classes.

    NOTE: class name and attribute layout must stay exactly as at training
    time — ``torch.load`` below unpickles a full model instance of this class.
    """

    def __init__(self):
        super(devops_model, self).__init__()
        self.berta = None  # encoder is populated from the pickled checkpoint
        self.fc = nn.Sequential(
            nn.Linear(768, 768),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.BatchNorm1d(768),

            nn.Linear(768, 5),
            nn.LogSoftmax(dim=-1)
        )

    def forward(self, train_batch):
        # Mean-pool the token embeddings, then classify.
        emb = self.berta(**train_batch)['last_hidden_state'].mean(axis=1)
        return self.fc(emb)


# FIX: allow_output_mutation=True — model.eval() below mutates the cached
# object, which plain @st.cache would re-hash / warn about on every rerun.
@st.cache(allow_output_mutation=True)
def LoadModel():
    """Load the pickled full model onto CPU (cached across Streamlit reruns)."""
    return torch.load('model_full.pt', map_location=torch.device('cpu'))


model = LoadModel()

classes = ['Computer Science', 'Mathematics', 'Physics',
           'Quantitative Biology', 'Statistics']


def process(title, summary):
    """Return formatted class probabilities covering >= 95% of the mass.

    Args:
        title: article title text from the UI.
        summary: article abstract text from the UI.

    Returns:
        list[str]: lines like ``'Physics: 0.812'`` for the most likely
        classes, most probable first; empty list when both fields are blank.
    """
    # FIX: join with a space so the title's last word does not fuse with the
    # summary's first word before tokenization.
    text = (title + ' ' + summary).strip()
    if not text:
        return []  # FIX: consistent return type (was the empty string '')
    model.eval()
    X = tokenizer([text], padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():  # FIX: inference only — skip autograd bookkeeping
        out = model(X)
    probs = torch.exp(out[0])  # model emits log-probabilities (LogSoftmax)
    sorted_indexes = torch.argsort(probs, descending=True)
    res = []
    probs_sum = 0.0
    # Accumulate the top classes until 95% of the probability mass is shown.
    # Iterating the index tensor (FIX) also bounds the loop, unlike the
    # original unguarded while/idx walk.
    for prob_idx in sorted_indexes:
        if probs_sum >= 0.95:
            break
        prob = probs[prob_idx].item()
        res.append(f'{classes[prob_idx]}: {prob:.3f}')
        probs_sum += prob
    return res


title = st.text_area("Title", height=30)
summary = st.text_area("Summary", height=180)

for string in process(title, summary):
    st.markdown(string)