Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| import torch.nn.functional as F | |
| import torch | |
| import os | |
| import torch.nn as nn | |
| from torch.utils.data import Dataset, DataLoader | |
| from transformers import BertTokenizerFast as BertTokenizer, AutoModelForSequenceClassification, AutoTokenizer,AutoModel,BertModel, AdamW, get_linear_schedule_with_warmup | |
| import pytorch_lightning as pl | |
| from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping | |
| from pytorch_lightning.loggers import TensorBoardLogger | |
| import streamlit as st | |
| import torchmetrics | |
# Name of the pretrained Hugging Face checkpoint used for both the tokenizer
# and the classifier backbone.
BERT_MODEL_NAME = "albert-base-v1"

# The fine-tuned weights ("data.pt") are looked up in the same directory as
# this script.
pwd = os.path.dirname(__file__)
MODEL_PATH = os.path.join(pwd, "data.pt")
print(MODEL_PATH)  # echo the resolved checkpoint path to the process log

# Tokenizer shared by the inference code below.
tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_NAME)
class MeshNetwork(pl.LightningModule):
    """Sequence classifier built on the ``BERT_MODEL_NAME`` checkpoint.

    Wraps ``AutoModelForSequenceClassification`` in a LightningModule and
    trains it with cross-entropy loss. Each step logs loss and accuracy
    under the ``train_*``, ``val_*`` and ``test_*`` keys.

    Args:
        num_labels: Number of output classes of the classification head.
            Defaults to 13.
    """

    def __init__(self, num_labels=13):
        super().__init__()
        # NOTE: the attribute name ``bert`` is part of every state-dict key,
        # so it must not be renamed if existing checkpoints are to load.
        self.bert = AutoModelForSequenceClassification.from_pretrained(
            BERT_MODEL_NAME, num_labels=num_labels, return_dict=True
        )
        self.criterion = F.cross_entropy

    def forward(self, input_ids, attention_mask):
        """Return the raw (unnormalised) class logits for a batch."""
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return output.logits

    def _shared_step(self, batch):
        """Run forward pass, loss and accuracy for one batch.

        Args:
            batch: Mapping with ``"input_ids"``, ``"attention_mask"`` and
                ``"labels"`` entries.

        Returns:
            Tuple ``(loss, acc, logits, labels)``.
        """
        labels = batch["labels"]
        logits = self(batch["input_ids"], batch["attention_mask"])
        loss = self.criterion(logits, labels)
        # argmax over logits equals argmax over softmax(logits), so the
        # softmax is skipped. Accuracy is computed directly on tensors
        # because ``torchmetrics.functional.accuracy`` changed its required
        # arguments between releases (newer versions demand ``task=``).
        predictions = logits.argmax(dim=1)
        acc = (predictions == labels).float().mean()
        return loss, acc, logits, labels

    def training_step(self, batch, batch_idx):
        """Compute and log training loss/accuracy.

        Returns:
            Dict with ``"loss"``, ``"predictions"`` (the raw logits, not the
            argmax) and ``"labels"``.
        """
        loss, acc, logits, labels = self._shared_step(batch)
        self.log("train_acc", acc, on_step=False, prog_bar=True, on_epoch=True, logger=True)
        self.log("train_loss", loss, prog_bar=True, on_epoch=True, logger=True)
        return {"loss": loss, "predictions": logits, "labels": labels}

    def validation_step(self, batch, batch_idx):
        """Compute and log validation loss/accuracy (epoch-level)."""
        loss, acc, _, _ = self._shared_step(batch)
        self.log("val_acc", acc, prog_bar=True, on_step=False, on_epoch=True, logger=True)
        self.log("val_loss", loss, prog_bar=True, on_epoch=True, logger=True)

    def test_step(self, batch, batch_idx):
        """Compute and log test loss/accuracy (epoch-level)."""
        loss, acc, _, _ = self._shared_step(batch)
        self.log("test_acc", acc, prog_bar=True, on_step=False, on_epoch=True, logger=True)
        self.log("test_loss", loss, prog_bar=True, on_epoch=True, logger=True)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (default settings)."""
        optimizer = torch.optim.Adam(params=self.parameters())
        return optimizer
# Fixed token width fed to the model; inputs are padded/truncated to this.
MAX_TOKEN_LEN = 512


def _load_model():
    """Build the network, load the fine-tuned weights and switch to eval mode.

    ``map_location`` forces all tensors onto the CPU so that a checkpoint
    saved from a GPU run still loads on a CPU-only host.
    """
    network = MeshNetwork()
    state_dict = torch.load(MODEL_PATH, map_location=torch.device("cpu"))
    network.load_state_dict(state_dict)
    network.eval()
    print(network)
    return network


# Streamlit re-executes this script on every interaction. When the installed
# Streamlit offers ``st.cache_resource``, use it so the model is only built
# once per process; otherwise fall back to loading on every run.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_model = _cache_resource(show_spinner=False)(_load_model)

st.title("MeSH Classify")
with st.spinner("Loading model..."):
    model = _load_model()
st.success("Model loaded.")

user_input = st.text_input("Enter text to be classified.")
st.write("Check MeSH categories: [link](https://www.ncbi.nlm.nih.gov/mesh/1000048)")
st.markdown("***")

if st.button("Classify Text"):
    if user_input:
        # With return_tensors="pt" a single string already yields tensors of
        # shape (1, MAX_TOKEN_LEN), so no flatten/reshape is needed.
        encoding = tokenizer.encode_plus(
            user_input,
            add_special_tokens=True,
            return_token_type_ids=False,
            max_length=MAX_TOKEN_LEN,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt",
        )
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            y_hat = model(
                input_ids=encoding["input_ids"],
                attention_mask=encoding["attention_mask"],
            )
            prob = F.softmax(y_hat, dim=1)
        # One row of per-class probabilities, then the winning class index.
        probs = prob.numpy()
        st.table(probs)
        predictions = prob.argmax(dim=1)
        st.write(predictions.numpy())
    else:
        # Previously an empty input made the button do nothing at all.
        st.warning("Please enter some text to classify.")