Spaces:

svasthaintelligence
/

MeSHClassify

Runtime error

App Files Files Community

MeSHClassify / app.py

akapoor

Upload 3 files

0623078 over 2 years ago

raw

history blame contribute delete

3.92 kB

	import pandas as pd
	import numpy as np
	import torch.nn.functional as F
	import torch
	import os
	import torch.nn as nn
	from torch.utils.data import Dataset, DataLoader
	from transformers import BertTokenizerFast as BertTokenizer, AutoModelForSequenceClassification, AutoTokenizer,AutoModel,BertModel, AdamW, get_linear_schedule_with_warmup
	import pytorch_lightning as pl
	from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
	from pytorch_lightning.loggers import TensorBoardLogger
	import streamlit as st
	import torchmetrics
	# The checkpoint file is expected to sit next to this script.
	pwd = os.path.dirname(__file__)
	MODEL_PATH = os.path.join(pwd, "data.pt")
	# Echo the resolved checkpoint path to stdout.
	print(MODEL_PATH)

	# Hub identifier of the pretrained checkpoint; the tokenizer is loaded from
	# this name here and the classifier below reuses the same constant.
	BERT_MODEL_NAME = "albert-base-v1"
	tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_NAME)

	class MeshNetwork(pl.LightningModule):
	    """Sequence classifier with 13 output labels built on ``BERT_MODEL_NAME``.

	    Wraps ``AutoModelForSequenceClassification`` in a LightningModule and
	    trains it with cross-entropy loss. Batches are expected to be mappings
	    with ``"input_ids"``, ``"attention_mask"`` and ``"labels"`` entries.
	    NOTE(review): the 13 labels presumably correspond to MeSH categories --
	    confirm against the training data.
	    """

	    def __init__(self):
	        super().__init__()
	        self.bert = AutoModelForSequenceClassification.from_pretrained(
	            BERT_MODEL_NAME, num_labels=13, return_dict=True
	        )
	        self.criterion = F.cross_entropy

	    def forward(self, input_ids, attention_mask):
	        """Return the raw (unnormalised) classification logits for a batch."""
	        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
	        return output.logits

	    def _shared_step(self, batch):
	        """Run one batch through the model.

	        Returns:
	            Tuple ``(loss, acc, logits, labels)`` where ``acc`` is the
	            fraction of samples whose arg-max class equals the label.
	        """
	        labels = batch["labels"]
	        logits = self.forward(batch["input_ids"], batch["attention_mask"])
	        loss = self.criterion(logits, labels)
	        # Softmax is monotonic, so arg-max over the logits picks the same
	        # class as arg-max over the probabilities.
	        predictions = logits.argmax(dim=1)
	        # Computed with plain tensor ops so it does not depend on the
	        # torchmetrics functional API, whose signature differs between
	        # releases (newer ones expect a ``task`` argument).
	        acc = (predictions == labels).float().mean()
	        return loss, acc, logits, labels

	    def training_step(self, batch, batch_idx):
	        """Log training accuracy/loss and return the loss with logits and labels."""
	        loss, acc, logits, labels = self._shared_step(batch)
	        self.log("train_acc", acc, on_step=False, prog_bar=True, on_epoch=True, logger=True)
	        self.log("train_loss", loss, prog_bar=True, on_epoch=True, logger=True)
	        return {"loss": loss, "predictions": logits, "labels": labels}

	    def validation_step(self, batch, batch_idx):
	        """Log epoch-level validation accuracy and loss."""
	        loss, acc, _, _ = self._shared_step(batch)
	        self.log("val_acc", acc, prog_bar=True, on_step=False, on_epoch=True, logger=True)
	        self.log("val_loss", loss, prog_bar=True, on_epoch=True, logger=True)

	    def test_step(self, batch, batch_idx):
	        """Log epoch-level test accuracy and loss."""
	        loss, acc, _, _ = self._shared_step(batch)
	        self.log("test_acc", acc, prog_bar=True, on_step=False, on_epoch=True, logger=True)
	        self.log("test_loss", loss, prog_bar=True, on_epoch=True, logger=True)

	    def configure_optimizers(self):
	        """Return an Adam optimizer over all parameters with default settings."""
	        optimizer = torch.optim.Adam(params=self.parameters())
	        return optimizer



	# Streamlit page: load the fine-tuned classifier, read a text from the user
	# and display the per-class probabilities and the predicted class index.

	# Sequence length every input is padded/truncated to before inference.
	MAX_SEQ_LEN = 512

	st.title("MeSH Classify")
	model = MeshNetwork()
	with st.spinner("Loading model..."):
	    # map_location keeps loading working on a CPU-only host even when the
	    # checkpoint was saved from a GPU.
	    model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device("cpu")))
	    # Switch to evaluation mode for inference.
	    model.eval()
	    print(model)

	st.success("Model loaded.")
	user_input = st.text_input("Enter text to be classified.")
	st.write("Check MeSH categories: [link](https://www.ncbi.nlm.nih.gov/mesh/1000048)")
	st.markdown("***")


	if st.button("Classify Text"):
	    if user_input:
	        encoding = tokenizer.encode_plus(
	            user_input,
	            add_special_tokens=True,
	            return_token_type_ids=False,
	            padding="max_length",
	            # Explicit length instead of relying on the tokenizer default.
	            max_length=MAX_SEQ_LEN,
	            truncation=True,
	            return_attention_mask=True,
	            return_tensors="pt",
	        )
	        # With return_tensors="pt" the encoding already has a leading batch
	        # dimension, so it can be fed to the model as-is.
	        with torch.no_grad():  # inference only: skip autograd bookkeeping
	            y_hat = model(
	                input_ids=encoding["input_ids"],
	                attention_mask=encoding["attention_mask"],
	            )
	            prob = F.softmax(y_hat, dim=1)
	        probs = prob.numpy()
	        st.table(probs)
	        predictions = prob.argmax(dim=1)
	        st.write(predictions.numpy())
	    else:
	        # Give feedback instead of silently ignoring the click.
	        st.warning("Please enter some text to classify.")