"""Streamlit front end for a multi-label BERT emotion classifier.

Defines the ``EmotionTagger`` Lightning module (it must be importable under
this name so the pickled checkpoint can be restored), loads the trained model
from ``./model.pt`` and renders a text box whose content is scored against
``LABEL_COLUMNS``.
"""
import streamlit as st
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup, AdamW
import pytorch_lightning as pl

BERT_MODEL_NAME = 'bert-base-cased'
LABEL_COLUMNS = ['anger', 'joy', 'fear', 'surprise', 'sadness', 'neutral']


class EmotionTagger(pl.LightningModule):
    """BERT encoder followed by a linear layer and a per-label sigmoid."""

    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
        """Build the encoder, the classification head and the loss.

        Args:
            n_classes: Number of output labels of the linear head.
            n_training_steps: Total scheduler steps; only read by
                ``configure_optimizers``.
            n_warmup_steps: Scheduler warm-up steps; only read by
                ``configure_optimizers``.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
        self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
        self.n_training_steps = n_training_steps
        self.n_warmup_steps = n_warmup_steps
        self.criterion = nn.BCELoss()

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, probabilities)`` for a batch.

        Args:
            input_ids: Token ids passed to BERT.
            attention_mask: Attention mask passed to BERT.
            labels: Optional targets for ``nn.BCELoss``
                (presumably float tensors shaped like the output -- confirm
                against the training data module).

        Returns:
            A tuple ``(loss, output)``. ``loss`` is the integer ``0`` when
            ``labels`` is ``None``; ``output`` holds the sigmoid activations
            of the classifier head.
        """
        encoded = self.bert(input_ids, attention_mask=attention_mask)
        logits = self.classifier(encoded.pooler_output)
        output = torch.sigmoid(logits)
        loss = 0
        if labels is not None:
            loss = self.criterion(output, labels)
        return loss, output

    def _shared_step(self, batch, stage: str):
        """Run a forward pass on ``batch`` and log ``f"{stage}_loss"``."""
        labels = batch["labels"]
        loss, outputs = self(batch["input_ids"], batch["attention_mask"], labels)
        self.log(f"{stage}_loss", loss, prog_bar=True, logger=True)
        return loss, outputs, labels

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss; also return predictions and labels."""
        loss, outputs, labels = self._shared_step(batch, "train")
        return {"loss": loss, "predictions": outputs, "labels": labels}

    def validation_step(self, batch, batch_idx):
        """Compute, log and return the validation loss."""
        loss, _, _ = self._shared_step(batch, "val")
        return loss

    def test_step(self, batch, batch_idx):
        """Compute, log and return the test loss."""
        loss, _, _ = self._shared_step(batch, "test")
        # NOTE(review): the original had an unreachable per-label ROC-AUC
        # logging loop after this return. It referenced undefined names
        # (`pytorch_lightning`, `predictions`) and looked like the body of an
        # epoch-end hook whose `def` line was lost; it was removed rather
        # than guessed at.
        return loss

    def configure_optimizers(self):
        """Return AdamW (lr=2e-5) with a per-step linear warm-up schedule."""
        optimizer = AdamW(self.parameters(), lr=2e-5)
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.n_warmup_steps,
            num_training_steps=self.n_training_steps,
        )
        return dict(
            optimizer=optimizer,
            lr_scheduler=dict(
                scheduler=scheduler,
                interval='step',
            ),
        )


tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
model = torch.load("./model.pt")
# Inference only: disable dropout so repeated runs give the same scores.
model.eval()

st.title("Analisis de Sentimientos")
txt = st.text_area(label="Please write what you want to analyze...")


def run_sentiment_analysis(txt, threshold=0.5):
    """Score ``txt`` and return the labels whose probability reaches ``threshold``.

    Args:
        txt: Text to classify.
        threshold: Minimum probability for a label to be reported.

    Returns:
        A list of ``"<label> <probability>"`` strings, in ``LABEL_COLUMNS``
        order, for every label scoring at least ``threshold``.
    """
    encoding = tokenizer.encode_plus(
        txt,
        add_special_tokens=True,
        max_length=512,
        # Without truncation, inputs longer than max_length are passed through
        # unshortened and exceed BERT's position-embedding limit.
        truncation=True,
        return_token_type_ids=False,
        padding="max_length",
        return_attention_mask=True,
        return_tensors='pt',
    )
    device = next(model.parameters()).device
    # no_grad: Tensor.numpy() refuses tensors that are part of an autograd graph.
    with torch.no_grad():
        _, test_prediction = model(
            encoding["input_ids"].to(device),
            encoding["attention_mask"].to(device),
        )
    test_prediction = test_prediction.flatten().cpu().numpy()

    print('-------------------- Predictions ---------------------')
    return [
        " ".join([label, str(prediction)])
        for label, prediction in zip(LABEL_COLUMNS, test_prediction)
        if not prediction < threshold
    ]


print(txt)
if txt:
    predictions = run_sentiment_analysis(txt)
    for prediction in predictions:
        st.write(prediction)