akivajoshBERT / README.md
jlw1491625's picture
Update README.md
13c7ff6 verified

Evaluation Pipeline:

  • Set new_data to the list of headlines you want to classify and new_labels to the expected label for each headline, in the same order (1 = NBC, 0 = FoxNews)
  • The evaluation pipeline prints the accuracy over that input data, followed by the predicted label for each individual input
################################ INPUT NEW DATA ################################
# Headlines to classify; replace these with your own strings.
new_data = [
    "Breaking news headline 1",
    "Another headline about politics",
]
# Expected class id for each headline, in the same order as new_data.
# The label map used when printing predictions is 1 -> "NBC", 0 -> "FoxNews".
new_labels = [
    1,
    0,
]
################################ INPUT NEW DATA ################################


import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
from huggingface_hub import PyTorchModelHubMixin

# Custom PyTorch Model
class CustomBERTModel(nn.Module, PyTorchModelHubMixin):
    """BERT encoder followed by a two-layer classification head.

    The pooled BERT output goes through dropout, a 128-unit ReLU layer,
    dropout again, and a final linear layer that emits one logit per label.

    Args:
        pretrained_model_name: Name or path handed to
            ``BertModel.from_pretrained`` to build the encoder.
        num_labels: Size of the output (logit) dimension.
        dropout_rate: Probability used by the shared dropout layer.
    """

    def __init__(self, pretrained_model_name, num_labels, dropout_rate=0.1):
        super().__init__()
        # Keep these attribute names stable: nn.Module derives parameter
        # names (and therefore checkpoint keys) from them.
        self.bert = BertModel.from_pretrained(pretrained_model_name)
        self.dropout = nn.Dropout(dropout_rate)
        hidden_size = self.bert.config.hidden_size
        self.fc1 = nn.Linear(hidden_size, 128)
        self.fc2 = nn.Linear(128, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the encoder and head, optionally computing cross-entropy loss.

        Args:
            input_ids: Token ids forwarded to the BERT encoder.
            attention_mask: Attention mask forwarded to the BERT encoder.
            labels: Optional targets; when given, a cross-entropy loss
                between the logits and these targets is also returned.

        Returns:
            ``{"logits": logits}`` when ``labels`` is ``None``, otherwise
            ``{"loss": loss, "logits": logits}``.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden = self.dropout(encoded.pooler_output)
        hidden = self.dropout(torch.relu(self.fc1(hidden)))
        logits = self.fc2(hidden)

        # Inference path: nothing to score against.
        if labels is None:
            return {"logits": logits}

        criterion = nn.CrossEntropyLoss()
        return {"loss": criterion(logits, labels), "logits": logits}


# Load the fine-tuned classifier from the Hub and the bert-base-uncased tokenizer.
model = CustomBERTModel.from_pretrained("Akiva-Josh/akivajoshBERT")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Fail early on empty or mismatched inputs. Without this check a single label
# would silently broadcast against every prediction and produce a bogus
# accuracy (possibly above 100%), and an empty list would end in a
# ZeroDivisionError when the accuracy is computed.
if not new_data or len(new_data) != len(new_labels):
    raise ValueError(
        "new_data and new_labels must be non-empty and the same length "
        f"(got {len(new_data)} inputs and {len(new_labels)} labels)"
    )

# Every headline is truncated/padded to exactly 40 tokens.
inputs = tokenizer(new_data, truncation=True, padding="max_length", max_length=40, return_tensors="pt")

model.eval()  # switch dropout off for inference
with torch.no_grad():
    outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
    logits = outputs["logits"]

# Predicted class id per input = index of the largest logit.
predictions = torch.argmax(logits, dim=-1)
actual_labels = torch.tensor(new_labels)
correct = (predictions == actual_labels).sum().item()
accuracy = correct / len(actual_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Translate class ids back to source names for the per-input report.
label_map_reverse = {1: "NBC", 0: "FoxNews"}
predicted_labels = [label_map_reverse[p.item()] for p in predictions]
print(predicted_labels)