akivajoshBERT / README.md
jlw1491625's picture
Update README.md
13c7ff6 verified
Evaluation Pipeline:
- Set `new_data` to your list of input headlines and `new_labels` to the matching list of expected labels (1 = NBC, 0 = FoxNews), one label per headline.
- The evaluation pipeline prints the accuracy on that input data, followed by the predicted source for each individual input.
```python
################################ INPUT NEW DATA ################################
# Headlines to classify, one string per example.
new_data = [
    "Breaking news headline 1",
    "Another headline about politics",
]
# Expected class id for each headline, in the same order as new_data
# (1 = NBC, 0 = FoxNews).
new_labels = [1, 0]
############################## END INPUT NEW DATA ##############################
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
from huggingface_hub import PyTorchModelHubMixin
# Custom PyTorch model: BERT encoder followed by a two-layer classification head.
class CustomBERTModel(nn.Module, PyTorchModelHubMixin):
    """BERT encoder with a small feed-forward classification head.

    The pooled BERT output goes through dropout, a 128-unit linear layer with
    ReLU, dropout again, and a final linear layer producing one logit per
    label.
    """

    def __init__(self, pretrained_model_name, num_labels, dropout_rate=0.1):
        """Build the encoder and the classification head.

        Args:
            pretrained_model_name: Name or path handed to
                ``BertModel.from_pretrained``.
            num_labels: Number of output classes (width of the final layer).
            dropout_rate: Dropout probability used before each linear layer.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(pretrained_model_name)
        self.dropout = nn.Dropout(dropout_rate)
        # The head's input width follows the encoder's hidden size.
        self.fc1 = nn.Linear(self.bert.config.hidden_size, 128)
        self.fc2 = nn.Linear(128, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the encoder and head, optionally computing the loss.

        Args:
            input_ids: Token ids passed to the BERT encoder.
            attention_mask: Attention mask passed to the BERT encoder.
            labels: Optional target class ids. When given, a cross-entropy
                loss against the logits is included in the result.

        Returns:
            A dict with key ``"logits"``, plus ``"loss"`` when ``labels`` is
            provided.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden = self.dropout(encoded.pooler_output)
        hidden = self.dropout(torch.relu(self.fc1(hidden)))
        logits = self.fc2(hidden)

        if labels is None:
            return {"logits": logits}

        criterion = nn.CrossEntropyLoss()
        return {"loss": criterion(logits, labels), "logits": logits}
# Validate the user-supplied inputs before downloading any weights. Without
# this check a single-element new_labels would broadcast against every
# prediction and report a wrong accuracy, and any other length mismatch would
# only fail later with an opaque tensor-size error.
if len(new_data) != len(new_labels):
    raise ValueError(
        f"new_data has {len(new_data)} items but new_labels has "
        f"{len(new_labels)}; provide exactly one label per input."
    )
if not new_data:
    raise ValueError("new_data is empty; provide at least one input to evaluate.")

# Load the fine-tuned classifier and the tokenizer used for its inputs.
model = CustomBERTModel.from_pretrained("Akiva-Josh/akivajoshBERT")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Every input is truncated/padded to exactly 40 tokens.
inputs = tokenizer(
    new_data,
    truncation=True,
    padding="max_length",
    max_length=40,
    return_tensors="pt",
)

# Evaluation mode disables dropout; no_grad skips gradient tracking.
model.eval()
with torch.no_grad():
    outputs = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )
logits = outputs["logits"]

# The predicted class is the index of the largest logit for each input.
predictions = torch.argmax(logits, dim=-1)
actual_labels = torch.tensor(new_labels)

correct = (predictions == actual_labels).sum().item()
accuracy = correct / len(actual_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Map class ids back to the news source they stand for.
label_map_reverse = {1: "NBC", 0: "FoxNews"}
predicted_labels = [label_map_reverse[p.item()] for p in predictions]
print(predicted_labels)
```