|
from transformers import Pipeline |
|
import tensorflow as tf |
|
from tensorflow.keras.models import load_model |
|
from tensorflow.keras.preprocessing.text import tokenizer_from_json |
|
from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
import json |
|
import numpy as np |
|
|
|
class NewsClassifierPipeline(Pipeline):
    """Classify a news text as ``'foxnews'`` or ``'nbc'`` with a saved Keras model.

    The pipeline loads a Keras model and a Keras tokenizer from disk, turns
    the input text into a padded integer sequence, runs the model and reports
    the winning label together with its softmax score.
    """

    def __init__(self, model_path='news_classifier.h5',
                 tokenizer_path='tokenizer.json', max_length=None):
        """Load the model and the tokenizer.

        Args:
            model_path: Path of the saved Keras model file.
            tokenizer_path: Path of the JSON file holding the tokenizer.
            max_length: Optional length every sequence is padded/truncated
                to. ``None`` pads only to the length of the input itself.
        """
        model = load_model(model_path)
        # Pipeline.__init__ has a required ``model`` argument, so the model
        # has to be loaded before the base class is initialised.
        # NOTE(review): the base initialiser may also read ``model.config``,
        # which a plain Keras model does not define - confirm against the
        # installed transformers version.
        super().__init__(model=model, framework='tf')
        self.model = model
        self.max_length = max_length

        with open(tokenizer_path, encoding='utf-8') as f:
            tokenizer_data = json.load(f)
        # tokenizer_from_json expects a JSON *string*; re-serialise when the
        # file stored the configuration as a plain JSON object instead.
        if not isinstance(tokenizer_data, str):
            tokenizer_data = json.dumps(tokenizer_data)
        self.tokenizer = tokenizer_from_json(tokenizer_data)

    def _sanitize_parameters(self, **kwargs):
        """Return the preprocess/forward/postprocess keyword dictionaries.

        ``Pipeline`` requires this hook; this pipeline has no call-time
        options, so all three dictionaries are empty.
        """
        return {}, {}, {}

    def preprocess(self, text):
        """Convert one text into a one-row batch of padded token ids.

        Args:
            text: The text to classify.

        Returns:
            The array produced by ``pad_sequences`` for the single text.
        """
        sequence = self.tokenizer.texts_to_sequences([text])
        return pad_sequences(sequence, maxlen=self.max_length)

    def _forward(self, texts):
        """Run the model and return the predicted label with its score.

        Args:
            texts: Output of :meth:`preprocess`. Anything that is not a
                NumPy array is treated as raw input and preprocessed here,
                for callers that invoke this method directly.

        Returns:
            A one-element list ``[{'label': str, 'score': float}]``.
        """
        # The pipeline already feeds preprocess() output into _forward();
        # preprocessing that array a second time would tokenise the padded
        # ids instead of the text.
        if isinstance(texts, np.ndarray):
            processed = texts
        else:
            processed = self.preprocess(texts)

        predictions = self.model.predict(processed)
        # NOTE(review): softmax assumes the model emits logits; if its last
        # layer is already a softmax this rescales the scores - confirm.
        # Only the first row is read: preprocess() builds a one-text batch.
        scores = np.asarray(tf.nn.softmax(predictions, axis=1))[0]

        predicted_class = int(np.argmax(scores))
        score = float(scores[predicted_class])

        label = 'foxnews' if predicted_class == 0 else 'nbc'

        return [{'label': label, 'score': score}]

    def postprocess(self, model_outputs):
        """Return the output of :meth:`_forward` unchanged."""
        return model_outputs