Jiahuita
/

NewsSourceClassification

Text Classification

news-classification

Model card · Files and versions · Community

Jiahuita committed on Nov 22

Commit

838a3ce

•

1 Parent(s): 5fda167

Add custom pipeline and fix configs

Files changed (2) hide show

pipeline.py +11 -6
requirements.txt +0 -1

pipeline.py CHANGED Viewed

@@ -8,21 +8,26 @@ import json
 class NewsClassifierPipeline(Pipeline):
     def __init__(self):
         super().__init__()
         self.model = load_model('./news_classifier.h5')
         with open('./tokenizer.json', 'r') as f:
             tokenizer_data = json.load(f)
             self.tokenizer = tokenizer_from_json(tokenizer_data)
     def preprocess(self, inputs):
         sequences = self.tokenizer.texts_to_sequences([inputs])
-        return pad_sequences(sequences, maxlen=128)
     def _forward(self, inputs):
         processed = self.preprocess(inputs)
         predictions = self.model.predict(processed)
-        label = "foxnews" if predictions[0][0] > 0.5 else "nbc"
-        score = predictions[0][0] if label == "foxnews" else 1 - predictions[0][0]
-        return [{"label": label, "score": float(score)}]
-    def postprocess(self, outputs):
-        return outputs

 class NewsClassifierPipeline(Pipeline):
     def __init__(self):
         super().__init__()
+        # Load model and tokenizer
         self.model = load_model('./news_classifier.h5')
         with open('./tokenizer.json', 'r') as f:
             tokenizer_data = json.load(f)
             self.tokenizer = tokenizer_from_json(tokenizer_data)
     def preprocess(self, inputs):
+        """Tokenizes and pads the input text."""
         sequences = self.tokenizer.texts_to_sequences([inputs])
+        padded = pad_sequences(sequences, maxlen=128)
+        return padded
     def _forward(self, inputs):
+        """Runs the model prediction."""
         processed = self.preprocess(inputs)
         predictions = self.model.predict(processed)
+        scores = predictions[0]
+        label = "foxnews" if scores[0] > 0.5 else "nbc"
+        return [{"label": label, "score": float(scores[0] if label == "foxnews" else 1 - scores[0])}]
+    def postprocess(self, model_outputs):
+        """Returns the processed output."""
+        return model_outputs

requirements.txt CHANGED Viewed

@@ -1,5 +1,4 @@
 tensorflow>=2.10.0
 transformers>=4.30.0
-torch>=2.0.0
 numpy>=1.19.2
 scikit-learn>=0.24.2

 tensorflow>=2.10.0
 transformers>=4.30.0
 numpy>=1.19.2
 scikit-learn>=0.24.2