Jiahuita
committed on
Commit
•
be46298
1
Parent(s):
cd57bd2
Add custom model architecture and configuration
Browse files
- _init_.py +4 -0
- config.json +2 -2
- modeling.py +21 -0
- pipeline.py +0 -35
_init_.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .modeling import TFNewsClassifier
|
2 |
+
from transformers import AutoConfig
|
3 |
+
|
4 |
+
AutoConfig.register("news_classifier", TFNewsClassifier)
|
config.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87fdbfd9402e0152bf788f338996d8fce42dbafc705bb7c8fc7fb8f84d6817e8
|
3 |
+
size 307
|
modeling.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
from transformers import PreTrainedModel, TFPreTrainedModel
|
3 |
+
from transformers.modeling_tf_utils import TFSequenceClassificationLoss
|
4 |
+
|
5 |
+
class TFNewsClassifier(TFPreTrainedModel, TFSequenceClassificationLoss):
|
6 |
+
def __init__(self, config):
|
7 |
+
super().__init__(config)
|
8 |
+
self.num_labels = config.num_labels
|
9 |
+
|
10 |
+
# Define your model architecture
|
11 |
+
self.lstm = tf.keras.layers.LSTM(128, return_sequences=True)
|
12 |
+
self.lstm2 = tf.keras.layers.LSTM(64)
|
13 |
+
self.dropout = tf.keras.layers.Dropout(0.5)
|
14 |
+
self.classifier = tf.keras.layers.Dense(self.num_labels, activation='softmax')
|
15 |
+
|
16 |
+
def call(self, inputs, training=False):
|
17 |
+
x = self.lstm(inputs)
|
18 |
+
x = self.dropout(x, training=training)
|
19 |
+
x = self.lstm2(x)
|
20 |
+
x = self.dropout(x, training=training)
|
21 |
+
return self.classifier(x)
|
pipeline.py
DELETED
@@ -1,35 +0,0 @@
|
|
1 |
-
from transformers import Pipeline
|
2 |
-
import tensorflow as tf
|
3 |
-
from tensorflow.keras.models import load_model
|
4 |
-
from tensorflow.keras.preprocessing.text import tokenizer_from_json
|
5 |
-
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
6 |
-
import json
|
7 |
-
import numpy as np
|
8 |
-
|
9 |
-
class NewsClassifierPipeline(Pipeline):
|
10 |
-
def __init__(self):
|
11 |
-
super().__init__()
|
12 |
-
self.model = load_model('news_classifier.h5')
|
13 |
-
with open('tokenizer.json') as f:
|
14 |
-
tokenizer_data = json.load(f)
|
15 |
-
self.tokenizer = tokenizer_from_json(tokenizer_data)
|
16 |
-
|
17 |
-
def preprocess(self, text):
|
18 |
-
sequence = self.tokenizer.texts_to_sequences([text])
|
19 |
-
padded = pad_sequences(sequence)
|
20 |
-
return padded
|
21 |
-
|
22 |
-
def _forward(self, texts):
|
23 |
-
processed = self.preprocess(texts)
|
24 |
-
predictions = self.model.predict(processed)
|
25 |
-
scores = tf.nn.softmax(predictions, axis=1)
|
26 |
-
|
27 |
-
predicted_class = np.argmax(predictions)
|
28 |
-
score = float(np.max(scores))
|
29 |
-
|
30 |
-
label = 'foxnews' if predicted_class == 0 else 'nbc'
|
31 |
-
|
32 |
-
return [{'label': label, 'score': score}]
|
33 |
-
|
34 |
-
def postprocess(self, model_outputs):
|
35 |
-
return model_outputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|