Model structure:
Input Text
|
Custom Tokenizer (Text -> Tokens)
|
Padding (Equal Length Sequences)
|
Embedding Layer (Tokens -> Dense Vectors)
|
LSTM Layer (Capture Sequential Patterns)
|
Attention Mechanism (Focus on Important Tokens)
|
Dense Layer (Fully Connected)
|
Softmax Activation (Output: Positive or Negative Sentiment)
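
For orientation, here is a minimal sketch of how that pipeline maps onto Keras layers. The embedding size, LSTM width, and dense width are illustrative assumptions, not the trained model's actual hyperparameters, and build_model is just a hypothetical helper; AttentionLayer mirrors the custom layer defined in the test script further down.

import tensorflow as tf

MAX_VOCAB_SIZE = 10000
MAX_SEQUENCE_LENGTH = 200

class AttentionLayer(tf.keras.layers.Layer):
    # Same custom attention as in the test script below
    def call(self, inputs):
        # Score each timestep, softmax over time, return the weighted sum
        weights = tf.nn.softmax(tf.reduce_sum(inputs, axis=-1), axis=-1)
        return tf.reduce_sum(inputs * tf.expand_dims(weights, -1), axis=1)

def build_model(embedding_dim=128, lstm_units=64):
    # Padded token IDs come in as integer sequences
    inputs = tf.keras.Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    # Tokens -> dense vectors
    x = tf.keras.layers.Embedding(MAX_VOCAB_SIZE, embedding_dim)(inputs)
    # Capture sequential patterns; keep the full sequence for attention
    x = tf.keras.layers.LSTM(lstm_units, return_sequences=True)(x)
    # Focus on important tokens
    x = AttentionLayer()(x)
    # Fully connected layer
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    # Softmax over the two sentiment classes
    outputs = tf.keras.layers.Dense(2, activation='softmax')(x)
    return tf.keras.Model(inputs, outputs)
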
It achieved 95.71% accuracy on the provided dataset.
To test it, use something like:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb

MAX_VOCAB_SIZE = 10000
MAX_SEQUENCE_LENGTH = 200

# Custom attention layer used by the saved model; it must be defined
# before load_model so Keras can deserialize it.
class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def call(self, inputs):
        # Score each timestep, normalize with softmax over time,
        # then return the attention-weighted sum over the sequence.
        attention_weights = tf.nn.softmax(tf.reduce_sum(inputs, axis=-1), axis=-1)
        attention_weights = tf.expand_dims(attention_weights, -1)
        weighted_sum = inputs * attention_weights
        return tf.reduce_sum(weighted_sum, axis=1)

model = tf.keras.models.load_model('RETO-SENTIMENT.h5',
                                   custom_objects={'AttentionLayer': AttentionLayer})

word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}  # index -> word, handy for decoding

def preprocess_text(input_text):
    # Map each word to its index; 2 is the out-of-vocabulary token
    tokens = [word_index.get(word.lower(), 2) for word in input_text.split()]
    padded_sequence = pad_sequences([tokens], maxlen=MAX_SEQUENCE_LENGTH)
    return padded_sequence

def predict_sentiment(input_text):
    preprocessed_text = preprocess_text(input_text)
    prediction = model.predict(preprocessed_text)
    sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
    confidence = prediction[0][0] if sentiment == "Positive" else 1 - prediction[0][0]
    return sentiment, confidence

if __name__ == "__main__":
    test_sentences = [
        # add sentences here
    ]
    for sentence in test_sentences:
        sentiment, confidence = predict_sentiment(sentence)
        print(f"Input: {sentence}")
        print(f"Predicted Sentiment: {sentiment} (Confidence: {confidence:.2f})\n")