Reinforcement learning
neural_network.py (ADDED, +80 -0)
@@ -0,0 +1,80 @@
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers

# Custom layer for Boolformer, with an added threshold parameter.
class BoolformerLayer(layers.Layer):
    def __init__(self, threshold=0.5, **kwargs):
        super(BoolformerLayer, self).__init__(**kwargs)
        self.threshold = threshold

    def build(self, input_shape):
        self.dense_layer = layers.Dense(input_shape[-1], activation='relu')

    def call(self, inputs):
        # tf.math.logical_and requires boolean operands, so the float
        # activations are first thresholded into a boolean mask; the original
        # logical_and of that mask with itself is an identity, leaving a
        # binarize-then-project operation.
        logic_gate = tf.cast(inputs > self.threshold, inputs.dtype)
        return self.dense_layer(logic_gate)

# Sinusoidal positional encoding (d_model is assumed to be even): sines fill
# the first d_model/2 channels and cosines the second half. The original
# re-sliced the (seq_length, d_model/2) angle matrix, which produced an
# output of width d_model/2 instead of d_model.
def positional_encoding(seq_length, d_model):
    position = tf.range(seq_length, dtype=tf.float32)[:, tf.newaxis]
    div_term = tf.exp(tf.range(0, d_model, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / d_model))
    angle_rads = position * div_term  # (seq_length, d_model // 2)
    pos_encoding = tf.concat([tf.sin(angle_rads), tf.cos(angle_rads)], axis=-1)
    return pos_encoding[tf.newaxis, ...]  # (1, seq_length, d_model)
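
# Sanity check (hypothetical values, not part of the original file): for
# seq_length=4 and d_model=8, positional_encoding(4, 8) yields a (1, 4, 8)
# float32 tensor, sines in channels 0-3 and cosines in channels 4-7.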

# Transformer encoder block with configurable head size, head count, feed-forward
# width, and dropout rate.
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout_rate=0.1):
    attention_output = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout_rate)(inputs, inputs)
    attention_output = layers.Dropout(dropout_rate)(attention_output)
    attention_output = layers.LayerNormalization(epsilon=1e-6)(inputs + attention_output)

    ffn_output = layers.Dense(ff_dim, activation="relu")(attention_output)
    ffn_output = layers.Dense(inputs.shape[-1])(ffn_output)
    ffn_output = layers.Dropout(dropout_rate)(ffn_output)
    return layers.LayerNormalization(epsilon=1e-6)(attention_output + ffn_output)

# Q-learning layer: holds a Q-table and supports two paths -- a tabular Bellman
# update on integer indices, and an in-graph forward pass on float states.
class QLearningLayer(layers.Layer):
    def __init__(self, action_space_size, learning_rate=0.01, gamma=0.95, **kwargs):
        super(QLearningLayer, self).__init__(**kwargs)
        self.action_space_size = action_space_size
        self.learning_rate = learning_rate
        self.gamma = gamma

    def build(self, input_shape):
        self.q_table = tf.Variable(
            initial_value=tf.random.uniform([input_shape[-1], self.action_space_size], 0, 1),
            trainable=True)

    def call(self, state, action=None, reward=None, next_state=None):
        if action is not None and reward is not None and next_state is not None:
            # Tabular update path: state, action, and next_state must be
            # integer indices into the Q-table.
            q_update = reward + self.gamma * tf.reduce_max(self.q_table[next_state])
            self.q_table[state, action].assign(
                (1 - self.learning_rate) * self.q_table[state, action] + self.learning_rate * q_update)
            return tf.argmax(self.q_table[state])
        # Inside the Keras graph the state is a float tensor, so return the
        # per-action Q-values as a linear projection through the Q-table
        # (the original returned integer argmax indices, which the downstream
        # Dense heads cannot consume).
        return tf.matmul(state, self.q_table)
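
# Hypothetical demo of the tabular update path (not part of the original file):
# on a toy problem with 6 states and 4 actions, one Bellman update for
# (state=2, action=1, reward=1.0, next_state=3) moves q_table[2, 1] toward
# reward + gamma * max_a q_table[3, a]. We invoke call() directly so the
# scalar indices bypass Keras input handling.
demo_q = QLearningLayer(action_space_size=4)
demo_q.build(tf.TensorShape([None, 6]))  # q_table has shape (6, 4)
greedy_action = demo_q.call(2, action=1, reward=1.0, next_state=3)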

# Build and compile the full model: positional encoding -> transformer encoder
# -> Boolformer layer -> Q-learning layer -> policy and reward heads.
def create_neural_network_model(seq_length, d_model, action_space_size):
    input_layer = keras.Input(shape=(seq_length, d_model))

    pos_encoded = positional_encoding(seq_length, d_model) + input_layer
    transformer_output = transformer_encoder(pos_encoded, head_size=32, num_heads=2, ff_dim=64)

    x_bool = BoolformerLayer()(transformer_output)
    rl_layer = QLearningLayer(action_space_size=action_space_size)(x_bool)

    output_layer = layers.Dense(action_space_size, activation='softmax', name='Output')(rl_layer)
    reward_layer = layers.Dense(1, name='Reward')(rl_layer)

    model = keras.Model(inputs=input_layer, outputs=[output_layer, reward_layer])
    opt = optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt,
                  loss={'Output': 'categorical_crossentropy', 'Reward': 'mean_squared_error'},
                  metrics={'Output': 'accuracy'})

    return model

# Example of creating and compiling the model
seq_length = 128        # Example sequence length
d_model = 512           # Example dimension
action_space_size = 10  # Example action space size

model = create_neural_network_model(seq_length, d_model, action_space_size)
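
# Hypothetical smoke test (not part of the original file): run a random batch
# through the compiled model and check the two output shapes.
dummy_input = tf.random.uniform((2, seq_length, d_model))
policy_out, reward_out = model(dummy_input)
print(policy_out.shape)  # (2, 128, 10): per-position action distribution
print(reward_out.shape)  # (2, 128, 1): per-position reward estimate
model.summary()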