Improving the UI
DDQN.py CHANGED
@@ -27,8 +27,8 @@ class DoubleDeepQNetwork:
 
     def build_model(self):
         model = keras.Sequential()  # linear stack of layers https://keras.io/models/sequential/
-        model.add(keras.layers.Dense(
-        model.add(keras.layers.Dense(
+        model.add(keras.layers.Dense(256, input_dim=self.nS, activation='relu'))  # [Input] -> Layer 1
+        model.add(keras.layers.Dense(256, activation='relu'))  # Layer 2 -> 3
         model.add(keras.layers.Dense(self.nA, activation='linear'))  # Layer 3 -> [output]
 
         model.compile(loss='mean_squared_error',  # Loss function: Mean Squared Error
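For reference, the network that the updated build_model now constructs, written as a standalone sketch: two hidden ReLU layers of 256 units feeding a linear Q-value head. The function name, the Adam optimizer, and the learning-rate argument are assumptions added for illustration; the diff itself only shows the layers and the mean-squared-error loss.

from tensorflow import keras

def build_q_network(n_states, n_actions, lr=0.001):
    """Sketch of the Q-network implied by the diff (optimizer is an assumption)."""
    model = keras.Sequential([
        keras.layers.Dense(256, input_dim=n_states, activation='relu'),  # [Input] -> Layer 1
        keras.layers.Dense(256, activation='relu'),                      # Layer 2 -> 3
        keras.layers.Dense(n_actions, activation='linear'),              # Layer 3 -> [output]
    ])
    model.compile(loss='mean_squared_error',
                  optimizer=keras.optimizers.Adam(learning_rate=lr))  # optimizer assumed, not shown in the diff
    return model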
app.py CHANGED
@@ -45,19 +45,17 @@ def main():
 
 
 def perform_training(jammer_type, channel_switching_cost):
-    st.sidebar.write("
+    st.sidebar.write("======================")
     st.sidebar.write('Training Starting')
     train(jammer_type, channel_switching_cost)
-    st.sidebar.write("Training completed")
-    st.sidebar.write("==================================================")
 
 
 def perform_testing(jammer_type, channel_switching_cost):
-    st.sidebar.write("
+    st.sidebar.write("======================")
     st.sidebar.write('Testing Starting')
     test(jammer_type, channel_switching_cost)
     st.sidebar.write("Testing completed")
-    st.sidebar.write("
+    st.sidebar.write("======================")
 
 
 if __name__ == "__main__":
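For context, a hypothetical sketch of how main() might wire these helpers to sidebar controls. The widget labels, jammer options, and cost range are assumptions not shown in the diff, and the sketch assumes the perform_training/perform_testing helpers defined in app.py above.

import streamlit as st

def main():
    st.title("Anti-jamming DDQN")  # hypothetical title
    jammer_type = st.sidebar.selectbox("Jammer type",
                                       ["constant", "sweeping", "random", "dynamic"])  # assumed options
    csc = st.sidebar.slider("Channel switching cost", 0.0, 1.0, 0.1)  # assumed range
    if st.sidebar.button("Train"):
        perform_training(jammer_type, csc)  # helper from app.py above
    if st.sidebar.button("Test"):
        perform_testing(jammer_type, csc)   # helper from app.py above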
tester.py CHANGED
@@ -26,7 +26,7 @@ def test(jammer_type, channel_switching_cost):
     discount_rate = 0.95
     lr = 0.001
 
-    agentName = f'DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+    agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
     DDQN_agent.model = DDQN_agent.load_saved_model(agentName)
     rewards = []  # Store rewards for graphing
@@ -70,12 +70,12 @@ def test(jammer_type, channel_switching_cost):
     st.pyplot(fig)
 
     # Save the figure
-    plot_name = f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
+    plot_name = f'./data/test_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     plt.savefig(plot_name, bbox_inches='tight')
     plt.close(fig)  # Close the figure to release resources
 
     # Save Results
     # Rewards
-    fileName = f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
+    fileName = f'./data/test_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
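The new save/load paths assume a ./data/ directory already exists; plt.savefig and open(..., 'w') raise FileNotFoundError otherwise. A small helper like the following (not part of the commit, shown only as a sketch) would guard against that:

import os

DATA_DIR = './data'

def data_path(filename):
    """Join a filename onto DATA_DIR, creating the directory on first use."""
    os.makedirs(DATA_DIR, exist_ok=True)
    return os.path.join(DATA_DIR, filename)

# e.g. plot_name = data_path(f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.png')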
trainer.py CHANGED
@@ -10,19 +10,21 @@ from antiJamEnv import AntiJamEnv
 
 
 def train(jammer_type, channel_switching_cost):
+    st.sidebar.subheader("Training Progress")
+    progress_bar = st.sidebar.progress(0)
+    status_text = st.sidebar.empty()
+
     env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
-    st.write(f"Observation space: , {ob_space}")
-    st.write(f"Action space: {ac_space}")
 
     s_size = ob_space.shape[0]
     a_size = ac_space.n
     max_env_steps = 100
-    TRAIN_Episodes =
+    TRAIN_Episodes = 20
     env._max_episode_steps = max_env_steps
 
-    epsilon = 1.0
+    epsilon = 1.0
     epsilon_min = 0.01
     epsilon_decay = 0.999
     discount_rate = 0.95
@@ -30,72 +32,69 @@ def train(jammer_type, channel_switching_cost):
     batch_size = 32
 
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
-    rewards = []
-    epsilons = []
+    rewards = []
+    epsilons = []
 
-    # Training agent
     for e in range(TRAIN_Episodes):
         state = env.reset()
-
-        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
+        state = np.reshape(state, [1, s_size])
         tot_rewards = 0
-        for time in range(max_env_steps):
+        for time in range(max_env_steps):
             action = DDQN_agent.action(state)
             next_state, reward, done, _ = env.step(action)
-            # print(f'The next state is: {next_state}')
-            # done: Three collisions occurred in the last 10 steps.
-            # time == max_env_steps - 1 : No collisions occurred
-            if done or time == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(DDQN_agent.epsilon)
-                st.write(f"episode: {e}/{TRAIN_Episodes}, score: {tot_rewards}, e: {DDQN_agent.epsilon}")
-                break
-            # Applying channel switching cost
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
-            DDQN_agent.store(state, action, reward, next_state, done)
+            DDQN_agent.store(state, action, reward, next_state, done)
             state = next_state
 
-            # Experience Replay
             if len(DDQN_agent.memory) > batch_size:
                 DDQN_agent.experience_replay(batch_size)
-
+
+            if done or time == max_env_steps - 1:
+                rewards.append(tot_rewards)
+                epsilons.append(DDQN_agent.epsilon)
+                status_text.text(f"Episode: {e+1}/{TRAIN_Episodes}, Reward: {tot_rewards}, Epsilon: {DDQN_agent.epsilon:.3f}")
+                progress_bar.progress((e + 1) / TRAIN_Episodes)
+                break
+
         DDQN_agent.update_target_from_model()
-
-        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
+
         if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
             break
 
+    st.sidebar.success("Training completed!")
+
     # Plotting
     rolling_average = np.convolve(rewards, np.ones(10) / 10, mode='valid')
 
-    # Create a new Streamlit figure
-    fig = plt.
-
-
-
+    # Create a new Streamlit figure for the training graph
+    fig, ax = plt.subplots(figsize=(8, 6))
+    ax.plot(rewards, label='Rewards')
+    ax.plot(rolling_average, color='black', label='Rolling Average')
+    ax.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-', label='Solved Line')
     eps_graph = [100 * x for x in epsilons]
-
-
-
-
-
+    ax.plot(eps_graph, color='g', linestyle='-', label='Epsilons')
+    ax.set_xlabel('Episodes')
+    ax.set_ylabel('Rewards')
+    ax.set_title(f'Training Rewards - {jammer_type}, CSC: {channel_switching_cost}')
+    ax.legend()
 
     # Display the Streamlit figure using streamlit.pyplot
     st.set_option('deprecation.showPyplotGlobalUse', False)
-    st.
+    st.sidebar.subheader("Training Graph")
+    st.sidebar.pyplot(fig)
 
     # Save the figure
-    plot_name = f'train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
+    plot_name = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     plt.savefig(plot_name, bbox_inches='tight')
     plt.close(fig)  # Close the figure to release resources
 
     # Save Results
     # Rewards
-    fileName = f'train_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
+    fileName = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
 
     # Save the agent as a SavedAgent.
-    agentName = f'DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+    agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent.save_model(agentName)
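The sidebar progress-reporting pattern added to train(), reduced to a minimal self-contained Streamlit sketch; the sleep call and the placeholder reward stand in for the real DDQN episode rollout and are assumptions, not code from the commit.

import time
import streamlit as st

TRAIN_Episodes = 20  # same episode count as in the diff

st.sidebar.subheader("Training Progress")
progress_bar = st.sidebar.progress(0)
status_text = st.sidebar.empty()

for e in range(TRAIN_Episodes):
    tot_rewards = 0.0  # placeholder: the real loop accumulates env rewards per episode
    time.sleep(0.05)   # placeholder: env.step() / experience_replay work
    status_text.text(f"Episode: {e+1}/{TRAIN_Episodes}, Reward: {tot_rewards}")
    progress_bar.progress((e + 1) / TRAIN_Episodes)  # fraction in (0, 1]

st.sidebar.success("Training completed!")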