asataura committed on
Commit 6b43fcc
1 Parent(s): c0bb1b2

Improving the UI

Files changed (4):
  1. DDQN.py +2 -2
  2. app.py +3 -5
  3. tester.py +3 -3
  4. trainer.py +37 -38
DDQN.py CHANGED
@@ -27,8 +27,8 @@ class DoubleDeepQNetwork:
 
     def build_model(self):
         model = keras.Sequential() # linear stack of layers https://keras.io/models/sequential/
-        model.add(keras.layers.Dense(24, input_dim=self.nS, activation='relu')) # [Input] -> Layer 1
-        model.add(keras.layers.Dense(24, activation='relu')) # Layer 2 -> 3
+        model.add(keras.layers.Dense(256, input_dim=self.nS, activation='relu')) # [Input] -> Layer 1
+        model.add(keras.layers.Dense(256, activation='relu')) # Layer 2 -> 3
         model.add(keras.layers.Dense(self.nA, activation='linear')) # Layer 3 -> [output]
 
         model.compile(loss='mean_squared_error', # Loss function: Mean Squared Error
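
Note: the change widens both hidden layers from 24 to 256 units. A minimal, self-contained sketch of the widened network, written as a free function for illustration (the class method takes no arguments and uses self.nS / self.nA); the optimizer argument is an assumption, since the diff cuts off after the loss argument.

    from tensorflow import keras

    def build_model(n_states: int, n_actions: int, lr: float = 0.001):
        # Sequential stack: input -> two 256-unit ReLU hidden layers -> linear Q-value head
        model = keras.Sequential()
        model.add(keras.layers.Dense(256, input_dim=n_states, activation='relu'))  # [Input] -> Layer 1
        model.add(keras.layers.Dense(256, activation='relu'))                      # Layer 2 -> 3
        model.add(keras.layers.Dense(n_actions, activation='linear'))              # Layer 3 -> [output]
        model.compile(loss='mean_squared_error',
                      optimizer=keras.optimizers.Adam(learning_rate=lr))           # assumed; not shown in the diff
        return model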
app.py CHANGED
@@ -45,19 +45,17 @@ def main():
 
 
 def perform_training(jammer_type, channel_switching_cost):
-    st.sidebar.write("==================================================")
+    st.sidebar.write("======================")
     st.sidebar.write('Training Starting')
     train(jammer_type, channel_switching_cost)
-    st.sidebar.write("Training completed")
-    st.sidebar.write("==================================================")
 
 
 def perform_testing(jammer_type, channel_switching_cost):
-    st.sidebar.write("==================================================")
+    st.sidebar.write("======================")
     st.sidebar.write('Testing Starting')
     test(jammer_type, channel_switching_cost)
     st.sidebar.write("Testing completed")
-    st.sidebar.write("==================================================")
 
 
 if __name__ == "__main__":
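
After this commit the two sidebar helpers read as follows; the import lines are assumptions, since app.py's imports lie outside the diff. The training-side "Training completed" write is dropped here because trainer.py now reports completion itself via st.sidebar.success (see the trainer.py hunk below).

    import streamlit as st
    from trainer import train  # assumed module paths; not shown in this diff
    from tester import test


    def perform_training(jammer_type, channel_switching_cost):
        st.sidebar.write("======================")
        st.sidebar.write('Training Starting')
        train(jammer_type, channel_switching_cost)


    def perform_testing(jammer_type, channel_switching_cost):
        st.sidebar.write("======================")
        st.sidebar.write('Testing Starting')
        test(jammer_type, channel_switching_cost)
        st.sidebar.write("Testing completed")
        st.sidebar.write("======================")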
tester.py CHANGED
@@ -26,7 +26,7 @@ def test(jammer_type, channel_switching_cost):
     discount_rate = 0.95
     lr = 0.001
 
-    agentName = f'DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+    agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
     DDQN_agent.model = DDQN_agent.load_saved_model(agentName)
     rewards = [] # Store rewards for graphing
@@ -70,12 +70,12 @@ def test(jammer_type, channel_switching_cost):
     st.pyplot(fig)
 
     # Save the figure
-    plot_name = f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
+    plot_name = f'./data/test_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     plt.savefig(plot_name, bbox_inches='tight')
     plt.close(fig) # Close the figure to release resources
 
     # Save Results
     # Rewards
-    fileName = f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
+    fileName = f'./data/test_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
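
The relocated paths assume a ./data directory already exists when savefig, open, and save_model run. A small guard like the following (not part of this commit; the constant and helper names are hypothetical) would make that assumption explicit.

    import os

    DATA_DIR = './data'  # hypothetical constant; the commit hard-codes this prefix in each f-string


    def data_path(name: str) -> str:
        # Create the output directory on first use so file writes don't fail with FileNotFoundError
        os.makedirs(DATA_DIR, exist_ok=True)
        return os.path.join(DATA_DIR, name)


    # Example: plot_name = data_path(f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.png')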
trainer.py CHANGED
@@ -10,19 +10,21 @@ from antiJamEnv import AntiJamEnv
 
 
 def train(jammer_type, channel_switching_cost):
+    st.sidebar.subheader("Training Progress")
+    progress_bar = st.sidebar.progress(0)
+    status_text = st.sidebar.empty()
+
     env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
-    st.write(f"Observation space: , {ob_space}")
-    st.write(f"Action space: {ac_space}")
 
     s_size = ob_space.shape[0]
     a_size = ac_space.n
     max_env_steps = 100
-    TRAIN_Episodes = 10
+    TRAIN_Episodes = 20
     env._max_episode_steps = max_env_steps
 
-    epsilon = 1.0 # exploration rate
+    epsilon = 1.0
     epsilon_min = 0.01
     epsilon_decay = 0.999
     discount_rate = 0.95
@@ -30,72 +32,69 @@ def train(jammer_type, channel_switching_cost):
     batch_size = 32
 
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
-    rewards = [] # Store rewards for graphing
-    epsilons = [] # Store the Explore/Exploit
+    rewards = []
+    epsilons = []
 
-    # Training agent
     for e in range(TRAIN_Episodes):
         state = env.reset()
-        # print(f"Initial state is: {state}")
-        state = np.reshape(state, [1, s_size]) # Resize to store in memory to pass to .predict
+        state = np.reshape(state, [1, s_size])
         tot_rewards = 0
-        for time in range(max_env_steps): # 200 is when you "solve" the game. This can continue forever as far as I know
+        for time in range(max_env_steps):
            action = DDQN_agent.action(state)
             next_state, reward, done, _ = env.step(action)
-            # print(f'The next state is: {next_state}')
-            # done: Three collisions occurred in the last 10 steps.
-            # time == max_env_steps - 1 : No collisions occurred
-            if done or time == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(DDQN_agent.epsilon)
-                st.write(f"episode: {e}/{TRAIN_Episodes}, score: {tot_rewards}, e: {DDQN_agent.epsilon}")
-                break
-            # Applying channel switching cost
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
-            DDQN_agent.store(state, action, reward, next_state, done) # Resize to store in memory to pass to .predict
+            DDQN_agent.store(state, action, reward, next_state, done)
             state = next_state
 
-            # Experience Replay
             if len(DDQN_agent.memory) > batch_size:
                 DDQN_agent.experience_replay(batch_size)
-        # Update the weights after each episode (You can configure this for x steps as well
+
+            if done or time == max_env_steps - 1:
+                rewards.append(tot_rewards)
+                epsilons.append(DDQN_agent.epsilon)
+                status_text.text(f"Episode: {e+1}/{TRAIN_Episodes}, Reward: {tot_rewards}, Epsilon: {DDQN_agent.epsilon:.3f}")
+                progress_bar.progress((e + 1) / TRAIN_Episodes)
+                break
+
         DDQN_agent.update_target_from_model()
-        # If our current NN passes we are done
-        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
+
         if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
             break
 
+    st.sidebar.success("Training completed!")
+
     # Plotting
     rolling_average = np.convolve(rewards, np.ones(10) / 10, mode='valid')
 
-    # Create a new Streamlit figure
-    fig = plt.figure()
-    plt.plot(rewards, label='Rewards')
-    plt.plot(rolling_average, color='black', label='Rolling Average')
-    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-', label='Solved Line')
+    # Create a new Streamlit figure for the training graph
+    fig, ax = plt.subplots(figsize=(8, 6))
+    ax.plot(rewards, label='Rewards')
+    ax.plot(rolling_average, color='black', label='Rolling Average')
+    ax.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-', label='Solved Line')
     eps_graph = [100 * x for x in epsilons]
-    plt.plot(eps_graph, color='g', linestyle='-', label='Epsilons')
-    plt.xlabel('Episodes')
-    plt.ylabel('Rewards')
-    plt.title(f'Training Rewards - {jammer_type}, CSC: {channel_switching_cost}')
-    plt.legend()
+    ax.plot(eps_graph, color='g', linestyle='-', label='Epsilons')
+    ax.set_xlabel('Episodes')
+    ax.set_ylabel('Rewards')
+    ax.set_title(f'Training Rewards - {jammer_type}, CSC: {channel_switching_cost}')
+    ax.legend()
 
     # Display the Streamlit figure using streamlit.pyplot
     st.set_option('deprecation.showPyplotGlobalUse', False)
-    st.pyplot(fig)
+    st.sidebar.subheader("Training Graph")
+    st.sidebar.pyplot(fig)
 
     # Save the figure
-    plot_name = f'train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
+    plot_name = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     plt.savefig(plot_name, bbox_inches='tight')
     plt.close(fig) # Close the figure to release resources
 
     # Save Results
     # Rewards
-    fileName = f'train_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
+    fileName = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
 
     # Save the agent as a SavedAgent.
-    agentName = f'DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+    agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent.save_model(agentName)
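
The UI change in train() is essentially Streamlit's progress-reporting pattern: a sidebar progress bar plus an empty placeholder that is overwritten once per episode, followed by a success banner. A standalone sketch of that pattern, with a sleep standing in for one training episode:

    import time
    import streamlit as st

    st.sidebar.subheader("Training Progress")
    progress_bar = st.sidebar.progress(0)   # progress bar, starts at 0
    status_text = st.sidebar.empty()        # placeholder, rewritten each iteration

    TRAIN_Episodes = 20                     # matches the new episode count in this commit
    for e in range(TRAIN_Episodes):
        time.sleep(0.05)                    # stand-in for one training episode
        status_text.text(f"Episode: {e + 1}/{TRAIN_Episodes}")
        progress_bar.progress((e + 1) / TRAIN_Episodes)

    st.sidebar.success("Training completed!")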