Improving the UI
DDQN.py CHANGED
@@ -27,8 +27,8 @@ class DoubleDeepQNetwork:
 
     def build_model(self):
         model = keras.Sequential()  # linear stack of layers https://keras.io/models/sequential/
-        model.add(keras.layers.Dense(
-        model.add(keras.layers.Dense(
+        model.add(keras.layers.Dense(256, input_dim=self.nS, activation='relu'))  # [Input] -> Layer 1
+        model.add(keras.layers.Dense(256, activation='relu'))  # Layer 2 -> 3
         model.add(keras.layers.Dense(self.nA, activation='linear'))  # Layer 3 -> [output]
 
         model.compile(loss='mean_squared_error',  # Loss function: Mean Squared Error
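For reference, the network that the updated build_model now constructs, written as a standalone sketch: two hidden ReLU layers of 256 units feeding a linear Q-value head. The function name, the Adam optimizer, and the learning-rate argument are assumptions added for illustration; the diff itself only shows the layers and the mean-squared-error loss.

from tensorflow import keras

def build_q_network(n_states, n_actions, lr=0.001):
    """Sketch of the Q-network implied by the diff (optimizer is an assumption)."""
    model = keras.Sequential([
        keras.layers.Dense(256, input_dim=n_states, activation='relu'),  # [Input] -> Layer 1
        keras.layers.Dense(256, activation='relu'),                      # Layer 2 -> 3
        keras.layers.Dense(n_actions, activation='linear'),              # Layer 3 -> [output]
    ])
    model.compile(loss='mean_squared_error',
                  optimizer=keras.optimizers.Adam(learning_rate=lr))  # optimizer assumed, not shown in the diff
    return model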
app.py CHANGED
@@ -45,19 +45,17 @@ def main():
 
 
 def perform_training(jammer_type, channel_switching_cost):
-    st.sidebar.write("
+    st.sidebar.write("======================")
     st.sidebar.write('Training Starting')
     train(jammer_type, channel_switching_cost)
-    st.sidebar.write("Training completed")
-    st.sidebar.write("==================================================")
 
 
 def perform_testing(jammer_type, channel_switching_cost):
-    st.sidebar.write("
+    st.sidebar.write("======================")
     st.sidebar.write('Testing Starting')
     test(jammer_type, channel_switching_cost)
     st.sidebar.write("Testing completed")
-    st.sidebar.write("
+    st.sidebar.write("======================")
 
 
 if __name__ == "__main__":
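For context, a hypothetical sketch of how main() might wire these helpers to sidebar controls. The widget labels, jammer options, and cost range are assumptions not shown in the diff, and the sketch assumes the perform_training/perform_testing helpers defined in app.py above.

import streamlit as st

def main():
    st.title("Anti-jamming DDQN")  # hypothetical title
    jammer_type = st.sidebar.selectbox("Jammer type",
                                       ["constant", "sweeping", "random", "dynamic"])  # assumed options
    csc = st.sidebar.slider("Channel switching cost", 0.0, 1.0, 0.1)  # assumed range
    if st.sidebar.button("Train"):
        perform_training(jammer_type, csc)  # helper from app.py above
    if st.sidebar.button("Test"):
        perform_testing(jammer_type, csc)   # helper from app.py above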
tester.py CHANGED
@@ -26,7 +26,7 @@ def test(jammer_type, channel_switching_cost):
     discount_rate = 0.95
     lr = 0.001
 
-    agentName = f'DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+    agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
     DDQN_agent.model = DDQN_agent.load_saved_model(agentName)
     rewards = []  # Store rewards for graphing
@@ -70,12 +70,12 @@ def test(jammer_type, channel_switching_cost):
     st.pyplot(fig)
 
     # Save the figure
-    plot_name = f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
+    plot_name = f'./data/test_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     plt.savefig(plot_name, bbox_inches='tight')
     plt.close(fig)  # Close the figure to release resources
 
     # Save Results
     # Rewards
-    fileName = f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
+    fileName = f'./data/test_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
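The new save/load paths assume a ./data/ directory already exists; plt.savefig and open(..., 'w') raise FileNotFoundError otherwise. A small helper like the following (not part of the commit, shown only as a sketch) would guard against that:

import os

DATA_DIR = './data'

def data_path(filename):
    """Join a filename onto DATA_DIR, creating the directory on first use."""
    os.makedirs(DATA_DIR, exist_ok=True)
    return os.path.join(DATA_DIR, filename)

# e.g. plot_name = data_path(f'test_rewards_{jammer_type}_csc_{channel_switching_cost}.png')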
trainer.py CHANGED
@@ -10,19 +10,21 @@ from antiJamEnv import AntiJamEnv
 
 
 def train(jammer_type, channel_switching_cost):
+    st.sidebar.subheader("Training Progress")
+    progress_bar = st.sidebar.progress(0)
+    status_text = st.sidebar.empty()
+
     env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
-    st.write(f"Observation space: , {ob_space}")
-    st.write(f"Action space: {ac_space}")
 
     s_size = ob_space.shape[0]
     a_size = ac_space.n
     max_env_steps = 100
-    TRAIN_Episodes =
+    TRAIN_Episodes = 20
     env._max_episode_steps = max_env_steps
 
-    epsilon = 1.0
+    epsilon = 1.0
     epsilon_min = 0.01
     epsilon_decay = 0.999
     discount_rate = 0.95
@@ -30,72 +32,69 @@ def train(jammer_type, channel_switching_cost):
     batch_size = 32
 
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
-    rewards = []
-    epsilons = []
+    rewards = []
+    epsilons = []
 
-    # Training agent
     for e in range(TRAIN_Episodes):
         state = env.reset()
-
-        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
+        state = np.reshape(state, [1, s_size])
         tot_rewards = 0
-        for time in range(max_env_steps):
+        for time in range(max_env_steps):
             action = DDQN_agent.action(state)
             next_state, reward, done, _ = env.step(action)
-            # print(f'The next state is: {next_state}')
-            # done: Three collisions occurred in the last 10 steps.
-            # time == max_env_steps - 1 : No collisions occurred
-            if done or time == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(DDQN_agent.epsilon)
-                st.write(f"episode: {e}/{TRAIN_Episodes}, score: {tot_rewards}, e: {DDQN_agent.epsilon}")
-                break
-            # Applying channel switching cost
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
-            DDQN_agent.store(state, action, reward, next_state, done)
+            DDQN_agent.store(state, action, reward, next_state, done)
             state = next_state
 
-            # Experience Replay
             if len(DDQN_agent.memory) > batch_size:
                 DDQN_agent.experience_replay(batch_size)
-
+
+            if done or time == max_env_steps - 1:
+                rewards.append(tot_rewards)
+                epsilons.append(DDQN_agent.epsilon)
+                status_text.text(f"Episode: {e+1}/{TRAIN_Episodes}, Reward: {tot_rewards}, Epsilon: {DDQN_agent.epsilon:.3f}")
+                progress_bar.progress((e + 1) / TRAIN_Episodes)
+                break
+
         DDQN_agent.update_target_from_model()
-
-        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
+
         if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
             break
 
+    st.sidebar.success("Training completed!")
+
     # Plotting
     rolling_average = np.convolve(rewards, np.ones(10) / 10, mode='valid')
 
-    # Create a new Streamlit figure
-    fig = plt.
-
-
-
+    # Create a new Streamlit figure for the training graph
+    fig, ax = plt.subplots(figsize=(8, 6))
+    ax.plot(rewards, label='Rewards')
+    ax.plot(rolling_average, color='black', label='Rolling Average')
+    ax.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-', label='Solved Line')
     eps_graph = [100 * x for x in epsilons]
-
-
-
-
-
+    ax.plot(eps_graph, color='g', linestyle='-', label='Epsilons')
+    ax.set_xlabel('Episodes')
+    ax.set_ylabel('Rewards')
+    ax.set_title(f'Training Rewards - {jammer_type}, CSC: {channel_switching_cost}')
+    ax.legend()
 
     # Display the Streamlit figure using streamlit.pyplot
     st.set_option('deprecation.showPyplotGlobalUse', False)
-    st.
+    st.sidebar.subheader("Training Graph")
+    st.sidebar.pyplot(fig)
 
     # Save the figure
-    plot_name = f'train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
+    plot_name = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     plt.savefig(plot_name, bbox_inches='tight')
     plt.close(fig)  # Close the figure to release resources
 
     # Save Results
     # Rewards
-    fileName = f'train_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
+    fileName = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
 
     # Save the agent as a SavedAgent.
-    agentName = f'DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+    agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent.save_model(agentName)
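The sidebar progress-reporting pattern added to train(), reduced to a minimal self-contained Streamlit sketch; the sleep call and the placeholder reward stand in for the real DDQN episode rollout and are assumptions, not code from the commit.

import time
import streamlit as st

TRAIN_Episodes = 20  # same episode count as in the diff

st.sidebar.subheader("Training Progress")
progress_bar = st.sidebar.progress(0)
status_text = st.sidebar.empty()

for e in range(TRAIN_Episodes):
    tot_rewards = 0.0  # placeholder: the real loop accumulates env rewards per episode
    time.sleep(0.05)   # placeholder: env.step() / experience_replay work
    status_text.text(f"Episode: {e+1}/{TRAIN_Episodes}, Reward: {tot_rewards}")
    progress_bar.progress((e + 1) / TRAIN_Episodes)  # fraction in (0, 1]

st.sidebar.success("Training completed!")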