asataura committed
Commit 897f151
1 Parent(s): 156c3a3

Integrating the falconLLM

Files changed (3)
  1. app.py +18 -23
  2. tester.py +6 -42
  3. trainer.py +11 -10
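
This commit wires a Falcon LLM into the Streamlit app: once DRL training finishes, app.py loads tiiuae/falcon-7b-instruct through the Hugging Face transformers text-generation pipeline and writes the generated discussion text to the page. The following is an illustrative, self-contained sketch of that loading pattern, not part of the diff; the generation parameters mirror the committed call, and depending on the installed transformers version the Falcon checkpoint may additionally need trust_remote_code=True.

    # Illustrative sketch only (not part of the diff below).
    import transformers
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "tiiuae/falcon-7b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Depending on the transformers version, the Falcon checkpoint may also
    # require trust_remote_code=True in both from_pretrained calls.
    model = AutoModelForCausalLM.from_pretrained(model_name)

    generator = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer)
    outputs = generator(
        "Discuss this topic: Integrating LLMs to DRL-based anti-jamming.",
        max_length=100,   # same generation budget as the committed call
        do_sample=True,   # enable sampling so temperature has an effect
        temperature=0.7,
    )
    print(outputs[0]["generated_text"])
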
app.py CHANGED
@@ -5,6 +5,8 @@ import streamlit as st
 import os
 from trainer import train
 from tester import test
+import transformers
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 def main():
@@ -24,32 +26,25 @@ def main():
     st.sidebar.write(f"Jammer Type: {jammer_type}")
     st.sidebar.write(f"Channel Switching Cost: {channel_switching_cost}")
 
-    train_button = st.sidebar.button('Train')
-    test_button = st.sidebar.button('Test')
+    start_button = st.sidebar.button('Start')
 
-    if train_button or test_button:
-        agent_name = f'DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
-        if os.path.exists(agent_name):
-            if train_button:
-                st.warning("Agent has been trained already! Do you want to retrain?")
-                retrain = st.sidebar.button('Yes')
-                if retrain:
-                    perform_training(jammer_type, channel_switching_cost)
-            elif test_button:
-                perform_testing(jammer_type, channel_switching_cost)
-        else:
-            if train_button:
-                perform_training(jammer_type, channel_switching_cost)
-            elif test_button:
-                st.warning("Agent has not been trained yet. Click Train First!!!")
+    if start_button:
+        agent = perform_training(jammer_type, channel_switching_cost)
+        test(agent, jammer_type, channel_switching_cost)
 
-
 def perform_training(jammer_type, channel_switching_cost):
-    train(jammer_type, channel_switching_cost)
-
-
-def perform_testing(jammer_type, channel_switching_cost):
-    test(jammer_type, channel_switching_cost)
+    agent = train(jammer_type, channel_switching_cost)
+    model_name = "tiiuae/falcon-7b-instruct"
+    model = AutoModelForCausalLM.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    pipeline = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=100, temperature=0.7)
+    text = pipeline("Discuss this topic: Integrating LLMs to DRL-based anti-jamming.")
+    st.write(text)
+    return agent
+
+
+def perform_testing(agent, jammer_type, channel_switching_cost):
+    test(agent, jammer_type, channel_switching_cost)
 
 
 if __name__ == "__main__":
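
Note that perform_training() builds the tokenizer, model, and pipeline inside the function, so the 7B-parameter checkpoint is re-loaded on every press of Start. A possible refinement, not part of this commit and assuming a Streamlit release that provides st.cache_resource, is to cache the pipeline so it is constructed once per session:

    # Hypothetical refinement, not in this commit.
    import streamlit as st
    import transformers
    from transformers import AutoModelForCausalLM, AutoTokenizer

    @st.cache_resource  # assumes a Streamlit version that ships st.cache_resource
    def load_falcon_pipeline(model_name: str = "tiiuae/falcon-7b-instruct"):
        # Build the text-generation pipeline once and reuse it across reruns.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        return transformers.pipeline("text-generation", model=model, tokenizer=tokenizer)

    def discuss(topic: str) -> str:
        # Generate a short discussion of `topic` with the cached pipeline.
        pipe = load_falcon_pipeline()
        out = pipe(f"Discuss this topic: {topic}", max_length=100, do_sample=True, temperature=0.7)
        return out[0]["generated_text"]
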
tester.py CHANGED
@@ -9,26 +9,17 @@ from DDQN import DoubleDeepQNetwork
 from antiJamEnv import AntiJamEnv
 
 
-def test(jammer_type, channel_switching_cost):
+def test(agent, jammer_type, channel_switching_cost):
     env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
 
     s_size = ob_space.shape[0]
     a_size = ac_space.n
-    max_env_steps = 100
-    TEST_Episodes = 10
+    max_env_steps = 3
+    TEST_Episodes = 1
     env._max_episode_steps = max_env_steps
-
-    epsilon = 1.0  # exploration rate
-    epsilon_min = 0.01
-    epsilon_decay = 0.999
-    discount_rate = 0.95
-    lr = 0.001
-
-    agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
-    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
-    DDQN_agent.model = DDQN_agent.load_saved_model(agentName)
+    DDQN_agent = agent
     rewards = []  # Store rewards for graphing
     epsilons = []  # Store the Explore/Exploit
 
@@ -47,35 +38,8 @@ def test(jammer_type, channel_switching_cost):
                 break
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
+
+            st.write(f"The state is: {state}, action taken is: {action}, obtained reward is: {reward}")
             # DON'T STORE ANYTHING DURING TESTING
             state = next_state
 
-    # Plotting
-    rolling_average = np.convolve(rewards, np.ones(10) / 10, mode='valid')
-
-    # Create a new Streamlit figure
-    fig = plt.figure()
-    plt.plot(rewards, label='Rewards')
-    plt.plot(rolling_average, color='black', label='Rolling Average')
-    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-', label='Solved Line')
-    eps_graph = [100 * x for x in epsilons]
-    plt.plot(eps_graph, color='g', linestyle='-', label='Epsilons')
-    plt.xlabel('Episodes')
-    plt.ylabel('Rewards')
-    plt.title(f'Testing Rewards - {jammer_type}, CSC: {channel_switching_cost}')
-    plt.legend()
-
-    # Display the Streamlit figure using streamlit.pyplot
-    st.set_option('deprecation.showPyplotGlobalUse', False)
-    st.pyplot(fig)
-
-    # Save the figure
-    plot_name = f'./data/test_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
-    plt.savefig(plot_name, bbox_inches='tight')
-    plt.close(fig)  # Close the figure to release resources
-
-    # Save Results
-    # Rewards
-    fileName = f'./data/test_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
-    with open(fileName, 'w') as f:
-        json.dump(rewards, f)
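
tester.py no longer rebuilds a DoubleDeepQNetwork and reloads saved weights from ./data; it now expects the live agent returned by train(), and it echoes every step to the page via st.write. A sketch of the resulting calling convention, where the jammer type and channel-switching cost are placeholder values rather than repository defaults:

    # Placeholder driver, not in the repository: train and immediately test with
    # the in-memory agent, as app.py's Start button now does.
    from trainer import train
    from tester import test

    jammer_type = "dynamic"        # placeholder value
    channel_switching_cost = 0.1   # placeholder value

    agent = train(jammer_type, channel_switching_cost)   # returns the live DDQN agent
    test(agent, jammer_type, channel_switching_cost)     # no ./data checkpoint is reloaded
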
trainer.py CHANGED
@@ -21,7 +21,7 @@ def train(jammer_type, channel_switching_cost):
     s_size = ob_space.shape[0]
     a_size = ac_space.n
     max_env_steps = 100
-    TRAIN_Episodes = 50
+    TRAIN_Episodes = 25
     env._max_episode_steps = max_env_steps
 
     epsilon = 1.0
@@ -85,16 +85,17 @@ def train(jammer_type, channel_switching_cost):
     st.pyplot(fig)
 
     # Save the figure
-    plot_name = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
-    plt.savefig(plot_name, bbox_inches='tight')
+    # plot_name = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
+    # plt.savefig(plot_name, bbox_inches='tight')
     plt.close(fig)  # Close the figure to release resources
 
     # Save Results
     # Rewards
-    fileName = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
-    with open(fileName, 'w') as f:
-        json.dump(rewards, f)
-
-    # Save the agent as a SavedAgent.
-    agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
-    DDQN_agent.save_model(agentName)
+    # fileName = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.json'
+    # with open(fileName, 'w') as f:
+    #     json.dump(rewards, f)
+    #
+    # # Save the agent as a SavedAgent.
+    # agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+    # DDQN_agent.save_model(agentName)
+    return DDQN_agent
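
With the save and dump calls commented out, train() persists nothing to disk; the returned agent exists only for the current session. If checkpointing is still wanted, the caller could save the returned object itself; a sketch, assuming DoubleDeepQNetwork.save_model(path) is still available as in the revision before this commit:

    # Hypothetical caller-side checkpointing; assumes DoubleDeepQNetwork.save_model(path)
    # still exists as in the previous revision. Values below are placeholders.
    from trainer import train

    jammer_type = "dynamic"        # placeholder value
    channel_switching_cost = 0.1   # placeholder value

    agent = train(jammer_type, channel_switching_cost)
    agent.save_model(f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}')
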