asataura committed
Commit
8d7cbbe
1 Parent(s): 2b93f0a

Integrating the falcon7B LLM

Files changed (2)
  1. app.py +24 -55
  2. trainer.py +31 -12
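This commit wires the Falcon-7B-Instruct model into the training flow so the dashboard can turn raw DRL training statistics into a natural-language explanation. For reference, a minimal sketch of how Falcon-7B-Instruct is typically loaded for text generation with the transformers pipeline, mirroring the commented-out snippet removed from app.py below; the dtype, device_map and prompt are illustrative assumptions, not code from this commit:

import torch
import transformers
from transformers import AutoTokenizer

model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = transformers.pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,   # illustrative: halves memory vs. float32
    trust_remote_code=True,       # Falcon ships custom modelling code
    device_map="auto",            # spread layers across available devices
)
result = generator(
    "Summarize the training run of a DRL-based anti-jamming agent.",
    max_length=200,
    do_sample=True,
    top_k=10,
    eos_token_id=tokenizer.eos_token_id,
)
print(result[0]["generated_text"])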
app.py CHANGED
@@ -2,73 +2,42 @@
 # -*- coding: utf-8 -*-
 
 import streamlit as st
-import os
 from trainer import train
 from tester import test
-import transformers
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
 
 
-def perform_training(jammer_type, channel_switching_cost):
-    agent = train(jammer_type, channel_switching_cost)
-    return agent
+def main():
+    st.title("Beyond the Anti-Jam: Integration of DRL with LLM")
 
+    st.sidebar.header("Make Your Environment Configuration")
+    mode = st.sidebar.radio("Choose Mode", ["Auto", "Manual"])
 
-def perform_testing(agent, jammer_type, channel_switching_cost):
-    test(agent, jammer_type, channel_switching_cost)
-
+    if mode == "Auto":
+        jammer_type = "dynamic"
+        channel_switching_cost = 0.1
+    else:
+        jammer_type = st.sidebar.selectbox("Select Jammer Type", ["constant", "sweeping", "random", "dynamic"])
+        channel_switching_cost = st.sidebar.selectbox("Select Channel Switching Cost", [0, 0.05, 0.1, 0.15, 0.2])
 
-# model_name = "tiiuae/falcon-7b-instruct"
-# model = AutoModelForCausalLM.from_pretrained(model_name)
-# tokenizer = AutoTokenizer.from_pretrained(model_name)
-# pipeline = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=100,
-#                                  temperature=0.7)
+    st.sidebar.subheader("Configuration:")
+    st.sidebar.write(f"Jammer Type: {jammer_type}")
+    st.sidebar.write(f"Channel Switching Cost: {channel_switching_cost}")
 
-st.title("Beyond the Anti-Jam: Integration of DRL with LLM")
+    start_button = st.sidebar.button('Start')
 
-st.sidebar.header("Make Your Environment Configuration")
-mode = st.sidebar.radio("Choose Mode", ["Auto", "Manual"])
+    if start_button:
+        agent = perform_training(jammer_type, channel_switching_cost)
+        test(agent, jammer_type, channel_switching_cost)
 
-if mode == "Auto":
-    jammer_type = "dynamic"
-    channel_switching_cost = 0.1
-else:
-    jammer_type = st.sidebar.selectbox("Select Jammer Type", ["constant", "sweeping", "random", "dynamic"])
-    channel_switching_cost = st.sidebar.selectbox("Select Channel Switching Cost", [0, 0.05, 0.1, 0.15, 0.2])
 
-st.sidebar.subheader("Configuration:")
-st.sidebar.write(f"Jammer Type: {jammer_type}")
-st.sidebar.write(f"Channel Switching Cost: {channel_switching_cost}")
+def perform_training(jammer_type, channel_switching_cost):
+    agent = train(jammer_type, channel_switching_cost)
+    return agent
 
-start_button = st.sidebar.button('Start')
 
-if start_button:
-    agent, rewards = perform_training(jammer_type, channel_switching_cost)
-    st.subheader("Generating Insights of the DRL-Training")
-    # text = pipeline("Discuss this topic: Integrating LLMs to DRL-based anti-jamming.")
-    # st.write(text)
+def perform_testing(agent, jammer_type, channel_switching_cost):
     test(agent, jammer_type, channel_switching_cost)
 
-# model = "tiiuae/falcon-7b-instruct"
-#
-# tokenizer = AutoTokenizer.from_pretrained(model)
-# pipeline = transformers.pipeline(
-#     "text-generation",
-#     model=model,
-#     tokenizer=tokenizer,
-#     torch_dtype=torch.bfloat16,
-#     trust_remote_code=True,
-#     device_map="auto",
-# )
-# sequences = pipeline(
-#     "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
-#     max_length=200,
-#     do_sample=True,
-#     top_k=10,
-#     num_return_sequences=1,
-#     eos_token_id=tokenizer.eos_token_id,
-# )
-# st.title("Beyond the Anti-Jam: Integration of DRL with LLM")
-# for seq in sequences:
-#     st.write(f"Result: {seq['generated_text']}")
+
+if __name__ == "__main__":
+    main()
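With this refactor, app.py defines a main() function behind an __name__ guard instead of running at import time, and the dashboard is still launched the standard Streamlit way:

streamlit run app.py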
trainer.py CHANGED
@@ -7,6 +7,11 @@ import json
 import streamlit as st
 from DDQN import DoubleDeepQNetwork
 from antiJamEnv import AntiJamEnv
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "tiiuae/falcon-7b-instruct"  # Replace with the exact model name or path
+model = AutoModelForCausalLM.from_pretrained(model_name)  # Falcon-7B-Instruct is a causal (decoder-only) LM
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 def train(jammer_type, channel_switching_cost):
@@ -53,7 +58,8 @@ def train(jammer_type, channel_switching_cost):
         if done or time == max_env_steps - 1:
             rewards.append(tot_rewards)
             epsilons.append(DDQN_agent.epsilon)
-            status_text.text(f"Episode: {e+1}/{TRAIN_Episodes}, Reward: {tot_rewards}, Epsilon: {DDQN_agent.epsilon:.3f}")
+            status_text.text(
+                f"Episode: {e + 1}/{TRAIN_Episodes}, Reward: {tot_rewards}, Epsilon: {DDQN_agent.epsilon:.3f}")
             progress_bar.progress((e + 1) / TRAIN_Episodes)
             break
 
@@ -66,12 +72,12 @@ def train(jammer_type, channel_switching_cost):
 
     # Plotting
    rolling_average = np.convolve(rewards, np.ones(10) / 10, mode='valid')
-
+    solved_threshold = max_env_steps - 0.10 * max_env_steps
     # Create a new Streamlit figure for the training graph
     fig, ax = plt.subplots(figsize=(8, 6))
     ax.plot(rewards, label='Rewards')
     ax.plot(rolling_average, color='black', label='Rolling Average')
-    ax.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-', label='Solved Line')
+    ax.axhline(y=solved_threshold, color='r', linestyle='-', label='Solved Line')
     eps_graph = [100 * x for x in epsilons]
     ax.plot(eps_graph, color='g', linestyle='-', label='Epsilons')
     ax.set_xlabel('Episodes')
@@ -79,23 +85,18 @@ def train(jammer_type, channel_switching_cost):
     ax.set_title(f'Training Rewards - {jammer_type}, CSC: {channel_switching_cost}')
     ax.legend()
 
-    # Use Streamlit layout to create two side-by-side containers
+    insights = generate_insights(rewards, rolling_average, epsilons, solved_threshold)
+
     with st.container():
         col1, col2 = st.columns(2)
 
         with col1:
             st.subheader("Training Graph")
-            st.set_option('deprecation.showPyplotGlobalUse', False)
             st.pyplot(fig)
 
         with col2:
             st.subheader("Graph Explanation")
-            st.write("""
-            The training graph shows the rewards received by the agent in each episode of the training process.
-            The blue line represents the actual reward values, while the black line represents a rolling average.
-            The red horizontal line indicates the threshold for considering the task solved.
-            The green line represents the epsilon (exploration rate) values for the agent, indicating how often it takes random actions.
-            """)
+            st.write(insights)
 
     # Save the figure
     # plot_name = f'./data/train_rewards_{jammer_type}_csc_{channel_switching_cost}.png'
@@ -111,4 +112,22 @@ def train(jammer_type, channel_switching_cost):
     # # Save the agent as a SavedAgent.
     # agentName = f'./data/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     # DDQN_agent.save_model(agentName)
-    return DDQN_agent, rewards
+    return DDQN_agent
+
+
+def generate_insights(rewards, rolling_average, epsilons, solved_threshold):
+    description = (
+        f"The graph represents training rewards over episodes. "
+        f"The actual rewards range from {min(rewards)} to {max(rewards)} with an average of {np.mean(rewards):.2f}. "
+        f"The rolling average values range from {min(rolling_average)} to {max(rolling_average)} with an average of {np.mean(rolling_average):.2f}. "
+        f"The epsilon values range from {min(epsilons)} to {max(epsilons)} with an average exploration rate of {np.mean(epsilons):.2f}. "
+        f"The solved threshold is set at {solved_threshold}. "
+        f"Provide insights based on this data."
+    )
+    input_ids = tokenizer.encode(description, return_tensors="pt")
+
+    # Generate output from model
+    output_ids = model.generate(input_ids, max_length=300, num_return_sequences=1)
+    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+    return output_text
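One caveat in the new trainer.py: the 7B model is loaded eagerly at import time even if training is never started, and decoding the full output sequence echoes the prompt back into the displayed explanation. A possible refinement is to load the model lazily behind Streamlit's resource cache and return only the newly generated tokens; a minimal sketch under assumptions not in this commit (st.cache_resource requires Streamlit >= 1.18, and the dtype, device_map, generation settings and helper names are illustrative):

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


@st.cache_resource
def load_llm(model_name: str = "tiiuae/falcon-7b-instruct"):
    # Loaded once per process and shared across reruns and sessions.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,   # illustrative dtype choice
        trust_remote_code=True,
        device_map="auto",
    )
    return tokenizer, model


def generate_insights_cached(description: str) -> str:
    tokenizer, model = load_llm()
    inputs = tokenizer(description, return_tensors="pt").to(model.device)
    output_ids = model.generate(
        inputs.input_ids,
        max_new_tokens=200,           # cap new tokens instead of total length
        do_sample=True,
        top_k=10,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Drop the echoed prompt; keep only the generated continuation.
    new_tokens = output_ids[0][inputs.input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)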