installing langchain
- requirements.txt +1 -4
- trainer.py +22 -14
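In short: the commit removes the in-process model inference code (and its torch, einops, and accelerate dependencies) and instead generates the graph-insight text through LangChain's HuggingFaceHub client, calling the hosted tiiuae/falcon-7b-instruct model.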
requirements.txt
CHANGED
@@ -3,8 +3,5 @@ tensorflow
 matplotlib
 gym
 streamlit
-
-torch
-einops
-accelerate
+langchain
 
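The removals and the addition are two halves of the same change: torch, einops, and accelerate appear to have existed only to run the language model inside the Space's own process, and once generation moves to the hosted Hugging Face Inference API (see trainer.py below), langchain is the only new dependency required.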
trainer.py
CHANGED
@@ -7,11 +7,21 @@ import json
 import streamlit as st
 from DDQN import DoubleDeepQNetwork
 from antiJamEnv import AntiJamEnv
-from …
+from langchain import HuggingFaceHub, PromptTemplate, LLMChain
 
-…
-…
-…
+
+repo_id = "tiiuae/falcon-7b-instruct"
+huggingfacehub_api_token = "YOUR_API_TOKEN_HERE"  # Replace with your actual API token
+
+llm = HuggingFaceHub(huggingfacehub_api_token=huggingfacehub_api_token,
+                     repo_id=repo_id,
+                     model_kwargs={"temperature": 0.2, "max_new_tokens": 2000})
+
+template = """You are an AI trained to analyze and provide insights about training graphs in the domain of deep
+reinforcement learning. Given the following data about a graph: {data}, provide detailed insights. """
+
+prompt = PromptTemplate(template=template, input_variables=["data"])  # the input variable must match the {data} placeholder
+llm_chain = LLMChain(prompt=prompt, verbose=True, llm=llm)
 
 
 def train(jammer_type, channel_switching_cost):
@@ -85,7 +95,7 @@ def train(jammer_type, channel_switching_cost):
     ax.set_title(f'Training Rewards - {jammer_type}, CSC: {channel_switching_cost}')
     ax.legend()
 
-    insights = …
+    insights = generate_insights_langchain(rewards, rolling_average, epsilons, solved_threshold)
 
     with st.container():
         col1, col2 = st.columns(2)
@@ -115,19 +125,17 @@ def train(jammer_type, channel_switching_cost):
     return DDQN_agent
 
 
-def …
-…
+def generate_insights_langchain(rewards, rolling_average, epsilons, solved_threshold):
+    data_description = (
         f"The graph represents training rewards over episodes. "
         f"The actual rewards range from {min(rewards)} to {max(rewards)} with an average of {np.mean(rewards):.2f}. "
         f"The rolling average values range from {min(rolling_average)} to {max(rolling_average)} with an average of {np.mean(rolling_average):.2f}. "
         f"The epsilon values range from {min(epsilons)} to {max(epsilons)} with an average exploration rate of {np.mean(epsilons):.2f}. "
-        f"The solved threshold is set at {solved_threshold}. "
-        f"Provide insights based on this data."
+        f"The solved threshold is set at {solved_threshold}."
     )
-    input_ids = tokenizer.encode(description, return_tensors="pt")
 
-…
-…
-…
+    result = llm_chain.predict(data=data_description)  # predict() returns the generated text as a str
+    return result
+
+
 
-    return output_text
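For reference, the new-side code boils down to the pattern below. This is a minimal, self-contained sketch assuming the classic pre-0.1 langchain API that the commit imports from the top-level package; reading the token from the HUGGINGFACEHUB_API_TOKEN environment variable instead of hard-coding it, and the demo values under __main__, are illustrative choices, not part of the commit:

import os

import numpy as np
from langchain import HuggingFaceHub, LLMChain, PromptTemplate

# Hosted text-generation endpoint; repo_id and sampling kwargs follow the commit.
llm = HuggingFaceHub(
    huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
    repo_id="tiiuae/falcon-7b-instruct",
    model_kwargs={"temperature": 0.2, "max_new_tokens": 2000},
)

# The prompt exposes a single {data} placeholder, so input_variables must be ["data"].
template = (
    "You are an AI trained to analyze and provide insights about training graphs "
    "in the domain of deep reinforcement learning. Given the following data about "
    "a graph: {data}, provide detailed insights."
)
prompt = PromptTemplate(template=template, input_variables=["data"])
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)


def generate_insights_langchain(rewards, rolling_average, epsilons, solved_threshold):
    """Describe the training curves numerically and ask the LLM to interpret them."""
    data_description = (
        f"The actual rewards range from {min(rewards)} to {max(rewards)} "
        f"with an average of {np.mean(rewards):.2f}. "
        f"The rolling average ranges from {min(rolling_average)} to {max(rolling_average)}. "
        f"The epsilon values range from {min(epsilons)} to {max(epsilons)}. "
        f"The solved threshold is set at {solved_threshold}."
    )
    # LLMChain.predict returns the generated text as a plain str.
    return llm_chain.predict(data=data_description)


if __name__ == "__main__":
    # Tiny fabricated series, purely to exercise the chain end to end.
    print(generate_insights_langchain(
        rewards=[10, 25, 40], rolling_average=[10, 18, 25],
        epsilons=[1.0, 0.6, 0.2], solved_threshold=50,
    ))

One design note: a literal API token committed to a public Space is visible to anyone who can read the repo. Hugging Face Spaces provides per-Space secrets that surface as environment variables, which is why the sketch reads the token from the environment rather than from a string in the source.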