asataura committed
Commit
88cee09
1 Parent(s): 8d7cbbe

installing langchain

Files changed (2):
  1. requirements.txt +1 -4
  2. trainer.py +22 -14
requirements.txt CHANGED
@@ -3,8 +3,5 @@ tensorflow
 matplotlib
 gym
 streamlit
-transformers
-torch
-einops
-accelerate
+langchain
 
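Note: the root-level imports this commit adds to trainer.py (HuggingFaceHub, PromptTemplate, LLMChain) only resolve in early langchain releases; later releases moved HuggingFaceHub out of the top-level package. A sketch of a pinned requirements.txt, where the version bound is an assumption rather than something the commit specifies:

# requirements.txt sketch -- the "<0.1" pin is an assumption made so the
# root-level langchain imports used in trainer.py keep resolving.
tensorflow
matplotlib
gym
streamlit
langchain<0.1
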
trainer.py CHANGED
@@ -7,11 +7,21 @@ import json
 import streamlit as st
 from DDQN import DoubleDeepQNetwork
 from antiJamEnv import AntiJamEnv
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from langchain import HuggingFaceHub, PromptTemplate, LLMChain
 
-model_name = "tiiuae/falcon-7b-instruct"  # Replace with the exact model name or path
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+repo_id = "tiiuae/falcon-7b-instruct"
+huggingfacehub_api_token = "YOUR_API_TOKEN_HERE"  # Replace with your actual API token
+
+llm = HuggingFaceHub(huggingfacehub_api_token=huggingfacehub_api_token,
+                     repo_id=repo_id,
+                     model_kwargs={"temperature": 0.2, "max_new_tokens": 2000})
+
+template = """You are an AI trained to analyze and provide insights about training graphs in the domain of deep
+reinforcement learning. Given the following data about a graph: {data}, provide detailed insights."""
+
+prompt = PromptTemplate(template=template, input_variables=["data"])
+llm_chain = LLMChain(prompt=prompt, verbose=True, llm=llm)
 
 
 def train(jammer_type, channel_switching_cost):
@@ -85,7 +95,7 @@ def train(jammer_type, channel_switching_cost):
     ax.set_title(f'Training Rewards - {jammer_type}, CSC: {channel_switching_cost}')
     ax.legend()
 
-    insights = generate_insights(rewards, rolling_average, epsilons, solved_threshold)
+    insights = generate_insights_langchain(rewards, rolling_average, epsilons, solved_threshold)
 
     with st.container():
         col1, col2 = st.columns(2)
@@ -115,19 +125,17 @@ def train(jammer_type, channel_switching_cost):
     return DDQN_agent
 
 
-def generate_insights(rewards, rolling_average, epsilons, solved_threshold):
-    description = (
+def generate_insights_langchain(rewards, rolling_average, epsilons, solved_threshold):
+    data_description = (
         f"The graph represents training rewards over episodes. "
         f"The actual rewards range from {min(rewards)} to {max(rewards)} with an average of {np.mean(rewards):.2f}. "
         f"The rolling average values range from {min(rolling_average)} to {max(rolling_average)} with an average of {np.mean(rolling_average):.2f}. "
         f"The epsilon values range from {min(epsilons)} to {max(epsilons)} with an average exploration rate of {np.mean(epsilons):.2f}. "
-        f"The solved threshold is set at {solved_threshold}. "
-        f"Provide insights based on this data."
+        f"The solved threshold is set at {solved_threshold}."
     )
-    input_ids = tokenizer.encode(description, return_tensors="pt")
 
-    # Generate output from model
-    output_ids = model.generate(input_ids, max_length=300, num_return_sequences=1)
-    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
-    return output_text
+    result = llm_chain.predict(data=data_description)  # predict() returns the generated text as a string
+    return result
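
For a quick smoke test of the new chain outside the Streamlit app, a minimal standalone sketch; the synthetic reward/epsilon arrays, the HUGGINGFACEHUB_API_TOKEN environment variable, and the shortened prompt are illustrative assumptions, not part of the commit:

# Standalone smoke test mirroring generate_insights_langchain.
# Assumptions: an early langchain release with root-level imports, a valid
# Hugging Face Hub token in HUGGINGFACEHUB_API_TOKEN, and synthetic data.
import os

import numpy as np
from langchain import HuggingFaceHub, LLMChain, PromptTemplate

llm = HuggingFaceHub(
    huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
    repo_id="tiiuae/falcon-7b-instruct",
    model_kwargs={"temperature": 0.2, "max_new_tokens": 2000},
)
prompt = PromptTemplate(
    template="Given the following data about a training graph: {data}, provide detailed insights.",
    input_variables=["data"],
)
llm_chain = LLMChain(prompt=prompt, llm=llm)

# Synthetic stand-ins for the statistics trainer.py collects during training.
rewards = np.random.uniform(-40.0, -5.0, size=100)
rolling_average = np.convolve(rewards, np.ones(10) / 10, mode="valid")
epsilons = np.linspace(1.0, 0.01, num=100)

data_description = (
    f"Rewards range from {rewards.min():.2f} to {rewards.max():.2f}. "
    f"The rolling average ends at {rolling_average[-1]:.2f}. "
    f"Epsilon decays from {epsilons[0]} to {epsilons[-1]}."
)
print(llm_chain.predict(data=data_description))  # predict() returns a string

Because HuggingFaceHub calls the hosted inference API, this runs without a local GPU; the trade-off versus the previous transformers approach is a network dependency and an API token in place of a multi-gigabyte local model download (hence transformers, torch, einops, and accelerate being dropped from requirements.txt).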