gabehubner committed
Commit f2f8639 · 1 Parent(s): ee2308e

working lunar lander attribution mechanism

__pycache__/app.cpython-311.pyc ADDED
Binary file (4.73 kB).
 
__pycache__/ddpg.cpython-311.pyc CHANGED
Binary files a/__pycache__/ddpg.cpython-311.pyc and b/__pycache__/ddpg.cpython-311.pyc differ
 
__pycache__/train.cpython-311.pyc CHANGED
Binary files a/__pycache__/train.cpython-311.pyc and b/__pycache__/train.cpython-311.pyc differ
 
app.py CHANGED
@@ -1,27 +1,65 @@
  import gradio as gr
  from train import TrainingLoop
-
- def image_classifier(inp):
-     return {'cat': 0.3, 'dog': 0.7}
-
- iface = gr.Interface(fn=image_classifier, inputs="image", outputs="label")
-
- load_trained = gr.Interface(fn=image_classifier, inputs="image", outputs="label")
-
- attribute = gr.Interface(fn=image_classifier, inputs="image", outputs="label")
-
- # iface = gr.TabbedInterface(interface_list=[train_from_scratch, load_trained, attribute], tab_names=["Train from Scratch", "Continue Training", "Attribute"],title="Attribution in Deep Reinforcement Learning")
- iface.launch()
+ from scipy.special import softmax
+ import numpy as np
+
+ train = None
+ frames, attributions = None, None
+
+ lunar_lander_spec_conversion = {
+     0: "X-coordinate",
+     1: "Y-coordinate",
+     2: "Linear velocity in the X-axis",
+     3: "Linear velocity in the Y-axis",
+     4: "Angle",
+     5: "Angular velocity",
+     6: "Left leg touched the floor",
+     7: "Right leg touched the floor"
+ }
+
+ def create_training_loop(env_spec):
+     global train
+     train = TrainingLoop(env_spec=env_spec)
+     train.create_agent()
+
+     return train.env.spec
+
+ def display_softmax(inputs):
+     inputs = np.array(inputs)
+     probabilities = softmax(inputs)
+
+     softmax_dict = {name: float(prob) for name, prob in zip(lunar_lander_spec_conversion.values(), probabilities)}
+
+     return softmax_dict
+
+ def generate_output(num_iterations, option):
+     global frames, attributions
+     frames, attributions = train.explain_trained(num_iterations=num_iterations, option=option)
+     slider.maximum = len(frames)
+
+ def get_frame_and_attribution(slider_value):
+     global frames, attributions
+     frame = frames[slider_value]
+     attribution = display_softmax(attributions[slider_value])
+
+     return frame, attribution
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Introspection in Deep Reinforcement Learning")
+
+     with gr.Tab(label="Attribute"):
+         env_spec = gr.Textbox(label="Environment Specification (e.g.: LunarLander-v2)", lines=1)
+         env = gr.Interface(title="Create the Environment", allow_flagging="never", inputs=env_spec, fn=create_training_loop, outputs=gr.JSON())
+
+         with gr.Row():
+             option = gr.Dropdown(choices=["Torch Tensor of 0's", "Running Average"], type="index")
+             baselines = gr.Slider(label="Number of Baseline Iterations", interactive=True, minimum=0, maximum=100, value=10, step=5, info="Baseline inputs to collect for the average", render=True)
+         gr.Button("ATTRIBUTE").click(fn=generate_output, inputs=[baselines, option])
+         slider = gr.Slider(label="Key Frame", minimum=0, maximum=20000, step=1, value=0)
+
+         gr.Interface(fn=get_frame_and_attribution, inputs=slider, live=True, outputs=[gr.Image(), gr.Label()])
+
+ demo.launch()
 
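Note on the display path above: get_frame_and_attribution feeds gr.Label() with the output of display_softmax, so the raw attribution vector for a key frame is shown as a probability distribution over the eight observation features. A minimal standalone sketch of that conversion (the feature_names list stands in for lunar_lander_spec_conversion.values(), and the attribution values are invented for illustration):

import numpy as np
from scipy.special import softmax

# Same feature-name mapping as lunar_lander_spec_conversion in app.py above
feature_names = [
    "X-coordinate", "Y-coordinate",
    "Linear velocity in the X-axis", "Linear velocity in the Y-axis",
    "Angle", "Angular velocity",
    "Left leg touched the floor", "Right leg touched the floor",
]

raw_attributions = [0.8, 1.2, -0.3, 2.1, 0.05, -0.6, 0.0, 0.0]  # invented per-feature scores for one frame
probabilities = softmax(np.array(raw_attributions))              # normalize into a distribution
label_dict = {name: float(p) for name, p in zip(feature_names, probabilities)}
print(label_dict)  # this dict is what gr.Label() renders as ranked confidences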
 
tmp/ddpg/actor_ddpg CHANGED
Binary files a/tmp/ddpg/actor_ddpg and b/tmp/ddpg/actor_ddpg differ
 
tmp/ddpg/critic_ddpg CHANGED
Binary files a/tmp/ddpg/critic_ddpg and b/tmp/ddpg/critic_ddpg differ
 
tmp/ddpg/target_actor_ddpg CHANGED
Binary files a/tmp/ddpg/target_actor_ddpg and b/tmp/ddpg/target_actor_ddpg differ
 
tmp/ddpg/target_critic_ddpg CHANGED
Binary files a/tmp/ddpg/target_critic_ddpg and b/tmp/ddpg/target_critic_ddpg differ
 
train.py CHANGED
@@ -18,7 +18,9 @@ class TrainingLoop:
              "render_mode": None
          }

-         self.env = None
+         self.env = gym.make(
+             **self.defaults
+         )

          self.defaults.update(**kwargs)

@@ -44,7 +46,7 @@

          score_history = []

-         for i in range(1000):
+         for i in range(10000):
              done = False
              score = 0
              obs, _ = self.env.reset()

@@ -177,8 +179,8 @@
          assert self.agent is not None

          baseline_options = {
-             "1": torch.zeros(8),
-             "2": self._collect_running_baseline_average(num_iterations),
+             0: torch.zeros(8),
+             1: self._collect_running_baseline_average(num_iterations),
          }

          baseline = baseline_options[option]
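
The baseline_options keys now match the dropdown's type="index" values in app.py (0 selects the zero tensor, 1 the running-average baseline). This commit does not show the body of explain_trained, but a zero tensor and a running average over collected observations are the usual reference inputs for Integrated Gradients-style attribution over the 8-dimensional observation. The following is a hypothetical sketch of how such a baseline could be consumed (assuming Captum and an actor network mapping observations to actions), not the repository's actual implementation:

import torch
from captum.attr import IntegratedGradients

def attribute_observation(actor, observation, baseline, action_index=0):
    # Hypothetical helper: attribute one action output back onto the 8 observation features.
    # `actor` is assumed to be a torch.nn.Module taking a (batch, 8) observation tensor.
    ig = IntegratedGradients(actor)
    obs = torch.as_tensor(observation, dtype=torch.float32).unsqueeze(0)
    base = torch.as_tensor(baseline, dtype=torch.float32).unsqueeze(0)
    attributions = ig.attribute(obs, baselines=base, target=action_index)
    return attributions.squeeze(0).detach().numpy()  # one score per observation dimension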