|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import random |
|
import numpy as np |
|
import pandas as pd |
|
from matplotlib import pyplot as plt |
|
import gym |
|
|
|
import gym.spaces as spaces |
|
import threading |
|
import math |
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Default configuration of the microgrid environment.
# MicroGridEnv reads most of these as fallbacks for its keyword arguments.
# ---------------------------------------------------------------------------

# First day of the simulated range (fallback for the "day0" option).
DEFAULT_DAY0 = 0
# NOTE(review): not referenced by the code visible in this file; the
# environment derives its default "dayn" from day0 + 1 instead.
DEFAULT_DAYN = 1

# All data files are looked up in the current working directory.
path = os.getcwd()

# --- Generation ------------------------------------------------------------
# Last column of the wind-generation file, scaled down by a factor of 100.
DEFAULT_POWER_GENERATED = np.genfromtxt(
    path + '/wind_generation_fortum.csv',
    usecols=[-1],
    skip_header=0,
    delimiter=',',
) / 100
DEFAULT_WIND_POWER_COST = 3.2

# --- Main grid -------------------------------------------------------------
# Last column of the regulation price files (one header row skipped),
# scaled down by a factor of 10.
DEFAULT_DOWN_REG = np.genfromtxt(
    path + '/down_regulation.csv',
    usecols=[-1],
    skip_header=1,
    delimiter=',',
) / 10
DEFAULT_UP_REG = np.genfromtxt(
    path + '/up_regulation.csv',
    usecols=[-1],
    skip_header=1,
    delimiter=',',
) / 10
DEFAULT_TRANSFER_PRICE_IMPORT = 0.97
DEFAULT_TRANSFER_PRICE_EXPORT = 0.09

# Number of time steps in one episode.
DEFAULT_ITERATIONS = 24

# --- Thermostatically controlled loads (TCLs) --------------------------------
DEFAULT_NUM_TCLS = 100
DEFAULT_AVGTCLPOWER = 1.5
# Column 5 of the temperature file (one header row skipped).
DEFAULT_TEMPERATURS = np.genfromtxt(
    path + '/temperatures.csv',
    delimiter=',',
    skip_header=1,
    usecols=[5],
)
DEFAULT_TCL_SALE_PRICE = 3.2
DEFAULT_TCL_TMIN = 19
DEFAULT_TCL_TMAX = 24

# --- Price-responsive loads --------------------------------------------------
DEFAULT_NUM_LOADS = 150
# Base load profile with 24 entries; the environment indexes it with
# time_step % 24.
DEFAULT_BASE_LOAD = np.array([
    .4, .3, .2, .2, .2, .2,
    .3, .5, .6, .6, .5, .5,
    .5, .4, .4, .6, .8, 1.4,
    1.2, .9, .8, .6, .5, .4,
])
DEFAULT_MARKET_PRICE = 5.48
# Offsets added to the market price; selected by the price component of an
# action.
DEFAULT_PRICE_TIERS = np.array([-3.0, -1.5, 0.0, 1.5, 3.0])

# --- Battery -----------------------------------------------------------------
DEFAULT_BAT_CAPACITY = 500
DEFAULT_MAX_CHARGE = 250
DEFAULT_MAX_DISCHARGE = 250

# Every step reward is divided by this constant before being returned.
MAX_R = 100
|
|
|
|
|
# Module-level buffers filled by MicroGridEnv.render(), one entry per call,
# and emptied again once the figure for a finished day has been written.
SOCS_RENDER = []
LOADS_RENDER = []
BATTERY_RENDER = []
PRICE_RENDER = []
ENERGY_SOLD_RENDER = []
ENERGY_BOUGHT_RENDER = []
GRID_PRICES_BUY_RENDER = []
GRID_PRICES_SELL_RENDER = []
ENERGY_GENERATED_RENDER = []
TCL_CONTROL_RENDER = []
TCL_CONSUMPTION_RENDER = []
TOTAL_CONSUMPTION_RENDER = []
TEMP_RENDER = []

# Lookup table from a discrete action index (0..79) to its four components
# [tcl_action, price_action, energy_deficiency_action, energy_excess_action].
# The last component varies fastest.
ACTIONS = [
    [tcl_level, price_tier, deficiency_flag, excess_flag]
    for tcl_level in range(4)
    for price_tier in range(5)
    for deficiency_flag in range(2)
    for excess_flag in range(2)
]
|
|
|
|
|
class TCL:
    """
    Model of a single thermostatically controlled load (TCL).

    The object keeps two temperatures, ``T`` and ``Tm``, which must be set
    with :meth:`set_T` before :meth:`control`, :meth:`update_state` or
    :attr:`SoC` are used. (Presumably ``T`` is the indoor air temperature and
    ``Tm`` the temperature of the building mass; confirm against the model
    this was taken from.)
    """

    def __init__(self, ca, cm, q, P, Tmin=DEFAULT_TCL_TMIN, Tmax=DEFAULT_TCL_TMAX):
        """
        Args:
            ca: coefficient of the (T0 - T) term in the temperature update.
            cm: coefficient of the exchange between T and Tm.
            q: constant term added to T at every update.
            P: amount added to T per update while the device is on.
            Tmin: lower bound of the temperature band.
            Tmax: upper bound of the temperature band.
        """
        self.ca, self.cm = ca, cm
        self.q, self.P = q, P
        self.Tmin, self.Tmax = Tmin, Tmax

        # On/off status of the device; it starts switched off.
        self.u = 0

    def set_T(self, T, Tm):
        """Set both temperature states of the device."""
        self.T, self.Tm = T, Tm

    def control(self, ui=0):
        """
        Decide the on/off status for the next update.

        Below ``Tmin`` the device is forced on, at or above ``Tmax`` it is
        forced off; only inside the band is the requested status ``ui`` used.
        """
        if self.T < self.Tmin:
            status = 1
        elif self.Tmin <= self.T < self.Tmax:
            status = ui
        else:
            status = 0
        self.u = status

    def update_state(self, T0):
        """
        Advance both temperatures by one update.

        Args:
            T0: temperature used in the ``ca`` term (the environment passes
                the current entry of its temperature series).
        """
        ambient_term = self.ca * (T0 - self.T)
        mass_term = self.cm * (self.Tm - self.T)
        device_term = self.P * self.u
        self.T += ambient_term + mass_term + device_term + self.q
        # Tm follows the already updated T.
        self.Tm += self.cm * (self.T - self.Tm)

    # The @property decorator lets callers write "tcl.SoC" and runs the
    # function below to obtain the value.
    @property
    def SoC(self):
        """Position of T inside the band: 0 at Tmin, 1 at Tmax (not clipped)."""
        band_width = self.Tmax - self.Tmin
        return (self.T - self.Tmin) / band_width
|
|
|
|
|
class Battery: |
|
|
|
def __init__(self, capacity, useD, dissipation, rateC, maxDD, chargeE): |
|
self.capacity = capacity |
|
self.useD = useD |
|
self.dissipation = dissipation |
|
self.rateC = rateC |
|
self.maxDD = maxDD |
|
self.chargeE = chargeE |
|
self.RC = 0 |
|
|
|
|
|
def charge(self, E): |
|
empty = self.capacity - self.RC |
|
if empty <= 0: |
|
return E |
|
else: |
|
self.RC += self.rateC * min(E,self.chargeE) |
|
leftover = self.RC - self.capacity + max(E-self.chargeE,0) |
|
self.RC = min(self.capacity, self.RC) |
|
return max(leftover, 0) |
|
|
|
def supply(self, E): |
|
remaining = self.RC |
|
self.RC -= min(E, remaining,self.maxDD) |
|
self.RC = max(self.RC, 0) |
|
return min(E, remaining,self.maxDD) * self.useD |
|
|
|
def dissipate(self): |
|
self.RC = self.RC * math.exp(- self.dissipation) |
|
|
|
@property |
|
def SoC(self): |
|
return self.RC / self.capacity |
|
|
|
def reset(self): |
|
self.RC=0 |
|
|
|
|
|
class Grid:
    """
    Connection to the main grid.

    Holds one price series for selling and one for buying, a fixed fee for
    each direction, and the index of the current entry in those series.
    """

    def __init__(self, down_reg, up_reg, exp_fees, imp_fees):
        """
        Args:
            down_reg: price series used when selling to the grid.
            up_reg: price series used when buying from the grid.
            exp_fees: fee added to the selling price.
            imp_fees: fee added to the buying price.
        """
        self.sell_prices = down_reg
        self.buy_prices = up_reg
        self.exp_fees = exp_fees
        self.imp_fees = imp_fees
        # Index into both price series; changed through set_time().
        self.time = 0

    def sell(self, E):
        """Return the (positive) amount obtained for selling ``E`` at the current time."""
        unit_price = self.sell_prices[self.time] + self.exp_fees
        return unit_price * E

    def buy(self, E):
        """Return the (negative) amount paid for buying ``E`` at the current time."""
        unit_price = self.buy_prices[self.time] + self.imp_fees
        return -unit_price * E

    def set_time(self, time):
        """Select which entry of the price series sell() and buy() use."""
        self.time = time

    def total_cost(self, prices, energy):
        """
        Sum the cost of a series of purchases.

        Each element costs ``prices * energy / 100`` plus the import fee
        times the energy. Both arguments must support element-wise
        arithmetic and iteration (e.g. NumPy arrays).
        """
        energy_part = prices * energy / 100
        fee_part = self.imp_fees * energy
        return sum(energy_part + fee_part)
|
|
|
class Generation:
    """Thin wrapper around a series of generated power values."""

    def __init__(self, generation):
        # Indexable series with one generation value per time index.
        self.power = generation

    def current_generation(self, time):
        """Return the generation value stored at index ``time``."""
        value = self.power[time]
        return value
|
|
|
|
|
class Load:
    """
    Price-responsive load.

    Depending on the price tier, part of the base load of the current hour is
    shifted away (or pulled forward). Shifted amounts are remembered per hour
    and are added back later with a probability that depends on the current
    price tier and on how long ago the shift happened.
    """

    def __init__(self, price_sens, base_load, max_v_load, patience):
        """
        Args:
            price_sens: price sensitivity; negative values are raised to 0.
            base_load: base load profile, indexed by hour of day.
            max_v_load: stored on the instance; not used by this class.
            patience: divisor of the waiting-time term in the execution
                probability; values below 1 are raised to 1.
        """
        sensitivity = max(0, price_sens)
        self.price_sens = sensitivity
        self.orig_price_sens = sensitivity
        self.base_load = base_load
        self.max_v_load = max_v_load
        self.response = 0
        # Maps the hour at which load was shifted to the shifted amount.
        self.shifted_loads = {}
        self.patience = max(patience, 1)
        # Load of the current step after demand response.
        self.dr_load = 0

    def react(self, price_tier, time_day):
        """
        Compute the load for the current step.

        Args:
            price_tier: index of the price tier in force; tier 2 is neutral.
            time_day: hour of day, used to index the base load profile.
        """
        baseline = self.base_load[time_day]
        tier_offset = price_tier - 2

        self.dr_load = baseline
        response = self.price_sens * tier_offset
        if response != 0:
            shifted_now = baseline * response
            self.dr_load -= shifted_now
            self.shifted_loads[time_day] = shifted_now

        # Iterate over a snapshot of the keys because executed entries are
        # removed from the dict inside the loop.
        for shifted_hour in list(self.shifted_loads):
            pending = self.shifted_loads[shifted_hour]
            waiting_term = (time_day - shifted_hour) / self.patience
            probability_of_execution = -pending * tier_offset + waiting_term
            if random.random() <= probability_of_execution:
                self.dr_load += pending
                del self.shifted_loads[shifted_hour]

    def load(self):
        """Return the load of the current step, never less than 0."""
        return max(self.dr_load, 0)
|
|
|
|
|
class MicroGridEnv(gym.Env):
    """
    Gym environment of a microgrid made of TCLs, price-responsive loads, a
    battery, local generation and a connection to the main grid.

    One episode covers ``iterations`` consecutive entries of the input
    series, starting at ``day * iterations``. An action is either an index
    into ``ACTIONS`` or a sequence ``[tcl_action, price_action,
    energy_deficiency_action, energy_excess_action]``.
    """

    def __init__(self, **kwargs):
        """
        Build the environment.

        Every option is read from ``kwargs`` and falls back to the matching
        module-level ``DEFAULT_*`` constant. Recognised keys: iterations,
        num_tcls, tcl_power, tcl_price, num_loads, base_load, normal_price,
        temperatures, price_tiers, day0, dayn, power_cost, down_reg, up_reg,
        imp_fees, exp_fees, battery_capacity, max_discharge, max_charge and
        generation_data.
        """
        self.iterations = kwargs.get("iterations", DEFAULT_ITERATIONS)
        self.num_tcls = kwargs.get("num_tcls", DEFAULT_NUM_TCLS)
        print(self.num_tcls)
        self.avg_tcl_power = kwargs.get("tcl_power", DEFAULT_AVGTCLPOWER)
        self.tcl_sale_price = kwargs.get("tcl_price", DEFAULT_TCL_SALE_PRICE)
        self.num_loads = kwargs.get("num_loads", DEFAULT_NUM_LOADS)
        self.typical_load = kwargs.get("base_load", DEFAULT_BASE_LOAD)
        self.market_price = kwargs.get("normal_price", DEFAULT_MARKET_PRICE)
        self.temperatures = kwargs.get("temperatures", DEFAULT_TEMPERATURS)
        self.price_tiers = kwargs.get("price_tiers", DEFAULT_PRICE_TIERS)
        self.day0 = kwargs.get("day0", DEFAULT_DAY0)
        self.dayn = kwargs.get("dayn", self.day0+1)
        self.power_cost = kwargs.get("power_cost", DEFAULT_WIND_POWER_COST)
        self.down_reg = kwargs.get("down_reg", DEFAULT_DOWN_REG)
        self.up_reg = kwargs.get("up_reg", DEFAULT_UP_REG)
        self.imp_fees = kwargs.get("imp_fees", DEFAULT_TRANSFER_PRICE_IMPORT)
        self.exp_fees = kwargs.get("exp_fees", DEFAULT_TRANSFER_PRICE_EXPORT)
        self.bat_capacity = kwargs.get("battery_capacity", DEFAULT_BAT_CAPACITY)
        self.max_discharge = kwargs.get("max_discharge", DEFAULT_MAX_DISCHARGE)
        self.max_charge = kwargs.get("max_charge", DEFAULT_MAX_CHARGE)

        # Episode state. Everything step(), _build_state() and render() read
        # is given a value here so that they also work before the first
        # reset() / step() call.
        self.day = self.day0
        self.time_step = 0
        self.high_price = 0
        self.sale_price = self.market_price
        self.energy_sold = 0
        self.energy_bought = 0
        self.control = 0

        # Kept for backward compatibility; nothing in this class fills them.
        self.tcls_parameters = []
        self.loads_parameters = []

        self.generation = Generation(kwargs.get("generation_data", DEFAULT_POWER_GENERATED))
        self.grid = Grid(down_reg=self.down_reg, up_reg=self.up_reg,
                         exp_fees=self.exp_fees, imp_fees=self.imp_fees)
        self.battery = Battery(capacity=self.bat_capacity, useD=0.9, dissipation=0.001,
                               rateC=0.9, maxDD=self.max_discharge, chargeE=self.max_charge)

        self.tcls = [self._create_tcl(*self._create_tcl_parameters())
                     for _ in range(self.num_tcls)]
        self.loads = [self._create_load(*self._create_load_parameters())
                      for _ in range(self.num_loads)]

        self.action_space_sep = spaces.Box(low=0, high=1, dtype=np.float32,
                                           shape=(13,))
        self.action_space = spaces.Discrete(80)

        # One entry per TCL plus the seven scalars appended by _build_state().
        self.observation_space = spaces.Box(low=-100, high=100, dtype=np.float32,
                                            shape=(self.num_tcls + 7,))

    def _create_tcl_parameters(self):
        """
        Draw the random parameters of one TCL.

        Returns:
            ``[ca, cm, q, P, init_temp]``; the first four are drawn from
            normal distributions, the initial temperature uniformly from
            [15, 24].
        """
        ca = random.normalvariate(0.01, 0.003)
        cm = random.normalvariate(0.3, 0.004)
        q = random.normalvariate(0, 0.01)
        P = random.normalvariate(self.avg_tcl_power, 0.01)
        init_temp = random.uniform(15, 24)
        return [ca, cm, q, P, init_temp]

    def _create_tcl(self, ca, cm, q, P, init_temp):
        """Create a TCL whose two temperatures both start at ``init_temp``."""
        tcl = TCL(ca, cm, q, P)
        tcl.set_T(init_temp, init_temp)
        return tcl

    def _create_load_parameters(self):
        """
        Draw the random parameters of one load.

        Returns:
            ``[price_sensitivity, max_v_load, patience]``.
        """
        price_sensitivity = random.normalvariate(0.4, 0.3)
        max_v_load = random.normalvariate(0.4, 0.01)
        patience = int(random.normalvariate(10, 6))
        return [price_sensitivity, max_v_load, patience]

    def _create_load(self, price_sensitivity, max_v_load, patience):
        """Create a Load that follows this environment's base load profile."""
        return Load(price_sensitivity, base_load=self.typical_load,
                    max_v_load=max_v_load, patience=patience)

    def _current_index(self):
        """Return the index of the current time step in the input series."""
        return self.day * self.iterations + self.time_step

    def _day_window(self, series):
        """Return the slice of ``series`` that belongs to the current day."""
        start = self.day * self.iterations
        return series[start:start + self.iterations]

    @staticmethod
    def _standardize(value, window):
        """Return ``value`` centred on the mean of ``window`` and divided by its std."""
        return (value - np.average(window)) / np.std(window)

    def _build_state(self):
        """
        Return the current state as one vector.

        Returns:
            state: 1D array holding, in this order: the rescaled
            state of charge of every TCL, the min-max normalised base load of
            the current hour, the high-price counter (divided by 4, capped at
            1), the normalised time step, the battery state of charge, and
            the current generation, grid buy price and grid sell price, each
            standardised over the current day.

        NOTE: after the last step of a day ``time_step == iterations``, so
        the generation and price lookups read the first entry of the
        following day; the input series must be long enough for that.
        """
        index = self._current_index()

        socs = np.array([tcl.SoC for tcl in self.tcls])
        socs = (socs + 1) / 2

        lowest_load = min(self.typical_load)
        highest_load = max(self.typical_load)
        loads = self.typical_load[self.time_step % 24]
        loads = (loads - lowest_load) / (highest_load - lowest_load)

        current_generation = self._standardize(
            self.generation.current_generation(index),
            self._day_window(self.generation.power))

        price = self._standardize(
            self.grid.buy_prices[index],
            self._day_window(self.grid.buy_prices))

        price_grid_sell = self._standardize(
            self.grid.sell_prices[index],
            self._day_window(self.grid.sell_prices))

        high_price = min(self.high_price/4, 1)
        time_step = self.time_step / (self.iterations - 1)

        state = np.concatenate((socs, [loads, high_price, time_step, self.battery.SoC,
                                       current_generation, price, price_grid_sell]))
        return state

    def _build_info(self):
        """
        Return the info dictionary that accompanies a state.

        It holds a temperature forecast for the ``iterations`` entries that
        follow the current time step (shorter near the end of the data) and
        the matching relative time indices.
        """
        # Offset by the current day like every other series lookup; without
        # it the forecast was always taken from the first day of the data.
        start = self._current_index() + 1
        temp_forecast = np.array(self.temperatures[start:start + self.iterations])
        return {"temperature_forecast": temp_forecast,
                "forecast_times": np.arange(0, self.iterations)}

    def _compute_tcl_power(self):
        """Return the total power drawn by the TCLs that are switched on."""
        return sum([tcl.u * tcl.P for tcl in self.tcls])

    def step(self, action):
        """
        Advance the simulation by one time step.

        Arguments:
            action: an index into ACTIONS, or a list/tuple/array
                ``[tcl_action, price_action, energy_deficiency_action,
                energy_excess_action]``.

        Returns:
            state: state vector after the step
            reward: reward of this step, divided by MAX_R
            terminal: True once ``iterations`` steps have been taken
            info: dictionary built by _build_info()
        """
        # Anything that is not a sequence of components is an action index.
        if not isinstance(action, (list, tuple, np.ndarray)) or np.ndim(action) == 0:
            action = ACTIONS[int(action)]

        index = self._current_index()
        self.grid.set_time(index)
        reward = 0

        tcl_action = action[0]
        price_action = action[1]
        energy_deficiency_action = action[2]
        energy_excess_action = action[3]

        # The counter grows with above-neutral price tiers; once it exceeds
        # 4 the price is forced back to the neutral tier.
        self.high_price += price_action - 2
        if self.high_price > 4:
            price_action = 2
            self.high_price = 4

        # Generated energy is paid for whether or not it is used.
        available_energy = self.generation.current_generation(index)
        reward -= available_energy * self.power_cost / 100

        # Price-responsive loads are served first.
        for load in self.loads:
            load.react(price_tier=price_action, time_day=self.time_step % 24)
        total_loads = sum([l.load() for l in self.loads])
        available_energy -= total_loads

        self.sale_price = self.price_tiers[price_action] + self.market_price
        reward += total_loads * (self.sale_price) / 100

        # Energy allocated to the TCLs, limited by what is left and handed
        # out starting with the TCL with the lowest state of charge.
        sorted_tcls = sorted(self.tcls, key=lambda x: x.SoC)
        control = max(min(tcl_action * self.num_tcls * self.avg_tcl_power / 3,
                          available_energy), 0)
        self.control = control

        outdoor_temperature = self.temperatures[index]
        for tcl in sorted_tcls:
            if control > 0:
                tcl.control(1)
                control -= tcl.P * tcl.u
            else:
                tcl.control(0)
            tcl.update_state(outdoor_temperature)

        tcl_power = self._compute_tcl_power()
        available_energy -= tcl_power
        reward += tcl_power * self.tcl_sale_price / 100

        if available_energy > 0:
            # Surplus: optionally charge the battery, sell what remains.
            if energy_excess_action:
                available_energy = self.battery.charge(available_energy)
            reward += self.grid.sell(available_energy) / 100
            self.energy_sold = available_energy
            self.energy_bought = 0
        else:
            # Deficit: optionally draw from the battery, buy what is missing.
            if energy_deficiency_action:
                available_energy += self.battery.supply(-available_energy)
            self.energy_bought = -available_energy
            reward += self.grid.buy(self.energy_bought) / 100
            self.energy_sold = 0

        self.time_step += 1
        state = self._build_state()
        terminal = self.time_step >= self.iterations
        info = self._build_info()
        return state, reward/MAX_R, terminal, info

    def reset(self, day=None):
        """
        Start a new episode and return its initial state.

        Only the day, the time step and the high-price counter are reset;
        the TCLs, loads and battery keep their current state (use
        reset_all() to recreate them).

        Args:
            day: day to simulate. When omitted, a day is drawn with
                ``random.randint(day0, dayn)``, i.e. both ends included.
        """
        if day is None:
            self.day = random.randint(self.day0, self.dayn)
        else:
            self.day = day
        print("Day:", self.day)
        self.time_step = 0
        self.high_price = 0
        return self._build_state()

    def reset_all(self, day=None):
        """
        Start a new episode with a fresh microgrid and return its initial state.

        Empties the battery and replaces all TCLs and loads with newly drawn
        ones.

        Args:
            day: day to simulate; defaults to ``day0``.
        """
        if day is None:
            self.day = self.day0
        else:
            self.day = day
        print("Day:", self.day)
        self.time_step = 0
        self.battery.reset()
        self.high_price = 0
        self.tcls.clear()
        self.loads.clear()
        self.tcls = [self._create_tcl(*self._create_tcl_parameters())
                     for _ in range(self.num_tcls)]
        self.loads = [self._create_load(*self._create_load_parameters())
                      for _ in range(self.num_loads)]
        return self._build_state()

    def render(self, name=''):
        """
        Record the outcome of the last step and, at the end of a day, plot it.

        Call once after every step(). When the day is complete a figure is
        saved to ``./RESULT/Day<day + 1>.png`` and the module-level render
        buffers are cleared. ``name`` is accepted but not used.
        """
        # step() has already advanced time_step, hence the "- 1".
        last_index = self.day * self.iterations + self.time_step - 1
        tcl_power = self._compute_tcl_power()
        load_values = [l.load() for l in self.loads]

        SOCS_RENDER.append([tcl.SoC*100 for tcl in self.tcls])
        LOADS_RENDER.append(load_values)
        PRICE_RENDER.append(self.sale_price)
        BATTERY_RENDER.append(self.battery.SoC)
        ENERGY_GENERATED_RENDER.append(self.generation.current_generation(last_index))
        ENERGY_SOLD_RENDER.append(self.energy_sold)
        ENERGY_BOUGHT_RENDER.append(self.energy_bought)
        GRID_PRICES_BUY_RENDER.append(self.grid.buy_prices[last_index])
        GRID_PRICES_SELL_RENDER.append(self.grid.sell_prices[last_index])
        TCL_CONTROL_RENDER.append(self.control)
        TCL_CONSUMPTION_RENDER.append(tcl_power)
        TOTAL_CONSUMPTION_RENDER.append(tcl_power + np.sum(load_values))
        TEMP_RENDER.append(self.temperatures[last_index])

        if self.time_step == self.iterations:
            # Positions follow the number of recorded steps so the plot also
            # works when ``iterations`` is not 24.
            recorded_steps = np.arange(len(SOCS_RENDER))

            fig = plt.figure()

            ax = plt.subplot(2, 1, 1)
            plt.axhspan(0, 24, facecolor='g', alpha=0.5)
            ax.set_facecolor("silver")
            ax.yaxis.grid(True)
            ax.set_ylabel("TCLs state of charge %")
            ax.boxplot(SOCS_RENDER, positions=range(len(SOCS_RENDER)))

            ax1 = ax.twinx()
            ax1.set_ylabel("Temperatures °C")
            ax1.plot(np.array(TEMP_RENDER), '--')
            plt.title("TCLs state of charge and outdoor Temperatures")
            plt.xlabel("Time (h)")
            plt.legend(["Outdoor Temperatures"], loc='lower right')

            ax = plt.subplot(2, 1, 2)
            ax.set_facecolor("silver")
            ax.set_ylabel("kW")
            ax.set_xlabel("Time (h)")
            ax.yaxis.grid(True)
            ax.plot(ENERGY_GENERATED_RENDER, color='k')
            ax.bar(x=recorded_steps - 0.2, height=TCL_CONTROL_RENDER, width=0.2)
            ax.bar(x=recorded_steps, height=TCL_CONSUMPTION_RENDER, width=0.2)
            plt.xticks(np.array(np.arange(self.iterations)))
            plt.title("Energy allocated to and consumed by TCLs and energy generated")
            plt.legend(['Energy generated', 'Energy allocated for TCLs',
                        'Energy consumed by TCLs'])
            plt.xlabel("Time (h)")
            plt.ylabel("kW")

            fig.tight_layout()

            # savefig() does not create missing directories.
            os.makedirs('./RESULT', exist_ok=True)
            plt.savefig('./RESULT/Day'+str(self.day+1)+'.png')
            # Release the figure; otherwise one figure per rendered day
            # stays open for the lifetime of the process.
            plt.close(fig)

            SOCS_RENDER.clear()
            LOADS_RENDER.clear()
            PRICE_RENDER.clear()
            BATTERY_RENDER.clear()
            GRID_PRICES_BUY_RENDER.clear()
            GRID_PRICES_SELL_RENDER.clear()
            ENERGY_BOUGHT_RENDER.clear()
            ENERGY_SOLD_RENDER.clear()
            ENERGY_GENERATED_RENDER.clear()
            TCL_CONTROL_RENDER.clear()
            TCL_CONSUMPTION_RENDER.clear()
            TOTAL_CONSUMPTION_RENDER.clear()
            TEMP_RENDER.clear()

    def close(self):
        """
        Nothing to be done here, but has to be defined
        """
        return

    def seedy(self, s):
        """
        Set the random seed for consistent experiments.

        Seeds both the ``random`` module and NumPy's global generator. The
        TCLs and loads are drawn in __init__, so seeding afterwards does not
        affect them.
        """
        random.seed(s)
        np.random.seed(s)
|
|
|
|
|
if __name__ == '__main__':
    # Smoke test: run one episode with a fixed action, render every step and
    # plot the rewards.

    # Seed before building the environment: the constructor draws the TCL
    # and load parameters from `random`, so seeding only afterwards leaves
    # them different on every run.
    random.seed(1)
    np.random.seed(1)
    env = MicroGridEnv()
    # Seed again so that the day drawn by reset() does not depend on how
    # many random numbers the constructor consumed.
    env.seedy(1)

    # Reward of every step of the episode.
    rewards = []

    state = env.reset()

    terminal = False
    while not terminal:
        # Fixed action: [tcl_action, price_action, energy_deficiency_action,
        # energy_excess_action].
        action = [1, 2, 0, 0]
        print(action)

        state, reward, terminal, _ = env.step(list(action))
        env.render()
        print(reward)
        rewards.append(reward)
    print("Total Reward:", sum(rewards))

    # Plot on a figure of its own; without this the curve can end up on the
    # last subplot that render() left behind.
    plt.figure()
    plt.plot(rewards)
    plt.title("rewards")
    plt.xlabel("Time")
    plt.ylabel("rewards")
    plt.show()
|
|
|
|
|
|