import numpy as np


class SecondaryAgent:
    """Expert wrapper pairing a predictive model with a specialty label.

    Attributes:
        model: Object exposing ``predict(state)``. ``PrimeAgent.train`` also
            calls ``model.fit(...)`` on it (presumably a Keras-style model;
            confirm against the caller).
        specialty: Label describing what this expert is meant to handle. It
            is only stored here; nothing in this file reads it.
    """

    def __init__(self, model, specialty):
        self.model = model
        self.specialty = specialty

    def predict(self, state):
        """Return ``self.model.predict(state)`` unchanged."""
        return self.model.predict(state)


class PrimeAgent:
    """Mixture-of-experts agent that blends expert outputs via a gating network.

    Attributes:
        gating_network: Object exposing ``predict(state)`` and ``fit(...)``.
            The first row of its prediction is used as one weight per expert.
        experts: Sequence of objects exposing ``predict(state)`` and a
            ``model`` attribute with ``fit(...)`` (e.g. ``SecondaryAgent``).
    """

    def __init__(self, gating_network, experts):
        self.gating_network = gating_network
        self.experts = experts

    def act(self, state):
        """Choose an action from the gate-weighted sum of expert outputs.

        Args:
            state: Input forwarded as-is to the gating network and to every
                expert. Only the first row of the gating output is used, so
                this presumably expects a batch of one state; confirm with
                the caller.

        Returns:
            Index of the largest entry of the combined output, as returned by
            ``np.argmax`` over the flattened array.

        Raises:
            ValueError: If the number of gating weights differs from the
                number of experts.
        """
        gating_weights = self.gating_network.predict(state)
        expert_outputs = [expert.predict(state) for expert in self.experts]

        weights = gating_weights[0]
        # zip() would silently drop the surplus weights or experts, yielding
        # an action computed from an incomplete mixture; fail loudly instead.
        if len(weights) != len(expert_outputs):
            raise ValueError(
                f"gating network produced {len(weights)} weights "
                f"for {len(expert_outputs)} experts"
            )

        # Weighted sum of expert outputs based on gating weights.
        combined_output = np.sum(
            [weight * output for weight, output in zip(weights, expert_outputs)],
            axis=0,
        )
        return np.argmax(combined_output)

    def train(self, states, actions, rewards):
        """Run one reward-weighted fit epoch on the gate and on every expert.

        The gating network and each expert's model are all fitted on the same
        ``(states, actions)`` pairs, with ``rewards`` passed as
        ``sample_weight``.

        Args:
            states: Training inputs passed to each ``fit`` call.
            actions: Training targets passed to each ``fit`` call.
            rewards: Per-sample weights passed as ``sample_weight``.
        """
        # NOTE(review): the gate is fitted against `actions` although act()
        # reads its output as per-expert weights; confirm this is intended.
        self.gating_network.fit(
            states, actions, sample_weight=rewards, epochs=1, verbose=0
        )
        for expert in self.experts:
            expert.model.fit(
                states, actions, sample_weight=rewards, epochs=1, verbose=0
            )