|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Simple example of contextual bandits simulation. |
|
|
|
Code corresponding to: |
|
Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks |
|
for Thompson Sampling, by Carlos Riquelme, George Tucker, and Jasper Snoek. |
|
https://arxiv.org/abs/1802.09127 |
|
""" |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import time |
|
from absl import app |
|
from absl import flags |
|
import numpy as np |
|
import os |
|
import tensorflow as tf |
|
|
|
from bandits.algorithms.bootstrapped_bnn_sampling import BootstrappedBNNSampling |
|
from bandits.core.contextual_bandit import run_contextual_bandit |
|
from bandits.data.data_sampler import sample_adult_data |
|
from bandits.data.data_sampler import sample_census_data |
|
from bandits.data.data_sampler import sample_covertype_data |
|
from bandits.data.data_sampler import sample_jester_data |
|
from bandits.data.data_sampler import sample_mushroom_data |
|
from bandits.data.data_sampler import sample_statlog_data |
|
from bandits.data.data_sampler import sample_stock_data |
|
from bandits.algorithms.fixed_policy_sampling import FixedPolicySampling |
|
from bandits.algorithms.linear_full_posterior_sampling import LinearFullPosteriorSampling |
|
from bandits.algorithms.neural_linear_sampling import NeuralLinearPosteriorSampling |
|
from bandits.algorithms.parameter_noise_sampling import ParameterNoiseSampling |
|
from bandits.algorithms.posterior_bnn_sampling import PosteriorBNNSampling |
|
from bandits.data.synthetic_data_sampler import sample_linear_data |
|
from bandits.data.synthetic_data_sampler import sample_sparse_linear_data |
|
from bandits.data.synthetic_data_sampler import sample_wheel_bandit_data |
|
from bandits.algorithms.uniform_sampling import UniformSampling |
|
|
|
|
|
# Default dataset locations are built from the current working directory
# joined with the relative datasets folder below.
base_route = os.getcwd()
data_route = 'contextual_bandits/datasets'

FLAGS = flags.FLAGS
FLAGS.set_default('alsologtostderr', True)

flags.DEFINE_string(
    name='logdir',
    default='/tmp/bandits/',
    help='Base directory to save output')

# NOTE(review): the help texts below say "Directory", but sample_data() hands
# each of these flag values to its sampler as `file_name` -- confirm whether
# the wording should be changed to "file".
flags.DEFINE_string(
    name='mushroom_data',
    default=os.path.join(base_route, data_route, 'mushroom.data'),
    help='Directory where Mushroom data is stored.')
flags.DEFINE_string(
    name='financial_data',
    default=os.path.join(base_route, data_route, 'raw_stock_contexts'),
    help='Directory where Financial data is stored.')
flags.DEFINE_string(
    name='jester_data',
    default=os.path.join(
        base_route, data_route, 'jester_data_40jokes_19181users.npy'),
    help='Directory where Jester data is stored.')
flags.DEFINE_string(
    name='statlog_data',
    default=os.path.join(base_route, data_route, 'shuttle.trn'),
    help='Directory where Statlog data is stored.')
flags.DEFINE_string(
    name='adult_data',
    default=os.path.join(base_route, data_route, 'adult.full'),
    help='Directory where Adult data is stored.')
flags.DEFINE_string(
    name='covertype_data',
    default=os.path.join(base_route, data_route, 'covtype.data'),
    help='Directory where Covertype data is stored.')
flags.DEFINE_string(
    name='census_data',
    default=os.path.join(base_route, data_route, 'USCensus1990.data.txt'),
    help='Directory where Census data is stored.')
|
|
|
|
|
def sample_data(data_type, num_contexts=None):
    """Sample a contextual bandit problem from the dataset named `data_type`.

    Args:
      data_type: Dataset from which to sample. One of 'linear',
        'sparse_linear', 'mushroom', 'financial', 'jester', 'statlog',
        'adult', 'covertype', 'census' or 'wheel'.
      num_contexts: Number of contexts to sample. For 'financial', 'jester',
        'statlog', 'adult', 'covertype' and 'census' the value is clipped to
        a per-dataset maximum before being handed to the sampler; None is
        forwarded to the sampler unchanged.

    Returns:
      dataset: Sampled matrix with rows: (context, reward_1, ..., reward_num_act).
      opt_rewards: Vector of expected optimal reward for each context.
      opt_actions: Vector of optimal action for each context.
      num_actions: Number of available actions.
      context_dim: Dimension of each context.

    Raises:
      ValueError: If `data_type` is not one of the supported dataset names.
    """

    def clip_contexts(max_contexts):
        """Clip the requested number of contexts to `max_contexts`."""
        # min(max_contexts, None) raises TypeError on Python 3 (on Python 2 it
        # evaluated to None), so forward None explicitly instead of comparing.
        if num_contexts is None:
            return None
        return min(max_contexts, num_contexts)

    if data_type == 'linear':
        num_actions = 8
        context_dim = 10
        noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
        dataset, _, opt_linear = sample_linear_data(
            num_contexts, context_dim, num_actions, sigma=noise_stds)
        opt_rewards, opt_actions = opt_linear
    elif data_type == 'sparse_linear':
        num_actions = 7
        context_dim = 10
        noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
        num_nnz_dims = int(context_dim / 3.0)
        dataset, _, opt_sparse_linear = sample_sparse_linear_data(
            num_contexts, context_dim, num_actions, num_nnz_dims,
            sigma=noise_stds)
        opt_rewards, opt_actions = opt_sparse_linear
    elif data_type == 'mushroom':
        num_actions = 2
        context_dim = 117
        file_name = FLAGS.mushroom_data
        dataset, opt_mushroom = sample_mushroom_data(file_name, num_contexts)
        opt_rewards, opt_actions = opt_mushroom
    elif data_type == 'financial':
        num_actions = 8
        context_dim = 21
        noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
        file_name = FLAGS.financial_data
        dataset, opt_financial = sample_stock_data(
            file_name, context_dim, num_actions, clip_contexts(3713),
            noise_stds, shuffle_rows=True)
        opt_rewards, opt_actions = opt_financial
    elif data_type == 'jester':
        num_actions = 8
        context_dim = 32
        file_name = FLAGS.jester_data
        dataset, opt_jester = sample_jester_data(
            file_name, context_dim, num_actions, clip_contexts(19181),
            shuffle_rows=True, shuffle_cols=True)
        opt_rewards, opt_actions = opt_jester
    elif data_type in ('statlog', 'adult', 'covertype', 'census'):
        # These four datasets are sampled through the same call shape; only
        # the file flag, the action count, the context cap and the sampler
        # differ. Entries: (flag name, num_actions, max contexts, sampler).
        flag_name, num_actions, max_contexts, sampler = {
            'statlog': ('statlog_data', 7, 43500, sample_statlog_data),
            'adult': ('adult_data', 14, 45222, sample_adult_data),
            'covertype': ('covertype_data', 7, 150000, sample_covertype_data),
            'census': ('census_data', 9, 150000, sample_census_data),
        }[data_type]
        file_name = getattr(FLAGS, flag_name)
        sampled_vals = sampler(file_name, clip_contexts(max_contexts),
                               shuffle_rows=True)
        contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
        # Contexts and rewards come back separately; stack them column-wise
        # and read the context dimension off the sampled contexts.
        dataset = np.hstack((contexts, rewards))
        context_dim = contexts.shape[1]
    elif data_type == 'wheel':
        delta = 0.95
        num_actions = 5
        context_dim = 2
        mean_v = [1.0, 1.0, 1.0, 1.0, 1.2]
        std_v = [0.05, 0.05, 0.05, 0.05, 0.05]
        mu_large = 50
        std_large = 0.01
        dataset, opt_wheel = sample_wheel_bandit_data(
            num_contexts, delta, mean_v, std_v, mu_large, std_large)
        opt_rewards, opt_actions = opt_wheel
    else:
        # Without this branch an unknown name fell through to the return
        # statement and failed with an UnboundLocalError.
        raise ValueError('Unknown data_type: {!r}'.format(data_type))

    return dataset, opt_rewards, opt_actions, num_actions, context_dim
|
|
|
|
|
def display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, name):
    """Print a ranked summary of how each algorithm performed.

    Args:
      algos: Sequence of algorithm objects; each one's `name` attribute is
        used as its label.
      opt_rewards: Optimal rewards; their sum is reported.
      opt_actions: Optimal actions; the number of occurrences of each distinct
        action is reported.
      h_rewards: 2-D array indexed as [:, j], where column j is summed to get
        the total reward of algos[j].
      t_init: Start time, as returned by time.time().
      name: Label of the bandit problem, used in the header line.
    """
    rule = '---------------------------------------------------'

    print(rule)
    print(rule)
    print('{} bandit completed after {} seconds.'.format(
        name, time.time() - t_init))
    print(rule)

    # Total reward per algorithm, listed from best to worst.
    totals = [(algo.name, np.sum(h_rewards[:, col]))
              for col, algo in enumerate(algos)]
    totals.sort(key=lambda pair: pair[1], reverse=True)
    for rank, (algo_name, total) in enumerate(totals):
        print('{:3}) {:20}| \t \t total reward = {:10}.'.format(
            rank, algo_name, total))

    print(rule)
    print('Optimal total reward = {}.'.format(np.sum(opt_rewards)))
    print('Frequency of optimal actions (action, frequency):')
    action_list = list(opt_actions)
    print([[action, action_list.count(action)] for action in set(opt_actions)])
    print(rule)
    print(rule)
|
|
|
|
|
def main(_):
    """Run every bandit algorithm on one sampled problem and print a summary.

    Samples 2000 contexts of the 'mushroom' problem via sample_data, builds
    one hyperparameter set per algorithm, runs all algorithms through
    run_contextual_bandit and reports the outcome with display_results.
    """
    # Problem selection.
    num_contexts = 2000
    data_type = 'mushroom'

    (dataset, opt_rewards, opt_actions,
     num_actions, context_dim) = sample_data(data_type, num_contexts)

    make_hparams = tf.contrib.training.HParams

    def network_hparams(**extra):
        """Build HParams from the settings shared by the network algorithms.

        A fresh settings dict (and a fresh `layer_sizes` list) is created on
        every call, so no mutable value is shared between the returned sets.
        """
        settings = dict(
            num_actions=num_actions,
            context_dim=context_dim,
            init_scale=0.3,
            activation=tf.nn.relu,
            layer_sizes=[50],
            batch_size=512,
            activate_decay=True,
            initial_lr=0.1,
            max_grad_norm=5.0,
            show_training=False,
            freq_summary=1000,
            buffer_s=-1,
            initial_pulls=2)
        settings.update(extra)
        return make_hparams(**settings)

    # Settings common to the 'RMS', 'Dropout' and 'ParamNoise' sets.
    rms_schedule = dict(
        optimizer='RMS',
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=50,
        training_epochs=100)

    # Settings common to the 'BBB' and 'BBAlphaDiv' sets.
    variational = dict(
        optimizer='RMS',
        use_sigma_exp_transform=True,
        cleared_times_trained=10,
        initial_training_steps=100,
        noise_sigma=0.1,
        reset_lr=False,
        training_freq=50,
        training_epochs=100)

    # Settings common to both 'NeuralLinear' sets, which differ only in
    # training_freq.
    neural_linear = dict(
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=0.25)

    hparams = make_hparams(num_actions=num_actions)

    hparams_linear = make_hparams(
        num_actions=num_actions,
        context_dim=context_dim,
        a0=6,
        b0=6,
        lambda_prior=0.25,
        initial_pulls=2)

    hparams_rms = network_hparams(p=0.95, q=3, **rms_schedule)

    hparams_dropout = network_hparams(
        use_dropout=True, keep_prob=0.80, **rms_schedule)

    hparams_bbb = network_hparams(**variational)

    hparams_nlinear = network_hparams(training_freq=1, **neural_linear)

    hparams_nlinear2 = network_hparams(training_freq=10, **neural_linear)

    hparams_pnoise = network_hparams(
        noise_std=0.05, eps=0.1, d_samples=300, **rms_schedule)

    hparams_alpha_div = network_hparams(
        alpha=1.0, k=20, prior_variance=0.1, **variational)

    # The GP set does not build on the shared network settings; it is spelled
    # out in full.
    hparams_gp = make_hparams(
        num_actions=num_actions,
        num_outputs=num_actions,
        context_dim=context_dim,
        reset_lr=False,
        learn_embeddings=True,
        max_num_points=1000,
        show_training=False,
        freq_summary=1000,
        batch_size=512,
        keep_fixed_after_max_obs=True,
        training_freq=50,
        initial_pulls=2,
        training_epochs=100,
        lr=0.01,
        buffer_s=-1,
        initial_lr=0.001,
        lr_decay_rate=0.0,
        optimizer='RMS',
        task_latent_dim=5,
        activate_decay=False)

    # NOTE(review): the two-entry vectors given to FixedPolicySampling
    # presumably assume num_actions == 2 (the value sample_data returns for
    # 'mushroom') -- confirm before switching data_type.
    algos = [
        UniformSampling('Uniform Sampling', hparams),
        UniformSampling('Uniform Sampling 2', hparams),
        FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
        FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
        PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
        PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
        NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
        NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
        LinearFullPosteriorSampling('LinFullPost', hparams_linear),
        BootstrappedBNNSampling('BootRMS', hparams_rms),
        ParameterNoiseSampling('ParamNoise', hparams_pnoise),
        PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
        PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
    ]

    # Run the simulation; only the second element of the result (the reward
    # history) is used for the report.
    t_init = time.time()
    _, h_rewards = run_contextual_bandit(
        context_dim, num_actions, dataset, algos)

    display_results(
        algos, opt_rewards, opt_actions, h_rewards, t_init, data_type)
|
|
|
if __name__ == '__main__':
    # Script entry point: hand control to absl, which invokes main.
    app.run(main=main)
|
|