# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Simple example of contextual bandits simulation.
Code corresponding to:
Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks
for Thompson Sampling, by Carlos Riquelme, George Tucker, and Jasper Snoek.
https://arxiv.org/abs/1802.09127
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

from absl import app
from absl import flags
import numpy as np
import tensorflow as tf

from bandits.algorithms.bootstrapped_bnn_sampling import BootstrappedBNNSampling
from bandits.algorithms.fixed_policy_sampling import FixedPolicySampling
from bandits.algorithms.linear_full_posterior_sampling import LinearFullPosteriorSampling
from bandits.algorithms.neural_linear_sampling import NeuralLinearPosteriorSampling
from bandits.algorithms.parameter_noise_sampling import ParameterNoiseSampling
from bandits.algorithms.posterior_bnn_sampling import PosteriorBNNSampling
from bandits.algorithms.uniform_sampling import UniformSampling
from bandits.core.contextual_bandit import run_contextual_bandit
from bandits.data.data_sampler import sample_adult_data
from bandits.data.data_sampler import sample_census_data
from bandits.data.data_sampler import sample_covertype_data
from bandits.data.data_sampler import sample_jester_data
from bandits.data.data_sampler import sample_mushroom_data
from bandits.data.data_sampler import sample_statlog_data
from bandits.data.data_sampler import sample_stock_data
from bandits.data.synthetic_data_sampler import sample_linear_data
from bandits.data.synthetic_data_sampler import sample_sparse_linear_data
from bandits.data.synthetic_data_sampler import sample_wheel_bandit_data

# Set up the paths to the data files.
base_route = os.getcwd()
data_route = 'contextual_bandits/datasets'
FLAGS = flags.FLAGS
FLAGS.set_default('alsologtostderr', True)
flags.DEFINE_string('logdir', '/tmp/bandits/', 'Base directory to save output')
flags.DEFINE_string(
    'mushroom_data',
    os.path.join(base_route, data_route, 'mushroom.data'),
    'Path to the Mushroom data file.')
flags.DEFINE_string(
    'financial_data',
    os.path.join(base_route, data_route, 'raw_stock_contexts'),
    'Path to the Financial (stock contexts) data file.')
flags.DEFINE_string(
    'jester_data',
    os.path.join(base_route, data_route, 'jester_data_40jokes_19181users.npy'),
    'Path to the Jester data file.')
flags.DEFINE_string(
    'statlog_data',
    os.path.join(base_route, data_route, 'shuttle.trn'),
    'Path to the Statlog data file.')
flags.DEFINE_string(
    'adult_data',
    os.path.join(base_route, data_route, 'adult.full'),
    'Path to the Adult data file.')
flags.DEFINE_string(
    'covertype_data',
    os.path.join(base_route, data_route, 'covtype.data'),
    'Path to the Covertype data file.')
flags.DEFINE_string(
    'census_data',
    os.path.join(base_route, data_route, 'USCensus1990.data.txt'),
    'Path to the Census data file.')

def sample_data(data_type, num_contexts=None):
"""Sample data from given 'data_type'.
Args:
data_type: Dataset from which to sample.
num_contexts: Number of contexts to sample.
Returns:
dataset: Sampled matrix with rows: (context, reward_1, ..., reward_num_act).
opt_rewards: Vector of expected optimal reward for each context.
opt_actions: Vector of optimal action for each context.
num_actions: Number of available actions.
context_dim: Dimension of each context.
"""
if data_type == 'linear':
# Create linear dataset
num_actions = 8
context_dim = 10
noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
dataset, _, opt_linear = sample_linear_data(num_contexts, context_dim,
num_actions, sigma=noise_stds)
opt_rewards, opt_actions = opt_linear
elif data_type == 'sparse_linear':
# Create sparse linear dataset
num_actions = 7
context_dim = 10
noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
num_nnz_dims = int(context_dim / 3.0)
dataset, _, opt_sparse_linear = sample_sparse_linear_data(
num_contexts, context_dim, num_actions, num_nnz_dims, sigma=noise_stds)
opt_rewards, opt_actions = opt_sparse_linear
elif data_type == 'mushroom':
# Create mushroom dataset
num_actions = 2
context_dim = 117
file_name = FLAGS.mushroom_data
dataset, opt_mushroom = sample_mushroom_data(file_name, num_contexts)
opt_rewards, opt_actions = opt_mushroom
elif data_type == 'financial':
num_actions = 8
context_dim = 21
num_contexts = min(3713, num_contexts)
noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
file_name = FLAGS.financial_data
dataset, opt_financial = sample_stock_data(file_name, context_dim,
num_actions, num_contexts,
noise_stds, shuffle_rows=True)
opt_rewards, opt_actions = opt_financial
elif data_type == 'jester':
num_actions = 8
context_dim = 32
num_contexts = min(19181, num_contexts)
file_name = FLAGS.jester_data
dataset, opt_jester = sample_jester_data(file_name, context_dim,
num_actions, num_contexts,
shuffle_rows=True,
shuffle_cols=True)
opt_rewards, opt_actions = opt_jester
elif data_type == 'statlog':
file_name = FLAGS.statlog_data
num_actions = 7
num_contexts = min(43500, num_contexts)
sampled_vals = sample_statlog_data(file_name, num_contexts,
shuffle_rows=True)
contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
dataset = np.hstack((contexts, rewards))
context_dim = contexts.shape[1]
elif data_type == 'adult':
file_name = FLAGS.adult_data
num_actions = 14
num_contexts = min(45222, num_contexts)
sampled_vals = sample_adult_data(file_name, num_contexts,
shuffle_rows=True)
contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
dataset = np.hstack((contexts, rewards))
context_dim = contexts.shape[1]
elif data_type == 'covertype':
file_name = FLAGS.covertype_data
num_actions = 7
num_contexts = min(150000, num_contexts)
sampled_vals = sample_covertype_data(file_name, num_contexts,
shuffle_rows=True)
contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
dataset = np.hstack((contexts, rewards))
context_dim = contexts.shape[1]
elif data_type == 'census':
file_name = FLAGS.census_data
num_actions = 9
num_contexts = min(150000, num_contexts)
sampled_vals = sample_census_data(file_name, num_contexts,
shuffle_rows=True)
contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
dataset = np.hstack((contexts, rewards))
context_dim = contexts.shape[1]
elif data_type == 'wheel':
delta = 0.95
num_actions = 5
context_dim = 2
mean_v = [1.0, 1.0, 1.0, 1.0, 1.2]
std_v = [0.05, 0.05, 0.05, 0.05, 0.05]
mu_large = 50
std_large = 0.01
dataset, opt_wheel = sample_wheel_bandit_data(num_contexts, delta,
mean_v, std_v,
mu_large, std_large)
opt_rewards, opt_actions = opt_wheel
  else:
    raise ValueError('Unknown data_type: {}'.format(data_type))

  return dataset, opt_rewards, opt_actions, num_actions, context_dim
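

# A minimal call, shown here as a comment; it uses the synthetic 'wheel'
# problem, so no data files are required:
#   dataset, opt_rewards, opt_actions, num_actions, context_dim = sample_data(
#       'wheel', num_contexts=2000)
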
def display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, name):
"""Displays summary statistics of the performance of each algorithm."""
print('---------------------------------------------------')
print('---------------------------------------------------')
  print('{} bandit completed after {:.2f} seconds.'.format(
      name, time.time() - t_init))
print('---------------------------------------------------')
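  # h_rewards holds one column of per-step rewards per algorithm, so summing
  # column j gives the total reward collected by algos[j].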
performance_pairs = []
for j, a in enumerate(algos):
performance_pairs.append((a.name, np.sum(h_rewards[:, j])))
performance_pairs = sorted(performance_pairs,
key=lambda elt: elt[1],
reverse=True)
  for i, (alg_name, reward) in enumerate(performance_pairs):
    print('{:3}) {:20}| \t \t total reward = {:10}.'.format(
        i, alg_name, reward))
print('---------------------------------------------------')
print('Optimal total reward = {}.'.format(np.sum(opt_rewards)))
print('Frequency of optimal actions (action, frequency):')
print([[elt, list(opt_actions).count(elt)] for elt in set(opt_actions)])
print('---------------------------------------------------')
print('---------------------------------------------------')


def main(_):
# Problem parameters
num_contexts = 2000
# Data type in {linear, sparse_linear, mushroom, financial, jester,
# statlog, adult, covertype, census, wheel}
data_type = 'mushroom'
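  # 'mushroom' loads the file pointed to by --mushroom_data; the synthetic
  # types ('linear', 'sparse_linear', 'wheel') need no external files.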
# Create dataset
sampled_vals = sample_data(data_type, num_contexts)
dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals
# Define hyperparameters and algorithms
hparams = tf.contrib.training.HParams(num_actions=num_actions)
hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
context_dim=context_dim,
a0=6,
b0=6,
lambda_prior=0.25,
initial_pulls=2)
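  # Assumed reading of the prior hyperparameters above: a0/b0 parameterize
  # the Inverse-Gamma prior on the noise variance, and lambda_prior the
  # precision of the Gaussian prior on the weights, in the Bayesian linear
  # regression used by LinearFullPosteriorSampling.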
hparams_rms = tf.contrib.training.HParams(num_actions=num_actions,
context_dim=context_dim,
init_scale=0.3,
activation=tf.nn.relu,
layer_sizes=[50],
batch_size=512,
activate_decay=True,
initial_lr=0.1,
max_grad_norm=5.0,
show_training=False,
freq_summary=1000,
buffer_s=-1,
initial_pulls=2,
optimizer='RMS',
reset_lr=True,
lr_decay_rate=0.5,
training_freq=50,
training_epochs=100,
p=0.95,
q=3)
hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions,
context_dim=context_dim,
init_scale=0.3,
activation=tf.nn.relu,
layer_sizes=[50],
batch_size=512,
activate_decay=True,
initial_lr=0.1,
max_grad_norm=5.0,
show_training=False,
freq_summary=1000,
buffer_s=-1,
initial_pulls=2,
optimizer='RMS',
reset_lr=True,
lr_decay_rate=0.5,
training_freq=50,
training_epochs=100,
use_dropout=True,
keep_prob=0.80)
hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions,
context_dim=context_dim,
init_scale=0.3,
activation=tf.nn.relu,
layer_sizes=[50],
batch_size=512,
activate_decay=True,
initial_lr=0.1,
max_grad_norm=5.0,
show_training=False,
freq_summary=1000,
buffer_s=-1,
initial_pulls=2,
optimizer='RMS',
use_sigma_exp_transform=True,
cleared_times_trained=10,
initial_training_steps=100,
noise_sigma=0.1,
reset_lr=False,
training_freq=50,
training_epochs=100)
hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions,
context_dim=context_dim,
init_scale=0.3,
activation=tf.nn.relu,
layer_sizes=[50],
batch_size=512,
activate_decay=True,
initial_lr=0.1,
max_grad_norm=5.0,
show_training=False,
freq_summary=1000,
buffer_s=-1,
initial_pulls=2,
reset_lr=True,
lr_decay_rate=0.5,
training_freq=1,
training_freq_network=50,
training_epochs=100,
a0=6,
b0=6,
lambda_prior=0.25)
hparams_nlinear2 = tf.contrib.training.HParams(num_actions=num_actions,
context_dim=context_dim,
init_scale=0.3,
activation=tf.nn.relu,
layer_sizes=[50],
batch_size=512,
activate_decay=True,
initial_lr=0.1,
max_grad_norm=5.0,
show_training=False,
freq_summary=1000,
buffer_s=-1,
initial_pulls=2,
reset_lr=True,
lr_decay_rate=0.5,
training_freq=10,
training_freq_network=50,
training_epochs=100,
a0=6,
b0=6,
lambda_prior=0.25)
hparams_pnoise = tf.contrib.training.HParams(num_actions=num_actions,
context_dim=context_dim,
init_scale=0.3,
activation=tf.nn.relu,
layer_sizes=[50],
batch_size=512,
activate_decay=True,
initial_lr=0.1,
max_grad_norm=5.0,
show_training=False,
freq_summary=1000,
buffer_s=-1,
initial_pulls=2,
optimizer='RMS',
reset_lr=True,
lr_decay_rate=0.5,
training_freq=50,
training_epochs=100,
noise_std=0.05,
eps=0.1,
d_samples=300,
)
hparams_alpha_div = tf.contrib.training.HParams(num_actions=num_actions,
context_dim=context_dim,
init_scale=0.3,
activation=tf.nn.relu,
layer_sizes=[50],
batch_size=512,
activate_decay=True,
initial_lr=0.1,
max_grad_norm=5.0,
show_training=False,
freq_summary=1000,
buffer_s=-1,
initial_pulls=2,
optimizer='RMS',
use_sigma_exp_transform=True,
cleared_times_trained=10,
initial_training_steps=100,
noise_sigma=0.1,
reset_lr=False,
training_freq=50,
training_epochs=100,
alpha=1.0,
k=20,
prior_variance=0.1)
hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
num_outputs=num_actions,
context_dim=context_dim,
reset_lr=False,
learn_embeddings=True,
max_num_points=1000,
show_training=False,
freq_summary=1000,
batch_size=512,
keep_fixed_after_max_obs=True,
training_freq=50,
initial_pulls=2,
training_epochs=100,
lr=0.01,
buffer_s=-1,
initial_lr=0.001,
lr_decay_rate=0.0,
optimizer='RMS',
task_latent_dim=5,
activate_decay=False)
algos = [
UniformSampling('Uniform Sampling', hparams),
UniformSampling('Uniform Sampling 2', hparams),
FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
LinearFullPosteriorSampling('LinFullPost', hparams_linear),
BootstrappedBNNSampling('BootRMS', hparams_rms),
ParameterNoiseSampling('ParamNoise', hparams_pnoise),
PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
]
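
  # Each agent above is driven through the common interface expected by
  # run_contextual_bandit(): propose an action for each context, then update
  # on the observed reward (see bandits.core.contextual_bandit for the exact
  # contract).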
# Run contextual bandit problem
t_init = time.time()
results = run_contextual_bandit(context_dim, num_actions, dataset, algos)
_, h_rewards = results
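  # The discarded first element of 'results' is the per-step action history;
  # only the reward matrix is needed for the summary below.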
# Display results
display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, data_type)


if __name__ == '__main__':
app.run(main)