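# Resolved run configuration for sheeprl's DreamerV3 on the DIAMBRA "doapp" environment.
# Top-level keys control threading, seeding, determinism, and matmul precision.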
num_threads: 1
float32_matmul_precision: high
dry_run: false
seed: 42
torch_use_deterministic_algorithms: false
torch_backends_cudnn_benchmark: true
torch_backends_cudnn_deterministic: false
cublas_workspace_config: null
exp_name: dreamer_v3_doapp
run_name: 2024-04-16_17-34-17_dreamer_v3_doapp_42
root_dir: dreamer_v3/doapp
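# DreamerV3 algorithm hyperparameters. The tiny network sizes (dense_units: 8,
# discrete/stochastic size: 4) and total_steps: 1024 suggest a short smoke-test run
# rather than a full training configuration.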
algo:
  name: dreamer_v3
  total_steps: 1024
  per_rank_batch_size: 2
  run_test: false
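  # Observation keys: image-like observations go through the CNN encoder/decoder,
  # vector observations through the MLP encoder/decoder.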
  cnn_keys:
    encoder:
    - frame
    decoder:
    - frame
  mlp_keys:
    encoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
    decoder:
    - own_character
    - own_health
    - own_side
    - own_wins
    - opp_character
    - opp_health
    - opp_side
    - opp_wins
    - stage
    - timer
    - action
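  # World model (RSSM): optimizer, KL-balancing terms, and latent-state sizes,
  # followed by the per-component networks.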
  world_model:
    optimizer:
      _target_: torch.optim.Adam
      lr: 0.0001
      eps: 1.0e-08
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    discrete_size: 4
    stochastic_size: 4
    kl_dynamic: 0.5
    kl_representation: 0.1
    kl_free_nats: 1.0
    kl_regularizer: 1.0
    continue_scale_factor: 1.0
    clip_gradients: 1000.0
    decoupled_rssm: false
    learnable_initial_recurrent_state: true
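    # Component networks: observation encoder, recurrent model, transition (prior)
    # and representation (posterior) models, observation decoder, reward head, and
    # continue/discount head.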
    encoder:
      cnn_channels_multiplier: 2
      cnn_act: torch.nn.SiLU
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      cnn_layer_norm:
        cls: sheeprl.models.models.LayerNormChannelLast
        kw:
          eps: 0.001
      mlp_layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    recurrent_model:
      recurrent_state_size: 8
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    transition_model:
      hidden_size: 8
      dense_act: torch.nn.SiLU
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
    representation_model:
      hidden_size: 8
      dense_act: torch.nn.SiLU
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
    observation_model:
      cnn_channels_multiplier: 2
      cnn_act: torch.nn.SiLU
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      cnn_layer_norm:
        cls: sheeprl.models.models.LayerNormChannelLast
        kw:
          eps: 0.001
      mlp_layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
    reward_model:
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
      bins: 255
    discount_model:
      learnable: true
      dense_act: torch.nn.SiLU
      mlp_layers: 1
      layer_norm:
        cls: sheeprl.models.models.LayerNorm
        kw:
          eps: 0.001
      dense_units: 8
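  # Actor: stochastic policy trained on imagined rollouts; `moments` tracks the
  # return-normalization percentiles used by DreamerV3.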
  actor:
    optimizer:
      _target_: torch.optim.Adam
      lr: 8.0e-05
      eps: 1.0e-05
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    cls: sheeprl.algos.dreamer_v3.agent.Actor
    ent_coef: 0.0003
    min_std: 0.1
    max_std: 1.0
    init_std: 2.0
    dense_act: torch.nn.SiLU
    mlp_layers: 1
    layer_norm:
      cls: sheeprl.models.models.LayerNorm
      kw:
        eps: 0.001
    dense_units: 8
    clip_gradients: 100.0
    unimix: 0.01
    action_clip: 1.0
    moments:
      decay: 0.99
      max: 1.0
      percentile:
        low: 0.05
        high: 0.95
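  # Critic: discretized value head (255 bins) with a target network updated
  # with coefficient `tau`.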
  critic:
    optimizer:
      _target_: torch.optim.Adam
      lr: 8.0e-05
      eps: 1.0e-05
      weight_decay: 0
      betas:
      - 0.9
      - 0.999
    dense_act: torch.nn.SiLU
    mlp_layers: 1
    layer_norm:
      cls: sheeprl.models.models.LayerNorm
      kw:
        eps: 0.001
    dense_units: 8
    per_rank_target_network_update_freq: 1
    tau: 0.02
    bins: 255
    clip_gradients: 100.0
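  # Shared algorithm-level settings: discount and lambda-return parameters,
  # imagination horizon, replay ratio, and default layer sizes/activations
  # shared with the sub-models above.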
  gamma: 0.996996996996997
  lmbda: 0.95
  horizon: 15
  replay_ratio: 0.0625
  learning_starts: 1024
  per_rank_pretrain_steps: 0
  per_rank_sequence_length: 64
  cnn_layer_norm:
    cls: sheeprl.models.models.LayerNormChannelLast
    kw:
      eps: 0.001
  mlp_layer_norm:
    cls: sheeprl.models.models.LayerNorm
    kw:
      eps: 0.001
  dense_units: 8
  mlp_layers: 1
  dense_act: torch.nn.SiLU
  cnn_act: torch.nn.SiLU
  unimix: 0.01
  hafner_initialization: true
  player:
    discrete_size: 4
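# Replay buffer: capacity (in steps), memory-mapped to disk, and saved with checkpoints.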
buffer:
  size: 1024
  memmap: true
  validate_args: false
  from_numpy: false
  checkpoint: true
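# Checkpointing: save frequency and how many checkpoints to keep.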
checkpoint:
  every: 10000
  resume_from: null
  save_last: true
  keep_last: 5
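# Options applied to the torch.distributions objects built by the agent.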
distribution:
  validate_args: false
  type: auto
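# Environment: a single DIAMBRA "doapp" (Dead or Alive++) instance with 64x64 RGB frames.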
env:
  id: doapp
  num_envs: 1
  frame_stack: -1
  sync_env: true
  screen_size: 64
  action_repeat: 1
  grayscale: false
  clip_rewards: false
  capture_video: true
  frame_stack_dilation: 1
  max_episode_steps: null
  reward_as_observation: false
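  # DIAMBRA wrapper settings: difficulty, player role, action space, and
  # engine-level wrappers (action stacking, last-action observation, etc.).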
  wrapper:
    _target_: sheeprl.envs.diambra.DiambraWrapper
    id: doapp
    action_space: DISCRETE
    screen_size: 64
    grayscale: false
    repeat_action: 1
    rank: null
    log_level: 0
    increase_performance: true
    diambra_settings:
      role: P1
      step_ratio: 6
      difficulty: 4
      continue_game: 0.0
      show_final: false
      outfits: 2
      splash_screen: false
    diambra_wrappers:
      stack_actions: 1
      no_op_max: 0
      no_attack_buttons_combinations: false
      add_last_action: true
      scale: false
      exclude_image_scaling: false
      process_discrete_binary: false
      role_relative: true
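# Lightning Fabric launcher: single CPU device, full 32-bit precision.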
fabric:
  _target_: lightning.fabric.Fabric
  devices: 1
  num_nodes: 1
  strategy: auto
  accelerator: cpu
  precision: 32-true
  callbacks:
  - _target_: sheeprl.utils.callback.CheckpointCallback
    keep_last: 5
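# Metrics: logging frequency, the MetricAggregator with its tracked metrics,
# and the TensorBoard logger destination.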
metric:
  log_every: 5000
  disable_timer: false
  log_level: 1
  sync_on_compute: false
  aggregator:
    _target_: sheeprl.utils.metric.MetricAggregator
    raise_on_missing: false
    metrics:
      Rewards/rew_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Game/ep_len_avg:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/world_model_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/value_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/policy_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/observation_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/reward_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/state_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Loss/continue_loss:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/kl:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/post_entropy:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      State/prior_entropy:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/world_model:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/actor:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
      Grads/critic:
        _target_: torchmetrics.MeanMetric
        sync_on_compute: false
  logger:
    _target_: lightning.fabric.loggers.TensorBoardLogger
    name: 2024-04-16_17-34-17_dreamer_v3_doapp_42
    root_dir: logs/runs/dreamer_v3/doapp
    version: null
    default_hp_metric: true
    prefix: ''
    sub_dir: null
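# Model manager is disabled for this run; the entries below name the artifacts
# (world model, actor, critic, target critic, moments) that would otherwise be registered.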
model_manager:
  disabled: true
  models:
    world_model:
      model_name: dreamer_v3_doapp_world_model
      description: DreamerV3 World Model used in doapp Environment
      tags: {}
    actor:
      model_name: dreamer_v3_doapp_actor
      description: DreamerV3 Actor used in doapp Environment
      tags: {}
    critic:
      model_name: dreamer_v3_doapp_critic
      description: DreamerV3 Critic used in doapp Environment
      tags: {}
    target_critic:
      model_name: dreamer_v3_doapp_target_critic
      description: DreamerV3 Target Critic used in doapp Environment
      tags: {}
    moments:
      model_name: dreamer_v3_doapp_moments
      description: DreamerV3 Moments used in doapp Environment
      tags: {}