Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

pretrained_model/README.md +9 -0
pretrained_model/config.json +84 -0
pretrained_model/config.yaml +217 -0
pretrained_model/model.safetensors +3 -0
training_state.pth +3 -0

pretrained_model/README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+---
+tags:
+- pytorch_model_hub_mixin
+- model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Library: [More Information Needed]
+- Docs: [More Information Needed]

pretrained_model/config.json ADDED Viewed

	@@ -0,0 +1,84 @@

+{
+  "beta_end": 0.02,
+  "beta_schedule": "squaredcos_cap_v2",
+  "beta_start": 0.0001,
+  "clip_sample": true,
+  "clip_sample_range": 1.0,
+  "crop_is_random": true,
+  "crop_shape": [
+    224,
+    224
+  ],
+  "diffusion_step_embed_dim": 128,
+  "do_mask_loss_for_padding": false,
+  "down_dims": [
+    256,
+    512,
+    1024
+  ],
+  "ema_inv_gamma": 1.0,
+  "ema_max_alpha": 0.9999,
+  "ema_min_alpha": 0.0,
+  "ema_power": 0.75,
+  "ema_update_after_step": 0,
+  "horizon": 16,
+  "input_normalization_modes": {
+    "observation.images.scene_left_0": "mean_std",
+    "observation.images.scene_right_0": "mean_std",
+    "observation.images.wrist_left_plus": "mean_std",
+    "observation.images.wrist_right_minus": "mean_std",
+    "observation.state": "min_max"
+  },
+  "input_shapes": {
+    "observation.images.scene_left_0": [
+      3,
+      480,
+      640
+    ],
+    "observation.images.scene_right_0": [
+      3,
+      480,
+      640
+    ],
+    "observation.images.wrist_left_plus": [
+      3,
+      480,
+      640
+    ],
+    "observation.images.wrist_right_minus": [
+      3,
+      480,
+      640
+    ],
+    "observation.state": [
+      20
+    ]
+  },
+  "kernel_size": 5,
+  "n_action_steps": 8,
+  "n_groups": 8,
+  "n_obs_steps": 2,
+  "noise_scheduler_type": "DDPM",
+  "num_inference_steps": null,
+  "num_train_timesteps": 100,
+  "output_normalization_modes": {
+    "action": "min_max"
+  },
+  "output_shapes": {
+    "action": [
+      14
+    ]
+  },
+  "prediction_type": "epsilon",
+  "pretrained_backbone_weights": "IMAGENET1K_SWAG_LINEAR_V1",
+  "resize_shape": [
+    256,
+    342
+  ],
+  "spatial_softmax_num_keypoints": 32,
+  "use_ema": true,
+  "use_film_scale_modulation": true,
+  "use_group_norm": true,
+  "use_spatial_softmax": false,
+  "vision_backbone": "vit_b_16"
+}

pretrained_model/config.yaml ADDED Viewed

	@@ -0,0 +1,217 @@

+resume: false
+device: cuda
+use_amp: false
+seed: 100000
+dataset_repo_id: notmahi/tutorial-tri-BimanualPutRedBellPepperInBin
+video_backend: pyav
+training:
+  offline_steps: 20000
+  online_steps: 0
+  online_steps_between_rollouts: 1
+  online_sampling_ratio: 0.5
+  online_env_seed: ???
+  eval_freq: 0
+  log_freq: 250
+  save_checkpoint: true
+  save_freq: 5000
+  num_workers: 4
+  batch_size: 32
+  image_transforms:
+    enable: false
+    max_num_transforms: 3
+    random_order: false
+    brightness:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+    contrast:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+    saturation:
+      weight: 1
+      min_max:
+      - 0.5
+      - 1.5
+    hue:
+      weight: 1
+      min_max:
+      - -0.05
+      - 0.05
+    sharpness:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+  grad_clip_norm: 10
+  lr: 0.0001
+  lr_scheduler: cosine
+  lr_warmup_steps: 500
+  adam_betas:
+  - 0.95
+  - 0.999
+  adam_eps: 1.0e-08
+  adam_weight_decay: 1.0e-06
+  delta_timestamps:
+    observation.images.wrist_right_minus:
+    - -0.03333333333333333
+    - 0.0
+    observation.images.wrist_left_plus:
+    - -0.03333333333333333
+    - 0.0
+    observation.images.scene_right_0:
+    - -0.03333333333333333
+    - 0.0
+    observation.images.scene_left_0:
+    - -0.03333333333333333
+    - 0.0
+    observation.state:
+    - -0.03333333333333333
+    - 0.0
+    action:
+    - -0.03333333333333333
+    - 0.0
+    - 0.03333333333333333
+    - 0.06666666666666667
+    - 0.1
+    - 0.13333333333333333
+    - 0.16666666666666666
+    - 0.2
+    - 0.23333333333333334
+    - 0.26666666666666666
+    - 0.3
+    - 0.3333333333333333
+    - 0.36666666666666664
+    - 0.4
+    - 0.43333333333333335
+    - 0.4666666666666667
+  drop_n_last_frames: 7
+eval:
+  n_episodes: 50
+  batch_size: 50
+  use_async_envs: false
+wandb:
+  enable: false
+  disable_artifact: false
+  project: lerobot
+  notes: ''
+fps: 30
+env:
+  name: tri
+  task: PutSpatulaInUtensilCrock-v0
+  state_dim: 20
+  action_dim: 14
+  fps: ${fps}
+  episode_length: 400
+  gym:
+    fps: ${fps}
+override_dataset_stats:
+  observation.images.wrist_right_minus:
+    mean:
+    - - - 0.485
+    - - - 0.456
+    - - - 0.406
+    std:
+    - - - 0.229
+    - - - 0.224
+    - - - 0.225
+  observation.images.wrist_left_plus:
+    mean:
+    - - - 0.485
+    - - - 0.456
+    - - - 0.406
+    std:
+    - - - 0.229
+    - - - 0.224
+    - - - 0.225
+  observation.images.scene_right_0:
+    mean:
+    - - - 0.485
+    - - - 0.456
+    - - - 0.406
+    std:
+    - - - 0.229
+    - - - 0.224
+    - - - 0.225
+  observation.images.scene_left_0:
+    mean:
+    - - - 0.485
+    - - - 0.456
+    - - - 0.406
+    std:
+    - - - 0.229
+    - - - 0.224
+    - - - 0.225
+policy:
+  name: diffusion
+  n_obs_steps: 2
+  horizon: 16
+  n_action_steps: 8
+  input_shapes:
+    observation.images.wrist_right_minus:
+    - 3
+    - 480
+    - 640
+    observation.images.wrist_left_plus:
+    - 3
+    - 480
+    - 640
+    observation.images.scene_right_0:
+    - 3
+    - 480
+    - 640
+    observation.images.scene_left_0:
+    - 3
+    - 480
+    - 640
+    observation.state:
+    - ${env.state_dim}
+  output_shapes:
+    action:
+    - ${env.action_dim}
+  input_normalization_modes:
+    observation.images.wrist_right_minus: mean_std
+    observation.images.wrist_left_plus: mean_std
+    observation.images.scene_right_0: mean_std
+    observation.images.scene_left_0: mean_std
+    observation.state: min_max
+  output_normalization_modes:
+    action: min_max
+  vision_backbone: vit_b_16
+  pretrained_backbone_weights: IMAGENET1K_SWAG_LINEAR_V1
+  resize_shape:
+  - 256
+  - 342
+  crop_shape:
+  - 224
+  - 224
+  crop_is_random: true
+  use_spatial_softmax: false
+  use_group_norm: true
+  spatial_softmax_num_keypoints: 32
+  down_dims:
+  - 256
+  - 512
+  - 1024
+  kernel_size: 5
+  n_groups: 8
+  diffusion_step_embed_dim: 128
+  use_film_scale_modulation: true
+  noise_scheduler_type: DDPM
+  num_train_timesteps: 100
+  beta_schedule: squaredcos_cap_v2
+  beta_start: 0.0001
+  beta_end: 0.02
+  prediction_type: epsilon
+  clip_sample: true
+  clip_sample_range: 1.0
+  num_inference_steps: null
+  use_ema: true
+  ema_update_after_step: 0
+  ema_min_alpha: 0.0
+  ema_max_alpha: 0.9999
+  ema_inv_gamma: 1.0
+  ema_power: 0.75
+  do_mask_loss_for_padding: false

pretrained_model/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d5f0ba89c08511a9dc39d427ed15a1bd96fa6130f808af7f8c5bc2c419ee4d3
+size 1899724320

training_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e54f2acf3aab55c24b9f25ece4a940f5aa48c47e9b649d7f0e5a27ebc97414a7
+size 1899951950