callbacks:
  rollout_lh:
    _target_: mode.rollout.libero_rollout.RolloutLibero
    _recursive_: false
    env_cfg:
      _target_: mode.wrappers.hulc_wrapper.HulcWrapper
    skip_epochs: ${rollout_lh_skip_epochs}
    benchmark_name: ${libero_benchmark}
    rollout_freq: 10
    num_videos: 0
    num_sequences: 50
    max_steps: 600
    empty_cache: false
    debug: false
    n_eval: 20
    num_procs: 10
    use_mp: false
    task_embedding_format: clip
    device: ${device}
  checkpoint:
    _target_: pytorch_lightning.callbacks.ModelCheckpoint
    save_top_k: 1
    verbose: true
    monitor: eval_lh/avg_seq_len
    mode: max
    dirpath: saved_models
    filename: '{epoch:02d}_{eval_lh/avg_seq_len:.2f}'
    every_n_epochs: ${callbacks.rollout_lh.rollout_freq}
  ema:
    _target_: mode.callbacks.ema.EMA
    decay: 0.999
    start_step: 0
    save_ema_weights_in_callback_state: true
    evaluate_ema_weights_instead: true
    power: 0.6666666666666666
    inv_gamma: 1.0
    min_value: 0.0
    max_value: 0.9999
datamodule:
  datasets:
    lang_dataset:
      _target_: mode.datasets.libero_dataset.LiberoMultitaskDataset
      key: lang
      benchmark_name: ${libero_benchmark}
      batch_size: ${batch_size}
      proprio_state: ${datamodule.proprioception_dims}
      obs_space: ${datamodule.observation_space}
      num_workers: ${num_workers}
      action_seq_len: ${act_seq_len}
      obs_seq_len: ${obs_seq_len}
      split_ratio: 0.0
  transforms:
    train:
      rgb_static:
      - _target_: torchvision.transforms.Resize
        size: 224
        antialias: true
      - _target_: mode.utils.transforms.RandomShiftsAug
        pad: 10
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
      rgb_gripper:
      - _target_: torchvision.transforms.Resize
        size: 112
        antialias: true
      - _target_: mode.utils.transforms.RandomShiftsAug
        pad: 4
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
    val:
      rgb_static:
      - _target_: torchvision.transforms.Resize
        size: 224
        antialias: true
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
      rgb_gripper:
      - _target_: torchvision.transforms.Resize
        size: 112
        antialias: true
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
  _target_: mode.datasets.libero_data_module.LiberoDataModule
  _recursive_: false
  root_data_dir: ${root_data_dir}
  action_space: 7
  shuffle_val: false
  benchmark_name: ${libero_benchmark}
  observation_space:
    rgb_obs:
    - agentview_rgb
    - eye_in_hand_rgb
    depth_obs: []
    state_obs:
    - gripper_states
    - joint_states
    actions:
    - rel_actions
    language:
    - language
  # NOTE(review): "None" here is the YAML string "None", not null — preserved as-is
  # since downstream code may check for this literal; confirm before changing to null.
  proprioception_dims: None
model:
  language_goal:
    _target_: mode.models.networks.clip_lang_encoder.LangClip
    _recursive_: false
    model_name: ${clip_lang_model_name}
  model:
    _target_: mode.models.edm_diffusion.score_wrappers.GCDenoiser
    _recursive_: false
    sigma_data: ${model.sigma_data}
    inner_model:
      _target_: mode.models.networks.modedit.MoDeDiT
      action_dim: ${datamodule.action_space}
      goal_dim: ${model.cond_dim}
      obs_dim: ${obs_dim}
      goal_conditioned: true
      causal: true
      use_custom_attn_mask: false
      use_proprio: ${model.use_proprio}
      state_dim: ${proprio_dims}
      embed_dim: ${model.latent_dim}
      n_layers: 12
      goal_seq_len: 1
      obs_seq_len: ${obs_seq_len}
      action_seq_len: ${act_seq_len}
      embed_pdrob: 0
      goal_drop: 0.1
      attn_pdrop: 0.3
      mlp_pdrop: 0.1
      n_heads: 8
      device: ${device}
      linear_output: true
      cond_router: true
      num_experts: 4
      top_k: 2
      router_normalize: true
      use_goal_in_routing: false
      use_argmax: false
      use_shared_expert: false
      use_noise_token_as_input: true
      init_style: olmoe
  _target_: mode.models.mode_agent.MoDEAgent
  _recursive_: false
  multistep: ${multistep}
  use_lr_scheduler: true
  entropy_gamma: 0.01
  router_z_delta: 0.0
  use_proprio: false
  seed: ${seed}
  sampler_type: ddim
  num_sampling_steps: 5
  sigma_data: 0.5
  sigma_min: 0.001
  sigma_max: 80
  noise_scheduler: exponential
  sigma_sample_density_type: loglogistic
  ckpt_path: /home/reuss/code/MeDiT_Policy/convert_weights/mode_first_run
  start_from_pretrained: true
  act_window_size: ${act_seq_len}
  latent_dim: 1024
  obs_enc_dim: ${obs_dim}
  cond_dim: 512
  resnet_type: '50'
  optimizer:
    _target_: torch.optim.AdamW
    transformer_weight_decay: 0.05
    obs_encoder_weight_decay: 0.05
    learning_rate: 0.0001
    betas:
    - 0.9
    - 0.95
  lr_scheduler:
    lr_scheduler:
      init_lr: 0.0001
      init_lr_scale: 0.1
      final_lr_scale: 1.0e-06
      total_steps: 45000
      phase_ratio: (0.02, 0.08, 0.9)
    # NOTE(review): nesting of `lr` relative to the inner lr_scheduler mapping was
    # lost in the table export; placed under the outer mapping — confirm against
    # the scheduler config this was resolved from.
    lr: 0.0001
root_data_dir: /home/yagmurlu/code/MoDE_Calvin/dataset/task_ABC_D
lang_folder: lang_clip_resnet50
vis_clip_model_name: ViT-B/16
clip_lang_model_name: ViT-B/32
log_dir: ./logs
slurm: false
future_range: 29
seed: 242
device: cuda
batch_size: 128
devices: 2
goal_window_size: 1
act_dim: 7
proprio_dims: 9
obs_dim: 512
goal_dim: 512
obs_seq_len: 1
act_seq_len: 10
multistep: ${act_seq_len}
p_last_state: 0
gen_img_res: 112
max_epochs: 10
rollout_lh_skip_epochs: 9
num_workers: 1
benchmark_name: ${libero_benchmark}
libero_benchmark: libero_90
trainer:
  gpus: ${devices}
  precision: 32
  max_epochs: ${max_epochs}
  sync_batchnorm: false
  accelerator: auto
  limit_train_batches: 1000
  limit_val_batches: 4
logger:
  _target_: pytorch_lightning.loggers.WandbLogger
  save_dir: .
  name: logger
  group: mode
  log_model: false
  project: ${libero_benchmark}
  entity: bennoq
  # '???' is OmegaConf's mandatory-value marker: must be supplied at runtime.
  id: ???