diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index bd48566556..5ae174987b 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -50,6 +50,7 @@ Guidelines for modifications: * Haoran Zhou * HoJin Jeon * Hongwei Xiong +* Iretiayo Akinola * Jan Kerner * Jean Tampon * Jia Lin Yuan @@ -63,6 +64,7 @@ Guidelines for modifications: * Lorenz Wellhausen * Masoud Moghani * Michael Gussert +* Michael Noseworthy * Muhong Guo * Nuralem Abizov * Oyindamola Omotuyi @@ -91,3 +93,4 @@ Guidelines for modifications: * Gavriel State * Hammad Mazhar * Marco Hutter +* Yashraj Narang diff --git a/docs/source/_static/tasks/factory/gear_mesh.jpg b/docs/source/_static/tasks/factory/gear_mesh.jpg new file mode 100644 index 0000000000..af26e5818a Binary files /dev/null and b/docs/source/_static/tasks/factory/gear_mesh.jpg differ diff --git a/docs/source/_static/tasks/factory/nut_thread.jpg b/docs/source/_static/tasks/factory/nut_thread.jpg new file mode 100644 index 0000000000..9a3155e6c4 Binary files /dev/null and b/docs/source/_static/tasks/factory/nut_thread.jpg differ diff --git a/docs/source/_static/tasks/factory/peg_insert.jpg b/docs/source/_static/tasks/factory/peg_insert.jpg new file mode 100644 index 0000000000..7aaa2c1450 Binary files /dev/null and b/docs/source/_static/tasks/factory/peg_insert.jpg differ diff --git a/docs/source/overview/environments.rst b/docs/source/overview/environments.rst index 95ace6c61a..6559bec4d8 100644 --- a/docs/source/overview/environments.rst +++ b/docs/source/overview/environments.rst @@ -152,6 +152,39 @@ for the lift-cube environment: .. |cube-shadow-lstm-link| replace:: `Isaac-Repose-Cube-Shadow-OpenAI-LSTM-Direct-v0 `__ .. |cube-shadow-vis-link| replace:: `Isaac-Repose-Cube-Shadow-Vision-Direct-v0 `__ +Contact-rich Manipulation +~~~~~~~~~~~~ + +Environments based on contact-rich manipulation tasks such as peg insertion, gear meshing and nut-bolt fastening. + +These tasks share the same task configurations and control options. You can switch between them by specifying the task name. +For example: + +* |factory-peg-link|: Peg insertion with the Franka arm +* |factory-gear-link|: Gear meshing with the Franka arm +* |factory-nut-link|: Nut-Bolt fastening with the Franka arm + +.. table:: + :widths: 33 37 30 + + +--------------------+-------------------------+-----------------------------------------------------------------------------+ + | World | Environment ID | Description | + +====================+=========================+=============================================================================+ + | |factory-peg| | |factory-peg-link| | Insert peg into the socket with the Franka robot | + +--------------------+-------------------------+-----------------------------------------------------------------------------+ + | |factory-gear| | |factory-gear-link| | Insert and mesh gear into the base with other gears, using the Franka robot | + +--------------------+-------------------------+-----------------------------------------------------------------------------+ + | |factory-nut| | |factory-nut-link| | Thread the nut onto the first 2 threads of the bolt, using the Franka robot | + +--------------------+-------------------------+-----------------------------------------------------------------------------+ + +.. |factory-peg| image:: ../_static/tasks/factory/peg_insert.jpg +.. |factory-gear| image:: ../_static/tasks/factory/gear_mesh.jpg +.. |factory-nut| image:: ../_static/tasks/factory/nut_thread.jpg + +.. |factory-peg-link| replace:: `Isaac-Factory-PegInsert-Direct-v0 `__ +.. |factory-gear-link| replace:: `Isaac-Factory-GearMesh-Direct-v0 `__ +.. |factory-nut-link| replace:: `Isaac-Factory-NutThread-Direct-v0 `__ + Locomotion ~~~~~~~~~~ @@ -369,6 +402,18 @@ Comprehensive List of Environments - - Manager Based - **rl_games** (PPO), **rsl_rl** (PPO), **skrl** (PPO), **sb3** (PPO) + * - Isaac-Factory-GearMesh-Direct-v0 + - + - Direct + - **rl_games** (PPO) + * - Isaac-Factory-NutThread-Direct-v0 + - + - Direct + - **rl_games** (PPO) + * - Isaac-Factory-PegInsert-Direct-v0 + - + - Direct + - **rl_games** (PPO) * - Isaac-Franka-Cabinet-Direct-v0 - - Direct diff --git a/source/extensions/omni.isaac.lab_tasks/config/extension.toml b/source/extensions/omni.isaac.lab_tasks/config/extension.toml index accf06c5c5..e82e19521f 100644 --- a/source/extensions/omni.isaac.lab_tasks/config/extension.toml +++ b/source/extensions/omni.isaac.lab_tasks/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.10.15" +version = "0.10.16" # Description title = "Isaac Lab Environments" diff --git a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst index 2e10dc6dbd..69a4c04aea 100644 --- a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst @@ -1,11 +1,23 @@ Changelog --------- +0.10.16 (2024-12-16) +~~~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added ``Factory-Direct-v0`` environment as a direct RL env that + implements contact-rich manipulation tasks including peg insertion, + gear meshing, and nut threading. + + 0.10.15 (2024-12-16) ~~~~~~~~~~~~~~~~~~~~ Added ^^^^^ + * Added ``Isaac-Reach-Franka-OSC-v0`` and ``Isaac-Reach-Franka-OSC-Play-v0`` variations of the manager based reach environment that uses :class:`omni.isaac.lab.envs.mdp.actions.OperationalSpaceControllerAction`. @@ -20,6 +32,7 @@ Added * Added ``Isaac-Stack-Cube-Franka-IK-Rel-v0`` and ``Isaac-Stack-Cube-Instance-Randomize-Franka-IK-Rel-v0`` environments as manager-based RL envs that implement a three cube stacking task. + 0.10.13 (2024-10-30) ~~~~~~~~~~~~~~~~~~~~ @@ -49,6 +62,7 @@ Added * Added feature extracted observation cartpole examples. + 0.10.10 (2024-10-25) ~~~~~~~~~~~~~~~~~~~~ diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/__init__.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/__init__.py new file mode 100644 index 0000000000..c19c96f708 --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/__init__.py @@ -0,0 +1,44 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents +from .factory_env import FactoryEnv +from .factory_env_cfg import FactoryTaskGearMeshCfg, FactoryTaskNutThreadCfg, FactoryTaskPegInsertCfg + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Factory-PegInsert-Direct-v0", + entry_point="omni.isaac.lab_tasks.direct.factory:FactoryEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": FactoryTaskPegInsertCfg, + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Factory-GearMesh-Direct-v0", + entry_point="omni.isaac.lab_tasks.direct.factory:FactoryEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": FactoryTaskGearMeshCfg, + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Factory-NutThread-Direct-v0", + entry_point="omni.isaac.lab_tasks.direct.factory:FactoryEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": FactoryTaskNutThreadCfg, + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + }, +) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/agents/__init__.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/agents/__init__.py new file mode 100644 index 0000000000..c3ee657052 --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/agents/rl_games_ppo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 0000000000..5494199846 --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,118 @@ +params: + seed: 0 + algo: + name: a2c_continuous + + env: + clip_actions: 1.0 + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + + space: + continuous: + mu_activation: None + sigma_activation: None + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: False + mlp: + units: [512, 128, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + rnn: + name: lstm + units: 1024 + layers: 2 + before_mlp: True + concat_input: True + layer_norm: True + + load_checkpoint: False + load_path: "" + + config: + name: Factory + device: cuda:0 + full_experiment_name: test + env_name: rlgpu + multi_gpu: False + ppo: True + mixed_precision: True + normalize_input: True + normalize_value: True + value_bootstrap: True + num_actors: 128 + reward_shaper: + scale_value: 1.0 + normalize_advantage: True + gamma: 0.995 + tau: 0.95 + learning_rate: 1.0e-4 + lr_schedule: adaptive + schedule_type: standard + kl_threshold: 0.008 + score_to_win: 20000 + max_epochs: 200 + save_best_after: 10 + save_frequency: 100 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.0 # 0.0001 # 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 128 + minibatch_size: 512 # batch size = num_envs * horizon_length; minibatch_size = batch_size / num_minibatches + mini_epochs: 4 + critic_coef: 2 + clip_value: True + seq_length: 128 + bounds_loss_coef: 0.0001 + + central_value_config: + minibatch_size: 512 + mini_epochs: 4 + learning_rate: 1e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + clip_value: True + normalize_input: True + truncate_grads: True + + network: + name: actor_critic + central_value: True + + mlp: + units: [512, 128, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + rnn: + name: lstm + units: 1024 + layers: 2 + before_mlp: True + concat_input: True + layer_norm: True + + player: + deterministic: False diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_control.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_control.py new file mode 100644 index 0000000000..33efc41ca0 --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_control.py @@ -0,0 +1,196 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Factory: control module. + +Imported by base, environment, and task classes. Not directly executed. +""" + +import math +import torch + +import omni.isaac.core.utils.torch as torch_utils + +from omni.isaac.lab.utils.math import axis_angle_from_quat + + +def compute_dof_torque( + cfg, + dof_pos, + dof_vel, + fingertip_midpoint_pos, + fingertip_midpoint_quat, + fingertip_midpoint_linvel, + fingertip_midpoint_angvel, + jacobian, + arm_mass_matrix, + ctrl_target_fingertip_midpoint_pos, + ctrl_target_fingertip_midpoint_quat, + task_prop_gains, + task_deriv_gains, + device, +): + """Compute Franka DOF torque to move fingertips towards target pose.""" + # References: + # 1) https://ethz.ch/content/dam/ethz/special-interest/mavt/robotics-n-intelligent-systems/rsl-dam/documents/RobotDynamics2018/RD_HS2018script.pdf + # 2) Modern Robotics + + num_envs = cfg.scene.num_envs + dof_torque = torch.zeros((num_envs, dof_pos.shape[1]), device=device) + task_wrench = torch.zeros((num_envs, 6), device=device) + + pos_error, axis_angle_error = get_pose_error( + fingertip_midpoint_pos=fingertip_midpoint_pos, + fingertip_midpoint_quat=fingertip_midpoint_quat, + ctrl_target_fingertip_midpoint_pos=ctrl_target_fingertip_midpoint_pos, + ctrl_target_fingertip_midpoint_quat=ctrl_target_fingertip_midpoint_quat, + jacobian_type="geometric", + rot_error_type="axis_angle", + ) + delta_fingertip_pose = torch.cat((pos_error, axis_angle_error), dim=1) + + # Set tau = k_p * task_pos_error - k_d * task_vel_error (building towards eq. 3.96-3.98) + task_wrench_motion = _apply_task_space_gains( + delta_fingertip_pose=delta_fingertip_pose, + fingertip_midpoint_linvel=fingertip_midpoint_linvel, + fingertip_midpoint_angvel=fingertip_midpoint_angvel, + task_prop_gains=task_prop_gains, + task_deriv_gains=task_deriv_gains, + ) + task_wrench += task_wrench_motion + + # Set tau = J^T * tau, i.e., map tau into joint space as desired + jacobian_T = torch.transpose(jacobian, dim0=1, dim1=2) + dof_torque[:, 0:7] = (jacobian_T @ task_wrench.unsqueeze(-1)).squeeze(-1) + + # adapted from https://gitlab-master.nvidia.com/carbon-gym/carbgym/-/blob/b4bbc66f4e31b1a1bee61dbaafc0766bbfbf0f58/python/examples/franka_cube_ik_osc.py#L70-78 + # roboticsproceedings.org/rss07/p31.pdf + + # useful tensors + arm_mass_matrix_inv = torch.inverse(arm_mass_matrix) + jacobian_T = torch.transpose(jacobian, dim0=1, dim1=2) + arm_mass_matrix_task = torch.inverse( + jacobian @ torch.inverse(arm_mass_matrix) @ jacobian_T + ) # ETH eq. 3.86; geometric Jacobian is assumed + j_eef_inv = arm_mass_matrix_task @ jacobian @ arm_mass_matrix_inv + default_dof_pos_tensor = torch.tensor(cfg.ctrl.default_dof_pos_tensor, device=device).repeat((num_envs, 1)) + # nullspace computation + distance_to_default_dof_pos = default_dof_pos_tensor - dof_pos[:, :7] + distance_to_default_dof_pos = (distance_to_default_dof_pos + math.pi) % ( + 2 * math.pi + ) - math.pi # normalize to [-pi, pi] + u_null = cfg.ctrl.kd_null * -dof_vel[:, :7] + cfg.ctrl.kp_null * distance_to_default_dof_pos + u_null = arm_mass_matrix @ u_null.unsqueeze(-1) + torque_null = (torch.eye(7, device=device).unsqueeze(0) - torch.transpose(jacobian, 1, 2) @ j_eef_inv) @ u_null + dof_torque[:, 0:7] += torque_null.squeeze(-1) + + # TODO: Verify it's okay to no longer do gripper control here. + dof_torque = torch.clamp(dof_torque, min=-100.0, max=100.0) + return dof_torque, task_wrench + + +def get_pose_error( + fingertip_midpoint_pos, + fingertip_midpoint_quat, + ctrl_target_fingertip_midpoint_pos, + ctrl_target_fingertip_midpoint_quat, + jacobian_type, + rot_error_type, +): + """Compute task-space error between target Franka fingertip pose and current pose.""" + # Reference: https://ethz.ch/content/dam/ethz/special-interest/mavt/robotics-n-intelligent-systems/rsl-dam/documents/RobotDynamics2018/RD_HS2018script.pdf + + # Compute pos error + pos_error = ctrl_target_fingertip_midpoint_pos - fingertip_midpoint_pos + + # Compute rot error + if jacobian_type == "geometric": # See example 2.9.8; note use of J_g and transformation between rotation vectors + # Compute quat error (i.e., difference quat) + # Reference: https://personal.utdallas.edu/~sxb027100/dock/quat.html + + # Check for shortest path using quaternion dot product. + quat_dot = (ctrl_target_fingertip_midpoint_quat * fingertip_midpoint_quat).sum(dim=1, keepdim=True) + ctrl_target_fingertip_midpoint_quat = torch.where( + quat_dot.expand(-1, 4) >= 0, ctrl_target_fingertip_midpoint_quat, -ctrl_target_fingertip_midpoint_quat + ) + + fingertip_midpoint_quat_norm = torch_utils.quat_mul( + fingertip_midpoint_quat, torch_utils.quat_conjugate(fingertip_midpoint_quat) + )[ + :, 0 + ] # scalar component + fingertip_midpoint_quat_inv = torch_utils.quat_conjugate( + fingertip_midpoint_quat + ) / fingertip_midpoint_quat_norm.unsqueeze(-1) + quat_error = torch_utils.quat_mul(ctrl_target_fingertip_midpoint_quat, fingertip_midpoint_quat_inv) + + # Convert to axis-angle error + axis_angle_error = axis_angle_from_quat(quat_error) + + if rot_error_type == "quat": + return pos_error, quat_error + elif rot_error_type == "axis_angle": + return pos_error, axis_angle_error + + +def _get_delta_dof_pos(delta_pose, ik_method, jacobian, device): + """Get delta Franka DOF position from delta pose using specified IK method.""" + # References: + # 1) https://www.cs.cmu.edu/~15464-s13/lectures/lecture6/iksurvey.pdf + # 2) https://ethz.ch/content/dam/ethz/special-interest/mavt/robotics-n-intelligent-systems/rsl-dam/documents/RobotDynamics2018/RD_HS2018script.pdf (p. 47) + + if ik_method == "pinv": # Jacobian pseudoinverse + k_val = 1.0 + jacobian_pinv = torch.linalg.pinv(jacobian) + delta_dof_pos = k_val * jacobian_pinv @ delta_pose.unsqueeze(-1) + delta_dof_pos = delta_dof_pos.squeeze(-1) + + elif ik_method == "trans": # Jacobian transpose + k_val = 1.0 + jacobian_T = torch.transpose(jacobian, dim0=1, dim1=2) + delta_dof_pos = k_val * jacobian_T @ delta_pose.unsqueeze(-1) + delta_dof_pos = delta_dof_pos.squeeze(-1) + + elif ik_method == "dls": # damped least squares (Levenberg-Marquardt) + lambda_val = 0.1 # 0.1 + jacobian_T = torch.transpose(jacobian, dim0=1, dim1=2) + lambda_matrix = (lambda_val**2) * torch.eye(n=jacobian.shape[1], device=device) + delta_dof_pos = jacobian_T @ torch.inverse(jacobian @ jacobian_T + lambda_matrix) @ delta_pose.unsqueeze(-1) + delta_dof_pos = delta_dof_pos.squeeze(-1) + + elif ik_method == "svd": # adaptive SVD + k_val = 1.0 + U, S, Vh = torch.linalg.svd(jacobian) + S_inv = 1.0 / S + min_singular_value = 1.0e-5 + S_inv = torch.where(S > min_singular_value, S_inv, torch.zeros_like(S_inv)) + jacobian_pinv = ( + torch.transpose(Vh, dim0=1, dim1=2)[:, :, :6] @ torch.diag_embed(S_inv) @ torch.transpose(U, dim0=1, dim1=2) + ) + delta_dof_pos = k_val * jacobian_pinv @ delta_pose.unsqueeze(-1) + delta_dof_pos = delta_dof_pos.squeeze(-1) + + return delta_dof_pos + + +def _apply_task_space_gains( + delta_fingertip_pose, fingertip_midpoint_linvel, fingertip_midpoint_angvel, task_prop_gains, task_deriv_gains +): + """Interpret PD gains as task-space gains. Apply to task-space error.""" + + task_wrench = torch.zeros_like(delta_fingertip_pose) + + # Apply gains to lin error components + lin_error = delta_fingertip_pose[:, 0:3] + task_wrench[:, 0:3] = task_prop_gains[:, 0:3] * lin_error + task_deriv_gains[:, 0:3] * ( + 0.0 - fingertip_midpoint_linvel + ) + + # Apply gains to rot error components + rot_error = delta_fingertip_pose[:, 3:6] + task_wrench[:, 3:6] = task_prop_gains[:, 3:6] * rot_error + task_deriv_gains[:, 3:6] * ( + 0.0 - fingertip_midpoint_angvel + ) + return task_wrench diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_env.py new file mode 100644 index 0000000000..df7850d32c --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_env.py @@ -0,0 +1,880 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import numpy as np +import torch + +import carb +import omni.isaac.core.utils.torch as torch_utils + +import omni.isaac.lab.sim as sim_utils +from omni.isaac.lab.assets import Articulation +from omni.isaac.lab.envs import DirectRLEnv +from omni.isaac.lab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane +from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR +from omni.isaac.lab.utils.math import axis_angle_from_quat + +from . import factory_control as fc +from .factory_env_cfg import OBS_DIM_CFG, STATE_DIM_CFG, FactoryEnvCfg + + +class FactoryEnv(DirectRLEnv): + cfg: FactoryEnvCfg + + def __init__(self, cfg: FactoryEnvCfg, render_mode: str | None = None, **kwargs): + # Update number of obs/states + cfg.observation_space = sum([OBS_DIM_CFG[obs] for obs in cfg.obs_order]) + cfg.state_space = sum([STATE_DIM_CFG[state] for state in cfg.state_order]) + cfg.observation_space += cfg.action_space + cfg.state_space += cfg.action_space + self.cfg_task = cfg.task + + super().__init__(cfg, render_mode, **kwargs) + + self._set_body_inertias() + self._init_tensors() + self._set_default_dynamics_parameters() + self._compute_intermediate_values(dt=self.physics_dt) + + def _set_body_inertias(self): + """Note: this is to account for the asset_options.armature parameter in IGE.""" + inertias = self._robot.root_physx_view.get_inertias() + offset = torch.zeros_like(inertias) + offset[:, :, [0, 4, 8]] += 0.01 + new_inertias = inertias + offset + self._robot.root_physx_view.set_inertias(new_inertias, torch.arange(self.num_envs)) + + def _set_default_dynamics_parameters(self): + """Set parameters defining dynamic interactions.""" + self.default_gains = torch.tensor(self.cfg.ctrl.default_task_prop_gains, device=self.device).repeat( + (self.num_envs, 1) + ) + + self.pos_threshold = torch.tensor(self.cfg.ctrl.pos_action_threshold, device=self.device).repeat( + (self.num_envs, 1) + ) + self.rot_threshold = torch.tensor(self.cfg.ctrl.rot_action_threshold, device=self.device).repeat( + (self.num_envs, 1) + ) + + # Set masses and frictions. + self._set_friction(self._held_asset, self.cfg_task.held_asset_cfg.friction) + self._set_friction(self._fixed_asset, self.cfg_task.fixed_asset_cfg.friction) + self._set_friction(self._robot, self.cfg_task.robot_cfg.friction) + + def _set_friction(self, asset, value): + """Update material properties for a given asset.""" + materials = asset.root_physx_view.get_material_properties() + materials[..., 0] = value # Static friction. + materials[..., 1] = value # Dynamic friction. + env_ids = torch.arange(self.scene.num_envs, device="cpu") + asset.root_physx_view.set_material_properties(materials, env_ids) + + def _init_tensors(self): + """Initialize tensors once.""" + self.identity_quat = ( + torch.tensor([1.0, 0.0, 0.0, 0.0], device=self.device).unsqueeze(0).repeat(self.num_envs, 1) + ) + + # Control targets. + self.ctrl_target_joint_pos = torch.zeros((self.num_envs, self._robot.num_joints), device=self.device) + self.ctrl_target_fingertip_midpoint_pos = torch.zeros((self.num_envs, 3), device=self.device) + self.ctrl_target_fingertip_midpoint_quat = torch.zeros((self.num_envs, 4), device=self.device) + + # Fixed asset. + self.fixed_pos_action_frame = torch.zeros((self.num_envs, 3), device=self.device) + self.fixed_pos_obs_frame = torch.zeros((self.num_envs, 3), device=self.device) + self.init_fixed_pos_obs_noise = torch.zeros((self.num_envs, 3), device=self.device) + + # Held asset + held_base_x_offset = 0.0 + if self.cfg_task.name == "peg_insert": + held_base_z_offset = 0.0 + elif self.cfg_task.name == "gear_mesh": + gear_base_offset = self._get_target_gear_base_offset() + held_base_x_offset = gear_base_offset[0] + held_base_z_offset = gear_base_offset[2] + elif self.cfg_task.name == "nut_thread": + held_base_z_offset = self.cfg_task.fixed_asset_cfg.base_height + else: + raise NotImplementedError("Task not implemented") + + self.held_base_pos_local = torch.tensor([0.0, 0.0, 0.0], device=self.device).repeat((self.num_envs, 1)) + self.held_base_pos_local[:, 0] = held_base_x_offset + self.held_base_pos_local[:, 2] = held_base_z_offset + self.held_base_quat_local = self.identity_quat.clone().detach() + + self.held_base_pos = torch.zeros_like(self.held_base_pos_local) + self.held_base_quat = self.identity_quat.clone().detach() + + # Computer body indices. + self.left_finger_body_idx = self._robot.body_names.index("panda_leftfinger") + self.right_finger_body_idx = self._robot.body_names.index("panda_rightfinger") + self.fingertip_body_idx = self._robot.body_names.index("panda_fingertip_centered") + + # Tensors for finite-differencing. + self.last_update_timestamp = 0.0 # Note: This is for finite differencing body velocities. + self.prev_fingertip_pos = torch.zeros((self.num_envs, 3), device=self.device) + self.prev_fingertip_quat = self.identity_quat.clone() + self.prev_joint_pos = torch.zeros((self.num_envs, 7), device=self.device) + + # Keypoint tensors. + self.target_held_base_pos = torch.zeros((self.num_envs, 3), device=self.device) + self.target_held_base_quat = self.identity_quat.clone().detach() + + offsets = self._get_keypoint_offsets(self.cfg_task.num_keypoints) + self.keypoint_offsets = offsets * self.cfg_task.keypoint_scale + self.keypoints_held = torch.zeros((self.num_envs, self.cfg_task.num_keypoints, 3), device=self.device) + self.keypoints_fixed = torch.zeros_like(self.keypoints_held, device=self.device) + + # Used to compute target poses. + self.fixed_success_pos_local = torch.zeros((self.num_envs, 3), device=self.device) + if self.cfg_task.name == "peg_insert": + self.fixed_success_pos_local[:, 2] = 0.0 + elif self.cfg_task.name == "gear_mesh": + gear_base_offset = self._get_target_gear_base_offset() + self.fixed_success_pos_local[:, 0] = gear_base_offset[0] + self.fixed_success_pos_local[:, 2] = gear_base_offset[2] + elif self.cfg_task.name == "nut_thread": + head_height = self.cfg_task.fixed_asset_cfg.base_height + shank_length = self.cfg_task.fixed_asset_cfg.height + thread_pitch = self.cfg_task.fixed_asset_cfg.thread_pitch + self.fixed_success_pos_local[:, 2] = head_height + shank_length - thread_pitch * 1.5 + else: + raise NotImplementedError("Task not implemented") + + self.ep_succeeded = torch.zeros((self.num_envs,), dtype=torch.long, device=self.device) + self.ep_success_times = torch.zeros((self.num_envs,), dtype=torch.long, device=self.device) + + def _get_keypoint_offsets(self, num_keypoints): + """Get uniformly-spaced keypoints along a line of unit length, centered at 0.""" + keypoint_offsets = torch.zeros((num_keypoints, 3), device=self.device) + keypoint_offsets[:, -1] = torch.linspace(0.0, 1.0, num_keypoints, device=self.device) - 0.5 + + return keypoint_offsets + + def _setup_scene(self): + """Initialize simulation scene.""" + spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg(), translation=(0.0, 0.0, -0.4)) + + # spawn a usd file of a table into the scene + cfg = sim_utils.UsdFileCfg(usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd") + cfg.func( + "/World/envs/env_.*/Table", cfg, translation=(0.55, 0.0, 0.0), orientation=(0.70711, 0.0, 0.0, 0.70711) + ) + + self._robot = Articulation(self.cfg.robot) + self._fixed_asset = Articulation(self.cfg_task.fixed_asset) + self._held_asset = Articulation(self.cfg_task.held_asset) + if self.cfg_task.name == "gear_mesh": + self._small_gear_asset = Articulation(self.cfg_task.small_gear_cfg) + self._large_gear_asset = Articulation(self.cfg_task.large_gear_cfg) + + self.scene.clone_environments(copy_from_source=False) + self.scene.filter_collisions() + + self.scene.articulations["robot"] = self._robot + self.scene.articulations["fixed_asset"] = self._fixed_asset + self.scene.articulations["held_asset"] = self._held_asset + if self.cfg_task.name == "gear_mesh": + self.scene.articulations["small_gear"] = self._small_gear_asset + self.scene.articulations["large_gear"] = self._large_gear_asset + + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _compute_intermediate_values(self, dt): + """Get values computed from raw tensors. This includes adding noise.""" + # TODO: A lot of these can probably only be set once? + self.fixed_pos = self._fixed_asset.data.root_pos_w - self.scene.env_origins + self.fixed_quat = self._fixed_asset.data.root_quat_w + + self.held_pos = self._held_asset.data.root_pos_w - self.scene.env_origins + self.held_quat = self._held_asset.data.root_quat_w + + self.fingertip_midpoint_pos = self._robot.data.body_pos_w[:, self.fingertip_body_idx] - self.scene.env_origins + self.fingertip_midpoint_quat = self._robot.data.body_quat_w[:, self.fingertip_body_idx] + self.fingertip_midpoint_linvel = self._robot.data.body_lin_vel_w[:, self.fingertip_body_idx] + self.fingertip_midpoint_angvel = self._robot.data.body_ang_vel_w[:, self.fingertip_body_idx] + + jacobians = self._robot.root_physx_view.get_jacobians() + + self.left_finger_jacobian = jacobians[:, self.left_finger_body_idx - 1, 0:6, 0:7] + self.right_finger_jacobian = jacobians[:, self.right_finger_body_idx - 1, 0:6, 0:7] + self.fingertip_midpoint_jacobian = (self.left_finger_jacobian + self.right_finger_jacobian) * 0.5 + self.arm_mass_matrix = self._robot.root_physx_view.get_mass_matrices()[:, 0:7, 0:7] + self.joint_pos = self._robot.data.joint_pos.clone() + self.joint_vel = self._robot.data.joint_vel.clone() + + # Finite-differencing results in more reliable velocity estimates. + self.ee_linvel_fd = (self.fingertip_midpoint_pos - self.prev_fingertip_pos) / dt + self.prev_fingertip_pos = self.fingertip_midpoint_pos.clone() + + # Add state differences if velocity isn't being added. + rot_diff_quat = torch_utils.quat_mul( + self.fingertip_midpoint_quat, torch_utils.quat_conjugate(self.prev_fingertip_quat) + ) + rot_diff_quat *= torch.sign(rot_diff_quat[:, 0]).unsqueeze(-1) + rot_diff_aa = axis_angle_from_quat(rot_diff_quat) + self.ee_angvel_fd = rot_diff_aa / dt + self.prev_fingertip_quat = self.fingertip_midpoint_quat.clone() + + joint_diff = self.joint_pos[:, 0:7] - self.prev_joint_pos + self.joint_vel_fd = joint_diff / dt + self.prev_joint_pos = self.joint_pos[:, 0:7].clone() + + # Keypoint tensors. + self.held_base_quat[:], self.held_base_pos[:] = torch_utils.tf_combine( + self.held_quat, self.held_pos, self.held_base_quat_local, self.held_base_pos_local + ) + self.target_held_base_quat[:], self.target_held_base_pos[:] = torch_utils.tf_combine( + self.fixed_quat, self.fixed_pos, self.identity_quat, self.fixed_success_pos_local + ) + + # Compute pos of keypoints on held asset, and fixed asset in world frame + for idx, keypoint_offset in enumerate(self.keypoint_offsets): + self.keypoints_held[:, idx] = torch_utils.tf_combine( + self.held_base_quat, self.held_base_pos, self.identity_quat, keypoint_offset.repeat(self.num_envs, 1) + )[1] + self.keypoints_fixed[:, idx] = torch_utils.tf_combine( + self.target_held_base_quat, + self.target_held_base_pos, + self.identity_quat, + keypoint_offset.repeat(self.num_envs, 1), + )[1] + + self.keypoint_dist = torch.norm(self.keypoints_held - self.keypoints_fixed, p=2, dim=-1).mean(-1) + self.last_update_timestamp = self._robot._data._sim_timestamp + + def _get_observations(self): + """Get actor/critic inputs using asymmetric critic.""" + noisy_fixed_pos = self.fixed_pos_obs_frame + self.init_fixed_pos_obs_noise + + prev_actions = self.actions.clone() + + obs_dict = { + "fingertip_pos": self.fingertip_midpoint_pos, + "fingertip_pos_rel_fixed": self.fingertip_midpoint_pos - noisy_fixed_pos, + "fingertip_quat": self.fingertip_midpoint_quat, + "ee_linvel": self.ee_linvel_fd, + "ee_angvel": self.ee_angvel_fd, + "prev_actions": prev_actions, + } + + state_dict = { + "fingertip_pos": self.fingertip_midpoint_pos, + "fingertip_pos_rel_fixed": self.fingertip_midpoint_pos - self.fixed_pos_obs_frame, + "fingertip_quat": self.fingertip_midpoint_quat, + "ee_linvel": self.fingertip_midpoint_linvel, + "ee_angvel": self.fingertip_midpoint_angvel, + "joint_pos": self.joint_pos[:, 0:7], + "held_pos": self.held_pos, + "held_pos_rel_fixed": self.held_pos - self.fixed_pos_obs_frame, + "held_quat": self.held_quat, + "fixed_pos": self.fixed_pos, + "fixed_quat": self.fixed_quat, + "task_prop_gains": self.task_prop_gains, + "pos_threshold": self.pos_threshold, + "rot_threshold": self.rot_threshold, + "prev_actions": prev_actions, + } + obs_tensors = [obs_dict[obs_name] for obs_name in self.cfg.obs_order + ["prev_actions"]] + obs_tensors = torch.cat(obs_tensors, dim=-1) + state_tensors = [state_dict[state_name] for state_name in self.cfg.state_order + ["prev_actions"]] + state_tensors = torch.cat(state_tensors, dim=-1) + return {"policy": obs_tensors, "critic": state_tensors} + + def _reset_buffers(self, env_ids): + """Reset buffers.""" + self.ep_succeeded[env_ids] = 0 + + def _pre_physics_step(self, action): + """Apply policy actions with smoothing.""" + env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1) + if len(env_ids) > 0: + self._reset_buffers(env_ids) + + self.actions = ( + self.cfg.ctrl.ema_factor * action.clone().to(self.device) + (1 - self.cfg.ctrl.ema_factor) * self.actions + ) + + def close_gripper_in_place(self): + """Keep gripper in current position as gripper closes.""" + actions = torch.zeros((self.num_envs, 6), device=self.device) + ctrl_target_gripper_dof_pos = 0.0 + + # Interpret actions as target pos displacements and set pos target + pos_actions = actions[:, 0:3] * self.pos_threshold + self.ctrl_target_fingertip_midpoint_pos = self.fingertip_midpoint_pos + pos_actions + + # Interpret actions as target rot (axis-angle) displacements + rot_actions = actions[:, 3:6] + + # Convert to quat and set rot target + angle = torch.norm(rot_actions, p=2, dim=-1) + axis = rot_actions / angle.unsqueeze(-1) + + rot_actions_quat = torch_utils.quat_from_angle_axis(angle, axis) + + rot_actions_quat = torch.where( + angle.unsqueeze(-1).repeat(1, 4) > 1.0e-6, + rot_actions_quat, + torch.tensor([1.0, 0.0, 0.0, 0.0], device=self.device).repeat(self.num_envs, 1), + ) + self.ctrl_target_fingertip_midpoint_quat = torch_utils.quat_mul(rot_actions_quat, self.fingertip_midpoint_quat) + + target_euler_xyz = torch.stack(torch_utils.get_euler_xyz(self.ctrl_target_fingertip_midpoint_quat), dim=1) + target_euler_xyz[:, 0] = 3.14159 + target_euler_xyz[:, 1] = 0.0 + + self.ctrl_target_fingertip_midpoint_quat = torch_utils.quat_from_euler_xyz( + roll=target_euler_xyz[:, 0], pitch=target_euler_xyz[:, 1], yaw=target_euler_xyz[:, 2] + ) + + self.ctrl_target_gripper_dof_pos = ctrl_target_gripper_dof_pos + self.generate_ctrl_signals() + + def _apply_action(self): + """Apply actions for policy as delta targets from current position.""" + # Get current yaw for success checking. + _, _, curr_yaw = torch_utils.get_euler_xyz(self.fingertip_midpoint_quat) + self.curr_yaw = torch.where(curr_yaw > np.deg2rad(235), curr_yaw - 2 * np.pi, curr_yaw) + + # Note: We use finite-differenced velocities for control and observations. + # Check if we need to re-compute velocities within the decimation loop. + if self.last_update_timestamp < self._robot._data._sim_timestamp: + self._compute_intermediate_values(dt=self.physics_dt) + + # Interpret actions as target pos displacements and set pos target + pos_actions = self.actions[:, 0:3] * self.pos_threshold + + # Interpret actions as target rot (axis-angle) displacements + rot_actions = self.actions[:, 3:6] + if self.cfg_task.unidirectional_rot: + rot_actions[:, 2] = -(rot_actions[:, 2] + 1.0) * 0.5 # [-1, 0] + rot_actions = rot_actions * self.rot_threshold + + self.ctrl_target_fingertip_midpoint_pos = self.fingertip_midpoint_pos + pos_actions + # To speed up learning, never allow the policy to move more than 5cm away from the base. + delta_pos = self.ctrl_target_fingertip_midpoint_pos - self.fixed_pos_action_frame + pos_error_clipped = torch.clip( + delta_pos, -self.cfg.ctrl.pos_action_bounds[0], self.cfg.ctrl.pos_action_bounds[1] + ) + self.ctrl_target_fingertip_midpoint_pos = self.fixed_pos_action_frame + pos_error_clipped + + # Convert to quat and set rot target + angle = torch.norm(rot_actions, p=2, dim=-1) + axis = rot_actions / angle.unsqueeze(-1) + + rot_actions_quat = torch_utils.quat_from_angle_axis(angle, axis) + rot_actions_quat = torch.where( + angle.unsqueeze(-1).repeat(1, 4) > 1e-6, + rot_actions_quat, + torch.tensor([1.0, 0.0, 0.0, 0.0], device=self.device).repeat(self.num_envs, 1), + ) + self.ctrl_target_fingertip_midpoint_quat = torch_utils.quat_mul(rot_actions_quat, self.fingertip_midpoint_quat) + + target_euler_xyz = torch.stack(torch_utils.get_euler_xyz(self.ctrl_target_fingertip_midpoint_quat), dim=1) + target_euler_xyz[:, 0] = 3.14159 # Restrict actions to be upright. + target_euler_xyz[:, 1] = 0.0 + + self.ctrl_target_fingertip_midpoint_quat = torch_utils.quat_from_euler_xyz( + roll=target_euler_xyz[:, 0], pitch=target_euler_xyz[:, 1], yaw=target_euler_xyz[:, 2] + ) + + self.ctrl_target_gripper_dof_pos = 0.0 + self.generate_ctrl_signals() + + def _set_gains(self, prop_gains, rot_deriv_scale=1.0): + """Set robot gains using critical damping.""" + self.task_prop_gains = prop_gains + self.task_deriv_gains = 2 * torch.sqrt(prop_gains) + self.task_deriv_gains[:, 3:6] /= rot_deriv_scale + + def generate_ctrl_signals(self): + """Get Jacobian. Set Franka DOF position targets (fingers) or DOF torques (arm).""" + self.joint_torque, self.applied_wrench = fc.compute_dof_torque( + cfg=self.cfg, + dof_pos=self.joint_pos, + dof_vel=self.joint_vel, # _fd, + fingertip_midpoint_pos=self.fingertip_midpoint_pos, + fingertip_midpoint_quat=self.fingertip_midpoint_quat, + fingertip_midpoint_linvel=self.ee_linvel_fd, + fingertip_midpoint_angvel=self.ee_angvel_fd, + jacobian=self.fingertip_midpoint_jacobian, + arm_mass_matrix=self.arm_mass_matrix, + ctrl_target_fingertip_midpoint_pos=self.ctrl_target_fingertip_midpoint_pos, + ctrl_target_fingertip_midpoint_quat=self.ctrl_target_fingertip_midpoint_quat, + task_prop_gains=self.task_prop_gains, + task_deriv_gains=self.task_deriv_gains, + device=self.device, + ) + + # set target for gripper joints to use physx's PD controller + self.ctrl_target_joint_pos[:, 7:9] = self.ctrl_target_gripper_dof_pos + self.joint_torque[:, 7:9] = 0.0 + + self._robot.set_joint_position_target(self.ctrl_target_joint_pos) + self._robot.set_joint_effort_target(self.joint_torque) + + def _get_dones(self): + """Update intermediate values used for rewards and observations.""" + self._compute_intermediate_values(dt=self.physics_dt) + time_out = self.episode_length_buf >= self.max_episode_length - 1 + return time_out, time_out + + def _get_curr_successes(self, success_threshold, check_rot=False): + """Get success mask at current timestep.""" + curr_successes = torch.zeros((self.num_envs,), dtype=torch.bool, device=self.device) + + xy_dist = torch.linalg.vector_norm(self.target_held_base_pos[:, 0:2] - self.held_base_pos[:, 0:2], dim=1) + z_disp = self.held_base_pos[:, 2] - self.target_held_base_pos[:, 2] + + is_centered = torch.where(xy_dist < 0.0025, torch.ones_like(curr_successes), torch.zeros_like(curr_successes)) + # Height threshold to target + fixed_cfg = self.cfg_task.fixed_asset_cfg + if self.cfg_task.name == "peg_insert" or self.cfg_task.name == "gear_mesh": + height_threshold = fixed_cfg.height * success_threshold + elif self.cfg_task.name == "nut_thread": + height_threshold = fixed_cfg.thread_pitch * success_threshold + else: + raise NotImplementedError("Task not implemented") + is_close_or_below = torch.where( + z_disp < height_threshold, torch.ones_like(curr_successes), torch.zeros_like(curr_successes) + ) + curr_successes = torch.logical_and(is_centered, is_close_or_below) + + if check_rot: + is_rotated = self.curr_yaw < self.cfg_task.ee_success_yaw + curr_successes = torch.logical_and(curr_successes, is_rotated) + + return curr_successes + + def _get_rewards(self): + """Update rewards and compute success statistics.""" + # Get successful and failed envs at current timestep + check_rot = self.cfg_task.name == "nut_thread" + curr_successes = self._get_curr_successes( + success_threshold=self.cfg_task.success_threshold, check_rot=check_rot + ) + + rew_buf = self._update_rew_buf(curr_successes) + + # Only log episode success rates at the end of an episode. + if torch.any(self.reset_buf): + self.extras["successes"] = torch.count_nonzero(curr_successes) / self.num_envs + + # Get the time at which an episode first succeeds. + first_success = torch.logical_and(curr_successes, torch.logical_not(self.ep_succeeded)) + self.ep_succeeded[curr_successes] = 1 + + first_success_ids = first_success.nonzero(as_tuple=False).squeeze(-1) + self.ep_success_times[first_success_ids] = self.episode_length_buf[first_success_ids] + nonzero_success_ids = self.ep_success_times.nonzero(as_tuple=False).squeeze(-1) + + if len(nonzero_success_ids) > 0: # Only log for successful episodes. + success_times = self.ep_success_times[nonzero_success_ids].sum() / len(nonzero_success_ids) + self.extras["success_times"] = success_times + + self.prev_actions = self.actions.clone() + return rew_buf + + def _update_rew_buf(self, curr_successes): + """Compute reward at current timestep.""" + rew_dict = {} + + # Keypoint rewards. + def squashing_fn(x, a, b): + return 1 / (torch.exp(a * x) + b + torch.exp(-a * x)) + + a0, b0 = self.cfg_task.keypoint_coef_baseline + rew_dict["kp_baseline"] = squashing_fn(self.keypoint_dist, a0, b0) + # a1, b1 = 25, 2 + a1, b1 = self.cfg_task.keypoint_coef_coarse + rew_dict["kp_coarse"] = squashing_fn(self.keypoint_dist, a1, b1) + a2, b2 = self.cfg_task.keypoint_coef_fine + # a2, b2 = 300, 0 + rew_dict["kp_fine"] = squashing_fn(self.keypoint_dist, a2, b2) + + # Action penalties. + rew_dict["action_penalty"] = torch.norm(self.actions, p=2) + rew_dict["action_grad_penalty"] = torch.norm(self.actions - self.prev_actions, p=2, dim=-1) + rew_dict["curr_engaged"] = ( + self._get_curr_successes(success_threshold=self.cfg_task.engage_threshold, check_rot=False).clone().float() + ) + rew_dict["curr_successes"] = curr_successes.clone().float() + + rew_buf = ( + rew_dict["kp_coarse"] + + rew_dict["kp_baseline"] + + rew_dict["kp_fine"] + - rew_dict["action_penalty"] * self.cfg_task.action_penalty_scale + - rew_dict["action_grad_penalty"] * self.cfg_task.action_grad_penalty_scale + + rew_dict["curr_engaged"] + + rew_dict["curr_successes"] + ) + + for rew_name, rew in rew_dict.items(): + self.extras[f"logs_rew_{rew_name}"] = rew.mean() + + return rew_buf + + def _reset_idx(self, env_ids): + """ + We assume all envs will always be reset at the same time. + """ + super()._reset_idx(env_ids) + + self._set_assets_to_default_pose(env_ids) + self._set_franka_to_default_pose(joints=self.cfg.ctrl.reset_joints, env_ids=env_ids) + self.step_sim_no_action() + + self.randomize_initial_state(env_ids) + + def _get_target_gear_base_offset(self): + """Get offset of target gear from the gear base asset.""" + target_gear = self.cfg_task.target_gear + if target_gear == "gear_large": + gear_base_offset = self.cfg_task.fixed_asset_cfg.large_gear_base_offset + elif target_gear == "gear_medium": + gear_base_offset = self.cfg_task.fixed_asset_cfg.medium_gear_base_offset + elif target_gear == "gear_small": + gear_base_offset = self.cfg_task.fixed_asset_cfg.small_gear_base_offset + else: + raise ValueError(f"{target_gear} not valid in this context!") + return gear_base_offset + + def _set_assets_to_default_pose(self, env_ids): + """Move assets to default pose before randomization.""" + held_state = self._held_asset.data.default_root_state.clone()[env_ids] + held_state[:, 0:3] += self.scene.env_origins[env_ids] + held_state[:, 7:] = 0.0 + self._held_asset.write_root_state_to_sim(held_state, env_ids=env_ids) + self._held_asset.reset() + + fixed_state = self._fixed_asset.data.default_root_state.clone()[env_ids] + fixed_state[:, 0:3] += self.scene.env_origins[env_ids] + fixed_state[:, 7:] = 0.0 + self._fixed_asset.write_root_state_to_sim(fixed_state, env_ids=env_ids) + self._fixed_asset.reset() + + def set_pos_inverse_kinematics(self, env_ids): + """Set robot joint position using DLS IK.""" + ik_time = 0.0 + while ik_time < 0.25: + # Compute error to target. + pos_error, axis_angle_error = fc.get_pose_error( + fingertip_midpoint_pos=self.fingertip_midpoint_pos[env_ids], + fingertip_midpoint_quat=self.fingertip_midpoint_quat[env_ids], + ctrl_target_fingertip_midpoint_pos=self.ctrl_target_fingertip_midpoint_pos[env_ids], + ctrl_target_fingertip_midpoint_quat=self.ctrl_target_fingertip_midpoint_quat[env_ids], + jacobian_type="geometric", + rot_error_type="axis_angle", + ) + + delta_hand_pose = torch.cat((pos_error, axis_angle_error), dim=-1) + + # Solve DLS problem. + delta_dof_pos = fc._get_delta_dof_pos( + delta_pose=delta_hand_pose, + ik_method="dls", + jacobian=self.fingertip_midpoint_jacobian[env_ids], + device=self.device, + ) + self.joint_pos[env_ids, 0:7] += delta_dof_pos[:, 0:7] + self.joint_vel[env_ids, :] = torch.zeros_like(self.joint_pos[env_ids,]) + + self.ctrl_target_joint_pos[env_ids, 0:7] = self.joint_pos[env_ids, 0:7] + # Update dof state. + self._robot.write_joint_state_to_sim(self.joint_pos, self.joint_vel) + self._robot.set_joint_position_target(self.ctrl_target_joint_pos) + + # Simulate and update tensors. + self.step_sim_no_action() + ik_time += self.physics_dt + + return pos_error, axis_angle_error + + def get_handheld_asset_relative_pose(self): + """Get default relative pose between help asset and fingertip.""" + if self.cfg_task.name == "peg_insert": + held_asset_relative_pos = torch.zeros_like(self.held_base_pos_local) + held_asset_relative_pos[:, 2] = self.cfg_task.held_asset_cfg.height + held_asset_relative_pos[:, 2] -= self.cfg_task.robot_cfg.franka_fingerpad_length + elif self.cfg_task.name == "gear_mesh": + held_asset_relative_pos = torch.zeros_like(self.held_base_pos_local) + gear_base_offset = self._get_target_gear_base_offset() + held_asset_relative_pos[:, 0] += gear_base_offset[0] + held_asset_relative_pos[:, 2] += gear_base_offset[2] + held_asset_relative_pos[:, 2] += self.cfg_task.held_asset_cfg.height / 2.0 * 1.1 + elif self.cfg_task.name == "nut_thread": + held_asset_relative_pos = self.held_base_pos_local + else: + raise NotImplementedError("Task not implemented") + + held_asset_relative_quat = self.identity_quat + if self.cfg_task.name == "nut_thread": + # Rotate along z-axis of frame for default position. + initial_rot_deg = self.cfg_task.held_asset_rot_init + rot_yaw_euler = torch.tensor([0.0, 0.0, initial_rot_deg * np.pi / 180.0], device=self.device).repeat( + self.num_envs, 1 + ) + held_asset_relative_quat = torch_utils.quat_from_euler_xyz( + roll=rot_yaw_euler[:, 0], pitch=rot_yaw_euler[:, 1], yaw=rot_yaw_euler[:, 2] + ) + + return held_asset_relative_pos, held_asset_relative_quat + + def _set_franka_to_default_pose(self, joints, env_ids): + """Return Franka to its default joint position.""" + gripper_width = self.cfg_task.held_asset_cfg.diameter / 2 * 1.25 + joint_pos = self._robot.data.default_joint_pos[env_ids] + joint_pos[:, 7:] = gripper_width # MIMIC + joint_pos[:, :7] = torch.tensor(joints, device=self.device)[None, :] + joint_vel = torch.zeros_like(joint_pos) + joint_effort = torch.zeros_like(joint_pos) + self.ctrl_target_joint_pos[env_ids, :] = joint_pos + print(f"Resetting {len(env_ids)} envs...") + self._robot.set_joint_position_target(self.ctrl_target_joint_pos[env_ids], env_ids=env_ids) + self._robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) + self._robot.reset() + self._robot.set_joint_effort_target(joint_effort, env_ids=env_ids) + + self.step_sim_no_action() + + def step_sim_no_action(self): + """Step the simulation without an action. Used for resets.""" + self.scene.write_data_to_sim() + self.sim.step(render=False) + self.scene.update(dt=self.physics_dt) + self._compute_intermediate_values(dt=self.physics_dt) + + def randomize_initial_state(self, env_ids): + """Randomize initial state and perform any episode-level randomization.""" + # Disable gravity. + physics_sim_view = sim_utils.SimulationContext.instance().physics_sim_view + physics_sim_view.set_gravity(carb.Float3(0.0, 0.0, 0.0)) + + # (1.) Randomize fixed asset pose. + fixed_state = self._fixed_asset.data.default_root_state.clone()[env_ids] + # (1.a.) Position + rand_sample = torch.rand((len(env_ids), 3), dtype=torch.float32, device=self.device) + fixed_pos_init_rand = 2 * (rand_sample - 0.5) # [-1, 1] + fixed_asset_init_pos_rand = torch.tensor( + self.cfg_task.fixed_asset_init_pos_noise, dtype=torch.float32, device=self.device + ) + fixed_pos_init_rand = fixed_pos_init_rand @ torch.diag(fixed_asset_init_pos_rand) + fixed_state[:, 0:3] += fixed_pos_init_rand + self.scene.env_origins[env_ids] + # (1.b.) Orientation + fixed_orn_init_yaw = np.deg2rad(self.cfg_task.fixed_asset_init_orn_deg) + fixed_orn_yaw_range = np.deg2rad(self.cfg_task.fixed_asset_init_orn_range_deg) + rand_sample = torch.rand((len(env_ids), 3), dtype=torch.float32, device=self.device) + fixed_orn_euler = fixed_orn_init_yaw + fixed_orn_yaw_range * rand_sample + fixed_orn_euler[:, 0:2] = 0.0 # Only change yaw. + fixed_orn_quat = torch_utils.quat_from_euler_xyz( + fixed_orn_euler[:, 0], fixed_orn_euler[:, 1], fixed_orn_euler[:, 2] + ) + fixed_state[:, 3:7] = fixed_orn_quat + # (1.c.) Velocity + fixed_state[:, 7:] = 0.0 # vel + # (1.d.) Update values. + self._fixed_asset.write_root_state_to_sim(fixed_state, env_ids=env_ids) + self._fixed_asset.reset() + + # (1.e.) Noisy position observation. + fixed_asset_pos_noise = torch.randn((len(env_ids), 3), dtype=torch.float32, device=self.device) + fixed_asset_pos_rand = torch.tensor(self.cfg.obs_rand.fixed_asset_pos, dtype=torch.float32, device=self.device) + fixed_asset_pos_noise = fixed_asset_pos_noise @ torch.diag(fixed_asset_pos_rand) + self.init_fixed_pos_obs_noise[:] = fixed_asset_pos_noise + + self.step_sim_no_action() + + # Compute the frame on the bolt that would be used as observation: fixed_pos_obs_frame + # For example, the tip of the bolt can be used as the observation frame + fixed_tip_pos_local = torch.zeros_like(self.fixed_pos) + fixed_tip_pos_local[:, 2] += self.cfg_task.fixed_asset_cfg.height + fixed_tip_pos_local[:, 2] += self.cfg_task.fixed_asset_cfg.base_height + if self.cfg_task.name == "gear_mesh": + fixed_tip_pos_local[:, 0] = self._get_target_gear_base_offset()[0] + + _, fixed_tip_pos = torch_utils.tf_combine( + self.fixed_quat, self.fixed_pos, self.identity_quat, fixed_tip_pos_local + ) + self.fixed_pos_obs_frame[:] = fixed_tip_pos + + # (2) Move gripper to randomizes location above fixed asset. Keep trying until IK succeeds. + # (a) get position vector to target + bad_envs = env_ids.clone() + ik_attempt = 0 + + hand_down_quat = torch.zeros((self.num_envs, 4), dtype=torch.float32, device=self.device) + self.hand_down_euler = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device) + while True: + n_bad = bad_envs.shape[0] + + above_fixed_pos = fixed_tip_pos.clone() + above_fixed_pos[:, 2] += self.cfg_task.hand_init_pos[2] + + rand_sample = torch.rand((n_bad, 3), dtype=torch.float32, device=self.device) + above_fixed_pos_rand = 2 * (rand_sample - 0.5) # [-1, 1] + hand_init_pos_rand = torch.tensor(self.cfg_task.hand_init_pos_noise, device=self.device) + above_fixed_pos_rand = above_fixed_pos_rand @ torch.diag(hand_init_pos_rand) + above_fixed_pos[bad_envs] += above_fixed_pos_rand + + # (b) get random orientation facing down + hand_down_euler = ( + torch.tensor(self.cfg_task.hand_init_orn, device=self.device).unsqueeze(0).repeat(n_bad, 1) + ) + + rand_sample = torch.rand((n_bad, 3), dtype=torch.float32, device=self.device) + above_fixed_orn_noise = 2 * (rand_sample - 0.5) # [-1, 1] + hand_init_orn_rand = torch.tensor(self.cfg_task.hand_init_orn_noise, device=self.device) + above_fixed_orn_noise = above_fixed_orn_noise @ torch.diag(hand_init_orn_rand) + hand_down_euler += above_fixed_orn_noise + self.hand_down_euler[bad_envs, ...] = hand_down_euler + hand_down_quat[bad_envs, :] = torch_utils.quat_from_euler_xyz( + roll=hand_down_euler[:, 0], pitch=hand_down_euler[:, 1], yaw=hand_down_euler[:, 2] + ) + + # (c) iterative IK Method + self.ctrl_target_fingertip_midpoint_pos[bad_envs, ...] = above_fixed_pos[bad_envs, ...] + self.ctrl_target_fingertip_midpoint_quat[bad_envs, ...] = hand_down_quat[bad_envs, :] + + pos_error, aa_error = self.set_pos_inverse_kinematics(env_ids=bad_envs) + pos_error = torch.linalg.norm(pos_error, dim=1) > 1e-3 + angle_error = torch.norm(aa_error, dim=1) > 1e-3 + any_error = torch.logical_or(pos_error, angle_error) + bad_envs = bad_envs[any_error.nonzero(as_tuple=False).squeeze(-1)] + + # Check IK succeeded for all envs, otherwise try again for those envs + if bad_envs.shape[0] == 0: + break + + self._set_franka_to_default_pose( + joints=[0.00871, -0.10368, -0.00794, -1.49139, -0.00083, 1.38774, 0.0], env_ids=bad_envs + ) + + ik_attempt += 1 + print(f"IK Attempt: {ik_attempt}\tBad Envs: {bad_envs.shape[0]}") + + self.step_sim_no_action() + + # Add flanking gears after servo (so arm doesn't move them). + if self.cfg_task.name == "gear_mesh" and self.cfg_task.add_flanking_gears: + small_gear_state = self._small_gear_asset.data.default_root_state.clone()[env_ids] + small_gear_state[:, 0:7] = fixed_state[:, 0:7] + small_gear_state[:, 7:] = 0.0 # vel + self._small_gear_asset.write_root_state_to_sim(small_gear_state, env_ids=env_ids) + self._small_gear_asset.reset() + + large_gear_state = self._large_gear_asset.data.default_root_state.clone()[env_ids] + large_gear_state[:, 0:7] = fixed_state[:, 0:7] + large_gear_state[:, 7:] = 0.0 # vel + self._large_gear_asset.write_root_state_to_sim(large_gear_state, env_ids=env_ids) + self._large_gear_asset.reset() + + # (3) Randomize asset-in-gripper location. + # flip gripper z orientation + flip_z_quat = torch.tensor([0.0, 0.0, 1.0, 0.0], device=self.device).unsqueeze(0).repeat(self.num_envs, 1) + fingertip_flipped_quat, fingertip_flipped_pos = torch_utils.tf_combine( + q1=self.fingertip_midpoint_quat, + t1=self.fingertip_midpoint_pos, + q2=flip_z_quat, + t2=torch.zeros_like(self.fingertip_midpoint_pos), + ) + + # get default gripper in asset transform + held_asset_relative_pos, held_asset_relative_quat = self.get_handheld_asset_relative_pose() + asset_in_hand_quat, asset_in_hand_pos = torch_utils.tf_inverse( + held_asset_relative_quat, held_asset_relative_pos + ) + + translated_held_asset_quat, translated_held_asset_pos = torch_utils.tf_combine( + q1=fingertip_flipped_quat, t1=fingertip_flipped_pos, q2=asset_in_hand_quat, t2=asset_in_hand_pos + ) + + # Add asset in hand randomization + rand_sample = torch.rand((self.num_envs, 3), dtype=torch.float32, device=self.device) + self.held_asset_pos_noise = 2 * (rand_sample - 0.5) # [-1, 1] + if self.cfg_task.name == "gear_mesh": + self.held_asset_pos_noise[:, 2] = -rand_sample[:, 2] # [-1, 0] + + held_asset_pos_noise = torch.tensor(self.cfg_task.held_asset_pos_noise, device=self.device) + self.held_asset_pos_noise = self.held_asset_pos_noise @ torch.diag(held_asset_pos_noise) + translated_held_asset_quat, translated_held_asset_pos = torch_utils.tf_combine( + q1=translated_held_asset_quat, + t1=translated_held_asset_pos, + q2=self.identity_quat, + t2=self.held_asset_pos_noise, + ) + + held_state = self._held_asset.data.default_root_state.clone() + held_state[:, 0:3] = translated_held_asset_pos + self.scene.env_origins + held_state[:, 3:7] = translated_held_asset_quat + held_state[:, 7:] = 0.0 + self._held_asset.write_root_state_to_sim(held_state) + self._held_asset.reset() + + # Close hand + # Set gains to use for quick resets. + reset_task_prop_gains = torch.tensor(self.cfg.ctrl.reset_task_prop_gains, device=self.device).repeat( + (self.num_envs, 1) + ) + reset_rot_deriv_scale = self.cfg.ctrl.reset_rot_deriv_scale + self._set_gains(reset_task_prop_gains, reset_rot_deriv_scale) + + self.step_sim_no_action() + + grasp_time = 0.0 + while grasp_time < 0.25: + self.ctrl_target_joint_pos[env_ids, 7:] = 0.0 # Close gripper. + self.ctrl_target_gripper_dof_pos = 0.0 + self.close_gripper_in_place() + self.step_sim_no_action() + grasp_time += self.sim.get_physics_dt() + + self.prev_joint_pos = self.joint_pos[:, 0:7].clone() + self.prev_fingertip_pos = self.fingertip_midpoint_pos.clone() + self.prev_fingertip_quat = self.fingertip_midpoint_quat.clone() + + # Set initial actions to involve no-movement. Needed for EMA/correct penalties. + self.actions = torch.zeros_like(self.actions) + self.prev_actions = torch.zeros_like(self.actions) + # Back out what actions should be for initial state. + # Relative position to bolt tip. + self.fixed_pos_action_frame[:] = self.fixed_pos_obs_frame + self.init_fixed_pos_obs_noise + + pos_actions = self.fingertip_midpoint_pos - self.fixed_pos_action_frame + pos_action_bounds = torch.tensor(self.cfg.ctrl.pos_action_bounds, device=self.device) + pos_actions = pos_actions @ torch.diag(1.0 / pos_action_bounds) + self.actions[:, 0:3] = self.prev_actions[:, 0:3] = pos_actions + + # Relative yaw to bolt. + unrot_180_euler = torch.tensor([-np.pi, 0.0, 0.0], device=self.device).repeat(self.num_envs, 1) + unrot_quat = torch_utils.quat_from_euler_xyz( + roll=unrot_180_euler[:, 0], pitch=unrot_180_euler[:, 1], yaw=unrot_180_euler[:, 2] + ) + + fingertip_quat_rel_bolt = torch_utils.quat_mul(unrot_quat, self.fingertip_midpoint_quat) + fingertip_yaw_bolt = torch_utils.get_euler_xyz(fingertip_quat_rel_bolt)[-1] + fingertip_yaw_bolt = torch.where( + fingertip_yaw_bolt > torch.pi / 2, fingertip_yaw_bolt - 2 * torch.pi, fingertip_yaw_bolt + ) + fingertip_yaw_bolt = torch.where( + fingertip_yaw_bolt < -torch.pi, fingertip_yaw_bolt + 2 * torch.pi, fingertip_yaw_bolt + ) + + yaw_action = (fingertip_yaw_bolt + np.deg2rad(180.0)) / np.deg2rad(270.0) * 2.0 - 1.0 + self.actions[:, 5] = self.prev_actions[:, 5] = yaw_action + + # Zero initial velocity. + self.ee_angvel_fd[:, :] = 0.0 + self.ee_linvel_fd[:, :] = 0.0 + + # Set initial gains for the episode. + self._set_gains(self.default_gains) + + physics_sim_view.set_gravity(carb.Float3(*self.cfg.sim.gravity)) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_env_cfg.py new file mode 100644 index 0000000000..0356e9e434 --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_env_cfg.py @@ -0,0 +1,208 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import omni.isaac.lab.sim as sim_utils +from omni.isaac.lab.actuators.actuator_cfg import ImplicitActuatorCfg +from omni.isaac.lab.assets import ArticulationCfg +from omni.isaac.lab.envs import DirectRLEnvCfg +from omni.isaac.lab.scene import InteractiveSceneCfg +from omni.isaac.lab.sim import PhysxCfg, SimulationCfg +from omni.isaac.lab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg +from omni.isaac.lab.utils import configclass + +from .factory_tasks_cfg import ASSET_DIR, FactoryTask, GearMesh, NutThread, PegInsert + +OBS_DIM_CFG = { + "fingertip_pos": 3, + "fingertip_pos_rel_fixed": 3, + "fingertip_quat": 4, + "ee_linvel": 3, + "ee_angvel": 3, +} + +STATE_DIM_CFG = { + "fingertip_pos": 3, + "fingertip_pos_rel_fixed": 3, + "fingertip_quat": 4, + "ee_linvel": 3, + "ee_angvel": 3, + "joint_pos": 7, + "held_pos": 3, + "held_pos_rel_fixed": 3, + "held_quat": 4, + "fixed_pos": 3, + "fixed_quat": 4, + "task_prop_gains": 6, + "ema_factor": 1, + "pos_threshold": 3, + "rot_threshold": 3, +} + + +@configclass +class ObsRandCfg: + fixed_asset_pos = [0.001, 0.001, 0.001] + + +@configclass +class CtrlCfg: + ema_factor = 0.2 + + pos_action_bounds = [0.05, 0.05, 0.05] + rot_action_bounds = [1.0, 1.0, 1.0] + + pos_action_threshold = [0.02, 0.02, 0.02] + rot_action_threshold = [0.097, 0.097, 0.097] + + reset_joints = [1.5178e-03, -1.9651e-01, -1.4364e-03, -1.9761, -2.7717e-04, 1.7796, 7.8556e-01] + reset_task_prop_gains = [300, 300, 300, 20, 20, 20] + reset_rot_deriv_scale = 10.0 + default_task_prop_gains = [100, 100, 100, 30, 30, 30] + + # Null space parameters. + default_dof_pos_tensor = [-1.3003, -0.4015, 1.1791, -2.1493, 0.4001, 1.9425, 0.4754] + kp_null = 10.0 + kd_null = 6.3246 + + +@configclass +class FactoryEnvCfg(DirectRLEnvCfg): + decimation = 8 + action_space = 6 + # num_*: will be overwritten to correspond to obs_order, state_order. + observation_space = 21 + state_space = 72 + obs_order: list = ["fingertip_pos_rel_fixed", "fingertip_quat", "ee_linvel", "ee_angvel"] + state_order: list = [ + "fingertip_pos", + "fingertip_quat", + "ee_linvel", + "ee_angvel", + "joint_pos", + "held_pos", + "held_pos_rel_fixed", + "held_quat", + "fixed_pos", + "fixed_quat", + ] + + task_name: str = "peg_insert" # peg_insert, gear_mesh, nut_thread + task: FactoryTask = FactoryTask() + obs_rand: ObsRandCfg = ObsRandCfg() + ctrl: CtrlCfg = CtrlCfg() + + episode_length_s = 10.0 # Probably need to override. + sim: SimulationCfg = SimulationCfg( + device="cuda:0", + dt=1 / 120, + gravity=(0.0, 0.0, -9.81), + physx=PhysxCfg( + solver_type=1, + max_position_iteration_count=192, # Important to avoid interpenetration. + max_velocity_iteration_count=1, + bounce_threshold_velocity=0.2, + friction_offset_threshold=0.01, + friction_correlation_distance=0.00625, + gpu_max_rigid_contact_count=2**23, + gpu_max_rigid_patch_count=2**23, + gpu_max_num_partitions=1, # Important for stable simulation. + ), + physics_material=RigidBodyMaterialCfg( + static_friction=1.0, + dynamic_friction=1.0, + ), + ) + + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=128, env_spacing=2.0) + + robot = ArticulationCfg( + prim_path="/World/envs/env_.*/Robot", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ASSET_DIR}/franka_mimic.usd", + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + ), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "panda_joint1": 0.00871, + "panda_joint2": -0.10368, + "panda_joint3": -0.00794, + "panda_joint4": -1.49139, + "panda_joint5": -0.00083, + "panda_joint6": 1.38774, + "panda_joint7": 0.0, + "panda_finger_joint2": 0.04, + }, + pos=(0.0, 0.0, 0.0), + rot=(1.0, 0.0, 0.0, 0.0), + ), + actuators={ + "panda_arm1": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[1-4]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit=87, + velocity_limit=124.6, + ), + "panda_arm2": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[5-7]"], + stiffness=0.0, + damping=0.0, + friction=0.0, + armature=0.0, + effort_limit=12, + velocity_limit=149.5, + ), + "panda_hand": ImplicitActuatorCfg( + joint_names_expr=["panda_finger_joint[1-2]"], + effort_limit=40.0, + velocity_limit=0.04, + stiffness=7500.0, + damping=173.0, + friction=0.1, + armature=0.0, + ), + }, + ) + + +@configclass +class FactoryTaskPegInsertCfg(FactoryEnvCfg): + task_name = "peg_insert" + task = PegInsert() + episode_length_s = 10.0 + + +@configclass +class FactoryTaskGearMeshCfg(FactoryEnvCfg): + task_name = "gear_mesh" + task = GearMesh() + episode_length_s = 20.0 + + +@configclass +class FactoryTaskNutThreadCfg(FactoryEnvCfg): + task_name = "nut_thread" + task = NutThread() + episode_length_s = 30.0 diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_tasks_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_tasks_cfg.py new file mode 100644 index 0000000000..643cbc2afb --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/factory/factory_tasks_cfg.py @@ -0,0 +1,448 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import omni.isaac.lab.sim as sim_utils +from omni.isaac.lab.assets import ArticulationCfg +from omni.isaac.lab.utils import configclass +from omni.isaac.lab.utils.assets import ISAACLAB_NUCLEUS_DIR + +ASSET_DIR = f"{ISAACLAB_NUCLEUS_DIR}/Factory" + + +@configclass +class FixedAssetCfg: + usd_path: str = "" + diameter: float = 0.0 + height: float = 0.0 + base_height: float = 0.0 # Used to compute held asset CoM. + friction: float = 0.75 + mass: float = 0.05 + + +@configclass +class HeldAssetCfg: + usd_path: str = "" + diameter: float = 0.0 # Used for gripper width. + height: float = 0.0 + friction: float = 0.75 + mass: float = 0.05 + + +@configclass +class RobotCfg: + robot_usd: str = "" + franka_fingerpad_length: float = 0.017608 + friction: float = 0.75 + + +@configclass +class FactoryTask: + robot_cfg: RobotCfg = RobotCfg() + name: str = "" + duration_s = 5.0 + + fixed_asset_cfg: FixedAssetCfg = FixedAssetCfg() + held_asset_cfg: HeldAssetCfg = HeldAssetCfg() + asset_size: float = 0.0 + + # Robot + hand_init_pos: list = [0.0, 0.0, 0.015] # Relative to fixed asset tip. + hand_init_pos_noise: list = [0.02, 0.02, 0.01] + hand_init_orn: list = [3.1416, 0, 2.356] + hand_init_orn_noise: list = [0.0, 0.0, 1.57] + + # Action + unidirectional_rot: bool = False + + # Fixed Asset (applies to all tasks) + fixed_asset_init_pos_noise: list = [0.05, 0.05, 0.05] + fixed_asset_init_orn_deg: float = 0.0 + fixed_asset_init_orn_range_deg: float = 360.0 + + # Held Asset (applies to all tasks) + held_asset_pos_noise: list = [0.0, 0.006, 0.003] # noise level of the held asset in gripper + held_asset_rot_init: float = -90.0 + + # Reward + ee_success_yaw: float = 0.0 # nut_thread task only. + action_penalty_scale: float = 0.0 + action_grad_penalty_scale: float = 0.0 + # Reward function details can be found in Appendix B of https://arxiv.org/pdf/2408.04587. + # Multi-scale keypoints are used to capture different phases of the task. + # Each reward passes the keypoint distance, x, through a squashing function: + # r(x) = 1/(exp(-ax) + b + exp(ax)). + # Each list defines [a, b] which control the slope and maximum of the squashing function. + num_keypoints: int = 4 + keypoint_scale: float = 0.15 + keypoint_coef_baseline: list = [5, 4] # General movement towards fixed object. + keypoint_coef_coarse: list = [50, 2] # Movement to align the assets. + keypoint_coef_fine: list = [100, 0] # Smaller distances for threading or last-inch insertion. + # Fixed-asset height fraction for which different bonuses are rewarded (see individual tasks). + success_threshold: float = 0.04 + engage_threshold: float = 0.9 + + +@configclass +class Peg8mm(HeldAssetCfg): + usd_path = f"{ASSET_DIR}/factory_peg_8mm.usd" + diameter = 0.007986 + height = 0.050 + mass = 0.019 + + +@configclass +class Hole8mm(FixedAssetCfg): + usd_path = f"{ASSET_DIR}/factory_hole_8mm.usd" + diameter = 0.0081 + height = 0.025 + base_height = 0.0 + + +@configclass +class PegInsert(FactoryTask): + name = "peg_insert" + fixed_asset_cfg = Hole8mm() + held_asset_cfg = Peg8mm() + asset_size = 8.0 + duration_s = 10.0 + + # Robot + hand_init_pos: list = [0.0, 0.0, 0.047] # Relative to fixed asset tip. + hand_init_pos_noise: list = [0.02, 0.02, 0.01] + hand_init_orn: list = [3.1416, 0.0, 0.0] + hand_init_orn_noise: list = [0.0, 0.0, 0.785] + + # Fixed Asset (applies to all tasks) + fixed_asset_init_pos_noise: list = [0.05, 0.05, 0.05] + fixed_asset_init_orn_deg: float = 0.0 + fixed_asset_init_orn_range_deg: float = 360.0 + + # Held Asset (applies to all tasks) + held_asset_pos_noise: list = [0.003, 0.0, 0.003] # noise level of the held asset in gripper + held_asset_rot_init: float = 0.0 + + # Rewards + keypoint_coef_baseline: list = [5, 4] + keypoint_coef_coarse: list = [50, 2] + keypoint_coef_fine: list = [100, 0] + # Fraction of socket height. + success_threshold: float = 0.04 + engage_threshold: float = 0.9 + + fixed_asset: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/FixedAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=fixed_asset_cfg.usd_path, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=fixed_asset_cfg.mass), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.6, 0.0, 0.05), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, + ) + held_asset: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/HeldAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=held_asset_cfg.usd_path, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=held_asset_cfg.mass), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.4, 0.1), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, + ) + + +@configclass +class GearBase(FixedAssetCfg): + usd_path = f"{ASSET_DIR}/factory_gear_base.usd" + height = 0.02 + base_height = 0.005 + small_gear_base_offset = [5.075e-2, 0.0, 0.0] + medium_gear_base_offset = [2.025e-2, 0.0, 0.0] + large_gear_base_offset = [-3.025e-2, 0.0, 0.0] + + +@configclass +class MediumGear(HeldAssetCfg): + usd_path = f"{ASSET_DIR}/factory_gear_medium.usd" + diameter = 0.03 # Used for gripper width. + height: float = 0.03 + mass = 0.012 + + +@configclass +class GearMesh(FactoryTask): + name = "gear_mesh" + fixed_asset_cfg = GearBase() + held_asset_cfg = MediumGear() + target_gear = "gear_medium" + duration_s = 20.0 + + small_gear_usd = f"{ASSET_DIR}/factory_gear_small.usd" + large_gear_usd = f"{ASSET_DIR}/factory_gear_large.usd" + + small_gear_cfg: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/SmallGearAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=small_gear_usd, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.4, 0.1), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, + ) + + large_gear_cfg: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/LargeGearAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=large_gear_usd, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=0.019), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.4, 0.1), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, + ) + + # Gears Asset + add_flanking_gears = True + add_flanking_gears_prob = 1.0 + + # Robot + hand_init_pos: list = [0.0, 0.0, 0.035] # Relative to fixed asset tip. + hand_init_pos_noise: list = [0.02, 0.02, 0.01] + hand_init_orn: list = [3.1416, 0, 0.0] + hand_init_orn_noise: list = [0.0, 0.0, 0.785] + + # Fixed Asset (applies to all tasks) + fixed_asset_init_pos_noise: list = [0.05, 0.05, 0.05] + fixed_asset_init_orn_deg: float = 0.0 + fixed_asset_init_orn_range_deg: float = 15.0 + + # Held Asset (applies to all tasks) + held_asset_pos_noise: list = [0.003, 0.0, 0.003] # noise level of the held asset in gripper + held_asset_rot_init: float = -90.0 + + keypoint_coef_baseline: list = [5, 4] + keypoint_coef_coarse: list = [50, 2] + keypoint_coef_fine: list = [100, 0] + # Fraction of gear peg height. + success_threshold: float = 0.05 + engage_threshold: float = 0.9 + + fixed_asset: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/FixedAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=fixed_asset_cfg.usd_path, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=fixed_asset_cfg.mass), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.6, 0.0, 0.05), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, + ) + held_asset: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/HeldAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=held_asset_cfg.usd_path, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=held_asset_cfg.mass), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.4, 0.1), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, + ) + + +@configclass +class NutM16(HeldAssetCfg): + usd_path = f"{ASSET_DIR}/factory_nut_m16.usd" + diameter = 0.024 + height = 0.01 + mass = 0.03 + friction = 0.01 # Additive with the nut means friction is (-0.25 + 0.75)/2 = 0.25 + + +@configclass +class BoltM16(FixedAssetCfg): + usd_path = f"{ASSET_DIR}/factory_bolt_m16.usd" + diameter = 0.024 + height = 0.025 + base_height = 0.01 + thread_pitch = 0.002 + + +@configclass +class NutThread(FactoryTask): + name = "nut_thread" + fixed_asset_cfg = BoltM16() + held_asset_cfg = NutM16() + asset_size = 16.0 + duration_s = 30.0 + + # Robot + hand_init_pos: list = [0.0, 0.0, 0.015] # Relative to fixed asset tip. + hand_init_pos_noise: list = [0.02, 0.02, 0.01] + hand_init_orn: list = [3.1416, 0.0, 1.83] + hand_init_orn_noise: list = [0.0, 0.0, 0.26] + + # Action + unidirectional_rot: bool = True + + # Fixed Asset (applies to all tasks) + fixed_asset_init_pos_noise: list = [0.05, 0.05, 0.05] + fixed_asset_init_orn_deg: float = 120.0 + fixed_asset_init_orn_range_deg: float = 30.0 + + # Held Asset (applies to all tasks) + held_asset_pos_noise: list = [0.0, 0.003, 0.003] # noise level of the held asset in gripper + held_asset_rot_init: float = -90.0 + + # Reward. + ee_success_yaw = 0.0 + keypoint_coef_baseline: list = [100, 2] + keypoint_coef_coarse: list = [500, 2] # 100, 2 + keypoint_coef_fine: list = [1500, 0] # 500, 0 + # Fraction of thread-height. + success_threshold: float = 0.375 + engage_threshold: float = 0.5 + keypoint_scale: float = 0.05 + + fixed_asset: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/FixedAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=fixed_asset_cfg.usd_path, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=fixed_asset_cfg.mass), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.6, 0.0, 0.05), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, + ) + held_asset: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/HeldAsset", + spawn=sim_utils.UsdFileCfg( + usd_path=held_asset_cfg.usd_path, + activate_contact_sensors=True, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + max_depenetration_velocity=5.0, + linear_damping=0.0, + angular_damping=0.0, + max_linear_velocity=1000.0, + max_angular_velocity=3666.0, + enable_gyroscopic_forces=True, + solver_position_iteration_count=192, + solver_velocity_iteration_count=1, + max_contact_impulse=1e32, + ), + mass_props=sim_utils.MassPropertiesCfg(mass=held_asset_cfg.mass), + collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.4, 0.1), rot=(1.0, 0.0, 0.0, 0.0), joint_pos={}, joint_vel={} + ), + actuators={}, + ) diff --git a/source/standalone/workflows/rl_games/play.py b/source/standalone/workflows/rl_games/play.py index d16e46b2ef..7aa1456fe0 100644 --- a/source/standalone/workflows/rl_games/play.py +++ b/source/standalone/workflows/rl_games/play.py @@ -155,7 +155,7 @@ def main(): # convert obs to agent format obs = agent.obs_to_torch(obs) # agent stepping - actions = agent.get_action(obs, is_deterministic=True) + actions = agent.get_action(obs, is_deterministic=agent.is_deterministic) # env stepping obs, _, dones, _ = env.step(actions) diff --git a/tools/per_test_timeouts.py b/tools/per_test_timeouts.py index a82b49daac..340abde85c 100644 --- a/tools/per_test_timeouts.py +++ b/tools/per_test_timeouts.py @@ -10,7 +10,7 @@ PER_TEST_TIMEOUTS = { "test_articulation.py": 200, "test_deformable_object.py": 200, - "test_environments.py": 1200, # This test runs through all the environments for 100 steps each + "test_environments.py": 1500, # This test runs through all the environments for 100 steps each "test_environment_determinism.py": 200, # This test runs through many the environments for 100 steps each "test_env_rendering_logic.py": 300, "test_camera.py": 500,