Skip to content

Commit fb270ab

Browse files
Improves recorder performance and adds additional recording capability (#3302)
# Description <!-- Thank you for your interest in sending a pull request. Please make sure to check the contribution guidelines. Link: https://isaac-sim.github.io/IsaacLab/main/source/refs/contributing.html --> This PR adds fixes from LightWheel Labs and additional functionality to the IsaacLab recorder. Fixes # (issue) - Fixes performance issue when recording long episode data by replacing the use of torch.cat at every timestep with list append. - Fixes configclass validation when key is not a string Adds Functionality - Adds optional episode meta data to HDF5 recorder - Adds option to record data pre-physics step - Adds joint target data to episode data. Joint target data can be optionally recorded by users and replayed to bypass action term controllers and improve replay determinism. ## Type of change <!-- As you go through the list, delete the ones that are not applicable. --> - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) ## Screenshots Please attach before and after screenshots of the change if applicable. <!-- Example: | Before | After | | ------ | ----- | | _gif/png before_ | _gif/png after_ | To upload images to a PR -- simply drag and drop an image while in edit mode and it should upload the image directly. You can then paste that source into the above before/after sections. 
--> ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there <!-- As you go through the checklist above, you can mark something as done by putting an x character in it For example, - [x] I have done this task - [ ] I have not done this task --> --------- Signed-off-by: Kelly Guo <kellyg@nvidia.com> Co-authored-by: Kelly Guo <kellyg@nvidia.com>
1 parent 4eae06f commit fb270ab

10 files changed

Lines changed: 164 additions & 23 deletions

File tree

scripts/imitation_learning/isaaclab_mimic/annotate_demos.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ def annotate_episode_in_auto_mode(
358358
annotated_episode = env.recorder_manager.get_episode(0)
359359
subtask_term_signal_dict = annotated_episode.data["obs"]["datagen_info"]["subtask_term_signals"]
360360
for signal_name, signal_flags in subtask_term_signal_dict.items():
361+
signal_flags = torch.tensor(signal_flags, device=env.device)
361362
if not torch.any(signal_flags):
362363
is_episode_annotated_successfully = False
363364
print(f'\tDid not detect completion for the subtask "{signal_name}".')

source/isaaclab/config/extension.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
# Note: Semantic Versioning is used: https://semver.org/
4-
version = "0.45.10"
4+
version = "0.45.11"
55

66
# Description
77
title = "Isaac Lab framework for Robot Learning"

source/isaaclab/docs/CHANGELOG.rst

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,32 @@
11
Changelog
22
---------
33

4-
0.45.10 (2025-09-02)
4+
0.45.11 (2025-09-04)
55
~~~~~~~~~~~~~~~~~~~~
66

77
Fixed
88
^^^^^
99

10+
* Fixes a high memory usage and perf slowdown issue in episode data by removing the use of torch.cat when appending to the episode data
11+
at each timestep. The use of torch.cat was causing the episode data to be copied at each timestep, which caused high memory usage and
12+
significant performance slowdown when recording longer episode data.
13+
* Patches the configclass so that dictionaries whose keys are not strings can be validated.
14+
15+
Added
16+
^^^^^
17+
18+
* Added optional episode metadata (ep_meta) to be stored in the HDF5 data attributes.
19+
* Added option to record data pre-physics step.
20+
* Added joint_target data to episode data. Joint target data can be optionally recorded by the user and replayed to improve
21+
determinism of replay.
22+
23+
24+
0.45.10 (2025-09-02)
25+
~~~~~~~~~~~~~~~~~~~~
26+
27+
Fixed
28+
^^^^^
29+
1030
* Fixed regression in reach task configuration where the gripper command was being returned.
1131
* Added :attr:`~isaaclab.devices.Se3GamepadCfg.gripper_term` to :class:`~isaaclab.devices.Se3GamepadCfg`
1232
to control whether the gamepad device should return a gripper command.

source/isaaclab/isaaclab/envs/manager_based_rl_env.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
188188
self.scene.write_data_to_sim()
189189
# simulate
190190
self.sim.step(render=False)
191+
self.recorder_manager.record_post_physics_decimation_step()
191192
# render between steps only if the GUI or an RTX sensor needs it
192193
# note: we assume the render interval to be the shortest accepted rendering interval.
193194
# If a camera needs rendering at a faster frequency, this will lead to unexpected behavior.

source/isaaclab/isaaclab/managers/recorder_manager.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,15 @@ def record_post_step(self) -> tuple[str | None, torch.Tensor | dict | None]:
123123
"""
124124
return None, None
125125

126+
def record_post_physics_decimation_step(self) -> tuple[str | None, torch.Tensor | dict | None]:
127+
"""Record data after the physics step is executed in the decimation loop.
128+
129+
Returns:
130+
A tuple of key and value to be recorded.
131+
Please refer to the `record_pre_reset` function for more details.
132+
"""
133+
return None, None
134+
126135

127136
class RecorderManager(ManagerBase):
128137
"""Manager for recording data from recorder terms."""
@@ -362,6 +371,16 @@ def record_post_step(self) -> None:
362371
key, value = term.record_post_step()
363372
self.add_to_episodes(key, value)
364373

374+
def record_post_physics_decimation_step(self) -> None:
375+
"""Trigger recorder terms for post-physics step functions in the decimation loop."""
376+
# Do nothing if no active recorder terms are provided
377+
if len(self.active_terms) == 0:
378+
return
379+
380+
for term in self._terms.values():
381+
key, value = term.record_post_physics_decimation_step()
382+
self.add_to_episodes(key, value)
383+
365384
def record_pre_reset(self, env_ids: Sequence[int] | None, force_export_or_skip=None) -> None:
366385
"""Trigger recorder terms for pre-reset functions.
367386
@@ -406,6 +425,23 @@ def record_post_reset(self, env_ids: Sequence[int] | None) -> None:
406425
key, value = term.record_post_reset(env_ids)
407426
self.add_to_episodes(key, value, env_ids)
408427

428+
def get_ep_meta(self) -> dict:
429+
"""Get the episode metadata."""
430+
if not hasattr(self._env.cfg, "get_ep_meta"):
431+
# Add basic episode metadata
432+
ep_meta = dict()
433+
ep_meta["sim_args"] = {
434+
"dt": self._env.cfg.sim.dt,
435+
"decimation": self._env.cfg.decimation,
436+
"render_interval": self._env.cfg.sim.render_interval,
437+
"num_envs": self._env.cfg.scene.num_envs,
438+
}
439+
return ep_meta
440+
441+
# Add custom episode metadata if available
442+
ep_meta = self._env.cfg.get_ep_meta()
443+
return ep_meta
444+
409445
def export_episodes(self, env_ids: Sequence[int] | None = None) -> None:
410446
"""Concludes and exports the episodes for the given environment ids.
411447
@@ -424,8 +460,18 @@ def export_episodes(self, env_ids: Sequence[int] | None = None) -> None:
424460

425461
# Export episode data through dataset exporter
426462
need_to_flush = False
463+
464+
if any(env_id in self._episodes and not self._episodes[env_id].is_empty() for env_id in env_ids):
465+
ep_meta = self.get_ep_meta()
466+
if self._dataset_file_handler is not None:
467+
self._dataset_file_handler.add_env_args(ep_meta)
468+
if self._failed_episode_dataset_file_handler is not None:
469+
self._failed_episode_dataset_file_handler.add_env_args(ep_meta)
470+
427471
for env_id in env_ids:
428472
if env_id in self._episodes and not self._episodes[env_id].is_empty():
473+
self._episodes[env_id].pre_export()
474+
429475
episode_succeeded = self._episodes[env_id].success
430476
target_dataset_file_handler = None
431477
if (self.cfg.dataset_export_mode == DatasetExportMode.EXPORT_ALL) or (

source/isaaclab/isaaclab/utils/configclass.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,11 @@ def _validate(obj: object, prefix: str = "") -> list[str]:
268268
missing_fields.extend(_validate(item, prefix=current_path))
269269
return missing_fields
270270
elif isinstance(obj, dict):
271-
obj_dict = obj
271+
# Convert any non-string keys to strings to allow validation of dict with non-string keys
272+
if any(not isinstance(key, str) for key in obj.keys()):
273+
obj_dict = {str(key): value for key, value in obj.items()}
274+
else:
275+
obj_dict = obj
272276
elif hasattr(obj, "__dict__"):
273277
obj_dict = obj.__dict__
274278
else:

source/isaaclab/isaaclab/utils/datasets/episode_data.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def __init__(self) -> None:
2121
self._data = dict()
2222
self._next_action_index = 0
2323
self._next_state_index = 0
24+
self._next_joint_target_index = 0
2425
self._seed = None
2526
self._env_id = None
2627
self._success = None
@@ -110,12 +111,11 @@ def add(self, key: str, value: torch.Tensor | dict):
110111
for sub_key_index in range(len(sub_keys)):
111112
if sub_key_index == len(sub_keys) - 1:
112113
# Add value to the final dict layer
114+
# Use lists to prevent slow tensor copy during concatenation
113115
if sub_keys[sub_key_index] not in current_dataset_pointer:
114-
current_dataset_pointer[sub_keys[sub_key_index]] = value.unsqueeze(0).clone()
116+
current_dataset_pointer[sub_keys[sub_key_index]] = [value.clone()]
115117
else:
116-
current_dataset_pointer[sub_keys[sub_key_index]] = torch.cat(
117-
(current_dataset_pointer[sub_keys[sub_key_index]], value.unsqueeze(0))
118-
)
118+
current_dataset_pointer[sub_keys[sub_key_index]].append(value.clone())
119119
break
120120
# key index
121121
if sub_keys[sub_key_index] not in current_dataset_pointer:
@@ -160,7 +160,7 @@ def get_state_helper(states, state_index) -> dict | torch.Tensor | None:
160160
elif isinstance(states, torch.Tensor):
161161
if state_index >= len(states):
162162
return None
163-
output_state = states[state_index]
163+
output_state = states[state_index, None]
164164
else:
165165
raise ValueError(f"Invalid state type: {type(states)}")
166166
return output_state
@@ -174,3 +174,47 @@ def get_next_state(self) -> dict | None:
174174
if state is not None:
175175
self._next_state_index += 1
176176
return state
177+
178+
def get_joint_target(self, joint_target_index) -> dict | torch.Tensor | None:
179+
"""Get the joint target of the specified index from the dataset."""
180+
if "joint_targets" not in self._data:
181+
return None
182+
183+
joint_targets = self._data["joint_targets"]
184+
185+
def get_joint_target_helper(joint_targets, joint_target_index) -> dict | torch.Tensor | None:
186+
if isinstance(joint_targets, dict):
187+
output_joint_targets = dict()
188+
for key, value in joint_targets.items():
189+
output_joint_targets[key] = get_joint_target_helper(value, joint_target_index)
190+
if output_joint_targets[key] is None:
191+
return None
192+
elif isinstance(joint_targets, torch.Tensor):
193+
if joint_target_index >= len(joint_targets):
194+
return None
195+
output_joint_targets = joint_targets[joint_target_index]
196+
else:
197+
raise ValueError(f"Invalid joint target type: {type(joint_targets)}")
198+
return output_joint_targets
199+
200+
output_joint_targets = get_joint_target_helper(joint_targets, joint_target_index)
201+
return output_joint_targets
202+
203+
def get_next_joint_target(self) -> dict | torch.Tensor | None:
204+
"""Get the next joint target from the dataset."""
205+
joint_target = self.get_joint_target(self._next_joint_target_index)
206+
if joint_target is not None:
207+
self._next_joint_target_index += 1
208+
return joint_target
209+
210+
def pre_export(self):
211+
"""Prepare data for export by converting lists to tensors."""
212+
213+
def pre_export_helper(data):
214+
for key, value in data.items():
215+
if isinstance(value, list):
216+
data[key] = torch.stack(value)
217+
elif isinstance(value, dict):
218+
pre_export_helper(value)
219+
220+
pre_export_helper(self._data)

source/isaaclab/test/managers/test_recorder_manager.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,28 @@ class DummyStepRecorderTermCfg(RecorderTermCfg):
7878
dataset_export_mode = DatasetExportMode.EXPORT_ALL
7979

8080

81+
@configclass
82+
class DummyEnvCfg:
83+
"""Dummy environment configuration."""
84+
85+
@configclass
86+
class DummySimCfg:
87+
"""Configuration for the dummy sim."""
88+
89+
dt = 0.01
90+
render_interval = 1
91+
92+
@configclass
93+
class DummySceneCfg:
94+
"""Configuration for the dummy scene."""
95+
96+
num_envs = 1
97+
98+
decimation = 1
99+
sim = DummySimCfg()
100+
scene = DummySceneCfg()
101+
102+
81103
def create_dummy_env(device: str = "cpu") -> ManagerBasedEnv:
82104
"""Create a dummy environment."""
83105

@@ -86,8 +108,10 @@ class DummyTerminationManager:
86108

87109
dummy_termination_manager = DummyTerminationManager()
88110
sim = SimulationContext()
111+
dummy_cfg = DummyEnvCfg()
112+
89113
return namedtuple("ManagerBasedEnv", ["num_envs", "device", "sim", "cfg", "termination_manager"])(
90-
20, device, sim, dict(), dummy_termination_manager
114+
20, device, sim, dummy_cfg, dummy_termination_manager
91115
)
92116

93117

@@ -142,8 +166,8 @@ def test_record(dataset_dir):
142166
# check the recorded data
143167
for env_id in range(env.num_envs):
144168
episode = recorder_manager.get_episode(env_id)
145-
assert episode.data["record_pre_step"].shape == (2, 4)
146-
assert episode.data["record_post_step"].shape == (2, 5)
169+
assert torch.stack(episode.data["record_pre_step"]).shape == (2, 4)
170+
assert torch.stack(episode.data["record_post_step"]).shape == (2, 5)
147171

148172
# Trigger pre-reset callbacks which then export and clean the episode data
149173
recorder_manager.record_pre_reset(env_ids=None)
@@ -154,4 +178,4 @@ def test_record(dataset_dir):
154178
recorder_manager.record_post_reset(env_ids=None)
155179
for env_id in range(env.num_envs):
156180
episode = recorder_manager.get_episode(env_id)
157-
assert episode.data["record_post_reset"].shape == (1, 3)
181+
assert torch.stack(episode.data["record_post_reset"]).shape == (1, 3)

source/isaaclab/test/utils/test_episode_data.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,29 +38,29 @@ def test_add_tensors(device):
3838

3939
# test adding data to a key that does not exist
4040
episode.add("key", dummy_data_0)
41-
key_data = episode.data.get("key")
41+
key_data = torch.stack(episode.data.get("key"))
4242
assert key_data is not None
4343
assert torch.equal(key_data, dummy_data_0.unsqueeze(0))
4444

4545
# test adding data to a key that exists
4646
episode.add("key", dummy_data_1)
47-
key_data = episode.data.get("key")
47+
key_data = torch.stack(episode.data.get("key"))
4848
assert key_data is not None
4949
assert torch.equal(key_data, expected_added_data)
5050

5151
# test adding data to a key with "/" in the name
5252
episode.add("first/second", dummy_data_0)
5353
first_data = episode.data.get("first")
5454
assert first_data is not None
55-
second_data = first_data.get("second")
55+
second_data = torch.stack(first_data.get("second"))
5656
assert second_data is not None
5757
assert torch.equal(second_data, dummy_data_0.unsqueeze(0))
5858

5959
# test adding data to a key with "/" in the name that already exists
6060
episode.add("first/second", dummy_data_1)
6161
first_data = episode.data.get("first")
6262
assert first_data is not None
63-
second_data = first_data.get("second")
63+
second_data = torch.stack(first_data.get("second"))
6464
assert second_data is not None
6565
assert torch.equal(second_data, expected_added_data)
6666

@@ -83,31 +83,31 @@ def test_add_dict_tensors(device):
8383
episode.add("key", dummy_dict_data_0)
8484
key_data = episode.data.get("key")
8585
assert key_data is not None
86-
key_0_data = key_data.get("key_0")
86+
key_0_data = torch.stack(key_data.get("key_0"))
8787
assert key_0_data is not None
8888
assert torch.equal(key_0_data, torch.tensor([[0]], device=device))
8989
key_1_data = key_data.get("key_1")
9090
assert key_1_data is not None
91-
key_1_0_data = key_1_data.get("key_1_0")
91+
key_1_0_data = torch.stack(key_1_data.get("key_1_0"))
9292
assert key_1_0_data is not None
9393
assert torch.equal(key_1_0_data, torch.tensor([[1]], device=device))
94-
key_1_1_data = key_1_data.get("key_1_1")
94+
key_1_1_data = torch.stack(key_1_data.get("key_1_1"))
9595
assert key_1_1_data is not None
9696
assert torch.equal(key_1_1_data, torch.tensor([[2]], device=device))
9797

9898
# test adding dict data to a key that exists
9999
episode.add("key", dummy_dict_data_1)
100100
key_data = episode.data.get("key")
101101
assert key_data is not None
102-
key_0_data = key_data.get("key_0")
102+
key_0_data = torch.stack(key_data.get("key_0"))
103103
assert key_0_data is not None
104104
assert torch.equal(key_0_data, torch.tensor([[0], [3]], device=device))
105105
key_1_data = key_data.get("key_1")
106106
assert key_1_data is not None
107-
key_1_0_data = key_1_data.get("key_1_0")
107+
key_1_0_data = torch.stack(key_1_data.get("key_1_0"))
108108
assert key_1_0_data is not None
109109
assert torch.equal(key_1_0_data, torch.tensor([[1], [4]], device=device))
110-
key_1_1_data = key_1_data.get("key_1_1")
110+
key_1_1_data = torch.stack(key_1_data.get("key_1_1"))
111111
assert key_1_1_data is not None
112112
assert torch.equal(key_1_1_data, torch.tensor([[2], [5]], device=device))
113113

@@ -119,7 +119,7 @@ def test_get_initial_state(device):
119119
episode = EpisodeData()
120120

121121
episode.add("initial_state", dummy_initial_state)
122-
initial_state = episode.get_initial_state()
122+
initial_state = torch.stack(episode.get_initial_state())
123123
assert initial_state is not None
124124
assert torch.equal(initial_state, dummy_initial_state.unsqueeze(0))
125125

source/isaaclab/test/utils/test_hdf5_dataset_file_handler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def test_write_and_load_episode(temp_dir, device):
8282
test_episode = create_test_episode(device)
8383

8484
# write the episode to the dataset
85+
test_episode.pre_export()
8586
dataset_file_handler.write_episode(test_episode)
8687
dataset_file_handler.flush()
8788

0 commit comments

Comments
 (0)