From 2ac6e66cc8847ddb0745f7c47361a8ade5f0a7be Mon Sep 17 00:00:00 2001 From: Zilin Zhu Date: Wed, 27 May 2026 12:11:10 +0000 Subject: [PATCH] Don't use sample.index as default rollout_id --- slime/ray/rollout.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/slime/ray/rollout.py b/slime/ray/rollout.py index 69c786cf8..4998c8232 100644 --- a/slime/ray/rollout.py +++ b/slime/ray/rollout.py @@ -665,6 +665,16 @@ def _convert_samples_to_train_data(self, samples: list[Sample] | list[list[Sampl assert len(raw_rewards) == len(samples) assert len(rewards) == len(samples) + # Rollout id (one per rollout execution). Default rollouts emit one + # sample per rollout, so we fall back to each sample's position in + # ``samples`` (unique within the batch). Compact / subagent paths that + # emit multiple training samples per rollout set ``rollout_id`` explicitly + # so all siblings share a value; the loss reducer aggregates them as one rollout. + if samples[0].rollout_id is None: + rollout_ids = list(range(len(samples))) + else: + rollout_ids = [sample.rollout_id for sample in samples] + train_data = { "tokens": [sample.tokens for sample in samples], "response_lengths": [sample.response_length for sample in samples], @@ -674,12 +684,7 @@ def _convert_samples_to_train_data(self, samples: list[Sample] | list[list[Sampl "raw_reward": raw_rewards, "truncated": [1 if sample.status == Sample.Status.TRUNCATED else 0 for sample in samples], "sample_indices": [sample.index for sample in samples], - # Rollout id (one per rollout execution). Default rollouts emit one - # sample per rollout, so we fall back to ``sample.index`` (unique). - # Compact / subagent paths that emit multiple training samples per - # rollout set ``rollout_id`` explicitly so all siblings share a - # value; the loss reducer then aggregates them as one rollout. - "rollout_ids": [s.rollout_id if s.rollout_id is not None else s.index for s in samples], + "rollout_ids": rollout_ids, } # loss mask