Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions agentevolver/module/trainer/ae_ray_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,11 +197,11 @@ def compute_grpo_outcome_advantage(
id2score[index[i]].append(scores[i])
for idx in id2score:
if len(id2score[idx]) == 1:
id2mean[idx] = torch.tensor(0.0)
id2std[idx] = torch.tensor(1.0)
id2mean[idx] = torch.tensor(0.0, device=scores.device)
id2std[idx] = torch.tensor(1.0, device=scores.device)
elif len(id2score[idx]) > 1:
id2mean[idx] = torch.mean(torch.tensor(id2score[idx]))
id2std[idx] = torch.std(torch.tensor([id2score[idx]]))
id2mean[idx] = torch.mean(torch.tensor(id2score[idx], device=scores.device))
id2std[idx] = torch.std(torch.tensor(id2score[idx], device=scores.device))
else:
raise ValueError(f"no score in prompt index: {idx}")
for i in range(bsz):
Expand Down