|
40 | 40 | (COMM)->c_coll->coll_ ## COLL ## _module = (FALLBACKS).COLL.module; \ |
41 | 41 | } while(0) |
42 | 42 |
|
/*
 * Conditionally take an extra reference on a HAN sub-communicator.
 *
 * When OMPI_COMM_CID_IS_LOWER(COMM, PARENT_COMM) holds, COMM is flagged via
 * OMPI_COMM_SET_EXTRA_RETAIN and retained once more with OBJ_RETAIN, so that
 * the sub-communicator is not released before its parent communicator.
 * NOTE(review): presumably the extra-retain flag lets the communicator
 * teardown path drop this additional reference later -- confirm against the
 * communicator free/finalize code.
 *
 * COMM        sub-communicator pointer (e.g. *low_comm or low_comms[i]);
 *             evaluated exactly once.  Assumed non-NULL -- TODO confirm that
 *             every call site only passes successfully created communicators.
 * PARENT_COMM communicator the sub-communicator was derived from; evaluated
 *             exactly once.
 */
#define HAN_SUBCOM_EXTRA_RETAIN(COMM, PARENT_COMM)                            \
    do {                                                                      \
        /* Capture COMM once: it is needed three times below. */              \
        struct ompi_communicator_t *han_extra_retain_comm_ = (COMM);          \
        if (OMPI_COMM_CID_IS_LOWER(han_extra_retain_comm_, (PARENT_COMM))) {  \
            OMPI_COMM_SET_EXTRA_RETAIN(han_extra_retain_comm_);               \
            OBJ_RETAIN(han_extra_retain_comm_);                               \
        }                                                                     \
    } while (0)
| 51 | + |
43 | 52 | /* |
44 | 53 | * Routine that creates the local hierarchical sub-communicators |
45 | 54 | * Called each time a collective is called. |
@@ -184,6 +193,11 @@ int mca_coll_han_comm_create_new(struct ompi_communicator_t *comm, |
184 | 193 | HAN_SUBCOM_LOAD_COLLECTIVE(fallbacks, comm, han_module, scatter); |
185 | 194 |
|
186 | 195 | OBJ_DESTRUCT(&comm_info); |
| 196 | + |
| 197 | + /* Ensure these communicators aren't released before the parent comm */ |
| 198 | + HAN_SUBCOM_EXTRA_RETAIN(*low_comm, comm); |
| 199 | + HAN_SUBCOM_EXTRA_RETAIN(*up_comm, comm); |
| 200 | + |
187 | 201 | return OMPI_SUCCESS; |
188 | 202 |
|
189 | 203 | return_with_error: |
@@ -338,6 +352,14 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm, |
338 | 352 | han_module->cached_up_comms = up_comms; |
339 | 353 | han_module->cached_vranks = vranks; |
340 | 354 |
|
| 355 | + /* Ensure these communicators aren't released before the parent comm */ |
| 356 | + for(int i = 0; i < COLL_HAN_LOW_MODULES; i++) { |
| 357 | + HAN_SUBCOM_EXTRA_RETAIN(low_comms[i], comm); |
| 358 | + } |
| 359 | + for(int i = 0; i < COLL_HAN_UP_MODULES; i++) { |
| 360 | + HAN_SUBCOM_EXTRA_RETAIN(up_comms[i], comm); |
| 361 | + } |
| 362 | + |
341 | 363 | /* Reset the saved collectives to point back to HAN */ |
342 | 364 | HAN_SUBCOM_LOAD_COLLECTIVE(fallbacks, comm, han_module, allgatherv); |
343 | 365 | HAN_SUBCOM_LOAD_COLLECTIVE(fallbacks, comm, han_module, allgather); |
|
0 commit comments