Skip to content

Commit b7c1ab8

Browse files
committed
Enhancement Request 37422329 - [36856306->25.03] Even when snapshot creation fails, the snapshot is still visible in the snapshots list (merge main -> ce/main @ 113056)
[git-p4: depot-paths = "//dev/coherence-ce/main/": change = 113069]
1 parent 613f547 commit b7c1ab8

File tree

7 files changed

+704
-11
lines changed

7 files changed

+704
-11
lines changed

prj/coherence-core-components/src/main/java/com/tangosol/coherence/component/util/daemon/queueProcessor/service/grid/PartitionedService.java

Lines changed: 232 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28251,6 +28251,19 @@ public static class SnapshotController
2825128251
*/
2825228252
private volatile long __m_SnapshotNamesExpire;
2825328253

28254+
/**
28255+
* A cache of failed snapshot names held for at most SNAPSHOT_NAMES_EXPIRE
28256+
* millis after a SnapshotListRequest.
28257+
*/
28258+
private String[] __m_SnapshotFailures;
28259+
28260+
/**
28261+
* An absolute time when SnapshotFailures should no longer be used.
28262+
*
28263+
* @volatile
28264+
*/
28265+
private volatile long __m_SnapshotFailuresExpire;
28266+
2825428267
/**
2825528268
* Property State
2825628269
*
@@ -28849,6 +28862,26 @@ protected long getSnapshotNamesExpire()
2884928862
return __m_SnapshotNamesExpire;
2885028863
}
2885128864

28865+
/**
28866+
* Getter for property SnapshotFailures.<p>
28867+
* A cache of failed snapshot names held for at most SNAPSHOT_NAMES_EXPIRE
28868+
* millis after a SnapshotListRequest.
*
* @return the cached failed snapshot names; null if nothing is cached
28869+
*/
28870+
protected String[] getSnapshotFailures()
28871+
{
28872+
return __m_SnapshotFailures;
28873+
}
28874+
28875+
/**
28876+
* Getter for property SnapshotFailuresExpire.<p>
* An absolute time when SnapshotFailures should no longer be used.
* The value is compared against Base.getSafeTimeMillis() by
* listFailedSnapshots().
28877+
*
* @return the expiry time of the SnapshotFailures cache, in millis
*
28878+
* @volatile
28879+
*/
28880+
protected long getSnapshotFailuresExpire()
28881+
{
28882+
return __m_SnapshotFailuresExpire;
28883+
}
28884+
2885228885
// From interface: com.tangosol.persistence.PersistenceManagerMBean
2885328886
/**
2885428887
* Return a list of snapshots that are available for recovery.
@@ -28888,6 +28921,41 @@ public String[] getSnapshots()
2888828921
return asNames;
2888928922
}
2889028923

28924+
/**
28925+
* Return the list of failed snapshots.
28926+
*
28927+
* @return the list of failed snapshots
28928+
*/
28929+
public String[] listFailedSnapshots()
28930+
{
28931+
PartitionedService service = getService();
28932+
if (service == null || !service.isRunning())
28933+
{
28934+
return null;
28935+
}
28936+
28937+
if (Base.getSafeTimeMillis() < getSnapshotFailuresExpire() ||
28938+
Thread.currentThread() == service.getThread())
28939+
{
28940+
// use the cached value if within the allowed staleness or if being asked
28941+
// on the service thread, as it requires a poll to all storage-enabled members.
28942+
// This question could only be asked meaningfully on the Management thread
28943+
// but could be called during serialization of the model for initial
28944+
// registration - see Registry.registerLocalModel()
28945+
return getSnapshotFailures();
28946+
}
28947+
28948+
PartitionedService.SnapshotListRequest msgRequest = (PartitionedService.SnapshotListRequest)
28949+
service.instantiateMessage("SnapshotListRequest");
28950+
msgRequest.setToMemberSet(service.getOwnershipMemberSet());
28951+
msgRequest.setSnapshotName(null);
28952+
msgRequest.setFailed(true); // this ensures we get list of failed snapshots
28953+
28954+
String[] asNames = (String[]) service.poll(msgRequest);
28955+
setSnapshotFailures(asNames);
28956+
return asNames;
28957+
}
28958+
2889128959
/**
2889228960
* Return a Map<Integer, String[]> where the key is the member id
2889328961
* and the value is the list of stores that are known by all members
@@ -29170,17 +29238,21 @@ public void onRecoveryCompleted(String sSnapshot, com.tangosol.net.partition.Par
2917029238

2917129239
String sMessage, sUserData;
2917229240

29241+
PersistenceEnvironment env = SafePersistenceWrappers.unwrap(
29242+
getService().getPersistenceControl().getPersistenceEnvironment());
2917329243
if (partsFailed == null || partsFailed.isEmpty())
2917429244
{
2917529245
sMessage = "Successfully recovered snapshot \"" + sSnapshot + '"';
2917629246
sUserData = "";
2917729247
_trace(sMessage, 3);
29248+
CachePersistenceHelper.recordRecoveryStatus(env, sSnapshot, true, null);
2917829249
}
2917929250
else
2918029251
{
2918129252
sMessage = "Failed to recover snapshot \"" + sSnapshot + '"';
2918229253
sUserData = "failed partitions " + partsFailed;
2918329254
_trace(sMessage + " because of " + sUserData, 2);
29255+
CachePersistenceHelper.recordRecoveryStatus(env, sSnapshot, false, sUserData);
2918429256
}
2918529257

2918629258
// resume the service if it was previously suspended
@@ -29433,6 +29505,48 @@ public void removeSnapshot(String sSnapshot)
2943329505
addNotification(PersistenceManagerMBean.REMOVE_SNAPSHOT_BEGIN, getOperationStatus(), "");
2943429506
}
2943529507

29508+
/**
29509+
* Return snapshot status.
29510+
*
29511+
* @param sName the snapshot name
29512+
*
29513+
* @return the snapshot status, or null if it is not possible
29514+
* to obtain the status.
29515+
*/
29516+
public synchronized String getSnapshotStatus(String sName)
29517+
{
29518+
String currentSnapshotName = getSnapshotName();
29519+
if (sName.equals(currentSnapshotName))
29520+
{
29521+
return getOperationStatus();
29522+
}
29523+
29524+
return CachePersistenceHelper.getSnapshotStatus(
29525+
SafePersistenceWrappers.unwrap(getService().getPersistenceControl().getPersistenceEnvironment()),
29526+
sName);
29527+
}
29528+
29529+
/**
29530+
* Return snapshot recovery status.
29531+
*
29532+
* @param sName the snapshot name
29533+
*
29534+
* @return the snapshot recovery status, or null if it is not
29535+
* possible to obtain the status.
29536+
*/
29537+
public synchronized String getSnapshotRecoveryStatus(String sName)
29538+
{
29539+
String currentSnapshotName = getSnapshotName();
29540+
if (sName.equals(currentSnapshotName))
29541+
{
29542+
return getOperationStatus();
29543+
}
29544+
29545+
return CachePersistenceHelper.getSnapshotRecoveryStatus(
29546+
SafePersistenceWrappers.unwrap(getService().getPersistenceControl().getPersistenceEnvironment()),
29547+
sName);
29548+
}
29549+
2943629550
/**
2943729551
* Reset the state of the SnapshotController after an operation has
2943829552
* completed.
@@ -29628,6 +29742,32 @@ protected void setSnapshotNamesExpire(long ldtExpire)
2962829742
{
2962929743
__m_SnapshotNamesExpire = ldtExpire;
2963029744
}
29745+
29746+
// Accessor for the property "SnapshotFailures"
29747+
/**
29748+
* Setter for property SnapshotFailures.<p>
29749+
* A cache of snapshot names held for at most SNAPSHOT_NAMES_EXPIRE
29750+
* millis after a SnapshotListRequest.
29751+
*/
29752+
public void setSnapshotFailures(String[] asNames)
29753+
{
29754+
// import com.tangosol.util.Base;
29755+
29756+
__m_SnapshotFailures = asNames;
29757+
29758+
setSnapshotFailuresExpire(asNames == null
29759+
? 0L : Base.getSafeTimeMillis() + SNAPSHOT_NAMES_EXPIRE);
29760+
}
29761+
29762+
/**
29763+
* Setter for property SnapshotFailuresExpire.<p>
* An absolute time when SnapshotFailures should no longer be used.
29764+
*
* @param ldtExpire  the expiry time of the SnapshotFailures cache, in
*                   millis; setSnapshotFailures() passes 0 when caching null
*
29765+
* @volatile
29766+
*/
29767+
protected void setSnapshotFailuresExpire(long ldtExpire)
29768+
{
29769+
__m_SnapshotFailuresExpire = ldtExpire;
29770+
}
2963129771

2963229772
// Accessor for the property "State"
2963329773
/**
@@ -30820,15 +30960,26 @@ public static class SnapshotListRequest
3082030960
*/
3082130961
public static final String RESPONSE_STORES = "2";
3082230962

30963+
/**
30964+
* This value indicates the response was for a list of failed snapshots.
30965+
*/
30966+
public static final String RESPONSE_FAILED_SNAPSHOTS = "3";
30967+
3082330968
/**
3082430969
* Property SnapshotName
3082530970
*
3082630971
* If Snapshot is null this means that SnapshotListRequest should
30827-
* retrieve the snapshots for the service otherwise the stores for the
30972+
* retrieve the snapshots for the service (or failed snapshots if
30973+
* __m_failed is set to true) otherwise the stores for the
3082830974
* snapshot specified by SnapshotName should be returned.
3082930975
*/
3083030976
private String __m_SnapshotName;
3083130977
private static com.tangosol.util.ListMap __mapChildren;
30978+
30979+
/**
30980+
* If true request should retrieve list of failed snapshots.
30981+
*/
30982+
private boolean __m_failed;
3083230983

3083330984
// Static initializer
3083430985
static
@@ -30959,6 +31110,15 @@ public String getSnapshotName()
3095931110
return __m_SnapshotName;
3096031111
}
3096131112

31113+
/**
31114+
* Returns true if SnapshotListRequest should retrieve list
31115+
* of failed snapshots.
*
* @return true if the list of failed snapshots is requested
31116+
*/
31117+
public boolean getFailed()
31118+
{
31119+
return __m_failed;
31120+
}
31121+
3096231122
// Declared at the super level
3096331123
protected com.tangosol.coherence.component.net.Poll instantiatePoll()
3096431124
{
@@ -31010,8 +31170,16 @@ public void onReceived()
3101031170

3101131171
if (sSnapshotName == null)
3101231172
{
31013-
// respond with list of snapshots
31014-
msgResponse.setValue (new Object[] {RESPONSE_SNAPSHOTS, env.listSnapshots()});
31173+
if (getFailed())
31174+
{
31175+
String[] asFailedSnapshots = CachePersistenceHelper.getFailedSnapshots(env);
31176+
msgResponse.setValue (new Object[] {RESPONSE_FAILED_SNAPSHOTS, asFailedSnapshots});
31177+
}
31178+
else
31179+
{
31180+
// respond with list of snapshots
31181+
msgResponse.setValue (new Object[] {RESPONSE_SNAPSHOTS, env.listSnapshots()});
31182+
}
3101531183
}
3101631184
else
3101731185
{
@@ -31052,6 +31220,7 @@ public void read(com.tangosol.io.ReadBuffer.BufferInput input)
3105231220

3105331221
boolean fNull = input.readBoolean();
3105431222
setSnapshotName(fNull ? null : input.readUTF());
31223+
setFailed(fNull && input.readBoolean());
3105531224
}
3105631225

3105731226
// Accessor for the property "SnapshotName"
@@ -31066,6 +31235,15 @@ public void setSnapshotName(String sSnapshotName)
3106631235
__m_SnapshotName = sSnapshotName;
3106731236
}
3106831237

31238+
/**
31239+
* Set to true if SnapshotListRequest should retrieve list
31240+
* of failed snapshots.
*
* @param bFailed  true to request the list of failed snapshots
31241+
*/
31242+
public void setFailed(boolean bFailed)
31243+
{
31244+
__m_failed = bFailed;
31245+
}
31246+
3106931247
// Declared at the super level
3107031248
public void write(com.tangosol.io.WriteBuffer.BufferOutput output)
3107131249
throws java.io.IOException
@@ -31078,6 +31256,10 @@ public void write(com.tangosol.io.WriteBuffer.BufferOutput output)
3107831256
{
3107931257
output.writeUTF(getSnapshotName());
3108031258
}
31259+
if (fNull)
31260+
{
31261+
output.writeBoolean(getFailed());
31262+
}
3108131263
}
3108231264

3108331265
// ---- class: com.tangosol.coherence.component.util.daemon.queueProcessor.service.grid.PartitionedService$SnapshotListRequest$Poll
@@ -31106,6 +31288,10 @@ public static class Poll
3110631288
*/
3110731289
private transient java.util.Set __m_Snapshots;
3110831290

31291+
/**
31292+
* The set of failed snapshot names returned by the SnapshotListRequests.
31293+
*/
31294+
private transient java.util.Set __m_FailedSnapshots;
3110931295
/**
3111031296
* Property Stores
3111131297
*
@@ -31205,6 +31391,15 @@ public java.util.Set getSnapshots()
3120531391
return __m_Snapshots;
3120631392
}
3120731393

31394+
/**
31395+
* The set of names of failed snapshots
31396+
* returned by the SnapshotListRequests.
*
* @return the set of failed snapshot names; null until onResponse()
*         has created it for a failed-snapshots response
31397+
*/
31398+
public java.util.Set getFailedSnapshots()
31399+
{
31400+
return __m_FailedSnapshots;
31401+
}
31402+
3120831403
// Accessor for the property "Stores"
3120931404
/**
3121031405
* Getter for property Stores.<p>
@@ -31229,13 +31424,18 @@ protected void onCompletion()
3122931424
// import java.util.Map;
3123031425
// import java.util.Set;
3123131426

31232-
Set setSnapshots = getSnapshots();
31233-
Map mapStores = getStores();
31427+
Set setSnapshots = getSnapshots();
31428+
Set setFailedSnapshots = getFailedSnapshots();
31429+
Map mapStores = getStores();
3123431430

3123531431
if (setSnapshots != null)
3123631432
{
3123731433
setResult(setSnapshots.toArray(new String[setSnapshots.size()]));
3123831434
}
31435+
else if (setFailedSnapshots != null)
31436+
{
31437+
setResult(setFailedSnapshots.toArray(new String[setFailedSnapshots.size()]));
31438+
}
3123931439
else
3124031440
{
3124131441
setResult(mapStores);
@@ -31280,6 +31480,25 @@ public void onResponse(com.tangosol.coherence.component.net.Message msg)
3128031480
setSnapshots.add(asSnapshots[i]);
3128131481
}
3128231482
}
31483+
else if (PartitionedService.SnapshotListRequest.RESPONSE_FAILED_SNAPSHOTS.equals(oResult[0]))
31484+
{
31485+
// list of failed snapshots was asked for
31486+
Set setFailedSnapshots = getFailedSnapshots();
31487+
if (setFailedSnapshots == null)
31488+
{
31489+
setFailedSnapshots = new TreeSet();
31490+
setFailedSnapshots(setFailedSnapshots);
31491+
}
31492+
31493+
Object[] asFailedSnapshots = (Object[]) oResult[1]; // logically String[] but POF widens it
31494+
if (asFailedSnapshots != null)
31495+
{
31496+
for (int i = 0, c = asFailedSnapshots.length; i < c; i++)
31497+
{
31498+
setFailedSnapshots.add(asFailedSnapshots[i]);
31499+
}
31500+
}
31501+
}
3128331502
else
3128431503
{
3128531504
// list of stores per snapshot was asked for from each member.
@@ -31317,6 +31536,14 @@ public void setSnapshots(java.util.Set setSnapshots)
3131731536
{
3131831537
__m_Snapshots = setSnapshots;
3131931538
}
31539+
31540+
/**
31541+
* Set the names of failed snapshots returned by the SnapshotListRequests.
*
* @param setFailedSnapshots  the set that holds the failed snapshot names
31542+
*/
31543+
public void setFailedSnapshots(java.util.Set setFailedSnapshots)
31544+
{
31545+
__m_FailedSnapshots = setFailedSnapshots;
31546+
}
3132031547

3132131548
// Accessor for the property "Stores"
3132231549
/**

0 commit comments

Comments
 (0)