@@ -44,13 +44,15 @@ public long writerId() {
44
44
return writerId ;
45
45
}
46
46
47
- public void append (LogRecordBatch batch , boolean isWriterInBatchExpired ) {
47
+ public void append (
48
+ LogRecordBatch batch , boolean isWriterInBatchExpired , boolean isAppendAsLeader ) {
48
49
LogOffsetMetadata firstOffsetMetadata = new LogOffsetMetadata (batch .baseLogOffset ());
49
50
appendDataBatch (
50
51
batch .batchSequence (),
51
52
firstOffsetMetadata ,
52
53
batch .lastLogOffset (),
53
54
isWriterInBatchExpired ,
55
+ isAppendAsLeader ,
54
56
batch .commitTimestamp ());
55
57
}
56
58
@@ -59,8 +61,9 @@ public void appendDataBatch(
59
61
LogOffsetMetadata firstOffsetMetadata ,
60
62
long lastOffset ,
61
63
boolean isWriterInBatchExpired ,
64
+ boolean isAppendAsLeader ,
62
65
long batchTimestamp ) {
63
- maybeValidateDataBatch (batchSequence , isWriterInBatchExpired , lastOffset );
66
+ maybeValidateDataBatch (batchSequence , isWriterInBatchExpired , lastOffset , isAppendAsLeader );
64
67
updatedEntry .addBath (
65
68
batchSequence ,
66
69
lastOffset ,
@@ -69,13 +72,16 @@ public void appendDataBatch(
69
72
}
70
73
71
74
private void maybeValidateDataBatch (
72
- int appendFirstSeq , boolean isWriterInBatchExpired , long lastOffset ) {
75
+ int appendFirstSeq ,
76
+ boolean isWriterInBatchExpired ,
77
+ long lastOffset ,
78
+ boolean isAppendAsLeader ) {
73
79
int currentLastSeq =
74
80
!updatedEntry .isEmpty ()
75
81
? updatedEntry .lastBatchSequence ()
76
82
: currentEntry .lastBatchSequence ();
77
83
// must be in sequence, even for the first batch should start from 0
78
- if (!inSequence (currentLastSeq , appendFirstSeq , isWriterInBatchExpired )) {
84
+ if (!inSequence (currentLastSeq , appendFirstSeq , isWriterInBatchExpired , isAppendAsLeader )) {
79
85
throw new OutOfOrderSequenceException (
80
86
String .format (
81
87
"Out of order batch sequence for writer %s at offset %s in "
@@ -93,16 +99,52 @@ public WriterStateEntry toEntry() {
93
99
* three scenarios will be judged as in sequence:
94
100
*
95
101
* <ul>
96
- * <li>If lastBatchSeq equals NO_BATCH_SEQUENCE, we need to check whether the committed
97
- * timestamp of the next batch under the current writerId has expired. If it has expired,
98
- * we consider this a special case caused by writerId expiration, for this case, to ensure
99
- * the correctness of follower sync, we still treat it as in sequence.
102
+ * <li>If lastBatchSeq equals NO_BATCH_SEQUENCE, the following two scenarios will be judged as
103
+ * in sequence:
104
+ * <ul>
105
+ * <li>If the committed timestamp of the next batch under the current writerId has
106
+ * expired, we consider this a special case caused by writerId expiration. In this
107
+ * case, to ensure the correctness of follower sync, we still treat it as in
108
+ * sequence.
109
+ * <li>If the append request is from the follower, we consider this a special case
110
+ * caused by inconsistent expiration of writerId between the leader and follower. To
111
+ * prevent continuous fetch failures on the follower side, we still treat it as in
112
+ * sequence. Here is a detailed example: The expiration of a writer is triggered
113
+ * asynchronously by the {@code PeriodicWriterIdExpirationCheck} thread at intervals
114
+ * defined by {@code server.writer-id.expiration-check-interval}, which can result
115
+ * in slight differences in the actual expiration times of the same writer on the
116
+ * leader replica and follower replicas. This slight difference leads to a dreadful
117
+ * corner case. Imagine the following scenario (set {@code
118
+ * server.writer-id.expiration-check-interval}: 10min, {@code
119
+ * server.writer-id.expiration-time}: 12h):
120
+ * <pre>{@code
121
+ * Step   Time       Action of Leader                Action of Follower
122
+ * 1      00:03:38   receive batch 0 of writer 101
123
+ * 2      00:03:38                                   fetch batch 0 of writer 101
124
+ * 3      12:05:00                                   remove state of writer 101
125
+ * 4      12:10:02   receive batch 1 of writer 101
126
+ * 5      12:10:02                                   fetch batch 1 of writer 101
127
+ * 6      12:11:00   remove state of writer 101
128
+ * }</pre>
129
+ * In step 3, the follower removes the state of writer 101 first, since it has been
130
+ * more than 12 hours since writer 101's last batch write, making it safe to remove.
131
+ * However, since the expiration of writer 101 has not yet occurred on the leader,
132
+ * and a new batch 1 is received at this time, it is successfully written on the
133
+ * leader. At this point, the fetcher pulls batch 1 from the leader, but since the
134
+ * state of writer 101 has already been cleaned up, an {@link
135
+ * OutOfOrderSequenceException} will occur during the write if we don't treat it as
136
+ * in sequence.
137
+ * </ul>
100
138
* <li>nextBatchSeq == lastBatchSeq + 1L
101
139
* <li>lastBatchSeq reaches its maximum value
102
140
* </ul>
103
141
*/
104
- private boolean inSequence (int lastBatchSeq , int nextBatchSeq , boolean isWriterInBatchExpired ) {
105
- return (lastBatchSeq == NO_BATCH_SEQUENCE && isWriterInBatchExpired )
142
+ private boolean inSequence (
143
+ int lastBatchSeq ,
144
+ int nextBatchSeq ,
145
+ boolean isWriterInBatchExpired ,
146
+ boolean isAppendAsLeader ) {
147
+ return (lastBatchSeq == NO_BATCH_SEQUENCE && (isWriterInBatchExpired || !isAppendAsLeader ))
106
148
|| nextBatchSeq == lastBatchSeq + 1L
107
149
|| (nextBatchSeq == 0 && lastBatchSeq == Integer .MAX_VALUE );
108
150
}
0 commit comments