Skip to content

Commit 2840c6c

Browse files
committed
raft: send up-to-date commit index in heartbeats
TODO: describe why it is now safe.

Signed-off-by: Pavel Kalinnikov <[email protected]>
1 parent 9bffaa4 commit 2840c6c

File tree

5 files changed

+22
-29
lines changed

5 files changed

+22
-29
lines changed

raft.go

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -672,21 +672,12 @@ func (r *raft) maybeSendAppend(to uint64, sendIfEmpty bool) bool {
672672

673673
// sendHeartbeat sends a heartbeat RPC to the given peer.
674674
func (r *raft) sendHeartbeat(to uint64, ctx []byte) {
675-
// Attach the commit as min(to.matched, r.committed).
676-
// When the leader sends out heartbeat message,
677-
// the receiver(follower) might not be matched with the leader
678-
// or it might not have all the committed entries.
679-
// The leader MUST NOT forward the follower's commit to
680-
// an unmatched index.
681-
commit := min(r.trk.Progress[to].Match, r.raftLog.committed)
682-
m := pb.Message{
675+
r.send(pb.Message{
683676
To: to,
684677
Type: pb.MsgHeartbeat,
685-
Commit: commit,
678+
Commit: r.raftLog.committed,
686679
Context: ctx,
687-
}
688-
689-
r.send(m)
680+
})
690681
}
691682

692683
// bcastAppend sends RPC, with entries to all peers that are not up-to-date

testdata/async_storage_writes_append_aba_race.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -346,16 +346,16 @@ process-ready 4
346346
----
347347
Ready MustSync=false:
348348
Messages:
349-
4->1 MsgHeartbeat Term:3 Log:0/0
350-
4->2 MsgHeartbeat Term:3 Log:0/0
351-
4->3 MsgHeartbeat Term:3 Log:0/0
352-
4->5 MsgHeartbeat Term:3 Log:0/0
353-
4->6 MsgHeartbeat Term:3 Log:0/0
354-
4->7 MsgHeartbeat Term:3 Log:0/0
349+
4->1 MsgHeartbeat Term:3 Log:0/0 Commit:11
350+
4->2 MsgHeartbeat Term:3 Log:0/0 Commit:11
351+
4->3 MsgHeartbeat Term:3 Log:0/0 Commit:11
352+
4->5 MsgHeartbeat Term:3 Log:0/0 Commit:11
353+
4->6 MsgHeartbeat Term:3 Log:0/0 Commit:11
354+
4->7 MsgHeartbeat Term:3 Log:0/0 Commit:11
355355

356356
deliver-msgs 1
357357
----
358-
4->1 MsgHeartbeat Term:3 Log:0/0
358+
4->1 MsgHeartbeat Term:3 Log:0/0 Commit:11
359359
INFO 1 [term: 2] received a MsgHeartbeat message with higher term from 4 [term: 3]
360360
INFO 1 became follower at term 3
361361

testdata/lagging_commit.txt

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ process-ready 1
134134
Ready MustSync=false:
135135
Messages:
136136
1->2 MsgHeartbeat Term:1 Log:0/0 Commit:13
137-
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:11
137+
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:13
138138

139139
# Since the heartbeat message does not bump the follower's commit index, it will
140140
# take another roundtrip with the leader to update it. As such, the total time
@@ -146,12 +146,18 @@ Messages:
146146
# leader sent the up-to-date commit index in the heartbeat message.
147147
#
148148
# See https://github.com/etcd-io/raft/issues/138 which aims to fix this.
149+
#
150+
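# This is now fixed: the heartbeat carries the leader's up-to-date commit index,
# so the follower advances its commit index as soon as it receives the heartbeat.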
149151
stabilize 1 3
150152
----
151153
> 3 receiving messages
152-
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:11
154+
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:13
153155
> 3 handling Ready
154156
Ready MustSync=false:
157+
HardState Term:1 Vote:1 Commit:13
158+
CommittedEntries:
159+
1/12 EntryNormal "data1"
160+
1/13 EntryNormal "data2"
155161
Messages:
156162
3->1 MsgHeartbeatResp Term:1 Log:0/0
157163
> 1 receiving messages
@@ -164,10 +170,6 @@ stabilize 1 3
164170
1->3 MsgApp Term:1 Log:1/13 Commit:13
165171
> 3 handling Ready
166172
Ready MustSync=false:
167-
HardState Term:1 Vote:1 Commit:13
168-
CommittedEntries:
169-
1/12 EntryNormal "data1"
170-
1/13 EntryNormal "data2"
171173
Messages:
172174
3->1 MsgAppResp Term:1 Log:0/13
173175
> 1 receiving messages

testdata/replicate_pause.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,14 +129,14 @@ stabilize 1
129129
Ready MustSync=false:
130130
Messages:
131131
1->2 MsgHeartbeat Term:1 Log:0/0 Commit:17
132-
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:11
132+
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:17
133133

134134
stabilize 2 3
135135
----
136136
> 2 receiving messages
137137
1->2 MsgHeartbeat Term:1 Log:0/0 Commit:17
138138
> 3 receiving messages
139-
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:11
139+
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:17
140140
> 2 handling Ready
141141
Ready MustSync=false:
142142
Messages:

testdata/snapshot_succeed_via_app_resp.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,14 @@ process-ready 1
6464
Ready MustSync=false:
6565
Messages:
6666
1->2 MsgHeartbeat Term:1 Log:0/0 Commit:11
67-
1->3 MsgHeartbeat Term:1 Log:0/0
67+
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:11
6868

6969
# Iterate until no more work is done by the new peer. It receives the heartbeat
7070
# and responds.
7171
stabilize 3
7272
----
7373
> 3 receiving messages
74-
1->3 MsgHeartbeat Term:1 Log:0/0
74+
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:11
7575
INFO 3 [term: 0] received a MsgHeartbeat message with higher term from 1 [term: 1]
7676
INFO 3 became follower at term 1
7777
> 3 handling Ready

0 commit comments

Comments
 (0)