Skip to content

Commit 73c85a0

Browse files
committed
raft: replace logSynced bool with leaderTerm
Tracking the term of the leader with whom the log is consistent is more straightforward than tracking an obscure bool. It also comes with extra benefits when the TODO to add more safety checks to raftLog is done: leaderTerm can be compared for establishing the order, whereas the bool can only be true/false. Initializing leaderTerm during the raftLog initialization is cheaper compared to initializing it in becomeFollower(), because the former happens only once, while the latter can happen many times during the lifetime of this node. Signed-off-by: Pavel Kalinnikov <[email protected]>
1 parent 68a7a9e commit 73c85a0

File tree

2 files changed

+42
-17
lines changed

2 files changed

+42
-17
lines changed

log.go

+34
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,35 @@ type raftLog struct {
2929
// they will be saved into storage.
3030
unstable unstable
3131

32+
// leaderTerm is the term of the leader with whom our log is "consistent". The
33+
// log is guaranteed to be a prefix of this term's leader log.
34+
//
35+
// The leaderTerm can be safely updated to `t` if:
36+
// 1. the last entry in the log has term `t`, or, more generally,
37+
// 2. the last successful append was sent by the leader `t`.
38+
//
39+
// This is due to the following safety property (see raft paper §5.3):
40+
//
41+
// Log Matching: if two logs contain an entry with the same index and term,
42+
// then the logs are identical in all entries up through the given index.
43+
//
44+
// We use (1) to initialize leaderTerm, and (2) to maintain it on updates.
45+
//
46+
// NB: (2) does not imply (1). If our log is behind the leader's log, the last
47+
// entry term can be below leaderTerm.
48+
//
49+
// NB: leaderTerm does not necessarily match this raft node's term. It only
50+
// does for the leader. For followers and candidates, when we first learn or
51+
// bump to a new term, we don't have a proof that our log is consistent with
52+
// the new term's leader (current or prospective). The new leader may override
53+
// any suffix of the log after the committed index. Only when the first append
54+
// from the new leader succeeds, we can update leaderTerm.
55+
//
56+
// During normal operation, leaderTerm matches the node term though. During a
57+
// leader change, it briefly lags behind, and matches again when the first
58+
// append message succeeds.
59+
leaderTerm uint64
60+
3261
// committed is the highest log position that is known to be in
3362
// stable storage on a quorum of nodes.
3463
committed uint64
@@ -88,6 +117,11 @@ func newLogWithSize(storage Storage, logger Logger, maxApplyingEntsSize entryEnc
88117
if err != nil {
89118
panic(err) // TODO(bdarnell)
90119
}
120+
lastTerm, err := storage.Term(lastIndex)
121+
if err != nil {
122+
panic(err) // TODO(pav-kv)
123+
}
124+
log.leaderTerm = lastTerm
91125
log.unstable.offset = lastIndex + 1
92126
log.unstable.offsetInProgress = lastIndex + 1
93127
log.unstable.logger = logger

raft.go

+8-17
Original file line numberDiff line numberDiff line change
@@ -375,12 +375,6 @@ type raft struct {
375375

376376
// the leader id
377377
lead uint64
378-
// logSynced is true if this node's log is guaranteed to be a prefix of the
379-
// leader's log at this term. Always true for the leader. Always false for a
380-
// candidate. For a follower, this is true if the last entry term matches the
381-
// leader term, otherwise becomes true when the first MsgApp append from the
382-
// leader succeeds.
383-
logSynced bool
384378
// leadTransferee is id of the leader transfer target when its value is not zero.
385379
// Follow the procedure defined in raft thesis 3.10.
386380
leadTransferee uint64
@@ -769,7 +763,6 @@ func (r *raft) reset(term uint64) {
769763
r.Vote = None
770764
}
771765
r.lead = None
772-
r.logSynced = false
773766

774767
r.electionElapsed = 0
775768
r.heartbeatElapsed = 0
@@ -873,10 +866,6 @@ func (r *raft) becomeFollower(term uint64, lead uint64) {
873866
r.reset(term)
874867
r.tick = r.tickElection
875868
r.lead = lead
876-
// If the last entry term matches the leader term, the log is guaranteed to be
877-
// a prefix of the leader's log. Otherwise, we will establish this guarantee
878-
// later, on the first successful MsgApp.
879-
r.logSynced = r.raftLog.lastTerm() == term
880869
r.state = StateFollower
881870
r.logger.Infof("%x became follower at term %d", r.id, r.Term)
882871
}
@@ -919,7 +908,6 @@ func (r *raft) becomeLeader() {
919908
r.reset(r.Term)
920909
r.tick = r.tickHeartbeat
921910
r.lead = r.id
922-
r.logSynced = true // the leader's log is in sync with itself
923911
r.state = StateLeader
924912
// Followers enter replicate mode when they've been successfully probed
925913
// (perhaps after having received a snapshot as a result). The leader is
@@ -947,6 +935,8 @@ func (r *raft) becomeLeader() {
947935
// so the preceding log append does not count against the uncommitted log
948936
// quota of the new leader. In other words, after the call to appendEntry,
949937
// r.uncommittedSize is still 0.
938+
939+
r.raftLog.leaderTerm = r.Term // the leader's log is consistent with itself
950940
r.logger.Infof("%x became leader at term %d", r.id, r.Term)
951941
}
952942

@@ -1747,7 +1737,7 @@ func (r *raft) handleAppendEntries(m pb.Message) {
17471737
return
17481738
}
17491739
if mlastIndex, ok := r.raftLog.maybeAppend(m.Index, m.LogTerm, m.Commit, m.Entries...); ok {
1750-
r.logSynced = true // from now on, the log is a prefix of the leader's log
1740+
r.raftLog.leaderTerm = m.Term // the log is now consistent with the leader
17511741
r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: mlastIndex})
17521742
return
17531743
}
@@ -1787,10 +1777,10 @@ func (r *raft) handleHeartbeat(m pb.Message) {
17871777
// leader's log. Otherwise, entries at this index may mismatch.
17881778
//
17891779
// TODO(pav-kv): move this logic to r.raftLog, which is more appropriate for
1790-
// handling safety. The raftLog can use the logSynced flag for other safety
1791-
// checks. For example, unstable.truncateAndAppend currently may override a
1792-
// suffix of the log unconditionally, but it can only be done if !logSynced.
1793-
if r.logSynced {
1780+
// handling safety. The raftLog can use leaderTerm for other safety checks.
1781+
// For example, unstable.truncateAndAppend currently may override a suffix of
1782+
// the log unconditionally, but it can only be done if m.Term > leaderTerm.
1783+
if m.Term == r.raftLog.leaderTerm {
17941784
r.raftLog.commitTo(min(m.Commit, r.raftLog.lastIndex()))
17951785
}
17961786
r.send(pb.Message{To: m.From, Type: pb.MsgHeartbeatResp, Context: m.Context})
@@ -1807,6 +1797,7 @@ func (r *raft) handleSnapshot(m pb.Message) {
18071797
if r.restore(s) {
18081798
r.logger.Infof("%x [commit: %d] restored snapshot [index: %d, term: %d]",
18091799
r.id, r.raftLog.committed, sindex, sterm)
1800+
r.raftLog.leaderTerm = m.Term // the log is now consistent with the leader
18101801
r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: r.raftLog.lastIndex()})
18111802
} else {
18121803
r.logger.Infof("%x [commit: %d] ignored snapshot [index: %d, term: %d]",

0 commit comments

Comments
 (0)