Skip to content

Commit 30f9d89

Browse files
committed
raft: replace logSynced bool with leaderTerm
Tracking the term of the leader with whom the log is consistent is more straightforward than tracking an obscure bool. It also comes with extra benefits when the TODO to add more safety checks to raftLog is done: leaderTerm can be compared for establishing the order, whereas the bool can only be true/false. Initializing leaderTerm during the raftLog initialization is cheaper compared to initializing it in becomeFollower(), because the former happens only once, while the latter can happen many times during the lifetime of this node. Signed-off-by: Pavel Kalinnikov <[email protected]>
1 parent 68a7a9e commit 30f9d89

File tree

2 files changed

+42
-11
lines changed

2 files changed

+42
-11
lines changed

log.go

+34
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,35 @@ type raftLog struct {
2929
// they will be saved into storage.
3030
unstable unstable
3131

32+
// leaderTerm is a term of the leader with whom our log is "consistent". The
33+
// log is guaranteed to be a prefix of this term's leader log.
34+
//
35+
// The leaderTerm can be safely updated to `t` if:
36+
// 1. the last entry in the log has term `t`, or, more generally,
37+
// 2. the last successful append was sent by the leader `t`.
38+
//
39+
// This is due to the following safety property (see raft paper §5.3):
40+
//
41+
// Log Matching: if two logs contain an entry with the same index and term,
42+
// then the logs are identical in all entries up through the given index.
43+
//
44+
// We use (1) to initialize leaderTerm, and (2) to maintain it on updates.
45+
//
46+
// NB: (2) does not imply (1). If our log is behind the leader's log, the last
47+
// entry term can be below leaderTerm.
48+
//
49+
// NB: leaderTerm does not necessarily match this raft node's term. It only
50+
// does for the leader. For followers and candidates, when we first learn or
51+
// bump to a new term, we don't have a proof that our log is consistent with
52+
// the new term's leader (current or prospective). The new leader may override
53+
// any suffix of the log after the committed index. Only when the first append
54+
// from the new leader succeeds, we can update leaderTerm.
55+
//
56+
// During normal operation, leaderTerm matches the node term though. During a
57+
// leader change, it briefly lags behind, and matches again when the first
58+
// append message succeeds.
59+
leaderTerm uint64
60+
3261
// committed is the highest log position that is known to be in
3362
// stable storage on a quorum of nodes.
3463
committed uint64
@@ -88,6 +117,11 @@ func newLogWithSize(storage Storage, logger Logger, maxApplyingEntsSize entryEnc
88117
if err != nil {
89118
panic(err) // TODO(bdarnell)
90119
}
120+
lastTerm, err := storage.Term(lastIndex)
121+
if err != nil {
122+
panic(err) // TODO(pav-kv)
123+
}
124+
log.leaderTerm = lastTerm
91125
log.unstable.offset = lastIndex + 1
92126
log.unstable.offsetInProgress = lastIndex + 1
93127
log.unstable.logger = logger

raft.go

+8-11
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,6 @@ func (r *raft) reset(term uint64) {
769769
r.Vote = None
770770
}
771771
r.lead = None
772-
r.logSynced = false
773772

774773
r.electionElapsed = 0
775774
r.heartbeatElapsed = 0
@@ -873,10 +872,6 @@ func (r *raft) becomeFollower(term uint64, lead uint64) {
873872
r.reset(term)
874873
r.tick = r.tickElection
875874
r.lead = lead
876-
// If the last entry term matches the leader term, the log is guaranteed to be
877-
// a prefix of the leader's log. Otherwise, we will establish this guarantee
878-
// later, on the first successful MsgApp.
879-
r.logSynced = r.raftLog.lastTerm() == term
880875
r.state = StateFollower
881876
r.logger.Infof("%x became follower at term %d", r.id, r.Term)
882877
}
@@ -919,7 +914,6 @@ func (r *raft) becomeLeader() {
919914
r.reset(r.Term)
920915
r.tick = r.tickHeartbeat
921916
r.lead = r.id
922-
r.logSynced = true // the leader's log is in sync with itself
923917
r.state = StateLeader
924918
// Followers enter replicate mode when they've been successfully probed
925919
// (perhaps after having received a snapshot as a result). The leader is
@@ -947,6 +941,8 @@ func (r *raft) becomeLeader() {
947941
// so the preceding log append does not count against the uncommitted log
948942
// quota of the new leader. In other words, after the call to appendEntry,
949943
// r.uncommittedSize is still 0.
944+
945+
r.raftLog.leaderTerm = r.Term // the leader's log is consistent with itself
950946
r.logger.Infof("%x became leader at term %d", r.id, r.Term)
951947
}
952948

@@ -1747,7 +1743,7 @@ func (r *raft) handleAppendEntries(m pb.Message) {
17471743
return
17481744
}
17491745
if mlastIndex, ok := r.raftLog.maybeAppend(m.Index, m.LogTerm, m.Commit, m.Entries...); ok {
1750-
r.logSynced = true // from now on, the log is a prefix of the leader's log
1746+
r.raftLog.leaderTerm = m.Term // the log is now consistent with the leader
17511747
r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: mlastIndex})
17521748
return
17531749
}
@@ -1787,10 +1783,10 @@ func (r *raft) handleHeartbeat(m pb.Message) {
17871783
// leader's log. Otherwise, entries at this index may mismatch.
17881784
//
17891785
// TODO(pav-kv): move this logic to r.raftLog, which is more appropriate for
1790-
// handling safety. The raftLog can use the logSynced flag for other safety
1791-
// checks. For example, unstable.truncateAndAppend currently may override a
1792-
// suffix of the log unconditionally, but it can only be done if !logSynced.
1793-
if r.logSynced {
1786+
// handling safety. The raftLog can use leaderTerm for other safety checks.
1787+
// For example, unstable.truncateAndAppend currently may override a suffix of
1788+
// the log unconditionally, but it can only be done if m.Term > leaderTerm.
1789+
if m.Term == r.raftLog.leaderTerm {
17941790
r.raftLog.commitTo(min(m.Commit, r.raftLog.lastIndex()))
17951791
}
17961792
r.send(pb.Message{To: m.From, Type: pb.MsgHeartbeatResp, Context: m.Context})
@@ -1807,6 +1803,7 @@ func (r *raft) handleSnapshot(m pb.Message) {
18071803
if r.restore(s) {
18081804
r.logger.Infof("%x [commit: %d] restored snapshot [index: %d, term: %d]",
18091805
r.id, r.raftLog.committed, sindex, sterm)
1806+
r.raftLog.leaderTerm = m.Term // the log is now consistent with the leader
18101807
r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: r.raftLog.lastIndex()})
18111808
} else {
18121809
r.logger.Infof("%x [commit: %d] ignored snapshot [index: %d, term: %d]",

0 commit comments

Comments
 (0)