diff --git a/.github/workflows/linearizability.yaml b/.github/workflows/linearizability.yaml index 7303a35099b8..4938d6a0086a 100644 --- a/.github/workflows/linearizability.yaml +++ b/.github/workflows/linearizability.yaml @@ -13,7 +13,7 @@ jobs: make build mkdir -p /tmp/linearizability cat server/etcdserver/raft.fail.go - EXPECT_DEBUG=true GO_TEST_FLAGS=-v RESULTS_DIR=/tmp/linearizability make test-linearizability + EXPECT_DEBUG=true GO_TEST_FLAGS='-v --count 60 --failfast --run TestLinearizability' RESULTS_DIR=/tmp/linearizability make test-linearizability - uses: actions/upload-artifact@v2 if: always() with: diff --git a/CHANGELOG/CHANGELOG-3.5.md b/CHANGELOG/CHANGELOG-3.5.md index 59cc13b82b91..6679f4fddc75 100644 --- a/CHANGELOG/CHANGELOG-3.5.md +++ b/CHANGELOG/CHANGELOG-3.5.md @@ -8,6 +8,7 @@ Previous change logs can be found at [CHANGELOG-3.4](https://github.com/etcd-io/ ### etcd server - Fix [Remove memberID from data corrupt alarm](https://github.com/etcd-io/etcd/pull/14852). +- Fix [non-mutating requests pass through quotaKVServer when NOSPACE](https://github.com/etcd-io/etcd/pull/14884).
diff --git a/CHANGELOG/CHANGELOG-3.6.md b/CHANGELOG/CHANGELOG-3.6.md index f74b41fe5e9a..f3c5635bfc97 100644 --- a/CHANGELOG/CHANGELOG-3.6.md +++ b/CHANGELOG/CHANGELOG-3.6.md @@ -50,6 +50,7 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.5.0...v3.6.0). ### Package `raft` - Send empty `MsgApp` when entry in-flight limits are exceeded. See [pull/14633](https://github.com/etcd-io/etcd/pull/14633). - Add [MaxInflightBytes](https://github.com/etcd-io/etcd/pull/14624) setting in `raft.Config` for better flow control of entries. +- [Decouple raft from etcd](https://github.com/etcd-io/etcd/issues/14713). Migrated raft to a separate [repository](https://github.com/etcd-io/raft), and renamed the raft module to `go.etcd.io/raft/v3`. ### etcd server diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ec9687a69822..05cb8f8ed872 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,6 +57,11 @@ Setup environment: - For ubuntu and debian run `sudo apt-get install build-essentials` - Verify that everything is installed by running `make build` +Note: `make build` runs with `-v`. Other build flags can be added through the `GO_BUILD_FLAGS` environment variable, **if required**. 
Eg., +```console +GO_BUILD_FLAGS="-buildmode=pie" make build +``` + [file an issue]: https://github.com/etcd-io/etcd/issues/new/choose ## Implement your change diff --git a/Documentation/contributor-guide/modules-future.svg b/Documentation/contributor-guide/modules-future.svg index 6766c84a3fea..92d060a29fc2 100644 --- a/Documentation/contributor-guide/modules-future.svg +++ b/Documentation/contributor-guide/modules-future.svg @@ -1,2 +1,604 @@ - - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + go.etcd.io/raft/v3 + + diff --git a/Documentation/contributor-guide/modules.md b/Documentation/contributor-guide/modules.md index 2e8ded443ae8..a8551aa39ebc 100644 --- a/Documentation/contributor-guide/modules.md +++ b/Documentation/contributor-guide/modules.md @@ -24,8 +24,9 @@ There are following modules: - **go.etcd.io/etcd/client/v2** - legacy client library used to contact etcd over HTTP protocol. Deprecated. All new usage should depend on /v3 library. - - **go.etcd.io/etcd/raft/v3** - implementation of distributed consensus - protocol. Should have no etcd specific code. + - **go.etcd.io/raft/v3** - implementation of distributed consensus + protocol. Should have no etcd specific code. Hosted in a separate repository: + https://github.com/etcd-io/raft. - **go.etcd.io/etcd/server/v3** - etcd implementation. 
The code in this package is etcd internal and should not be consumed diff --git a/Documentation/contributor-guide/modules.svg b/Documentation/contributor-guide/modules.svg index 24a711a5907c..5a3c3b2c39e2 100644 --- a/Documentation/contributor-guide/modules.svg +++ b/Documentation/contributor-guide/modules.svg @@ -1 +1,489 @@ - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + go.etcd.io/raft/v3 + + diff --git a/Makefile b/Makefile index b47e58e517e2..1044e776904b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ .PHONY: build build: - GO_BUILD_FLAGS="-v" ./scripts/build.sh + GO_BUILD_FLAGS="${GO_BUILD_FLAGS} -v" ./scripts/build.sh ./bin/etcd --version ./bin/etcdctl version ./bin/etcdutl version @@ -141,4 +141,5 @@ clean: rm -rf ./release rm -rf ./coverage/*.err ./coverage/*.out rm -rf ./tests/e2e/default.proxy + rm -rf ./bin/shellcheck* find ./ -name "127.0.0.1:*" -o -name "localhost:*" -o -name "*.log" -o -name "agent-*" -o -name "*.coverprofile" -o -name "testname-proxy-*" -delete diff --git a/bill-of-materials.json b/bill-of-materials.json index 5b36cbb48f52..f8d21e4840b1 100644 --- a/bill-of-materials.json +++ b/bill-of-materials.json @@ -53,15 +53,6 @@ } ] }, - { - "project": "github.com/certifi/gocertifi", - "licenses": [ - { - "type": "Mozilla Public License 2.0", - "confidence": 1 - } - ] - }, { "project": "github.com/cespare/xxhash/v2", "licenses": [ @@ -80,33 +71,6 @@ } ] }, - { - "project": "github.com/cockroachdb/datadriven", - "licenses": [ - { - "type": "Apache License 2.0", - "confidence": 1 - } - ] - }, - { - "project": "github.com/cockroachdb/errors", - "licenses": [ - { - "type": "Apache License 2.0", - "confidence": 1 - } - ] - }, - { - "project": "github.com/cockroachdb/logtags", - "licenses": [ - { - "type": "Apache License 2.0", - "confidence": 1 - } - ] - }, { "project": "github.com/coreos/go-semver/semver", 
"licenses": [ @@ -161,15 +125,6 @@ } ] }, - { - "project": "github.com/getsentry/raven-go", - "licenses": [ - { - "type": "BSD 3-clause \"New\" or \"Revised\" License", - "confidence": 0.9663865546218487 - } - ] - }, { "project": "github.com/go-logr/logr", "licenses": [ @@ -350,15 +305,6 @@ } ] }, - { - "project": "github.com/pkg/errors", - "licenses": [ - { - "type": "BSD 2-clause \"Simplified\" License", - "confidence": 1 - } - ] - }, { "project": "github.com/pmezard/go-difflib/difflib", "licenses": [ @@ -549,7 +495,7 @@ ] }, { - "project": "go.etcd.io/etcd/raft/v3", + "project": "go.etcd.io/etcd/server/v3", "licenses": [ { "type": "Apache License 2.0", @@ -558,7 +504,7 @@ ] }, { - "project": "go.etcd.io/etcd/server/v3", + "project": "go.etcd.io/etcd/tests/v3", "licenses": [ { "type": "Apache License 2.0", @@ -567,7 +513,7 @@ ] }, { - "project": "go.etcd.io/etcd/tests/v3", + "project": "go.etcd.io/etcd/v3", "licenses": [ { "type": "Apache License 2.0", @@ -576,7 +522,7 @@ ] }, { - "project": "go.etcd.io/etcd/v3", + "project": "go.etcd.io/raft/v3", "licenses": [ { "type": "Apache License 2.0", diff --git a/codecov.yml b/codecov.yml index a4b3b7f27738..5dfc7b9b9348 100644 --- a/codecov.yml +++ b/codecov.yml @@ -8,7 +8,6 @@ fixes: - "go.etcd.io/etcd/client/v2/::client/v2/" - "go.etcd.io/etcd/etcdctl/v3/::etcdctl/" - "go.etcd.io/etcd/pkg/v3/::pkg/" - - "go.etcd.io/etcd/raft/v3/::raft/" - "go.etcd.io/etcd/server/v3/::server/" ignore: diff --git a/contrib/raftexample/httpapi.go b/contrib/raftexample/httpapi.go index b85e6a2135b3..dbe226add33c 100644 --- a/contrib/raftexample/httpapi.go +++ b/contrib/raftexample/httpapi.go @@ -20,7 +20,7 @@ import ( "net/http" "strconv" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) // Handler for a http based key-value store backed by raft diff --git a/contrib/raftexample/kvstore.go b/contrib/raftexample/kvstore.go index ba49d00ee55c..22f8915fe1c4 100644 --- a/contrib/raftexample/kvstore.go +++ 
b/contrib/raftexample/kvstore.go @@ -21,8 +21,8 @@ import ( "log" "sync" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" + "go.etcd.io/raft/v3/raftpb" ) // a key-value store backed by raft diff --git a/contrib/raftexample/main.go b/contrib/raftexample/main.go index 1845d0964726..73f02787a356 100644 --- a/contrib/raftexample/main.go +++ b/contrib/raftexample/main.go @@ -18,7 +18,7 @@ import ( "flag" "strings" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) func main() { diff --git a/contrib/raftexample/raft.go b/contrib/raftexample/raft.go index b1618e1c1b70..971141ae3598 100644 --- a/contrib/raftexample/raft.go +++ b/contrib/raftexample/raft.go @@ -26,13 +26,13 @@ import ( "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" "go.etcd.io/etcd/server/v3/storage/wal" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/contrib/raftexample/raft_test.go b/contrib/raftexample/raft_test.go index c64b65ee655f..5a0385be226e 100644 --- a/contrib/raftexample/raft_test.go +++ b/contrib/raftexample/raft_test.go @@ -18,7 +18,7 @@ import ( "reflect" "testing" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) func TestProcessMessages(t *testing.T) { diff --git a/contrib/raftexample/raftexample_test.go b/contrib/raftexample/raftexample_test.go index 6b881e881bb2..f7aa335eb04d 100644 --- a/contrib/raftexample/raftexample_test.go +++ b/contrib/raftexample/raftexample_test.go @@ -25,7 +25,7 @@ import ( "testing" "time" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) func getSnapshotFn() (func() ([]byte, error), <-chan struct{}) { diff --git a/etcdutl/etcdutl/backup_command.go 
b/etcdutl/etcdutl/backup_command.go index d504cc3572bd..e2ed57ef40c1 100644 --- a/etcdutl/etcdutl/backup_command.go +++ b/etcdutl/etcdutl/backup_command.go @@ -26,7 +26,6 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/idutil" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" "go.etcd.io/etcd/server/v3/etcdserver/api/v2store" @@ -36,6 +35,7 @@ import ( "go.etcd.io/etcd/server/v3/storage/wal" "go.etcd.io/etcd/server/v3/storage/wal/walpb" "go.etcd.io/etcd/server/v3/verify" + "go.etcd.io/raft/v3/raftpb" bolt "go.etcd.io/bbolt" "go.uber.org/zap" diff --git a/etcdutl/go.mod b/etcdutl/go.mod index e560f984371f..857dd8bec9a0 100644 --- a/etcdutl/go.mod +++ b/etcdutl/go.mod @@ -8,7 +8,6 @@ replace ( go.etcd.io/etcd/client/v2 => ../client/v2 go.etcd.io/etcd/client/v3 => ../client/v3 go.etcd.io/etcd/pkg/v3 => ../pkg - go.etcd.io/etcd/raft/v3 => ../raft go.etcd.io/etcd/server/v3 => ../server ) @@ -30,8 +29,8 @@ require ( go.etcd.io/etcd/client/pkg/v3 v3.6.0-alpha.0 go.etcd.io/etcd/client/v3 v3.6.0-alpha.0 go.etcd.io/etcd/pkg/v3 v3.6.0-alpha.0 - go.etcd.io/etcd/raft/v3 v3.6.0-alpha.0 go.etcd.io/etcd/server/v3 v3.6.0-alpha.0 + go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a go.uber.org/zap v1.21.0 ) diff --git a/etcdutl/go.sum b/etcdutl/go.sum index de09ce292978..b6a546d96b47 100644 --- a/etcdutl/go.sum +++ b/etcdutl/go.sum @@ -261,6 +261,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a h1:Znv2XJyAf/fsJsFNt9toO8uyXwwHQ44wxqsvdSxipj4= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a/go.mod 
h1:eMshmuwXLWZrjHXN8ZgYrOMQRSbHqi5M84DEZWhG+o4= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= diff --git a/etcdutl/snapshot/v3_snapshot.go b/etcdutl/snapshot/v3_snapshot.go index 25194f470817..598408361103 100644 --- a/etcdutl/snapshot/v3_snapshot.go +++ b/etcdutl/snapshot/v3_snapshot.go @@ -32,8 +32,6 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/client/v3/snapshot" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" @@ -45,6 +43,8 @@ import ( "go.etcd.io/etcd/server/v3/storage/wal" "go.etcd.io/etcd/server/v3/storage/wal/walpb" "go.etcd.io/etcd/server/v3/verify" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/go.mod b/go.mod index f2830185d29c..fbe6e0fe2f7d 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,6 @@ replace ( go.etcd.io/etcd/etcdctl/v3 => ./etcdctl go.etcd.io/etcd/etcdutl/v3 => ./etcdutl go.etcd.io/etcd/pkg/v3 => ./pkg - go.etcd.io/etcd/raft/v3 => ./raft go.etcd.io/etcd/server/v3 => ./server go.etcd.io/etcd/tests/v3 => ./tests ) @@ -29,9 +28,9 @@ require ( go.etcd.io/etcd/etcdctl/v3 v3.6.0-alpha.0 go.etcd.io/etcd/etcdutl/v3 v3.6.0-alpha.0 go.etcd.io/etcd/pkg/v3 v3.6.0-alpha.0 - go.etcd.io/etcd/raft/v3 v3.6.0-alpha.0 go.etcd.io/etcd/server/v3 v3.6.0-alpha.0 go.etcd.io/etcd/tests/v3 v3.6.0-alpha.0 + go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a go.uber.org/zap v1.21.0 golang.org/x/time v0.0.0-20220609170525-579cf78fd858 google.golang.org/grpc v1.51.0 diff --git a/go.sum b/go.sum index 4d67d14d5a3a..b84b844edd26 100644 --- a/go.sum +++ b/go.sum @@ -330,6 +330,8 @@ github.com/yuin/goldmark v1.2.1/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a h1:Znv2XJyAf/fsJsFNt9toO8uyXwwHQ44wxqsvdSxipj4= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a/go.mod h1:eMshmuwXLWZrjHXN8ZgYrOMQRSbHqi5M84DEZWhG+o4= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= diff --git a/raft/LICENSE b/raft/LICENSE deleted file mode 100644 index d64569567334..000000000000 --- a/raft/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/raft/OWNERS b/raft/OWNERS deleted file mode 100644 index ab781066e237..000000000000 --- a/raft/OWNERS +++ /dev/null @@ -1,19 +0,0 @@ -approvers: -- heyitsanthony -- philips -- fanminshi -- gyuho -- mitake -- jpbetz -- xiang90 -- bdarnell -reviewers: -- heyitsanthony -- philips -- fanminshi -- gyuho -- mitake -- jpbetz -- xiang90 -- bdarnell -- tschottdorf diff --git a/raft/README.md b/raft/README.md deleted file mode 100644 index fbd8b4d49b5e..000000000000 --- a/raft/README.md +++ /dev/null @@ -1,201 +0,0 @@ -# Raft library - -Raft is a protocol with which a cluster of nodes can maintain a replicated state machine. -The state machine is kept in sync through the use of a replicated log. -For more details on Raft, see "In Search of an Understandable Consensus Algorithm" -(https://raft.github.io/raft.pdf) by Diego Ongaro and John Ousterhout. - -This Raft library is stable and feature complete. As of 2016, it is **the most widely used** Raft library in production, serving tens of thousands clusters each day. It powers distributed systems such as etcd, Kubernetes, Docker Swarm, Cloud Foundry Diego, CockroachDB, TiDB, Project Calico, Flannel, Hyperledger and more. - -Most Raft implementations have a monolithic design, including storage handling, messaging serialization, and network transport. 
This library instead follows a minimalistic design philosophy by only implementing the core raft algorithm. This minimalism buys flexibility, determinism, and performance. - -To keep the codebase small as well as provide flexibility, the library only implements the Raft algorithm; both network and disk IO are left to the user. Library users must implement their own transportation layer for message passing between Raft peers over the wire. Similarly, users must implement their own storage layer to persist the Raft log and state. - -In order to easily test the Raft library, its behavior should be deterministic. To achieve this determinism, the library models Raft as a state machine. The state machine takes a `Message` as input. A message can either be a local timer update or a network message sent from a remote peer. The state machine's output is a 3-tuple `{[]Messages, []LogEntries, NextState}` consisting of an array of `Messages`, `log entries`, and `Raft state changes`. For state machines with the same state, the same state machine input should always generate the same state machine output. - -A simple example application, _raftexample_, is also available to help illustrate how to use this package in practice: https://github.com/etcd-io/etcd/tree/main/contrib/raftexample - -# Features - -This raft implementation is a full feature implementation of Raft protocol. 
Features includes: - -- Leader election -- Log replication -- Log compaction -- Membership changes -- Leadership transfer extension -- Efficient linearizable read-only queries served by both the leader and followers - - leader checks with quorum and bypasses Raft log before processing read-only queries - - followers asks leader to get a safe read index before processing read-only queries -- More efficient lease-based linearizable read-only queries served by both the leader and followers - - leader bypasses Raft log and processing read-only queries locally - - followers asks leader to get a safe read index before processing read-only queries - - this approach relies on the clock of the all the machines in raft group - -This raft implementation also includes a few optional enhancements: - -- Optimistic pipelining to reduce log replication latency -- Flow control for log replication -- Batching Raft messages to reduce synchronized network I/O calls -- Batching log entries to reduce disk synchronized I/O -- Writing to leader's disk in parallel -- Internal proposal redirection from followers to leader -- Automatic stepping down when the leader loses quorum -- Protection against unbounded log growth when quorum is lost - -## Notable Users - -- [cockroachdb](https://github.com/cockroachdb/cockroach) A Scalable, Survivable, Strongly-Consistent SQL Database -- [dgraph](https://github.com/dgraph-io/dgraph) A Scalable, Distributed, Low Latency, High Throughput Graph Database -- [etcd](https://github.com/etcd-io/etcd) A distributed reliable key-value store -- [tikv](https://github.com/pingcap/tikv) A Distributed transactional key value database powered by Rust and Raft -- [swarmkit](https://github.com/docker/swarmkit) A toolkit for orchestrating distributed systems at any scale. -- [chain core](https://github.com/chain/chain) Software for operating permissioned, multi-asset blockchain networks - -## Usage - -The primary object in raft is a Node. 
Either start a Node from scratch using raft.StartNode or start a Node from some initial state using raft.RestartNode. - -To start a three-node cluster -```go - storage := raft.NewMemoryStorage() - c := &raft.Config{ - ID: 0x01, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: storage, - MaxSizePerMsg: 4096, - MaxInflightMsgs: 256, - } - // Set peer list to the other nodes in the cluster. - // Note that they need to be started separately as well. - n := raft.StartNode(c, []raft.Peer{{ID: 0x02}, {ID: 0x03}}) -``` - -Start a single node cluster, like so: -```go - // Create storage and config as shown above. - // Set peer list to itself, so this node can become the leader of this single-node cluster. - peers := []raft.Peer{{ID: 0x01}} - n := raft.StartNode(c, peers) -``` - -To allow a new node to join this cluster, do not pass in any peers. First, add the node to the existing cluster by calling `ProposeConfChange` on any existing node inside the cluster. Then, start the node with an empty peer list, like so: -```go - // Create storage and config as shown above. - n := raft.StartNode(c, nil) -``` - -To restart a node from previous state: -```go - storage := raft.NewMemoryStorage() - - // Recover the in-memory storage from persistent snapshot, state and entries. - storage.ApplySnapshot(snapshot) - storage.SetHardState(state) - storage.Append(entries) - - c := &raft.Config{ - ID: 0x01, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: storage, - MaxSizePerMsg: 4096, - MaxInflightMsgs: 256, - } - - // Restart raft without peer information. - // Peer information is already included in the storage. - n := raft.RestartNode(c) -``` - -After creating a Node, the user has a few responsibilities: - -First, read from the Node.Ready() channel and process the updates it contains. These steps may be performed in parallel, except as noted in step 2. - -1. Write Entries, HardState and Snapshot to persistent storage in order, i.e. 
Entries first, then HardState and Snapshot if they are not empty. If persistent storage supports atomic writes then all of them can be written together. Note that when writing an Entry with Index i, any previously-persisted entries with Index >= i must be discarded. - -2. Send all Messages to the nodes named in the To field. It is important that no messages be sent until the latest HardState has been persisted to disk, and all Entries written by any previous Ready batch (Messages may be sent while entries from the same batch are being persisted). To reduce the I/O latency, an optimization can be applied to make leader write to disk in parallel with its followers (as explained at section 10.2.1 in Raft thesis). If any Message has type MsgSnap, call Node.ReportSnapshot() after it has been sent (these messages may be large). Note: Marshalling messages is not thread-safe; it is important to make sure that no new entries are persisted while marshalling. The easiest way to achieve this is to serialise the messages directly inside the main raft loop. - -3. Apply Snapshot (if any) and CommittedEntries to the state machine. If any committed Entry has Type EntryConfChange, call Node.ApplyConfChange() to apply it to the node. The configuration change may be cancelled at this point by setting the NodeID field to zero before calling ApplyConfChange (but ApplyConfChange must be called one way or the other, and the decision to cancel must be based solely on the state machine and not external information such as the observed health of the node). - -4. Call Node.Advance() to signal readiness for the next batch of updates. This may be done at any time after step 1, although all updates must be processed in the order they were returned by Ready. - -Second, all persisted log entries must be made available via an implementation of the Storage interface. 
The provided MemoryStorage type can be used for this (if repopulating its state upon a restart), or a custom disk-backed implementation can be supplied. - -Third, after receiving a message from another node, pass it to Node.Step: - -```go - func recvRaftRPC(ctx context.Context, m raftpb.Message) { - n.Step(ctx, m) - } -``` - -Finally, call `Node.Tick()` at regular intervals (probably via a `time.Ticker`). Raft has two important timeouts: heartbeat and the election timeout. However, internally to the raft package time is represented by an abstract "tick". - -The total state machine handling loop will look something like this: - -```go - for { - select { - case <-s.Ticker: - n.Tick() - case rd := <-s.Node.Ready(): - saveToStorage(rd.HardState, rd.Entries, rd.Snapshot) - send(rd.Messages) - if !raft.IsEmptySnap(rd.Snapshot) { - processSnapshot(rd.Snapshot) - } - for _, entry := range rd.CommittedEntries { - process(entry) - if entry.Type == raftpb.EntryConfChange { - var cc raftpb.ConfChange - cc.Unmarshal(entry.Data) - s.Node.ApplyConfChange(cc) - } - } - s.Node.Advance() - case <-s.done: - return - } - } -``` - -To propose changes to the state machine from the node to take application data, serialize it into a byte slice and call: - -```go - n.Propose(ctx, data) -``` - -If the proposal is committed, data will appear in committed entries with type raftpb.EntryNormal. There is no guarantee that a proposed command will be committed; the command may have to be reproposed after a timeout. - -To add or remove node in a cluster, build ConfChange struct 'cc' and call: - -```go - n.ProposeConfChange(ctx, cc) -``` - -After config change is committed, some committed entry with type raftpb.EntryConfChange will be returned. This must be applied to node through: - -```go - var cc raftpb.ConfChange - cc.Unmarshal(data) - n.ApplyConfChange(cc) -``` - -Note: An ID represents a unique node in a cluster for all time. 
A -given ID MUST be used only once even if the old node has been removed. -This means that for example IP addresses make poor node IDs since they -may be reused. Node IDs must be non-zero. - -## Implementation notes - -This implementation is up to date with the final Raft thesis (https://github.com/ongardie/dissertation/blob/master/stanford.pdf), although this implementation of the membership change protocol differs somewhat from that described in chapter 4. The key invariant that membership changes happen one node at a time is preserved, but in our implementation the membership change takes effect when its entry is applied, not when it is added to the log (so the entry is committed under the old membership instead of the new). This is equivalent in terms of safety, since the old and new configurations are guaranteed to overlap. - -To ensure there is no attempt to commit two membership changes at once by matching log positions (which would be unsafe since they should have different quorum requirements), any proposed membership change is simply disallowed while any uncommitted change appears in the leader's log. - -This approach introduces a problem when removing a member from a two-member cluster: If one of the members dies before the other one receives the commit of the confchange entry, then the member cannot be removed any more since the cluster cannot make progress. For this reason it is highly recommended to use three or more nodes in every cluster. - -## Go docs - -More detailed development documentation can be found in go docs: https://pkg.go.dev/go.etcd.io/etcd/raft/v3. \ No newline at end of file diff --git a/raft/bootstrap.go b/raft/bootstrap.go deleted file mode 100644 index 824bd5f51bcd..000000000000 --- a/raft/bootstrap.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "errors" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -// Bootstrap initializes the RawNode for first use by appending configuration -// changes for the supplied peers. This method returns an error if the Storage -// is nonempty. -// -// It is recommended that instead of calling this method, applications bootstrap -// their state manually by setting up a Storage that has a first index > 1 and -// which stores the desired ConfState as its InitialState. -func (rn *RawNode) Bootstrap(peers []Peer) error { - if len(peers) == 0 { - return errors.New("must provide at least one peer to Bootstrap") - } - lastIndex, err := rn.raft.raftLog.storage.LastIndex() - if err != nil { - return err - } - - if lastIndex != 0 { - return errors.New("can't bootstrap a nonempty Storage") - } - - // We've faked out initial entries above, but nothing has been - // persisted. Start with an empty HardState (thus the first Ready will - // emit a HardState update for the app to persist). - rn.prevHardSt = emptyState - - // TODO(tbg): remove StartNode and give the application the right tools to - // bootstrap the initial membership in a cleaner way. - rn.raft.becomeFollower(1, None) - ents := make([]pb.Entry, len(peers)) - for i, peer := range peers { - cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context} - data, err := cc.Marshal() - if err != nil { - return err - } - - ents[i] = pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: uint64(i + 1), Data: data} - } - rn.raft.raftLog.append(ents...) 
- - // Now apply them, mainly so that the application can call Campaign - // immediately after StartNode in tests. Note that these nodes will - // be added to raft twice: here and when the application's Ready - // loop calls ApplyConfChange. The calls to addNode must come after - // all calls to raftLog.append so progress.next is set after these - // bootstrapping entries (it is an error if we try to append these - // entries since they have already been committed). - // We do not set raftLog.applied so the application will be able - // to observe all conf changes via Ready.CommittedEntries. - // - // TODO(bdarnell): These entries are still unstable; do we need to preserve - // the invariant that committed < unstable? - rn.raft.raftLog.committed = uint64(len(ents)) - for _, peer := range peers { - rn.raft.applyConfChange(pb.ConfChange{NodeID: peer.ID, Type: pb.ConfChangeAddNode}.AsV2()) - } - return nil -} diff --git a/raft/confchange/confchange.go b/raft/confchange/confchange.go deleted file mode 100644 index bc60abf7fe2e..000000000000 --- a/raft/confchange/confchange.go +++ /dev/null @@ -1,423 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package confchange - -import ( - "errors" - "fmt" - "strings" - - "go.etcd.io/etcd/raft/v3/quorum" - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -// Changer facilitates configuration changes. 
It exposes methods to handle -// simple and joint consensus while performing the proper validation that allows -// refusing invalid configuration changes before they affect the active -// configuration. -type Changer struct { - Tracker tracker.ProgressTracker - LastIndex uint64 -} - -// EnterJoint verifies that the outgoing (=right) majority config of the joint -// config is empty and initializes it with a copy of the incoming (=left) -// majority config. That is, it transitions from -// -// (1 2 3)&&() -// -// to -// -// (1 2 3)&&(1 2 3). -// -// The supplied changes are then applied to the incoming majority config, -// resulting in a joint configuration that in terms of the Raft thesis[1] -// (Section 4.3) corresponds to `C_{new,old}`. -// -// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf -func (c Changer) EnterJoint(autoLeave bool, ccs ...pb.ConfChangeSingle) (tracker.Config, tracker.ProgressMap, error) { - cfg, prs, err := c.checkAndCopy() - if err != nil { - return c.err(err) - } - if joint(cfg) { - err := errors.New("config is already joint") - return c.err(err) - } - if len(incoming(cfg.Voters)) == 0 { - // We allow adding nodes to an empty config for convenience (testing and - // bootstrap), but you can't enter a joint state. - err := errors.New("can't make a zero-voter config joint") - return c.err(err) - } - // Clear the outgoing config. - *outgoingPtr(&cfg.Voters) = quorum.MajorityConfig{} - // Copy incoming to outgoing. - for id := range incoming(cfg.Voters) { - outgoing(cfg.Voters)[id] = struct{}{} - } - - if err := c.apply(&cfg, prs, ccs...); err != nil { - return c.err(err) - } - cfg.AutoLeave = autoLeave - return checkAndReturn(cfg, prs) -} - -// LeaveJoint transitions out of a joint configuration. It is an error to call -// this method if the configuration is not joint, i.e. if the outgoing majority -// config Voters[1] is empty. 
-// -// The outgoing majority config of the joint configuration will be removed, -// that is, the incoming config is promoted as the sole decision maker. In the -// notation of the Raft thesis[1] (Section 4.3), this method transitions from -// `C_{new,old}` into `C_new`. -// -// At the same time, any staged learners (LearnersNext) the addition of which -// was held back by an overlapping voter in the former outgoing config will be -// inserted into Learners. -// -// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf -func (c Changer) LeaveJoint() (tracker.Config, tracker.ProgressMap, error) { - cfg, prs, err := c.checkAndCopy() - if err != nil { - return c.err(err) - } - if !joint(cfg) { - err := errors.New("can't leave a non-joint config") - return c.err(err) - } - if len(outgoing(cfg.Voters)) == 0 { - err := fmt.Errorf("configuration is not joint: %v", cfg) - return c.err(err) - } - for id := range cfg.LearnersNext { - nilAwareAdd(&cfg.Learners, id) - prs[id].IsLearner = true - } - cfg.LearnersNext = nil - - for id := range outgoing(cfg.Voters) { - _, isVoter := incoming(cfg.Voters)[id] - _, isLearner := cfg.Learners[id] - - if !isVoter && !isLearner { - delete(prs, id) - } - } - *outgoingPtr(&cfg.Voters) = nil - cfg.AutoLeave = false - - return checkAndReturn(cfg, prs) -} - -// Simple carries out a series of configuration changes that (in aggregate) -// mutates the incoming majority config Voters[0] by at most one. This method -// will return an error if that is not the case, if the resulting quorum is -// zero, or if the configuration is in a joint state (i.e. if there is an -// outgoing configuration). 
-func (c Changer) Simple(ccs ...pb.ConfChangeSingle) (tracker.Config, tracker.ProgressMap, error) { - cfg, prs, err := c.checkAndCopy() - if err != nil { - return c.err(err) - } - if joint(cfg) { - err := errors.New("can't apply simple config change in joint config") - return c.err(err) - } - if err := c.apply(&cfg, prs, ccs...); err != nil { - return c.err(err) - } - if n := symdiff(incoming(c.Tracker.Voters), incoming(cfg.Voters)); n > 1 { - return tracker.Config{}, nil, errors.New("more than one voter changed without entering joint config") - } - - return checkAndReturn(cfg, prs) -} - -// apply a change to the configuration. By convention, changes to voters are -// always made to the incoming majority config Voters[0]. Voters[1] is either -// empty or preserves the outgoing majority configuration while in a joint state. -func (c Changer) apply(cfg *tracker.Config, prs tracker.ProgressMap, ccs ...pb.ConfChangeSingle) error { - for _, cc := range ccs { - if cc.NodeID == 0 { - // etcd replaces the NodeID with zero if it decides (downstream of - // raft) to not apply a change, so we have to have explicit code - // here to ignore these. - continue - } - switch cc.Type { - case pb.ConfChangeAddNode: - c.makeVoter(cfg, prs, cc.NodeID) - case pb.ConfChangeAddLearnerNode: - c.makeLearner(cfg, prs, cc.NodeID) - case pb.ConfChangeRemoveNode: - c.remove(cfg, prs, cc.NodeID) - case pb.ConfChangeUpdateNode: - default: - return fmt.Errorf("unexpected conf type %d", cc.Type) - } - } - if len(incoming(cfg.Voters)) == 0 { - return errors.New("removed all voters") - } - return nil -} - -// makeVoter adds or promotes the given ID to be a voter in the incoming -// majority config. 
-func (c Changer) makeVoter(cfg *tracker.Config, prs tracker.ProgressMap, id uint64) { - pr := prs[id] - if pr == nil { - c.initProgress(cfg, prs, id, false /* isLearner */) - return - } - - pr.IsLearner = false - nilAwareDelete(&cfg.Learners, id) - nilAwareDelete(&cfg.LearnersNext, id) - incoming(cfg.Voters)[id] = struct{}{} -} - -// makeLearner makes the given ID a learner or stages it to be a learner once -// an active joint configuration is exited. -// -// The former happens when the peer is not a part of the outgoing config, in -// which case we either add a new learner or demote a voter in the incoming -// config. -// -// The latter case occurs when the configuration is joint and the peer is a -// voter in the outgoing config. In that case, we do not want to add the peer -// as a learner because then we'd have to track a peer as a voter and learner -// simultaneously. Instead, we add the learner to LearnersNext, so that it will -// be added to Learners the moment the outgoing config is removed by -// LeaveJoint(). -func (c Changer) makeLearner(cfg *tracker.Config, prs tracker.ProgressMap, id uint64) { - pr := prs[id] - if pr == nil { - c.initProgress(cfg, prs, id, true /* isLearner */) - return - } - if pr.IsLearner { - return - } - // Remove any existing voter in the incoming config... - c.remove(cfg, prs, id) - // ... but save the Progress. - prs[id] = pr - // Use LearnersNext if we can't add the learner to Learners directly, i.e. - // if the peer is still tracked as a voter in the outgoing config. It will - // be turned into a learner in LeaveJoint(). - // - // Otherwise, add a regular learner right away. - if _, onRight := outgoing(cfg.Voters)[id]; onRight { - nilAwareAdd(&cfg.LearnersNext, id) - } else { - pr.IsLearner = true - nilAwareAdd(&cfg.Learners, id) - } -} - -// remove this peer as a voter or learner from the incoming config. 
-func (c Changer) remove(cfg *tracker.Config, prs tracker.ProgressMap, id uint64) { - if _, ok := prs[id]; !ok { - return - } - - delete(incoming(cfg.Voters), id) - nilAwareDelete(&cfg.Learners, id) - nilAwareDelete(&cfg.LearnersNext, id) - - // If the peer is still a voter in the outgoing config, keep the Progress. - if _, onRight := outgoing(cfg.Voters)[id]; !onRight { - delete(prs, id) - } -} - -// initProgress initializes a new progress for the given node or learner. -func (c Changer) initProgress(cfg *tracker.Config, prs tracker.ProgressMap, id uint64, isLearner bool) { - if !isLearner { - incoming(cfg.Voters)[id] = struct{}{} - } else { - nilAwareAdd(&cfg.Learners, id) - } - prs[id] = &tracker.Progress{ - // Initializing the Progress with the last index means that the follower - // can be probed (with the last index). - // - // TODO(tbg): seems awfully optimistic. Using the first index would be - // better. The general expectation here is that the follower has no log - // at all (and will thus likely need a snapshot), though the app may - // have applied a snapshot out of band before adding the replica (thus - // making the first index the better choice). - Next: c.LastIndex, - Match: 0, - Inflights: tracker.NewInflights(c.Tracker.MaxInflight, c.Tracker.MaxInflightBytes), - IsLearner: isLearner, - // When a node is first added, we should mark it as recently active. - // Otherwise, CheckQuorum may cause us to step down if it is invoked - // before the added node has had a chance to communicate with us. - RecentActive: true, - } -} - -// checkInvariants makes sure that the config and progress are compatible with -// each other. This is used to check both what the Changer is initialized with, -// as well as what it returns. -func checkInvariants(cfg tracker.Config, prs tracker.ProgressMap) error { - // NB: intentionally allow the empty config. 
In production we'll never see a - // non-empty config (we prevent it from being created) but we will need to - // be able to *create* an initial config, for example during bootstrap (or - // during tests). Instead of having to hand-code this, we allow - // transitioning from an empty config into any other legal and non-empty - // config. - for _, ids := range []map[uint64]struct{}{ - cfg.Voters.IDs(), - cfg.Learners, - cfg.LearnersNext, - } { - for id := range ids { - if _, ok := prs[id]; !ok { - return fmt.Errorf("no progress for %d", id) - } - } - } - - // Any staged learner was staged because it could not be directly added due - // to a conflicting voter in the outgoing config. - for id := range cfg.LearnersNext { - if _, ok := outgoing(cfg.Voters)[id]; !ok { - return fmt.Errorf("%d is in LearnersNext, but not Voters[1]", id) - } - if prs[id].IsLearner { - return fmt.Errorf("%d is in LearnersNext, but is already marked as learner", id) - } - } - // Conversely Learners and Voters doesn't intersect at all. - for id := range cfg.Learners { - if _, ok := outgoing(cfg.Voters)[id]; ok { - return fmt.Errorf("%d is in Learners and Voters[1]", id) - } - if _, ok := incoming(cfg.Voters)[id]; ok { - return fmt.Errorf("%d is in Learners and Voters[0]", id) - } - if !prs[id].IsLearner { - return fmt.Errorf("%d is in Learners, but is not marked as learner", id) - } - } - - if !joint(cfg) { - // We enforce that empty maps are nil instead of zero. - if outgoing(cfg.Voters) != nil { - return fmt.Errorf("cfg.Voters[1] must be nil when not joint") - } - if cfg.LearnersNext != nil { - return fmt.Errorf("cfg.LearnersNext must be nil when not joint") - } - if cfg.AutoLeave { - return fmt.Errorf("AutoLeave must be false when not joint") - } - } - - return nil -} - -// checkAndCopy copies the tracker's config and progress map (deeply enough for -// the purposes of the Changer) and returns those copies. It returns an error -// if checkInvariants does. 
-func (c Changer) checkAndCopy() (tracker.Config, tracker.ProgressMap, error) { - cfg := c.Tracker.Config.Clone() - prs := tracker.ProgressMap{} - - for id, pr := range c.Tracker.Progress { - // A shallow copy is enough because we only mutate the Learner field. - ppr := *pr - prs[id] = &ppr - } - return checkAndReturn(cfg, prs) -} - -// checkAndReturn calls checkInvariants on the input and returns either the -// resulting error or the input. -func checkAndReturn(cfg tracker.Config, prs tracker.ProgressMap) (tracker.Config, tracker.ProgressMap, error) { - if err := checkInvariants(cfg, prs); err != nil { - return tracker.Config{}, tracker.ProgressMap{}, err - } - return cfg, prs, nil -} - -// err returns zero values and an error. -func (c Changer) err(err error) (tracker.Config, tracker.ProgressMap, error) { - return tracker.Config{}, nil, err -} - -// nilAwareAdd populates a map entry, creating the map if necessary. -func nilAwareAdd(m *map[uint64]struct{}, id uint64) { - if *m == nil { - *m = map[uint64]struct{}{} - } - (*m)[id] = struct{}{} -} - -// nilAwareDelete deletes from a map, nil'ing the map itself if it is empty after. -func nilAwareDelete(m *map[uint64]struct{}, id uint64) { - if *m == nil { - return - } - delete(*m, id) - if len(*m) == 0 { - *m = nil - } -} - -// symdiff returns the count of the symmetric difference between the sets of -// uint64s, i.e. len( (l - r) \union (r - l)). 
-func symdiff(l, r map[uint64]struct{}) int { - var n int - pairs := [][2]quorum.MajorityConfig{ - {l, r}, // count elems in l but not in r - {r, l}, // count elems in r but not in l - } - for _, p := range pairs { - for id := range p[0] { - if _, ok := p[1][id]; !ok { - n++ - } - } - } - return n -} - -func joint(cfg tracker.Config) bool { - return len(outgoing(cfg.Voters)) > 0 -} - -func incoming(voters quorum.JointConfig) quorum.MajorityConfig { return voters[0] } -func outgoing(voters quorum.JointConfig) quorum.MajorityConfig { return voters[1] } -func outgoingPtr(voters *quorum.JointConfig) *quorum.MajorityConfig { return &voters[1] } - -// Describe prints the type and NodeID of the configuration changes as a -// space-delimited string. -func Describe(ccs ...pb.ConfChangeSingle) string { - var buf strings.Builder - for _, cc := range ccs { - if buf.Len() > 0 { - buf.WriteByte(' ') - } - fmt.Fprintf(&buf, "%s(%d)", cc.Type, cc.NodeID) - } - return buf.String() -} diff --git a/raft/confchange/datadriven_test.go b/raft/confchange/datadriven_test.go deleted file mode 100644 index f179f1f43f87..000000000000 --- a/raft/confchange/datadriven_test.go +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package confchange - -import ( - "errors" - "fmt" - "strconv" - "strings" - "testing" - - "github.com/cockroachdb/datadriven" - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -func TestConfChangeDataDriven(t *testing.T) { - datadriven.Walk(t, "testdata", func(t *testing.T, path string) { - tr := tracker.MakeProgressTracker(10, 0) - c := Changer{ - Tracker: tr, - LastIndex: 0, // incremented in this test with each cmd - } - - // The test files use the commands - // - simple: run a simple conf change (i.e. no joint consensus), - // - enter-joint: enter a joint config, and - // - leave-joint: leave a joint config. - // The first two take a list of config changes, which have the following - // syntax: - // - vn: make n a voter, - // - ln: make n a learner, - // - rn: remove n, and - // - un: update n. - datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { - defer func() { - c.LastIndex++ - }() - var ccs []pb.ConfChangeSingle - toks := strings.Split(strings.TrimSpace(d.Input), " ") - if toks[0] == "" { - toks = nil - } - for _, tok := range toks { - if len(tok) < 2 { - return fmt.Sprintf("unknown token %s", tok) - } - var cc pb.ConfChangeSingle - switch tok[0] { - case 'v': - cc.Type = pb.ConfChangeAddNode - case 'l': - cc.Type = pb.ConfChangeAddLearnerNode - case 'r': - cc.Type = pb.ConfChangeRemoveNode - case 'u': - cc.Type = pb.ConfChangeUpdateNode - default: - return fmt.Sprintf("unknown input: %s", tok) - } - id, err := strconv.ParseUint(tok[1:], 10, 64) - if err != nil { - return err.Error() - } - cc.NodeID = id - ccs = append(ccs, cc) - } - - var cfg tracker.Config - var prs tracker.ProgressMap - var err error - switch d.Cmd { - case "simple": - cfg, prs, err = c.Simple(ccs...) - case "enter-joint": - var autoLeave bool - if len(d.CmdArgs) > 0 { - d.ScanArgs(t, "autoleave", &autoLeave) - } - cfg, prs, err = c.EnterJoint(autoLeave, ccs...) 
- case "leave-joint": - if len(ccs) > 0 { - err = errors.New("this command takes no input") - } else { - cfg, prs, err = c.LeaveJoint() - } - default: - return "unknown command" - } - if err != nil { - return err.Error() + "\n" - } - c.Tracker.Config, c.Tracker.Progress = cfg, prs - return fmt.Sprintf("%s\n%s", c.Tracker.Config, c.Tracker.Progress) - }) - }) -} diff --git a/raft/confchange/quick_test.go b/raft/confchange/quick_test.go deleted file mode 100644 index 76018f634d5a..000000000000 --- a/raft/confchange/quick_test.go +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package confchange - -import ( - "fmt" - "math/rand" - "reflect" - "testing" - "testing/quick" - - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -// TestConfChangeQuick uses quickcheck to verify that simple and joint config -// changes arrive at the same result. -func TestConfChangeQuick(t *testing.T) { - cfg := &quick.Config{ - MaxCount: 1000, - } - - // Log the first couple of runs to give some indication of things working - // as intended. - const infoCount = 5 - - runWithJoint := func(c *Changer, ccs []pb.ConfChangeSingle) error { - cfg, prs, err := c.EnterJoint(false /* autoLeave */, ccs...) - if err != nil { - return err - } - // Also do this with autoLeave on, just to check that we'd get the same - // result. - cfg2a, prs2a, err := c.EnterJoint(true /* autoLeave */, ccs...) 
- if err != nil { - return err - } - cfg2a.AutoLeave = false - if !reflect.DeepEqual(cfg, cfg2a) || !reflect.DeepEqual(prs, prs2a) { - return fmt.Errorf("cfg: %+v\ncfg2a: %+v\nprs: %+v\nprs2a: %+v", - cfg, cfg2a, prs, prs2a) - } - c.Tracker.Config = cfg - c.Tracker.Progress = prs - cfg2b, prs2b, err := c.LeaveJoint() - if err != nil { - return err - } - // Reset back to the main branch with autoLeave=false. - c.Tracker.Config = cfg - c.Tracker.Progress = prs - cfg, prs, err = c.LeaveJoint() - if err != nil { - return err - } - if !reflect.DeepEqual(cfg, cfg2b) || !reflect.DeepEqual(prs, prs2b) { - return fmt.Errorf("cfg: %+v\ncfg2b: %+v\nprs: %+v\nprs2b: %+v", - cfg, cfg2b, prs, prs2b) - } - c.Tracker.Config = cfg - c.Tracker.Progress = prs - return nil - } - - runWithSimple := func(c *Changer, ccs []pb.ConfChangeSingle) error { - for _, cc := range ccs { - cfg, prs, err := c.Simple(cc) - if err != nil { - return err - } - c.Tracker.Config, c.Tracker.Progress = cfg, prs - } - return nil - } - - type testFunc func(*Changer, []pb.ConfChangeSingle) error - - wrapper := func(invoke testFunc) func(setup initialChanges, ccs confChanges) (*Changer, error) { - return func(setup initialChanges, ccs confChanges) (*Changer, error) { - tr := tracker.MakeProgressTracker(10, 0) - c := &Changer{ - Tracker: tr, - LastIndex: 10, - } - - if err := runWithSimple(c, setup); err != nil { - return nil, err - } - - err := invoke(c, ccs) - return c, err - } - } - - var n int - f1 := func(setup initialChanges, ccs confChanges) *Changer { - c, err := wrapper(runWithSimple)(setup, ccs) - if err != nil { - t.Fatal(err) - } - if n < infoCount { - t.Log("initial setup:", Describe(setup...)) - t.Log("changes:", Describe(ccs...)) - t.Log(c.Tracker.Config) - t.Log(c.Tracker.Progress) - } - n++ - return c - } - f2 := func(setup initialChanges, ccs confChanges) *Changer { - c, err := wrapper(runWithJoint)(setup, ccs) - if err != nil { - t.Fatal(err) - } - return c - } - err := quick.CheckEqual(f1, 
f2, cfg) - if err == nil { - return - } - cErr, ok := err.(*quick.CheckEqualError) - if !ok { - t.Fatal(err) - } - - t.Error("setup:", Describe(cErr.In[0].([]pb.ConfChangeSingle)...)) - t.Error("ccs:", Describe(cErr.In[1].([]pb.ConfChangeSingle)...)) - t.Errorf("out1: %+v\nout2: %+v", cErr.Out1, cErr.Out2) -} - -type confChangeTyp pb.ConfChangeType - -func (confChangeTyp) Generate(rand *rand.Rand, _ int) reflect.Value { - return reflect.ValueOf(confChangeTyp(rand.Intn(4))) -} - -type confChanges []pb.ConfChangeSingle - -func genCC(num func() int, id func() uint64, typ func() pb.ConfChangeType) []pb.ConfChangeSingle { - var ccs []pb.ConfChangeSingle - n := num() - for i := 0; i < n; i++ { - ccs = append(ccs, pb.ConfChangeSingle{Type: typ(), NodeID: id()}) - } - return ccs -} - -func (confChanges) Generate(rand *rand.Rand, _ int) reflect.Value { - num := func() int { - return 1 + rand.Intn(9) - } - id := func() uint64 { - // Note that num() >= 1, so we're never returning 1 from this method, - // meaning that we'll never touch NodeID one, which is special to avoid - // voterless configs altogether in this test. - return 1 + uint64(num()) - } - typ := func() pb.ConfChangeType { - return pb.ConfChangeType(rand.Intn(len(pb.ConfChangeType_name))) - } - return reflect.ValueOf(genCC(num, id, typ)) -} - -type initialChanges []pb.ConfChangeSingle - -func (initialChanges) Generate(rand *rand.Rand, _ int) reflect.Value { - num := func() int { - return 1 + rand.Intn(5) - } - id := func() uint64 { return uint64(num()) } - typ := func() pb.ConfChangeType { - return pb.ConfChangeAddNode - } - // NodeID one is special - it's in the initial config and will be a voter - // always (this is to avoid uninteresting edge cases where the simple conf - // changes can't easily make progress). - ccs := append([]pb.ConfChangeSingle{{Type: pb.ConfChangeAddNode, NodeID: 1}}, genCC(num, id, typ)...) 
- return reflect.ValueOf(ccs) -} diff --git a/raft/confchange/restore.go b/raft/confchange/restore.go deleted file mode 100644 index ea317fc289a8..000000000000 --- a/raft/confchange/restore.go +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package confchange - -import ( - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -// toConfChangeSingle translates a conf state into 1) a slice of operations creating -// first the config that will become the outgoing one, and then the incoming one, and -// b) another slice that, when applied to the config resulted from 1), represents the -// ConfState. -func toConfChangeSingle(cs pb.ConfState) (out []pb.ConfChangeSingle, in []pb.ConfChangeSingle) { - // Example to follow along this code: - // voters=(1 2 3) learners=(5) outgoing=(1 2 4 6) learners_next=(4) - // - // This means that before entering the joint config, the configuration - // had voters (1 2 4 6) and perhaps some learners that are already gone. - // The new set of voters is (1 2 3), i.e. (1 2) were kept around, and (4 6) - // are no longer voters; however 4 is poised to become a learner upon leaving - // the joint state. - // We can't tell whether 5 was a learner before entering the joint config, - // but it doesn't matter (we'll pretend that it wasn't). 
- // - // The code below will construct - // outgoing = add 1; add 2; add 4; add 6 - // incoming = remove 1; remove 2; remove 4; remove 6 - // add 1; add 2; add 3; - // add-learner 5; - // add-learner 4; - // - // So, when starting with an empty config, after applying 'outgoing' we have - // - // quorum=(1 2 4 6) - // - // From which we enter a joint state via 'incoming' - // - // quorum=(1 2 3)&&(1 2 4 6) learners=(5) learners_next=(4) - // - // as desired. - - for _, id := range cs.VotersOutgoing { - // If there are outgoing voters, first add them one by one so that the - // (non-joint) config has them all. - out = append(out, pb.ConfChangeSingle{ - Type: pb.ConfChangeAddNode, - NodeID: id, - }) - - } - - // We're done constructing the outgoing slice, now on to the incoming one - // (which will apply on top of the config created by the outgoing slice). - - // First, we'll remove all of the outgoing voters. - for _, id := range cs.VotersOutgoing { - in = append(in, pb.ConfChangeSingle{ - Type: pb.ConfChangeRemoveNode, - NodeID: id, - }) - } - // Then we'll add the incoming voters and learners. - for _, id := range cs.Voters { - in = append(in, pb.ConfChangeSingle{ - Type: pb.ConfChangeAddNode, - NodeID: id, - }) - } - for _, id := range cs.Learners { - in = append(in, pb.ConfChangeSingle{ - Type: pb.ConfChangeAddLearnerNode, - NodeID: id, - }) - } - // Same for LearnersNext; these are nodes we want to be learners but which - // are currently voters in the outgoing config. 
- for _, id := range cs.LearnersNext { - in = append(in, pb.ConfChangeSingle{ - Type: pb.ConfChangeAddLearnerNode, - NodeID: id, - }) - } - return out, in -} - -func chain(chg Changer, ops ...func(Changer) (tracker.Config, tracker.ProgressMap, error)) (tracker.Config, tracker.ProgressMap, error) { - for _, op := range ops { - cfg, prs, err := op(chg) - if err != nil { - return tracker.Config{}, nil, err - } - chg.Tracker.Config = cfg - chg.Tracker.Progress = prs - } - return chg.Tracker.Config, chg.Tracker.Progress, nil -} - -// Restore takes a Changer (which must represent an empty configuration), and -// runs a sequence of changes enacting the configuration described in the -// ConfState. -// -// TODO(tbg) it's silly that this takes a Changer. Unravel this by making sure -// the Changer only needs a ProgressMap (not a whole Tracker) at which point -// this can just take LastIndex and MaxInflight directly instead and cook up -// the results from that alone. -func Restore(chg Changer, cs pb.ConfState) (tracker.Config, tracker.ProgressMap, error) { - outgoing, incoming := toConfChangeSingle(cs) - - var ops []func(Changer) (tracker.Config, tracker.ProgressMap, error) - - if len(outgoing) == 0 { - // No outgoing config, so just apply the incoming changes one by one. - for _, cc := range incoming { - cc := cc // loop-local copy - ops = append(ops, func(chg Changer) (tracker.Config, tracker.ProgressMap, error) { - return chg.Simple(cc) - }) - } - } else { - // The ConfState describes a joint configuration. - // - // First, apply all of the changes of the outgoing config one by one, so - // that it temporarily becomes the incoming active config. For example, - // if the config is (1 2 3)&(2 3 4), this will establish (2 3 4)&(). 
- for _, cc := range outgoing { - cc := cc // loop-local copy - ops = append(ops, func(chg Changer) (tracker.Config, tracker.ProgressMap, error) { - return chg.Simple(cc) - }) - } - // Now enter the joint state, which rotates the above additions into the - // outgoing config, and adds the incoming config in. Continuing the - // example above, we'd get (1 2 3)&(2 3 4), i.e. the incoming operations - // would be removing 2,3,4 and then adding in 1,2,3 while transitioning - // into a joint state. - ops = append(ops, func(chg Changer) (tracker.Config, tracker.ProgressMap, error) { - return chg.EnterJoint(cs.AutoLeave, incoming...) - }) - } - - return chain(chg, ops...) -} diff --git a/raft/confchange/restore_test.go b/raft/confchange/restore_test.go deleted file mode 100644 index ec45e5144ca6..000000000000 --- a/raft/confchange/restore_test.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package confchange - -import ( - "math/rand" - "reflect" - "sort" - "testing" - "testing/quick" - - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -type rndConfChange pb.ConfState - -// Generate creates a random (valid) ConfState for use with quickcheck. -func (rndConfChange) Generate(rand *rand.Rand, _ int) reflect.Value { - conv := func(sl []int) []uint64 { - // We want IDs but the incoming slice is zero-indexed, so add one to - // each. 
- out := make([]uint64, len(sl)) - for i := range sl { - out[i] = uint64(sl[i] + 1) - } - return out - } - var cs pb.ConfState - // NB: never generate the empty ConfState, that one should be unit tested. - nVoters := 1 + rand.Intn(5) - - nLearners := rand.Intn(5) - // The number of voters that are in the outgoing config but not in the - // incoming one. (We'll additionally retain a random number of the - // incoming voters below). - nRemovedVoters := rand.Intn(3) - - // Voters, learners, and removed voters must not overlap. A "removed voter" - // is one that we have in the outgoing config but not the incoming one. - ids := conv(rand.Perm(2 * (nVoters + nLearners + nRemovedVoters))) - - cs.Voters = ids[:nVoters] - ids = ids[nVoters:] - - if nLearners > 0 { - cs.Learners = ids[:nLearners] - ids = ids[nLearners:] - } - - // Roll the dice on how many of the incoming voters we decide were also - // previously voters. - // - // NB: this code avoids creating non-nil empty slices (here and below). - nOutgoingRetainedVoters := rand.Intn(nVoters + 1) - if nOutgoingRetainedVoters > 0 || nRemovedVoters > 0 { - cs.VotersOutgoing = append([]uint64(nil), cs.Voters[:nOutgoingRetainedVoters]...) - cs.VotersOutgoing = append(cs.VotersOutgoing, ids[:nRemovedVoters]...) - } - // Only outgoing voters that are not also incoming voters can be in - // LearnersNext (they represent demotions). 
- if nRemovedVoters > 0 { - if nLearnersNext := rand.Intn(nRemovedVoters + 1); nLearnersNext > 0 { - cs.LearnersNext = ids[:nLearnersNext] - } - } - - cs.AutoLeave = len(cs.VotersOutgoing) > 0 && rand.Intn(2) == 1 - return reflect.ValueOf(rndConfChange(cs)) -} - -func TestRestore(t *testing.T) { - cfg := quick.Config{MaxCount: 1000} - - f := func(cs pb.ConfState) bool { - chg := Changer{ - Tracker: tracker.MakeProgressTracker(20, 0), - LastIndex: 10, - } - cfg, prs, err := Restore(chg, cs) - if err != nil { - t.Error(err) - return false - } - chg.Tracker.Config = cfg - chg.Tracker.Progress = prs - - for _, sl := range [][]uint64{ - cs.Voters, - cs.Learners, - cs.VotersOutgoing, - cs.LearnersNext, - } { - sort.Slice(sl, func(i, j int) bool { return sl[i] < sl[j] }) - } - - cs2 := chg.Tracker.ConfState() - // NB: cs.Equivalent does the same "sorting" dance internally, but let's - // test it a bit here instead of relying on it. - if reflect.DeepEqual(cs, cs2) && cs.Equivalent(cs2) == nil && cs2.Equivalent(cs) == nil { - return true // success - } - t.Errorf(` -before: %+#v -after: %+#v`, cs, cs2) - return false - } - - ids := func(sl ...uint64) []uint64 { - return sl - } - - // Unit tests. - for _, cs := range []pb.ConfState{ - {}, - {Voters: ids(1, 2, 3)}, - {Voters: ids(1, 2, 3), Learners: ids(4, 5, 6)}, - {Voters: ids(1, 2, 3), Learners: ids(5), VotersOutgoing: ids(1, 2, 4, 6), LearnersNext: ids(4)}, - } { - if !f(cs) { - t.FailNow() // f() already logged a nice t.Error() - } - } - - if err := quick.Check(func(cs rndConfChange) bool { - return f(pb.ConfState(cs)) - }, &cfg); err != nil { - t.Error(err) - } -} diff --git a/raft/confchange/testdata/joint_autoleave.txt b/raft/confchange/testdata/joint_autoleave.txt deleted file mode 100644 index 9ec8cb0a4679..000000000000 --- a/raft/confchange/testdata/joint_autoleave.txt +++ /dev/null @@ -1,29 +0,0 @@ -# Test the autoleave argument to EnterJoint. It defaults to false in the -# datadriven tests. 
The flag has no associated semantics in this package, -# it is simply passed through. -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=0 - -# Autoleave is reflected in the config. -enter-joint autoleave=true -v2 v3 ----- -voters=(1 2 3)&&(1) autoleave -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 -3: StateProbe match=0 next=1 - -# Can't enter-joint twice, even if autoleave changes. -enter-joint autoleave=false ----- -config is already joint - -leave-joint ----- -voters=(1 2 3) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 -3: StateProbe match=0 next=1 diff --git a/raft/confchange/testdata/joint_idempotency.txt b/raft/confchange/testdata/joint_idempotency.txt deleted file mode 100644 index 6d1346b78952..000000000000 --- a/raft/confchange/testdata/joint_idempotency.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Verify that operations upon entering the joint state are idempotent, i.e. -# removing an absent node is fine, etc. - -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=0 - -enter-joint -r1 r2 r9 v2 v3 v4 v2 v3 v4 l2 l2 r4 r4 l1 l1 ----- -voters=(3)&&(1) learners=(2) learners_next=(1) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 learner -3: StateProbe match=0 next=1 - -leave-joint ----- -voters=(3) learners=(1 2) -1: StateProbe match=0 next=0 learner -2: StateProbe match=0 next=1 learner -3: StateProbe match=0 next=1 diff --git a/raft/confchange/testdata/joint_learners_next.txt b/raft/confchange/testdata/joint_learners_next.txt deleted file mode 100644 index df1da7d0c9f2..000000000000 --- a/raft/confchange/testdata/joint_learners_next.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Verify that when a voter is demoted in a joint config, it will show up in -# learners_next until the joint config is left, and only then will the progress -# turn into that of a learner, without resetting the progress. Note that this -# last fact is verified by `next`, which can tell us which "round" the progress -# was originally created in. 
- -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=0 - -enter-joint -v2 l1 ----- -voters=(2)&&(1) learners_next=(1) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 - -leave-joint ----- -voters=(2) learners=(1) -1: StateProbe match=0 next=0 learner -2: StateProbe match=0 next=1 diff --git a/raft/confchange/testdata/joint_safety.txt b/raft/confchange/testdata/joint_safety.txt deleted file mode 100644 index 75d11b199e02..000000000000 --- a/raft/confchange/testdata/joint_safety.txt +++ /dev/null @@ -1,81 +0,0 @@ -leave-joint ----- -can't leave a non-joint config - -enter-joint ----- -can't make a zero-voter config joint - -enter-joint -v1 ----- -can't make a zero-voter config joint - -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=3 - -leave-joint ----- -can't leave a non-joint config - -# Can enter into joint config. -enter-joint ----- -voters=(1)&&(1) -1: StateProbe match=0 next=3 - -enter-joint ----- -config is already joint - -leave-joint ----- -voters=(1) -1: StateProbe match=0 next=3 - -leave-joint ----- -can't leave a non-joint config - -# Can enter again, this time with some ops. 
-enter-joint -r1 v2 v3 l4 ----- -voters=(2 3)&&(1) learners=(4) -1: StateProbe match=0 next=3 -2: StateProbe match=0 next=9 -3: StateProbe match=0 next=9 -4: StateProbe match=0 next=9 learner - -enter-joint ----- -config is already joint - -enter-joint -v12 ----- -config is already joint - -simple -l15 ----- -can't apply simple config change in joint config - -leave-joint ----- -voters=(2 3) learners=(4) -2: StateProbe match=0 next=9 -3: StateProbe match=0 next=9 -4: StateProbe match=0 next=9 learner - -simple -l9 ----- -voters=(2 3) learners=(4 9) -2: StateProbe match=0 next=9 -3: StateProbe match=0 next=9 -4: StateProbe match=0 next=9 learner -9: StateProbe match=0 next=14 learner diff --git a/raft/confchange/testdata/simple_idempotency.txt b/raft/confchange/testdata/simple_idempotency.txt deleted file mode 100644 index 2f7ca2e247bb..000000000000 --- a/raft/confchange/testdata/simple_idempotency.txt +++ /dev/null @@ -1,69 +0,0 @@ -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=0 - -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=0 - -simple -v2 ----- -voters=(1 2) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=2 - -simple -l1 ----- -voters=(2) learners=(1) -1: StateProbe match=0 next=0 learner -2: StateProbe match=0 next=2 - -simple -l1 ----- -voters=(2) learners=(1) -1: StateProbe match=0 next=0 learner -2: StateProbe match=0 next=2 - -simple -r1 ----- -voters=(2) -2: StateProbe match=0 next=2 - -simple -r1 ----- -voters=(2) -2: StateProbe match=0 next=2 - -simple -v3 ----- -voters=(2 3) -2: StateProbe match=0 next=2 -3: StateProbe match=0 next=7 - -simple -r3 ----- -voters=(2) -2: StateProbe match=0 next=2 - -simple -r3 ----- -voters=(2) -2: StateProbe match=0 next=2 - -simple -r4 ----- -voters=(2) -2: StateProbe match=0 next=2 diff --git a/raft/confchange/testdata/simple_promote_demote.txt b/raft/confchange/testdata/simple_promote_demote.txt deleted file mode 100644 index 52369b450e31..000000000000 --- 
a/raft/confchange/testdata/simple_promote_demote.txt +++ /dev/null @@ -1,60 +0,0 @@ -# Set up three voters for this test. - -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=0 - -simple -v2 ----- -voters=(1 2) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 - -simple -v3 ----- -voters=(1 2 3) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 -3: StateProbe match=0 next=2 - -# Can atomically demote and promote without a hitch. -# This is pointless, but possible. -simple -l1 v1 ----- -voters=(1 2 3) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 -3: StateProbe match=0 next=2 - -# Can demote a voter. -simple -l2 ----- -voters=(1 3) learners=(2) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 learner -3: StateProbe match=0 next=2 - -# Can atomically promote and demote the same voter. -# This is pointless, but possible. -simple -v2 l2 ----- -voters=(1 3) learners=(2) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 learner -3: StateProbe match=0 next=2 - -# Can promote a voter. 
-simple -v2 ----- -voters=(1 2 3) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 -3: StateProbe match=0 next=2 diff --git a/raft/confchange/testdata/simple_safety.txt b/raft/confchange/testdata/simple_safety.txt deleted file mode 100644 index 6566c5fccf7d..000000000000 --- a/raft/confchange/testdata/simple_safety.txt +++ /dev/null @@ -1,64 +0,0 @@ -simple -l1 ----- -removed all voters - -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=1 - -simple -v2 l3 ----- -voters=(1 2) learners=(3) -1: StateProbe match=0 next=1 -2: StateProbe match=0 next=2 -3: StateProbe match=0 next=2 learner - -simple -r1 v5 ----- -more than one voter changed without entering joint config - -simple -r1 r2 ----- -removed all voters - -simple -v3 v4 ----- -more than one voter changed without entering joint config - -simple -l1 v5 ----- -more than one voter changed without entering joint config - -simple -l1 l2 ----- -removed all voters - -simple -l2 l3 l4 l5 ----- -voters=(1) learners=(2 3 4 5) -1: StateProbe match=0 next=1 -2: StateProbe match=0 next=2 learner -3: StateProbe match=0 next=2 learner -4: StateProbe match=0 next=8 learner -5: StateProbe match=0 next=8 learner - -simple -r1 ----- -removed all voters - -simple -r2 r3 r4 r5 ----- -voters=(1) -1: StateProbe match=0 next=1 diff --git a/raft/confchange/testdata/update.txt b/raft/confchange/testdata/update.txt deleted file mode 100644 index 50a703ccf1d2..000000000000 --- a/raft/confchange/testdata/update.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Nobody cares about ConfChangeUpdateNode, but at least use it once. It is used -# by etcd as a convenient way to pass a blob through their conf change machinery -# that updates information tracked outside of raft. 
- -simple -v1 ----- -voters=(1) -1: StateProbe match=0 next=0 - -simple -v2 u1 ----- -voters=(1 2) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 - -simple -u1 u2 u3 u1 u2 u3 ----- -voters=(1 2) -1: StateProbe match=0 next=0 -2: StateProbe match=0 next=1 diff --git a/raft/confchange/testdata/zero.txt b/raft/confchange/testdata/zero.txt deleted file mode 100644 index 5e0d46fe6b6d..000000000000 --- a/raft/confchange/testdata/zero.txt +++ /dev/null @@ -1,6 +0,0 @@ -# NodeID zero is ignored. -simple -v1 r0 v0 l0 ----- -voters=(1) -1: StateProbe match=0 next=0 diff --git a/raft/design.md b/raft/design.md deleted file mode 100644 index 7bc0531dce6f..000000000000 --- a/raft/design.md +++ /dev/null @@ -1,57 +0,0 @@ -## Progress - -Progress represents a follower’s progress in the view of the leader. Leader maintains progresses of all followers, and sends `replication message` to the follower based on its progress. - -`replication message` is a `msgApp` with log entries. - -A progress has two attribute: `match` and `next`. `match` is the index of the highest known matched entry. If leader knows nothing about follower’s replication status, `match` is set to zero. `next` is the index of the first entry that will be replicated to the follower. Leader puts entries from `next` to its latest one in next `replication message`. - -A progress is in one of the three state: `probe`, `replicate`, `snapshot`. - -``` - +--------------------------------------------------------+ - | send snapshot | - | | - +---------+----------+ +----------v---------+ - +---> probe | | snapshot | - | | max inflight = 1 <----------------------------------+ max inflight = 0 | - | +---------+----------+ +--------------------+ - | | 1. snapshot success - | | (next=snapshot.index + 1) - | | 2. snapshot failure - | | (no change) - | | 3. 
receives msgAppResp(rej=false&&index>lastsnap.index) - | | (match=m.index,next=match+1) -receives msgAppResp(rej=true) -(next=match+1)| | - | | - | | - | | receives msgAppResp(rej=false&&index>match) - | | (match=m.index,next=match+1) - | | - | | - | | - | +---------v----------+ - | | replicate | - +---+ max inflight = n | - +--------------------+ -``` - -When the progress of a follower is in `probe` state, leader sends at most one `replication message` per heartbeat interval. The leader sends `replication message` slowly and probing the actual progress of the follower. A `msgHeartbeatResp` or a `msgAppResp` with reject might trigger the sending of the next `replication message`. - -When the progress of a follower is in `replicate` state, leader sends `replication message`, then optimistically increases `next` to the latest entry sent. This is an optimized state for fast replicating log entries to the follower. - -When the progress of a follower is in `snapshot` state, leader stops sending any `replication message`. - -A newly elected leader sets the progresses of all the followers to `probe` state with `match` = 0 and `next` = last index. The leader slowly (at most once per heartbeat) sends `replication message` to the follower and probes its progress. - -A progress changes to `replicate` when the follower replies with a non-rejection `msgAppResp`, which implies that it has matched the index sent. At this point, leader starts to stream log entries to the follower fast. The progress will fall back to `probe` when the follower replies a rejection `msgAppResp` or the link layer reports the follower is unreachable. We aggressively reset `next` to `match`+1 since if we receive any `msgAppResp` soon, both `match` and `next` will increase directly to the `index` in `msgAppResp`. (We might end up with sending some duplicate entries when aggressively reset `next` too low. 
see open question) - -A progress changes from `probe` to `snapshot` when the follower falls very far behind and requires a snapshot. After sending `msgSnap`, the leader waits until the success, failure or abortion of the previous snapshot sent. The progress will go back to `probe` after the sending result is applied. - -### Flow Control - -1. limit the max size of message sent per message. Max should be configurable. -Lower the cost at probing state as we limit the size per message; lower the penalty when aggressively decreased to a too low `next` - -2. limit the # of in flight messages < N when in `replicate` state. N should be configurable. Most implementation will have a sending buffer on top of its actual network transport layer (not blocking raft node). We want to make sure raft does not overflow that buffer, which can cause message dropping and triggering a bunch of unnecessary resending repeatedly. diff --git a/raft/diff_test.go b/raft/diff_test.go deleted file mode 100644 index 6030527941f4..000000000000 --- a/raft/diff_test.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package raft - -import ( - "fmt" - "io" - "os" - "os/exec" - "strings" -) - -func diffu(a, b string) string { - if a == b { - return "" - } - aname, bname := mustTemp("base", a), mustTemp("other", b) - defer os.Remove(aname) - defer os.Remove(bname) - cmd := exec.Command("diff", "-u", aname, bname) - buf, err := cmd.CombinedOutput() - if err != nil { - if _, ok := err.(*exec.ExitError); ok { - // do nothing - return string(buf) - } - panic(err) - } - return string(buf) -} - -func mustTemp(pre, body string) string { - f, err := os.CreateTemp("", pre) - if err != nil { - panic(err) - } - _, err = io.Copy(f, strings.NewReader(body)) - if err != nil { - panic(err) - } - f.Close() - return f.Name() -} - -func ltoa(l *raftLog) string { - s := fmt.Sprintf("lastIndex: %d\n", l.lastIndex()) - s += fmt.Sprintf("applied: %d\n", l.applied) - for i, e := range l.allEntries() { - s += fmt.Sprintf("#%d: %+v\n", i, e) - } - return s -} diff --git a/raft/doc.go b/raft/doc.go deleted file mode 100644 index 4febfe6084b8..000000000000 --- a/raft/doc.go +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* -Package raft sends and receives messages in the Protocol Buffer format -defined in the raftpb package. - -Raft is a protocol with which a cluster of nodes can maintain a replicated state machine. -The state machine is kept in sync through the use of a replicated log. 
-For more details on Raft, see "In Search of an Understandable Consensus Algorithm" -(https://raft.github.io/raft.pdf) by Diego Ongaro and John Ousterhout. - -A simple example application, _raftexample_, is also available to help illustrate -how to use this package in practice: -https://github.com/etcd-io/etcd/tree/main/contrib/raftexample - -# Usage - -The primary object in raft is a Node. You either start a Node from scratch -using raft.StartNode or start a Node from some initial state using raft.RestartNode. - -To start a node from scratch: - - storage := raft.NewMemoryStorage() - c := &Config{ - ID: 0x01, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: storage, - MaxSizePerMsg: 4096, - MaxInflightMsgs: 256, - } - n := raft.StartNode(c, []raft.Peer{{ID: 0x02}, {ID: 0x03}}) - -To restart a node from previous state: - - storage := raft.NewMemoryStorage() - - // recover the in-memory storage from persistent - // snapshot, state and entries. - storage.ApplySnapshot(snapshot) - storage.SetHardState(state) - storage.Append(entries) - - c := &Config{ - ID: 0x01, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: storage, - MaxSizePerMsg: 4096, - MaxInflightMsgs: 256, - } - - // restart raft without peer information. - // peer information is already included in the storage. - n := raft.RestartNode(c) - -Now that you are holding onto a Node you have a few responsibilities: - -First, you must read from the Node.Ready() channel and process the updates -it contains. These steps may be performed in parallel, except as noted in step -2. - -1. Write HardState, Entries, and Snapshot to persistent storage if they are -not empty. Note that when writing an Entry with Index i, any -previously-persisted entries with Index >= i must be discarded. - -2. Send all Messages to the nodes named in the To field. 
It is important that -no messages be sent until the latest HardState has been persisted to disk, -and all Entries written by any previous Ready batch (Messages may be sent while -entries from the same batch are being persisted). To reduce the I/O latency, an -optimization can be applied to make leader write to disk in parallel with its -followers (as explained at section 10.2.1 in Raft thesis). If any Message has type -MsgSnap, call Node.ReportSnapshot() after it has been sent (these messages may be -large). - -Note: Marshalling messages is not thread-safe; it is important that you -make sure that no new entries are persisted while marshalling. -The easiest way to achieve this is to serialize the messages directly inside -your main raft loop. - -3. Apply Snapshot (if any) and CommittedEntries to the state machine. -If any committed Entry has Type EntryConfChange, call Node.ApplyConfChange() -to apply it to the node. The configuration change may be cancelled at this point -by setting the NodeID field to zero before calling ApplyConfChange -(but ApplyConfChange must be called one way or the other, and the decision to cancel -must be based solely on the state machine and not external information such as -the observed health of the node). - -4. Call Node.Advance() to signal readiness for the next batch of updates. -This may be done at any time after step 1, although all updates must be processed -in the order they were returned by Ready. - -Second, all persisted log entries must be made available via an -implementation of the Storage interface. The provided MemoryStorage -type can be used for this (if you repopulate its state upon a -restart), or you can supply your own disk-backed implementation. - -Third, when you receive a message from another node, pass it to Node.Step: - - func recvRaftRPC(ctx context.Context, m raftpb.Message) { - n.Step(ctx, m) - } - -Finally, you need to call Node.Tick() at regular intervals (probably -via a time.Ticker). 
Raft has two important timeouts: heartbeat and the -election timeout. However, internally to the raft package time is -represented by an abstract "tick". - -The total state machine handling loop will look something like this: - - for { - select { - case <-s.Ticker: - n.Tick() - case rd := <-s.Node.Ready(): - saveToStorage(rd.State, rd.Entries, rd.Snapshot) - send(rd.Messages) - if !raft.IsEmptySnap(rd.Snapshot) { - processSnapshot(rd.Snapshot) - } - for _, entry := range rd.CommittedEntries { - process(entry) - if entry.Type == raftpb.EntryConfChange { - var cc raftpb.ConfChange - cc.Unmarshal(entry.Data) - s.Node.ApplyConfChange(cc) - } - } - s.Node.Advance() - case <-s.done: - return - } - } - -To propose changes to the state machine from your node take your application -data, serialize it into a byte slice and call: - - n.Propose(ctx, data) - -If the proposal is committed, data will appear in committed entries with type -raftpb.EntryNormal. There is no guarantee that a proposed command will be -committed; you may have to re-propose after a timeout. - -To add or remove a node in a cluster, build ConfChange struct 'cc' and call: - - n.ProposeConfChange(ctx, cc) - -After config change is committed, some committed entry with type -raftpb.EntryConfChange will be returned. You must apply it to node through: - - var cc raftpb.ConfChange - cc.Unmarshal(data) - n.ApplyConfChange(cc) - -Note: An ID represents a unique node in a cluster for all time. A -given ID MUST be used only once even if the old node has been removed. -This means that for example IP addresses make poor node IDs since they -may be reused. Node IDs must be non-zero. - -# Implementation notes - -This implementation is up to date with the final Raft thesis -(https://github.com/ongardie/dissertation/blob/master/stanford.pdf), although our -implementation of the membership change protocol differs somewhat from -that described in chapter 4. 
The key invariant that membership changes -happen one node at a time is preserved, but in our implementation the -membership change takes effect when its entry is applied, not when it -is added to the log (so the entry is committed under the old -membership instead of the new). This is equivalent in terms of safety, -since the old and new configurations are guaranteed to overlap. - -To ensure that we do not attempt to commit two membership changes at -once by matching log positions (which would be unsafe since they -should have different quorum requirements), we simply disallow any -proposed membership change while any uncommitted change appears in -the leader's log. - -This approach introduces a problem when you try to remove a member -from a two-member cluster: If one of the members dies before the -other one receives the commit of the confchange entry, then the member -cannot be removed any more since the cluster cannot make progress. -For this reason it is highly recommended to use three or more nodes in -every cluster. - -# MessageType - -Package raft sends and receives message in Protocol Buffer format (defined -in raftpb package). Each state (follower, candidate, leader) implements its -own 'step' method ('stepFollower', 'stepCandidate', 'stepLeader') when -advancing with the given raftpb.Message. Each step is determined by its -raftpb.MessageType. Note that every step is checked by one common method -'Step' that safety-checks the terms of node and incoming message to prevent -stale log entries: - - 'MsgHup' is used for election. If a node is a follower or candidate, the - 'tick' function in 'raft' struct is set as 'tickElection'. If a follower or - candidate has not received any heartbeat before the election timeout, it - passes 'MsgHup' to its Step method and becomes (or remains) a candidate to - start a new election. - - 'MsgBeat' is an internal type that signals the leader to send a heartbeat of - the 'MsgHeartbeat' type. 
If a node is a leader, the 'tick' function in - the 'raft' struct is set as 'tickHeartbeat', and triggers the leader to - send periodic 'MsgHeartbeat' messages to its followers. - - 'MsgProp' proposes to append data to its log entries. This is a special - type to redirect proposals to leader. Therefore, send method overwrites - raftpb.Message's term with its HardState's term to avoid attaching its - local term to 'MsgProp'. When 'MsgProp' is passed to the leader's 'Step' - method, the leader first calls the 'appendEntry' method to append entries - to its log, and then calls 'bcastAppend' method to send those entries to - its peers. When passed to candidate, 'MsgProp' is dropped. When passed to - follower, 'MsgProp' is stored in follower's mailbox(msgs) by the send - method. It is stored with sender's ID and later forwarded to leader by - rafthttp package. - - 'MsgApp' contains log entries to replicate. A leader calls bcastAppend, - which calls sendAppend, which sends soon-to-be-replicated logs in 'MsgApp' - type. When 'MsgApp' is passed to candidate's Step method, candidate reverts - back to follower, because it indicates that there is a valid leader sending - 'MsgApp' messages. Candidate and follower respond to this message in - 'MsgAppResp' type. - - 'MsgAppResp' is response to log replication request('MsgApp'). When - 'MsgApp' is passed to candidate or follower's Step method, it responds by - calling 'handleAppendEntries' method, which sends 'MsgAppResp' to raft - mailbox. - - 'MsgVote' requests votes for election. When a node is a follower or - candidate and 'MsgHup' is passed to its Step method, then the node calls - 'campaign' method to campaign itself to become a leader. Once 'campaign' - method is called, the node becomes candidate and sends 'MsgVote' to peers - in cluster to request votes. 
When passed to leader or candidate's Step - method and the message's Term is lower than leader's or candidate's, - 'MsgVote' will be rejected ('MsgVoteResp' is returned with Reject true). - If leader or candidate receives 'MsgVote' with higher term, it will revert - back to follower. When 'MsgVote' is passed to follower, it votes for the - sender only when sender's last term is greater than MsgVote's term or - sender's last term is equal to MsgVote's term but sender's last committed - index is greater than or equal to follower's. - - 'MsgVoteResp' contains responses from voting request. When 'MsgVoteResp' is - passed to candidate, the candidate calculates how many votes it has won. If - it's more than majority (quorum), it becomes leader and calls 'bcastAppend'. - If candidate receives majority of votes of denials, it reverts back to - follower. - - 'MsgPreVote' and 'MsgPreVoteResp' are used in an optional two-phase election - protocol. When Config.PreVote is true, a pre-election is carried out first - (using the same rules as a regular election), and no node increases its term - number unless the pre-election indicates that the campaigning node would win. - This minimizes disruption when a partitioned node rejoins the cluster. - - 'MsgSnap' requests to install a snapshot message. When a node has just - become a leader or the leader receives 'MsgProp' message, it calls - 'bcastAppend' method, which then calls 'sendAppend' method to each - follower. In 'sendAppend', if a leader fails to get term or entries, - the leader requests snapshot by sending 'MsgSnap' type message. - - 'MsgSnapStatus' tells the result of snapshot install message. When a - follower rejected 'MsgSnap', it indicates the snapshot request with - 'MsgSnap' had failed from network issues which causes the network layer - to fail to send out snapshots to its followers. Then leader considers - follower's progress as probe. 
When 'MsgSnap' were not rejected, it - indicates that the snapshot succeeded and the leader sets follower's - progress to probe and resumes its log replication. - - 'MsgHeartbeat' sends heartbeat from leader. When 'MsgHeartbeat' is passed - to candidate and message's term is higher than candidate's, the candidate - reverts back to follower and updates its committed index from the one in - this heartbeat. And it sends the message to its mailbox. When - 'MsgHeartbeat' is passed to follower's Step method and message's term is - higher than follower's, the follower updates its leaderID with the ID - from the message. - - 'MsgHeartbeatResp' is a response to 'MsgHeartbeat'. When 'MsgHeartbeatResp' - is passed to leader's Step method, the leader knows which follower - responded. And only when the leader's last committed index is greater than - follower's Match index, the leader runs 'sendAppend` method. - - 'MsgUnreachable' tells that request(message) wasn't delivered. When - 'MsgUnreachable' is passed to leader's Step method, the leader discovers - that the follower that sent this 'MsgUnreachable' is not reachable, often - indicating 'MsgApp' is lost. When follower's progress state is replicate, - the leader sets it back to probe. -*/ -package raft diff --git a/raft/example_test.go b/raft/example_test.go deleted file mode 100644 index 51c1689245e5..000000000000 --- a/raft/example_test.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -func applyToStore(ents []pb.Entry) {} -func sendMessages(msgs []pb.Message) {} -func saveStateToDisk(st pb.HardState) {} -func saveToDisk(ents []pb.Entry) {} - -func ExampleNode() { - c := &Config{} - n := StartNode(c, nil) - defer n.Stop() - - // stuff to n happens in other goroutines - - // the last known state - var prev pb.HardState - for { - // Ready blocks until there is new state ready. - rd := <-n.Ready() - if !isHardStateEqual(prev, rd.HardState) { - saveStateToDisk(rd.HardState) - prev = rd.HardState - } - - saveToDisk(rd.Entries) - go applyToStore(rd.CommittedEntries) - sendMessages(rd.Messages) - } -} diff --git a/raft/go.mod b/raft/go.mod deleted file mode 100644 index 0042f93d2168..000000000000 --- a/raft/go.mod +++ /dev/null @@ -1,34 +0,0 @@ -module go.etcd.io/etcd/raft/v3 - -go 1.19 - -require ( - github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054 // indirect - github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5 - github.com/gogo/protobuf v1.3.2 - github.com/golang/protobuf v1.5.2 - github.com/pkg/errors v0.9.1 // indirect - github.com/stretchr/testify v1.8.1 -) - -require ( - github.com/cockroachdb/errors v1.2.4 // indirect - github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/getsentry/raven-go v0.2.0 // indirect - github.com/google/go-cmp v0.5.8 // indirect - github.com/kr/pretty v0.2.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - google.golang.org/protobuf v1.27.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect -) - -// Bad imports are sometimes causing attempts to pull that code. -// This makes the error more explicit. 
-replace go.etcd.io/etcd => ./FORBIDDEN_DEPENDENCY - -replace go.etcd.io/etcd/v3 => ./FORBIDDEN_DEPENDENCY - -replace go.etcd.io/etcd/client/pkg/v3 => ./FORBIDDEN_DEPENDENCY - -replace go.etcd.io/etcd/api/v3 => ./FORBIDDEN_DEPENDENCY diff --git a/raft/go.sum b/raft/go.sum deleted file mode 100644 index 864bba87556e..000000000000 --- a/raft/go.sum +++ /dev/null @@ -1,81 +0,0 @@ -github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= -github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054 h1:uH66TXeswKn5PW5zdZ39xEwfS9an067BirqA+P4QaLI= -github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= -github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5 h1:xD/lrqdvwsc+O2bjSSi3YqY73Ke3LAiSCx49aCesA0E= -github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= -github.com/cockroachdb/errors v1.2.4 h1:Lap807SXTH5tri2TivECb/4abUkMZC9zRoLarvcKDqs= -github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA= -github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f h1:o/kfcElHqOiXqcou5a3rIlMc7oJbMQkeLk0VQJ7zgqY= -github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/getsentry/raven-go v0.2.0 h1:no+xWJRb5ZI7eE8TWgIq1jLulQiIoLG0IfYxv5JYMGs= -github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= -github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= -github.com/gogo/protobuf v1.3.2 
h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod 
h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 
-gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/raft/interaction_test.go b/raft/interaction_test.go deleted file mode 100644 index 57d0d97d446a..000000000000 --- a/raft/interaction_test.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft_test - -import ( - "testing" - - "github.com/cockroachdb/datadriven" - "go.etcd.io/etcd/raft/v3/rafttest" -) - -func TestInteraction(t *testing.T) { - // NB: if this test fails, run `go test ./raft -rewrite` and inspect the - // diff. Only commit the changes if you understand what caused them and if - // they are desired. - datadriven.Walk(t, "testdata", func(t *testing.T, path string) { - env := rafttest.NewInteractionEnv(nil) - datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { - return env.Handle(t, *d) - }) - }) -} diff --git a/raft/log.go b/raft/log.go deleted file mode 100644 index 6bf9d153e8cd..000000000000 --- a/raft/log.go +++ /dev/null @@ -1,418 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "fmt" - "log" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -type raftLog struct { - // storage contains all stable entries since the last snapshot. - storage Storage - - // unstable contains all unstable entries and snapshot. - // they will be saved into storage. - unstable unstable - - // committed is the highest log position that is known to be in - // stable storage on a quorum of nodes. - committed uint64 - // applied is the highest log position that the application has - // been instructed to apply to its state machine. - // Invariant: applied <= committed - applied uint64 - - logger Logger - - // maxNextCommittedEntsSize is the maximum number aggregate byte size of the - // messages returned from calls to nextCommittedEnts. - maxNextCommittedEntsSize uint64 -} - -// newLog returns log using the given storage and default options. It -// recovers the log to the state that it just commits and applies the -// latest snapshot. -func newLog(storage Storage, logger Logger) *raftLog { - return newLogWithSize(storage, logger, noLimit) -} - -// newLogWithSize returns a log using the given storage and max -// message size. 
-func newLogWithSize(storage Storage, logger Logger, maxNextCommittedEntsSize uint64) *raftLog { - if storage == nil { - log.Panic("storage must not be nil") - } - log := &raftLog{ - storage: storage, - logger: logger, - maxNextCommittedEntsSize: maxNextCommittedEntsSize, - } - firstIndex, err := storage.FirstIndex() - if err != nil { - panic(err) // TODO(bdarnell) - } - lastIndex, err := storage.LastIndex() - if err != nil { - panic(err) // TODO(bdarnell) - } - log.unstable.offset = lastIndex + 1 - log.unstable.logger = logger - // Initialize our committed and applied pointers to the time of the last compaction. - log.committed = firstIndex - 1 - log.applied = firstIndex - 1 - - return log -} - -func (l *raftLog) String() string { - return fmt.Sprintf("committed=%d, applied=%d, unstable.offset=%d, len(unstable.Entries)=%d", l.committed, l.applied, l.unstable.offset, len(l.unstable.entries)) -} - -// maybeAppend returns (0, false) if the entries cannot be appended. Otherwise, -// it returns (last index of new entries, true). -func (l *raftLog) maybeAppend(index, logTerm, committed uint64, ents ...pb.Entry) (lastnewi uint64, ok bool) { - if l.matchTerm(index, logTerm) { - lastnewi = index + uint64(len(ents)) - ci := l.findConflict(ents) - switch { - case ci == 0: - case ci <= l.committed: - l.logger.Panicf("entry %d conflict with committed entry [committed(%d)]", ci, l.committed) - default: - offset := index + 1 - if ci-offset > uint64(len(ents)) { - l.logger.Panicf("index, %d, is out of range [%d]", ci-offset, len(ents)) - } - l.append(ents[ci-offset:]...) 
- } - l.commitTo(min(committed, lastnewi)) - return lastnewi, true - } - return 0, false -} - -func (l *raftLog) append(ents ...pb.Entry) uint64 { - if len(ents) == 0 { - return l.lastIndex() - } - if after := ents[0].Index - 1; after < l.committed { - l.logger.Panicf("after(%d) is out of range [committed(%d)]", after, l.committed) - } - l.unstable.truncateAndAppend(ents) - return l.lastIndex() -} - -// findConflict finds the index of the conflict. -// It returns the first pair of conflicting entries between the existing -// entries and the given entries, if there are any. -// If there is no conflicting entries, and the existing entries contains -// all the given entries, zero will be returned. -// If there is no conflicting entries, but the given entries contains new -// entries, the index of the first new entry will be returned. -// An entry is considered to be conflicting if it has the same index but -// a different term. -// The index of the given entries MUST be continuously increasing. -func (l *raftLog) findConflict(ents []pb.Entry) uint64 { - for _, ne := range ents { - if !l.matchTerm(ne.Index, ne.Term) { - if ne.Index <= l.lastIndex() { - l.logger.Infof("found conflict at index %d [existing term: %d, conflicting term: %d]", - ne.Index, l.zeroTermOnErrCompacted(l.term(ne.Index)), ne.Term) - } - return ne.Index - } - } - return 0 -} - -// findConflictByTerm takes an (index, term) pair (indicating a conflicting log -// entry on a leader/follower during an append) and finds the largest index in -// log l with a term <= `term` and an index <= `index`. If no such index exists -// in the log, the log's first index is returned. -// -// The index provided MUST be equal to or less than l.lastIndex(). Invalid -// inputs log a warning and the input index is returned. 
-func (l *raftLog) findConflictByTerm(index uint64, term uint64) uint64 { - if li := l.lastIndex(); index > li { - // NB: such calls should not exist, but since there is a straightfoward - // way to recover, do it. - // - // It is tempting to also check something about the first index, but - // there is odd behavior with peers that have no log, in which case - // lastIndex will return zero and firstIndex will return one, which - // leads to calls with an index of zero into this method. - l.logger.Warningf("index(%d) is out of range [0, lastIndex(%d)] in findConflictByTerm", - index, li) - return index - } - for { - logTerm, err := l.term(index) - if logTerm <= term || err != nil { - break - } - index-- - } - return index -} - -func (l *raftLog) unstableEntries() []pb.Entry { - if len(l.unstable.entries) == 0 { - return nil - } - return l.unstable.entries -} - -// nextCommittedEnts returns all the available entries for execution. -// If applied is smaller than the index of snapshot, it returns all committed -// entries after the index of snapshot. -func (l *raftLog) nextCommittedEnts() (ents []pb.Entry) { - if l.hasPendingSnapshot() { - // See comment in hasNextCommittedEnts. - return nil - } - if l.committed > l.applied { - lo, hi := l.applied+1, l.committed+1 // [lo, hi) - ents, err := l.slice(lo, hi, l.maxNextCommittedEntsSize) - if err != nil { - l.logger.Panicf("unexpected error when getting unapplied entries (%v)", err) - } - return ents - } - return nil -} - -// hasNextCommittedEnts returns if there is any available entries for execution. -// This is a fast check without heavy raftLog.slice() in nextCommittedEnts(). -func (l *raftLog) hasNextCommittedEnts() bool { - if l.hasPendingSnapshot() { - // If we have a snapshot to apply, don't also return any committed - // entries. Doing so raises questions about what should be applied - // first. 
- return false - } - return l.committed > l.applied -} - -// hasPendingSnapshot returns if there is pending snapshot waiting for applying. -func (l *raftLog) hasPendingSnapshot() bool { - return l.unstable.snapshot != nil -} - -func (l *raftLog) snapshot() (pb.Snapshot, error) { - if l.unstable.snapshot != nil { - return *l.unstable.snapshot, nil - } - return l.storage.Snapshot() -} - -func (l *raftLog) firstIndex() uint64 { - if i, ok := l.unstable.maybeFirstIndex(); ok { - return i - } - index, err := l.storage.FirstIndex() - if err != nil { - panic(err) // TODO(bdarnell) - } - return index -} - -func (l *raftLog) lastIndex() uint64 { - if i, ok := l.unstable.maybeLastIndex(); ok { - return i - } - i, err := l.storage.LastIndex() - if err != nil { - panic(err) // TODO(bdarnell) - } - return i -} - -func (l *raftLog) commitTo(tocommit uint64) { - // never decrease commit - if l.committed < tocommit { - if l.lastIndex() < tocommit { - l.logger.Panicf("tocommit(%d) is out of range [lastIndex(%d)]. Was the raft log corrupted, truncated, or lost?", tocommit, l.lastIndex()) - } - l.committed = tocommit - } -} - -func (l *raftLog) appliedTo(i uint64) { - if i == 0 { - return - } - if l.committed < i || i < l.applied { - l.logger.Panicf("applied(%d) is out of range [prevApplied(%d), committed(%d)]", i, l.applied, l.committed) - } - l.applied = i -} - -func (l *raftLog) stableTo(i, t uint64) { l.unstable.stableTo(i, t) } - -func (l *raftLog) stableSnapTo(i uint64) { l.unstable.stableSnapTo(i) } - -func (l *raftLog) lastTerm() uint64 { - t, err := l.term(l.lastIndex()) - if err != nil { - l.logger.Panicf("unexpected error when getting the last term (%v)", err) - } - return t -} - -func (l *raftLog) term(i uint64) (uint64, error) { - // the valid term range is [index of dummy entry, last index] - dummyIndex := l.firstIndex() - 1 - if i < dummyIndex || i > l.lastIndex() { - // TODO: return an error instead? 
- return 0, nil - } - - if t, ok := l.unstable.maybeTerm(i); ok { - return t, nil - } - - t, err := l.storage.Term(i) - if err == nil { - return t, nil - } - if err == ErrCompacted || err == ErrUnavailable { - return 0, err - } - panic(err) // TODO(bdarnell) -} - -func (l *raftLog) entries(i, maxsize uint64) ([]pb.Entry, error) { - if i > l.lastIndex() { - return nil, nil - } - return l.slice(i, l.lastIndex()+1, maxsize) -} - -// allEntries returns all entries in the log. -func (l *raftLog) allEntries() []pb.Entry { - ents, err := l.entries(l.firstIndex(), noLimit) - if err == nil { - return ents - } - if err == ErrCompacted { // try again if there was a racing compaction - return l.allEntries() - } - // TODO (xiangli): handle error? - panic(err) -} - -// isUpToDate determines if the given (lastIndex,term) log is more up-to-date -// by comparing the index and term of the last entries in the existing logs. -// If the logs have last entries with different terms, then the log with the -// later term is more up-to-date. If the logs end with the same term, then -// whichever log has the larger lastIndex is more up-to-date. If the logs are -// the same, the given log is up-to-date. -func (l *raftLog) isUpToDate(lasti, term uint64) bool { - return term > l.lastTerm() || (term == l.lastTerm() && lasti >= l.lastIndex()) -} - -func (l *raftLog) matchTerm(i, term uint64) bool { - t, err := l.term(i) - if err != nil { - return false - } - return t == term -} - -func (l *raftLog) maybeCommit(maxIndex, term uint64) bool { - if maxIndex > l.committed && l.zeroTermOnErrCompacted(l.term(maxIndex)) == term { - l.commitTo(maxIndex) - return true - } - return false -} - -func (l *raftLog) restore(s pb.Snapshot) { - l.logger.Infof("log [%s] starts to restore snapshot [index: %d, term: %d]", l, s.Metadata.Index, s.Metadata.Term) - l.committed = s.Metadata.Index - l.unstable.restore(s) -} - -// slice returns a slice of log entries from lo through hi-1, inclusive. 
-func (l *raftLog) slice(lo, hi, maxSize uint64) ([]pb.Entry, error) { - err := l.mustCheckOutOfBounds(lo, hi) - if err != nil { - return nil, err - } - if lo == hi { - return nil, nil - } - var ents []pb.Entry - if lo < l.unstable.offset { - storedEnts, err := l.storage.Entries(lo, min(hi, l.unstable.offset), maxSize) - if err == ErrCompacted { - return nil, err - } else if err == ErrUnavailable { - l.logger.Panicf("entries[%d:%d) is unavailable from storage", lo, min(hi, l.unstable.offset)) - } else if err != nil { - panic(err) // TODO(bdarnell) - } - - // check if ents has reached the size limitation - if uint64(len(storedEnts)) < min(hi, l.unstable.offset)-lo { - return storedEnts, nil - } - - ents = storedEnts - } - if hi > l.unstable.offset { - unstable := l.unstable.slice(max(lo, l.unstable.offset), hi) - if len(ents) > 0 { - combined := make([]pb.Entry, len(ents)+len(unstable)) - n := copy(combined, ents) - copy(combined[n:], unstable) - ents = combined - } else { - ents = unstable - } - } - return limitSize(ents, maxSize), nil -} - -// l.firstIndex <= lo <= hi <= l.firstIndex + len(l.entries) -func (l *raftLog) mustCheckOutOfBounds(lo, hi uint64) error { - if lo > hi { - l.logger.Panicf("invalid slice %d > %d", lo, hi) - } - fi := l.firstIndex() - if lo < fi { - return ErrCompacted - } - - length := l.lastIndex() + 1 - fi - if hi > fi+length { - l.logger.Panicf("slice[%d,%d) out of bound [%d,%d]", lo, hi, fi, l.lastIndex()) - } - return nil -} - -func (l *raftLog) zeroTermOnErrCompacted(t uint64, err error) uint64 { - if err == nil { - return t - } - if err == ErrCompacted { - return 0 - } - l.logger.Panicf("unexpected error (%v)", err) - return 0 -} diff --git a/raft/log_test.go b/raft/log_test.go deleted file mode 100644 index 007edc4e7be9..000000000000 --- a/raft/log_test.go +++ /dev/null @@ -1,754 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in 
compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "fmt" - "testing" - - "github.com/stretchr/testify/require" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -func TestFindConflict(t *testing.T) { - previousEnts := []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 3}} - tests := []struct { - ents []pb.Entry - wconflict uint64 - }{ - // no conflict, empty ent - {[]pb.Entry{}, 0}, - // no conflict - {[]pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 3}}, 0}, - {[]pb.Entry{{Index: 2, Term: 2}, {Index: 3, Term: 3}}, 0}, - {[]pb.Entry{{Index: 3, Term: 3}}, 0}, - // no conflict, but has new entries - {[]pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 4}}, 4}, - {[]pb.Entry{{Index: 2, Term: 2}, {Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 4}}, 4}, - {[]pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 4}}, 4}, - {[]pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 4}}, 4}, - // conflicts with existing entries - {[]pb.Entry{{Index: 1, Term: 4}, {Index: 2, Term: 4}}, 1}, - {[]pb.Entry{{Index: 2, Term: 1}, {Index: 3, Term: 4}, {Index: 4, Term: 4}}, 2}, - {[]pb.Entry{{Index: 3, Term: 1}, {Index: 4, Term: 2}, {Index: 5, Term: 4}, {Index: 6, Term: 4}}, 3}, - } - - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - raftLog := newLog(NewMemoryStorage(), raftLogger) - raftLog.append(previousEnts...) 
- require.Equal(t, tt.wconflict, raftLog.findConflict(tt.ents)) - }) - } -} - -func TestIsUpToDate(t *testing.T) { - previousEnts := []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 3}} - raftLog := newLog(NewMemoryStorage(), raftLogger) - raftLog.append(previousEnts...) - tests := []struct { - lastIndex uint64 - term uint64 - wUpToDate bool - }{ - // greater term, ignore lastIndex - {raftLog.lastIndex() - 1, 4, true}, - {raftLog.lastIndex(), 4, true}, - {raftLog.lastIndex() + 1, 4, true}, - // smaller term, ignore lastIndex - {raftLog.lastIndex() - 1, 2, false}, - {raftLog.lastIndex(), 2, false}, - {raftLog.lastIndex() + 1, 2, false}, - // equal term, equal or lager lastIndex wins - {raftLog.lastIndex() - 1, 3, false}, - {raftLog.lastIndex(), 3, true}, - {raftLog.lastIndex() + 1, 3, true}, - } - - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - require.Equal(t, tt.wUpToDate, raftLog.isUpToDate(tt.lastIndex, tt.term)) - }) - } -} - -func TestAppend(t *testing.T) { - previousEnts := []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}} - tests := []struct { - ents []pb.Entry - windex uint64 - wents []pb.Entry - wunstable uint64 - }{ - { - []pb.Entry{}, - 2, - []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}, - 3, - }, - { - []pb.Entry{{Index: 3, Term: 2}}, - 3, - []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 2}}, - 3, - }, - // conflicts with index 1 - { - []pb.Entry{{Index: 1, Term: 2}}, - 1, - []pb.Entry{{Index: 1, Term: 2}}, - 1, - }, - // conflicts with index 2 - { - []pb.Entry{{Index: 2, Term: 3}, {Index: 3, Term: 3}}, - 3, - []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 3}, {Index: 3, Term: 3}}, - 2, - }, - } - - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - storage := NewMemoryStorage() - storage.Append(previousEnts) - raftLog := newLog(storage, raftLogger) - require.Equal(t, tt.windex, raftLog.append(tt.ents...)) - g, err := raftLog.entries(1, noLimit) - 
require.NoError(t, err) - require.Equal(t, tt.wents, g) - require.Equal(t, tt.wunstable, raftLog.unstable.offset) - }) - } -} - -// TestLogMaybeAppend ensures: -// If the given (index, term) matches with the existing log: -// 1. If an existing entry conflicts with a new one (same index -// but different terms), delete the existing entry and all that -// follow it -// 2.Append any new entries not already in the log -// -// If the given (index, term) does not match with the existing log: -// -// return false -func TestLogMaybeAppend(t *testing.T) { - previousEnts := []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 3}} - lastindex := uint64(3) - lastterm := uint64(3) - commit := uint64(1) - - tests := []struct { - logTerm uint64 - index uint64 - committed uint64 - ents []pb.Entry - - wlasti uint64 - wappend bool - wcommit uint64 - wpanic bool - }{ - // not match: term is different - { - lastterm - 1, lastindex, lastindex, []pb.Entry{{Index: lastindex + 1, Term: 4}}, - 0, false, commit, false, - }, - // not match: index out of bound - { - lastterm, lastindex + 1, lastindex, []pb.Entry{{Index: lastindex + 2, Term: 4}}, - 0, false, commit, false, - }, - // match with the last existing entry - { - lastterm, lastindex, lastindex, nil, - lastindex, true, lastindex, false, - }, - { - lastterm, lastindex, lastindex + 1, nil, - lastindex, true, lastindex, false, // do not increase commit higher than lastnewi - }, - { - lastterm, lastindex, lastindex - 1, nil, - lastindex, true, lastindex - 1, false, // commit up to the commit in the message - }, - { - lastterm, lastindex, 0, nil, - lastindex, true, commit, false, // commit do not decrease - }, - { - 0, 0, lastindex, nil, - 0, true, commit, false, // commit do not decrease - }, - { - lastterm, lastindex, lastindex, []pb.Entry{{Index: lastindex + 1, Term: 4}}, - lastindex + 1, true, lastindex, false, - }, - { - lastterm, lastindex, lastindex + 1, []pb.Entry{{Index: lastindex + 1, Term: 4}}, - lastindex + 1, 
true, lastindex + 1, false, - }, - { - lastterm, lastindex, lastindex + 2, []pb.Entry{{Index: lastindex + 1, Term: 4}}, - lastindex + 1, true, lastindex + 1, false, // do not increase commit higher than lastnewi - }, - { - lastterm, lastindex, lastindex + 2, []pb.Entry{{Index: lastindex + 1, Term: 4}, {Index: lastindex + 2, Term: 4}}, - lastindex + 2, true, lastindex + 2, false, - }, - // match with the entry in the middle - { - lastterm - 1, lastindex - 1, lastindex, []pb.Entry{{Index: lastindex, Term: 4}}, - lastindex, true, lastindex, false, - }, - { - lastterm - 2, lastindex - 2, lastindex, []pb.Entry{{Index: lastindex - 1, Term: 4}}, - lastindex - 1, true, lastindex - 1, false, - }, - { - lastterm - 3, lastindex - 3, lastindex, []pb.Entry{{Index: lastindex - 2, Term: 4}}, - lastindex - 2, true, lastindex - 2, true, // conflict with existing committed entry - }, - { - lastterm - 2, lastindex - 2, lastindex, []pb.Entry{{Index: lastindex - 1, Term: 4}, {Index: lastindex, Term: 4}}, - lastindex, true, lastindex, false, - }, - } - - for i, tt := range tests { - raftLog := newLog(NewMemoryStorage(), raftLogger) - raftLog.append(previousEnts...) - raftLog.committed = commit - - t.Run(fmt.Sprint(i), func(t *testing.T) { - defer func() { - if r := recover(); r != nil { - require.True(t, tt.wpanic) - } - }() - glasti, gappend := raftLog.maybeAppend(tt.index, tt.logTerm, tt.committed, tt.ents...) - require.Equal(t, tt.wlasti, glasti) - require.Equal(t, tt.wappend, gappend) - require.Equal(t, tt.wcommit, raftLog.committed) - if gappend && len(tt.ents) != 0 { - gents, err := raftLog.slice(raftLog.lastIndex()-uint64(len(tt.ents))+1, raftLog.lastIndex()+1, noLimit) - require.NoError(t, err) - require.Equal(t, tt.ents, gents) - } - }) - } -} - -// TestCompactionSideEffects ensures that all the log related functionality works correctly after -// a compaction. 
-func TestCompactionSideEffects(t *testing.T) { - var i uint64 - // Populate the log with 1000 entries; 750 in stable storage and 250 in unstable. - lastIndex := uint64(1000) - unstableIndex := uint64(750) - lastTerm := lastIndex - storage := NewMemoryStorage() - for i = 1; i <= unstableIndex; i++ { - storage.Append([]pb.Entry{{Term: i, Index: i}}) - } - raftLog := newLog(storage, raftLogger) - for i = unstableIndex; i < lastIndex; i++ { - raftLog.append(pb.Entry{Term: i + 1, Index: i + 1}) - } - - require.True(t, raftLog.maybeCommit(lastIndex, lastTerm)) - raftLog.appliedTo(raftLog.committed) - - offset := uint64(500) - storage.Compact(offset) - require.Equal(t, lastIndex, raftLog.lastIndex()) - - for j := offset; j <= raftLog.lastIndex(); j++ { - require.Equal(t, j, mustTerm(raftLog.term(j))) - } - - for j := offset; j <= raftLog.lastIndex(); j++ { - require.True(t, raftLog.matchTerm(j, j)) - } - - unstableEnts := raftLog.unstableEntries() - require.Equal(t, 250, len(unstableEnts)) - require.Equal(t, uint64(751), unstableEnts[0].Index) - - prev := raftLog.lastIndex() - raftLog.append(pb.Entry{Index: raftLog.lastIndex() + 1, Term: raftLog.lastIndex() + 1}) - require.Equal(t, prev+1, raftLog.lastIndex()) - - ents, err := raftLog.entries(raftLog.lastIndex(), noLimit) - require.NoError(t, err) - require.Equal(t, 1, len(ents)) -} - -func TestHasNextCommittedEnts(t *testing.T) { - snap := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{Term: 1, Index: 3}, - } - ents := []pb.Entry{ - {Term: 1, Index: 4}, - {Term: 1, Index: 5}, - {Term: 1, Index: 6}, - } - tests := []struct { - applied uint64 - snap bool - whasNext bool - }{ - {applied: 0, snap: false, whasNext: true}, - {applied: 3, snap: false, whasNext: true}, - {applied: 4, snap: false, whasNext: true}, - {applied: 5, snap: false, whasNext: false}, - // With snapshot. 
- {applied: 3, snap: true, whasNext: false}, - } - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - storage := NewMemoryStorage() - require.NoError(t, storage.ApplySnapshot(snap)) - - raftLog := newLog(storage, raftLogger) - raftLog.append(ents...) - raftLog.maybeCommit(5, 1) - raftLog.appliedTo(tt.applied) - if tt.snap { - newSnap := snap - newSnap.Metadata.Index++ - raftLog.restore(newSnap) - } - require.Equal(t, tt.whasNext, raftLog.hasNextCommittedEnts()) - }) - } -} - -func TestNextCommittedEnts(t *testing.T) { - snap := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{Term: 1, Index: 3}, - } - ents := []pb.Entry{ - {Term: 1, Index: 4}, - {Term: 1, Index: 5}, - {Term: 1, Index: 6}, - } - tests := []struct { - applied uint64 - snap bool - wents []pb.Entry - }{ - {applied: 0, snap: false, wents: ents[:2]}, - {applied: 3, snap: false, wents: ents[:2]}, - {applied: 4, snap: false, wents: ents[1:2]}, - {applied: 5, snap: false, wents: nil}, - // With snapshot. - {applied: 3, snap: true, wents: nil}, - } - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - storage := NewMemoryStorage() - require.NoError(t, storage.ApplySnapshot(snap)) - - raftLog := newLog(storage, raftLogger) - raftLog.append(ents...) - raftLog.maybeCommit(5, 1) - raftLog.appliedTo(tt.applied) - if tt.snap { - newSnap := snap - newSnap.Metadata.Index++ - raftLog.restore(newSnap) - } - require.Equal(t, tt.wents, raftLog.nextCommittedEnts()) - }) - - } -} - -// TestUnstableEnts ensures unstableEntries returns the unstable part of the -// entries correctly. 
-func TestUnstableEnts(t *testing.T) { - previousEnts := []pb.Entry{{Term: 1, Index: 1}, {Term: 2, Index: 2}} - tests := []struct { - unstable uint64 - wents []pb.Entry - }{ - {3, nil}, - {1, previousEnts}, - } - - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - // append stable entries to storage - storage := NewMemoryStorage() - require.NoError(t, storage.Append(previousEnts[:tt.unstable-1])) - - // append unstable entries to raftlog - raftLog := newLog(storage, raftLogger) - raftLog.append(previousEnts[tt.unstable-1:]...) - - ents := raftLog.unstableEntries() - if l := len(ents); l > 0 { - raftLog.stableTo(ents[l-1].Index, ents[l-1].Term) - } - require.Equal(t, tt.wents, ents) - require.Equal(t, previousEnts[len(previousEnts)-1].Index+1, raftLog.unstable.offset) - }) - } -} - -func TestCommitTo(t *testing.T) { - previousEnts := []pb.Entry{{Term: 1, Index: 1}, {Term: 2, Index: 2}, {Term: 3, Index: 3}} - commit := uint64(2) - tests := []struct { - commit uint64 - wcommit uint64 - wpanic bool - }{ - {3, 3, false}, - {1, 2, false}, // never decrease - {4, 0, true}, // commit out of range -> panic - } - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - defer func() { - if r := recover(); r != nil { - require.True(t, tt.wpanic) - } - }() - raftLog := newLog(NewMemoryStorage(), raftLogger) - raftLog.append(previousEnts...) - raftLog.committed = commit - raftLog.commitTo(tt.commit) - require.Equal(t, tt.wcommit, raftLog.committed) - }) - } -} - -func TestStableTo(t *testing.T) { - tests := []struct { - stablei uint64 - stablet uint64 - wunstable uint64 - }{ - {1, 1, 2}, - {2, 2, 3}, - {2, 1, 1}, // bad term - {3, 1, 1}, // bad index - } - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - raftLog := newLog(NewMemoryStorage(), raftLogger) - raftLog.append([]pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}...) 
- raftLog.stableTo(tt.stablei, tt.stablet) - require.Equal(t, tt.wunstable, raftLog.unstable.offset) - }) - } -} - -func TestStableToWithSnap(t *testing.T) { - snapi, snapt := uint64(5), uint64(2) - tests := []struct { - stablei uint64 - stablet uint64 - newEnts []pb.Entry - - wunstable uint64 - }{ - {snapi + 1, snapt, nil, snapi + 1}, - {snapi, snapt, nil, snapi + 1}, - {snapi - 1, snapt, nil, snapi + 1}, - - {snapi + 1, snapt + 1, nil, snapi + 1}, - {snapi, snapt + 1, nil, snapi + 1}, - {snapi - 1, snapt + 1, nil, snapi + 1}, - - {snapi + 1, snapt, []pb.Entry{{Index: snapi + 1, Term: snapt}}, snapi + 2}, - {snapi, snapt, []pb.Entry{{Index: snapi + 1, Term: snapt}}, snapi + 1}, - {snapi - 1, snapt, []pb.Entry{{Index: snapi + 1, Term: snapt}}, snapi + 1}, - - {snapi + 1, snapt + 1, []pb.Entry{{Index: snapi + 1, Term: snapt}}, snapi + 1}, - {snapi, snapt + 1, []pb.Entry{{Index: snapi + 1, Term: snapt}}, snapi + 1}, - {snapi - 1, snapt + 1, []pb.Entry{{Index: snapi + 1, Term: snapt}}, snapi + 1}, - } - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - s := NewMemoryStorage() - require.NoError(t, s.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: snapi, Term: snapt}})) - raftLog := newLog(s, raftLogger) - raftLog.append(tt.newEnts...) - raftLog.stableTo(tt.stablei, tt.stablet) - require.Equal(t, tt.wunstable, raftLog.unstable.offset) - }) - - } -} - -// TestCompaction ensures that the number of log entries is correct after compactions. 
-func TestCompaction(t *testing.T) { - tests := []struct { - lastIndex uint64 - compact []uint64 - wleft []int - wallow bool - }{ - // out of upper bound - {1000, []uint64{1001}, []int{-1}, false}, - {1000, []uint64{300, 500, 800, 900}, []int{700, 500, 200, 100}, true}, - // out of lower bound - {1000, []uint64{300, 299}, []int{700, -1}, false}, - } - - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - defer func() { - if r := recover(); r != nil { - require.False(t, tt.wallow) - } - }() - storage := NewMemoryStorage() - for i := uint64(1); i <= tt.lastIndex; i++ { - storage.Append([]pb.Entry{{Index: i}}) - } - raftLog := newLog(storage, raftLogger) - raftLog.maybeCommit(tt.lastIndex, 0) - - raftLog.appliedTo(raftLog.committed) - for j := 0; j < len(tt.compact); j++ { - err := storage.Compact(tt.compact[j]) - if err != nil { - require.False(t, tt.wallow) - continue - } - require.Equal(t, tt.wleft[j], len(raftLog.allEntries())) - } - - }) - } -} - -func TestLogRestore(t *testing.T) { - index := uint64(1000) - term := uint64(1000) - snap := pb.SnapshotMetadata{Index: index, Term: term} - storage := NewMemoryStorage() - storage.ApplySnapshot(pb.Snapshot{Metadata: snap}) - raftLog := newLog(storage, raftLogger) - - require.Zero(t, len(raftLog.allEntries())) - require.Equal(t, index+1, raftLog.firstIndex()) - require.Equal(t, index, raftLog.committed) - require.Equal(t, index+1, raftLog.unstable.offset) - require.Equal(t, term, mustTerm(raftLog.term(index))) -} - -func TestIsOutOfBounds(t *testing.T) { - offset := uint64(100) - num := uint64(100) - storage := NewMemoryStorage() - storage.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: offset}}) - l := newLog(storage, raftLogger) - for i := uint64(1); i <= num; i++ { - l.append(pb.Entry{Index: i + offset}) - } - - first := offset + 1 - tests := []struct { - lo, hi uint64 - wpanic bool - wErrCompacted bool - }{ - { - first - 2, first + 1, - false, - true, - }, - { - first - 1, first + 
1, - false, - true, - }, - { - first, first, - false, - false, - }, - { - first + num/2, first + num/2, - false, - false, - }, - { - first + num - 1, first + num - 1, - false, - false, - }, - { - first + num, first + num, - false, - false, - }, - { - first + num, first + num + 1, - true, - false, - }, - { - first + num + 1, first + num + 1, - true, - false, - }, - } - - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - defer func() { - if r := recover(); r != nil { - require.True(t, tt.wpanic) - } - }() - err := l.mustCheckOutOfBounds(tt.lo, tt.hi) - require.False(t, tt.wpanic) - require.False(t, tt.wErrCompacted && err != ErrCompacted) - require.False(t, !tt.wErrCompacted && err != nil) - }) - } -} - -func TestTerm(t *testing.T) { - var i uint64 - offset := uint64(100) - num := uint64(100) - - storage := NewMemoryStorage() - storage.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: offset, Term: 1}}) - l := newLog(storage, raftLogger) - for i = 1; i < num; i++ { - l.append(pb.Entry{Index: offset + i, Term: i}) - } - - tests := []struct { - index uint64 - w uint64 - }{ - {offset - 1, 0}, - {offset, 1}, - {offset + num/2, num / 2}, - {offset + num - 1, num - 1}, - {offset + num, 0}, - } - - for j, tt := range tests { - t.Run(fmt.Sprint(j), func(t *testing.T) { - require.Equal(t, tt.w, mustTerm(l.term(tt.index))) - }) - } -} - -func TestTermWithUnstableSnapshot(t *testing.T) { - storagesnapi := uint64(100) - unstablesnapi := storagesnapi + 5 - - storage := NewMemoryStorage() - storage.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: storagesnapi, Term: 1}}) - l := newLog(storage, raftLogger) - l.restore(pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: unstablesnapi, Term: 1}}) - - tests := []struct { - index uint64 - w uint64 - }{ - // cannot get term from storage - {storagesnapi, 0}, - // cannot get term from the gap between storage ents and unstable snapshot - {storagesnapi + 1, 0}, - {unstablesnapi - 1, 0}, - // 
get term from unstable snapshot index - {unstablesnapi, 1}, - } - - for i, tt := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { - require.Equal(t, tt.w, mustTerm(l.term(tt.index))) - }) - } -} - -func TestSlice(t *testing.T) { - var i uint64 - offset := uint64(100) - num := uint64(100) - last := offset + num - half := offset + num/2 - halfe := pb.Entry{Index: half, Term: half} - - storage := NewMemoryStorage() - storage.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: offset}}) - for i = 1; i < num/2; i++ { - storage.Append([]pb.Entry{{Index: offset + i, Term: offset + i}}) - } - l := newLog(storage, raftLogger) - for i = num / 2; i < num; i++ { - l.append(pb.Entry{Index: offset + i, Term: offset + i}) - } - - tests := []struct { - from uint64 - to uint64 - limit uint64 - - w []pb.Entry - wpanic bool - }{ - // test no limit - {offset - 1, offset + 1, noLimit, nil, false}, - {offset, offset + 1, noLimit, nil, false}, - {half - 1, half + 1, noLimit, []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}}, false}, - {half, half + 1, noLimit, []pb.Entry{{Index: half, Term: half}}, false}, - {last - 1, last, noLimit, []pb.Entry{{Index: last - 1, Term: last - 1}}, false}, - {last, last + 1, noLimit, nil, true}, - - // test limit - {half - 1, half + 1, 0, []pb.Entry{{Index: half - 1, Term: half - 1}}, false}, - {half - 1, half + 1, uint64(halfe.Size() + 1), []pb.Entry{{Index: half - 1, Term: half - 1}}, false}, - {half - 2, half + 1, uint64(halfe.Size() + 1), []pb.Entry{{Index: half - 2, Term: half - 2}}, false}, - {half - 1, half + 1, uint64(halfe.Size() * 2), []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}}, false}, - {half - 1, half + 2, uint64(halfe.Size() * 3), []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}, {Index: half + 1, Term: half + 1}}, false}, - {half, half + 2, uint64(halfe.Size()), []pb.Entry{{Index: half, Term: half}}, false}, - {half, half + 2, uint64(halfe.Size() * 
2), []pb.Entry{{Index: half, Term: half}, {Index: half + 1, Term: half + 1}}, false}, - } - - for j, tt := range tests { - t.Run(fmt.Sprint(j), func(t *testing.T) { - defer func() { - if r := recover(); r != nil { - require.True(t, tt.wpanic) - } - }() - g, err := l.slice(tt.from, tt.to, tt.limit) - require.False(t, tt.from <= offset && err != ErrCompacted) - require.False(t, tt.from > offset && err != nil) - require.Equal(t, tt.w, g) - }) - } -} - -func mustTerm(term uint64, err error) uint64 { - if err != nil { - panic(err) - } - return term -} diff --git a/raft/log_unstable.go b/raft/log_unstable.go deleted file mode 100644 index 73641b7a984d..000000000000 --- a/raft/log_unstable.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import pb "go.etcd.io/etcd/raft/v3/raftpb" - -// unstable.entries[i] has raft log position i+unstable.offset. -// Note that unstable.offset may be less than the highest log -// position in storage; this means that the next write to storage -// might need to truncate the log before persisting unstable.entries. -type unstable struct { - // the incoming unstable snapshot, if any. - snapshot *pb.Snapshot - // all entries that have not yet been written to storage. - entries []pb.Entry - offset uint64 - - logger Logger -} - -// maybeFirstIndex returns the index of the first possible entry in entries -// if it has a snapshot. 
-func (u *unstable) maybeFirstIndex() (uint64, bool) { - if u.snapshot != nil { - return u.snapshot.Metadata.Index + 1, true - } - return 0, false -} - -// maybeLastIndex returns the last index if it has at least one -// unstable entry or snapshot. -func (u *unstable) maybeLastIndex() (uint64, bool) { - if l := len(u.entries); l != 0 { - return u.offset + uint64(l) - 1, true - } - if u.snapshot != nil { - return u.snapshot.Metadata.Index, true - } - return 0, false -} - -// maybeTerm returns the term of the entry at index i, if there -// is any. -func (u *unstable) maybeTerm(i uint64) (uint64, bool) { - if i < u.offset { - if u.snapshot != nil && u.snapshot.Metadata.Index == i { - return u.snapshot.Metadata.Term, true - } - return 0, false - } - - last, ok := u.maybeLastIndex() - if !ok { - return 0, false - } - if i > last { - return 0, false - } - - return u.entries[i-u.offset].Term, true -} - -func (u *unstable) stableTo(i, t uint64) { - gt, ok := u.maybeTerm(i) - if !ok { - // Unstable entry missing. Ignore. - return - } - if i < u.offset { - // Index matched unstable snapshot, not unstable entry. Ignore. - return - } - if gt != t { - // Term mismatch between unstable entry and specified entry. Ignore. - return - } - u.entries = u.entries[i+1-u.offset:] - u.offset = i + 1 - u.shrinkEntriesArray() -} - -// shrinkEntriesArray discards the underlying array used by the entries slice -// if most of it isn't being used. This avoids holding references to a bunch of -// potentially large entries that aren't needed anymore. Simply clearing the -// entries wouldn't be safe because clients might still be using them. -func (u *unstable) shrinkEntriesArray() { - // We replace the array if we're using less than half of the space in - // it. This number is fairly arbitrary, chosen as an attempt to balance - // memory usage vs number of allocations. It could probably be improved - // with some focused tuning. 
- const lenMultiple = 2 - if len(u.entries) == 0 { - u.entries = nil - } else if len(u.entries)*lenMultiple < cap(u.entries) { - newEntries := make([]pb.Entry, len(u.entries)) - copy(newEntries, u.entries) - u.entries = newEntries - } -} - -func (u *unstable) stableSnapTo(i uint64) { - if u.snapshot != nil && u.snapshot.Metadata.Index == i { - u.snapshot = nil - } -} - -func (u *unstable) restore(s pb.Snapshot) { - u.offset = s.Metadata.Index + 1 - u.entries = nil - u.snapshot = &s -} - -func (u *unstable) truncateAndAppend(ents []pb.Entry) { - after := ents[0].Index - switch { - case after == u.offset+uint64(len(u.entries)): - // after is the next index in the u.entries - // directly append - u.entries = append(u.entries, ents...) - case after <= u.offset: - u.logger.Infof("replace the unstable entries from index %d", after) - // The log is being truncated to before our current offset - // portion, so set the offset and replace the entries - u.offset = after - u.entries = ents - default: - // truncate to after and copy to u.entries - // then append - u.logger.Infof("truncate the unstable entries before index %d", after) - u.entries = append([]pb.Entry{}, u.slice(u.offset, after)...) - u.entries = append(u.entries, ents...) 
- } -} - -func (u *unstable) slice(lo uint64, hi uint64) []pb.Entry { - u.mustCheckOutOfBounds(lo, hi) - return u.entries[lo-u.offset : hi-u.offset] -} - -// u.offset <= lo <= hi <= u.offset+len(u.entries) -func (u *unstable) mustCheckOutOfBounds(lo, hi uint64) { - if lo > hi { - u.logger.Panicf("invalid unstable.slice %d > %d", lo, hi) - } - upper := u.offset + uint64(len(u.entries)) - if lo < u.offset || hi > upper { - u.logger.Panicf("unstable.slice[%d,%d) out of bound [%d,%d]", lo, hi, u.offset, upper) - } -} diff --git a/raft/log_unstable_test.go b/raft/log_unstable_test.go deleted file mode 100644 index a71e74f0a9cc..000000000000 --- a/raft/log_unstable_test.go +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package raft - -import ( - "fmt" - "testing" - - "github.com/stretchr/testify/require" - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -func TestUnstableMaybeFirstIndex(t *testing.T) { - tests := []struct { - entries []pb.Entry - offset uint64 - snap *pb.Snapshot - - wok bool - windex uint64 - }{ - // no snapshot - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - false, 0, - }, - { - []pb.Entry{}, 0, nil, - false, 0, - }, - // has snapshot - { - []pb.Entry{{Index: 5, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - true, 5, - }, - { - []pb.Entry{}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - true, 5, - }, - } - - for i, tt := range tests { - tt := tt - t.Run(fmt.Sprint(i), func(t *testing.T) { - u := unstable{ - entries: tt.entries, - offset: tt.offset, - snapshot: tt.snap, - logger: raftLogger, - } - index, ok := u.maybeFirstIndex() - require.Equal(t, tt.wok, ok) - require.Equal(t, tt.windex, index) - }) - } -} - -func TestMaybeLastIndex(t *testing.T) { - tests := []struct { - entries []pb.Entry - offset uint64 - snap *pb.Snapshot - - wok bool - windex uint64 - }{ - // last in entries - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - true, 5, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - true, 5, - }, - // last in snapshot - { - []pb.Entry{}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - true, 4, - }, - // empty unstable - { - []pb.Entry{}, 0, nil, - false, 0, - }, - } - - for i, tt := range tests { - tt := tt - t.Run(fmt.Sprint(i), func(t *testing.T) { - u := unstable{ - entries: tt.entries, - offset: tt.offset, - snapshot: tt.snap, - logger: raftLogger, - } - index, ok := u.maybeLastIndex() - require.Equal(t, tt.wok, ok) - require.Equal(t, tt.windex, index) - }) - } -} - -func TestUnstableMaybeTerm(t *testing.T) { - tests := []struct { - entries []pb.Entry - offset uint64 - snap *pb.Snapshot - index uint64 - - wok bool - 
wterm uint64 - }{ - // term from entries - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - 5, - true, 1, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - 6, - false, 0, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - 4, - false, 0, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 5, - true, 1, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 6, - false, 0, - }, - // term from snapshot - { - []pb.Entry{{Index: 5, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 4, - true, 1, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 3, - false, 0, - }, - { - []pb.Entry{}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 5, - false, 0, - }, - { - []pb.Entry{}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 4, - true, 1, - }, - { - []pb.Entry{}, 0, nil, - 5, - false, 0, - }, - } - - for i, tt := range tests { - tt := tt - t.Run(fmt.Sprint(i), func(t *testing.T) { - u := unstable{ - entries: tt.entries, - offset: tt.offset, - snapshot: tt.snap, - logger: raftLogger, - } - term, ok := u.maybeTerm(tt.index) - require.Equal(t, tt.wok, ok) - require.Equal(t, tt.wterm, term) - }) - } -} - -func TestUnstableRestore(t *testing.T) { - u := unstable{ - entries: []pb.Entry{{Index: 5, Term: 1}}, - offset: 5, - snapshot: &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - logger: raftLogger, - } - s := pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 6, Term: 2}} - u.restore(s) - - require.Equal(t, s.Metadata.Index+1, u.offset) - require.Zero(t, len(u.entries)) - require.Equal(t, &s, u.snapshot) -} - -func TestUnstableStableTo(t *testing.T) { - tests := []struct { - entries []pb.Entry - offset uint64 - snap *pb.Snapshot - index, term uint64 - - woffset uint64 - wlen int - }{ - { - []pb.Entry{}, 0, nil, - 5, 
1, - 0, 0, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - 5, 1, // stable to the first entry - 6, 0, - }, - { - []pb.Entry{{Index: 5, Term: 1}, {Index: 6, Term: 1}}, 5, nil, - 5, 1, // stable to the first entry - 6, 1, - }, - { - []pb.Entry{{Index: 6, Term: 2}}, 6, nil, - 6, 1, // stable to the first entry and term mismatch - 6, 1, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - 4, 1, // stable to old entry - 5, 1, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - 4, 2, // stable to old entry - 5, 1, - }, - // with snapshot - { - []pb.Entry{{Index: 5, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 5, 1, // stable to the first entry - 6, 0, - }, - { - []pb.Entry{{Index: 5, Term: 1}, {Index: 6, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 5, 1, // stable to the first entry - 6, 1, - }, - { - []pb.Entry{{Index: 6, Term: 2}}, 6, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 5, Term: 1}}, - 6, 1, // stable to the first entry and term mismatch - 6, 1, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 1}}, - 4, 1, // stable to snapshot - 5, 1, - }, - { - []pb.Entry{{Index: 5, Term: 2}}, 5, &pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: 4, Term: 2}}, - 4, 1, // stable to old entry - 5, 1, - }, - } - - for i, tt := range tests { - tt := tt - t.Run(fmt.Sprint(i), func(t *testing.T) { - u := unstable{ - entries: tt.entries, - offset: tt.offset, - snapshot: tt.snap, - logger: raftLogger, - } - u.stableTo(tt.index, tt.term) - require.Equal(t, tt.woffset, u.offset) - require.Equal(t, tt.wlen, len(u.entries)) - }) - } -} - -func TestUnstableTruncateAndAppend(t *testing.T) { - tests := []struct { - entries []pb.Entry - offset uint64 - snap *pb.Snapshot - toappend []pb.Entry - - woffset uint64 - wentries []pb.Entry - }{ - // append to the end - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - []pb.Entry{{Index: 6, Term: 1}, {Index: 7, 
Term: 1}}, - 5, []pb.Entry{{Index: 5, Term: 1}, {Index: 6, Term: 1}, {Index: 7, Term: 1}}, - }, - // replace the unstable entries - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - []pb.Entry{{Index: 5, Term: 2}, {Index: 6, Term: 2}}, - 5, []pb.Entry{{Index: 5, Term: 2}, {Index: 6, Term: 2}}, - }, - { - []pb.Entry{{Index: 5, Term: 1}}, 5, nil, - []pb.Entry{{Index: 4, Term: 2}, {Index: 5, Term: 2}, {Index: 6, Term: 2}}, - 4, []pb.Entry{{Index: 4, Term: 2}, {Index: 5, Term: 2}, {Index: 6, Term: 2}}, - }, - // truncate the existing entries and append - { - []pb.Entry{{Index: 5, Term: 1}, {Index: 6, Term: 1}, {Index: 7, Term: 1}}, 5, nil, - []pb.Entry{{Index: 6, Term: 2}}, - 5, []pb.Entry{{Index: 5, Term: 1}, {Index: 6, Term: 2}}, - }, - { - []pb.Entry{{Index: 5, Term: 1}, {Index: 6, Term: 1}, {Index: 7, Term: 1}}, 5, nil, - []pb.Entry{{Index: 7, Term: 2}, {Index: 8, Term: 2}}, - 5, []pb.Entry{{Index: 5, Term: 1}, {Index: 6, Term: 1}, {Index: 7, Term: 2}, {Index: 8, Term: 2}}, - }, - } - - for i, tt := range tests { - tt := tt - t.Run(fmt.Sprint(i), func(t *testing.T) { - u := unstable{ - entries: tt.entries, - offset: tt.offset, - snapshot: tt.snap, - logger: raftLogger, - } - u.truncateAndAppend(tt.toappend) - require.Equal(t, tt.woffset, u.offset) - require.Equal(t, tt.wentries, u.entries) - }) - } -} diff --git a/raft/logger.go b/raft/logger.go deleted file mode 100644 index e3cb00cc9d13..000000000000 --- a/raft/logger.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "fmt" - "io" - "log" - "os" - "sync" -) - -type Logger interface { - Debug(v ...interface{}) - Debugf(format string, v ...interface{}) - - Error(v ...interface{}) - Errorf(format string, v ...interface{}) - - Info(v ...interface{}) - Infof(format string, v ...interface{}) - - Warning(v ...interface{}) - Warningf(format string, v ...interface{}) - - Fatal(v ...interface{}) - Fatalf(format string, v ...interface{}) - - Panic(v ...interface{}) - Panicf(format string, v ...interface{}) -} - -func SetLogger(l Logger) { - raftLoggerMu.Lock() - raftLogger = l - raftLoggerMu.Unlock() -} - -func ResetDefaultLogger() { - SetLogger(defaultLogger) -} - -func getLogger() Logger { - raftLoggerMu.Lock() - defer raftLoggerMu.Unlock() - return raftLogger -} - -var ( - defaultLogger = &DefaultLogger{Logger: log.New(os.Stderr, "raft", log.LstdFlags)} - discardLogger = &DefaultLogger{Logger: log.New(io.Discard, "", 0)} - raftLoggerMu sync.Mutex - raftLogger = Logger(defaultLogger) -) - -const ( - calldepth = 2 -) - -// DefaultLogger is a default implementation of the Logger interface. 
-type DefaultLogger struct { - *log.Logger - debug bool -} - -func (l *DefaultLogger) EnableTimestamps() { - l.SetFlags(l.Flags() | log.Ldate | log.Ltime) -} - -func (l *DefaultLogger) EnableDebug() { - l.debug = true -} - -func (l *DefaultLogger) Debug(v ...interface{}) { - if l.debug { - l.Output(calldepth, header("DEBUG", fmt.Sprint(v...))) - } -} - -func (l *DefaultLogger) Debugf(format string, v ...interface{}) { - if l.debug { - l.Output(calldepth, header("DEBUG", fmt.Sprintf(format, v...))) - } -} - -func (l *DefaultLogger) Info(v ...interface{}) { - l.Output(calldepth, header("INFO", fmt.Sprint(v...))) -} - -func (l *DefaultLogger) Infof(format string, v ...interface{}) { - l.Output(calldepth, header("INFO", fmt.Sprintf(format, v...))) -} - -func (l *DefaultLogger) Error(v ...interface{}) { - l.Output(calldepth, header("ERROR", fmt.Sprint(v...))) -} - -func (l *DefaultLogger) Errorf(format string, v ...interface{}) { - l.Output(calldepth, header("ERROR", fmt.Sprintf(format, v...))) -} - -func (l *DefaultLogger) Warning(v ...interface{}) { - l.Output(calldepth, header("WARN", fmt.Sprint(v...))) -} - -func (l *DefaultLogger) Warningf(format string, v ...interface{}) { - l.Output(calldepth, header("WARN", fmt.Sprintf(format, v...))) -} - -func (l *DefaultLogger) Fatal(v ...interface{}) { - l.Output(calldepth, header("FATAL", fmt.Sprint(v...))) - os.Exit(1) -} - -func (l *DefaultLogger) Fatalf(format string, v ...interface{}) { - l.Output(calldepth, header("FATAL", fmt.Sprintf(format, v...))) - os.Exit(1) -} - -func (l *DefaultLogger) Panic(v ...interface{}) { - l.Logger.Panic(v...) -} - -func (l *DefaultLogger) Panicf(format string, v ...interface{}) { - l.Logger.Panicf(format, v...) 
-} - -func header(lvl, msg string) string { - return fmt.Sprintf("%s: %s", lvl, msg) -} diff --git a/raft/node.go b/raft/node.go deleted file mode 100644 index c05379ac7fc2..000000000000 --- a/raft/node.go +++ /dev/null @@ -1,593 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "context" - "errors" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -type SnapshotStatus int - -const ( - SnapshotFinish SnapshotStatus = 1 - SnapshotFailure SnapshotStatus = 2 -) - -var ( - emptyState = pb.HardState{} - - // ErrStopped is returned by methods on Nodes that have been stopped. - ErrStopped = errors.New("raft: stopped") -) - -// SoftState provides state that is useful for logging and debugging. -// The state is volatile and does not need to be persisted to the WAL. -type SoftState struct { - Lead uint64 // must use atomic operations to access; keep 64-bit aligned. - RaftState StateType -} - -func (a *SoftState) equal(b *SoftState) bool { - return a.Lead == b.Lead && a.RaftState == b.RaftState -} - -// Ready encapsulates the entries and messages that are ready to read, -// be saved to stable storage, committed or sent to other peers. -// All fields in Ready are read-only. -type Ready struct { - // The current volatile state of a Node. - // SoftState will be nil if there is no update. - // It is not required to consume or store SoftState. 
- *SoftState - - // The current state of a Node to be saved to stable storage BEFORE - // Messages are sent. - // HardState will be equal to empty state if there is no update. - pb.HardState - - // ReadStates can be used for node to serve linearizable read requests locally - // when its applied index is greater than the index in ReadState. - // Note that the readState will be returned when raft receives msgReadIndex. - // The returned is only valid for the request that requested to read. - ReadStates []ReadState - - // Entries specifies entries to be saved to stable storage BEFORE - // Messages are sent. - Entries []pb.Entry - - // Snapshot specifies the snapshot to be saved to stable storage. - Snapshot pb.Snapshot - - // CommittedEntries specifies entries to be committed to a - // store/state-machine. These have previously been committed to stable - // store. - CommittedEntries []pb.Entry - - // Messages specifies outbound messages to be sent AFTER Entries are - // committed to stable storage. - // If it contains a MsgSnap message, the application MUST report back to raft - // when the snapshot has been received or has failed by calling ReportSnapshot. - Messages []pb.Message - - // MustSync indicates whether the HardState and Entries must be synchronously - // written to disk or if an asynchronous write is permissible. - MustSync bool -} - -func isHardStateEqual(a, b pb.HardState) bool { - return a.Term == b.Term && a.Vote == b.Vote && a.Commit == b.Commit -} - -// IsEmptyHardState returns true if the given HardState is empty. -func IsEmptyHardState(st pb.HardState) bool { - return isHardStateEqual(st, emptyState) -} - -// IsEmptySnap returns true if the given Snapshot is empty. -func IsEmptySnap(sp pb.Snapshot) bool { - return sp.Metadata.Index == 0 -} - -// appliedCursor extracts from the Ready the highest index the client has -// applied (once the Ready is confirmed via Advance). If no information is -// contained in the Ready, returns zero. 
-func (rd Ready) appliedCursor() uint64 { - if n := len(rd.CommittedEntries); n > 0 { - return rd.CommittedEntries[n-1].Index - } - if index := rd.Snapshot.Metadata.Index; index > 0 { - return index - } - return 0 -} - -// Node represents a node in a raft cluster. -type Node interface { - // Tick increments the internal logical clock for the Node by a single tick. Election - // timeouts and heartbeat timeouts are in units of ticks. - Tick() - // Campaign causes the Node to transition to candidate state and start campaigning to become leader. - Campaign(ctx context.Context) error - // Propose proposes that data be appended to the log. Note that proposals can be lost without - // notice, therefore it is user's job to ensure proposal retries. - Propose(ctx context.Context, data []byte) error - // ProposeConfChange proposes a configuration change. Like any proposal, the - // configuration change may be dropped with or without an error being - // returned. In particular, configuration changes are dropped unless the - // leader has certainty that there is no prior unapplied configuration - // change in its log. - // - // The method accepts either a pb.ConfChange (deprecated) or pb.ConfChangeV2 - // message. The latter allows arbitrary configuration changes via joint - // consensus, notably including replacing a voter. Passing a ConfChangeV2 - // message is only allowed if all Nodes participating in the cluster run a - // version of this library aware of the V2 API. See pb.ConfChangeV2 for - // usage details and semantics. - ProposeConfChange(ctx context.Context, cc pb.ConfChangeI) error - - // Step advances the state machine using the given message. ctx.Err() will be returned, if any. - Step(ctx context.Context, msg pb.Message) error - - // Ready returns a channel that returns the current point-in-time state. - // Users of the Node must call Advance after retrieving the state returned by Ready. 
- // - // NOTE: No committed entries from the next Ready may be applied until all committed entries - // and snapshots from the previous one have finished. - Ready() <-chan Ready - - // Advance notifies the Node that the application has saved progress up to the last Ready. - // It prepares the node to return the next available Ready. - // - // The application should generally call Advance after it applies the entries in last Ready. - // - // However, as an optimization, the application may call Advance while it is applying the - // commands. For example. when the last Ready contains a snapshot, the application might take - // a long time to apply the snapshot data. To continue receiving Ready without blocking raft - // progress, it can call Advance before finishing applying the last ready. - Advance() - // ApplyConfChange applies a config change (previously passed to - // ProposeConfChange) to the node. This must be called whenever a config - // change is observed in Ready.CommittedEntries, except when the app decides - // to reject the configuration change (i.e. treats it as a noop instead), in - // which case it must not be called. - // - // Returns an opaque non-nil ConfState protobuf which must be recorded in - // snapshots. - ApplyConfChange(cc pb.ConfChangeI) *pb.ConfState - - // TransferLeadership attempts to transfer leadership to the given transferee. - TransferLeadership(ctx context.Context, lead, transferee uint64) - - // ReadIndex request a read state. The read state will be set in the ready. - // Read state has a read index. Once the application advances further than the read - // index, any linearizable read requests issued before the read request can be - // processed safely. The read state will have the same rctx attached. - // Note that request can be lost without notice, therefore it is user's job - // to ensure read index retries. 
- ReadIndex(ctx context.Context, rctx []byte) error - - // Status returns the current status of the raft state machine. - Status() Status - // ReportUnreachable reports the given node is not reachable for the last send. - ReportUnreachable(id uint64) - // ReportSnapshot reports the status of the sent snapshot. The id is the raft ID of the follower - // who is meant to receive the snapshot, and the status is SnapshotFinish or SnapshotFailure. - // Calling ReportSnapshot with SnapshotFinish is a no-op. But, any failure in applying a - // snapshot (for e.g., while streaming it from leader to follower), should be reported to the - // leader with SnapshotFailure. When leader sends a snapshot to a follower, it pauses any raft - // log probes until the follower can apply the snapshot and advance its state. If the follower - // can't do that, for e.g., due to a crash, it could end up in a limbo, never getting any - // updates from the leader. Therefore, it is crucial that the application ensures that any - // failure in snapshot sending is caught and reported back to the leader; so it can resume raft - // log probing in the follower. - ReportSnapshot(id uint64, status SnapshotStatus) - // Stop performs any necessary termination of the Node. - Stop() -} - -type Peer struct { - ID uint64 - Context []byte -} - -func setupNode(c *Config, peers []Peer) *node { - if len(peers) == 0 { - panic("no peers given; use RestartNode instead") - } - rn, err := NewRawNode(c) - if err != nil { - panic(err) - } - err = rn.Bootstrap(peers) - if err != nil { - c.Logger.Warningf("error occurred during starting a new node: %v", err) - } - - n := newNode(rn) - return &n -} - -// StartNode returns a new Node given configuration and a list of raft peers. -// It appends a ConfChangeAddNode entry for each given peer to the initial log. -// -// Peers must not be zero length; call RestartNode in that case. 
-func StartNode(c *Config, peers []Peer) Node { - n := setupNode(c, peers) - go n.run() - return n -} - -// RestartNode is similar to StartNode but does not take a list of peers. -// The current membership of the cluster will be restored from the Storage. -// If the caller has an existing state machine, pass in the last log index that -// has been applied to it; otherwise use zero. -func RestartNode(c *Config) Node { - rn, err := NewRawNode(c) - if err != nil { - panic(err) - } - n := newNode(rn) - go n.run() - return &n -} - -type msgWithResult struct { - m pb.Message - result chan error -} - -// node is the canonical implementation of the Node interface -type node struct { - propc chan msgWithResult - recvc chan pb.Message - confc chan pb.ConfChangeV2 - confstatec chan pb.ConfState - readyc chan Ready - advancec chan struct{} - tickc chan struct{} - done chan struct{} - stop chan struct{} - status chan chan Status - - rn *RawNode -} - -func newNode(rn *RawNode) node { - return node{ - propc: make(chan msgWithResult), - recvc: make(chan pb.Message), - confc: make(chan pb.ConfChangeV2), - confstatec: make(chan pb.ConfState), - readyc: make(chan Ready), - advancec: make(chan struct{}), - // make tickc a buffered chan, so raft node can buffer some ticks when the node - // is busy processing raft messages. Raft node will resume process buffered - // ticks when it becomes idle. 
- tickc: make(chan struct{}, 128), - done: make(chan struct{}), - stop: make(chan struct{}), - status: make(chan chan Status), - rn: rn, - } -} - -func (n *node) Stop() { - select { - case n.stop <- struct{}{}: - // Not already stopped, so trigger it - case <-n.done: - // Node has already been stopped - no need to do anything - return - } - // Block until the stop has been acknowledged by run() - <-n.done -} - -func (n *node) run() { - var propc chan msgWithResult - var readyc chan Ready - var advancec chan struct{} - var rd Ready - - r := n.rn.raft - - lead := None - - for { - if advancec != nil { - readyc = nil - } else if n.rn.HasReady() { - // Populate a Ready. Note that this Ready is not guaranteed to - // actually be handled. We will arm readyc, but there's no guarantee - // that we will actually send on it. It's possible that we will - // service another channel instead, loop around, and then populate - // the Ready again. We could instead force the previous Ready to be - // handled first, but it's generally good to emit larger Readys plus - // it simplifies testing (by emitting less frequently and more - // predictably). - rd = n.rn.readyWithoutAccept() - readyc = n.readyc - } - - if lead != r.lead { - if r.hasLeader() { - if lead == None { - r.logger.Infof("raft.node: %x elected leader %x at term %d", r.id, r.lead, r.Term) - } else { - r.logger.Infof("raft.node: %x changed leader from %x to %x at term %d", r.id, lead, r.lead, r.Term) - } - propc = n.propc - } else { - r.logger.Infof("raft.node: %x lost leader %x at term %d", r.id, lead, r.Term) - propc = nil - } - lead = r.lead - } - - select { - // TODO: maybe buffer the config propose if there exists one (the way - // described in raft dissertation) - // Currently it is dropped in Step silently. - case pm := <-propc: - m := pm.m - m.From = r.id - err := r.Step(m) - if pm.result != nil { - pm.result <- err - close(pm.result) - } - case m := <-n.recvc: - // filter out response message from unknown From. 
- if pr := r.prs.Progress[m.From]; pr != nil || !IsResponseMsg(m.Type) { - r.Step(m) - } - case cc := <-n.confc: - _, okBefore := r.prs.Progress[r.id] - cs := r.applyConfChange(cc) - // If the node was removed, block incoming proposals. Note that we - // only do this if the node was in the config before. Nodes may be - // a member of the group without knowing this (when they're catching - // up on the log and don't have the latest config) and we don't want - // to block the proposal channel in that case. - // - // NB: propc is reset when the leader changes, which, if we learn - // about it, sort of implies that we got readded, maybe? This isn't - // very sound and likely has bugs. - if _, okAfter := r.prs.Progress[r.id]; okBefore && !okAfter { - var found bool - for _, sl := range [][]uint64{cs.Voters, cs.VotersOutgoing} { - for _, id := range sl { - if id == r.id { - found = true - break - } - } - if found { - break - } - } - if !found { - propc = nil - } - } - select { - case n.confstatec <- cs: - case <-n.done: - } - case <-n.tickc: - n.rn.Tick() - case readyc <- rd: - n.rn.acceptReady(rd) - advancec = n.advancec - case <-advancec: - n.rn.Advance(rd) - rd = Ready{} - advancec = nil - case c := <-n.status: - c <- getStatus(r) - case <-n.stop: - close(n.done) - return - } - } -} - -// Tick increments the internal logical clock for this Node. Election timeouts -// and heartbeat timeouts are in units of ticks. -func (n *node) Tick() { - select { - case n.tickc <- struct{}{}: - case <-n.done: - default: - n.rn.raft.logger.Warningf("%x A tick missed to fire. 
Node blocks too long!", n.rn.raft.id) - } -} - -func (n *node) Campaign(ctx context.Context) error { return n.step(ctx, pb.Message{Type: pb.MsgHup}) } - -func (n *node) Propose(ctx context.Context, data []byte) error { - return n.stepWait(ctx, pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Data: data}}}) -} - -func (n *node) Step(ctx context.Context, m pb.Message) error { - // ignore unexpected local messages receiving over network - if IsLocalMsg(m.Type) { - // TODO: return an error? - return nil - } - return n.step(ctx, m) -} - -func confChangeToMsg(c pb.ConfChangeI) (pb.Message, error) { - typ, data, err := pb.MarshalConfChange(c) - if err != nil { - return pb.Message{}, err - } - return pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Type: typ, Data: data}}}, nil -} - -func (n *node) ProposeConfChange(ctx context.Context, cc pb.ConfChangeI) error { - msg, err := confChangeToMsg(cc) - if err != nil { - return err - } - return n.Step(ctx, msg) -} - -func (n *node) step(ctx context.Context, m pb.Message) error { - return n.stepWithWaitOption(ctx, m, false) -} - -func (n *node) stepWait(ctx context.Context, m pb.Message) error { - return n.stepWithWaitOption(ctx, m, true) -} - -// Step advances the state machine using msgs. The ctx.Err() will be returned, -// if any. 
-func (n *node) stepWithWaitOption(ctx context.Context, m pb.Message, wait bool) error { - if m.Type != pb.MsgProp { - select { - case n.recvc <- m: - return nil - case <-ctx.Done(): - return ctx.Err() - case <-n.done: - return ErrStopped - } - } - ch := n.propc - pm := msgWithResult{m: m} - if wait { - pm.result = make(chan error, 1) - } - select { - case ch <- pm: - if !wait { - return nil - } - case <-ctx.Done(): - return ctx.Err() - case <-n.done: - return ErrStopped - } - select { - case err := <-pm.result: - if err != nil { - return err - } - case <-ctx.Done(): - return ctx.Err() - case <-n.done: - return ErrStopped - } - return nil -} - -func (n *node) Ready() <-chan Ready { return n.readyc } - -func (n *node) Advance() { - select { - case n.advancec <- struct{}{}: - case <-n.done: - } -} - -func (n *node) ApplyConfChange(cc pb.ConfChangeI) *pb.ConfState { - var cs pb.ConfState - select { - case n.confc <- cc.AsV2(): - case <-n.done: - } - select { - case cs = <-n.confstatec: - case <-n.done: - } - return &cs -} - -func (n *node) Status() Status { - c := make(chan Status) - select { - case n.status <- c: - return <-c - case <-n.done: - return Status{} - } -} - -func (n *node) ReportUnreachable(id uint64) { - select { - case n.recvc <- pb.Message{Type: pb.MsgUnreachable, From: id}: - case <-n.done: - } -} - -func (n *node) ReportSnapshot(id uint64, status SnapshotStatus) { - rej := status == SnapshotFailure - - select { - case n.recvc <- pb.Message{Type: pb.MsgSnapStatus, From: id, Reject: rej}: - case <-n.done: - } -} - -func (n *node) TransferLeadership(ctx context.Context, lead, transferee uint64) { - select { - // manually set 'from' and 'to', so that leader can voluntarily transfers its leadership - case n.recvc <- pb.Message{Type: pb.MsgTransferLeader, From: transferee, To: lead}: - case <-n.done: - case <-ctx.Done(): - } -} - -func (n *node) ReadIndex(ctx context.Context, rctx []byte) error { - return n.step(ctx, pb.Message{Type: pb.MsgReadIndex, 
Entries: []pb.Entry{{Data: rctx}}}) -} - -func newReady(r *raft, prevSoftSt *SoftState, prevHardSt pb.HardState) Ready { - rd := Ready{ - Entries: r.raftLog.unstableEntries(), - CommittedEntries: r.raftLog.nextCommittedEnts(), - Messages: r.msgs, - } - if softSt := r.softState(); !softSt.equal(prevSoftSt) { - rd.SoftState = softSt - } - if hardSt := r.hardState(); !isHardStateEqual(hardSt, prevHardSt) { - rd.HardState = hardSt - } - if r.raftLog.unstable.snapshot != nil { - rd.Snapshot = *r.raftLog.unstable.snapshot - } - if len(r.readStates) != 0 { - rd.ReadStates = r.readStates - } - rd.MustSync = MustSync(r.hardState(), prevHardSt, len(rd.Entries)) - return rd -} - -// MustSync returns true if the hard state and count of Raft entries indicate -// that a synchronous write to persistent storage is required. -func MustSync(st, prevst pb.HardState, entsnum int) bool { - // Persistent state on all servers: - // (Updated on stable storage before responding to RPCs) - // currentTerm - // votedFor - // log entries[] - return entsnum != 0 || st.Vote != prevst.Vote || st.Term != prevst.Term -} diff --git a/raft/node_bench_test.go b/raft/node_bench_test.go deleted file mode 100644 index fde40feb4dca..000000000000 --- a/raft/node_bench_test.go +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package raft - -import ( - "context" - "testing" - "time" -) - -func BenchmarkOneNode(b *testing.B) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - s := newTestMemoryStorage(withPeers(1)) - rn := newTestRawNode(1, 10, 1, s) - n := newNode(rn) - go n.run() - - defer n.Stop() - - n.Campaign(ctx) - go func() { - for i := 0; i < b.N; i++ { - n.Propose(ctx, []byte("foo")) - } - }() - - for { - rd := <-n.Ready() - s.Append(rd.Entries) - // a reasonable disk sync latency - time.Sleep(1 * time.Millisecond) - n.Advance() - if rd.HardState.Commit == uint64(b.N+1) { - return - } - } -} diff --git a/raft/node_test.go b/raft/node_test.go deleted file mode 100644 index 77c566d84a5a..000000000000 --- a/raft/node_test.go +++ /dev/null @@ -1,991 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "bytes" - "context" - "fmt" - "math" - "reflect" - "strings" - "testing" - "time" - - "go.etcd.io/etcd/raft/v3/raftpb" -) - -// readyWithTimeout selects from n.Ready() with a 1-second timeout. It -// panics on timeout, which is better than the indefinite wait that -// would occur if this channel were read without being wrapped in a -// select. 
-func readyWithTimeout(n Node) Ready { - select { - case rd := <-n.Ready(): - if nn, ok := n.(*nodeTestHarness); ok { - n = nn.node - } - if nn, ok := n.(*node); ok { - nn.rn.raft.logger.Infof("emitted ready: %s", DescribeReady(rd, nil)) - } - return rd - case <-time.After(time.Second): - panic("timed out waiting for ready") - } -} - -// TestNodeStep ensures that node.Step sends msgProp to propc chan -// and other kinds of messages to recvc chan. -func TestNodeStep(t *testing.T) { - for i, msgn := range raftpb.MessageType_name { - n := &node{ - propc: make(chan msgWithResult, 1), - recvc: make(chan raftpb.Message, 1), - } - msgt := raftpb.MessageType(i) - n.Step(context.TODO(), raftpb.Message{Type: msgt}) - // Proposal goes to proc chan. Others go to recvc chan. - if msgt == raftpb.MsgProp { - select { - case <-n.propc: - default: - t.Errorf("%d: cannot receive %s on propc chan", msgt, msgn) - } - } else { - if IsLocalMsg(msgt) { - select { - case <-n.recvc: - t.Errorf("%d: step should ignore %s", msgt, msgn) - default: - } - } else { - select { - case <-n.recvc: - default: - t.Errorf("%d: cannot receive %s on recvc chan", msgt, msgn) - } - } - } - } -} - -// TestNodeStepUnblock should Cancel and Stop should unblock Step() -func TestNodeStepUnblock(t *testing.T) { - // a node without buffer to block step - n := &node{ - propc: make(chan msgWithResult), - done: make(chan struct{}), - } - - ctx, cancel := context.WithCancel(context.Background()) - stopFunc := func() { close(n.done) } - - tests := []struct { - unblock func() - werr error - }{ - {stopFunc, ErrStopped}, - {cancel, context.Canceled}, - } - - for i, tt := range tests { - errc := make(chan error, 1) - go func() { - err := n.Step(ctx, raftpb.Message{Type: raftpb.MsgProp}) - errc <- err - }() - tt.unblock() - select { - case err := <-errc: - if err != tt.werr { - t.Errorf("#%d: err = %v, want %v", i, err, tt.werr) - } - //clean up side-effect - if ctx.Err() != nil { - ctx = context.TODO() - } - select { - 
case <-n.done: - n.done = make(chan struct{}) - default: - } - case <-time.After(1 * time.Second): - t.Fatalf("#%d: failed to unblock step", i) - } - } -} - -// TestNodePropose ensures that node.Propose sends the given proposal to the underlying raft. -func TestNodePropose(t *testing.T) { - var msgs []raftpb.Message - appendStep := func(r *raft, m raftpb.Message) error { - t.Log(DescribeMessage(m, nil)) - if m.Type == raftpb.MsgAppResp { - return nil // injected by (*raft).advance - } - msgs = append(msgs, m) - return nil - } - - s := newTestMemoryStorage(withPeers(1)) - rn := newTestRawNode(1, 10, 1, s) - n := newNode(rn) - r := rn.raft - go n.run() - if err := n.Campaign(context.TODO()); err != nil { - t.Fatal(err) - } - for { - rd := <-n.Ready() - s.Append(rd.Entries) - // change the step function to appendStep until this raft becomes leader - if rd.SoftState.Lead == r.id { - r.step = appendStep - n.Advance() - break - } - n.Advance() - } - n.Propose(context.TODO(), []byte("somedata")) - n.Stop() - - if len(msgs) != 1 { - t.Fatalf("len(msgs) = %d, want %d", len(msgs), 1) - } - if msgs[0].Type != raftpb.MsgProp { - t.Errorf("msg type = %d, want %d", msgs[0].Type, raftpb.MsgProp) - } - if !bytes.Equal(msgs[0].Entries[0].Data, []byte("somedata")) { - t.Errorf("data = %v, want %v", msgs[0].Entries[0].Data, []byte("somedata")) - } -} - -// TestDisableProposalForwarding ensures that proposals are not forwarded to -// the leader when DisableProposalForwarding is true. 
-func TestDisableProposalForwarding(t *testing.T) { - r1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - cfg3 := newTestConfig(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - cfg3.DisableProposalForwarding = true - r3 := newRaft(cfg3) - nt := newNetwork(r1, r2, r3) - - // elect r1 as leader - nt.send(raftpb.Message{From: 1, To: 1, Type: raftpb.MsgHup}) - - var testEntries = []raftpb.Entry{{Data: []byte("testdata")}} - - // send proposal to r2(follower) where DisableProposalForwarding is false - r2.Step(raftpb.Message{From: 2, To: 2, Type: raftpb.MsgProp, Entries: testEntries}) - - // verify r2(follower) does forward the proposal when DisableProposalForwarding is false - if len(r2.msgs) != 1 { - t.Fatalf("len(r2.msgs) expected 1, got %d", len(r2.msgs)) - } - - // send proposal to r3(follower) where DisableProposalForwarding is true - r3.Step(raftpb.Message{From: 3, To: 3, Type: raftpb.MsgProp, Entries: testEntries}) - - // verify r3(follower) does not forward the proposal when DisableProposalForwarding is true - if len(r3.msgs) != 0 { - t.Fatalf("len(r3.msgs) expected 0, got %d", len(r3.msgs)) - } -} - -// TestNodeReadIndexToOldLeader ensures that raftpb.MsgReadIndex to old leader -// gets forwarded to the new leader and 'send' method does not attach its term. 
-func TestNodeReadIndexToOldLeader(t *testing.T) { - r1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r3 := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - nt := newNetwork(r1, r2, r3) - - // elect r1 as leader - nt.send(raftpb.Message{From: 1, To: 1, Type: raftpb.MsgHup}) - - var testEntries = []raftpb.Entry{{Data: []byte("testdata")}} - - // send readindex request to r2(follower) - r2.Step(raftpb.Message{From: 2, To: 2, Type: raftpb.MsgReadIndex, Entries: testEntries}) - - // verify r2(follower) forwards this message to r1(leader) with term not set - if len(r2.msgs) != 1 { - t.Fatalf("len(r2.msgs) expected 1, got %d", len(r2.msgs)) - } - readIndxMsg1 := raftpb.Message{From: 2, To: 1, Type: raftpb.MsgReadIndex, Entries: testEntries} - if !reflect.DeepEqual(r2.msgs[0], readIndxMsg1) { - t.Fatalf("r2.msgs[0] expected %+v, got %+v", readIndxMsg1, r2.msgs[0]) - } - - // send readindex request to r3(follower) - r3.Step(raftpb.Message{From: 3, To: 3, Type: raftpb.MsgReadIndex, Entries: testEntries}) - - // verify r3(follower) forwards this message to r1(leader) with term not set as well. 
- if len(r3.msgs) != 1 { - t.Fatalf("len(r3.msgs) expected 1, got %d", len(r3.msgs)) - } - readIndxMsg2 := raftpb.Message{From: 3, To: 1, Type: raftpb.MsgReadIndex, Entries: testEntries} - if !reflect.DeepEqual(r3.msgs[0], readIndxMsg2) { - t.Fatalf("r3.msgs[0] expected %+v, got %+v", readIndxMsg2, r3.msgs[0]) - } - - // now elect r3 as leader - nt.send(raftpb.Message{From: 3, To: 3, Type: raftpb.MsgHup}) - - // let r1 steps the two messages previously we got from r2, r3 - r1.Step(readIndxMsg1) - r1.Step(readIndxMsg2) - - // verify r1(follower) forwards these messages again to r3(new leader) - if len(r1.msgs) != 2 { - t.Fatalf("len(r1.msgs) expected 1, got %d", len(r1.msgs)) - } - readIndxMsg3 := raftpb.Message{From: 2, To: 3, Type: raftpb.MsgReadIndex, Entries: testEntries} - if !reflect.DeepEqual(r1.msgs[0], readIndxMsg3) { - t.Fatalf("r1.msgs[0] expected %+v, got %+v", readIndxMsg3, r1.msgs[0]) - } - readIndxMsg3 = raftpb.Message{From: 3, To: 3, Type: raftpb.MsgReadIndex, Entries: testEntries} - if !reflect.DeepEqual(r1.msgs[1], readIndxMsg3) { - t.Fatalf("r1.msgs[1] expected %+v, got %+v", readIndxMsg3, r1.msgs[1]) - } -} - -// TestNodeProposeConfig ensures that node.ProposeConfChange sends the given configuration proposal -// to the underlying raft. 
-func TestNodeProposeConfig(t *testing.T) { - var msgs []raftpb.Message - appendStep := func(r *raft, m raftpb.Message) error { - if m.Type == raftpb.MsgAppResp { - return nil // injected by (*raft).advance - } - msgs = append(msgs, m) - return nil - } - - s := newTestMemoryStorage(withPeers(1)) - rn := newTestRawNode(1, 10, 1, s) - n := newNode(rn) - r := rn.raft - go n.run() - n.Campaign(context.TODO()) - for { - rd := <-n.Ready() - s.Append(rd.Entries) - // change the step function to appendStep until this raft becomes leader - if rd.SoftState.Lead == r.id { - r.step = appendStep - n.Advance() - break - } - n.Advance() - } - cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1} - ccdata, err := cc.Marshal() - if err != nil { - t.Fatal(err) - } - n.ProposeConfChange(context.TODO(), cc) - n.Stop() - - if len(msgs) != 1 { - t.Fatalf("len(msgs) = %d, want %d", len(msgs), 1) - } - if msgs[0].Type != raftpb.MsgProp { - t.Errorf("msg type = %d, want %d", msgs[0].Type, raftpb.MsgProp) - } - if !bytes.Equal(msgs[0].Entries[0].Data, ccdata) { - t.Errorf("data = %v, want %v", msgs[0].Entries[0].Data, ccdata) - } -} - -// TestNodeProposeAddDuplicateNode ensures that two proposes to add the same node should -// not affect the later propose to add new node. 
-func TestNodeProposeAddDuplicateNode(t *testing.T) { - s := newTestMemoryStorage(withPeers(1)) - cfg := newTestConfig(1, 10, 1, s) - ctx, cancel, n := newNodeTestHarness(context.Background(), t, cfg) - defer cancel() - n.Campaign(ctx) - allCommittedEntries := make([]raftpb.Entry, 0) - ticker := time.NewTicker(time.Millisecond * 100) - defer ticker.Stop() - goroutineStopped := make(chan struct{}) - applyConfChan := make(chan struct{}) - - rd := readyWithTimeout(n) - s.Append(rd.Entries) - n.Advance() - - go func() { - defer close(goroutineStopped) - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - n.Tick() - case rd := <-n.Ready(): - t.Log(DescribeReady(rd, nil)) - s.Append(rd.Entries) - applied := false - for _, e := range rd.CommittedEntries { - allCommittedEntries = append(allCommittedEntries, e) - switch e.Type { - case raftpb.EntryNormal: - case raftpb.EntryConfChange: - var cc raftpb.ConfChange - cc.Unmarshal(e.Data) - n.ApplyConfChange(cc) - applied = true - } - } - n.Advance() - if applied { - applyConfChan <- struct{}{} - } - } - } - }() - - cc1 := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1} - ccdata1, _ := cc1.Marshal() - n.ProposeConfChange(ctx, cc1) - <-applyConfChan - - // try add the same node again - n.ProposeConfChange(ctx, cc1) - <-applyConfChan - - // the new node join should be ok - cc2 := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 2} - ccdata2, _ := cc2.Marshal() - n.ProposeConfChange(ctx, cc2) - <-applyConfChan - - cancel() - <-goroutineStopped - - if len(allCommittedEntries) != 4 { - t.Errorf("len(entry) = %d, want %d, %v\n", len(allCommittedEntries), 4, allCommittedEntries) - } - if !bytes.Equal(allCommittedEntries[1].Data, ccdata1) { - t.Errorf("data = %v, want %v", allCommittedEntries[1].Data, ccdata1) - } - if !bytes.Equal(allCommittedEntries[3].Data, ccdata2) { - t.Errorf("data = %v, want %v", allCommittedEntries[3].Data, ccdata2) - } -} - -// TestBlockProposal ensures that node will 
block proposal when it does not -// know who is the current leader; node will accept proposal when it knows -// who is the current leader. -func TestBlockProposal(t *testing.T) { - rn := newTestRawNode(1, 10, 1, newTestMemoryStorage(withPeers(1))) - n := newNode(rn) - go n.run() - defer n.Stop() - - errc := make(chan error, 1) - go func() { - errc <- n.Propose(context.TODO(), []byte("somedata")) - }() - - time.Sleep(10 * time.Millisecond) - - select { - case err := <-errc: - t.Errorf("err = %v, want blocking", err) - default: - } - - n.Campaign(context.TODO()) - select { - case err := <-errc: - if err != nil { - t.Errorf("err = %v, want %v", err, nil) - } - case <-time.After(10 * time.Second): - t.Errorf("blocking proposal, want unblocking") - } -} - -func TestNodeProposeWaitDropped(t *testing.T) { - var msgs []raftpb.Message - droppingMsg := []byte("test_dropping") - dropStep := func(r *raft, m raftpb.Message) error { - if m.Type == raftpb.MsgProp && strings.Contains(m.String(), string(droppingMsg)) { - t.Logf("dropping message: %v", m.String()) - return ErrProposalDropped - } - if m.Type == raftpb.MsgAppResp { - // This is produced by raft internally, see (*raft).advance. 
- return nil - } - msgs = append(msgs, m) - return nil - } - - s := newTestMemoryStorage(withPeers(1)) - rn := newTestRawNode(1, 10, 1, s) - n := newNode(rn) - r := rn.raft - go n.run() - n.Campaign(context.TODO()) - for { - rd := <-n.Ready() - s.Append(rd.Entries) - // change the step function to dropStep until this raft becomes leader - if rd.SoftState.Lead == r.id { - r.step = dropStep - n.Advance() - break - } - n.Advance() - } - proposalTimeout := time.Millisecond * 100 - ctx, cancel := context.WithTimeout(context.Background(), proposalTimeout) - // propose with cancel should be cancelled earyly if dropped - err := n.Propose(ctx, droppingMsg) - if err != ErrProposalDropped { - t.Errorf("should drop proposal : %v", err) - } - cancel() - - n.Stop() - if len(msgs) != 0 { - t.Fatalf("len(msgs) = %d, want %d", len(msgs), 0) - } -} - -// TestNodeTick ensures that node.Tick() will increase the -// elapsed of the underlying raft state machine. -func TestNodeTick(t *testing.T) { - s := newTestMemoryStorage(withPeers(1)) - rn := newTestRawNode(1, 10, 1, s) - n := newNode(rn) - r := rn.raft - go n.run() - elapsed := r.electionElapsed - n.Tick() - - for len(n.tickc) != 0 { - time.Sleep(100 * time.Millisecond) - } - - n.Stop() - if r.electionElapsed != elapsed+1 { - t.Errorf("elapsed = %d, want %d", r.electionElapsed, elapsed+1) - } -} - -// TestNodeStop ensures that node.Stop() blocks until the node has stopped -// processing, and that it is idempotent -func TestNodeStop(t *testing.T) { - rn := newTestRawNode(1, 10, 1, newTestMemoryStorage(withPeers(1))) - n := newNode(rn) - donec := make(chan struct{}) - - go func() { - n.run() - close(donec) - }() - - status := n.Status() - n.Stop() - - select { - case <-donec: - case <-time.After(time.Second): - t.Fatalf("timed out waiting for node to stop!") - } - - emptyStatus := Status{} - - if reflect.DeepEqual(status, emptyStatus) { - t.Errorf("status = %v, want not empty", status) - } - // Further status should return be empty, 
the node is stopped. - status = n.Status() - if !reflect.DeepEqual(status, emptyStatus) { - t.Errorf("status = %v, want empty", status) - } - // Subsequent Stops should have no effect. - n.Stop() -} - -// TestNodeStart ensures that a node can be started correctly. The node should -// start with correct configuration change entries, and can accept and commit -// proposals. -func TestNodeStart(t *testing.T) { - cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1} - ccdata, err := cc.Marshal() - if err != nil { - t.Fatalf("unexpected marshal error: %v", err) - } - wants := []Ready{ - { - HardState: raftpb.HardState{Term: 1, Commit: 1, Vote: 0}, - Entries: []raftpb.Entry{ - {Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata}, - }, - CommittedEntries: []raftpb.Entry{ - {Type: raftpb.EntryConfChange, Term: 1, Index: 1, Data: ccdata}, - }, - MustSync: true, - }, - { - HardState: raftpb.HardState{Term: 2, Commit: 2, Vote: 1}, - Entries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}}, - CommittedEntries: []raftpb.Entry{{Term: 2, Index: 2, Data: nil}}, - MustSync: true, - }, - { - HardState: raftpb.HardState{Term: 2, Commit: 3, Vote: 1}, - Entries: nil, - CommittedEntries: []raftpb.Entry{{Term: 2, Index: 3, Data: []byte("foo")}}, - MustSync: false, - }, - } - storage := NewMemoryStorage() - c := &Config{ - ID: 1, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: storage, - MaxSizePerMsg: noLimit, - MaxInflightMsgs: 256, - } - n := StartNode(c, []Peer{{ID: 1}}) - ctx, cancel, n := newNodeTestHarness(context.Background(), t, c, Peer{ID: 1}) - defer cancel() - - { - rd := <-n.Ready() - if !reflect.DeepEqual(rd, wants[0]) { - t.Fatalf("#1: rd = %+v,\n w %+v", rd, wants[0]) - } - storage.Append(rd.Entries) - n.Advance() - } - - if err := n.Campaign(ctx); err != nil { - t.Fatal(err) - } - - { - rd := <-n.Ready() - storage.Append(rd.Entries) - n.Advance() - } - - n.Propose(ctx, []byte("foo")) - { - rd := <-n.Ready() - if !reflect.DeepEqual(rd, 
wants[1]) { - t.Errorf("#2: rd = %+v,\n w %+v", rd, wants[1]) - } - storage.Append(rd.Entries) - n.Advance() - } - - { - rd := <-n.Ready() - if !reflect.DeepEqual(rd, wants[2]) { - t.Errorf("#3: rd = %+v,\n w %+v", rd, wants[2]) - } - storage.Append(rd.Entries) - n.Advance() - } - - select { - case rd := <-n.Ready(): - t.Errorf("unexpected Ready: %+v", rd) - case <-time.After(time.Millisecond): - } -} - -func TestNodeRestart(t *testing.T) { - entries := []raftpb.Entry{ - {Term: 1, Index: 1}, - {Term: 1, Index: 2, Data: []byte("foo")}, - } - st := raftpb.HardState{Term: 1, Commit: 1} - - want := Ready{ - // No HardState is emitted because there was no change. - HardState: raftpb.HardState{}, - // commit up to index commit index in st - CommittedEntries: entries[:st.Commit], - // MustSync is false because no HardState or new entries are provided. - MustSync: false, - } - - storage := NewMemoryStorage() - storage.SetHardState(st) - storage.Append(entries) - c := &Config{ - ID: 1, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: storage, - MaxSizePerMsg: noLimit, - MaxInflightMsgs: 256, - } - n := RestartNode(c) - defer n.Stop() - if g := <-n.Ready(); !reflect.DeepEqual(g, want) { - t.Errorf("g = %+v,\n w %+v", g, want) - } - n.Advance() - - select { - case rd := <-n.Ready(): - t.Errorf("unexpected Ready: %+v", rd) - case <-time.After(time.Millisecond): - } -} - -func TestNodeRestartFromSnapshot(t *testing.T) { - snap := raftpb.Snapshot{ - Metadata: raftpb.SnapshotMetadata{ - ConfState: raftpb.ConfState{Voters: []uint64{1, 2}}, - Index: 2, - Term: 1, - }, - } - entries := []raftpb.Entry{ - {Term: 1, Index: 3, Data: []byte("foo")}, - } - st := raftpb.HardState{Term: 1, Commit: 3} - - want := Ready{ - // No HardState is emitted because nothing changed relative to what is - // already persisted. 
- HardState: raftpb.HardState{}, - // commit up to index commit index in st - CommittedEntries: entries, - // MustSync is only true when there is a new HardState or new entries; - // neither is the case here. - MustSync: false, - } - - s := NewMemoryStorage() - s.SetHardState(st) - s.ApplySnapshot(snap) - s.Append(entries) - c := &Config{ - ID: 1, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: s, - MaxSizePerMsg: noLimit, - MaxInflightMsgs: 256, - } - n := RestartNode(c) - defer n.Stop() - if g := <-n.Ready(); !reflect.DeepEqual(g, want) { - t.Errorf("g = %+v,\n w %+v", g, want) - } else { - n.Advance() - } - - select { - case rd := <-n.Ready(): - t.Errorf("unexpected Ready: %+v", rd) - case <-time.After(time.Millisecond): - } -} - -func TestNodeAdvance(t *testing.T) { - storage := newTestMemoryStorage(withPeers(1)) - c := &Config{ - ID: 1, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: storage, - MaxSizePerMsg: noLimit, - MaxInflightMsgs: 256, - } - ctx, cancel, n := newNodeTestHarness(context.Background(), t, c) - defer cancel() - - n.Campaign(ctx) - rd := readyWithTimeout(n) - // Commit empty entry. 
- storage.Append(rd.Entries) - n.Advance() - - n.Propose(ctx, []byte("foo")) - rd = readyWithTimeout(n) - storage.Append(rd.Entries) - n.Advance() - select { - case <-n.Ready(): - case <-time.After(100 * time.Millisecond): - t.Errorf("expect Ready after Advance, but there is no Ready available") - } -} - -func TestSoftStateEqual(t *testing.T) { - tests := []struct { - st *SoftState - we bool - }{ - {&SoftState{}, true}, - {&SoftState{Lead: 1}, false}, - {&SoftState{RaftState: StateLeader}, false}, - } - for i, tt := range tests { - if g := tt.st.equal(&SoftState{}); g != tt.we { - t.Errorf("#%d, equal = %v, want %v", i, g, tt.we) - } - } -} - -func TestIsHardStateEqual(t *testing.T) { - tests := []struct { - st raftpb.HardState - we bool - }{ - {emptyState, true}, - {raftpb.HardState{Vote: 1}, false}, - {raftpb.HardState{Commit: 1}, false}, - {raftpb.HardState{Term: 1}, false}, - } - - for i, tt := range tests { - if isHardStateEqual(tt.st, emptyState) != tt.we { - t.Errorf("#%d, equal = %v, want %v", i, isHardStateEqual(tt.st, emptyState), tt.we) - } - } -} - -func TestNodeProposeAddLearnerNode(t *testing.T) { - ticker := time.NewTicker(time.Millisecond * 100) - defer ticker.Stop() - s := newTestMemoryStorage(withPeers(1)) - rn := newTestRawNode(1, 10, 1, s) - n := newNode(rn) - go n.run() - n.Campaign(context.TODO()) - stop := make(chan struct{}) - done := make(chan struct{}) - applyConfChan := make(chan struct{}) - go func() { - defer close(done) - for { - select { - case <-stop: - return - case <-ticker.C: - n.Tick() - case rd := <-n.Ready(): - s.Append(rd.Entries) - t.Logf("raft: %v", rd.Entries) - for _, ent := range rd.Entries { - if ent.Type != raftpb.EntryConfChange { - continue - } - var cc raftpb.ConfChange - cc.Unmarshal(ent.Data) - state := n.ApplyConfChange(cc) - if len(state.Learners) == 0 || - state.Learners[0] != cc.NodeID || - cc.NodeID != 2 { - t.Errorf("apply conf change should return new added learner: %v", state.String()) - } - - if 
len(state.Voters) != 1 { - t.Errorf("add learner should not change the nodes: %v", state.String()) - } - t.Logf("apply raft conf %v changed to: %v", cc, state.String()) - applyConfChan <- struct{}{} - } - n.Advance() - } - } - }() - cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddLearnerNode, NodeID: 2} - n.ProposeConfChange(context.TODO(), cc) - <-applyConfChan - close(stop) - <-done - n.Stop() -} - -func TestAppendPagination(t *testing.T) { - const maxSizePerMsg = 2048 - n := newNetworkWithConfig(func(c *Config) { - c.MaxSizePerMsg = maxSizePerMsg - }, nil, nil, nil) - - seenFullMessage := false - // Inspect all messages to see that we never exceed the limit, but - // we do see messages of larger than half the limit. - n.msgHook = func(m raftpb.Message) bool { - if m.Type == raftpb.MsgApp { - size := 0 - for _, e := range m.Entries { - size += len(e.Data) - } - if size > maxSizePerMsg { - t.Errorf("sent MsgApp that is too large: %d bytes", size) - } - if size > maxSizePerMsg/2 { - seenFullMessage = true - } - } - return true - } - - n.send(raftpb.Message{From: 1, To: 1, Type: raftpb.MsgHup}) - - // Partition the network while we make our proposals. This forces - // the entries to be batched into larger messages. - n.isolate(1) - blob := []byte(strings.Repeat("a", 1000)) - for i := 0; i < 5; i++ { - n.send(raftpb.Message{From: 1, To: 1, Type: raftpb.MsgProp, Entries: []raftpb.Entry{{Data: blob}}}) - } - n.recover() - - // After the partition recovers, tick the clock to wake everything - // back up and send the messages. 
- n.send(raftpb.Message{From: 1, To: 1, Type: raftpb.MsgBeat}) - if !seenFullMessage { - t.Error("didn't see any messages more than half the max size; something is wrong with this test") - } -} - -func TestCommitPagination(t *testing.T) { - s := newTestMemoryStorage(withPeers(1)) - cfg := newTestConfig(1, 10, 1, s) - cfg.MaxCommittedSizePerReady = 2048 - ctx, cancel, n := newNodeTestHarness(context.Background(), t, cfg) - defer cancel() - n.Campaign(ctx) - - rd := readyWithTimeout(n) - s.Append(rd.Entries) - n.Advance() - rd = readyWithTimeout(n) - if len(rd.CommittedEntries) != 1 { - t.Fatalf("expected 1 (empty) entry, got %d", len(rd.CommittedEntries)) - } - s.Append(rd.Entries) - n.Advance() - - blob := []byte(strings.Repeat("a", 1000)) - for i := 0; i < 3; i++ { - if err := n.Propose(ctx, blob); err != nil { - t.Fatal(err) - } - } - - // First the three proposals have to be appended. - rd = readyWithTimeout(n) - if len(rd.Entries) != 3 { - t.Fatal("expected to see three entries") - } - s.Append(rd.Entries) - n.Advance() - - // The 3 proposals will commit in two batches. - rd = readyWithTimeout(n) - if len(rd.CommittedEntries) != 2 { - t.Fatalf("expected 2 entries in first batch, got %d", len(rd.CommittedEntries)) - } - s.Append(rd.Entries) - n.Advance() - rd = readyWithTimeout(n) - if len(rd.CommittedEntries) != 1 { - t.Fatalf("expected 1 entry in second batch, got %d", len(rd.CommittedEntries)) - } - s.Append(rd.Entries) - n.Advance() -} - -type ignoreSizeHintMemStorage struct { - *MemoryStorage -} - -func (s *ignoreSizeHintMemStorage) Entries(lo, hi uint64, maxSize uint64) ([]raftpb.Entry, error) { - return s.MemoryStorage.Entries(lo, hi, math.MaxUint64) -} - -// TestNodeCommitPaginationAfterRestart regression tests a scenario in which the -// Storage's Entries size limitation is slightly more permissive than Raft's -// internal one. 
The original bug was the following: -// -// - node learns that index 11 (or 100, doesn't matter) is committed -// - nextCommittedEnts returns index 1..10 in CommittedEntries due to size limiting. -// However, index 10 already exceeds maxBytes, due to a user-provided impl of Entries. -// - Commit index gets bumped to 10 -// - the node persists the HardState, but crashes before applying the entries -// - upon restart, the storage returns the same entries, but `slice` takes a different code path -// (since it is now called with an upper bound of 10) and removes the last entry. -// - Raft emits a HardState with a regressing commit index. -// -// A simpler version of this test would have the storage return a lot less entries than dictated -// by maxSize (for example, exactly one entry) after the restart, resulting in a larger regression. -// This wouldn't need to exploit anything about Raft-internal code paths to fail. -func TestNodeCommitPaginationAfterRestart(t *testing.T) { - s := &ignoreSizeHintMemStorage{ - MemoryStorage: newTestMemoryStorage(withPeers(1)), - } - persistedHardState := raftpb.HardState{ - Term: 1, - Vote: 1, - Commit: 10, - } - - s.hardState = persistedHardState - s.ents = make([]raftpb.Entry, 10) - var size uint64 - for i := range s.ents { - ent := raftpb.Entry{ - Term: 1, - Index: uint64(i + 1), - Type: raftpb.EntryNormal, - Data: []byte("a"), - } - - s.ents[i] = ent - size += uint64(ent.Size()) - } - - cfg := newTestConfig(1, 10, 1, s) - // Set a MaxSizePerMsg that would suggest to Raft that the last committed entry should - // not be included in the initial rd.CommittedEntries. However, our storage will ignore - // this and *will* return it (which is how the Commit index ended up being 10 initially). 
- cfg.MaxSizePerMsg = size - uint64(s.ents[len(s.ents)-1].Size()) - 1 - - rn, err := NewRawNode(cfg) - if err != nil { - t.Fatal(err) - } - n := newNode(rn) - go n.run() - defer n.Stop() - - rd := readyWithTimeout(&n) - if !IsEmptyHardState(rd.HardState) && rd.HardState.Commit < persistedHardState.Commit { - t.Errorf("HardState regressed: Commit %d -> %d\nCommitting:\n%+v", - persistedHardState.Commit, rd.HardState.Commit, - DescribeEntries(rd.CommittedEntries, func(data []byte) string { return fmt.Sprintf("%q", data) }), - ) - } -} diff --git a/raft/node_util_test.go b/raft/node_util_test.go deleted file mode 100644 index 5093cba6bf9d..000000000000 --- a/raft/node_util_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2022 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "context" - "fmt" - "testing" - "time" -) - -type nodeTestHarness struct { - *node - t *testing.T -} - -func (l *nodeTestHarness) Debug(v ...interface{}) { - l.t.Log(v...) -} - -func (l *nodeTestHarness) Debugf(format string, v ...interface{}) { - l.t.Logf(format, v...) -} - -func (l *nodeTestHarness) Error(v ...interface{}) { - l.t.Error(v...) -} - -func (l *nodeTestHarness) Errorf(format string, v ...interface{}) { - l.t.Errorf(format, v...) -} - -func (l *nodeTestHarness) Info(v ...interface{}) { - l.t.Log(v...) -} - -func (l *nodeTestHarness) Infof(format string, v ...interface{}) { - l.t.Logf(format, v...) 
-} - -func (l *nodeTestHarness) Warning(v ...interface{}) { - l.t.Log(v...) -} - -func (l *nodeTestHarness) Warningf(format string, v ...interface{}) { - l.t.Logf(format, v...) -} - -func (l *nodeTestHarness) Fatal(v ...interface{}) { - l.t.Error(v...) - panic(v) -} - -func (l *nodeTestHarness) Fatalf(format string, v ...interface{}) { - l.t.Errorf(format, v...) - panic(fmt.Sprintf(format, v...)) -} - -func (l *nodeTestHarness) Panic(v ...interface{}) { - l.t.Log(v...) - panic(v) -} - -func (l *nodeTestHarness) Panicf(format string, v ...interface{}) { - l.t.Errorf(format, v...) - panic(fmt.Sprintf(format, v...)) -} - -func newNodeTestHarness(ctx context.Context, t *testing.T, cfg *Config, peers ...Peer) (_ context.Context, cancel func(), _ *nodeTestHarness) { - // Wrap context in a 10s timeout to make tests more robust. Otherwise, - // it's likely that deadlock will occur unless Node behaves exactly as - // expected - when you expect a Ready and start waiting on the channel - // but no Ready ever shows up, for example. - ctx, cancel = context.WithTimeout(ctx, 10*time.Second) - var n *node - if len(peers) > 0 { - n = setupNode(cfg, peers) - } else { - rn, err := NewRawNode(cfg) - if err != nil { - t.Fatal(err) - } - nn := newNode(rn) - n = &nn - } - go func() { - defer func() { - if r := recover(); r != nil { - t.Error(r) - } - }() - defer cancel() - defer n.Stop() - n.run() - }() - t.Cleanup(n.Stop) - return ctx, cancel, &nodeTestHarness{node: n, t: t} -} diff --git a/raft/quorum/bench_test.go b/raft/quorum/bench_test.go deleted file mode 100644 index 5c7961ed6cfc..000000000000 --- a/raft/quorum/bench_test.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package quorum - -import ( - "fmt" - "math" - "math/rand" - "testing" -) - -func BenchmarkMajorityConfig_CommittedIndex(b *testing.B) { - // go test -run - -bench . -benchmem ./raft/quorum - for _, n := range []int{1, 3, 5, 7, 9, 11} { - b.Run(fmt.Sprintf("voters=%d", n), func(b *testing.B) { - c := MajorityConfig{} - l := mapAckIndexer{} - for i := uint64(0); i < uint64(n); i++ { - c[i+1] = struct{}{} - l[i+1] = Index(rand.Int63n(math.MaxInt64)) - } - - for i := 0; i < b.N; i++ { - _ = c.CommittedIndex(l) - } - }) - } -} diff --git a/raft/quorum/datadriven_test.go b/raft/quorum/datadriven_test.go deleted file mode 100644 index b40eaa76c696..000000000000 --- a/raft/quorum/datadriven_test.go +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package quorum - -import ( - "fmt" - "strings" - "testing" - - "github.com/cockroachdb/datadriven" -) - -// TestDataDriven parses and executes the test cases in ./testdata/*. 
An entry -// in such a file specifies the command, which is either of "committed" to check -// CommittedIndex or "vote" to verify a VoteResult. The underlying configuration -// and inputs are specified via the arguments 'cfg' and 'cfgj' (for the majority -// config and, optionally, majority config joint to the first one) and 'idx' -// (for CommittedIndex) and 'votes' (for VoteResult). -// -// Internally, the harness runs some additional checks on each test case for -// which it is known that the result shouldn't change. For example, -// interchanging the majority configurations of a joint quorum must not -// influence the result; if it does, this is noted in the test's output. -func TestDataDriven(t *testing.T) { - datadriven.Walk(t, "testdata", func(t *testing.T, path string) { - datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { - // Two majority configs. The first one is always used (though it may - // be empty) and the second one is used iff joint is true. - var joint bool - var ids, idsj []uint64 - // The committed indexes for the nodes in the config in the order in - // which they appear in (ids,idsj), without repetition. An underscore - // denotes an omission (i.e. no information for this voter); this is - // different from 0. For example, - // - // cfg=(1,2) cfgj=(2,3,4) idxs=(_,5,_,7) initializes the idx for voter 2 - // to 5 and that for voter 4 to 7 (and no others). - // - // cfgj=zero is specified to instruct the test harness to treat cfgj - // as zero instead of not specified (i.e. it will trigger a joint - // quorum test instead of a majority quorum test for cfg only). - var idxs []Index - // Votes. These are initialized similar to idxs except the only values - // used are 1 (voted against) and 2 (voted for). This looks awkward, - // but is convenient because it allows sharing code between the two. - var votes []Index - - // Parse the args. 
- for _, arg := range d.CmdArgs { - for i := range arg.Vals { - switch arg.Key { - case "cfg": - var n uint64 - arg.Scan(t, i, &n) - ids = append(ids, n) - case "cfgj": - joint = true - if arg.Vals[i] == "zero" { - if len(arg.Vals) != 1 { - t.Fatalf("cannot mix 'zero' into configuration") - } - } else { - var n uint64 - arg.Scan(t, i, &n) - idsj = append(idsj, n) - } - case "idx": - var n uint64 - // Register placeholders as zeroes. - if arg.Vals[i] != "_" { - arg.Scan(t, i, &n) - if n == 0 { - // This is a restriction caused by the above - // special-casing for _. - t.Fatalf("cannot use 0 as idx") - } - } - idxs = append(idxs, Index(n)) - case "votes": - var s string - arg.Scan(t, i, &s) - switch s { - case "y": - votes = append(votes, 2) - case "n": - votes = append(votes, 1) - case "_": - votes = append(votes, 0) - default: - t.Fatalf("unknown vote: %s", s) - } - default: - t.Fatalf("unknown arg %s", arg.Key) - } - } - } - - // Build the two majority configs. - c := MajorityConfig{} - for _, id := range ids { - c[id] = struct{}{} - } - cj := MajorityConfig{} - for _, id := range idsj { - cj[id] = struct{}{} - } - - // Helper that returns an AckedIndexer which has the specified indexes - // mapped to the right IDs. - makeLookuper := func(idxs []Index, ids, idsj []uint64) mapAckIndexer { - l := mapAckIndexer{} - var p int // next to consume from idxs - for _, id := range append(append([]uint64(nil), ids...), idsj...) { - if _, ok := l[id]; ok { - continue - } - if p < len(idxs) { - // NB: this creates zero entries for placeholders that we remove later. - // The upshot of doing it that way is to avoid having to specify place- - // holders multiple times when omitting voters present in both halves of - // a joint config. - l[id] = idxs[p] - p++ - } - } - - for id := range l { - // Zero entries are created by _ placeholders; we don't want - // them in the lookuper because "no entry" is different from - // "zero entry". 
Note that we prevent tests from specifying - // zero commit indexes, so that there's no confusion between - // the two concepts. - if l[id] == 0 { - delete(l, id) - } - } - return l - } - - { - input := idxs - if d.Cmd == "vote" { - input = votes - } - if voters := JointConfig([2]MajorityConfig{c, cj}).IDs(); len(voters) != len(input) { - return fmt.Sprintf("error: mismatched input (explicit or _) for voters %v: %v", - voters, input) - } - } - - var buf strings.Builder - switch d.Cmd { - case "committed": - l := makeLookuper(idxs, ids, idsj) - - // Branch based on whether this is a majority or joint quorum - // test case. - if !joint { - idx := c.CommittedIndex(l) - fmt.Fprint(&buf, c.Describe(l)) - // These alternative computations should return the same - // result. If not, print to the output. - if aIdx := alternativeMajorityCommittedIndex(c, l); aIdx != idx { - fmt.Fprintf(&buf, "%s <-- via alternative computation\n", aIdx) - } - // Joining a majority with the empty majority should give same result. - if aIdx := JointConfig([2]MajorityConfig{c, {}}).CommittedIndex(l); aIdx != idx { - fmt.Fprintf(&buf, "%s <-- via zero-joint quorum\n", aIdx) - } - // Joining a majority with itself should give same result. - if aIdx := JointConfig([2]MajorityConfig{c, c}).CommittedIndex(l); aIdx != idx { - fmt.Fprintf(&buf, "%s <-- via self-joint quorum\n", aIdx) - } - overlay := func(c MajorityConfig, l AckedIndexer, id uint64, idx Index) AckedIndexer { - ll := mapAckIndexer{} - for iid := range c { - if iid == id { - ll[iid] = idx - } else if idx, ok := l.AckedIndex(iid); ok { - ll[iid] = idx - } - } - return ll - } - for id := range c { - iidx, _ := l.AckedIndex(id) - if idx > iidx && iidx > 0 { - // If the committed index was definitely above the currently - // inspected idx, the result shouldn't change if we lower it - // further. 
- lo := overlay(c, l, id, iidx-1) - if aIdx := c.CommittedIndex(lo); aIdx != idx { - fmt.Fprintf(&buf, "%s <-- overlaying %d->%d", aIdx, id, iidx) - } - lo = overlay(c, l, id, 0) - if aIdx := c.CommittedIndex(lo); aIdx != idx { - fmt.Fprintf(&buf, "%s <-- overlaying %d->0", aIdx, id) - } - } - } - fmt.Fprintf(&buf, "%s\n", idx) - } else { - cc := JointConfig([2]MajorityConfig{c, cj}) - fmt.Fprint(&buf, cc.Describe(l)) - idx := cc.CommittedIndex(l) - // Interchanging the majorities shouldn't make a difference. If it does, print. - if aIdx := JointConfig([2]MajorityConfig{cj, c}).CommittedIndex(l); aIdx != idx { - fmt.Fprintf(&buf, "%s <-- via symmetry\n", aIdx) - } - fmt.Fprintf(&buf, "%s\n", idx) - } - case "vote": - ll := makeLookuper(votes, ids, idsj) - l := map[uint64]bool{} - for id, v := range ll { - l[id] = v != 1 // NB: 1 == false, 2 == true - } - - if !joint { - // Test a majority quorum. - r := c.VoteResult(l) - fmt.Fprintf(&buf, "%v\n", r) - } else { - // Run a joint quorum test case. - r := JointConfig([2]MajorityConfig{c, cj}).VoteResult(l) - // Interchanging the majorities shouldn't make a difference. If it does, print. - if ar := JointConfig([2]MajorityConfig{cj, c}).VoteResult(l); ar != r { - fmt.Fprintf(&buf, "%v <-- via symmetry\n", ar) - } - fmt.Fprintf(&buf, "%v\n", r) - } - default: - t.Fatalf("unknown command: %s", d.Cmd) - } - return buf.String() - }) - }) -} diff --git a/raft/quorum/joint.go b/raft/quorum/joint.go deleted file mode 100644 index e3741e0b0a96..000000000000 --- a/raft/quorum/joint.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package quorum - -// JointConfig is a configuration of two groups of (possibly overlapping) -// majority configurations. Decisions require the support of both majorities. -type JointConfig [2]MajorityConfig - -func (c JointConfig) String() string { - if len(c[1]) > 0 { - return c[0].String() + "&&" + c[1].String() - } - return c[0].String() -} - -// IDs returns a newly initialized map representing the set of voters present -// in the joint configuration. -func (c JointConfig) IDs() map[uint64]struct{} { - m := map[uint64]struct{}{} - for _, cc := range c { - for id := range cc { - m[id] = struct{}{} - } - } - return m -} - -// Describe returns a (multi-line) representation of the commit indexes for the -// given lookuper. -func (c JointConfig) Describe(l AckedIndexer) string { - return MajorityConfig(c.IDs()).Describe(l) -} - -// CommittedIndex returns the largest committed index for the given joint -// quorum. An index is jointly committed if it is committed in both constituent -// majorities. -func (c JointConfig) CommittedIndex(l AckedIndexer) Index { - idx0 := c[0].CommittedIndex(l) - idx1 := c[1].CommittedIndex(l) - if idx0 < idx1 { - return idx0 - } - return idx1 -} - -// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns -// a result indicating whether the vote is pending, lost, or won. A joint quorum -// requires both majority quorums to vote in favor. 
-func (c JointConfig) VoteResult(votes map[uint64]bool) VoteResult { - r1 := c[0].VoteResult(votes) - r2 := c[1].VoteResult(votes) - - if r1 == r2 { - // If they agree, return the agreed state. - return r1 - } - if r1 == VoteLost || r2 == VoteLost { - // If either config has lost, loss is the only possible outcome. - return VoteLost - } - // One side won, the other one is pending, so the whole outcome is. - return VotePending -} diff --git a/raft/quorum/majority.go b/raft/quorum/majority.go deleted file mode 100644 index 12766137a9c1..000000000000 --- a/raft/quorum/majority.go +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package quorum - -import ( - "fmt" - "math" - "sort" - "strings" -) - -// MajorityConfig is a set of IDs that uses majority quorums to make decisions. -type MajorityConfig map[uint64]struct{} - -func (c MajorityConfig) String() string { - sl := make([]uint64, 0, len(c)) - for id := range c { - sl = append(sl, id) - } - sort.Slice(sl, func(i, j int) bool { return sl[i] < sl[j] }) - var buf strings.Builder - buf.WriteByte('(') - for i := range sl { - if i > 0 { - buf.WriteByte(' ') - } - fmt.Fprint(&buf, sl[i]) - } - buf.WriteByte(')') - return buf.String() -} - -// Describe returns a (multi-line) representation of the commit indexes for the -// given lookuper. 
-func (c MajorityConfig) Describe(l AckedIndexer) string { - if len(c) == 0 { - return "" - } - type tup struct { - id uint64 - idx Index - ok bool // idx found? - bar int // length of bar displayed for this tup - } - - // Below, populate .bar so that the i-th largest commit index has bar i (we - // plot this as sort of a progress bar). The actual code is a bit more - // complicated and also makes sure that equal index => equal bar. - - n := len(c) - info := make([]tup, 0, n) - for id := range c { - idx, ok := l.AckedIndex(id) - info = append(info, tup{id: id, idx: idx, ok: ok}) - } - - // Sort by index - sort.Slice(info, func(i, j int) bool { - if info[i].idx == info[j].idx { - return info[i].id < info[j].id - } - return info[i].idx < info[j].idx - }) - - // Populate .bar. - for i := range info { - if i > 0 && info[i-1].idx < info[i].idx { - info[i].bar = i - } - } - - // Sort by ID. - sort.Slice(info, func(i, j int) bool { - return info[i].id < info[j].id - }) - - var buf strings.Builder - - // Print. - fmt.Fprint(&buf, strings.Repeat(" ", n)+" idx\n") - for i := range info { - bar := info[i].bar - if !info[i].ok { - fmt.Fprint(&buf, "?"+strings.Repeat(" ", n)) - } else { - fmt.Fprint(&buf, strings.Repeat("x", bar)+">"+strings.Repeat(" ", n-bar)) - } - fmt.Fprintf(&buf, " %5d (id=%d)\n", info[i].idx, info[i].id) - } - return buf.String() -} - -// Slice returns the MajorityConfig as a sorted slice. -func (c MajorityConfig) Slice() []uint64 { - var sl []uint64 - for id := range c { - sl = append(sl, id) - } - sort.Slice(sl, func(i, j int) bool { return sl[i] < sl[j] }) - return sl -} - -func insertionSort(sl []uint64) { - a, b := 0, len(sl) - for i := a + 1; i < b; i++ { - for j := i; j > a && sl[j] < sl[j-1]; j-- { - sl[j], sl[j-1] = sl[j-1], sl[j] - } - } -} - -// CommittedIndex computes the committed index from those supplied via the -// provided AckedIndexer (for the active config). 
-func (c MajorityConfig) CommittedIndex(l AckedIndexer) Index { - n := len(c) - if n == 0 { - // This plays well with joint quorums which, when one half is the zero - // MajorityConfig, should behave like the other half. - return math.MaxUint64 - } - - // Use an on-stack slice to collect the committed indexes when n <= 7 - // (otherwise we alloc). The alternative is to stash a slice on - // MajorityConfig, but this impairs usability (as is, MajorityConfig is just - // a map, and that's nice). The assumption is that running with a - // replication factor of >7 is rare, and in cases in which it happens - // performance is a lesser concern (additionally the performance - // implications of an allocation here are far from drastic). - var stk [7]uint64 - var srt []uint64 - if len(stk) >= n { - srt = stk[:n] - } else { - srt = make([]uint64, n) - } - - { - // Fill the slice with the indexes observed. Any unused slots will be - // left as zero; these correspond to voters that may report in, but - // haven't yet. We fill from the right (since the zeroes will end up on - // the left after sorting below anyway). - i := n - 1 - for id := range c { - if idx, ok := l.AckedIndex(id); ok { - srt[i] = uint64(idx) - i-- - } - } - } - - // Sort by index. Use a bespoke algorithm (copied from the stdlib's sort - // package) to keep srt on the stack. - insertionSort(srt) - - // The smallest index into the array for which the value is acked by a - // quorum. In other words, from the end of the slice, move n/2+1 to the - // left (accounting for zero-indexing). - pos := n - (n/2 + 1) - return Index(srt[pos]) -} - -// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns -// a result indicating whether the vote is pending (i.e. neither a quorum of -// yes/no has been reached), won (a quorum of yes has been reached), or lost (a -// quorum of no has been reached). 
-func (c MajorityConfig) VoteResult(votes map[uint64]bool) VoteResult { - if len(c) == 0 { - // By convention, the elections on an empty config win. This comes in - // handy with joint quorums because it'll make a half-populated joint - // quorum behave like a majority quorum. - return VoteWon - } - - var votedCnt int //vote counts for yes. - var missing int - for id := range c { - v, ok := votes[id] - if !ok { - missing++ - continue - } - if v { - votedCnt++ - } - } - - q := len(c)/2 + 1 - if votedCnt >= q { - return VoteWon - } - if votedCnt+missing >= q { - return VotePending - } - return VoteLost -} diff --git a/raft/quorum/quick_test.go b/raft/quorum/quick_test.go deleted file mode 100644 index d838b54f8c39..000000000000 --- a/raft/quorum/quick_test.go +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package quorum - -import ( - "math" - "math/rand" - "reflect" - "testing" - "testing/quick" -) - -// TestQuick uses quickcheck to heuristically assert that the main -// implementation of (MajorityConfig).CommittedIndex agrees with a "dumb" -// alternative version. 
-func TestQuick(t *testing.T) { - cfg := &quick.Config{ - MaxCount: 50000, - } - - t.Run("majority_commit", func(t *testing.T) { - fn1 := func(c memberMap, l idxMap) uint64 { - return uint64(MajorityConfig(c).CommittedIndex(mapAckIndexer(l))) - } - fn2 := func(c memberMap, l idxMap) uint64 { - return uint64(alternativeMajorityCommittedIndex(MajorityConfig(c), mapAckIndexer(l))) - } - if err := quick.CheckEqual(fn1, fn2, cfg); err != nil { - t.Fatal(err) - } - }) -} - -// smallRandIdxMap returns a reasonably sized map of ids to commit indexes. -func smallRandIdxMap(rand *rand.Rand, _ int) map[uint64]Index { - // Hard-code a reasonably small size here (quick will hard-code 50, which - // is not useful here). - size := 10 - - n := rand.Intn(size) - ids := rand.Perm(2 * n)[:n] - idxs := make([]int, len(ids)) - for i := range idxs { - idxs[i] = rand.Intn(n) - } - - m := map[uint64]Index{} - for i := range ids { - m[uint64(ids[i])] = Index(idxs[i]) - } - return m -} - -type idxMap map[uint64]Index - -func (idxMap) Generate(rand *rand.Rand, size int) reflect.Value { - m := smallRandIdxMap(rand, size) - return reflect.ValueOf(m) -} - -type memberMap map[uint64]struct{} - -func (memberMap) Generate(rand *rand.Rand, size int) reflect.Value { - m := smallRandIdxMap(rand, size) - mm := map[uint64]struct{}{} - for id := range m { - mm[id] = struct{}{} - } - return reflect.ValueOf(mm) -} - -// This is an alternative implementation of (MajorityConfig).CommittedIndex(l). -func alternativeMajorityCommittedIndex(c MajorityConfig, l AckedIndexer) Index { - if len(c) == 0 { - return math.MaxUint64 - } - - idToIdx := map[uint64]Index{} - for id := range c { - if idx, ok := l.AckedIndex(id); ok { - idToIdx[id] = idx - } - } - - // Build a map from index to voters who have acked that or any higher index. 
- idxToVotes := map[Index]int{} - for _, idx := range idToIdx { - idxToVotes[idx] = 0 - } - - for _, idx := range idToIdx { - for idy := range idxToVotes { - if idy > idx { - continue - } - idxToVotes[idy]++ - } - } - - // Find the maximum index that has achieved quorum. - q := len(c)/2 + 1 - var maxQuorumIdx Index - for idx, n := range idxToVotes { - if n >= q && idx > maxQuorumIdx { - maxQuorumIdx = idx - } - } - - return maxQuorumIdx -} diff --git a/raft/quorum/quorum.go b/raft/quorum/quorum.go deleted file mode 100644 index 2899e46c96dc..000000000000 --- a/raft/quorum/quorum.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package quorum - -import ( - "math" - "strconv" -) - -// Index is a Raft log position. -type Index uint64 - -func (i Index) String() string { - if i == math.MaxUint64 { - return "∞" - } - return strconv.FormatUint(uint64(i), 10) -} - -// AckedIndexer allows looking up a commit index for a given ID of a voter -// from a corresponding MajorityConfig. -type AckedIndexer interface { - AckedIndex(voterID uint64) (idx Index, found bool) -} - -type mapAckIndexer map[uint64]Index - -func (m mapAckIndexer) AckedIndex(id uint64) (Index, bool) { - idx, ok := m[id] - return idx, ok -} - -// VoteResult indicates the outcome of a vote. 
-// -//go:generate stringer -type=VoteResult -type VoteResult uint8 - -const ( - // VotePending indicates that the decision of the vote depends on future - // votes, i.e. neither "yes" or "no" has reached quorum yet. - VotePending VoteResult = 1 + iota - // VoteLost indicates that the quorum has voted "no". - VoteLost - // VoteWon indicates that the quorum has voted "yes". - VoteWon -) diff --git a/raft/quorum/testdata/joint_commit.txt b/raft/quorum/testdata/joint_commit.txt deleted file mode 100644 index 12f19fb331cc..000000000000 --- a/raft/quorum/testdata/joint_commit.txt +++ /dev/null @@ -1,481 +0,0 @@ -# No difference between a simple majority quorum and a simple majority quorum -# joint with an empty majority quorum. (This is asserted for all datadriven tests -# by the framework, so we don't dwell on it more). -# -# Note that by specifying cfgj explicitly we tell the test harness to treat the -# input as a joint quorum and not a majority quorum. If we didn't specify -# cfgj=zero the test would pass just the same, but it wouldn't be exercising the -# joint quorum path. -committed cfg=(1,2,3) cfgj=zero idx=(100,101,99) ----- - idx -x> 100 (id=1) -xx> 101 (id=2) -> 99 (id=3) -100 - -# Joint nonoverlapping singleton quorums. - -committed cfg=(1) cfgj=(2) idx=(_,_) ----- - idx -? 0 (id=1) -? 0 (id=2) -0 - -# Voter 1 has 100 committed, 2 nothing. This means we definitely won't commit -# past 100. -committed cfg=(1) cfgj=(2) idx=(100,_) ----- - idx -x> 100 (id=1) -? 0 (id=2) -0 - -# Committed index collapses once both majorities do, to the lower index. -committed cfg=(1) cfgj=(2) idx=(13, 100) ----- - idx -> 13 (id=1) -x> 100 (id=2) -13 - -# Joint overlapping (i.e. identical) singleton quorum. - -committed cfg=(1) cfgj=(1) idx=(_) ----- - idx -? 0 (id=1) -0 - -committed cfg=(1) cfgj=(1) idx=(100) ----- - idx -> 100 (id=1) -100 - - - -# Two-node config joint with non-overlapping single node config -committed cfg=(1,3) cfgj=(2) idx=(_,_,_) ----- - idx -? 0 (id=1) -? 
0 (id=2) -? 0 (id=3) -0 - -committed cfg=(1,3) cfgj=(2) idx=(100,_,_) ----- - idx -xx> 100 (id=1) -? 0 (id=2) -? 0 (id=3) -0 - -# 1 has 100 committed, 2 has 50 (collapsing half of the joint quorum to 50). -committed cfg=(1,3) cfgj=(2) idx=(100,_,50) ----- - idx -xx> 100 (id=1) -x> 50 (id=2) -? 0 (id=3) -0 - -# 2 reports 45, collapsing the other half (to 45). -committed cfg=(1,3) cfgj=(2) idx=(100,45,50) ----- - idx -xx> 100 (id=1) -x> 50 (id=2) -> 45 (id=3) -45 - -# Two-node config with overlapping single-node config. - -committed cfg=(1,2) cfgj=(2) idx=(_,_) ----- - idx -? 0 (id=1) -? 0 (id=2) -0 - -# 1 reports 100. -committed cfg=(1,2) cfgj=(2) idx=(100,_) ----- - idx -x> 100 (id=1) -? 0 (id=2) -0 - -# 2 reports 100. -committed cfg=(1,2) cfgj=(2) idx=(_,100) ----- - idx -? 0 (id=1) -x> 100 (id=2) -0 - -committed cfg=(1,2) cfgj=(2) idx=(50,100) ----- - idx -> 50 (id=1) -x> 100 (id=2) -50 - -committed cfg=(1,2) cfgj=(2) idx=(100,50) ----- - idx -x> 100 (id=1) -> 50 (id=2) -50 - - - -# Joint non-overlapping two-node configs. - -committed cfg=(1,2) cfgj=(3,4) idx=(50,_,_,_) ----- - idx -xxx> 50 (id=1) -? 0 (id=2) -? 0 (id=3) -? 0 (id=4) -0 - -committed cfg=(1,2) cfgj=(3,4) idx=(50,_,49,_) ----- - idx -xxx> 50 (id=1) -? 0 (id=2) -xx> 49 (id=3) -? 0 (id=4) -0 - -committed cfg=(1,2) cfgj=(3,4) idx=(50,48,49,_) ----- - idx -xxx> 50 (id=1) -x> 48 (id=2) -xx> 49 (id=3) -? 0 (id=4) -0 - -committed cfg=(1,2) cfgj=(3,4) idx=(50,48,49,47) ----- - idx -xxx> 50 (id=1) -x> 48 (id=2) -xx> 49 (id=3) -> 47 (id=4) -47 - -# Joint overlapping two-node configs. -committed cfg=(1,2) cfgj=(2,3) idx=(_,_,_) ----- - idx -? 0 (id=1) -? 0 (id=2) -? 0 (id=3) -0 - -committed cfg=(1,2) cfgj=(2,3) idx=(100,_,_) ----- - idx -xx> 100 (id=1) -? 0 (id=2) -? 0 (id=3) -0 - -committed cfg=(1,2) cfgj=(2,3) idx=(_,100,_) ----- - idx -? 0 (id=1) -xx> 100 (id=2) -? 0 (id=3) -0 - -committed cfg=(1,2) cfgj=(2,3) idx=(_,100,99) ----- - idx -? 
0 (id=1) -xx> 100 (id=2) -x> 99 (id=3) -0 - -committed cfg=(1,2) cfgj=(2,3) idx=(101,100,99) ----- - idx -xx> 101 (id=1) -x> 100 (id=2) -> 99 (id=3) -99 - -# Joint identical two-node configs. -committed cfg=(1,2) cfgj=(1,2) idx=(_,_) ----- - idx -? 0 (id=1) -? 0 (id=2) -0 - -committed cfg=(1,2) cfgj=(1,2) idx=(_,40) ----- - idx -? 0 (id=1) -x> 40 (id=2) -0 - -committed cfg=(1,2) cfgj=(1,2) idx=(41,40) ----- - idx -x> 41 (id=1) -> 40 (id=2) -40 - - - -# Joint disjoint three-node configs. - -committed cfg=(1,2,3) cfgj=(4,5,6) idx=(_,_,_,_,_,_) ----- - idx -? 0 (id=1) -? 0 (id=2) -? 0 (id=3) -? 0 (id=4) -? 0 (id=5) -? 0 (id=6) -0 - -committed cfg=(1,2,3) cfgj=(4,5,6) idx=(100,_,_,_,_,_) ----- - idx -xxxxx> 100 (id=1) -? 0 (id=2) -? 0 (id=3) -? 0 (id=4) -? 0 (id=5) -? 0 (id=6) -0 - -committed cfg=(1,2,3) cfgj=(4,5,6) idx=(100,_,_,90,_,_) ----- - idx -xxxxx> 100 (id=1) -? 0 (id=2) -? 0 (id=3) -xxxx> 90 (id=4) -? 0 (id=5) -? 0 (id=6) -0 - -committed cfg=(1,2,3) cfgj=(4,5,6) idx=(100,99,_,_,_,_) ----- - idx -xxxxx> 100 (id=1) -xxxx> 99 (id=2) -? 0 (id=3) -? 0 (id=4) -? 0 (id=5) -? 0 (id=6) -0 - -# First quorum <= 99, second one <= 97. Both quorums guarantee that 90 is -# committed. -committed cfg=(1,2,3) cfgj=(4,5,6) idx=(_,99,90,97,95,_) ----- - idx -? 0 (id=1) -xxxxx> 99 (id=2) -xx> 90 (id=3) -xxxx> 97 (id=4) -xxx> 95 (id=5) -? 0 (id=6) -90 - -# First quorum collapsed to 92. Second one already had at least 95 committed, -# so the result also collapses. -committed cfg=(1,2,3) cfgj=(4,5,6) idx=(92,99,90,97,95,_) ----- - idx -xx> 92 (id=1) -xxxxx> 99 (id=2) -x> 90 (id=3) -xxxx> 97 (id=4) -xxx> 95 (id=5) -? 0 (id=6) -92 - -# Second quorum collapses, but nothing changes in the output. -committed cfg=(1,2,3) cfgj=(4,5,6) idx=(92,99,90,97,95,77) ----- - idx -xx> 92 (id=1) -xxxxx> 99 (id=2) -x> 90 (id=3) -xxxx> 97 (id=4) -xxx> 95 (id=5) -> 77 (id=6) -92 - - -# Joint overlapping three-node configs. - -committed cfg=(1,2,3) cfgj=(1,4,5) idx=(_,_,_,_,_) ----- - idx -? 0 (id=1) -? 
0 (id=2) -? 0 (id=3) -? 0 (id=4) -? 0 (id=5) -0 - -committed cfg=(1,2,3) cfgj=(1,4,5) idx=(100,_,_,_,_) ----- - idx -xxxx> 100 (id=1) -? 0 (id=2) -? 0 (id=3) -? 0 (id=4) -? 0 (id=5) -0 - -committed cfg=(1,2,3) cfgj=(1,4,5) idx=(100,101,_,_,_) ----- - idx -xxx> 100 (id=1) -xxxx> 101 (id=2) -? 0 (id=3) -? 0 (id=4) -? 0 (id=5) -0 - -committed cfg=(1,2,3) cfgj=(1,4,5) idx=(100,101,100,_,_) ----- - idx -xx> 100 (id=1) -xxxx> 101 (id=2) -> 100 (id=3) -? 0 (id=4) -? 0 (id=5) -0 - -# Second quorum could commit either 98 or 99, but first quorum is open. -committed cfg=(1,2,3) cfgj=(1,4,5) idx=(_,100,_,99,98) ----- - idx -? 0 (id=1) -xxxx> 100 (id=2) -? 0 (id=3) -xxx> 99 (id=4) -xx> 98 (id=5) -0 - -# Additionally, first quorum can commit either 100 or 99 -committed cfg=(1,2,3) cfgj=(1,4,5) idx=(_,100,99,99,98) ----- - idx -? 0 (id=1) -xxxx> 100 (id=2) -xx> 99 (id=3) -> 99 (id=4) -x> 98 (id=5) -98 - -committed cfg=(1,2,3) cfgj=(1,4,5) idx=(1,100,99,99,98) ----- - idx -> 1 (id=1) -xxxx> 100 (id=2) -xx> 99 (id=3) -> 99 (id=4) -x> 98 (id=5) -98 - -committed cfg=(1,2,3) cfgj=(1,4,5) idx=(100,100,99,99,98) ----- - idx -xxx> 100 (id=1) -> 100 (id=2) -x> 99 (id=3) -> 99 (id=4) -> 98 (id=5) -99 - - -# More overlap. - -committed cfg=(1,2,3) cfgj=(2,3,4) idx=(_,_,_,_) ----- - idx -? 0 (id=1) -? 0 (id=2) -? 0 (id=3) -? 0 (id=4) -0 - -committed cfg=(1,2,3) cfgj=(2,3,4) idx=(_,100,99,_) ----- - idx -? 0 (id=1) -xxx> 100 (id=2) -xx> 99 (id=3) -? 0 (id=4) -99 - -committed cfg=(1,2,3) cfgj=(2,3,4) idx=(98,100,99,_) ----- - idx -x> 98 (id=1) -xxx> 100 (id=2) -xx> 99 (id=3) -? 0 (id=4) -99 - -committed cfg=(1,2,3) cfgj=(2,3,4) idx=(100,100,99,_) ----- - idx -xx> 100 (id=1) -> 100 (id=2) -x> 99 (id=3) -? 0 (id=4) -99 - -committed cfg=(1,2,3) cfgj=(2,3,4) idx=(100,100,99,98) ----- - idx -xx> 100 (id=1) -> 100 (id=2) -x> 99 (id=3) -> 98 (id=4) -99 - -committed cfg=(1,2,3) cfgj=(2,3,4) idx=(100,_,_,101) ----- - idx -xx> 100 (id=1) -? 0 (id=2) -? 
0 (id=3) -xxx> 101 (id=4) -0 - -committed cfg=(1,2,3) cfgj=(2,3,4) idx=(100,99,_,101) ----- - idx -xx> 100 (id=1) -x> 99 (id=2) -? 0 (id=3) -xxx> 101 (id=4) -99 - -# Identical. This is also exercised in the test harness, so it's listed here -# only briefly. -committed cfg=(1,2,3) cfgj=(1,2,3) idx=(50,45,_) ----- - idx -xx> 50 (id=1) -x> 45 (id=2) -? 0 (id=3) -45 diff --git a/raft/quorum/testdata/joint_vote.txt b/raft/quorum/testdata/joint_vote.txt deleted file mode 100644 index 36cd0cabcff7..000000000000 --- a/raft/quorum/testdata/joint_vote.txt +++ /dev/null @@ -1,165 +0,0 @@ -# Empty joint config wins all votes. This isn't used in production. Note that -# by specifying cfgj explicitly we tell the test harness to treat the input as -# a joint quorum and not a majority quorum. -vote cfgj=zero ----- -VoteWon - -# More examples with close to trivial configs. - -vote cfg=(1) cfgj=zero votes=(_) ----- -VotePending - -vote cfg=(1) cfgj=zero votes=(y) ----- -VoteWon - -vote cfg=(1) cfgj=zero votes=(n) ----- -VoteLost - -vote cfg=(1) cfgj=(1) votes=(_) ----- -VotePending - -vote cfg=(1) cfgj=(1) votes=(y) ----- -VoteWon - -vote cfg=(1) cfgj=(1) votes=(n) ----- -VoteLost - -vote cfg=(1) cfgj=(2) votes=(_,_) ----- -VotePending - -vote cfg=(1) cfgj=(2) votes=(y,_) ----- -VotePending - -vote cfg=(1) cfgj=(2) votes=(y,y) ----- -VoteWon - -vote cfg=(1) cfgj=(2) votes=(y,n) ----- -VoteLost - -vote cfg=(1) cfgj=(2) votes=(n,_) ----- -VoteLost - -vote cfg=(1) cfgj=(2) votes=(n,n) ----- -VoteLost - -vote cfg=(1) cfgj=(2) votes=(n,y) ----- -VoteLost - -# Two node configs. 
- -vote cfg=(1,2) cfgj=(3,4) votes=(_,_,_,_) ----- -VotePending - -vote cfg=(1,2) cfgj=(3,4) votes=(y,_,_,_) ----- -VotePending - -vote cfg=(1,2) cfgj=(3,4) votes=(y,y,_,_) ----- -VotePending - -vote cfg=(1,2) cfgj=(3,4) votes=(y,y,n,_) ----- -VoteLost - -vote cfg=(1,2) cfgj=(3,4) votes=(y,y,n,n) ----- -VoteLost - -vote cfg=(1,2) cfgj=(3,4) votes=(y,y,y,n) ----- -VoteLost - -vote cfg=(1,2) cfgj=(3,4) votes=(y,y,y,y) ----- -VoteWon - -vote cfg=(1,2) cfgj=(2,3) votes=(_,_,_) ----- -VotePending - -vote cfg=(1,2) cfgj=(2,3) votes=(_,n,_) ----- -VoteLost - -vote cfg=(1,2) cfgj=(2,3) votes=(y,y,_) ----- -VotePending - -vote cfg=(1,2) cfgj=(2,3) votes=(y,y,n) ----- -VoteLost - -vote cfg=(1,2) cfgj=(2,3) votes=(y,y,y) ----- -VoteWon - -vote cfg=(1,2) cfgj=(1,2) votes=(_,_) ----- -VotePending - -vote cfg=(1,2) cfgj=(1,2) votes=(y,_) ----- -VotePending - -vote cfg=(1,2) cfgj=(1,2) votes=(y,n) ----- -VoteLost - -vote cfg=(1,2) cfgj=(1,2) votes=(n,_) ----- -VoteLost - -vote cfg=(1,2) cfgj=(1,2) votes=(n,n) ----- -VoteLost - - -# Simple example for overlapping three node configs. - -vote cfg=(1,2,3) cfgj=(2,3,4) votes=(_,_,_,_) ----- -VotePending - -vote cfg=(1,2,3) cfgj=(2,3,4) votes=(_,n,_,_) ----- -VotePending - -vote cfg=(1,2,3) cfgj=(2,3,4) votes=(_,n,n,_) ----- -VoteLost - -vote cfg=(1,2,3) cfgj=(2,3,4) votes=(_,y,y,_) ----- -VoteWon - -vote cfg=(1,2,3) cfgj=(2,3,4) votes=(y,y,_,_) ----- -VotePending - -vote cfg=(1,2,3) cfgj=(2,3,4) votes=(y,y,n,_) ----- -VotePending - -vote cfg=(1,2,3) cfgj=(2,3,4) votes=(y,y,n,n) ----- -VoteLost - -vote cfg=(1,2,3) cfgj=(2,3,4) votes=(y,y,n,y) ----- -VoteWon diff --git a/raft/quorum/testdata/majority_commit.txt b/raft/quorum/testdata/majority_commit.txt deleted file mode 100644 index 6ff5d0b89e0e..000000000000 --- a/raft/quorum/testdata/majority_commit.txt +++ /dev/null @@ -1,153 +0,0 @@ -# The empty quorum commits "everything". This is useful for its use in joint -# quorums. 
-committed ----- -∞ - - - -# A single voter quorum is not final when no index is known. -committed cfg=(1) idx=(_) ----- - idx -? 0 (id=1) -0 - -# When an index is known, that's the committed index, and that's final. -committed cfg=(1) idx=(12) ----- - idx -> 12 (id=1) -12 - - - - -# With two nodes, start out similarly. -committed cfg=(1, 2) idx=(_,_) ----- - idx -? 0 (id=1) -? 0 (id=2) -0 - -# The first committed index becomes known (for n1). Nothing changes in the -# output because idx=12 is not known to be on a quorum (which is both nodes). -committed cfg=(1, 2) idx=(12,_) ----- - idx -x> 12 (id=1) -? 0 (id=2) -0 - -# The second index comes in and finalize the decision. The result will be the -# smaller of the two indexes. -committed cfg=(1,2) idx=(12,5) ----- - idx -x> 12 (id=1) -> 5 (id=2) -5 - - - - -# No surprises for three nodes. -committed cfg=(1,2,3) idx=(_,_,_) ----- - idx -? 0 (id=1) -? 0 (id=2) -? 0 (id=3) -0 - -committed cfg=(1,2,3) idx=(12,_,_) ----- - idx -xx> 12 (id=1) -? 0 (id=2) -? 0 (id=3) -0 - -# We see a committed index, but a higher committed index for the last pending -# votes could change (increment) the outcome, so not final yet. -committed cfg=(1,2,3) idx=(12,5,_) ----- - idx -xx> 12 (id=1) -x> 5 (id=2) -? 0 (id=3) -5 - -# a) the case in which it does: -committed cfg=(1,2,3) idx=(12,5,6) ----- - idx -xx> 12 (id=1) -> 5 (id=2) -x> 6 (id=3) -6 - -# b) the case in which it does not: -committed cfg=(1,2,3) idx=(12,5,4) ----- - idx -xx> 12 (id=1) -x> 5 (id=2) -> 4 (id=3) -5 - -# c) a different case in which the last index is pending but it has no chance of -# swaying the outcome (because nobody in the current quorum agrees on anything -# higher than the candidate): -committed cfg=(1,2,3) idx=(5,5,_) ----- - idx -x> 5 (id=1) -> 5 (id=2) -? 0 (id=3) -5 - -# c) continued: Doesn't matter what shows up last. The result is final. 
-committed cfg=(1,2,3) idx=(5,5,12) ----- - idx -> 5 (id=1) -> 5 (id=2) -xx> 12 (id=3) -5 - -# With all committed idx known, the result is final. -committed cfg=(1, 2, 3) idx=(100, 101, 103) ----- - idx -> 100 (id=1) -x> 101 (id=2) -xx> 103 (id=3) -101 - - - -# Some more complicated examples. Similar to case c) above. The result is -# already final because no index higher than 103 is one short of quorum. -committed cfg=(1, 2, 3, 4, 5) idx=(101, 104, 103, 103,_) ----- - idx -x> 101 (id=1) -xxxx> 104 (id=2) -xx> 103 (id=3) -> 103 (id=4) -? 0 (id=5) -103 - -# A similar case which is not final because another vote for >= 103 would change -# the outcome. -committed cfg=(1, 2, 3, 4, 5) idx=(101, 102, 103, 103,_) ----- - idx -x> 101 (id=1) -xx> 102 (id=2) -xxx> 103 (id=3) -> 103 (id=4) -? 0 (id=5) -102 diff --git a/raft/quorum/testdata/majority_vote.txt b/raft/quorum/testdata/majority_vote.txt deleted file mode 100644 index 5f9564b4f51f..000000000000 --- a/raft/quorum/testdata/majority_vote.txt +++ /dev/null @@ -1,97 +0,0 @@ -# The empty config always announces a won vote. -vote ----- -VoteWon - -vote cfg=(1) votes=(_) ----- -VotePending - -vote cfg=(1) votes=(n) ----- -VoteLost - -vote cfg=(123) votes=(y) ----- -VoteWon - - - - -vote cfg=(4,8) votes=(_,_) ----- -VotePending - -# With two voters, a single rejection loses the vote. 
-vote cfg=(4,8) votes=(n,_) ----- -VoteLost - -vote cfg=(4,8) votes=(y,_) ----- -VotePending - -vote cfg=(4,8) votes=(n,y) ----- -VoteLost - -vote cfg=(4,8) votes=(y,y) ----- -VoteWon - - - -vote cfg=(2,4,7) votes=(_,_,_) ----- -VotePending - -vote cfg=(2,4,7) votes=(n,_,_) ----- -VotePending - -vote cfg=(2,4,7) votes=(y,_,_) ----- -VotePending - -vote cfg=(2,4,7) votes=(n,n,_) ----- -VoteLost - -vote cfg=(2,4,7) votes=(y,n,_) ----- -VotePending - -vote cfg=(2,4,7) votes=(y,y,_) ----- -VoteWon - -vote cfg=(2,4,7) votes=(y,y,n) ----- -VoteWon - -vote cfg=(2,4,7) votes=(n,y,n) ----- -VoteLost - - - -# Test some random example with seven nodes (why not). -vote cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,_,_,_) ----- -VotePending - -vote cfg=(1,2,3,4,5,6,7) votes=(_,y,y,_,n,y,n) ----- -VotePending - -vote cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,_,n,y) ----- -VoteWon - -vote cfg=(1,2,3,4,5,6,7) votes=(y,y,_,n,y,n,n) ----- -VotePending - -vote cfg=(1,2,3,4,5,6,7) votes=(y,y,n,y,n,n,n) ----- -VoteLost diff --git a/raft/quorum/voteresult_string.go b/raft/quorum/voteresult_string.go deleted file mode 100644 index 9eca8fd0c96b..000000000000 --- a/raft/quorum/voteresult_string.go +++ /dev/null @@ -1,26 +0,0 @@ -// Code generated by "stringer -type=VoteResult"; DO NOT EDIT. - -package quorum - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[VotePending-1] - _ = x[VoteLost-2] - _ = x[VoteWon-3] -} - -const _VoteResult_name = "VotePendingVoteLostVoteWon" - -var _VoteResult_index = [...]uint8{0, 11, 19, 26} - -func (i VoteResult) String() string { - i -= 1 - if i >= VoteResult(len(_VoteResult_index)-1) { - return "VoteResult(" + strconv.FormatInt(int64(i+1), 10) + ")" - } - return _VoteResult_name[_VoteResult_index[i]:_VoteResult_index[i+1]] -} diff --git a/raft/raft.go b/raft/raft.go deleted file mode 100644 index 180a96e9389c..000000000000 --- a/raft/raft.go +++ /dev/null @@ -1,1892 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "bytes" - "errors" - "fmt" - "math" - "math/rand" - "sort" - "strings" - "sync" - "time" - - "go.etcd.io/etcd/raft/v3/confchange" - "go.etcd.io/etcd/raft/v3/quorum" - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -// None is a placeholder node ID used when there is no leader. -const None uint64 = 0 -const noLimit = math.MaxUint64 - -// Possible values for StateType. -const ( - StateFollower StateType = iota - StateCandidate - StateLeader - StatePreCandidate - numStates -) - -type ReadOnlyOption int - -const ( - // ReadOnlySafe guarantees the linearizability of the read only request by - // communicating with the quorum. It is the default and suggested option. 
- ReadOnlySafe ReadOnlyOption = iota - // ReadOnlyLeaseBased ensures linearizability of the read only request by - // relying on the leader lease. It can be affected by clock drift. - // If the clock drift is unbounded, leader might keep the lease longer than it - // should (clock can move backward/pause without any bound). ReadIndex is not safe - // in that case. - ReadOnlyLeaseBased -) - -// Possible values for CampaignType -const ( - // campaignPreElection represents the first phase of a normal election when - // Config.PreVote is true. - campaignPreElection CampaignType = "CampaignPreElection" - // campaignElection represents a normal (time-based) election (the second phase - // of the election when Config.PreVote is true). - campaignElection CampaignType = "CampaignElection" - // campaignTransfer represents the type of leader transfer - campaignTransfer CampaignType = "CampaignTransfer" -) - -// ErrProposalDropped is returned when the proposal is ignored by some cases, -// so that the proposer can be notified and fail fast. -var ErrProposalDropped = errors.New("raft proposal dropped") - -// lockedRand is a small wrapper around rand.Rand to provide -// synchronization among multiple raft groups. Only the methods needed -// by the code are exposed (e.g. Intn). -type lockedRand struct { - mu sync.Mutex - rand *rand.Rand -} - -func (r *lockedRand) Intn(n int) int { - r.mu.Lock() - v := r.rand.Intn(n) - r.mu.Unlock() - return v -} - -var globalRand = &lockedRand{ - rand: rand.New(rand.NewSource(time.Now().UnixNano())), -} - -// CampaignType represents the type of campaigning -// the reason we use the type of string instead of uint64 -// is because it's simpler to compare and fill in raft entries -type CampaignType string - -// StateType represents the role of a node in a cluster. 
-type StateType uint64 - -var stmap = [...]string{ - "StateFollower", - "StateCandidate", - "StateLeader", - "StatePreCandidate", -} - -func (st StateType) String() string { - return stmap[st] -} - -// Config contains the parameters to start a raft. -type Config struct { - // ID is the identity of the local raft. ID cannot be 0. - ID uint64 - - // ElectionTick is the number of Node.Tick invocations that must pass between - // elections. That is, if a follower does not receive any message from the - // leader of current term before ElectionTick has elapsed, it will become - // candidate and start an election. ElectionTick must be greater than - // HeartbeatTick. We suggest ElectionTick = 10 * HeartbeatTick to avoid - // unnecessary leader switching. - ElectionTick int - // HeartbeatTick is the number of Node.Tick invocations that must pass between - // heartbeats. That is, a leader sends heartbeat messages to maintain its - // leadership every HeartbeatTick ticks. - HeartbeatTick int - - // Storage is the storage for raft. raft generates entries and states to be - // stored in storage. raft reads the persisted entries and states out of - // Storage when it needs. raft reads out the previous state and configuration - // out of storage when restarting. - Storage Storage - // Applied is the last applied index. It should only be set when restarting - // raft. raft will not return entries to the application smaller or equal to - // Applied. If Applied is unset when restarting, raft might return previous - // applied entries. This is a very application dependent configuration. - Applied uint64 - - // MaxSizePerMsg limits the max byte size of each append message. Smaller - // value lowers the raft recovery cost(initial probing and message lost - // during normal operation). On the other side, it might affect the - // throughput during normal replication. Note: math.MaxUint64 for unlimited, - // 0 for at most one entry per message. 
- MaxSizePerMsg uint64 - // MaxCommittedSizePerReady limits the size of the committed entries which - // can be applied. - MaxCommittedSizePerReady uint64 - // MaxUncommittedEntriesSize limits the aggregate byte size of the - // uncommitted entries that may be appended to a leader's log. Once this - // limit is exceeded, proposals will begin to return ErrProposalDropped - // errors. Note: 0 for no limit. - MaxUncommittedEntriesSize uint64 - // MaxInflightMsgs limits the max number of in-flight append messages during - // optimistic replication phase. The application transportation layer usually - // has its own sending buffer over TCP/UDP. Setting MaxInflightMsgs to avoid - // overflowing that sending buffer. TODO (xiangli): feedback to application to - // limit the proposal rate? - MaxInflightMsgs int - // MaxInflightBytes limits the number of in-flight bytes in append messages. - // Complements MaxInflightMsgs. Ignored if zero. - // - // This effectively bounds the bandwidth-delay product. Note that especially - // in high-latency deployments setting this too low can lead to a dramatic - // reduction in throughput. For example, with a peer that has a round-trip - // latency of 100ms to the leader and this setting is set to 1 MB, there is a - // throughput limit of 10 MB/s for this group. With RTT of 400ms, this drops - // to 2.5 MB/s. See Little's law to understand the maths behind. - MaxInflightBytes uint64 - - // CheckQuorum specifies if the leader should check quorum activity. Leader - // steps down when quorum is not active for an electionTimeout. - CheckQuorum bool - - // PreVote enables the Pre-Vote algorithm described in raft thesis section - // 9.6. This prevents disruption when a node that has been partitioned away - // rejoins the cluster. - PreVote bool - - // ReadOnlyOption specifies how the read only request is processed. - // - // ReadOnlySafe guarantees the linearizability of the read only request by - // communicating with the quorum. 
It is the default and suggested option. - // - // ReadOnlyLeaseBased ensures linearizability of the read only request by - // relying on the leader lease. It can be affected by clock drift. - // If the clock drift is unbounded, leader might keep the lease longer than it - // should (clock can move backward/pause without any bound). ReadIndex is not safe - // in that case. - // CheckQuorum MUST be enabled if ReadOnlyOption is ReadOnlyLeaseBased. - ReadOnlyOption ReadOnlyOption - - // Logger is the logger used for raft log. For multinode which can host - // multiple raft group, each raft group can have its own logger - Logger Logger - - // DisableProposalForwarding set to true means that followers will drop - // proposals, rather than forwarding them to the leader. One use case for - // this feature would be in a situation where the Raft leader is used to - // compute the data of a proposal, for example, adding a timestamp from a - // hybrid logical clock to data in a monotonically increasing way. Forwarding - // should be disabled to prevent a follower with an inaccurate hybrid - // logical clock from assigning the timestamp and then forwarding the data - // to the leader. - DisableProposalForwarding bool -} - -func (c *Config) validate() error { - if c.ID == None { - return errors.New("cannot use none as id") - } - - if c.HeartbeatTick <= 0 { - return errors.New("heartbeat tick must be greater than 0") - } - - if c.ElectionTick <= c.HeartbeatTick { - return errors.New("election tick must be greater than heartbeat tick") - } - - if c.Storage == nil { - return errors.New("storage cannot be nil") - } - - if c.MaxUncommittedEntriesSize == 0 { - c.MaxUncommittedEntriesSize = noLimit - } - - // default MaxCommittedSizePerReady to MaxSizePerMsg because they were - // previously the same parameter. 
- if c.MaxCommittedSizePerReady == 0 { - c.MaxCommittedSizePerReady = c.MaxSizePerMsg - } - - if c.MaxInflightMsgs <= 0 { - return errors.New("max inflight messages must be greater than 0") - } - if c.MaxInflightBytes == 0 { - c.MaxInflightBytes = noLimit - } else if c.MaxInflightBytes < c.MaxSizePerMsg { - return errors.New("max inflight bytes must be >= max message size") - } - - if c.Logger == nil { - c.Logger = getLogger() - } - - if c.ReadOnlyOption == ReadOnlyLeaseBased && !c.CheckQuorum { - return errors.New("CheckQuorum must be enabled when ReadOnlyOption is ReadOnlyLeaseBased") - } - - return nil -} - -type raft struct { - id uint64 - - Term uint64 - Vote uint64 - - readStates []ReadState - - // the log - raftLog *raftLog - - maxMsgSize uint64 - maxUncommittedSize uint64 - // TODO(tbg): rename to trk. - prs tracker.ProgressTracker - - state StateType - - // isLearner is true if the local raft node is a learner. - isLearner bool - - msgs []pb.Message - - // the leader id - lead uint64 - // leadTransferee is id of the leader transfer target when its value is not zero. - // Follow the procedure defined in raft thesis 3.10. - leadTransferee uint64 - // Only one conf change may be pending (in the log, but not yet - // applied) at a time. This is enforced via pendingConfIndex, which - // is set to a value >= the log index of the latest pending - // configuration change (if any). Config changes are only allowed to - // be proposed if the leader's applied index is greater than this - // value. - pendingConfIndex uint64 - // an estimate of the size of the uncommitted tail of the Raft log. Used to - // prevent unbounded log growth. Only maintained by the leader. Reset on - // term changes. - uncommittedSize uint64 - - readOnly *readOnly - - // number of ticks since it reached last electionTimeout when it is leader - // or candidate. - // number of ticks since it reached last electionTimeout or received a - // valid message from current leader when it is a follower. 
- electionElapsed int - - // number of ticks since it reached last heartbeatTimeout. - // only leader keeps heartbeatElapsed. - heartbeatElapsed int - - checkQuorum bool - preVote bool - - heartbeatTimeout int - electionTimeout int - // randomizedElectionTimeout is a random number between - // [electiontimeout, 2 * electiontimeout - 1]. It gets reset - // when raft changes its state to follower or candidate. - randomizedElectionTimeout int - disableProposalForwarding bool - - tick func() - step stepFunc - - logger Logger - - // pendingReadIndexMessages is used to store messages of type MsgReadIndex - // that can't be answered as new leader didn't committed any log in - // current term. Those will be handled as fast as first log is committed in - // current term. - pendingReadIndexMessages []pb.Message -} - -func newRaft(c *Config) *raft { - if err := c.validate(); err != nil { - panic(err.Error()) - } - raftlog := newLogWithSize(c.Storage, c.Logger, c.MaxCommittedSizePerReady) - hs, cs, err := c.Storage.InitialState() - if err != nil { - panic(err) // TODO(bdarnell) - } - - r := &raft{ - id: c.ID, - lead: None, - isLearner: false, - raftLog: raftlog, - maxMsgSize: c.MaxSizePerMsg, - maxUncommittedSize: c.MaxUncommittedEntriesSize, - prs: tracker.MakeProgressTracker(c.MaxInflightMsgs, c.MaxInflightBytes), - electionTimeout: c.ElectionTick, - heartbeatTimeout: c.HeartbeatTick, - logger: c.Logger, - checkQuorum: c.CheckQuorum, - preVote: c.PreVote, - readOnly: newReadOnly(c.ReadOnlyOption), - disableProposalForwarding: c.DisableProposalForwarding, - } - - cfg, prs, err := confchange.Restore(confchange.Changer{ - Tracker: r.prs, - LastIndex: raftlog.lastIndex(), - }, cs) - if err != nil { - panic(err) - } - assertConfStatesEquivalent(r.logger, cs, r.switchToConfig(cfg, prs)) - - if !IsEmptyHardState(hs) { - r.loadState(hs) - } - if c.Applied > 0 { - raftlog.appliedTo(c.Applied) - } - r.becomeFollower(r.Term, None) - - var nodesStrs []string - for _, n := range 
r.prs.VoterNodes() { - nodesStrs = append(nodesStrs, fmt.Sprintf("%x", n)) - } - - r.logger.Infof("newRaft %x [peers: [%s], term: %d, commit: %d, applied: %d, lastindex: %d, lastterm: %d]", - r.id, strings.Join(nodesStrs, ","), r.Term, r.raftLog.committed, r.raftLog.applied, r.raftLog.lastIndex(), r.raftLog.lastTerm()) - return r -} - -func (r *raft) hasLeader() bool { return r.lead != None } - -func (r *raft) softState() *SoftState { return &SoftState{Lead: r.lead, RaftState: r.state} } - -func (r *raft) hardState() pb.HardState { - return pb.HardState{ - Term: r.Term, - Vote: r.Vote, - Commit: r.raftLog.committed, - } -} - -// send schedules persisting state to a stable storage and AFTER that -// sending the message (as part of next Ready message processing). -func (r *raft) send(m pb.Message) { - if m.From == None { - m.From = r.id - } - if m.Type == pb.MsgVote || m.Type == pb.MsgVoteResp || m.Type == pb.MsgPreVote || m.Type == pb.MsgPreVoteResp { - if m.Term == 0 { - // All {pre-,}campaign messages need to have the term set when - // sending. - // - MsgVote: m.Term is the term the node is campaigning for, - // non-zero as we increment the term when campaigning. - // - MsgVoteResp: m.Term is the new r.Term if the MsgVote was - // granted, non-zero for the same reason MsgVote is - // - MsgPreVote: m.Term is the term the node will campaign, - // non-zero as we use m.Term to indicate the next term we'll be - // campaigning for - // - MsgPreVoteResp: m.Term is the term received in the original - // MsgPreVote if the pre-vote was granted, non-zero for the - // same reasons MsgPreVote is - panic(fmt.Sprintf("term should be set when sending %s", m.Type)) - } - } else { - if m.Term != 0 { - panic(fmt.Sprintf("term should not be set when sending %s (was %d)", m.Type, m.Term)) - } - // do not attach term to MsgProp, MsgReadIndex - // proposals are a way to forward to the leader and - // should be treated as local message. - // MsgReadIndex is also forwarded to leader. 
- if m.Type != pb.MsgProp && m.Type != pb.MsgReadIndex { - m.Term = r.Term - } - } - if m.To == r.id { - r.logger.Panicf("message should not be self-addressed when sending %s", m.Type) - } - r.msgs = append(r.msgs, m) -} - -// sendAppend sends an append RPC with new entries (if any) and the -// current commit index to the given peer. -func (r *raft) sendAppend(to uint64) { - r.maybeSendAppend(to, true) -} - -// maybeSendAppend sends an append RPC with new entries to the given peer, -// if necessary. Returns true if a message was sent. The sendIfEmpty -// argument controls whether messages with no entries will be sent -// ("empty" messages are useful to convey updated Commit indexes, but -// are undesirable when we're sending multiple messages in a batch). -func (r *raft) maybeSendAppend(to uint64, sendIfEmpty bool) bool { - pr := r.prs.Progress[to] - if pr.IsPaused() { - return false - } - - term, errt := r.raftLog.term(pr.Next - 1) - var ents []pb.Entry - var erre error - // In a throttled StateReplicate only send empty MsgApp, to ensure progress. - // Otherwise, if we had a full Inflights and all inflight messages were in - // fact dropped, replication to that follower would stall. Instead, an empty - // MsgApp will eventually reach the follower (heartbeats responses prompt the - // leader to send an append), allowing it to be acked or rejected, both of - // which will clear out Inflights. 
- if pr.State != tracker.StateReplicate || !pr.Inflights.Full() { - ents, erre = r.raftLog.entries(pr.Next, r.maxMsgSize) - } - - if len(ents) == 0 && !sendIfEmpty { - return false - } - - if errt != nil || erre != nil { // send snapshot if we failed to get term or entries - if !pr.RecentActive { - r.logger.Debugf("ignore sending snapshot to %x since it is not recently active", to) - return false - } - - snapshot, err := r.raftLog.snapshot() - if err != nil { - if err == ErrSnapshotTemporarilyUnavailable { - r.logger.Debugf("%x failed to send snapshot to %x because snapshot is temporarily unavailable", r.id, to) - return false - } - panic(err) // TODO(bdarnell) - } - if IsEmptySnap(snapshot) { - panic("need non-empty snapshot") - } - sindex, sterm := snapshot.Metadata.Index, snapshot.Metadata.Term - r.logger.Debugf("%x [firstindex: %d, commit: %d] sent snapshot[index: %d, term: %d] to %x [%s]", - r.id, r.raftLog.firstIndex(), r.raftLog.committed, sindex, sterm, to, pr) - pr.BecomeSnapshot(sindex) - r.logger.Debugf("%x paused sending replication messages to %x [%s]", r.id, to, pr) - - r.send(pb.Message{To: to, Type: pb.MsgSnap, Snapshot: &snapshot}) - return true - } - - // Send the actual MsgApp otherwise, and update the progress accordingly. - next := pr.Next // save Next for later, as the progress update can change it - if err := pr.UpdateOnEntriesSend(len(ents), payloadsSize(ents), next); err != nil { - r.logger.Panicf("%x: %v", r.id, err) - } - r.send(pb.Message{ - To: to, - Type: pb.MsgApp, - Index: next - 1, - LogTerm: term, - Entries: ents, - Commit: r.raftLog.committed, - }) - return true -} - -// sendHeartbeat sends a heartbeat RPC to the given peer. -func (r *raft) sendHeartbeat(to uint64, ctx []byte) { - // Attach the commit as min(to.matched, r.committed). - // When the leader sends out heartbeat message, - // the receiver(follower) might not be matched with the leader - // or it might not have all the committed entries. 
- // The leader MUST NOT forward the follower's commit to - // an unmatched index. - commit := min(r.prs.Progress[to].Match, r.raftLog.committed) - m := pb.Message{ - To: to, - Type: pb.MsgHeartbeat, - Commit: commit, - Context: ctx, - } - - r.send(m) -} - -// bcastAppend sends RPC, with entries to all peers that are not up-to-date -// according to the progress recorded in r.prs. -func (r *raft) bcastAppend() { - r.prs.Visit(func(id uint64, _ *tracker.Progress) { - if id == r.id { - return - } - r.sendAppend(id) - }) -} - -// bcastHeartbeat sends RPC, without entries to all the peers. -func (r *raft) bcastHeartbeat() { - lastCtx := r.readOnly.lastPendingRequestCtx() - if len(lastCtx) == 0 { - r.bcastHeartbeatWithCtx(nil) - } else { - r.bcastHeartbeatWithCtx([]byte(lastCtx)) - } -} - -func (r *raft) bcastHeartbeatWithCtx(ctx []byte) { - r.prs.Visit(func(id uint64, _ *tracker.Progress) { - if id == r.id { - return - } - r.sendHeartbeat(id, ctx) - }) -} - -func (r *raft) advance(rd Ready) { - r.reduceUncommittedSize(rd.CommittedEntries) - - // If entries were applied (or a snapshot), update our cursor for - // the next Ready. Note that if the current HardState contains a - // new Commit index, this does not mean that we're also applying - // all of the new entries due to commit pagination by size. - if newApplied := rd.appliedCursor(); newApplied > 0 { - r.raftLog.appliedTo(newApplied) - - if r.prs.Config.AutoLeave && newApplied >= r.pendingConfIndex && r.state == StateLeader { - // If the current (and most recent, at least for this leader's term) - // configuration should be auto-left, initiate that now. We use a - // nil Data which unmarshals into an empty ConfChangeV2 and has the - // benefit that appendEntry can never refuse it based on its size - // (which registers as zero). - m, err := confChangeToMsg(nil) - if err != nil { - panic(err) - } - // NB: this proposal can't be dropped due to size, but can be - // dropped if a leadership transfer is in progress. 
We'll keep - // checking this condition on each applied entry, so either the - // leadership transfer will succeed and the new leader will leave - // the joint configuration, or the leadership transfer will fail, - // and we will propose the config change on the next advance. - if err := r.Step(m); err != nil { - r.logger.Debugf("not initiating automatic transition out of joint configuration %s: %v", r.prs.Config, err) - } else { - r.logger.Infof("initiating automatic transition out of joint configuration %s", r.prs.Config) - } - } - } - - if len(rd.Entries) > 0 { - e := rd.Entries[len(rd.Entries)-1] - if r.id == r.lead { - // The leader needs to self-ack the entries just appended (since it doesn't - // send an MsgApp to itself). This is roughly equivalent to: - // - // r.prs.Progress[r.id].MaybeUpdate(e.Index) - // if r.maybeCommit() { - // r.bcastAppend() - // } - _ = r.Step(pb.Message{From: r.id, Type: pb.MsgAppResp, Index: e.Index}) - } - // NB: it's important for performance that this call happens after - // r.Step above on the leader. This is because r.Step can then use - // a fast-path for `r.raftLog.term()`. - r.raftLog.stableTo(e.Index, e.Term) - } - if !IsEmptySnap(rd.Snapshot) { - r.raftLog.stableSnapTo(rd.Snapshot.Metadata.Index) - } -} - -// maybeCommit attempts to advance the commit index. Returns true if -// the commit index changed (in which case the caller should call -// r.bcastAppend). 
-func (r *raft) maybeCommit() bool { - mci := r.prs.Committed() - return r.raftLog.maybeCommit(mci, r.Term) -} - -func (r *raft) reset(term uint64) { - if r.Term != term { - r.Term = term - r.Vote = None - } - r.lead = None - - r.electionElapsed = 0 - r.heartbeatElapsed = 0 - r.resetRandomizedElectionTimeout() - - r.abortLeaderTransfer() - - r.prs.ResetVotes() - r.prs.Visit(func(id uint64, pr *tracker.Progress) { - *pr = tracker.Progress{ - Match: 0, - Next: r.raftLog.lastIndex() + 1, - Inflights: tracker.NewInflights(r.prs.MaxInflight, r.prs.MaxInflightBytes), - IsLearner: pr.IsLearner, - } - if id == r.id { - pr.Match = r.raftLog.lastIndex() - } - }) - - r.pendingConfIndex = 0 - r.uncommittedSize = 0 - r.readOnly = newReadOnly(r.readOnly.option) -} - -func (r *raft) appendEntry(es ...pb.Entry) (accepted bool) { - li := r.raftLog.lastIndex() - for i := range es { - es[i].Term = r.Term - es[i].Index = li + 1 + uint64(i) - } - // Track the size of this uncommitted proposal. - if !r.increaseUncommittedSize(es) { - r.logger.Warningf( - "%x appending new entries to log would exceed uncommitted entry size limit; dropping proposal", - r.id, - ) - // Drop the proposal. - return false - } - // use latest "last" index after truncate/append - r.raftLog.append(es...) - return true -} - -// tickElection is run by followers and candidates after r.electionTimeout. -func (r *raft) tickElection() { - r.electionElapsed++ - - if r.promotable() && r.pastElectionTimeout() { - r.electionElapsed = 0 - if err := r.Step(pb.Message{From: r.id, Type: pb.MsgHup}); err != nil { - r.logger.Debugf("error occurred during election: %v", err) - } - } -} - -// tickHeartbeat is run by leaders to send a MsgBeat after r.heartbeatTimeout. 
-func (r *raft) tickHeartbeat() { - r.heartbeatElapsed++ - r.electionElapsed++ - - if r.electionElapsed >= r.electionTimeout { - r.electionElapsed = 0 - if r.checkQuorum { - if err := r.Step(pb.Message{From: r.id, Type: pb.MsgCheckQuorum}); err != nil { - r.logger.Debugf("error occurred during checking sending heartbeat: %v", err) - } - } - // If current leader cannot transfer leadership in electionTimeout, it becomes leader again. - if r.state == StateLeader && r.leadTransferee != None { - r.abortLeaderTransfer() - } - } - - if r.state != StateLeader { - return - } - - if r.heartbeatElapsed >= r.heartbeatTimeout { - r.heartbeatElapsed = 0 - if err := r.Step(pb.Message{From: r.id, Type: pb.MsgBeat}); err != nil { - r.logger.Debugf("error occurred during checking sending heartbeat: %v", err) - } - } -} - -func (r *raft) becomeFollower(term uint64, lead uint64) { - r.step = stepFollower - r.reset(term) - r.tick = r.tickElection - r.lead = lead - r.state = StateFollower - r.logger.Infof("%x became follower at term %d", r.id, r.Term) -} - -func (r *raft) becomeCandidate() { - // TODO(xiangli) remove the panic when the raft implementation is stable - if r.state == StateLeader { - panic("invalid transition [leader -> candidate]") - } - r.step = stepCandidate - r.reset(r.Term + 1) - r.tick = r.tickElection - r.Vote = r.id - r.state = StateCandidate - r.logger.Infof("%x became candidate at term %d", r.id, r.Term) -} - -func (r *raft) becomePreCandidate() { - // TODO(xiangli) remove the panic when the raft implementation is stable - if r.state == StateLeader { - panic("invalid transition [leader -> pre-candidate]") - } - // Becoming a pre-candidate changes our step functions and state, - // but doesn't change anything else. In particular it does not increase - // r.Term or change r.Vote. 
- r.step = stepCandidate - r.prs.ResetVotes() - r.tick = r.tickElection - r.lead = None - r.state = StatePreCandidate - r.logger.Infof("%x became pre-candidate at term %d", r.id, r.Term) -} - -func (r *raft) becomeLeader() { - // TODO(xiangli) remove the panic when the raft implementation is stable - if r.state == StateFollower { - panic("invalid transition [follower -> leader]") - } - r.step = stepLeader - r.reset(r.Term) - r.tick = r.tickHeartbeat - r.lead = r.id - r.state = StateLeader - // Followers enter replicate mode when they've been successfully probed - // (perhaps after having received a snapshot as a result). The leader is - // trivially in this state. Note that r.reset() has initialized this - // progress with the last index already. - pr := r.prs.Progress[r.id] - pr.BecomeReplicate() - // The leader always has RecentActive == true; MsgCheckQuorum makes sure to - // preserve this. - pr.RecentActive = true - - // Conservatively set the pendingConfIndex to the last index in the - // log. There may or may not be a pending config change, but it's - // safe to delay any future proposals until we commit all our - // pending log entries, and scanning the entire tail of the log - // could be expensive. - r.pendingConfIndex = r.raftLog.lastIndex() - - emptyEnt := pb.Entry{Data: nil} - if !r.appendEntry(emptyEnt) { - // This won't happen because we just called reset() above. - r.logger.Panic("empty entry was dropped") - } - // As a special case, don't count the initial empty entry towards the - // uncommitted log quota. This is because we want to preserve the - // behavior of allowing one entry larger than quota if the current - // usage is zero. 
- r.reduceUncommittedSize([]pb.Entry{emptyEnt}) - r.logger.Infof("%x became leader at term %d", r.id, r.Term) -} - -func (r *raft) hup(t CampaignType) { - if r.state == StateLeader { - r.logger.Debugf("%x ignoring MsgHup because already leader", r.id) - return - } - - if !r.promotable() { - r.logger.Warningf("%x is unpromotable and can not campaign", r.id) - return - } - ents, err := r.raftLog.slice(r.raftLog.applied+1, r.raftLog.committed+1, noLimit) - if err != nil { - r.logger.Panicf("unexpected error getting unapplied entries (%v)", err) - } - if n := numOfPendingConf(ents); n != 0 && r.raftLog.committed > r.raftLog.applied { - r.logger.Warningf("%x cannot campaign at term %d since there are still %d pending configuration changes to apply", r.id, r.Term, n) - return - } - - r.logger.Infof("%x is starting a new election at term %d", r.id, r.Term) - r.campaign(t) -} - -// campaign transitions the raft instance to candidate state. This must only be -// called after verifying that this is a legitimate transition. -func (r *raft) campaign(t CampaignType) { - if !r.promotable() { - // This path should not be hit (callers are supposed to check), but - // better safe than sorry. - r.logger.Warningf("%x is unpromotable; campaign() should have been called", r.id) - } - var term uint64 - var voteMsg pb.MessageType - if t == campaignPreElection { - r.becomePreCandidate() - voteMsg = pb.MsgPreVote - // PreVote RPCs are sent for the next term before we've incremented r.Term. - term = r.Term + 1 - } else { - r.becomeCandidate() - voteMsg = pb.MsgVote - term = r.Term - } - if _, _, res := r.poll(r.id, voteRespMsgType(voteMsg), true); res == quorum.VoteWon { - // We won the election after voting for ourselves (which must mean that - // this is a single-node cluster). Advance to the next state. 
- if t == campaignPreElection { - r.campaign(campaignElection) - } else { - r.becomeLeader() - } - return - } - var ids []uint64 - { - idMap := r.prs.Voters.IDs() - ids = make([]uint64, 0, len(idMap)) - for id := range idMap { - ids = append(ids, id) - } - sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) - } - for _, id := range ids { - if id == r.id { - continue - } - r.logger.Infof("%x [logterm: %d, index: %d] sent %s request to %x at term %d", - r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), voteMsg, id, r.Term) - - var ctx []byte - if t == campaignTransfer { - ctx = []byte(t) - } - r.send(pb.Message{Term: term, To: id, Type: voteMsg, Index: r.raftLog.lastIndex(), LogTerm: r.raftLog.lastTerm(), Context: ctx}) - } -} - -func (r *raft) poll(id uint64, t pb.MessageType, v bool) (granted int, rejected int, result quorum.VoteResult) { - if v { - r.logger.Infof("%x received %s from %x at term %d", r.id, t, id, r.Term) - } else { - r.logger.Infof("%x received %s rejection from %x at term %d", r.id, t, id, r.Term) - } - r.prs.RecordVote(id, v) - return r.prs.TallyVotes() -} - -func (r *raft) Step(m pb.Message) error { - // Handle the message term, which may result in our stepping down to a follower. 
- switch { - case m.Term == 0: - // local message - case m.Term > r.Term: - if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote { - force := bytes.Equal(m.Context, []byte(campaignTransfer)) - inLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout - if !force && inLease { - // If a server receives a RequestVote request within the minimum election timeout - // of hearing from a current leader, it does not update its term or grant its vote - r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] ignored %s from %x [logterm: %d, index: %d] at term %d: lease is not expired (remaining ticks: %d)", - r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term, r.electionTimeout-r.electionElapsed) - return nil - } - } - switch { - case m.Type == pb.MsgPreVote: - // Never change our term in response to a PreVote - case m.Type == pb.MsgPreVoteResp && !m.Reject: - // We send pre-vote requests with a term in our future. If the - // pre-vote is granted, we will increment our term when we get a - // quorum. If it is not, the term comes from the node that - // rejected our vote so we should become a follower at the new - // term. - default: - r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d]", - r.id, r.Term, m.Type, m.From, m.Term) - if m.Type == pb.MsgApp || m.Type == pb.MsgHeartbeat || m.Type == pb.MsgSnap { - r.becomeFollower(m.Term, m.From) - } else { - r.becomeFollower(m.Term, None) - } - } - - case m.Term < r.Term: - if (r.checkQuorum || r.preVote) && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) { - // We have received messages from a leader at a lower term. It is possible - // that these messages were simply delayed in the network, but this could - // also mean that this node has advanced its term number during a network - // partition, and it is now unable to either win an election or to rejoin - // the majority on the old term. 
If checkQuorum is false, this will be - // handled by incrementing term numbers in response to MsgVote with a - // higher term, but if checkQuorum is true we may not advance the term on - // MsgVote and must generate other messages to advance the term. The net - // result of these two features is to minimize the disruption caused by - // nodes that have been removed from the cluster's configuration: a - // removed node will send MsgVotes (or MsgPreVotes) which will be ignored, - // but it will not receive MsgApp or MsgHeartbeat, so it will not create - // disruptive term increases, by notifying leader of this node's activeness. - // The above comments also true for Pre-Vote - // - // When follower gets isolated, it soon starts an election ending - // up with a higher term than leader, although it won't receive enough - // votes to win the election. When it regains connectivity, this response - // with "pb.MsgAppResp" of higher term would force leader to step down. - // However, this disruption is inevitable to free this stuck node with - // fresh election. This can be prevented with Pre-Vote phase. - r.send(pb.Message{To: m.From, Type: pb.MsgAppResp}) - } else if m.Type == pb.MsgPreVote { - // Before Pre-Vote enable, there may have candidate with higher term, - // but less log. After update to Pre-Vote, the cluster may deadlock if - // we drop messages with a lower term. 
- r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] rejected %s from %x [logterm: %d, index: %d] at term %d", - r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term) - r.send(pb.Message{To: m.From, Term: r.Term, Type: pb.MsgPreVoteResp, Reject: true}) - } else { - // ignore other cases - r.logger.Infof("%x [term: %d] ignored a %s message with lower term from %x [term: %d]", - r.id, r.Term, m.Type, m.From, m.Term) - } - return nil - } - - switch m.Type { - case pb.MsgHup: - if r.preVote { - r.hup(campaignPreElection) - } else { - r.hup(campaignElection) - } - - case pb.MsgVote, pb.MsgPreVote: - // We can vote if this is a repeat of a vote we've already cast... - canVote := r.Vote == m.From || - // ...we haven't voted and we don't think there's a leader yet in this term... - (r.Vote == None && r.lead == None) || - // ...or this is a PreVote for a future term... - (m.Type == pb.MsgPreVote && m.Term > r.Term) - // ...and we believe the candidate is up to date. - if canVote && r.raftLog.isUpToDate(m.Index, m.LogTerm) { - // Note: it turns out that that learners must be allowed to cast votes. - // This seems counter- intuitive but is necessary in the situation in which - // a learner has been promoted (i.e. is now a voter) but has not learned - // about this yet. - // For example, consider a group in which id=1 is a learner and id=2 and - // id=3 are voters. A configuration change promoting 1 can be committed on - // the quorum `{2,3}` without the config change being appended to the - // learner's log. If the leader (say 2) fails, there are de facto two - // voters remaining. Only 3 can win an election (due to its log containing - // all committed entries), but to do so it will need 1 to vote. But 1 - // considers itself a learner and will continue to do so until 3 has - // stepped up as leader, replicates the conf change to 1, and 1 applies it. 
- // Ultimately, by receiving a request to vote, the learner realizes that - // the candidate believes it to be a voter, and that it should act - // accordingly. The candidate's config may be stale, too; but in that case - // it won't win the election, at least in the absence of the bug discussed - // in: - // https://github.com/etcd-io/etcd/issues/7625#issuecomment-488798263. - r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] cast %s for %x [logterm: %d, index: %d] at term %d", - r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term) - // When responding to Msg{Pre,}Vote messages we include the term - // from the message, not the local term. To see why, consider the - // case where a single node was previously partitioned away and - // it's local term is now out of date. If we include the local term - // (recall that for pre-votes we don't update the local term), the - // (pre-)campaigning node on the other end will proceed to ignore - // the message (it ignores all out of date messages). - // The term in the original message and current local term are the - // same in the case of regular votes, but different for pre-votes. - r.send(pb.Message{To: m.From, Term: m.Term, Type: voteRespMsgType(m.Type)}) - if m.Type == pb.MsgVote { - // Only record real votes. - r.electionElapsed = 0 - r.Vote = m.From - } - } else { - r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] rejected %s from %x [logterm: %d, index: %d] at term %d", - r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term) - r.send(pb.Message{To: m.From, Term: r.Term, Type: voteRespMsgType(m.Type), Reject: true}) - } - - default: - err := r.step(r, m) - if err != nil { - return err - } - } - return nil -} - -type stepFunc func(r *raft, m pb.Message) error - -func stepLeader(r *raft, m pb.Message) error { - // These message types do not require any progress for m.From. 
- switch m.Type { - case pb.MsgBeat: - r.bcastHeartbeat() - return nil - case pb.MsgCheckQuorum: - if !r.prs.QuorumActive() { - r.logger.Warningf("%x stepped down to follower since quorum is not active", r.id) - r.becomeFollower(r.Term, None) - } - // Mark everyone (but ourselves) as inactive in preparation for the next - // CheckQuorum. - r.prs.Visit(func(id uint64, pr *tracker.Progress) { - if id != r.id { - pr.RecentActive = false - } - }) - return nil - case pb.MsgProp: - if len(m.Entries) == 0 { - r.logger.Panicf("%x stepped empty MsgProp", r.id) - } - if r.prs.Progress[r.id] == nil { - // If we are not currently a member of the range (i.e. this node - // was removed from the configuration while serving as leader), - // drop any new proposals. - return ErrProposalDropped - } - if r.leadTransferee != None { - r.logger.Debugf("%x [term %d] transfer leadership to %x is in progress; dropping proposal", r.id, r.Term, r.leadTransferee) - return ErrProposalDropped - } - - for i := range m.Entries { - e := &m.Entries[i] - var cc pb.ConfChangeI - if e.Type == pb.EntryConfChange { - var ccc pb.ConfChange - if err := ccc.Unmarshal(e.Data); err != nil { - panic(err) - } - cc = ccc - } else if e.Type == pb.EntryConfChangeV2 { - var ccc pb.ConfChangeV2 - if err := ccc.Unmarshal(e.Data); err != nil { - panic(err) - } - cc = ccc - } - if cc != nil { - alreadyPending := r.pendingConfIndex > r.raftLog.applied - alreadyJoint := len(r.prs.Config.Voters[1]) > 0 - wantsLeaveJoint := len(cc.AsV2().Changes) == 0 - - var refused string - if alreadyPending { - refused = fmt.Sprintf("possible unapplied conf change at index %d (applied to %d)", r.pendingConfIndex, r.raftLog.applied) - } else if alreadyJoint && !wantsLeaveJoint { - refused = "must transition out of joint config first" - } else if !alreadyJoint && wantsLeaveJoint { - refused = "not in joint state; refusing empty conf change" - } - - if refused != "" { - r.logger.Infof("%x ignoring conf change %v at config %s: %s", r.id, 
cc, r.prs.Config, refused) - m.Entries[i] = pb.Entry{Type: pb.EntryNormal} - } else { - r.pendingConfIndex = r.raftLog.lastIndex() + uint64(i) + 1 - } - } - } - - if !r.appendEntry(m.Entries...) { - return ErrProposalDropped - } - r.bcastAppend() - return nil - case pb.MsgReadIndex: - // only one voting member (the leader) in the cluster - if r.prs.IsSingleton() { - if resp := r.responseToReadIndexReq(m, r.raftLog.committed); resp.To != None { - r.send(resp) - } - return nil - } - - // Postpone read only request when this leader has not committed - // any log entry at its term. - if !r.committedEntryInCurrentTerm() { - r.pendingReadIndexMessages = append(r.pendingReadIndexMessages, m) - return nil - } - - sendMsgReadIndexResponse(r, m) - - return nil - } - - // All other message types require a progress for m.From (pr). - pr := r.prs.Progress[m.From] - if pr == nil { - r.logger.Debugf("%x no progress available for %x", r.id, m.From) - return nil - } - switch m.Type { - case pb.MsgAppResp: - // NB: this code path is also hit from (*raft).advance, where the leader steps - // an MsgAppResp to acknowledge the appended entries in the last Ready. - - pr.RecentActive = true - - if m.Reject { - // RejectHint is the suggested next base entry for appending (i.e. - // we try to append entry RejectHint+1 next), and LogTerm is the - // term that the follower has at index RejectHint. Older versions - // of this library did not populate LogTerm for rejections and it - // is zero for followers with an empty log. - // - // Under normal circumstances, the leader's log is longer than the - // follower's and the follower's log is a prefix of the leader's - // (i.e. there is no divergent uncommitted suffix of the log on the - // follower). In that case, the first probe reveals where the - // follower's log ends (RejectHint=follower's last index) and the - // subsequent probe succeeds. 
- // - // However, when networks are partitioned or systems overloaded, - // large divergent log tails can occur. The naive attempt, probing - // entry by entry in decreasing order, will be the product of the - // length of the diverging tails and the network round-trip latency, - // which can easily result in hours of time spent probing and can - // even cause outright outages. The probes are thus optimized as - // described below. - r.logger.Debugf("%x received MsgAppResp(rejected, hint: (index %d, term %d)) from %x for index %d", - r.id, m.RejectHint, m.LogTerm, m.From, m.Index) - nextProbeIdx := m.RejectHint - if m.LogTerm > 0 { - // If the follower has an uncommitted log tail, we would end up - // probing one by one until we hit the common prefix. - // - // For example, if the leader has: - // - // idx 1 2 3 4 5 6 7 8 9 - // ----------------- - // term (L) 1 3 3 3 5 5 5 5 5 - // term (F) 1 1 1 1 2 2 - // - // Then, after sending an append anchored at (idx=9,term=5) we - // would receive a RejectHint of 6 and LogTerm of 2. Without the - // code below, we would try an append at index 6, which would - // fail again. - // - // However, looking only at what the leader knows about its own - // log and the rejection hint, it is clear that a probe at index - // 6, 5, 4, 3, and 2 must fail as well: - // - // For all of these indexes, the leader's log term is larger than - // the rejection's log term. If a probe at one of these indexes - // succeeded, its log term at that index would match the leader's, - // i.e. 3 or 5 in this example. But the follower already told the - // leader that it is still at term 2 at index 6, and since the - // log term only ever goes up (within a log), this is a contradiction. - // - // At index 1, however, the leader can draw no such conclusion, - // as its term 1 is not larger than the term 2 from the - // follower's rejection. We thus probe at 1, which will succeed - // in this example. 
In general, with this approach we probe at - // most once per term found in the leader's log. - // - // There is a similar mechanism on the follower (implemented in - // handleAppendEntries via a call to findConflictByTerm) that is - // useful if the follower has a large divergent uncommitted log - // tail[1], as in this example: - // - // idx 1 2 3 4 5 6 7 8 9 - // ----------------- - // term (L) 1 3 3 3 3 3 3 3 7 - // term (F) 1 3 3 4 4 5 5 5 6 - // - // Naively, the leader would probe at idx=9, receive a rejection - // revealing the log term of 6 at the follower. Since the leader's - // term at the previous index is already smaller than 6, the leader- - // side optimization discussed above is ineffective. The leader thus - // probes at index 8 and, naively, receives a rejection for the same - // index and log term 5. Again, the leader optimization does not improve - // over linear probing as term 5 is above the leader's term 3 for that - // and many preceding indexes; the leader would have to probe linearly - // until it would finally hit index 3, where the probe would succeed. - // - // Instead, we apply a similar optimization on the follower. When the - // follower receives the probe at index 8 (log term 3), it concludes - // that all of the leader's log preceding that index has log terms of - // 3 or below. The largest index in the follower's log with a log term - // of 3 or below is index 3. The follower will thus return a rejection - // for index=3, log term=3 instead. The leader's next probe will then - // succeed at that index. - // - // [1]: more precisely, if the log terms in the large uncommitted - // tail on the follower are larger than the leader's. At first, - // it may seem unintuitive that a follower could even have such - // a large tail, but it can happen: - // - // 1. Leader appends (but does not commit) entries 2 and 3, crashes. 
- // idx 1 2 3 4 5 6 7 8 9 - // ----------------- - // term (L) 1 2 2 [crashes] - // term (F) 1 - // term (F) 1 - // - // 2. a follower becomes leader and appends entries at term 3. - // ----------------- - // term (x) 1 2 2 [down] - // term (F) 1 3 3 3 3 - // term (F) 1 - // - // 3. term 3 leader goes down, term 2 leader returns as term 4 - // leader. It commits the log & entries at term 4. - // - // ----------------- - // term (L) 1 2 2 2 - // term (x) 1 3 3 3 3 [down] - // term (F) 1 - // ----------------- - // term (L) 1 2 2 2 4 4 4 - // term (F) 1 3 3 3 3 [gets probed] - // term (F) 1 2 2 2 4 4 4 - // - // 4. the leader will now probe the returning follower at index - // 7, the rejection points it at the end of the follower's log - // which is at a higher log term than the actually committed - // log. - nextProbeIdx = r.raftLog.findConflictByTerm(m.RejectHint, m.LogTerm) - } - if pr.MaybeDecrTo(m.Index, nextProbeIdx) { - r.logger.Debugf("%x decreased progress of %x to [%s]", r.id, m.From, pr) - if pr.State == tracker.StateReplicate { - pr.BecomeProbe() - } - r.sendAppend(m.From) - } - } else { - oldPaused := pr.IsPaused() - if pr.MaybeUpdate(m.Index) { - switch { - case pr.State == tracker.StateProbe: - pr.BecomeReplicate() - case pr.State == tracker.StateSnapshot && pr.Match >= pr.PendingSnapshot: - // TODO(tbg): we should also enter this branch if a snapshot is - // received that is below pr.PendingSnapshot but which makes it - // possible to use the log again. - r.logger.Debugf("%x recovered from needing snapshot, resumed sending replication messages to %x [%s]", r.id, m.From, pr) - // Transition back to replicating state via probing state - // (which takes the snapshot into account). If we didn't - // move to replicating state, that would only happen with - // the next round of appends (but there may not be a next - // round for a while, exposing an inconsistent RaftStatus). 
- pr.BecomeProbe() - pr.BecomeReplicate() - case pr.State == tracker.StateReplicate: - pr.Inflights.FreeLE(m.Index) - } - - if r.maybeCommit() { - // committed index has progressed for the term, so it is safe - // to respond to pending read index requests - releasePendingReadIndexMessages(r) - r.bcastAppend() - } else if oldPaused { - // If we were paused before, this node may be missing the - // latest commit index, so send it. - r.sendAppend(m.From) - } - // We've updated flow control information above, which may - // allow us to send multiple (size-limited) in-flight messages - // at once (such as when transitioning from probe to - // replicate, or when freeTo() covers multiple messages). If - // we have more entries to send, send as many messages as we - // can (without sending empty messages for the commit index) - if r.id != m.From { - for r.maybeSendAppend(m.From, false /* sendIfEmpty */) { - } - } - // Transfer leadership is in progress. - if m.From == r.leadTransferee && pr.Match == r.raftLog.lastIndex() { - r.logger.Infof("%x sent MsgTimeoutNow to %x after received MsgAppResp", r.id, m.From) - r.sendTimeoutNow(m.From) - } - } - } - case pb.MsgHeartbeatResp: - pr.RecentActive = true - pr.MsgAppFlowPaused = false - - // NB: if the follower is paused (full Inflights), this will still send an - // empty append, allowing it to recover from situations in which all the - // messages that filled up Inflights in the first place were dropped. Note - // also that the outgoing heartbeat already communicated the commit index. 
- if pr.Match < r.raftLog.lastIndex() { - r.sendAppend(m.From) - } - - if r.readOnly.option != ReadOnlySafe || len(m.Context) == 0 { - return nil - } - - if r.prs.Voters.VoteResult(r.readOnly.recvAck(m.From, m.Context)) != quorum.VoteWon { - return nil - } - - rss := r.readOnly.advance(m) - for _, rs := range rss { - if resp := r.responseToReadIndexReq(rs.req, rs.index); resp.To != None { - r.send(resp) - } - } - case pb.MsgSnapStatus: - if pr.State != tracker.StateSnapshot { - return nil - } - // TODO(tbg): this code is very similar to the snapshot handling in - // MsgAppResp above. In fact, the code there is more correct than the - // code here and should likely be updated to match (or even better, the - // logic pulled into a newly created Progress state machine handler). - if !m.Reject { - pr.BecomeProbe() - r.logger.Debugf("%x snapshot succeeded, resumed sending replication messages to %x [%s]", r.id, m.From, pr) - } else { - // NB: the order here matters or we'll be probing erroneously from - // the snapshot index, but the snapshot never applied. - pr.PendingSnapshot = 0 - pr.BecomeProbe() - r.logger.Debugf("%x snapshot failed, resumed sending replication messages to %x [%s]", r.id, m.From, pr) - } - // If snapshot finish, wait for the MsgAppResp from the remote node before sending - // out the next MsgApp. - // If snapshot failure, wait for a heartbeat interval before next try - pr.MsgAppFlowPaused = true - case pb.MsgUnreachable: - // During optimistic replication, if the remote becomes unreachable, - // there is huge probability that a MsgApp is lost. - if pr.State == tracker.StateReplicate { - pr.BecomeProbe() - } - r.logger.Debugf("%x failed to send message to %x because it is unreachable [%s]", r.id, m.From, pr) - case pb.MsgTransferLeader: - if pr.IsLearner { - r.logger.Debugf("%x is learner. 
Ignored transferring leadership", r.id) - return nil - } - leadTransferee := m.From - lastLeadTransferee := r.leadTransferee - if lastLeadTransferee != None { - if lastLeadTransferee == leadTransferee { - r.logger.Infof("%x [term %d] transfer leadership to %x is in progress, ignores request to same node %x", - r.id, r.Term, leadTransferee, leadTransferee) - return nil - } - r.abortLeaderTransfer() - r.logger.Infof("%x [term %d] abort previous transferring leadership to %x", r.id, r.Term, lastLeadTransferee) - } - if leadTransferee == r.id { - r.logger.Debugf("%x is already leader. Ignored transferring leadership to self", r.id) - return nil - } - // Transfer leadership to third party. - r.logger.Infof("%x [term %d] starts to transfer leadership to %x", r.id, r.Term, leadTransferee) - // Transfer leadership should be finished in one electionTimeout, so reset r.electionElapsed. - r.electionElapsed = 0 - r.leadTransferee = leadTransferee - if pr.Match == r.raftLog.lastIndex() { - r.sendTimeoutNow(leadTransferee) - r.logger.Infof("%x sends MsgTimeoutNow to %x immediately as %x already has up-to-date log", r.id, leadTransferee, leadTransferee) - } else { - r.sendAppend(leadTransferee) - } - } - return nil -} - -// stepCandidate is shared by StateCandidate and StatePreCandidate; the difference is -// whether they respond to MsgVoteResp or MsgPreVoteResp. -func stepCandidate(r *raft, m pb.Message) error { - // Only handle vote responses corresponding to our candidacy (while in - // StateCandidate, we may get stale MsgPreVoteResp messages in this term from - // our pre-candidate state). 
- var myVoteRespType pb.MessageType - if r.state == StatePreCandidate { - myVoteRespType = pb.MsgPreVoteResp - } else { - myVoteRespType = pb.MsgVoteResp - } - switch m.Type { - case pb.MsgProp: - r.logger.Infof("%x no leader at term %d; dropping proposal", r.id, r.Term) - return ErrProposalDropped - case pb.MsgApp: - r.becomeFollower(m.Term, m.From) // always m.Term == r.Term - r.handleAppendEntries(m) - case pb.MsgHeartbeat: - r.becomeFollower(m.Term, m.From) // always m.Term == r.Term - r.handleHeartbeat(m) - case pb.MsgSnap: - r.becomeFollower(m.Term, m.From) // always m.Term == r.Term - r.handleSnapshot(m) - case myVoteRespType: - gr, rj, res := r.poll(m.From, m.Type, !m.Reject) - r.logger.Infof("%x has received %d %s votes and %d vote rejections", r.id, gr, m.Type, rj) - switch res { - case quorum.VoteWon: - if r.state == StatePreCandidate { - r.campaign(campaignElection) - } else { - r.becomeLeader() - r.bcastAppend() - } - case quorum.VoteLost: - // pb.MsgPreVoteResp contains future term of pre-candidate - // m.Term > r.Term; reuse r.Term - r.becomeFollower(r.Term, None) - } - case pb.MsgTimeoutNow: - r.logger.Debugf("%x [term %d state %v] ignored MsgTimeoutNow from %x", r.id, r.Term, r.state, m.From) - } - return nil -} - -func stepFollower(r *raft, m pb.Message) error { - switch m.Type { - case pb.MsgProp: - if r.lead == None { - r.logger.Infof("%x no leader at term %d; dropping proposal", r.id, r.Term) - return ErrProposalDropped - } else if r.disableProposalForwarding { - r.logger.Infof("%x not forwarding to leader %x at term %d; dropping proposal", r.id, r.lead, r.Term) - return ErrProposalDropped - } - m.To = r.lead - r.send(m) - case pb.MsgApp: - r.electionElapsed = 0 - r.lead = m.From - r.handleAppendEntries(m) - case pb.MsgHeartbeat: - r.electionElapsed = 0 - r.lead = m.From - r.handleHeartbeat(m) - case pb.MsgSnap: - r.electionElapsed = 0 - r.lead = m.From - r.handleSnapshot(m) - case pb.MsgTransferLeader: - if r.lead == None { - 
r.logger.Infof("%x no leader at term %d; dropping leader transfer msg", r.id, r.Term) - return nil - } - m.To = r.lead - r.send(m) - case pb.MsgTimeoutNow: - r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From) - // Leadership transfers never use pre-vote even if r.preVote is true; we - // know we are not recovering from a partition so there is no need for the - // extra round trip. - r.hup(campaignTransfer) - case pb.MsgReadIndex: - if r.lead == None { - r.logger.Infof("%x no leader at term %d; dropping index reading msg", r.id, r.Term) - return nil - } - m.To = r.lead - r.send(m) - case pb.MsgReadIndexResp: - if len(m.Entries) != 1 { - r.logger.Errorf("%x invalid format of MsgReadIndexResp from %x, entries count: %d", r.id, m.From, len(m.Entries)) - return nil - } - r.readStates = append(r.readStates, ReadState{Index: m.Index, RequestCtx: m.Entries[0].Data}) - } - return nil -} - -func (r *raft) handleAppendEntries(m pb.Message) { - if m.Index < r.raftLog.committed { - r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: r.raftLog.committed}) - return - } - - if mlastIndex, ok := r.raftLog.maybeAppend(m.Index, m.LogTerm, m.Commit, m.Entries...); ok { - r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: mlastIndex}) - } else { - r.logger.Debugf("%x [logterm: %d, index: %d] rejected MsgApp [logterm: %d, index: %d] from %x", - r.id, r.raftLog.zeroTermOnErrCompacted(r.raftLog.term(m.Index)), m.Index, m.LogTerm, m.Index, m.From) - - // Return a hint to the leader about the maximum index and term that the - // two logs could be divergent at. Do this by searching through the - // follower's log for the maximum (index, term) pair with a term <= the - // MsgApp's LogTerm and an index <= the MsgApp's Index. This can help - // skip all indexes in the follower's uncommitted tail with terms - // greater than the MsgApp's LogTerm. 
- // - // See the other caller for findConflictByTerm (in stepLeader) for a much - // more detailed explanation of this mechanism. - hintIndex := min(m.Index, r.raftLog.lastIndex()) - hintIndex = r.raftLog.findConflictByTerm(hintIndex, m.LogTerm) - hintTerm, err := r.raftLog.term(hintIndex) - if err != nil { - panic(fmt.Sprintf("term(%d) must be valid, but got %v", hintIndex, err)) - } - r.send(pb.Message{ - To: m.From, - Type: pb.MsgAppResp, - Index: m.Index, - Reject: true, - RejectHint: hintIndex, - LogTerm: hintTerm, - }) - } -} - -func (r *raft) handleHeartbeat(m pb.Message) { - r.raftLog.commitTo(m.Commit) - r.send(pb.Message{To: m.From, Type: pb.MsgHeartbeatResp, Context: m.Context}) -} - -func (r *raft) handleSnapshot(m pb.Message) { - // MsgSnap messages should always carry a non-nil Snapshot, but err on the - // side of safety and treat a nil Snapshot as a zero-valued Snapshot. - var s pb.Snapshot - if m.Snapshot != nil { - s = *m.Snapshot - } - sindex, sterm := s.Metadata.Index, s.Metadata.Term - if r.restore(s) { - r.logger.Infof("%x [commit: %d] restored snapshot [index: %d, term: %d]", - r.id, r.raftLog.committed, sindex, sterm) - r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: r.raftLog.lastIndex()}) - } else { - r.logger.Infof("%x [commit: %d] ignored snapshot [index: %d, term: %d]", - r.id, r.raftLog.committed, sindex, sterm) - r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: r.raftLog.committed}) - } -} - -// restore recovers the state machine from a snapshot. It restores the log and the -// configuration of state machine. If this method returns false, the snapshot was -// ignored, either because it was obsolete or because of an error. 
-func (r *raft) restore(s pb.Snapshot) bool { - if s.Metadata.Index <= r.raftLog.committed { - return false - } - if r.state != StateFollower { - // This is defense-in-depth: if the leader somehow ended up applying a - // snapshot, it could move into a new term without moving into a - // follower state. This should never fire, but if it did, we'd have - // prevented damage by returning early, so log only a loud warning. - // - // At the time of writing, the instance is guaranteed to be in follower - // state when this method is called. - r.logger.Warningf("%x attempted to restore snapshot as leader; should never happen", r.id) - r.becomeFollower(r.Term+1, None) - return false - } - - // More defense-in-depth: throw away snapshot if recipient is not in the - // config. This shouldn't ever happen (at the time of writing) but lots of - // code here and there assumes that r.id is in the progress tracker. - found := false - cs := s.Metadata.ConfState - - for _, set := range [][]uint64{ - cs.Voters, - cs.Learners, - cs.VotersOutgoing, - // `LearnersNext` doesn't need to be checked. According to the rules, if a peer in - // `LearnersNext`, it has to be in `VotersOutgoing`. - } { - for _, id := range set { - if id == r.id { - found = true - break - } - } - if found { - break - } - } - if !found { - r.logger.Warningf( - "%x attempted to restore snapshot but it is not in the ConfState %v; should never happen", - r.id, cs, - ) - return false - } - - // Now go ahead and actually restore. - - if r.raftLog.matchTerm(s.Metadata.Index, s.Metadata.Term) { - r.logger.Infof("%x [commit: %d, lastindex: %d, lastterm: %d] fast-forwarded commit to snapshot [index: %d, term: %d]", - r.id, r.raftLog.committed, r.raftLog.lastIndex(), r.raftLog.lastTerm(), s.Metadata.Index, s.Metadata.Term) - r.raftLog.commitTo(s.Metadata.Index) - return false - } - - r.raftLog.restore(s) - - // Reset the configuration and add the (potentially updated) peers in anew. 
- r.prs = tracker.MakeProgressTracker(r.prs.MaxInflight, r.prs.MaxInflightBytes) - cfg, prs, err := confchange.Restore(confchange.Changer{ - Tracker: r.prs, - LastIndex: r.raftLog.lastIndex(), - }, cs) - - if err != nil { - // This should never happen. Either there's a bug in our config change - // handling or the client corrupted the conf change. - panic(fmt.Sprintf("unable to restore config %+v: %s", cs, err)) - } - - assertConfStatesEquivalent(r.logger, cs, r.switchToConfig(cfg, prs)) - - pr := r.prs.Progress[r.id] - pr.MaybeUpdate(pr.Next - 1) // TODO(tbg): this is untested and likely unneeded - - r.logger.Infof("%x [commit: %d, lastindex: %d, lastterm: %d] restored snapshot [index: %d, term: %d]", - r.id, r.raftLog.committed, r.raftLog.lastIndex(), r.raftLog.lastTerm(), s.Metadata.Index, s.Metadata.Term) - return true -} - -// promotable indicates whether state machine can be promoted to leader, -// which is true when its own id is in progress list. -func (r *raft) promotable() bool { - pr := r.prs.Progress[r.id] - return pr != nil && !pr.IsLearner && !r.raftLog.hasPendingSnapshot() -} - -func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState { - cfg, prs, err := func() (tracker.Config, tracker.ProgressMap, error) { - changer := confchange.Changer{ - Tracker: r.prs, - LastIndex: r.raftLog.lastIndex(), - } - if cc.LeaveJoint() { - return changer.LeaveJoint() - } else if autoLeave, ok := cc.EnterJoint(); ok { - return changer.EnterJoint(autoLeave, cc.Changes...) - } - return changer.Simple(cc.Changes...) - }() - - if err != nil { - // TODO(tbg): return the error to the caller. - panic(err) - } - - return r.switchToConfig(cfg, prs) -} - -// switchToConfig reconfigures this node to use the provided configuration. It -// updates the in-memory state and, when necessary, carries out additional -// actions such as reacting to the removal of nodes or changed quorum -// requirements. 
-// -// The inputs usually result from restoring a ConfState or applying a ConfChange. -func (r *raft) switchToConfig(cfg tracker.Config, prs tracker.ProgressMap) pb.ConfState { - r.prs.Config = cfg - r.prs.Progress = prs - - r.logger.Infof("%x switched to configuration %s", r.id, r.prs.Config) - cs := r.prs.ConfState() - pr, ok := r.prs.Progress[r.id] - - // Update whether the node itself is a learner, resetting to false when the - // node is removed. - r.isLearner = ok && pr.IsLearner - - if (!ok || r.isLearner) && r.state == StateLeader { - // This node is leader and was removed or demoted. We prevent demotions - // at the time writing but hypothetically we handle them the same way as - // removing the leader: stepping down into the next Term. - // - // TODO(tbg): step down (for sanity) and ask follower with largest Match - // to TimeoutNow (to avoid interruption). This might still drop some - // proposals but it's better than nothing. - // - // TODO(tbg): test this branch. It is untested at the time of writing. - return cs - } - - // The remaining steps only make sense if this node is the leader and there - // are other nodes. - if r.state != StateLeader || len(cs.Voters) == 0 { - return cs - } - - if r.maybeCommit() { - // If the configuration change means that more entries are committed now, - // broadcast/append to everyone in the updated config. - r.bcastAppend() - } else { - // Otherwise, still probe the newly added replicas; there's no reason to - // let them wait out a heartbeat interval (or the next incoming - // proposal). - r.prs.Visit(func(id uint64, pr *tracker.Progress) { - if id == r.id { - return - } - r.maybeSendAppend(id, false /* sendIfEmpty */) - }) - } - // If the leadTransferee was removed or demoted, abort the leadership transfer. 
- if _, tOK := r.prs.Config.Voters.IDs()[r.leadTransferee]; !tOK && r.leadTransferee != 0 { - r.abortLeaderTransfer() - } - - return cs -} - -func (r *raft) loadState(state pb.HardState) { - if state.Commit < r.raftLog.committed || state.Commit > r.raftLog.lastIndex() { - r.logger.Panicf("%x state.commit %d is out of range [%d, %d]", r.id, state.Commit, r.raftLog.committed, r.raftLog.lastIndex()) - } - r.raftLog.committed = state.Commit - r.Term = state.Term - r.Vote = state.Vote -} - -// pastElectionTimeout returns true if r.electionElapsed is greater -// than or equal to the randomized election timeout in -// [electiontimeout, 2 * electiontimeout - 1]. -func (r *raft) pastElectionTimeout() bool { - return r.electionElapsed >= r.randomizedElectionTimeout -} - -func (r *raft) resetRandomizedElectionTimeout() { - r.randomizedElectionTimeout = r.electionTimeout + globalRand.Intn(r.electionTimeout) -} - -func (r *raft) sendTimeoutNow(to uint64) { - r.send(pb.Message{To: to, Type: pb.MsgTimeoutNow}) -} - -func (r *raft) abortLeaderTransfer() { - r.leadTransferee = None -} - -// committedEntryInCurrentTerm return true if the peer has committed an entry in its term. -func (r *raft) committedEntryInCurrentTerm() bool { - return r.raftLog.zeroTermOnErrCompacted(r.raftLog.term(r.raftLog.committed)) == r.Term -} - -// responseToReadIndexReq constructs a response for `req`. If `req` comes from the peer -// itself, a blank value will be returned. -func (r *raft) responseToReadIndexReq(req pb.Message, readIndex uint64) pb.Message { - if req.From == None || req.From == r.id { - r.readStates = append(r.readStates, ReadState{ - Index: readIndex, - RequestCtx: req.Entries[0].Data, - }) - return pb.Message{} - } - return pb.Message{ - Type: pb.MsgReadIndexResp, - To: req.From, - Index: readIndex, - Entries: req.Entries, - } -} - -// increaseUncommittedSize computes the size of the proposed entries and -// determines whether they would push leader over its maxUncommittedSize limit. 
-// If the new entries would exceed the limit, the method returns false. If not, -// the increase in uncommitted entry size is recorded and the method returns -// true. -// -// Empty payloads are never refused. This is used both for appending an empty -// entry at a new leader's term, as well as leaving a joint configuration. -func (r *raft) increaseUncommittedSize(ents []pb.Entry) bool { - s := payloadsSize(ents) - if r.uncommittedSize > 0 && s > 0 && r.uncommittedSize+s > r.maxUncommittedSize { - // If the uncommitted tail of the Raft log is empty, allow any size - // proposal. Otherwise, limit the size of the uncommitted tail of the - // log and drop any proposal that would push the size over the limit. - // Note the added requirement s>0 which is used to make sure that - // appending single empty entries to the log always succeeds, used both - // for replicating a new leader's initial empty entry, and for - // auto-leaving joint configurations. - return false - } - r.uncommittedSize += s - return true -} - -// reduceUncommittedSize accounts for the newly committed entries by decreasing -// the uncommitted entry size limit. -func (r *raft) reduceUncommittedSize(ents []pb.Entry) { - if r.uncommittedSize == 0 { - // Fast-path for followers, who do not track or enforce the limit. - return - } - if s := payloadsSize(ents); s > r.uncommittedSize { - // uncommittedSize may underestimate the size of the uncommitted Raft - // log tail but will never overestimate it. Saturate at 0 instead of - // allowing overflow. 
- r.uncommittedSize = 0 - } else { - r.uncommittedSize -= s - } -} - -func payloadsSize(ents []pb.Entry) uint64 { - var s uint64 - for _, e := range ents { - s += uint64(PayloadSize(e)) - } - return s -} - -func numOfPendingConf(ents []pb.Entry) int { - n := 0 - for i := range ents { - if ents[i].Type == pb.EntryConfChange || ents[i].Type == pb.EntryConfChangeV2 { - n++ - } - } - return n -} - -func releasePendingReadIndexMessages(r *raft) { - if len(r.pendingReadIndexMessages) == 0 { - // Fast path for the common case to avoid a call to storage.LastIndex() - // via committedEntryInCurrentTerm. - return - } - if !r.committedEntryInCurrentTerm() { - r.logger.Error("pending MsgReadIndex should be released only after first commit in current term") - return - } - - msgs := r.pendingReadIndexMessages - r.pendingReadIndexMessages = nil - - for _, m := range msgs { - sendMsgReadIndexResponse(r, m) - } -} - -func sendMsgReadIndexResponse(r *raft, m pb.Message) { - // thinking: use an internally defined context instead of the user given context. - // We can express this in terms of the term and index instead of a user-supplied value. - // This would allow multiple reads to piggyback on the same message. - switch r.readOnly.option { - // If more than the local vote is needed, go through a full broadcast. - case ReadOnlySafe: - r.readOnly.addRequest(r.raftLog.committed, m) - // The local node automatically acks the request. 
- r.readOnly.recvAck(r.id, m.Entries[0].Data) - r.bcastHeartbeatWithCtx(m.Entries[0].Data) - case ReadOnlyLeaseBased: - if resp := r.responseToReadIndexReq(m, r.raftLog.committed); resp.To != None { - r.send(resp) - } - } -} diff --git a/raft/raft_flow_control_test.go b/raft/raft_flow_control_test.go deleted file mode 100644 index 29dff843f20e..000000000000 --- a/raft/raft_flow_control_test.go +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "testing" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -// TestMsgAppFlowControlFull ensures: -// 1. msgApp can fill the sending window until full -// 2. when the window is full, no more msgApp can be sent. 
- -func TestMsgAppFlowControlFull(t *testing.T) { - r := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - - pr2 := r.prs.Progress[2] - // force the progress to be in replicate state - pr2.BecomeReplicate() - // fill in the inflights window - for i := 0; i < r.prs.MaxInflight; i++ { - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - ms := r.readMessages() - if len(ms) != 1 || ms[0].Type != pb.MsgApp { - t.Fatalf("#%d: len(ms) = %d, want 1 MsgApp", i, len(ms)) - } - } - - // ensure 1 - if !pr2.IsPaused() { - t.Fatal("paused = false, want true") - } - - // ensure 2 - for i := 0; i < 10; i++ { - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - ms := r.readMessages() - if len(ms) != 0 { - t.Fatalf("#%d: len(ms) = %d, want 0", i, len(ms)) - } - } -} - -// TestMsgAppFlowControlMoveForward ensures msgAppResp can move -// forward the sending window correctly: -// 1. valid msgAppResp.index moves the windows to pass all smaller or equal index. -// 2. out-of-dated msgAppResp has no effect on the sliding window. -func TestMsgAppFlowControlMoveForward(t *testing.T) { - r := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - - pr2 := r.prs.Progress[2] - // force the progress to be in replicate state - pr2.BecomeReplicate() - // fill in the inflights window - for i := 0; i < r.prs.MaxInflight; i++ { - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - r.readMessages() - } - - // 1 is noop, 2 is the first proposal we just sent. - // so we start with 2. 
- for tt := 2; tt < r.prs.MaxInflight; tt++ { - // move forward the window - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Index: uint64(tt)}) - r.readMessages() - - // fill in the inflights window again - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - ms := r.readMessages() - if len(ms) != 1 || ms[0].Type != pb.MsgApp { - t.Fatalf("#%d: len(ms) = %d, want 1 MsgApp", tt, len(ms)) - } - - // ensure 1 - if !pr2.IsPaused() { - t.Fatalf("#%d: paused = false, want true", tt) - } - - // ensure 2 - for i := 0; i < tt; i++ { - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Index: uint64(i)}) - if !pr2.IsPaused() { - t.Fatalf("#%d.%d: paused = false, want true", tt, i) - } - } - } -} - -// TestMsgAppFlowControlRecvHeartbeat ensures a heartbeat response -// frees one slot if the window is full. -func TestMsgAppFlowControlRecvHeartbeat(t *testing.T) { - r := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - - pr2 := r.prs.Progress[2] - // force the progress to be in replicate state - pr2.BecomeReplicate() - // fill in the inflights window - for i := 0; i < r.prs.MaxInflight; i++ { - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - r.readMessages() - } - - for tt := 1; tt < 5; tt++ { - // recv tt msgHeartbeatResp and expect one free slot - for i := 0; i < tt; i++ { - if !pr2.IsPaused() { - t.Fatalf("#%d.%d: paused = false, want true", tt, i) - } - // Unpauses the progress, sends an empty MsgApp, and pauses it again. - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgHeartbeatResp}) - ms := r.readMessages() - if len(ms) != 1 || ms[0].Type != pb.MsgApp || len(ms[0].Entries) != 0 { - t.Fatalf("#%d.%d: len(ms) == %d, want 1 empty MsgApp", tt, i, len(ms)) - } - } - - // No more appends are sent if there are no heartbeats. 
- for i := 0; i < 10; i++ { - if !pr2.IsPaused() { - t.Fatalf("#%d.%d: paused = false, want true", tt, i) - } - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - ms := r.readMessages() - if len(ms) != 0 { - t.Fatalf("#%d.%d: len(ms) = %d, want 0", tt, i, len(ms)) - } - } - - // clear all pending messages. - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgHeartbeatResp}) - r.readMessages() - } -} diff --git a/raft/raft_paper_test.go b/raft/raft_paper_test.go deleted file mode 100644 index 44536c241ab6..000000000000 --- a/raft/raft_paper_test.go +++ /dev/null @@ -1,943 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* -This file contains tests which verify that the scenarios described -in the raft paper (https://raft.github.io/raft.pdf) are -handled by the raft implementation correctly. Each test focuses on -several sentences written in the paper. This could help us to prevent -most implementation bugs. - -Each test is composed of three parts: init, test and check. -Init part uses simple and understandable way to simulate the init state. -Test part uses Step function to generate the scenario. Check part checks -outgoing messages and state. 
-*/ -package raft - -import ( - "fmt" - "reflect" - "sort" - "testing" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -func TestFollowerUpdateTermFromMessage(t *testing.T) { - testUpdateTermFromMessage(t, StateFollower) -} -func TestCandidateUpdateTermFromMessage(t *testing.T) { - testUpdateTermFromMessage(t, StateCandidate) -} -func TestLeaderUpdateTermFromMessage(t *testing.T) { - testUpdateTermFromMessage(t, StateLeader) -} - -// testUpdateTermFromMessage tests that if one server’s current term is -// smaller than the other’s, then it updates its current term to the larger -// value. If a candidate or leader discovers that its term is out of date, -// it immediately reverts to follower state. -// Reference: section 5.1 -func testUpdateTermFromMessage(t *testing.T, state StateType) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - switch state { - case StateFollower: - r.becomeFollower(1, 2) - case StateCandidate: - r.becomeCandidate() - case StateLeader: - r.becomeCandidate() - r.becomeLeader() - } - - r.Step(pb.Message{Type: pb.MsgApp, Term: 2}) - - if r.Term != 2 { - t.Errorf("term = %d, want %d", r.Term, 2) - } - if r.state != StateFollower { - t.Errorf("state = %v, want %v", r.state, StateFollower) - } -} - -// TestRejectStaleTermMessage tests that if a server receives a request with -// a stale term number, it rejects the request. -// Our implementation ignores the request instead. -// Reference: section 5.1 -func TestRejectStaleTermMessage(t *testing.T) { - called := false - fakeStep := func(r *raft, m pb.Message) error { - called = true - return nil - } - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r.step = fakeStep - r.loadState(pb.HardState{Term: 2}) - - r.Step(pb.Message{Type: pb.MsgApp, Term: r.Term - 1}) - - if called { - t.Errorf("stepFunc called = %v, want %v", called, false) - } -} - -// TestStartAsFollower tests that when servers start up, they begin as followers. 
-// Reference: section 5.2 -func TestStartAsFollower(t *testing.T) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - if r.state != StateFollower { - t.Errorf("state = %s, want %s", r.state, StateFollower) - } -} - -// TestLeaderBcastBeat tests that if the leader receives a heartbeat tick, -// it will send a MsgHeartbeat with m.Index = 0, m.LogTerm=0 and empty entries -// as heartbeat to all followers. -// Reference: section 5.2 -func TestLeaderBcastBeat(t *testing.T) { - // heartbeat interval - hi := 1 - r := newTestRaft(1, 10, hi, newTestMemoryStorage(withPeers(1, 2, 3))) - r.becomeCandidate() - r.becomeLeader() - for i := 0; i < 10; i++ { - mustAppendEntry(r, pb.Entry{Index: uint64(i) + 1}) - } - - for i := 0; i < hi; i++ { - r.tick() - } - - msgs := r.readMessages() - sort.Sort(messageSlice(msgs)) - wmsgs := []pb.Message{ - {From: 1, To: 2, Term: 1, Type: pb.MsgHeartbeat}, - {From: 1, To: 3, Term: 1, Type: pb.MsgHeartbeat}, - } - if !reflect.DeepEqual(msgs, wmsgs) { - t.Errorf("msgs = %v, want %v", msgs, wmsgs) - } -} - -func TestFollowerStartElection(t *testing.T) { - testNonleaderStartElection(t, StateFollower) -} -func TestCandidateStartNewElection(t *testing.T) { - testNonleaderStartElection(t, StateCandidate) -} - -// testNonleaderStartElection tests that if a follower receives no communication -// over election timeout, it begins an election to choose a new leader. It -// increments its current term and transitions to candidate state. It then -// votes for itself and issues RequestVote RPCs in parallel to each of the -// other servers in the cluster. -// Reference: section 5.2 -// Also if a candidate fails to obtain a majority, it will time out and -// start a new election by incrementing its term and initiating another -// round of RequestVote RPCs. 
-// Reference: section 5.2 -func testNonleaderStartElection(t *testing.T, state StateType) { - // election timeout - et := 10 - r := newTestRaft(1, et, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - switch state { - case StateFollower: - r.becomeFollower(1, 2) - case StateCandidate: - r.becomeCandidate() - } - - for i := 1; i < 2*et; i++ { - r.tick() - } - - if r.Term != 2 { - t.Errorf("term = %d, want 2", r.Term) - } - if r.state != StateCandidate { - t.Errorf("state = %s, want %s", r.state, StateCandidate) - } - if !r.prs.Votes[r.id] { - t.Errorf("vote for self = false, want true") - } - msgs := r.readMessages() - sort.Sort(messageSlice(msgs)) - wmsgs := []pb.Message{ - {From: 1, To: 2, Term: 2, Type: pb.MsgVote}, - {From: 1, To: 3, Term: 2, Type: pb.MsgVote}, - } - if !reflect.DeepEqual(msgs, wmsgs) { - t.Errorf("msgs = %v, want %v", msgs, wmsgs) - } -} - -// TestLeaderElectionInOneRoundRPC tests all cases that may happen in -// leader election during one round of RequestVote RPC: -// a) it wins the election -// b) it loses the election -// c) it is unclear about the result -// Reference: section 5.2 -func TestLeaderElectionInOneRoundRPC(t *testing.T) { - tests := []struct { - size int - votes map[uint64]bool - state StateType - }{ - // win the election when receiving votes from a majority of the servers - {1, map[uint64]bool{}, StateLeader}, - {3, map[uint64]bool{2: true, 3: true}, StateLeader}, - {3, map[uint64]bool{2: true}, StateLeader}, - {5, map[uint64]bool{2: true, 3: true, 4: true, 5: true}, StateLeader}, - {5, map[uint64]bool{2: true, 3: true, 4: true}, StateLeader}, - {5, map[uint64]bool{2: true, 3: true}, StateLeader}, - - // return to follower state if it receives vote denial from a majority - {3, map[uint64]bool{2: false, 3: false}, StateFollower}, - {5, map[uint64]bool{2: false, 3: false, 4: false, 5: false}, StateFollower}, - {5, map[uint64]bool{2: true, 3: false, 4: false, 5: false}, StateFollower}, - - // stay in candidate if it does not obtain 
the majority - {3, map[uint64]bool{}, StateCandidate}, - {5, map[uint64]bool{2: true}, StateCandidate}, - {5, map[uint64]bool{2: false, 3: false}, StateCandidate}, - {5, map[uint64]bool{}, StateCandidate}, - } - for i, tt := range tests { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(idsBySize(tt.size)...))) - - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - for id, vote := range tt.votes { - r.Step(pb.Message{From: id, To: 1, Term: r.Term, Type: pb.MsgVoteResp, Reject: !vote}) - } - - if r.state != tt.state { - t.Errorf("#%d: state = %s, want %s", i, r.state, tt.state) - } - if g := r.Term; g != 1 { - t.Errorf("#%d: term = %d, want %d", i, g, 1) - } - } -} - -// TestFollowerVote tests that each follower will vote for at most one -// candidate in a given term, on a first-come-first-served basis. -// Reference: section 5.2 -func TestFollowerVote(t *testing.T) { - tests := []struct { - vote uint64 - nvote uint64 - wreject bool - }{ - {None, 2, false}, - {None, 3, false}, - {2, 2, false}, - {3, 3, false}, - {2, 3, true}, - {3, 2, true}, - } - for i, tt := range tests { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r.loadState(pb.HardState{Term: 1, Vote: tt.vote}) - - r.Step(pb.Message{From: tt.nvote, To: 1, Term: 1, Type: pb.MsgVote}) - - msgs := r.readMessages() - wmsgs := []pb.Message{ - {From: 1, To: tt.nvote, Term: 1, Type: pb.MsgVoteResp, Reject: tt.wreject}, - } - if !reflect.DeepEqual(msgs, wmsgs) { - t.Errorf("#%d: msgs = %v, want %v", i, msgs, wmsgs) - } - } -} - -// TestCandidateFallback tests that while waiting for votes, -// if a candidate receives an AppendEntries RPC from another server claiming -// to be leader whose term is at least as large as the candidate's current term, -// it recognizes the leader as legitimate and returns to follower state. 
-// Reference: section 5.2 -func TestCandidateFallback(t *testing.T) { - tests := []pb.Message{ - {From: 2, To: 1, Term: 1, Type: pb.MsgApp}, - {From: 2, To: 1, Term: 2, Type: pb.MsgApp}, - } - for i, tt := range tests { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - if r.state != StateCandidate { - t.Fatalf("unexpected state = %s, want %s", r.state, StateCandidate) - } - - r.Step(tt) - - if g := r.state; g != StateFollower { - t.Errorf("#%d: state = %s, want %s", i, g, StateFollower) - } - if g := r.Term; g != tt.Term { - t.Errorf("#%d: term = %d, want %d", i, g, tt.Term) - } - } -} - -func TestFollowerElectionTimeoutRandomized(t *testing.T) { - SetLogger(discardLogger) - defer SetLogger(defaultLogger) - testNonleaderElectionTimeoutRandomized(t, StateFollower) -} -func TestCandidateElectionTimeoutRandomized(t *testing.T) { - SetLogger(discardLogger) - defer SetLogger(defaultLogger) - testNonleaderElectionTimeoutRandomized(t, StateCandidate) -} - -// testNonleaderElectionTimeoutRandomized tests that election timeout for -// follower or candidate is randomized. 
-// Reference: section 5.2 -func testNonleaderElectionTimeoutRandomized(t *testing.T, state StateType) { - et := 10 - r := newTestRaft(1, et, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - timeouts := make(map[int]bool) - for round := 0; round < 50*et; round++ { - switch state { - case StateFollower: - r.becomeFollower(r.Term+1, 2) - case StateCandidate: - r.becomeCandidate() - } - - time := 0 - for len(r.readMessages()) == 0 { - r.tick() - time++ - } - timeouts[time] = true - } - - for d := et; d < 2*et; d++ { - if !timeouts[d] { - t.Errorf("timeout in %d ticks should happen", d) - } - } -} - -func TestFollowersElectionTimeoutNonconflict(t *testing.T) { - SetLogger(discardLogger) - defer SetLogger(defaultLogger) - testNonleadersElectionTimeoutNonconflict(t, StateFollower) -} -func TestCandidatesElectionTimeoutNonconflict(t *testing.T) { - SetLogger(discardLogger) - defer SetLogger(defaultLogger) - testNonleadersElectionTimeoutNonconflict(t, StateCandidate) -} - -// testNonleadersElectionTimeoutNonconflict tests that in most cases only a -// single server(follower or candidate) will time out, which reduces the -// likelihood of split vote in the new election. 
-// Reference: section 5.2 -func testNonleadersElectionTimeoutNonconflict(t *testing.T, state StateType) { - et := 10 - size := 5 - rs := make([]*raft, size) - ids := idsBySize(size) - for k := range rs { - rs[k] = newTestRaft(ids[k], et, 1, newTestMemoryStorage(withPeers(ids...))) - } - conflicts := 0 - for round := 0; round < 1000; round++ { - for _, r := range rs { - switch state { - case StateFollower: - r.becomeFollower(r.Term+1, None) - case StateCandidate: - r.becomeCandidate() - } - } - - timeoutNum := 0 - for timeoutNum == 0 { - for _, r := range rs { - r.tick() - if len(r.readMessages()) > 0 { - timeoutNum++ - } - } - } - // several rafts time out at the same tick - if timeoutNum > 1 { - conflicts++ - } - } - - if g := float64(conflicts) / 1000; g > 0.3 { - t.Errorf("probability of conflicts = %v, want <= 0.3", g) - } -} - -// TestLeaderStartReplication tests that when receiving client proposals, -// the leader appends the proposal to its log as a new entry, then issues -// AppendEntries RPCs in parallel to each of the other servers to replicate -// the entry. Also, when sending an AppendEntries RPC, the leader includes -// the index and term of the entry in its log that immediately precedes -// the new entries. -// Also, it writes the new entry into stable storage. 
-// Reference: section 5.3 -func TestLeaderStartReplication(t *testing.T) { - s := newTestMemoryStorage(withPeers(1, 2, 3)) - r := newTestRaft(1, 10, 1, s) - r.becomeCandidate() - r.becomeLeader() - commitNoopEntry(r, s) - li := r.raftLog.lastIndex() - - ents := []pb.Entry{{Data: []byte("some data")}} - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: ents}) - - if g := r.raftLog.lastIndex(); g != li+1 { - t.Errorf("lastIndex = %d, want %d", g, li+1) - } - if g := r.raftLog.committed; g != li { - t.Errorf("committed = %d, want %d", g, li) - } - msgs := r.readMessages() - sort.Sort(messageSlice(msgs)) - wents := []pb.Entry{{Index: li + 1, Term: 1, Data: []byte("some data")}} - wmsgs := []pb.Message{ - {From: 1, To: 2, Term: 1, Type: pb.MsgApp, Index: li, LogTerm: 1, Entries: wents, Commit: li}, - {From: 1, To: 3, Term: 1, Type: pb.MsgApp, Index: li, LogTerm: 1, Entries: wents, Commit: li}, - } - if !reflect.DeepEqual(msgs, wmsgs) { - t.Errorf("msgs = %+v, want %+v", msgs, wmsgs) - } - if g := r.raftLog.unstableEntries(); !reflect.DeepEqual(g, wents) { - t.Errorf("ents = %+v, want %+v", g, wents) - } -} - -// TestLeaderCommitEntry tests that when the entry has been safely replicated, -// the leader gives out the applied entries, which can be applied to its state -// machine. -// Also, the leader keeps track of the highest index it knows to be committed, -// and it includes that index in future AppendEntries RPCs so that the other -// servers eventually find out. 
-// Reference: section 5.3 -func TestLeaderCommitEntry(t *testing.T) { - s := newTestMemoryStorage(withPeers(1, 2, 3)) - r := newTestRaft(1, 10, 1, s) - r.becomeCandidate() - r.becomeLeader() - commitNoopEntry(r, s) - li := r.raftLog.lastIndex() - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - - for _, m := range r.readMessages() { - r.Step(acceptAndReply(m)) - } - - if g := r.raftLog.committed; g != li+1 { - t.Errorf("committed = %d, want %d", g, li+1) - } - wents := []pb.Entry{{Index: li + 1, Term: 1, Data: []byte("some data")}} - if g := r.raftLog.nextCommittedEnts(); !reflect.DeepEqual(g, wents) { - t.Errorf("nextCommittedEnts = %+v, want %+v", g, wents) - } - msgs := r.readMessages() - sort.Sort(messageSlice(msgs)) - for i, m := range msgs { - if w := uint64(i + 2); m.To != w { - t.Errorf("to = %x, want %x", m.To, w) - } - if m.Type != pb.MsgApp { - t.Errorf("type = %v, want %v", m.Type, pb.MsgApp) - } - if m.Commit != li+1 { - t.Errorf("commit = %d, want %d", m.Commit, li+1) - } - } -} - -// TestLeaderAcknowledgeCommit tests that a log entry is committed once the -// leader that created the entry has replicated it on a majority of the servers. 
-// Reference: section 5.3 -func TestLeaderAcknowledgeCommit(t *testing.T) { - tests := []struct { - size int - nonLeaderAcceptors map[uint64]bool - wack bool - }{ - {1, nil, true}, - {3, nil, false}, - {3, map[uint64]bool{2: true}, true}, - {3, map[uint64]bool{2: true, 3: true}, true}, - {5, nil, false}, - {5, map[uint64]bool{2: true}, false}, - {5, map[uint64]bool{2: true, 3: true}, true}, - {5, map[uint64]bool{2: true, 3: true, 4: true}, true}, - {5, map[uint64]bool{2: true, 3: true, 4: true, 5: true}, true}, - } - for i, tt := range tests { - s := newTestMemoryStorage(withPeers(idsBySize(tt.size)...)) - r := newTestRaft(1, 10, 1, s) - r.becomeCandidate() - r.becomeLeader() - commitNoopEntry(r, s) - li := r.raftLog.lastIndex() - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - - rd := newReady(r, &SoftState{}, pb.HardState{}) - s.Append(rd.Entries) - r.advance(rd) // simulate having appended entry on leader - for _, m := range rd.Messages { - if tt.nonLeaderAcceptors[m.To] { - r.Step(acceptAndReply(m)) - } - } - - if g := r.raftLog.committed > li; g != tt.wack { - t.Errorf("#%d: ack commit = %v, want %v", i, g, tt.wack) - } - } -} - -// TestLeaderCommitPrecedingEntries tests that when leader commits a log entry, -// it also commits all preceding entries in the leader’s log, including -// entries created by previous leaders. -// Also, it applies the entry to its local state machine (in log order). 
-// Reference: section 5.3 -func TestLeaderCommitPrecedingEntries(t *testing.T) { - tests := [][]pb.Entry{ - {}, - {{Term: 2, Index: 1}}, - {{Term: 1, Index: 1}, {Term: 2, Index: 2}}, - {{Term: 1, Index: 1}}, - } - for i, tt := range tests { - storage := newTestMemoryStorage(withPeers(1, 2, 3)) - storage.Append(tt) - r := newTestRaft(1, 10, 1, storage) - r.loadState(pb.HardState{Term: 2}) - r.becomeCandidate() - r.becomeLeader() - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - - for _, m := range r.readMessages() { - r.Step(acceptAndReply(m)) - } - - li := uint64(len(tt)) - wents := append(tt, pb.Entry{Term: 3, Index: li + 1}, pb.Entry{Term: 3, Index: li + 2, Data: []byte("some data")}) - if g := r.raftLog.nextCommittedEnts(); !reflect.DeepEqual(g, wents) { - t.Errorf("#%d: ents = %+v, want %+v", i, g, wents) - } - } -} - -// TestFollowerCommitEntry tests that once a follower learns that a log entry -// is committed, it applies the entry to its local state machine (in log order). 
-// Reference: section 5.3 -func TestFollowerCommitEntry(t *testing.T) { - tests := []struct { - ents []pb.Entry - commit uint64 - }{ - { - []pb.Entry{ - {Term: 1, Index: 1, Data: []byte("some data")}, - }, - 1, - }, - { - []pb.Entry{ - {Term: 1, Index: 1, Data: []byte("some data")}, - {Term: 1, Index: 2, Data: []byte("some data2")}, - }, - 2, - }, - { - []pb.Entry{ - {Term: 1, Index: 1, Data: []byte("some data2")}, - {Term: 1, Index: 2, Data: []byte("some data")}, - }, - 2, - }, - { - []pb.Entry{ - {Term: 1, Index: 1, Data: []byte("some data")}, - {Term: 1, Index: 2, Data: []byte("some data2")}, - }, - 1, - }, - } - for i, tt := range tests { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r.becomeFollower(1, 2) - - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgApp, Term: 1, Entries: tt.ents, Commit: tt.commit}) - - if g := r.raftLog.committed; g != tt.commit { - t.Errorf("#%d: committed = %d, want %d", i, g, tt.commit) - } - wents := tt.ents[:int(tt.commit)] - if g := r.raftLog.nextCommittedEnts(); !reflect.DeepEqual(g, wents) { - t.Errorf("#%d: nextCommittedEnts = %v, want %v", i, g, wents) - } - } -} - -// TestFollowerCheckMsgApp tests that if the follower does not find an -// entry in its log with the same index and term as the one in AppendEntries RPC, -// then it refuses the new entries. Otherwise it replies that it accepts the -// append entries. 
-// Reference: section 5.3 -func TestFollowerCheckMsgApp(t *testing.T) { - ents := []pb.Entry{{Term: 1, Index: 1}, {Term: 2, Index: 2}} - tests := []struct { - term uint64 - index uint64 - windex uint64 - wreject bool - wrejectHint uint64 - wlogterm uint64 - }{ - // match with committed entries - {0, 0, 1, false, 0, 0}, - {ents[0].Term, ents[0].Index, 1, false, 0, 0}, - // match with uncommitted entries - {ents[1].Term, ents[1].Index, 2, false, 0, 0}, - - // unmatch with existing entry - {ents[0].Term, ents[1].Index, ents[1].Index, true, 1, 1}, - // unexisting entry - {ents[1].Term + 1, ents[1].Index + 1, ents[1].Index + 1, true, 2, 2}, - } - for i, tt := range tests { - storage := newTestMemoryStorage(withPeers(1, 2, 3)) - storage.Append(ents) - r := newTestRaft(1, 10, 1, storage) - r.loadState(pb.HardState{Commit: 1}) - r.becomeFollower(2, 2) - - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgApp, Term: 2, LogTerm: tt.term, Index: tt.index}) - - msgs := r.readMessages() - wmsgs := []pb.Message{ - {From: 1, To: 2, Type: pb.MsgAppResp, Term: 2, Index: tt.windex, Reject: tt.wreject, RejectHint: tt.wrejectHint, LogTerm: tt.wlogterm}, - } - if !reflect.DeepEqual(msgs, wmsgs) { - t.Errorf("#%d: msgs = %+v, want %+v", i, msgs, wmsgs) - } - } -} - -// TestFollowerAppendEntries tests that when AppendEntries RPC is valid, -// the follower will delete the existing conflict entry and all that follow it, -// and append any new entries not already in the log. -// Also, it writes the new entry into stable storage. 
-// Reference: section 5.3 -func TestFollowerAppendEntries(t *testing.T) { - tests := []struct { - index, term uint64 - ents []pb.Entry - wents []pb.Entry - wunstable []pb.Entry - }{ - { - 2, 2, - []pb.Entry{{Term: 3, Index: 3}}, - []pb.Entry{{Term: 1, Index: 1}, {Term: 2, Index: 2}, {Term: 3, Index: 3}}, - []pb.Entry{{Term: 3, Index: 3}}, - }, - { - 1, 1, - []pb.Entry{{Term: 3, Index: 2}, {Term: 4, Index: 3}}, - []pb.Entry{{Term: 1, Index: 1}, {Term: 3, Index: 2}, {Term: 4, Index: 3}}, - []pb.Entry{{Term: 3, Index: 2}, {Term: 4, Index: 3}}, - }, - { - 0, 0, - []pb.Entry{{Term: 1, Index: 1}}, - []pb.Entry{{Term: 1, Index: 1}, {Term: 2, Index: 2}}, - nil, - }, - { - 0, 0, - []pb.Entry{{Term: 3, Index: 1}}, - []pb.Entry{{Term: 3, Index: 1}}, - []pb.Entry{{Term: 3, Index: 1}}, - }, - } - for i, tt := range tests { - storage := newTestMemoryStorage(withPeers(1, 2, 3)) - storage.Append([]pb.Entry{{Term: 1, Index: 1}, {Term: 2, Index: 2}}) - r := newTestRaft(1, 10, 1, storage) - r.becomeFollower(2, 2) - - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgApp, Term: 2, LogTerm: tt.term, Index: tt.index, Entries: tt.ents}) - - if g := r.raftLog.allEntries(); !reflect.DeepEqual(g, tt.wents) { - t.Errorf("#%d: ents = %+v, want %+v", i, g, tt.wents) - } - if g := r.raftLog.unstableEntries(); !reflect.DeepEqual(g, tt.wunstable) { - t.Errorf("#%d: unstableEnts = %+v, want %+v", i, g, tt.wunstable) - } - } -} - -// TestLeaderSyncFollowerLog tests that the leader could bring a follower's log -// into consistency with its own. 
-// Reference: section 5.3, figure 7 -func TestLeaderSyncFollowerLog(t *testing.T) { - ents := []pb.Entry{ - {}, - {Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}, - {Term: 4, Index: 4}, {Term: 4, Index: 5}, - {Term: 5, Index: 6}, {Term: 5, Index: 7}, - {Term: 6, Index: 8}, {Term: 6, Index: 9}, {Term: 6, Index: 10}, - } - term := uint64(8) - tests := [][]pb.Entry{ - { - {}, - {Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}, - {Term: 4, Index: 4}, {Term: 4, Index: 5}, - {Term: 5, Index: 6}, {Term: 5, Index: 7}, - {Term: 6, Index: 8}, {Term: 6, Index: 9}, - }, - { - {}, - {Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}, - {Term: 4, Index: 4}, - }, - { - {}, - {Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}, - {Term: 4, Index: 4}, {Term: 4, Index: 5}, - {Term: 5, Index: 6}, {Term: 5, Index: 7}, - {Term: 6, Index: 8}, {Term: 6, Index: 9}, {Term: 6, Index: 10}, {Term: 6, Index: 11}, - }, - { - {}, - {Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}, - {Term: 4, Index: 4}, {Term: 4, Index: 5}, - {Term: 5, Index: 6}, {Term: 5, Index: 7}, - {Term: 6, Index: 8}, {Term: 6, Index: 9}, {Term: 6, Index: 10}, - {Term: 7, Index: 11}, {Term: 7, Index: 12}, - }, - { - {}, - {Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}, - {Term: 4, Index: 4}, {Term: 4, Index: 5}, {Term: 4, Index: 6}, {Term: 4, Index: 7}, - }, - { - {}, - {Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}, - {Term: 2, Index: 4}, {Term: 2, Index: 5}, {Term: 2, Index: 6}, - {Term: 3, Index: 7}, {Term: 3, Index: 8}, {Term: 3, Index: 9}, {Term: 3, Index: 10}, {Term: 3, Index: 11}, - }, - } - for i, tt := range tests { - leadStorage := newTestMemoryStorage(withPeers(1, 2, 3)) - leadStorage.Append(ents) - lead := newTestRaft(1, 10, 1, leadStorage) - lead.loadState(pb.HardState{Commit: lead.raftLog.lastIndex(), Term: term}) - followerStorage := newTestMemoryStorage(withPeers(1, 2, 3)) - followerStorage.Append(tt) - follower := 
newTestRaft(2, 10, 1, followerStorage) - follower.loadState(pb.HardState{Term: term - 1}) - // It is necessary to have a three-node cluster. - // The second may have more up-to-date log than the first one, so the - // first node needs the vote from the third node to become the leader. - n := newNetwork(lead, follower, nopStepper) - n.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - // The election occurs in the term after the one we loaded with - // lead.loadState above. - n.send(pb.Message{From: 3, To: 1, Type: pb.MsgVoteResp, Term: term + 1}) - - n.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - - if g := diffu(ltoa(lead.raftLog), ltoa(follower.raftLog)); g != "" { - t.Errorf("#%d: log diff:\n%s", i, g) - } - } -} - -// TestVoteRequest tests that the vote request includes information about the candidate’s log -// and are sent to all of the other nodes. -// Reference: section 5.4.1 -func TestVoteRequest(t *testing.T) { - tests := []struct { - ents []pb.Entry - wterm uint64 - }{ - {[]pb.Entry{{Term: 1, Index: 1}}, 2}, - {[]pb.Entry{{Term: 1, Index: 1}, {Term: 2, Index: 2}}, 3}, - } - for j, tt := range tests { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r.Step(pb.Message{ - From: 2, To: 1, Type: pb.MsgApp, Term: tt.wterm - 1, LogTerm: 0, Index: 0, Entries: tt.ents, - }) - r.readMessages() - - for i := 1; i < r.electionTimeout*2; i++ { - r.tickElection() - } - - msgs := r.readMessages() - sort.Sort(messageSlice(msgs)) - if len(msgs) != 2 { - t.Fatalf("#%d: len(msg) = %d, want %d", j, len(msgs), 2) - } - for i, m := range msgs { - if m.Type != pb.MsgVote { - t.Errorf("#%d: msgType = %d, want %d", i, m.Type, pb.MsgVote) - } - if m.To != uint64(i+2) { - t.Errorf("#%d: to = %d, want %d", i, m.To, i+2) - } - if m.Term != tt.wterm { - t.Errorf("#%d: term = %d, want %d", i, m.Term, tt.wterm) - } - windex, wlogterm := tt.ents[len(tt.ents)-1].Index, tt.ents[len(tt.ents)-1].Term - if m.Index != windex { - 
t.Errorf("#%d: index = %d, want %d", i, m.Index, windex) - } - if m.LogTerm != wlogterm { - t.Errorf("#%d: logterm = %d, want %d", i, m.LogTerm, wlogterm) - } - } - } -} - -// TestVoter tests the voter denies its vote if its own log is more up-to-date -// than that of the candidate. -// Reference: section 5.4.1 -func TestVoter(t *testing.T) { - tests := []struct { - ents []pb.Entry - logterm uint64 - index uint64 - - wreject bool - }{ - // same logterm - {[]pb.Entry{{Term: 1, Index: 1}}, 1, 1, false}, - {[]pb.Entry{{Term: 1, Index: 1}}, 1, 2, false}, - {[]pb.Entry{{Term: 1, Index: 1}, {Term: 1, Index: 2}}, 1, 1, true}, - // candidate higher logterm - {[]pb.Entry{{Term: 1, Index: 1}}, 2, 1, false}, - {[]pb.Entry{{Term: 1, Index: 1}}, 2, 2, false}, - {[]pb.Entry{{Term: 1, Index: 1}, {Term: 1, Index: 2}}, 2, 1, false}, - // voter higher logterm - {[]pb.Entry{{Term: 2, Index: 1}}, 1, 1, true}, - {[]pb.Entry{{Term: 2, Index: 1}}, 1, 2, true}, - {[]pb.Entry{{Term: 2, Index: 1}, {Term: 1, Index: 2}}, 1, 1, true}, - } - for i, tt := range tests { - storage := newTestMemoryStorage(withPeers(1, 2)) - storage.Append(tt.ents) - r := newTestRaft(1, 10, 1, storage) - - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgVote, Term: 3, LogTerm: tt.logterm, Index: tt.index}) - - msgs := r.readMessages() - if len(msgs) != 1 { - t.Fatalf("#%d: len(msg) = %d, want %d", i, len(msgs), 1) - } - m := msgs[0] - if m.Type != pb.MsgVoteResp { - t.Errorf("#%d: msgType = %d, want %d", i, m.Type, pb.MsgVoteResp) - } - if m.Reject != tt.wreject { - t.Errorf("#%d: reject = %t, want %t", i, m.Reject, tt.wreject) - } - } -} - -// TestLeaderOnlyCommitsLogFromCurrentTerm tests that only log entries from the leader’s -// current term are committed by counting replicas. 
-// Reference: section 5.4.2 -func TestLeaderOnlyCommitsLogFromCurrentTerm(t *testing.T) { - ents := []pb.Entry{{Term: 1, Index: 1}, {Term: 2, Index: 2}} - tests := []struct { - index uint64 - wcommit uint64 - }{ - // do not commit log entries in previous terms - {1, 0}, - {2, 0}, - // commit log in current term - {3, 3}, - } - for i, tt := range tests { - storage := newTestMemoryStorage(withPeers(1, 2)) - storage.Append(ents) - r := newTestRaft(1, 10, 1, storage) - r.loadState(pb.HardState{Term: 2}) - // become leader at term 3 - r.becomeCandidate() - r.becomeLeader() - r.readMessages() - // propose a entry to current term - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Term: r.Term, Index: tt.index}) - rd := newReady(r, &SoftState{}, pb.HardState{}) - storage.Append(rd.Entries) - r.advance(rd) - if r.raftLog.committed != tt.wcommit { - t.Errorf("#%d: commit = %d, want %d", i, r.raftLog.committed, tt.wcommit) - } - } -} - -type messageSlice []pb.Message - -func (s messageSlice) Len() int { return len(s) } -func (s messageSlice) Less(i, j int) bool { return fmt.Sprint(s[i]) < fmt.Sprint(s[j]) } -func (s messageSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - -func commitNoopEntry(r *raft, s *MemoryStorage) { - if r.state != StateLeader { - panic("it should only be used when it is the leader") - } - r.bcastAppend() - // simulate the response of MsgApp - msgs := r.readMessages() - for _, m := range msgs { - if m.Type != pb.MsgApp || len(m.Entries) != 1 || m.Entries[0].Data != nil { - panic("not a message to append noop entry") - } - r.Step(acceptAndReply(m)) - } - // ignore further messages to refresh followers' commit index - r.readMessages() - s.Append(r.raftLog.unstableEntries()) - r.raftLog.appliedTo(r.raftLog.committed) - r.raftLog.stableTo(r.raftLog.lastIndex(), r.raftLog.lastTerm()) -} - -func acceptAndReply(m pb.Message) pb.Message { - if m.Type != pb.MsgApp { - 
panic("type should be MsgApp") - } - return pb.Message{ - From: m.To, - To: m.From, - Term: m.Term, - Type: pb.MsgAppResp, - Index: m.Index + uint64(len(m.Entries)), - } -} diff --git a/raft/raft_snap_test.go b/raft/raft_snap_test.go deleted file mode 100644 index f8ed07eba15a..000000000000 --- a/raft/raft_snap_test.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "testing" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -var ( - testingSnap = pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1, 2}}, - }, - } -) - -func TestSendingSnapshotSetPendingSnapshot(t *testing.T) { - storage := newTestMemoryStorage(withPeers(1)) - sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - - sm.becomeCandidate() - sm.becomeLeader() - - // force set the next of node 2, so that - // node 2 needs a snapshot - sm.prs.Progress[2].Next = sm.raftLog.firstIndex() - - sm.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Index: sm.prs.Progress[2].Next - 1, Reject: true}) - if sm.prs.Progress[2].PendingSnapshot != 11 { - t.Fatalf("PendingSnapshot = %d, want 11", sm.prs.Progress[2].PendingSnapshot) - } -} - -func TestPendingSnapshotPauseReplication(t *testing.T) { - storage := newTestMemoryStorage(withPeers(1, 2)) - sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - 
- sm.becomeCandidate() - sm.becomeLeader() - - sm.prs.Progress[2].BecomeSnapshot(11) - - sm.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - msgs := sm.readMessages() - if len(msgs) != 0 { - t.Fatalf("len(msgs) = %d, want 0", len(msgs)) - } -} - -func TestSnapshotFailure(t *testing.T) { - storage := newTestMemoryStorage(withPeers(1, 2)) - sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - - sm.becomeCandidate() - sm.becomeLeader() - - sm.prs.Progress[2].Next = 1 - sm.prs.Progress[2].BecomeSnapshot(11) - - sm.Step(pb.Message{From: 2, To: 1, Type: pb.MsgSnapStatus, Reject: true}) - if sm.prs.Progress[2].PendingSnapshot != 0 { - t.Fatalf("PendingSnapshot = %d, want 0", sm.prs.Progress[2].PendingSnapshot) - } - if sm.prs.Progress[2].Next != 1 { - t.Fatalf("Next = %d, want 1", sm.prs.Progress[2].Next) - } - if !sm.prs.Progress[2].MsgAppFlowPaused { - t.Errorf("MsgAppFlowPaused = %v, want true", sm.prs.Progress[2].MsgAppFlowPaused) - } -} - -func TestSnapshotSucceed(t *testing.T) { - storage := newTestMemoryStorage(withPeers(1, 2)) - sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - - sm.becomeCandidate() - sm.becomeLeader() - - sm.prs.Progress[2].Next = 1 - sm.prs.Progress[2].BecomeSnapshot(11) - - sm.Step(pb.Message{From: 2, To: 1, Type: pb.MsgSnapStatus, Reject: false}) - if sm.prs.Progress[2].PendingSnapshot != 0 { - t.Fatalf("PendingSnapshot = %d, want 0", sm.prs.Progress[2].PendingSnapshot) - } - if sm.prs.Progress[2].Next != 12 { - t.Fatalf("Next = %d, want 12", sm.prs.Progress[2].Next) - } - if !sm.prs.Progress[2].MsgAppFlowPaused { - t.Errorf("MsgAppFlowPaused = %v, want true", sm.prs.Progress[2].MsgAppFlowPaused) - } -} - -func TestSnapshotAbort(t *testing.T) { - storage := newTestMemoryStorage(withPeers(1, 2)) - sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - - sm.becomeCandidate() - sm.becomeLeader() - - sm.prs.Progress[2].Next = 1 - 
sm.prs.Progress[2].BecomeSnapshot(11) - - // A successful msgAppResp that has a higher/equal index than the - // pending snapshot should abort the pending snapshot. - sm.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Index: 11}) - if sm.prs.Progress[2].PendingSnapshot != 0 { - t.Fatalf("PendingSnapshot = %d, want 0", sm.prs.Progress[2].PendingSnapshot) - } - // The follower entered StateReplicate and the leader send an append - // and optimistically updated the progress (so we see 13 instead of 12). - // There is something to append because the leader appended an empty entry - // to the log at index 12 when it assumed leadership. - if sm.prs.Progress[2].Next != 13 { - t.Fatalf("Next = %d, want 13", sm.prs.Progress[2].Next) - } - if n := sm.prs.Progress[2].Inflights.Count(); n != 1 { - t.Fatalf("expected an inflight message, got %d", n) - } -} diff --git a/raft/raft_test.go b/raft/raft_test.go deleted file mode 100644 index 687f94582c35..000000000000 --- a/raft/raft_test.go +++ /dev/null @@ -1,4892 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package raft - -import ( - "bytes" - "fmt" - "math" - "math/rand" - "reflect" - "strings" - "testing" - - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -// nextEnts returns the appliable entries and updates the applied index -func nextEnts(r *raft, s *MemoryStorage) (ents []pb.Entry) { - for { - rd := newReady(r, &SoftState{}, pb.HardState{}) - s.Append(rd.Entries) - r.advance(rd) - if len(rd.Entries)+len(rd.CommittedEntries) == 0 { - return ents - } - ents = append(ents, rd.CommittedEntries...) - } -} - -func mustAppendEntry(r *raft, ents ...pb.Entry) { - if !r.appendEntry(ents...) { - panic("entry unexpectedly dropped") - } -} - -type stateMachine interface { - Step(m pb.Message) error - readMessages() []pb.Message -} - -func (r *raft) readMessages() []pb.Message { - msgs := r.msgs - r.msgs = make([]pb.Message, 0) - - return msgs -} - -func TestProgressLeader(t *testing.T) { - s := newTestMemoryStorage(withPeers(1, 2)) - r := newTestRaft(1, 5, 1, s) - r.becomeCandidate() - r.becomeLeader() - r.prs.Progress[2].BecomeReplicate() - - // Send proposals to r1. The first 5 entries should be queued in the unstable log. - propMsg := pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("foo")}}} - for i := 0; i < 5; i++ { - if err := r.Step(propMsg); err != nil { - t.Fatalf("proposal resulted in error: %v", err) - } - } - if m := r.prs.Progress[1].Match; m != 0 { - t.Fatalf("expected zero match, got %d", m) - } - rd := newReady(r, &SoftState{}, pb.HardState{}) - if len(rd.Entries) != 6 || len(rd.Entries[0].Data) > 0 || string(rd.Entries[5].Data) != "foo" { - t.Fatalf("unexpected Entries: %s", DescribeReady(rd, nil)) - } - r.advance(rd) - if m := r.prs.Progress[1].Match; m != 6 { - t.Fatalf("unexpected Match %d", m) - } - if m := r.prs.Progress[1].Next; m != 7 { - t.Fatalf("unexpected Next %d", m) - } -} - -// TestProgressResumeByHeartbeatResp ensures raft.heartbeat reset progress.paused by heartbeat response. 
-func TestProgressResumeByHeartbeatResp(t *testing.T) { - r := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - - r.prs.Progress[2].MsgAppFlowPaused = true - - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgBeat}) - if !r.prs.Progress[2].MsgAppFlowPaused { - t.Errorf("paused = %v, want true", r.prs.Progress[2].MsgAppFlowPaused) - } - - r.prs.Progress[2].BecomeReplicate() - if r.prs.Progress[2].MsgAppFlowPaused { - t.Errorf("paused = %v, want false", r.prs.Progress[2].MsgAppFlowPaused) - } - r.prs.Progress[2].MsgAppFlowPaused = true - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgHeartbeatResp}) - if r.prs.Progress[2].MsgAppFlowPaused { - t.Errorf("paused = %v, want false", r.prs.Progress[2].MsgAppFlowPaused) - } -} - -func TestProgressPaused(t *testing.T) { - r := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - - ms := r.readMessages() - if len(ms) != 1 { - t.Errorf("len(ms) = %d, want 1", len(ms)) - } -} - -func TestProgressFlowControl(t *testing.T) { - cfg := newTestConfig(1, 5, 1, newTestMemoryStorage(withPeers(1, 2))) - cfg.MaxInflightMsgs = 3 - cfg.MaxSizePerMsg = 2048 - cfg.MaxInflightBytes = 9000 // A little over MaxInflightMsgs * MaxSizePerMsg. - r := newRaft(cfg) - r.becomeCandidate() - r.becomeLeader() - - // Throw away all the messages relating to the initial election. - r.readMessages() - - // While node 2 is in probe state, propose a bunch of entries. 
- r.prs.Progress[2].BecomeProbe() - blob := []byte(strings.Repeat("a", 1000)) - large := []byte(strings.Repeat("b", 5000)) - for i := 0; i < 22; i++ { - blob := blob - if i >= 10 && i < 16 { // Temporarily send large messages. - blob = large - } - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: blob}}}) - } - - ms := r.readMessages() - // First append has two entries: the empty entry to confirm the - // election, and the first proposal (only one proposal gets sent - // because we're in probe state). - if len(ms) != 1 || ms[0].Type != pb.MsgApp { - t.Fatalf("expected 1 MsgApp, got %v", ms) - } - if len(ms[0].Entries) != 2 { - t.Fatalf("expected 2 entries, got %d", len(ms[0].Entries)) - } - if len(ms[0].Entries[0].Data) != 0 || len(ms[0].Entries[1].Data) != 1000 { - t.Fatalf("unexpected entry sizes: %v", ms[0].Entries) - } - - ackAndVerify := func(index uint64, expEntries ...int) uint64 { - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Index: index}) - ms := r.readMessages() - if got, want := len(ms), len(expEntries); got != want { - t.Fatalf("expected %d messages, got %d", want, got) - } - for i, m := range ms { - if got, want := m.Type, pb.MsgApp; got != want { - t.Errorf("%d: expected MsgApp, got %s", i, got) - } - if got, want := len(m.Entries), expEntries[i]; got != want { - t.Errorf("%d: expected %d entries, got %d", i, want, got) - } - } - last := ms[len(ms)-1].Entries - if len(last) == 0 { - return index - } - return last[len(last)-1].Index - } - - // When this append is acked, we change to replicate state and can - // send multiple messages at once. - index := ackAndVerify(ms[0].Entries[1].Index, 2, 2, 2) - // Ack all three of those messages together and get another 3 messages. The - // third message contains a single large entry, in contrast to 2 before. - index = ackAndVerify(index, 2, 1, 1) - // All subsequent messages contain one large entry, and we cap at 2 messages - // because it overflows MaxInflightBytes. 
- index = ackAndVerify(index, 1, 1) - index = ackAndVerify(index, 1, 1) - // Start getting small messages again. - index = ackAndVerify(index, 1, 2, 2) - ackAndVerify(index, 2) -} - -func TestUncommittedEntryLimit(t *testing.T) { - // Use a relatively large number of entries here to prevent regression of a - // bug which computed the size before it was fixed. This test would fail - // with the bug, either because we'd get dropped proposals earlier than we - // expect them, or because the final tally ends up nonzero. (At the time of - // writing, the former). - const maxEntries = 1024 - testEntry := pb.Entry{Data: []byte("testdata")} - maxEntrySize := maxEntries * PayloadSize(testEntry) - - if n := PayloadSize(pb.Entry{Data: nil}); n != 0 { - t.Fatal("entry with no Data must have zero payload size") - } - - cfg := newTestConfig(1, 5, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - cfg.MaxUncommittedEntriesSize = uint64(maxEntrySize) - cfg.MaxInflightMsgs = 2 * 1024 // avoid interference - r := newRaft(cfg) - r.becomeCandidate() - r.becomeLeader() - if n := r.uncommittedSize; n != 0 { - t.Fatalf("expected zero uncommitted size, got %d bytes", n) - } - - // Set the two followers to the replicate state. Commit to tail of log. - const numFollowers = 2 - r.prs.Progress[2].BecomeReplicate() - r.prs.Progress[3].BecomeReplicate() - r.uncommittedSize = 0 - - // Send proposals to r1. The first 5 entries should be appended to the log. - propMsg := pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{testEntry}} - propEnts := make([]pb.Entry, maxEntries) - for i := 0; i < maxEntries; i++ { - if err := r.Step(propMsg); err != nil { - t.Fatalf("proposal resulted in error: %v", err) - } - propEnts[i] = testEntry - } - - // Send one more proposal to r1. It should be rejected. 
- if err := r.Step(propMsg); err != ErrProposalDropped { - t.Fatalf("proposal not dropped: %v", err) - } - - // Read messages and reduce the uncommitted size as if we had committed - // these entries. - ms := r.readMessages() - if e := maxEntries * numFollowers; len(ms) != e { - t.Fatalf("expected %d messages, got %d", e, len(ms)) - } - r.reduceUncommittedSize(propEnts) - if r.uncommittedSize != 0 { - t.Fatalf("committed everything, but still tracking %d", r.uncommittedSize) - } - - // Send a single large proposal to r1. Should be accepted even though it - // pushes us above the limit because we were beneath it before the proposal. - propEnts = make([]pb.Entry, 2*maxEntries) - for i := range propEnts { - propEnts[i] = testEntry - } - propMsgLarge := pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: propEnts} - if err := r.Step(propMsgLarge); err != nil { - t.Fatalf("proposal resulted in error: %v", err) - } - - // Send one more proposal to r1. It should be rejected, again. - if err := r.Step(propMsg); err != ErrProposalDropped { - t.Fatalf("proposal not dropped: %v", err) - } - - // But we can always append an entry with no Data. This is used both for the - // leader's first empty entry and for auto-transitioning out of joint config - // states. - if err := r.Step( - pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}, - ); err != nil { - t.Fatal(err) - } - - // Read messages and reduce the uncommitted size as if we had committed - // these entries. 
- ms = r.readMessages() - if e := 2 * numFollowers; len(ms) != e { - t.Fatalf("expected %d messages, got %d", e, len(ms)) - } - r.reduceUncommittedSize(propEnts) - if n := r.uncommittedSize; n != 0 { - t.Fatalf("expected zero uncommitted size, got %d", n) - } -} - -func TestLeaderElection(t *testing.T) { - testLeaderElection(t, false) -} - -func TestLeaderElectionPreVote(t *testing.T) { - testLeaderElection(t, true) -} - -func testLeaderElection(t *testing.T, preVote bool) { - var cfg func(*Config) - candState := StateCandidate - candTerm := uint64(1) - if preVote { - cfg = preVoteConfig - // In pre-vote mode, an election that fails to complete - // leaves the node in pre-candidate state without advancing - // the term. - candState = StatePreCandidate - candTerm = 0 - } - tests := []struct { - *network - state StateType - expTerm uint64 - }{ - {newNetworkWithConfig(cfg, nil, nil, nil), StateLeader, 1}, - {newNetworkWithConfig(cfg, nil, nil, nopStepper), StateLeader, 1}, - {newNetworkWithConfig(cfg, nil, nopStepper, nopStepper), candState, candTerm}, - {newNetworkWithConfig(cfg, nil, nopStepper, nopStepper, nil), candState, candTerm}, - {newNetworkWithConfig(cfg, nil, nopStepper, nopStepper, nil, nil), StateLeader, 1}, - - // three logs further along than 0, but in the same term so rejections - // are returned instead of the votes being ignored. - {newNetworkWithConfig(cfg, - nil, entsWithConfig(cfg, 1), entsWithConfig(cfg, 1), entsWithConfig(cfg, 1, 1), nil), - StateFollower, 1}, - } - - for i, tt := range tests { - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - sm := tt.network.peers[1].(*raft) - if sm.state != tt.state { - t.Errorf("#%d: state = %s, want %s", i, sm.state, tt.state) - } - if g := sm.Term; g != tt.expTerm { - t.Errorf("#%d: term = %d, want %d", i, g, tt.expTerm) - } - } -} - -// TestLearnerElectionTimeout verfies that the leader should not start election even -// when times out. 
-func TestLearnerElectionTimeout(t *testing.T) { - n1 := newTestLearnerRaft(1, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - - // n2 is learner. Learner should not start election even when times out. - setRandomizedElectionTimeout(n2, n2.electionTimeout) - for i := 0; i < n2.electionTimeout; i++ { - n2.tick() - } - - if n2.state != StateFollower { - t.Errorf("peer 2 state: %s, want %s", n2.state, StateFollower) - } -} - -// TestLearnerPromotion verifies that the learner should not election until -// it is promoted to a normal peer. -func TestLearnerPromotion(t *testing.T) { - n1 := newTestLearnerRaft(1, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - - nt := newNetwork(n1, n2) - - if n1.state == StateLeader { - t.Error("peer 1 state is leader, want not", n1.state) - } - - // n1 should become leader - setRandomizedElectionTimeout(n1, n1.electionTimeout) - for i := 0; i < n1.electionTimeout; i++ { - n1.tick() - } - - if n1.state != StateLeader { - t.Errorf("peer 1 state: %s, want %s", n1.state, StateLeader) - } - if n2.state != StateFollower { - t.Errorf("peer 2 state: %s, want %s", n2.state, StateFollower) - } - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgBeat}) - - n1.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddNode}.AsV2()) - n2.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddNode}.AsV2()) - if n2.isLearner { - t.Error("peer 2 is learner, want not") - } - - // n2 start election, should become leader - setRandomizedElectionTimeout(n2, n2.electionTimeout) - for i := 0; i < n2.electionTimeout; i++ { - n2.tick() - } - - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgBeat}) - - if n1.state != StateFollower 
{ - t.Errorf("peer 1 state: %s, want %s", n1.state, StateFollower) - } - if n2.state != StateLeader { - t.Errorf("peer 2 state: %s, want %s", n2.state, StateLeader) - } -} - -// TestLearnerCanVote checks that a learner can vote when it receives a valid Vote request. -// See (*raft).Step for why this is necessary and correct behavior. -func TestLearnerCanVote(t *testing.T) { - n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - - n2.becomeFollower(1, None) - - n2.Step(pb.Message{From: 1, To: 2, Term: 2, Type: pb.MsgVote, LogTerm: 11, Index: 11}) - - if len(n2.msgs) != 1 { - t.Fatalf("expected exactly one message, not %+v", n2.msgs) - } - msg := n2.msgs[0] - if msg.Type != pb.MsgVoteResp && !msg.Reject { - t.Fatal("expected learner to not reject vote") - } -} - -func TestLeaderCycle(t *testing.T) { - testLeaderCycle(t, false) -} - -func TestLeaderCyclePreVote(t *testing.T) { - testLeaderCycle(t, true) -} - -// testLeaderCycle verifies that each node in a cluster can campaign -// and be elected in turn. 
This ensures that elections (including -// pre-vote) work when not starting from a clean slate (as they do in -// TestLeaderElection) -func testLeaderCycle(t *testing.T, preVote bool) { - var cfg func(*Config) - if preVote { - cfg = preVoteConfig - } - n := newNetworkWithConfig(cfg, nil, nil, nil) - for campaignerID := uint64(1); campaignerID <= 3; campaignerID++ { - n.send(pb.Message{From: campaignerID, To: campaignerID, Type: pb.MsgHup}) - - for _, peer := range n.peers { - sm := peer.(*raft) - if sm.id == campaignerID && sm.state != StateLeader { - t.Errorf("preVote=%v: campaigning node %d state = %v, want StateLeader", - preVote, sm.id, sm.state) - } else if sm.id != campaignerID && sm.state != StateFollower { - t.Errorf("preVote=%v: after campaign of node %d, "+ - "node %d had state = %v, want StateFollower", - preVote, campaignerID, sm.id, sm.state) - } - } - } -} - -// TestLeaderElectionOverwriteNewerLogs tests a scenario in which a -// newly-elected leader does *not* have the newest (i.e. highest term) -// log entries, and must overwrite higher-term log entries with -// lower-term ones. -func TestLeaderElectionOverwriteNewerLogs(t *testing.T) { - testLeaderElectionOverwriteNewerLogs(t, false) -} - -func TestLeaderElectionOverwriteNewerLogsPreVote(t *testing.T) { - testLeaderElectionOverwriteNewerLogs(t, true) -} - -func testLeaderElectionOverwriteNewerLogs(t *testing.T, preVote bool) { - var cfg func(*Config) - if preVote { - cfg = preVoteConfig - } - // This network represents the results of the following sequence of - // events: - // - Node 1 won the election in term 1. - // - Node 1 replicated a log entry to node 2 but died before sending - // it to other nodes. - // - Node 3 won the second election in term 2. - // - Node 3 wrote an entry to its logs but died without sending it - // to any other nodes. - // - // At this point, nodes 1, 2, and 3 all have uncommitted entries in - // their logs and could win an election at term 3. 
The winner's log - // entry overwrites the losers'. (TestLeaderSyncFollowerLog tests - // the case where older log entries are overwritten, so this test - // focuses on the case where the newer entries are lost). - n := newNetworkWithConfig(cfg, - entsWithConfig(cfg, 1), // Node 1: Won first election - entsWithConfig(cfg, 1), // Node 2: Got logs from node 1 - entsWithConfig(cfg, 2), // Node 3: Won second election - votedWithConfig(cfg, 3, 2), // Node 4: Voted but didn't get logs - votedWithConfig(cfg, 3, 2)) // Node 5: Voted but didn't get logs - - // Node 1 campaigns. The election fails because a quorum of nodes - // know about the election that already happened at term 2. Node 1's - // term is pushed ahead to 2. - n.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - sm1 := n.peers[1].(*raft) - if sm1.state != StateFollower { - t.Errorf("state = %s, want StateFollower", sm1.state) - } - if sm1.Term != 2 { - t.Errorf("term = %d, want 2", sm1.Term) - } - - // Node 1 campaigns again with a higher term. This time it succeeds. - n.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - if sm1.state != StateLeader { - t.Errorf("state = %s, want StateLeader", sm1.state) - } - if sm1.Term != 3 { - t.Errorf("term = %d, want 3", sm1.Term) - } - - // Now all nodes agree on a log entry with term 1 at index 1 (and - // term 3 at index 2). 
- for i := range n.peers { - sm := n.peers[i].(*raft) - entries := sm.raftLog.allEntries() - if len(entries) != 2 { - t.Fatalf("node %d: len(entries) == %d, want 2", i, len(entries)) - } - if entries[0].Term != 1 { - t.Errorf("node %d: term at index 1 == %d, want 1", i, entries[0].Term) - } - if entries[1].Term != 3 { - t.Errorf("node %d: term at index 2 == %d, want 3", i, entries[1].Term) - } - } -} - -func TestVoteFromAnyState(t *testing.T) { - testVoteFromAnyState(t, pb.MsgVote) -} - -func TestPreVoteFromAnyState(t *testing.T) { - testVoteFromAnyState(t, pb.MsgPreVote) -} - -func testVoteFromAnyState(t *testing.T, vt pb.MessageType) { - for st := StateType(0); st < numStates; st++ { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - r.Term = 1 - - switch st { - case StateFollower: - r.becomeFollower(r.Term, 3) - case StatePreCandidate: - r.becomePreCandidate() - case StateCandidate: - r.becomeCandidate() - case StateLeader: - r.becomeCandidate() - r.becomeLeader() - } - - // Note that setting our state above may have advanced r.Term - // past its initial value. - origTerm := r.Term - newTerm := r.Term + 1 - - msg := pb.Message{ - From: 2, - To: 1, - Type: vt, - Term: newTerm, - LogTerm: newTerm, - Index: 42, - } - if err := r.Step(msg); err != nil { - t.Errorf("%s,%s: Step failed: %s", vt, st, err) - } - if len(r.msgs) != 1 { - t.Errorf("%s,%s: %d response messages, want 1: %+v", vt, st, len(r.msgs), r.msgs) - } else { - resp := r.msgs[0] - if resp.Type != voteRespMsgType(vt) { - t.Errorf("%s,%s: response message is %s, want %s", - vt, st, resp.Type, voteRespMsgType(vt)) - } - if resp.Reject { - t.Errorf("%s,%s: unexpected rejection", vt, st) - } - } - - // If this was a real vote, we reset our state and term. 
- if vt == pb.MsgVote { - if r.state != StateFollower { - t.Errorf("%s,%s: state %s, want %s", vt, st, r.state, StateFollower) - } - if r.Term != newTerm { - t.Errorf("%s,%s: term %d, want %d", vt, st, r.Term, newTerm) - } - if r.Vote != 2 { - t.Errorf("%s,%s: vote %d, want 2", vt, st, r.Vote) - } - } else { - // In a prevote, nothing changes. - if r.state != st { - t.Errorf("%s,%s: state %s, want %s", vt, st, r.state, st) - } - if r.Term != origTerm { - t.Errorf("%s,%s: term %d, want %d", vt, st, r.Term, origTerm) - } - // if st == StateFollower or StatePreCandidate, r hasn't voted yet. - // In StateCandidate or StateLeader, it's voted for itself. - if r.Vote != None && r.Vote != 1 { - t.Errorf("%s,%s: vote %d, want %d or 1", vt, st, r.Vote, None) - } - } - } -} - -func TestLogReplication(t *testing.T) { - tests := []struct { - *network - msgs []pb.Message - wcommitted uint64 - }{ - { - newNetwork(nil, nil, nil), - []pb.Message{ - {From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}, - }, - 2, - }, - { - newNetwork(nil, nil, nil), - []pb.Message{ - {From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}, - {From: 1, To: 2, Type: pb.MsgHup}, - {From: 1, To: 2, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}, - }, - 4, - }, - } - - for i, tt := range tests { - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - for _, m := range tt.msgs { - tt.send(m) - } - - for j, x := range tt.network.peers { - sm := x.(*raft) - - if sm.raftLog.committed != tt.wcommitted { - t.Errorf("#%d.%d: committed = %d, want %d", i, j, sm.raftLog.committed, tt.wcommitted) - } - - var ents []pb.Entry - for _, e := range nextEnts(sm, tt.network.storage[j]) { - if e.Data != nil { - ents = append(ents, e) - } - } - var props []pb.Message - for _, m := range tt.msgs { - if m.Type == pb.MsgProp { - props = append(props, m) - } - } - for k, m := range props { - if !bytes.Equal(ents[k].Data, m.Entries[0].Data) { - 
t.Errorf("#%d.%d: data = %d, want %d", i, j, ents[k].Data, m.Entries[0].Data) - } - } - } - } -} - -// TestLearnerLogReplication tests that a learner can receive entries from the leader. -func TestLearnerLogReplication(t *testing.T) { - s1 := newTestMemoryStorage(withPeers(1), withLearners(2)) - n1 := newTestLearnerRaft(1, 10, 1, s1) - n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - - nt := newNetwork(n1, n2) - nt.t = t - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - - setRandomizedElectionTimeout(n1, n1.electionTimeout) - for i := 0; i < n1.electionTimeout; i++ { - n1.tick() - } - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgBeat}) - - // n1 is leader and n2 is learner - if n1.state != StateLeader { - t.Errorf("peer 1 state: %s, want %s", n1.state, StateLeader) - } - if !n2.isLearner { - t.Error("peer 2 state: not learner, want yes") - } - - nextCommitted := uint64(2) - { - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - rd := newReady(n1, &SoftState{}, pb.HardState{}) - nt.send(rd.Messages...) - s1.Append(rd.Entries) - n1.advance(rd) - } - if n1.raftLog.committed != nextCommitted { - t.Errorf("peer 1 wants committed to %d, but still %d", nextCommitted, n1.raftLog.committed) - } - - { - rd := newReady(n1, &SoftState{}, pb.HardState{}) - nt.send(rd.Messages...) 
- } - - if n1.raftLog.committed != n2.raftLog.committed { - t.Errorf("peer 2 wants committed to %d, but still %d", n1.raftLog.committed, n2.raftLog.committed) - } - - match := n1.prs.Progress[2].Match - if match != n2.raftLog.committed { - t.Errorf("progress 2 of leader 1 wants match %d, but got %d", n2.raftLog.committed, match) - } -} - -func TestSingleNodeCommit(t *testing.T) { - s := newTestMemoryStorage(withPeers(1)) - cfg := newTestConfig(1, 10, 1, s) - r := newRaft(cfg) - tt := newNetwork(r) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - - rd := newReady(r, &SoftState{}, pb.HardState{}) - s.Append(rd.Entries) - r.advance(rd) - - sm := tt.peers[1].(*raft) - if sm.raftLog.committed != 3 { - t.Errorf("committed = %d, want %d", sm.raftLog.committed, 3) - } -} - -// TestCannotCommitWithoutNewTermEntry tests the entries cannot be committed -// when leader changes, no new proposal comes in and ChangeTerm proposal is -// filtered. 
-func TestCannotCommitWithoutNewTermEntry(t *testing.T) { - tt := newNetwork(nil, nil, nil, nil, nil) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // 0 cannot reach 2,3,4 - tt.cut(1, 3) - tt.cut(1, 4) - tt.cut(1, 5) - - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - - sm := tt.peers[1].(*raft) - if sm.raftLog.committed != 1 { - t.Errorf("committed = %d, want %d", sm.raftLog.committed, 1) - } - - // network recovery - tt.recover() - // avoid committing ChangeTerm proposal - tt.ignore(pb.MsgApp) - - // elect 2 as the new leader with term 2 - tt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - - // no log entries from previous term should be committed - sm = tt.peers[2].(*raft) - if sm.raftLog.committed != 1 { - t.Errorf("committed = %d, want %d", sm.raftLog.committed, 1) - } - - tt.recover() - // send heartbeat; reset wait - tt.send(pb.Message{From: 2, To: 2, Type: pb.MsgBeat}) - // append an entry at current term - tt.send(pb.Message{From: 2, To: 2, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - // expect the committed to be advanced - if sm.raftLog.committed != 5 { - t.Errorf("committed = %d, want %d", sm.raftLog.committed, 5) - } -} - -// TestCommitWithoutNewTermEntry tests the entries could be committed -// when leader changes, no new proposal comes in. 
-func TestCommitWithoutNewTermEntry(t *testing.T) { - tt := newNetwork(nil, nil, nil, nil, nil) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // 0 cannot reach 2,3,4 - tt.cut(1, 3) - tt.cut(1, 4) - tt.cut(1, 5) - - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - - sm := tt.peers[1].(*raft) - if sm.raftLog.committed != 1 { - t.Errorf("committed = %d, want %d", sm.raftLog.committed, 1) - } - - // network recovery - tt.recover() - - // elect 2 as the new leader with term 2 - // after append a ChangeTerm entry from the current term, all entries - // should be committed - tt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - - if sm.raftLog.committed != 4 { - t.Errorf("committed = %d, want %d", sm.raftLog.committed, 4) - } -} - -func TestDuelingCandidates(t *testing.T) { - s1 := newTestMemoryStorage(withPeers(1, 2, 3)) - s2 := newTestMemoryStorage(withPeers(1, 2, 3)) - s3 := newTestMemoryStorage(withPeers(1, 2, 3)) - a := newTestRaft(1, 10, 1, s1) - b := newTestRaft(2, 10, 1, s2) - c := newTestRaft(3, 10, 1, s3) - - nt := newNetwork(a, b, c) - nt.cut(1, 3) - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - // 1 becomes leader since it receives votes from 1 and 2 - sm := nt.peers[1].(*raft) - if sm.state != StateLeader { - t.Errorf("state = %s, want %s", sm.state, StateLeader) - } - - // 3 stays as candidate since it receives a vote from 3 and a rejection from 2 - sm = nt.peers[3].(*raft) - if sm.state != StateCandidate { - t.Errorf("state = %s, want %s", sm.state, StateCandidate) - } - - nt.recover() - - // candidate 3 now increases its term and tries to vote again - // we expect it to disrupt the leader 1 since it has a higher term - // 3 will be follower again since both 1 and 2 rejects its vote request since 3 does not have a 
long enough log - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - if sm.state != StateFollower { - t.Errorf("state = %s, want %s", sm.state, StateFollower) - } - - tests := []struct { - sm *raft - state StateType - term uint64 - lastIndex uint64 - }{ - {a, StateFollower, 2, 1}, - {b, StateFollower, 2, 1}, - {c, StateFollower, 2, 0}, - } - - for i, tt := range tests { - if g := tt.sm.state; g != tt.state { - t.Errorf("#%d: state = %s, want %s", i, g, tt.state) - } - if g := tt.sm.Term; g != tt.term { - t.Errorf("#%d: term = %d, want %d", i, g, tt.term) - } - if exp, act := tt.lastIndex, tt.sm.raftLog.lastIndex(); exp != act { - t.Errorf("#%d: last index exp = %d, act = %d", i, exp, act) - } - } -} - -func TestDuelingPreCandidates(t *testing.T) { - cfgA := newTestConfig(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - cfgB := newTestConfig(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - cfgC := newTestConfig(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - cfgA.PreVote = true - cfgB.PreVote = true - cfgC.PreVote = true - a := newRaft(cfgA) - b := newRaft(cfgB) - c := newRaft(cfgC) - - nt := newNetwork(a, b, c) - nt.t = t - nt.cut(1, 3) - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - // 1 becomes leader since it receives votes from 1 and 2 - sm := nt.peers[1].(*raft) - if sm.state != StateLeader { - t.Errorf("state = %s, want %s", sm.state, StateLeader) - } - - // 3 campaigns then reverts to follower when its PreVote is rejected - sm = nt.peers[3].(*raft) - if sm.state != StateFollower { - t.Errorf("state = %s, want %s", sm.state, StateFollower) - } - - nt.recover() - - // Candidate 3 now increases its term and tries to vote again. - // With PreVote, it does not disrupt the leader. 
- nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - tests := []struct { - sm *raft - state StateType - term uint64 - lastIndex uint64 - }{ - {a, StateLeader, 1, 1}, - {b, StateFollower, 1, 1}, - {c, StateFollower, 1, 0}, - } - - for i, tt := range tests { - if g := tt.sm.state; g != tt.state { - t.Errorf("#%d: state = %s, want %s", i, g, tt.state) - } - if g := tt.sm.Term; g != tt.term { - t.Errorf("#%d: term = %d, want %d", i, g, tt.term) - } - if exp, act := tt.lastIndex, tt.sm.raftLog.lastIndex(); exp != act { - t.Errorf("#%d: last index is %d, exp %d", i, act, exp) - } - } -} - -func TestCandidateConcede(t *testing.T) { - tt := newNetwork(nil, nil, nil) - tt.isolate(1) - - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - tt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - // heal the partition - tt.recover() - // send heartbeat; reset wait - tt.send(pb.Message{From: 3, To: 3, Type: pb.MsgBeat}) - - data := []byte("force follower") - // send a proposal to 3 to flush out a MsgApp to 1 - tt.send(pb.Message{From: 3, To: 3, Type: pb.MsgProp, Entries: []pb.Entry{{Data: data}}}) - // send heartbeat; flush out commit - tt.send(pb.Message{From: 3, To: 3, Type: pb.MsgBeat}) - - a := tt.peers[1].(*raft) - if g := a.state; g != StateFollower { - t.Errorf("state = %s, want %s", g, StateFollower) - } - if g := a.Term; g != 1 { - t.Errorf("term = %d, want %d", g, 1) - } - wantLog := ltoa(&raftLog{ - storage: &MemoryStorage{ - ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Index: 2, Data: data}}, - }, - unstable: unstable{offset: 3}, - committed: 2, - }) - for i, p := range tt.peers { - if sm, ok := p.(*raft); ok { - l := ltoa(sm.raftLog) - if g := diffu(wantLog, l); g != "" { - t.Errorf("#%d: diff:\n%s", i, g) - } - } else { - t.Logf("#%d: empty log", i) - } - } -} - -func TestSingleNodeCandidate(t *testing.T) { - tt := newNetwork(nil) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - sm := tt.peers[1].(*raft) - if sm.state != 
StateLeader { - t.Errorf("state = %d, want %d", sm.state, StateLeader) - } -} - -func TestSingleNodePreCandidate(t *testing.T) { - tt := newNetworkWithConfig(preVoteConfig, nil) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - sm := tt.peers[1].(*raft) - if sm.state != StateLeader { - t.Errorf("state = %d, want %d", sm.state, StateLeader) - } -} - -func TestOldMessages(t *testing.T) { - tt := newNetwork(nil, nil, nil) - // make 0 leader @ term 3 - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - tt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - // pretend we're an old leader trying to make progress; this entry is expected to be ignored. - tt.send(pb.Message{From: 2, To: 1, Type: pb.MsgApp, Term: 2, Entries: []pb.Entry{{Index: 3, Term: 2}}}) - // commit a new entry - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - - ilog := &raftLog{ - storage: &MemoryStorage{ - ents: []pb.Entry{ - {}, {Data: nil, Term: 1, Index: 1}, - {Data: nil, Term: 2, Index: 2}, {Data: nil, Term: 3, Index: 3}, - {Data: []byte("somedata"), Term: 3, Index: 4}, - }, - }, - unstable: unstable{offset: 5}, - committed: 4, - } - base := ltoa(ilog) - for i, p := range tt.peers { - if sm, ok := p.(*raft); ok { - l := ltoa(sm.raftLog) - if g := diffu(base, l); g != "" { - t.Errorf("#%d: diff:\n%s", i, g) - } - } else { - t.Logf("#%d: empty log", i) - } - } -} - -// TestOldMessagesReply - optimization - reply with new term. 
- -func TestProposal(t *testing.T) { - tests := []struct { - *network - success bool - }{ - {newNetwork(nil, nil, nil), true}, - {newNetwork(nil, nil, nopStepper), true}, - {newNetwork(nil, nopStepper, nopStepper), false}, - {newNetwork(nil, nopStepper, nopStepper, nil), false}, - {newNetwork(nil, nopStepper, nopStepper, nil, nil), true}, - } - - for j, tt := range tests { - send := func(m pb.Message) { - defer func() { - // only recover if we expect it to panic (success==false) - if !tt.success { - e := recover() - if e != nil { - t.Logf("#%d: err: %s", j, e) - } - } - }() - tt.send(m) - } - - data := []byte("somedata") - - // promote 1 to become leader - send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: data}}}) - r := tt.network.peers[1].(*raft) - - wantLog := newLog(NewMemoryStorage(), raftLogger) - if tt.success { - wantLog = &raftLog{ - storage: &MemoryStorage{ - ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Index: 2, Data: data}}, - }, - unstable: unstable{offset: 3}, - } - } - base := ltoa(wantLog) - for i, p := range tt.peers { - if sm, ok := p.(*raft); ok { - l := ltoa(sm.raftLog) - if g := diffu(base, l); g != "" { - t.Errorf("#%d: peer %d diff:\n%s", j, i, g) - } - } else { - t.Logf("#%d: peer %d empty log", j, i) - } - } - if g := r.Term; g != 1 { - t.Errorf("#%d: term = %d, want %d", j, g, 1) - } - } -} - -func TestProposalByProxy(t *testing.T) { - data := []byte("somedata") - tests := []*network{ - newNetwork(nil, nil, nil), - newNetwork(nil, nil, nopStepper), - } - - for j, tt := range tests { - // promote 0 the leader - tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // propose via follower - tt.send(pb.Message{From: 2, To: 2, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - - wantLog := &raftLog{ - storage: &MemoryStorage{ - ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Data: data, Index: 2}}, - }, - 
unstable: unstable{offset: 3}, - committed: 2} - base := ltoa(wantLog) - for i, p := range tt.peers { - if sm, ok := p.(*raft); ok { - l := ltoa(sm.raftLog) - if g := diffu(base, l); g != "" { - t.Errorf("#%d: peer %d diff:\n%s", j, i, g) - } - } else { - t.Logf("#%d: peer %d empty log", j, i) - } - } - sm := tt.peers[1].(*raft) - if g := sm.Term; g != 1 { - t.Errorf("#%d: term = %d, want %d", j, g, 1) - } - } -} - -func TestCommit(t *testing.T) { - tests := []struct { - matches []uint64 - logs []pb.Entry - smTerm uint64 - w uint64 - }{ - // single - {[]uint64{1}, []pb.Entry{{Index: 1, Term: 1}}, 1, 1}, - {[]uint64{1}, []pb.Entry{{Index: 1, Term: 1}}, 2, 0}, - {[]uint64{2}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}, 2, 2}, - {[]uint64{1}, []pb.Entry{{Index: 1, Term: 2}}, 2, 1}, - - // odd - {[]uint64{2, 1, 1}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}, 1, 1}, - {[]uint64{2, 1, 1}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1}}, 2, 0}, - {[]uint64{2, 1, 2}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}, 2, 2}, - {[]uint64{2, 1, 2}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1}}, 2, 0}, - - // even - {[]uint64{2, 1, 1, 1}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}, 1, 1}, - {[]uint64{2, 1, 1, 1}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1}}, 2, 0}, - {[]uint64{2, 1, 1, 2}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}, 1, 1}, - {[]uint64{2, 1, 1, 2}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1}}, 2, 0}, - {[]uint64{2, 1, 2, 2}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}, 2, 2}, - {[]uint64{2, 1, 2, 2}, []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1}}, 2, 0}, - } - - for i, tt := range tests { - storage := newTestMemoryStorage(withPeers(1)) - storage.Append(tt.logs) - storage.hardState = pb.HardState{Term: tt.smTerm} - - sm := newTestRaft(1, 10, 2, storage) - for j := 0; j < len(tt.matches); j++ { - id := uint64(j) + 1 - if id > 1 { - sm.applyConfChange(pb.ConfChange{Type: 
pb.ConfChangeAddNode, NodeID: id}.AsV2()) - } - pr := sm.prs.Progress[id] - pr.Match, pr.Next = tt.matches[j], tt.matches[j]+1 - } - sm.maybeCommit() - if g := sm.raftLog.committed; g != tt.w { - t.Errorf("#%d: committed = %d, want %d", i, g, tt.w) - } - } -} - -func TestPastElectionTimeout(t *testing.T) { - tests := []struct { - elapse int - wprobability float64 - round bool - }{ - {5, 0, false}, - {10, 0.1, true}, - {13, 0.4, true}, - {15, 0.6, true}, - {18, 0.9, true}, - {20, 1, false}, - } - - for i, tt := range tests { - sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1))) - sm.electionElapsed = tt.elapse - c := 0 - for j := 0; j < 10000; j++ { - sm.resetRandomizedElectionTimeout() - if sm.pastElectionTimeout() { - c++ - } - } - got := float64(c) / 10000.0 - if tt.round { - got = math.Floor(got*10+0.5) / 10.0 - } - if got != tt.wprobability { - t.Errorf("#%d: probability = %v, want %v", i, got, tt.wprobability) - } - } -} - -// TestStepIgnoreOldTermMsg to ensure that the Step function ignores the message -// from old term and does not pass it to the actual stepX function. -func TestStepIgnoreOldTermMsg(t *testing.T) { - called := false - fakeStep := func(r *raft, m pb.Message) error { - called = true - return nil - } - sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1))) - sm.step = fakeStep - sm.Term = 2 - sm.Step(pb.Message{Type: pb.MsgApp, Term: sm.Term - 1}) - if called { - t.Errorf("stepFunc called = %v , want %v", called, false) - } -} - -// TestHandleMsgApp ensures: -// 1. Reply false if log doesn’t contain an entry at prevLogIndex whose term matches prevLogTerm. -// 2. If an existing entry conflicts with a new one (same index but different terms), -// delete the existing entry and all that follow it; append any new entries not already in the log. -// 3. If leaderCommit > commitIndex, set commitIndex = min(leaderCommit, index of last new entry). 
-func TestHandleMsgApp(t *testing.T) { - tests := []struct { - m pb.Message - wIndex uint64 - wCommit uint64 - wReject bool - }{ - // Ensure 1 - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 3, Index: 2, Commit: 3}, 2, 0, true}, // previous log mismatch - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 3, Index: 3, Commit: 3}, 2, 0, true}, // previous log non-exist - - // Ensure 2 - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 1, Index: 1, Commit: 1}, 2, 1, false}, - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 0, Index: 0, Commit: 1, Entries: []pb.Entry{{Index: 1, Term: 2}}}, 1, 1, false}, - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 2, Index: 2, Commit: 3, Entries: []pb.Entry{{Index: 3, Term: 2}, {Index: 4, Term: 2}}}, 4, 3, false}, - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 2, Index: 2, Commit: 4, Entries: []pb.Entry{{Index: 3, Term: 2}}}, 3, 3, false}, - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 1, Index: 1, Commit: 4, Entries: []pb.Entry{{Index: 2, Term: 2}}}, 2, 2, false}, - - // Ensure 3 - {pb.Message{Type: pb.MsgApp, Term: 1, LogTerm: 1, Index: 1, Commit: 3}, 2, 1, false}, // match entry 1, commit up to last new entry 1 - {pb.Message{Type: pb.MsgApp, Term: 1, LogTerm: 1, Index: 1, Commit: 3, Entries: []pb.Entry{{Index: 2, Term: 2}}}, 2, 2, false}, // match entry 1, commit up to last new entry 2 - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 2, Index: 2, Commit: 3}, 2, 2, false}, // match entry 2, commit up to last new entry 2 - {pb.Message{Type: pb.MsgApp, Term: 2, LogTerm: 2, Index: 2, Commit: 4}, 2, 2, false}, // commit up to log.last() - } - - for i, tt := range tests { - storage := newTestMemoryStorage(withPeers(1)) - storage.Append([]pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}) - sm := newTestRaft(1, 10, 1, storage) - sm.becomeFollower(2, None) - - sm.handleAppendEntries(tt.m) - if sm.raftLog.lastIndex() != tt.wIndex { - t.Errorf("#%d: lastIndex = %d, want %d", i, sm.raftLog.lastIndex(), tt.wIndex) - } - if 
sm.raftLog.committed != tt.wCommit { - t.Errorf("#%d: committed = %d, want %d", i, sm.raftLog.committed, tt.wCommit) - } - m := sm.readMessages() - if len(m) != 1 { - t.Fatalf("#%d: msg = nil, want 1", i) - } - if m[0].Reject != tt.wReject { - t.Errorf("#%d: reject = %v, want %v", i, m[0].Reject, tt.wReject) - } - } -} - -// TestHandleHeartbeat ensures that the follower commits to the commit in the message. -func TestHandleHeartbeat(t *testing.T) { - commit := uint64(2) - tests := []struct { - m pb.Message - wCommit uint64 - }{ - {pb.Message{From: 2, To: 1, Type: pb.MsgHeartbeat, Term: 2, Commit: commit + 1}, commit + 1}, - {pb.Message{From: 2, To: 1, Type: pb.MsgHeartbeat, Term: 2, Commit: commit - 1}, commit}, // do not decrease commit - } - - for i, tt := range tests { - storage := newTestMemoryStorage(withPeers(1, 2)) - storage.Append([]pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 3}}) - sm := newTestRaft(1, 5, 1, storage) - sm.becomeFollower(2, 2) - sm.raftLog.commitTo(commit) - sm.handleHeartbeat(tt.m) - if sm.raftLog.committed != tt.wCommit { - t.Errorf("#%d: committed = %d, want %d", i, sm.raftLog.committed, tt.wCommit) - } - m := sm.readMessages() - if len(m) != 1 { - t.Fatalf("#%d: msg = nil, want 1", i) - } - if m[0].Type != pb.MsgHeartbeatResp { - t.Errorf("#%d: type = %v, want MsgHeartbeatResp", i, m[0].Type) - } - } -} - -// TestHandleHeartbeatResp ensures that we re-send log entries when we get a heartbeat response. 
-func TestHandleHeartbeatResp(t *testing.T) { - storage := newTestMemoryStorage(withPeers(1, 2)) - storage.Append([]pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 3}}) - sm := newTestRaft(1, 5, 1, storage) - sm.becomeCandidate() - sm.becomeLeader() - sm.raftLog.commitTo(sm.raftLog.lastIndex()) - - // A heartbeat response from a node that is behind; re-send MsgApp - sm.Step(pb.Message{From: 2, Type: pb.MsgHeartbeatResp}) - msgs := sm.readMessages() - if len(msgs) != 1 { - t.Fatalf("len(msgs) = %d, want 1", len(msgs)) - } - if msgs[0].Type != pb.MsgApp { - t.Errorf("type = %v, want MsgApp", msgs[0].Type) - } - - // A second heartbeat response generates another MsgApp re-send - sm.Step(pb.Message{From: 2, Type: pb.MsgHeartbeatResp}) - msgs = sm.readMessages() - if len(msgs) != 1 { - t.Fatalf("len(msgs) = %d, want 1", len(msgs)) - } - if msgs[0].Type != pb.MsgApp { - t.Errorf("type = %v, want MsgApp", msgs[0].Type) - } - - // Once we have an MsgAppResp, heartbeats no longer send MsgApp. - sm.Step(pb.Message{ - From: 2, - Type: pb.MsgAppResp, - Index: msgs[0].Index + uint64(len(msgs[0].Entries)), - }) - // Consume the message sent in response to MsgAppResp - sm.readMessages() - - sm.Step(pb.Message{From: 2, Type: pb.MsgHeartbeatResp}) - msgs = sm.readMessages() - if len(msgs) != 0 { - t.Fatalf("len(msgs) = %d, want 0: %+v", len(msgs), msgs) - } -} - -// TestRaftFreesReadOnlyMem ensures raft will free read request from -// readOnly readIndexQueue and pendingReadIndex map. -// related issue: https://github.com/etcd-io/etcd/issues/7571 -func TestRaftFreesReadOnlyMem(t *testing.T) { - sm := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2))) - sm.becomeCandidate() - sm.becomeLeader() - sm.raftLog.commitTo(sm.raftLog.lastIndex()) - - ctx := []byte("ctx") - - // leader starts linearizable read request. - // more info: raft dissertation 6.4, step 2. 
- sm.Step(pb.Message{From: 2, Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: ctx}}}) - msgs := sm.readMessages() - if len(msgs) != 1 { - t.Fatalf("len(msgs) = %d, want 1", len(msgs)) - } - if msgs[0].Type != pb.MsgHeartbeat { - t.Fatalf("type = %v, want MsgHeartbeat", msgs[0].Type) - } - if !bytes.Equal(msgs[0].Context, ctx) { - t.Fatalf("Context = %v, want %v", msgs[0].Context, ctx) - } - if len(sm.readOnly.readIndexQueue) != 1 { - t.Fatalf("len(readIndexQueue) = %v, want 1", len(sm.readOnly.readIndexQueue)) - } - if len(sm.readOnly.pendingReadIndex) != 1 { - t.Fatalf("len(pendingReadIndex) = %v, want 1", len(sm.readOnly.pendingReadIndex)) - } - if _, ok := sm.readOnly.pendingReadIndex[string(ctx)]; !ok { - t.Fatalf("can't find context %v in pendingReadIndex ", ctx) - } - - // heartbeat responses from majority of followers (1 in this case) - // acknowledge the authority of the leader. - // more info: raft dissertation 6.4, step 3. - sm.Step(pb.Message{From: 2, Type: pb.MsgHeartbeatResp, Context: ctx}) - if len(sm.readOnly.readIndexQueue) != 0 { - t.Fatalf("len(readIndexQueue) = %v, want 0", len(sm.readOnly.readIndexQueue)) - } - if len(sm.readOnly.pendingReadIndex) != 0 { - t.Fatalf("len(pendingReadIndex) = %v, want 0", len(sm.readOnly.pendingReadIndex)) - } - if _, ok := sm.readOnly.pendingReadIndex[string(ctx)]; ok { - t.Fatalf("found context %v in pendingReadIndex, want none", ctx) - } -} - -// TestMsgAppRespWaitReset verifies the resume behavior of a leader -// MsgAppResp. -func TestMsgAppRespWaitReset(t *testing.T) { - s := newTestMemoryStorage(withPeers(1, 2, 3)) - sm := newTestRaft(1, 5, 1, s) - sm.becomeCandidate() - sm.becomeLeader() - - // Run n1 which includes sending a message like the below - // one to n2, but also appending to its own log. - nextEnts(sm, s) - - // Node 2 acks the first entry, making it committed. 
- sm.Step(pb.Message{ - From: 2, - Type: pb.MsgAppResp, - Index: 1, - }) - if sm.raftLog.committed != 1 { - t.Fatalf("expected committed to be 1, got %d", sm.raftLog.committed) - } - // Also consume the MsgApp messages that update Commit on the followers. - sm.readMessages() - - // A new command is now proposed on node 1. - sm.Step(pb.Message{ - From: 1, - Type: pb.MsgProp, - Entries: []pb.Entry{{}}, - }) - - // The command is broadcast to all nodes not in the wait state. - // Node 2 left the wait state due to its MsgAppResp, but node 3 is still waiting. - msgs := sm.readMessages() - if len(msgs) != 1 { - t.Fatalf("expected 1 message, got %d: %+v", len(msgs), msgs) - } - if msgs[0].Type != pb.MsgApp || msgs[0].To != 2 { - t.Errorf("expected MsgApp to node 2, got %v to %d", msgs[0].Type, msgs[0].To) - } - if len(msgs[0].Entries) != 1 || msgs[0].Entries[0].Index != 2 { - t.Errorf("expected to send entry 2, but got %v", msgs[0].Entries) - } - - // Now Node 3 acks the first entry. This releases the wait and entry 2 is sent. 
- sm.Step(pb.Message{ - From: 3, - Type: pb.MsgAppResp, - Index: 1, - }) - msgs = sm.readMessages() - if len(msgs) != 1 { - t.Fatalf("expected 1 message, got %d: %+v", len(msgs), msgs) - } - if msgs[0].Type != pb.MsgApp || msgs[0].To != 3 { - t.Errorf("expected MsgApp to node 3, got %v to %d", msgs[0].Type, msgs[0].To) - } - if len(msgs[0].Entries) != 1 || msgs[0].Entries[0].Index != 2 { - t.Errorf("expected to send entry 2, but got %v", msgs[0].Entries) - } -} - -func TestRecvMsgVote(t *testing.T) { - testRecvMsgVote(t, pb.MsgVote) -} - -func TestRecvMsgPreVote(t *testing.T) { - testRecvMsgVote(t, pb.MsgPreVote) -} - -func testRecvMsgVote(t *testing.T, msgType pb.MessageType) { - tests := []struct { - state StateType - index, logTerm uint64 - voteFor uint64 - wreject bool - }{ - {StateFollower, 0, 0, None, true}, - {StateFollower, 0, 1, None, true}, - {StateFollower, 0, 2, None, true}, - {StateFollower, 0, 3, None, false}, - - {StateFollower, 1, 0, None, true}, - {StateFollower, 1, 1, None, true}, - {StateFollower, 1, 2, None, true}, - {StateFollower, 1, 3, None, false}, - - {StateFollower, 2, 0, None, true}, - {StateFollower, 2, 1, None, true}, - {StateFollower, 2, 2, None, false}, - {StateFollower, 2, 3, None, false}, - - {StateFollower, 3, 0, None, true}, - {StateFollower, 3, 1, None, true}, - {StateFollower, 3, 2, None, false}, - {StateFollower, 3, 3, None, false}, - - {StateFollower, 3, 2, 2, false}, - {StateFollower, 3, 2, 1, true}, - - {StateLeader, 3, 3, 1, true}, - {StatePreCandidate, 3, 3, 1, true}, - {StateCandidate, 3, 3, 1, true}, - } - - max := func(a, b uint64) uint64 { - if a > b { - return a - } - return b - } - - for i, tt := range tests { - sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1))) - sm.state = tt.state - switch tt.state { - case StateFollower: - sm.step = stepFollower - case StateCandidate, StatePreCandidate: - sm.step = stepCandidate - case StateLeader: - sm.step = stepLeader - } - sm.Vote = tt.voteFor - sm.raftLog = 
&raftLog{ - storage: &MemoryStorage{ents: []pb.Entry{{}, {Index: 1, Term: 2}, {Index: 2, Term: 2}}}, - unstable: unstable{offset: 3}, - } - - // raft.Term is greater than or equal to raft.raftLog.lastTerm. In this - // test we're only testing MsgVote responses when the campaigning node - // has a different raft log compared to the recipient node. - // Additionally we're verifying behaviour when the recipient node has - // already given out its vote for its current term. We're not testing - // what the recipient node does when receiving a message with a - // different term number, so we simply initialize both term numbers to - // be the same. - term := max(sm.raftLog.lastTerm(), tt.logTerm) - sm.Term = term - sm.Step(pb.Message{Type: msgType, Term: term, From: 2, Index: tt.index, LogTerm: tt.logTerm}) - - msgs := sm.readMessages() - if g := len(msgs); g != 1 { - t.Fatalf("#%d: len(msgs) = %d, want 1", i, g) - } - if g := msgs[0].Type; g != voteRespMsgType(msgType) { - t.Errorf("#%d, m.Type = %v, want %v", i, g, voteRespMsgType(msgType)) - } - if g := msgs[0].Reject; g != tt.wreject { - t.Errorf("#%d, m.Reject = %v, want %v", i, g, tt.wreject) - } - } -} - -func TestStateTransition(t *testing.T) { - tests := []struct { - from StateType - to StateType - wallow bool - wterm uint64 - wlead uint64 - }{ - {StateFollower, StateFollower, true, 1, None}, - {StateFollower, StatePreCandidate, true, 0, None}, - {StateFollower, StateCandidate, true, 1, None}, - {StateFollower, StateLeader, false, 0, None}, - - {StatePreCandidate, StateFollower, true, 0, None}, - {StatePreCandidate, StatePreCandidate, true, 0, None}, - {StatePreCandidate, StateCandidate, true, 1, None}, - {StatePreCandidate, StateLeader, true, 0, 1}, - - {StateCandidate, StateFollower, true, 0, None}, - {StateCandidate, StatePreCandidate, true, 0, None}, - {StateCandidate, StateCandidate, true, 1, None}, - {StateCandidate, StateLeader, true, 0, 1}, - - {StateLeader, StateFollower, true, 1, None}, - {StateLeader, 
StatePreCandidate, false, 0, None}, - {StateLeader, StateCandidate, false, 1, None}, - {StateLeader, StateLeader, true, 0, 1}, - } - - for i, tt := range tests { - func() { - defer func() { - if r := recover(); r != nil { - if tt.wallow { - t.Errorf("%d: allow = %v, want %v", i, false, true) - } - } - }() - - sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1))) - sm.state = tt.from - - switch tt.to { - case StateFollower: - sm.becomeFollower(tt.wterm, tt.wlead) - case StatePreCandidate: - sm.becomePreCandidate() - case StateCandidate: - sm.becomeCandidate() - case StateLeader: - sm.becomeLeader() - } - - if sm.Term != tt.wterm { - t.Errorf("%d: term = %d, want %d", i, sm.Term, tt.wterm) - } - if sm.lead != tt.wlead { - t.Errorf("%d: lead = %d, want %d", i, sm.lead, tt.wlead) - } - }() - } -} - -func TestAllServerStepdown(t *testing.T) { - tests := []struct { - state StateType - - wstate StateType - wterm uint64 - windex uint64 - }{ - {StateFollower, StateFollower, 3, 0}, - {StatePreCandidate, StateFollower, 3, 0}, - {StateCandidate, StateFollower, 3, 0}, - {StateLeader, StateFollower, 3, 1}, - } - - tmsgTypes := [...]pb.MessageType{pb.MsgVote, pb.MsgApp} - tterm := uint64(3) - - for i, tt := range tests { - sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - switch tt.state { - case StateFollower: - sm.becomeFollower(1, None) - case StatePreCandidate: - sm.becomePreCandidate() - case StateCandidate: - sm.becomeCandidate() - case StateLeader: - sm.becomeCandidate() - sm.becomeLeader() - } - - for j, msgType := range tmsgTypes { - sm.Step(pb.Message{From: 2, Type: msgType, Term: tterm, LogTerm: tterm}) - - if sm.state != tt.wstate { - t.Errorf("#%d.%d state = %v , want %v", i, j, sm.state, tt.wstate) - } - if sm.Term != tt.wterm { - t.Errorf("#%d.%d term = %v , want %v", i, j, sm.Term, tt.wterm) - } - if sm.raftLog.lastIndex() != tt.windex { - t.Errorf("#%d.%d index = %v , want %v", i, j, sm.raftLog.lastIndex(), tt.windex) - } - if 
uint64(len(sm.raftLog.allEntries())) != tt.windex { - t.Errorf("#%d.%d len(ents) = %v , want %v", i, j, len(sm.raftLog.allEntries()), tt.windex) - } - wlead := uint64(2) - if msgType == pb.MsgVote { - wlead = None - } - if sm.lead != wlead { - t.Errorf("#%d, sm.lead = %d, want %d", i, sm.lead, None) - } - } - } -} - -func TestCandidateResetTermMsgHeartbeat(t *testing.T) { - testCandidateResetTerm(t, pb.MsgHeartbeat) -} - -func TestCandidateResetTermMsgApp(t *testing.T) { - testCandidateResetTerm(t, pb.MsgApp) -} - -// testCandidateResetTerm tests when a candidate receives a -// MsgHeartbeat or MsgApp from leader, "Step" resets the term -// with leader's and reverts back to follower. -func testCandidateResetTerm(t *testing.T, mt pb.MessageType) { - a := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - b := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - c := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - nt := newNetwork(a, b, c) - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - if a.state != StateLeader { - t.Errorf("state = %s, want %s", a.state, StateLeader) - } - if b.state != StateFollower { - t.Errorf("state = %s, want %s", b.state, StateFollower) - } - if c.state != StateFollower { - t.Errorf("state = %s, want %s", c.state, StateFollower) - } - - // isolate 3 and increase term in rest - nt.isolate(3) - - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - if a.state != StateLeader { - t.Errorf("state = %s, want %s", a.state, StateLeader) - } - if b.state != StateFollower { - t.Errorf("state = %s, want %s", b.state, StateFollower) - } - - // trigger campaign in isolated c - c.resetRandomizedElectionTimeout() - for i := 0; i < c.randomizedElectionTimeout; i++ { - c.tick() - } - - if c.state != StateCandidate { - t.Errorf("state = %s, want %s", c.state, StateCandidate) - } - - nt.recover() - - // leader sends to isolated candidate - // and 
expects candidate to revert to follower - nt.send(pb.Message{From: 1, To: 3, Term: a.Term, Type: mt}) - - if c.state != StateFollower { - t.Errorf("state = %s, want %s", c.state, StateFollower) - } - - // follower c term is reset with leader's - if a.Term != c.Term { - t.Errorf("follower term expected same term as leader's %d, got %d", a.Term, c.Term) - } -} - -func TestLeaderStepdownWhenQuorumActive(t *testing.T) { - sm := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - sm.checkQuorum = true - - sm.becomeCandidate() - sm.becomeLeader() - - for i := 0; i < sm.electionTimeout+1; i++ { - sm.Step(pb.Message{From: 2, Type: pb.MsgHeartbeatResp, Term: sm.Term}) - sm.tick() - } - - if sm.state != StateLeader { - t.Errorf("state = %v, want %v", sm.state, StateLeader) - } -} - -func TestLeaderStepdownWhenQuorumLost(t *testing.T) { - sm := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - sm.checkQuorum = true - - sm.becomeCandidate() - sm.becomeLeader() - - for i := 0; i < sm.electionTimeout+1; i++ { - sm.tick() - } - - if sm.state != StateFollower { - t.Errorf("state = %v, want %v", sm.state, StateFollower) - } -} - -func TestLeaderSupersedingWithCheckQuorum(t *testing.T) { - a := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - b := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - c := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - a.checkQuorum = true - b.checkQuorum = true - c.checkQuorum = true - - nt := newNetwork(a, b, c) - setRandomizedElectionTimeout(b, b.electionTimeout+1) - - for i := 0; i < b.electionTimeout; i++ { - b.tick() - } - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - if a.state != StateLeader { - t.Errorf("state = %s, want %s", a.state, StateLeader) - } - - if c.state != StateFollower { - t.Errorf("state = %s, want %s", c.state, StateFollower) - } - - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - // Peer b rejected c's vote since its 
electionElapsed had not reached to electionTimeout - if c.state != StateCandidate { - t.Errorf("state = %s, want %s", c.state, StateCandidate) - } - - // Letting b's electionElapsed reach to electionTimeout - for i := 0; i < b.electionTimeout; i++ { - b.tick() - } - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - if c.state != StateLeader { - t.Errorf("state = %s, want %s", c.state, StateLeader) - } -} - -func TestLeaderElectionWithCheckQuorum(t *testing.T) { - a := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - b := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - c := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - a.checkQuorum = true - b.checkQuorum = true - c.checkQuorum = true - - nt := newNetwork(a, b, c) - setRandomizedElectionTimeout(a, a.electionTimeout+1) - setRandomizedElectionTimeout(b, b.electionTimeout+2) - - // Immediately after creation, votes are cast regardless of the - // election timeout. - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - if a.state != StateLeader { - t.Errorf("state = %s, want %s", a.state, StateLeader) - } - - if c.state != StateFollower { - t.Errorf("state = %s, want %s", c.state, StateFollower) - } - - // need to reset randomizedElectionTimeout larger than electionTimeout again, - // because the value might be reset to electionTimeout since the last state changes - setRandomizedElectionTimeout(a, a.electionTimeout+1) - setRandomizedElectionTimeout(b, b.electionTimeout+2) - for i := 0; i < a.electionTimeout; i++ { - a.tick() - } - for i := 0; i < b.electionTimeout; i++ { - b.tick() - } - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - if a.state != StateFollower { - t.Errorf("state = %s, want %s", a.state, StateFollower) - } - - if c.state != StateLeader { - t.Errorf("state = %s, want %s", c.state, StateLeader) - } -} - -// TestFreeStuckCandidateWithCheckQuorum ensures that a candidate with a higher term -// can disrupt the leader even if the 
leader still "officially" holds the lease, The -// leader is expected to step down and adopt the candidate's term -func TestFreeStuckCandidateWithCheckQuorum(t *testing.T) { - a := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - b := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - c := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - a.checkQuorum = true - b.checkQuorum = true - c.checkQuorum = true - - nt := newNetwork(a, b, c) - setRandomizedElectionTimeout(b, b.electionTimeout+1) - - for i := 0; i < b.electionTimeout; i++ { - b.tick() - } - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(1) - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - if b.state != StateFollower { - t.Errorf("state = %s, want %s", b.state, StateFollower) - } - - if c.state != StateCandidate { - t.Errorf("state = %s, want %s", c.state, StateCandidate) - } - - if c.Term != b.Term+1 { - t.Errorf("term = %d, want %d", c.Term, b.Term+1) - } - - // Vote again for safety - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - if b.state != StateFollower { - t.Errorf("state = %s, want %s", b.state, StateFollower) - } - - if c.state != StateCandidate { - t.Errorf("state = %s, want %s", c.state, StateCandidate) - } - - if c.Term != b.Term+2 { - t.Errorf("term = %d, want %d", c.Term, b.Term+2) - } - - nt.recover() - nt.send(pb.Message{From: 1, To: 3, Type: pb.MsgHeartbeat, Term: a.Term}) - - // Disrupt the leader so that the stuck peer is freed - if a.state != StateFollower { - t.Errorf("state = %s, want %s", a.state, StateFollower) - } - - if c.Term != a.Term { - t.Errorf("term = %d, want %d", c.Term, a.Term) - } - - // Vote again, should become leader this time - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - if c.state != StateLeader { - t.Errorf("peer 3 state: %s, want %s", c.state, StateLeader) - } -} - -func TestNonPromotableVoterWithCheckQuorum(t *testing.T) { - a := newTestRaft(1, 10, 1, 
newTestMemoryStorage(withPeers(1, 2))) - b := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1))) - - a.checkQuorum = true - b.checkQuorum = true - - nt := newNetwork(a, b) - setRandomizedElectionTimeout(b, b.electionTimeout+1) - // Need to remove 2 again to make it a non-promotable node since newNetwork overwritten some internal states - b.applyConfChange(pb.ConfChange{Type: pb.ConfChangeRemoveNode, NodeID: 2}.AsV2()) - - if b.promotable() { - t.Fatalf("promotable = %v, want false", b.promotable()) - } - - for i := 0; i < b.electionTimeout; i++ { - b.tick() - } - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - if a.state != StateLeader { - t.Errorf("state = %s, want %s", a.state, StateLeader) - } - - if b.state != StateFollower { - t.Errorf("state = %s, want %s", b.state, StateFollower) - } - - if b.lead != 1 { - t.Errorf("lead = %d, want 1", b.lead) - } -} - -// TestDisruptiveFollower tests isolated follower, -// with slow network incoming from leader, election times out -// to become a candidate with an increased term. Then, the -// candiate's response to late leader heartbeat forces the leader -// to step down. 
-func TestDisruptiveFollower(t *testing.T) { - n1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n3 := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - n1.checkQuorum = true - n2.checkQuorum = true - n3.checkQuorum = true - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - n3.becomeFollower(1, None) - - nt := newNetwork(n1, n2, n3) - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // check state - // n1.state == StateLeader - // n2.state == StateFollower - // n3.state == StateFollower - if n1.state != StateLeader { - t.Fatalf("node 1 state: %s, want %s", n1.state, StateLeader) - } - if n2.state != StateFollower { - t.Fatalf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StateFollower { - t.Fatalf("node 3 state: %s, want %s", n3.state, StateFollower) - } - - // etcd server "advanceTicksForElection" on restart; - // this is to expedite campaign trigger when given larger - // election timeouts (e.g. 
multi-datacenter deploy) - // Or leader messages are being delayed while ticks elapse - setRandomizedElectionTimeout(n3, n3.electionTimeout+2) - for i := 0; i < n3.randomizedElectionTimeout-1; i++ { - n3.tick() - } - - // ideally, before last election tick elapses, - // the follower n3 receives "pb.MsgApp" or "pb.MsgHeartbeat" - // from leader n1, and then resets its "electionElapsed" - // however, last tick may elapse before receiving any - // messages from leader, thus triggering campaign - n3.tick() - - // n1 is still leader yet - // while its heartbeat to candidate n3 is being delayed - - // check state - // n1.state == StateLeader - // n2.state == StateFollower - // n3.state == StateCandidate - if n1.state != StateLeader { - t.Fatalf("node 1 state: %s, want %s", n1.state, StateLeader) - } - if n2.state != StateFollower { - t.Fatalf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StateCandidate { - t.Fatalf("node 3 state: %s, want %s", n3.state, StateCandidate) - } - // check term - // n1.Term == 2 - // n2.Term == 2 - // n3.Term == 3 - if n1.Term != 2 { - t.Fatalf("node 1 term: %d, want %d", n1.Term, 2) - } - if n2.Term != 2 { - t.Fatalf("node 2 term: %d, want %d", n2.Term, 2) - } - if n3.Term != 3 { - t.Fatalf("node 3 term: %d, want %d", n3.Term, 3) - } - - // while outgoing vote requests are still queued in n3, - // leader heartbeat finally arrives at candidate n3 - // however, due to delayed network from leader, leader - // heartbeat was sent with lower term than candidate's - nt.send(pb.Message{From: 1, To: 3, Term: n1.Term, Type: pb.MsgHeartbeat}) - - // then candidate n3 responds with "pb.MsgAppResp" of higher term - // and leader steps down from a message with higher term - // this is to disrupt the current leader, so that candidate - // with higher term can be freed with following election - - // check state - // n1.state == StateFollower - // n2.state == StateFollower - // n3.state == StateCandidate - if n1.state != 
StateFollower { - t.Fatalf("node 1 state: %s, want %s", n1.state, StateFollower) - } - if n2.state != StateFollower { - t.Fatalf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StateCandidate { - t.Fatalf("node 3 state: %s, want %s", n3.state, StateCandidate) - } - // check term - // n1.Term == 3 - // n2.Term == 2 - // n3.Term == 3 - if n1.Term != 3 { - t.Fatalf("node 1 term: %d, want %d", n1.Term, 3) - } - if n2.Term != 2 { - t.Fatalf("node 2 term: %d, want %d", n2.Term, 2) - } - if n3.Term != 3 { - t.Fatalf("node 3 term: %d, want %d", n3.Term, 3) - } -} - -// TestDisruptiveFollowerPreVote tests isolated follower, -// with slow network incoming from leader, election times out -// to become a pre-candidate with less log than current leader. -// Then pre-vote phase prevents this isolated node from forcing -// current leader to step down, thus less disruptions. -func TestDisruptiveFollowerPreVote(t *testing.T) { - n1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n3 := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - n1.checkQuorum = true - n2.checkQuorum = true - n3.checkQuorum = true - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - n3.becomeFollower(1, None) - - nt := newNetwork(n1, n2, n3) - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // check state - // n1.state == StateLeader - // n2.state == StateFollower - // n3.state == StateFollower - if n1.state != StateLeader { - t.Fatalf("node 1 state: %s, want %s", n1.state, StateLeader) - } - if n2.state != StateFollower { - t.Fatalf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StateFollower { - t.Fatalf("node 3 state: %s, want %s", n3.state, StateFollower) - } - - nt.isolate(3) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - nt.send(pb.Message{From: 1, To: 1, Type: 
pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - n1.preVote = true - n2.preVote = true - n3.preVote = true - nt.recover() - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - // check state - // n1.state == StateLeader - // n2.state == StateFollower - // n3.state == StatePreCandidate - if n1.state != StateLeader { - t.Fatalf("node 1 state: %s, want %s", n1.state, StateLeader) - } - if n2.state != StateFollower { - t.Fatalf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StatePreCandidate { - t.Fatalf("node 3 state: %s, want %s", n3.state, StatePreCandidate) - } - // check term - // n1.Term == 2 - // n2.Term == 2 - // n3.Term == 2 - if n1.Term != 2 { - t.Fatalf("node 1 term: %d, want %d", n1.Term, 2) - } - if n2.Term != 2 { - t.Fatalf("node 2 term: %d, want %d", n2.Term, 2) - } - if n3.Term != 2 { - t.Fatalf("node 2 term: %d, want %d", n3.Term, 2) - } - - // delayed leader heartbeat does not force current leader to step down - nt.send(pb.Message{From: 1, To: 3, Term: n1.Term, Type: pb.MsgHeartbeat}) - if n1.state != StateLeader { - t.Fatalf("node 1 state: %s, want %s", n1.state, StateLeader) - } -} - -func TestReadOnlyOptionSafe(t *testing.T) { - a := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - b := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - c := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - nt := newNetwork(a, b, c) - setRandomizedElectionTimeout(b, b.electionTimeout+1) - - for i := 0; i < b.electionTimeout; i++ { - b.tick() - } - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - if a.state != StateLeader { - t.Fatalf("state = %s, want %s", a.state, StateLeader) - } - - tests := []struct { - sm *raft - proposals int - wri uint64 - wctx []byte - }{ - {a, 10, 11, []byte("ctx1")}, - {b, 10, 21, []byte("ctx2")}, - {c, 10, 31, []byte("ctx3")}, - 
{a, 10, 41, []byte("ctx4")}, - {b, 10, 51, []byte("ctx5")}, - {c, 10, 61, []byte("ctx6")}, - } - - for i, tt := range tests { - for j := 0; j < tt.proposals; j++ { - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - } - - nt.send(pb.Message{From: tt.sm.id, To: tt.sm.id, Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: tt.wctx}}}) - - r := tt.sm - if len(r.readStates) == 0 { - t.Errorf("#%d: len(readStates) = 0, want non-zero", i) - } - rs := r.readStates[0] - if rs.Index != tt.wri { - t.Errorf("#%d: readIndex = %d, want %d", i, rs.Index, tt.wri) - } - - if !bytes.Equal(rs.RequestCtx, tt.wctx) { - t.Errorf("#%d: requestCtx = %v, want %v", i, rs.RequestCtx, tt.wctx) - } - r.readStates = nil - } -} - -func TestReadOnlyWithLearner(t *testing.T) { - s := newTestMemoryStorage(withPeers(1), withLearners(2)) - a := newTestLearnerRaft(1, 10, 1, s) - b := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - - nt := newNetwork(a, b) - setRandomizedElectionTimeout(b, b.electionTimeout+1) - - for i := 0; i < b.electionTimeout; i++ { - b.tick() - } - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - if a.state != StateLeader { - t.Fatalf("state = %s, want %s", a.state, StateLeader) - } - - tests := []struct { - sm *raft - proposals int - wri uint64 - wctx []byte - }{ - {a, 10, 11, []byte("ctx1")}, - {b, 10, 21, []byte("ctx2")}, - {a, 10, 31, []byte("ctx3")}, - {b, 10, 41, []byte("ctx4")}, - } - - for i, tt := range tests { - for j := 0; j < tt.proposals; j++ { - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - nextEnts(a, s) // append the entries on the leader - } - - nt.send(pb.Message{From: tt.sm.id, To: tt.sm.id, Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: tt.wctx}}}) - - r := tt.sm - if len(r.readStates) == 0 { - t.Fatalf("#%d: len(readStates) = 0, want non-zero", i) - } - rs := r.readStates[0] - if rs.Index != tt.wri { - t.Errorf("#%d: readIndex = %d, want %d", i, 
rs.Index, tt.wri) - } - - if !bytes.Equal(rs.RequestCtx, tt.wctx) { - t.Errorf("#%d: requestCtx = %v, want %v", i, rs.RequestCtx, tt.wctx) - } - r.readStates = nil - } -} - -func TestReadOnlyOptionLease(t *testing.T) { - a := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - b := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - c := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - a.readOnly.option = ReadOnlyLeaseBased - b.readOnly.option = ReadOnlyLeaseBased - c.readOnly.option = ReadOnlyLeaseBased - a.checkQuorum = true - b.checkQuorum = true - c.checkQuorum = true - - nt := newNetwork(a, b, c) - setRandomizedElectionTimeout(b, b.electionTimeout+1) - - for i := 0; i < b.electionTimeout; i++ { - b.tick() - } - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - if a.state != StateLeader { - t.Fatalf("state = %s, want %s", a.state, StateLeader) - } - - tests := []struct { - sm *raft - proposals int - wri uint64 - wctx []byte - }{ - {a, 10, 11, []byte("ctx1")}, - {b, 10, 21, []byte("ctx2")}, - {c, 10, 31, []byte("ctx3")}, - {a, 10, 41, []byte("ctx4")}, - {b, 10, 51, []byte("ctx5")}, - {c, 10, 61, []byte("ctx6")}, - } - - for i, tt := range tests { - for j := 0; j < tt.proposals; j++ { - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - } - - nt.send(pb.Message{From: tt.sm.id, To: tt.sm.id, Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: tt.wctx}}}) - - r := tt.sm - rs := r.readStates[0] - if rs.Index != tt.wri { - t.Errorf("#%d: readIndex = %d, want %d", i, rs.Index, tt.wri) - } - - if !bytes.Equal(rs.RequestCtx, tt.wctx) { - t.Errorf("#%d: requestCtx = %v, want %v", i, rs.RequestCtx, tt.wctx) - } - r.readStates = nil - } -} - -// TestReadOnlyForNewLeader ensures that a leader only accepts MsgReadIndex message -// when it commits at least one log entry at it term. 
-func TestReadOnlyForNewLeader(t *testing.T) { - nodeConfigs := []struct { - id uint64 - committed uint64 - applied uint64 - compactIndex uint64 - }{ - {1, 1, 1, 0}, - {2, 2, 2, 2}, - {3, 2, 2, 2}, - } - peers := make([]stateMachine, 0) - for _, c := range nodeConfigs { - storage := newTestMemoryStorage(withPeers(1, 2, 3)) - storage.Append([]pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1}}) - storage.SetHardState(pb.HardState{Term: 1, Commit: c.committed}) - if c.compactIndex != 0 { - storage.Compact(c.compactIndex) - } - cfg := newTestConfig(c.id, 10, 1, storage) - cfg.Applied = c.applied - raft := newRaft(cfg) - peers = append(peers, raft) - } - nt := newNetwork(peers...) - - // Drop MsgApp to forbid peer a to commit any log entry at its term after it becomes leader. - nt.ignore(pb.MsgApp) - // Force peer a to become leader. - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - sm := nt.peers[1].(*raft) - if sm.state != StateLeader { - t.Fatalf("state = %s, want %s", sm.state, StateLeader) - } - - // Ensure peer a drops read only request. - var windex uint64 = 4 - wctx := []byte("ctx") - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: wctx}}}) - if len(sm.readStates) != 0 { - t.Fatalf("len(readStates) = %d, want zero", len(sm.readStates)) - } - - nt.recover() - - // Force peer a to commit a log entry at its term - for i := 0; i < sm.heartbeatTimeout; i++ { - sm.tick() - } - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - if sm.raftLog.committed != 4 { - t.Fatalf("committed = %d, want 4", sm.raftLog.committed) - } - lastLogTerm := sm.raftLog.zeroTermOnErrCompacted(sm.raftLog.term(sm.raftLog.committed)) - if lastLogTerm != sm.Term { - t.Fatalf("last log term = %d, want %d", lastLogTerm, sm.Term) - } - - // Ensure peer a processed postponed read only request after it committed an entry at its term. 
- if len(sm.readStates) != 1 { - t.Fatalf("len(readStates) = %d, want 1", len(sm.readStates)) - } - rs := sm.readStates[0] - if rs.Index != windex { - t.Fatalf("readIndex = %d, want %d", rs.Index, windex) - } - if !bytes.Equal(rs.RequestCtx, wctx) { - t.Fatalf("requestCtx = %v, want %v", rs.RequestCtx, wctx) - } - - // Ensure peer a accepts read only request after it committed an entry at its term. - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: wctx}}}) - if len(sm.readStates) != 2 { - t.Fatalf("len(readStates) = %d, want 2", len(sm.readStates)) - } - rs = sm.readStates[1] - if rs.Index != windex { - t.Fatalf("readIndex = %d, want %d", rs.Index, windex) - } - if !bytes.Equal(rs.RequestCtx, wctx) { - t.Fatalf("requestCtx = %v, want %v", rs.RequestCtx, wctx) - } -} - -func TestLeaderAppResp(t *testing.T) { - // initial progress: match = 0; next = 3 - tests := []struct { - index uint64 - reject bool - // progress - wmatch uint64 - wnext uint64 - // message - wmsgNum int - windex uint64 - wcommitted uint64 - }{ - {3, true, 0, 3, 0, 0, 0}, // stale resp; no replies - {2, true, 0, 2, 1, 1, 0}, // denied resp; leader does not commit; decrease next and send probing msg - {2, false, 2, 4, 2, 2, 2}, // accept resp; leader commits; broadcast with commit index - {0, false, 0, 3, 0, 0, 0}, // ignore heartbeat replies - } - - for i, tt := range tests { - // sm term is 1 after it becomes the leader. - // thus the last log term must be 1 to be committed. 
- sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - sm.raftLog = &raftLog{ - storage: &MemoryStorage{ents: []pb.Entry{{}, {Index: 1, Term: 0}, {Index: 2, Term: 1}}}, - unstable: unstable{offset: 3}, - } - sm.becomeCandidate() - sm.becomeLeader() - sm.readMessages() - sm.Step(pb.Message{From: 2, Type: pb.MsgAppResp, Index: tt.index, Term: sm.Term, Reject: tt.reject, RejectHint: tt.index}) - - p := sm.prs.Progress[2] - if p.Match != tt.wmatch { - t.Errorf("#%d match = %d, want %d", i, p.Match, tt.wmatch) - } - if p.Next != tt.wnext { - t.Errorf("#%d next = %d, want %d", i, p.Next, tt.wnext) - } - - msgs := sm.readMessages() - - if len(msgs) != tt.wmsgNum { - t.Errorf("#%d msgNum = %d, want %d", i, len(msgs), tt.wmsgNum) - } - for j, msg := range msgs { - if msg.Index != tt.windex { - t.Errorf("#%d.%d index = %d, want %d", i, j, msg.Index, tt.windex) - } - if msg.Commit != tt.wcommitted { - t.Errorf("#%d.%d commit = %d, want %d", i, j, msg.Commit, tt.wcommitted) - } - } - } -} - -// TestBcastBeat is when the leader receives a heartbeat tick, it should -// send a MsgHeartbeat with m.Index = 0, m.LogTerm=0 and empty entries. 
-func TestBcastBeat(t *testing.T) { - offset := uint64(1000) - // make a state machine with log.offset = 1000 - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: offset, - Term: 1, - ConfState: pb.ConfState{Voters: []uint64{1, 2, 3}}, - }, - } - storage := NewMemoryStorage() - storage.ApplySnapshot(s) - sm := newTestRaft(1, 10, 1, storage) - sm.Term = 1 - - sm.becomeCandidate() - sm.becomeLeader() - for i := 0; i < 10; i++ { - mustAppendEntry(sm, pb.Entry{Index: uint64(i) + 1}) - } - // slow follower - sm.prs.Progress[2].Match, sm.prs.Progress[2].Next = 5, 6 - // normal follower - sm.prs.Progress[3].Match, sm.prs.Progress[3].Next = sm.raftLog.lastIndex(), sm.raftLog.lastIndex()+1 - - sm.Step(pb.Message{Type: pb.MsgBeat}) - msgs := sm.readMessages() - if len(msgs) != 2 { - t.Fatalf("len(msgs) = %v, want 2", len(msgs)) - } - wantCommitMap := map[uint64]uint64{ - 2: min(sm.raftLog.committed, sm.prs.Progress[2].Match), - 3: min(sm.raftLog.committed, sm.prs.Progress[3].Match), - } - for i, m := range msgs { - if m.Type != pb.MsgHeartbeat { - t.Fatalf("#%d: type = %v, want = %v", i, m.Type, pb.MsgHeartbeat) - } - if m.Index != 0 { - t.Fatalf("#%d: prevIndex = %d, want %d", i, m.Index, 0) - } - if m.LogTerm != 0 { - t.Fatalf("#%d: prevTerm = %d, want %d", i, m.LogTerm, 0) - } - if wantCommitMap[m.To] == 0 { - t.Fatalf("#%d: unexpected to %d", i, m.To) - } else { - if m.Commit != wantCommitMap[m.To] { - t.Fatalf("#%d: commit = %d, want %d", i, m.Commit, wantCommitMap[m.To]) - } - delete(wantCommitMap, m.To) - } - if len(m.Entries) != 0 { - t.Fatalf("#%d: len(entries) = %d, want 0", i, len(m.Entries)) - } - } -} - -// TestRecvMsgBeat tests the output of the state machine when receiving MsgBeat -func TestRecvMsgBeat(t *testing.T) { - tests := []struct { - state StateType - wMsg int - }{ - {StateLeader, 2}, - // candidate and follower should ignore MsgBeat - {StateCandidate, 0}, - {StateFollower, 0}, - } - - for i, tt := range tests { - sm := newTestRaft(1, 10, 1, 
newTestMemoryStorage(withPeers(1, 2, 3))) - sm.raftLog = &raftLog{storage: &MemoryStorage{ents: []pb.Entry{{}, {Index: 1, Term: 0}, {Index: 2, Term: 1}}}} - sm.Term = 1 - sm.state = tt.state - switch tt.state { - case StateFollower: - sm.step = stepFollower - case StateCandidate: - sm.step = stepCandidate - case StateLeader: - sm.step = stepLeader - } - sm.Step(pb.Message{From: 1, To: 1, Type: pb.MsgBeat}) - - msgs := sm.readMessages() - if len(msgs) != tt.wMsg { - t.Errorf("%d: len(msgs) = %d, want %d", i, len(msgs), tt.wMsg) - } - for _, m := range msgs { - if m.Type != pb.MsgHeartbeat { - t.Errorf("%d: msg.type = %v, want %v", i, m.Type, pb.MsgHeartbeat) - } - } - } -} - -func TestLeaderIncreaseNext(t *testing.T) { - previousEnts := []pb.Entry{{Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}} - tests := []struct { - // progress - state tracker.StateType - next uint64 - - wnext uint64 - }{ - // state replicate, optimistically increase next - // previous entries + noop entry + propose + 1 - {tracker.StateReplicate, 2, uint64(len(previousEnts) + 1 + 1 + 1)}, - // state probe, not optimistically increase next - {tracker.StateProbe, 2, 2}, - } - - for i, tt := range tests { - sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - sm.raftLog.append(previousEnts...) 
- sm.becomeCandidate() - sm.becomeLeader() - sm.prs.Progress[2].State = tt.state - sm.prs.Progress[2].Next = tt.next - sm.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - - p := sm.prs.Progress[2] - if p.Next != tt.wnext { - t.Errorf("#%d next = %d, want %d", i, p.Next, tt.wnext) - } - } -} - -func TestSendAppendForProgressProbe(t *testing.T) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - r.readMessages() - r.prs.Progress[2].BecomeProbe() - - // each round is a heartbeat - for i := 0; i < 3; i++ { - if i == 0 { - // we expect that raft will only send out one msgAPP on the first - // loop. After that, the follower is paused until a heartbeat response is - // received. - mustAppendEntry(r, pb.Entry{Data: []byte("somedata")}) - r.sendAppend(2) - msg := r.readMessages() - if len(msg) != 1 { - t.Errorf("len(msg) = %d, want %d", len(msg), 1) - } - if msg[0].Index != 0 { - t.Errorf("index = %d, want %d", msg[0].Index, 0) - } - } - - if !r.prs.Progress[2].MsgAppFlowPaused { - t.Errorf("paused = %v, want true", r.prs.Progress[2].MsgAppFlowPaused) - } - for j := 0; j < 10; j++ { - mustAppendEntry(r, pb.Entry{Data: []byte("somedata")}) - r.sendAppend(2) - if l := len(r.readMessages()); l != 0 { - t.Errorf("len(msg) = %d, want %d", l, 0) - } - } - - // do a heartbeat - for j := 0; j < r.heartbeatTimeout; j++ { - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgBeat}) - } - if !r.prs.Progress[2].MsgAppFlowPaused { - t.Errorf("paused = %v, want true", r.prs.Progress[2].MsgAppFlowPaused) - } - - // consume the heartbeat - msg := r.readMessages() - if len(msg) != 1 { - t.Errorf("len(msg) = %d, want %d", len(msg), 1) - } - if msg[0].Type != pb.MsgHeartbeat { - t.Errorf("type = %v, want %v", msg[0].Type, pb.MsgHeartbeat) - } - } - - // a heartbeat response will allow another message to be sent - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgHeartbeatResp}) - msg := 
r.readMessages() - if len(msg) != 1 { - t.Errorf("len(msg) = %d, want %d", len(msg), 1) - } - if msg[0].Index != 0 { - t.Errorf("index = %d, want %d", msg[0].Index, 0) - } - if !r.prs.Progress[2].MsgAppFlowPaused { - t.Errorf("paused = %v, want true", r.prs.Progress[2].MsgAppFlowPaused) - } -} - -func TestSendAppendForProgressReplicate(t *testing.T) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - r.readMessages() - r.prs.Progress[2].BecomeReplicate() - - for i := 0; i < 10; i++ { - mustAppendEntry(r, pb.Entry{Data: []byte("somedata")}) - r.sendAppend(2) - msgs := r.readMessages() - if len(msgs) != 1 { - t.Errorf("len(msg) = %d, want %d", len(msgs), 1) - } - } -} - -func TestSendAppendForProgressSnapshot(t *testing.T) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - r.readMessages() - r.prs.Progress[2].BecomeSnapshot(10) - - for i := 0; i < 10; i++ { - mustAppendEntry(r, pb.Entry{Data: []byte("somedata")}) - r.sendAppend(2) - msgs := r.readMessages() - if len(msgs) != 0 { - t.Errorf("len(msg) = %d, want %d", len(msgs), 0) - } - } -} - -func TestRecvMsgUnreachable(t *testing.T) { - previousEnts := []pb.Entry{{Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}} - s := newTestMemoryStorage(withPeers(1, 2)) - s.Append(previousEnts) - r := newTestRaft(1, 10, 1, s) - r.becomeCandidate() - r.becomeLeader() - r.readMessages() - // set node 2 to state replicate - r.prs.Progress[2].Match = 3 - r.prs.Progress[2].BecomeReplicate() - r.prs.Progress[2].OptimisticUpdate(5) - - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgUnreachable}) - - if r.prs.Progress[2].State != tracker.StateProbe { - t.Errorf("state = %s, want %s", r.prs.Progress[2].State, tracker.StateProbe) - } - if wnext := r.prs.Progress[2].Match + 1; r.prs.Progress[2].Next != wnext { - t.Errorf("next = %d, want %d", r.prs.Progress[2].Next, wnext) - } -} - -func TestRestore(t 
*testing.T) { - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1, 2, 3}}, - }, - } - - storage := newTestMemoryStorage(withPeers(1, 2)) - sm := newTestRaft(1, 10, 1, storage) - if ok := sm.restore(s); !ok { - t.Fatal("restore fail, want succeed") - } - - if sm.raftLog.lastIndex() != s.Metadata.Index { - t.Errorf("log.lastIndex = %d, want %d", sm.raftLog.lastIndex(), s.Metadata.Index) - } - if mustTerm(sm.raftLog.term(s.Metadata.Index)) != s.Metadata.Term { - t.Errorf("log.lastTerm = %d, want %d", mustTerm(sm.raftLog.term(s.Metadata.Index)), s.Metadata.Term) - } - sg := sm.prs.VoterNodes() - if !reflect.DeepEqual(sg, s.Metadata.ConfState.Voters) { - t.Errorf("sm.Voters = %+v, want %+v", sg, s.Metadata.ConfState.Voters) - } - - if ok := sm.restore(s); ok { - t.Fatal("restore succeed, want fail") - } - // It should not campaign before actually applying data. - for i := 0; i < sm.randomizedElectionTimeout; i++ { - sm.tick() - } - if sm.state != StateFollower { - t.Errorf("state = %d, want %d", sm.state, StateFollower) - } -} - -// TestRestoreWithLearner restores a snapshot which contains learners. 
-func TestRestoreWithLearner(t *testing.T) { - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1, 2}, Learners: []uint64{3}}, - }, - } - - storage := newTestMemoryStorage(withPeers(1, 2), withLearners(3)) - sm := newTestLearnerRaft(3, 8, 2, storage) - if ok := sm.restore(s); !ok { - t.Error("restore fail, want succeed") - } - - if sm.raftLog.lastIndex() != s.Metadata.Index { - t.Errorf("log.lastIndex = %d, want %d", sm.raftLog.lastIndex(), s.Metadata.Index) - } - if mustTerm(sm.raftLog.term(s.Metadata.Index)) != s.Metadata.Term { - t.Errorf("log.lastTerm = %d, want %d", mustTerm(sm.raftLog.term(s.Metadata.Index)), s.Metadata.Term) - } - sg := sm.prs.VoterNodes() - if len(sg) != len(s.Metadata.ConfState.Voters) { - t.Errorf("sm.Voters = %+v, length not equal with %+v", sg, s.Metadata.ConfState.Voters) - } - lns := sm.prs.LearnerNodes() - if len(lns) != len(s.Metadata.ConfState.Learners) { - t.Errorf("sm.LearnerNodes = %+v, length not equal with %+v", sg, s.Metadata.ConfState.Learners) - } - for _, n := range s.Metadata.ConfState.Voters { - if sm.prs.Progress[n].IsLearner { - t.Errorf("sm.Node %x isLearner = %s, want %t", n, sm.prs.Progress[n], false) - } - } - for _, n := range s.Metadata.ConfState.Learners { - if !sm.prs.Progress[n].IsLearner { - t.Errorf("sm.Node %x isLearner = %s, want %t", n, sm.prs.Progress[n], true) - } - } - - if ok := sm.restore(s); ok { - t.Error("restore succeed, want fail") - } -} - -// TestRestoreWithVotersOutgoing tests if outgoing voter can receive and apply snapshot correctly. 
-func TestRestoreWithVotersOutgoing(t *testing.T) { - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{2, 3, 4}, VotersOutgoing: []uint64{1, 2, 3}}, - }, - } - - storage := newTestMemoryStorage(withPeers(1, 2)) - sm := newTestRaft(1, 10, 1, storage) - if ok := sm.restore(s); !ok { - t.Fatal("restore fail, want succeed") - } - - if sm.raftLog.lastIndex() != s.Metadata.Index { - t.Errorf("log.lastIndex = %d, want %d", sm.raftLog.lastIndex(), s.Metadata.Index) - } - if mustTerm(sm.raftLog.term(s.Metadata.Index)) != s.Metadata.Term { - t.Errorf("log.lastTerm = %d, want %d", mustTerm(sm.raftLog.term(s.Metadata.Index)), s.Metadata.Term) - } - sg := sm.prs.VoterNodes() - if !reflect.DeepEqual(sg, []uint64{1, 2, 3, 4}) { - t.Errorf("sm.Voters = %+v, want %+v", sg, s.Metadata.ConfState.Voters) - } - - if ok := sm.restore(s); ok { - t.Fatal("restore succeed, want fail") - } - // It should not campaign before actually applying data. - for i := 0; i < sm.randomizedElectionTimeout; i++ { - sm.tick() - } - if sm.state != StateFollower { - t.Errorf("state = %d, want %d", sm.state, StateFollower) - } -} - -// TestRestoreVoterToLearner verifies that a normal peer can be downgraded to a -// learner through a snapshot. At the time of writing, we don't allow -// configuration changes to do this directly, but note that the snapshot may -// compress multiple changes to the configuration into one: the voter could have -// been removed, then readded as a learner and the snapshot reflects both -// changes. In that case, a voter receives a snapshot telling it that it is now -// a learner. In fact, the node has to accept that snapshot, or it is -// permanently cut off from the Raft log. 
-func TestRestoreVoterToLearner(t *testing.T) { - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1, 2}, Learners: []uint64{3}}, - }, - } - - storage := newTestMemoryStorage(withPeers(1, 2, 3)) - sm := newTestRaft(3, 10, 1, storage) - - if sm.isLearner { - t.Errorf("%x is learner, want not", sm.id) - } - if ok := sm.restore(s); !ok { - t.Error("restore failed unexpectedly") - } -} - -// TestRestoreLearnerPromotion checks that a learner can become to a follower after -// restoring snapshot. -func TestRestoreLearnerPromotion(t *testing.T) { - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1, 2, 3}}, - }, - } - - storage := newTestMemoryStorage(withPeers(1, 2), withLearners(3)) - sm := newTestLearnerRaft(3, 10, 1, storage) - - if !sm.isLearner { - t.Errorf("%x is not learner, want yes", sm.id) - } - - if ok := sm.restore(s); !ok { - t.Error("restore fail, want succeed") - } - - if sm.isLearner { - t.Errorf("%x is learner, want not", sm.id) - } -} - -// TestLearnerReceiveSnapshot tests that a learner can receive a snpahost from leader -func TestLearnerReceiveSnapshot(t *testing.T) { - // restore the state machine from a snapshot so it has a compacted log and a snapshot - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}}, - }, - } - - store := newTestMemoryStorage(withPeers(1), withLearners(2)) - n1 := newTestLearnerRaft(1, 10, 1, store) - n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - - n1.restore(s) - ready := newReady(n1, &SoftState{}, pb.HardState{}) - store.ApplySnapshot(ready.Snapshot) - n1.advance(ready) - - // Force set n1 appplied index. 
- n1.raftLog.appliedTo(n1.raftLog.committed) - - nt := newNetwork(n1, n2) - - setRandomizedElectionTimeout(n1, n1.electionTimeout) - for i := 0; i < n1.electionTimeout; i++ { - n1.tick() - } - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgBeat}) - - if n2.raftLog.committed != n1.raftLog.committed { - t.Errorf("peer 2 must commit to %d, but %d", n1.raftLog.committed, n2.raftLog.committed) - } -} - -func TestRestoreIgnoreSnapshot(t *testing.T) { - previousEnts := []pb.Entry{{Term: 1, Index: 1}, {Term: 1, Index: 2}, {Term: 1, Index: 3}} - commit := uint64(1) - storage := newTestMemoryStorage(withPeers(1, 2)) - sm := newTestRaft(1, 10, 1, storage) - sm.raftLog.append(previousEnts...) - sm.raftLog.commitTo(commit) - - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: commit, - Term: 1, - ConfState: pb.ConfState{Voters: []uint64{1, 2}}, - }, - } - - // ignore snapshot - if ok := sm.restore(s); ok { - t.Errorf("restore = %t, want %t", ok, false) - } - if sm.raftLog.committed != commit { - t.Errorf("commit = %d, want %d", sm.raftLog.committed, commit) - } - - // ignore snapshot and fast forward commit - s.Metadata.Index = commit + 1 - if ok := sm.restore(s); ok { - t.Errorf("restore = %t, want %t", ok, false) - } - if sm.raftLog.committed != commit+1 { - t.Errorf("commit = %d, want %d", sm.raftLog.committed, commit+1) - } -} - -func TestProvideSnap(t *testing.T) { - // restore the state machine from a snapshot so it has a compacted log and a snapshot - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1, 2}}, - }, - } - storage := newTestMemoryStorage(withPeers(1)) - sm := newTestRaft(1, 10, 1, storage) - sm.restore(s) - - sm.becomeCandidate() - sm.becomeLeader() - - // force set the next of node 2, so that node 2 needs a snapshot - sm.prs.Progress[2].Next = sm.raftLog.firstIndex() - sm.Step(pb.Message{From: 2, To: 1, Type: pb.MsgAppResp, Index: 
sm.prs.Progress[2].Next - 1, Reject: true}) - - msgs := sm.readMessages() - if len(msgs) != 1 { - t.Fatalf("len(msgs) = %d, want 1", len(msgs)) - } - m := msgs[0] - if m.Type != pb.MsgSnap { - t.Errorf("m.Type = %v, want %v", m.Type, pb.MsgSnap) - } -} - -func TestIgnoreProvidingSnap(t *testing.T) { - // restore the state machine from a snapshot so it has a compacted log and a snapshot - s := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1, 2}}, - }, - } - storage := newTestMemoryStorage(withPeers(1)) - sm := newTestRaft(1, 10, 1, storage) - sm.restore(s) - - sm.becomeCandidate() - sm.becomeLeader() - - // force set the next of node 2, so that node 2 needs a snapshot - // change node 2 to be inactive, expect node 1 ignore sending snapshot to 2 - sm.prs.Progress[2].Next = sm.raftLog.firstIndex() - 1 - sm.prs.Progress[2].RecentActive = false - - sm.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - - msgs := sm.readMessages() - if len(msgs) != 0 { - t.Errorf("len(msgs) = %d, want 0", len(msgs)) - } -} - -func TestRestoreFromSnapMsg(t *testing.T) { - s := &pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - Index: 11, // magic number - Term: 11, // magic number - ConfState: pb.ConfState{Voters: []uint64{1, 2}}, - }, - } - m := pb.Message{Type: pb.MsgSnap, From: 1, Term: 2, Snapshot: s} - - sm := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - sm.Step(m) - - if sm.lead != uint64(1) { - t.Errorf("sm.lead = %d, want 1", sm.lead) - } - - // TODO(bdarnell): what should this test? 
-} - -func TestSlowNodeRestore(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - for j := 0; j <= 100; j++ { - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - } - lead := nt.peers[1].(*raft) - nextEnts(lead, nt.storage[1]) - nt.storage[1].CreateSnapshot(lead.raftLog.applied, &pb.ConfState{Voters: lead.prs.VoterNodes()}, nil) - nt.storage[1].Compact(lead.raftLog.applied) - - nt.recover() - // send heartbeats so that the leader can learn everyone is active. - // node 3 will only be considered as active when node 1 receives a reply from it. - for { - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgBeat}) - if lead.prs.Progress[3].RecentActive { - break - } - } - - // trigger a snapshot - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - - follower := nt.peers[3].(*raft) - - // trigger a commit - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - if follower.raftLog.committed != lead.raftLog.committed { - t.Errorf("follower.committed = %d, want %d", follower.raftLog.committed, lead.raftLog.committed) - } -} - -// TestStepConfig tests that when raft step msgProp in EntryConfChange type, -// it appends the entry to log and sets pendingConf to be true. 
-func TestStepConfig(t *testing.T) { - // a raft that cannot make progress - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - index := r.raftLog.lastIndex() - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Type: pb.EntryConfChange}}}) - if g := r.raftLog.lastIndex(); g != index+1 { - t.Errorf("index = %d, want %d", g, index+1) - } - if r.pendingConfIndex != index+1 { - t.Errorf("pendingConfIndex = %d, want %d", r.pendingConfIndex, index+1) - } -} - -// TestStepIgnoreConfig tests that if raft step the second msgProp in -// EntryConfChange type when the first one is uncommitted, the node will set -// the proposal to noop and keep its original state. -func TestStepIgnoreConfig(t *testing.T) { - // a raft that cannot make progress - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - r.becomeCandidate() - r.becomeLeader() - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Type: pb.EntryConfChange}}}) - index := r.raftLog.lastIndex() - pendingConfIndex := r.pendingConfIndex - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Type: pb.EntryConfChange}}}) - wents := []pb.Entry{{Type: pb.EntryNormal, Term: 1, Index: 3, Data: nil}} - ents, err := r.raftLog.entries(index+1, noLimit) - if err != nil { - t.Fatalf("unexpected error %v", err) - } - if !reflect.DeepEqual(ents, wents) { - t.Errorf("ents = %+v, want %+v", ents, wents) - } - if r.pendingConfIndex != pendingConfIndex { - t.Errorf("pendingConfIndex = %d, want %d", r.pendingConfIndex, pendingConfIndex) - } -} - -// TestNewLeaderPendingConfig tests that new leader sets its pendingConfigIndex -// based on uncommitted entries. 
-func TestNewLeaderPendingConfig(t *testing.T) { - tests := []struct { - addEntry bool - wpendingIndex uint64 - }{ - {false, 0}, - {true, 1}, - } - for i, tt := range tests { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - if tt.addEntry { - mustAppendEntry(r, pb.Entry{Type: pb.EntryNormal}) - } - r.becomeCandidate() - r.becomeLeader() - if r.pendingConfIndex != tt.wpendingIndex { - t.Errorf("#%d: pendingConfIndex = %d, want %d", - i, r.pendingConfIndex, tt.wpendingIndex) - } - } -} - -// TestAddNode tests that addNode could update nodes correctly. -func TestAddNode(t *testing.T) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1))) - r.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddNode}.AsV2()) - nodes := r.prs.VoterNodes() - wnodes := []uint64{1, 2} - if !reflect.DeepEqual(nodes, wnodes) { - t.Errorf("nodes = %v, want %v", nodes, wnodes) - } -} - -// TestAddLearner tests that addLearner could update nodes correctly. -func TestAddLearner(t *testing.T) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1))) - // Add new learner peer. - r.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddLearnerNode}.AsV2()) - if r.isLearner { - t.Fatal("expected 1 to be voter") - } - nodes := r.prs.LearnerNodes() - wnodes := []uint64{2} - if !reflect.DeepEqual(nodes, wnodes) { - t.Errorf("nodes = %v, want %v", nodes, wnodes) - } - if !r.prs.Progress[2].IsLearner { - t.Fatal("expected 2 to be learner") - } - - // Promote peer to voter. - r.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddNode}.AsV2()) - if r.prs.Progress[2].IsLearner { - t.Fatal("expected 2 to be voter") - } - - // Demote r. - r.applyConfChange(pb.ConfChange{NodeID: 1, Type: pb.ConfChangeAddLearnerNode}.AsV2()) - if !r.prs.Progress[1].IsLearner { - t.Fatal("expected 1 to be learner") - } - if !r.isLearner { - t.Fatal("expected 1 to be learner") - } - - // Promote r again. 
- r.applyConfChange(pb.ConfChange{NodeID: 1, Type: pb.ConfChangeAddNode}.AsV2()) - if r.prs.Progress[1].IsLearner { - t.Fatal("expected 1 to be voter") - } - if r.isLearner { - t.Fatal("expected 1 to be voter") - } -} - -// TestAddNodeCheckQuorum tests that addNode does not trigger a leader election -// immediately when checkQuorum is set. -func TestAddNodeCheckQuorum(t *testing.T) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1))) - r.checkQuorum = true - - r.becomeCandidate() - r.becomeLeader() - - for i := 0; i < r.electionTimeout-1; i++ { - r.tick() - } - - r.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddNode}.AsV2()) - - // This tick will reach electionTimeout, which triggers a quorum check. - r.tick() - - // Node 1 should still be the leader after a single tick. - if r.state != StateLeader { - t.Errorf("state = %v, want %v", r.state, StateLeader) - } - - // After another electionTimeout ticks without hearing from node 2, - // node 1 should step down. - for i := 0; i < r.electionTimeout; i++ { - r.tick() - } - - if r.state != StateFollower { - t.Errorf("state = %v, want %v", r.state, StateFollower) - } -} - -// TestRemoveNode tests that removeNode could update nodes and -// removed list correctly. -func TestRemoveNode(t *testing.T) { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2))) - r.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeRemoveNode}.AsV2()) - w := []uint64{1} - if g := r.prs.VoterNodes(); !reflect.DeepEqual(g, w) { - t.Errorf("nodes = %v, want %v", g, w) - } - - // Removing the remaining voter will panic. - defer func() { - if r := recover(); r == nil { - t.Error("did not panic") - } - }() - r.applyConfChange(pb.ConfChange{NodeID: 1, Type: pb.ConfChangeRemoveNode}.AsV2()) -} - -// TestRemoveLearner tests that removeNode could update nodes and -// removed list correctly. 
-func TestRemoveLearner(t *testing.T) { - r := newTestLearnerRaft(1, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - r.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeRemoveNode}.AsV2()) - w := []uint64{1} - if g := r.prs.VoterNodes(); !reflect.DeepEqual(g, w) { - t.Errorf("nodes = %v, want %v", g, w) - } - - w = nil - if g := r.prs.LearnerNodes(); !reflect.DeepEqual(g, w) { - t.Errorf("nodes = %v, want %v", g, w) - } - - // Removing the remaining voter will panic. - defer func() { - if r := recover(); r == nil { - t.Error("did not panic") - } - }() - r.applyConfChange(pb.ConfChange{NodeID: 1, Type: pb.ConfChangeRemoveNode}.AsV2()) -} - -func TestPromotable(t *testing.T) { - id := uint64(1) - tests := []struct { - peers []uint64 - wp bool - }{ - {[]uint64{1}, true}, - {[]uint64{1, 2, 3}, true}, - {[]uint64{}, false}, - {[]uint64{2, 3}, false}, - } - for i, tt := range tests { - r := newTestRaft(id, 5, 1, newTestMemoryStorage(withPeers(tt.peers...))) - if g := r.promotable(); g != tt.wp { - t.Errorf("#%d: promotable = %v, want %v", i, g, tt.wp) - } - } -} - -func TestRaftNodes(t *testing.T) { - tests := []struct { - ids []uint64 - wids []uint64 - }{ - { - []uint64{1, 2, 3}, - []uint64{1, 2, 3}, - }, - { - []uint64{3, 2, 1}, - []uint64{1, 2, 3}, - }, - } - for i, tt := range tests { - r := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(tt.ids...))) - if !reflect.DeepEqual(r.prs.VoterNodes(), tt.wids) { - t.Errorf("#%d: nodes = %+v, want %+v", i, r.prs.VoterNodes(), tt.wids) - } - } -} - -func TestCampaignWhileLeader(t *testing.T) { - testCampaignWhileLeader(t, false) -} - -func TestPreCampaignWhileLeader(t *testing.T) { - testCampaignWhileLeader(t, true) -} - -func testCampaignWhileLeader(t *testing.T, preVote bool) { - cfg := newTestConfig(1, 5, 1, newTestMemoryStorage(withPeers(1))) - cfg.PreVote = preVote - r := newRaft(cfg) - if r.state != StateFollower { - t.Errorf("expected new node to be follower but got %s", r.state) - } - // 
We don't call campaign() directly because it comes after the check - // for our current state. - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - if r.state != StateLeader { - t.Errorf("expected single-node election to become leader but got %s", r.state) - } - term := r.Term - r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - if r.state != StateLeader { - t.Errorf("expected to remain leader but got %s", r.state) - } - if r.Term != term { - t.Errorf("expected to remain in term %v but got %v", term, r.Term) - } -} - -// TestCommitAfterRemoveNode verifies that pending commands can become -// committed when a config change reduces the quorum requirements. -func TestCommitAfterRemoveNode(t *testing.T) { - // Create a cluster with two nodes. - s := newTestMemoryStorage(withPeers(1, 2)) - r := newTestRaft(1, 5, 1, s) - r.becomeCandidate() - r.becomeLeader() - - // Begin to remove the second node. - cc := pb.ConfChange{ - Type: pb.ConfChangeRemoveNode, - NodeID: 2, - } - ccData, err := cc.Marshal() - if err != nil { - t.Fatal(err) - } - r.Step(pb.Message{ - Type: pb.MsgProp, - Entries: []pb.Entry{ - {Type: pb.EntryConfChange, Data: ccData}, - }, - }) - // Stabilize the log and make sure nothing is committed yet. - if ents := nextEnts(r, s); len(ents) > 0 { - t.Fatalf("unexpected committed entries: %v", ents) - } - ccIndex := r.raftLog.lastIndex() - - // While the config change is pending, make another proposal. - r.Step(pb.Message{ - Type: pb.MsgProp, - Entries: []pb.Entry{ - {Type: pb.EntryNormal, Data: []byte("hello")}, - }, - }) - - // Node 2 acknowledges the config change, committing it. 
- r.Step(pb.Message{ - Type: pb.MsgAppResp, - From: 2, - Index: ccIndex, - }) - ents := nextEnts(r, s) - if len(ents) != 2 { - t.Fatalf("expected two committed entries, got %v", ents) - } - if ents[0].Type != pb.EntryNormal || ents[0].Data != nil { - t.Fatalf("expected ents[0] to be empty, but got %v", ents[0]) - } - if ents[1].Type != pb.EntryConfChange { - t.Fatalf("expected ents[1] to be EntryConfChange, got %v", ents[1]) - } - - // Apply the config change. This reduces quorum requirements so the - // pending command can now commit. - r.applyConfChange(cc.AsV2()) - ents = nextEnts(r, s) - if len(ents) != 1 || ents[0].Type != pb.EntryNormal || - string(ents[0].Data) != "hello" { - t.Fatalf("expected one committed EntryNormal, got %v", ents) - } -} - -// TestLeaderTransferToUpToDateNode verifies transferring should succeed -// if the transferee has the most up-to-date log entries when transfer starts. -func TestLeaderTransferToUpToDateNode(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - lead := nt.peers[1].(*raft) - - if lead.lead != 1 { - t.Fatalf("after election leader is %x, want 1", lead.lead) - } - - // Transfer leadership to 2. - nt.send(pb.Message{From: 2, To: 1, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateFollower, 2) - - // After some log replication, transfer leadership back to 1. - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - - nt.send(pb.Message{From: 1, To: 2, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -// TestLeaderTransferToUpToDateNodeFromFollower verifies transferring should succeed -// if the transferee has the most up-to-date log entries when transfer starts. -// Not like TestLeaderTransferToUpToDateNode, where the leader transfer message -// is sent to the leader, in this test case every leader transfer message is sent -// to the follower. 
-func TestLeaderTransferToUpToDateNodeFromFollower(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - lead := nt.peers[1].(*raft) - - if lead.lead != 1 { - t.Fatalf("after election leader is %x, want 1", lead.lead) - } - - // Transfer leadership to 2. - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateFollower, 2) - - // After some log replication, transfer leadership back to 1. - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -// TestLeaderTransferWithCheckQuorum ensures transferring leader still works -// even the current leader is still under its leader lease -func TestLeaderTransferWithCheckQuorum(t *testing.T) { - nt := newNetwork(nil, nil, nil) - for i := 1; i < 4; i++ { - r := nt.peers[uint64(i)].(*raft) - r.checkQuorum = true - setRandomizedElectionTimeout(r, r.electionTimeout+i) - } - - // Letting peer 2 electionElapsed reach to timeout so that it can vote for peer 1 - f := nt.peers[2].(*raft) - for i := 0; i < f.electionTimeout; i++ { - f.tick() - } - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - lead := nt.peers[1].(*raft) - - if lead.lead != 1 { - t.Fatalf("after election leader is %x, want 1", lead.lead) - } - - // Transfer leadership to 2. - nt.send(pb.Message{From: 2, To: 1, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateFollower, 2) - - // After some log replication, transfer leadership back to 1. 
- nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - - nt.send(pb.Message{From: 1, To: 2, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -func TestLeaderTransferToSlowFollower(t *testing.T) { - defaultLogger.EnableDebug() - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - - nt.recover() - lead := nt.peers[1].(*raft) - if lead.prs.Progress[3].Match != 1 { - t.Fatalf("node 1 has match %x for node 3, want %x", lead.prs.Progress[3].Match, 1) - } - - // Transfer leadership to 3 when node 3 is lack of log. - nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateFollower, 3) -} - -func TestLeaderTransferAfterSnapshot(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - lead := nt.peers[1].(*raft) - nextEnts(lead, nt.storage[1]) - nt.storage[1].CreateSnapshot(lead.raftLog.applied, &pb.ConfState{Voters: lead.prs.VoterNodes()}, nil) - nt.storage[1].Compact(lead.raftLog.applied) - - nt.recover() - if lead.prs.Progress[3].Match != 1 { - t.Fatalf("node 1 has match %x for node 3, want %x", lead.prs.Progress[3].Match, 1) - } - - filtered := pb.Message{} - // Snapshot needs to be applied before sending MsgAppResp - nt.msgHook = func(m pb.Message) bool { - if m.Type != pb.MsgAppResp || m.From != 3 || m.Reject { - return true - } - filtered = m - return false - } - // Transfer leadership to 3 when node 3 is lack of snapshot. 
- nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.state != StateLeader { - t.Fatalf("node 1 should still be leader as snapshot is not applied, got %x", lead.state) - } - if reflect.DeepEqual(filtered, pb.Message{}) { - t.Fatalf("Follower should report snapshot progress automatically.") - } - - // Apply snapshot and resume progress - follower := nt.peers[3].(*raft) - ready := newReady(follower, &SoftState{}, pb.HardState{}) - nt.storage[3].ApplySnapshot(ready.Snapshot) - follower.advance(ready) - nt.msgHook = nil - nt.send(filtered) - - checkLeaderTransferState(t, lead, StateFollower, 3) -} - -func TestLeaderTransferToSelf(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - lead := nt.peers[1].(*raft) - - // Transfer leadership to self, there will be noop. - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgTransferLeader}) - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -func TestLeaderTransferToNonExistingNode(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - lead := nt.peers[1].(*raft) - // Transfer leadership to non-existing node, there will be noop. - nt.send(pb.Message{From: 4, To: 1, Type: pb.MsgTransferLeader}) - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -func TestLeaderTransferTimeout(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - - lead := nt.peers[1].(*raft) - - // Transfer leadership to isolated node, wait for timeout. 
- nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - for i := 0; i < lead.heartbeatTimeout; i++ { - lead.tick() - } - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - - for i := 0; i < lead.electionTimeout-lead.heartbeatTimeout; i++ { - lead.tick() - } - - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -func TestLeaderTransferIgnoreProposal(t *testing.T) { - s := newTestMemoryStorage(withPeers(1, 2, 3)) - r := newTestRaft(1, 10, 1, s) - nt := newNetwork(r, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - - lead := nt.peers[1].(*raft) - - nextEnts(r, s) // handle empty entry - - // Transfer leadership to isolated node to let transfer pending, then send proposal. - nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - err := lead.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}}) - if err != ErrProposalDropped { - t.Fatalf("should return drop proposal error while transferring") - } - - if lead.prs.Progress[1].Match != 1 { - t.Fatalf("node 1 has match %x, want %x", lead.prs.Progress[1].Match, 1) - } -} - -func TestLeaderTransferReceiveHigherTermVote(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - - lead := nt.peers[1].(*raft) - - // Transfer leadership to isolated node to let transfer pending. 
- nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup, Index: 1, Term: 2}) - - checkLeaderTransferState(t, lead, StateFollower, 2) -} - -func TestLeaderTransferRemoveNode(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.ignore(pb.MsgTimeoutNow) - - lead := nt.peers[1].(*raft) - - // The leadTransferee is removed when leadship transferring. - nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - - lead.applyConfChange(pb.ConfChange{NodeID: 3, Type: pb.ConfChangeRemoveNode}.AsV2()) - - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -func TestLeaderTransferDemoteNode(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.ignore(pb.MsgTimeoutNow) - - lead := nt.peers[1].(*raft) - - // The leadTransferee is demoted when leadship transferring. - nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - - lead.applyConfChange(pb.ConfChangeV2{ - Changes: []pb.ConfChangeSingle{ - { - Type: pb.ConfChangeRemoveNode, - NodeID: 3, - }, - { - Type: pb.ConfChangeAddLearnerNode, - NodeID: 3, - }, - }, - }) - - // Make the Raft group commit the LeaveJoint entry. - lead.applyConfChange(pb.ConfChangeV2{}) - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -// TestLeaderTransferBack verifies leadership can transfer back to self when last transfer is pending. 
-func TestLeaderTransferBack(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - - lead := nt.peers[1].(*raft) - - nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - - // Transfer leadership back to self. - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -// TestLeaderTransferSecondTransferToAnotherNode verifies leader can transfer to another node -// when last transfer is pending. -func TestLeaderTransferSecondTransferToAnotherNode(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - - lead := nt.peers[1].(*raft) - - nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - - // Transfer leadership to another node. - nt.send(pb.Message{From: 2, To: 1, Type: pb.MsgTransferLeader}) - - checkLeaderTransferState(t, lead, StateFollower, 2) -} - -// TestLeaderTransferSecondTransferToSameNode verifies second transfer leader request -// to the same node should not extend the timeout while the first one is pending. -func TestLeaderTransferSecondTransferToSameNode(t *testing.T) { - nt := newNetwork(nil, nil, nil) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - nt.isolate(3) - - lead := nt.peers[1].(*raft) - - nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - if lead.leadTransferee != 3 { - t.Fatalf("wait transferring, leadTransferee = %v, want %v", lead.leadTransferee, 3) - } - - for i := 0; i < lead.heartbeatTimeout; i++ { - lead.tick() - } - // Second transfer leadership request to the same node. 
- nt.send(pb.Message{From: 3, To: 1, Type: pb.MsgTransferLeader}) - - for i := 0; i < lead.electionTimeout-lead.heartbeatTimeout; i++ { - lead.tick() - } - - checkLeaderTransferState(t, lead, StateLeader, 1) -} - -func checkLeaderTransferState(t *testing.T, r *raft, state StateType, lead uint64) { - if r.state != state || r.lead != lead { - t.Fatalf("after transferring, node has state %v lead %v, want state %v lead %v", r.state, r.lead, state, lead) - } - if r.leadTransferee != None { - t.Fatalf("after transferring, node has leadTransferee %v, want leadTransferee %v", r.leadTransferee, None) - } -} - -// TestTransferNonMember verifies that when a MsgTimeoutNow arrives at -// a node that has been removed from the group, nothing happens. -// (previously, if the node also got votes, it would panic as it -// transitioned to StateLeader) -func TestTransferNonMember(t *testing.T) { - r := newTestRaft(1, 5, 1, newTestMemoryStorage(withPeers(2, 3, 4))) - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgTimeoutNow}) - - r.Step(pb.Message{From: 2, To: 1, Type: pb.MsgVoteResp}) - r.Step(pb.Message{From: 3, To: 1, Type: pb.MsgVoteResp}) - if r.state != StateFollower { - t.Fatalf("state is %s, want StateFollower", r.state) - } -} - -// TestNodeWithSmallerTermCanCompleteElection tests the scenario where a node -// that has been partitioned away (and fallen behind) rejoins the cluster at -// about the same time the leader node gets partitioned away. -// Previously the cluster would come to a standstill when run with PreVote -// enabled. 
-func TestNodeWithSmallerTermCanCompleteElection(t *testing.T) { - n1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n3 := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - n3.becomeFollower(1, None) - - n1.preVote = true - n2.preVote = true - n3.preVote = true - - // cause a network partition to isolate node 3 - nt := newNetwork(n1, n2, n3) - nt.cut(1, 3) - nt.cut(2, 3) - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - sm := nt.peers[1].(*raft) - if sm.state != StateLeader { - t.Errorf("peer 1 state: %s, want %s", sm.state, StateLeader) - } - - sm = nt.peers[2].(*raft) - if sm.state != StateFollower { - t.Errorf("peer 2 state: %s, want %s", sm.state, StateFollower) - } - - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - sm = nt.peers[3].(*raft) - if sm.state != StatePreCandidate { - t.Errorf("peer 3 state: %s, want %s", sm.state, StatePreCandidate) - } - - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - - // check whether the term values are expected - // a.Term == 3 - // b.Term == 3 - // c.Term == 1 - sm = nt.peers[1].(*raft) - if sm.Term != 3 { - t.Errorf("peer 1 term: %d, want %d", sm.Term, 3) - } - - sm = nt.peers[2].(*raft) - if sm.Term != 3 { - t.Errorf("peer 2 term: %d, want %d", sm.Term, 3) - } - - sm = nt.peers[3].(*raft) - if sm.Term != 1 { - t.Errorf("peer 3 term: %d, want %d", sm.Term, 1) - } - - // check state - // a == follower - // b == leader - // c == pre-candidate - sm = nt.peers[1].(*raft) - if sm.state != StateFollower { - t.Errorf("peer 1 state: %s, want %s", sm.state, StateFollower) - } - sm = nt.peers[2].(*raft) - if sm.state != StateLeader { - t.Errorf("peer 2 state: %s, want %s", sm.state, StateLeader) - } - sm = nt.peers[3].(*raft) - if sm.state != StatePreCandidate { - t.Errorf("peer 3 state: %s, want %s", sm.state, StatePreCandidate) - } - - 
sm.logger.Infof("going to bring back peer 3 and kill peer 2") - // recover the network then immediately isolate b which is currently - // the leader, this is to emulate the crash of b. - nt.recover() - nt.cut(2, 1) - nt.cut(2, 3) - - // call for election - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // do we have a leader? - sma := nt.peers[1].(*raft) - smb := nt.peers[3].(*raft) - if sma.state != StateLeader && smb.state != StateLeader { - t.Errorf("no leader") - } -} - -// TestPreVoteWithSplitVote verifies that after split vote, cluster can complete -// election in next round. -func TestPreVoteWithSplitVote(t *testing.T) { - n1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n3 := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - n3.becomeFollower(1, None) - - n1.preVote = true - n2.preVote = true - n3.preVote = true - - nt := newNetwork(n1, n2, n3) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // simulate leader down. followers start split vote. - nt.isolate(1) - nt.send([]pb.Message{ - {From: 2, To: 2, Type: pb.MsgHup}, - {From: 3, To: 3, Type: pb.MsgHup}, - }...) 
- - // check whether the term values are expected - // n2.Term == 3 - // n3.Term == 3 - sm := nt.peers[2].(*raft) - if sm.Term != 3 { - t.Errorf("peer 2 term: %d, want %d", sm.Term, 3) - } - sm = nt.peers[3].(*raft) - if sm.Term != 3 { - t.Errorf("peer 3 term: %d, want %d", sm.Term, 3) - } - - // check state - // n2 == candidate - // n3 == candidate - sm = nt.peers[2].(*raft) - if sm.state != StateCandidate { - t.Errorf("peer 2 state: %s, want %s", sm.state, StateCandidate) - } - sm = nt.peers[3].(*raft) - if sm.state != StateCandidate { - t.Errorf("peer 3 state: %s, want %s", sm.state, StateCandidate) - } - - // node 2 election timeout first - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - - // check whether the term values are expected - // n2.Term == 4 - // n3.Term == 4 - sm = nt.peers[2].(*raft) - if sm.Term != 4 { - t.Errorf("peer 2 term: %d, want %d", sm.Term, 4) - } - sm = nt.peers[3].(*raft) - if sm.Term != 4 { - t.Errorf("peer 3 term: %d, want %d", sm.Term, 4) - } - - // check state - // n2 == leader - // n3 == follower - sm = nt.peers[2].(*raft) - if sm.state != StateLeader { - t.Errorf("peer 2 state: %s, want %s", sm.state, StateLeader) - } - sm = nt.peers[3].(*raft) - if sm.state != StateFollower { - t.Errorf("peer 3 state: %s, want %s", sm.state, StateFollower) - } -} - -// TestPreVoteWithCheckQuorum ensures that after a node become pre-candidate, -// it will checkQuorum correctly. 
-func TestPreVoteWithCheckQuorum(t *testing.T) { - n1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n3 := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - n3.becomeFollower(1, None) - - n1.preVote = true - n2.preVote = true - n3.preVote = true - - n1.checkQuorum = true - n2.checkQuorum = true - n3.checkQuorum = true - - nt := newNetwork(n1, n2, n3) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // isolate node 1. node 2 and node 3 have leader info - nt.isolate(1) - - // check state - sm := nt.peers[1].(*raft) - if sm.state != StateLeader { - t.Fatalf("peer 1 state: %s, want %s", sm.state, StateLeader) - } - sm = nt.peers[2].(*raft) - if sm.state != StateFollower { - t.Fatalf("peer 2 state: %s, want %s", sm.state, StateFollower) - } - sm = nt.peers[3].(*raft) - if sm.state != StateFollower { - t.Fatalf("peer 3 state: %s, want %s", sm.state, StateFollower) - } - - // node 2 will ignore node 3's PreVote - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - - // Do we have a leader? - if n2.state != StateLeader && n3.state != StateFollower { - t.Errorf("no leader") - } -} - -// TestLearnerCampaign verifies that a learner won't campaign even if it receives -// a MsgHup or MsgTimeoutNow. 
-func TestLearnerCampaign(t *testing.T) { - n1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1))) - n1.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddLearnerNode}.AsV2()) - n2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1))) - n2.applyConfChange(pb.ConfChange{NodeID: 2, Type: pb.ConfChangeAddLearnerNode}.AsV2()) - nt := newNetwork(n1, n2) - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - - if !n2.isLearner { - t.Fatalf("failed to make n2 a learner") - } - - if n2.state != StateFollower { - t.Fatalf("n2 campaigned despite being learner") - } - - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - if n1.state != StateLeader || n1.lead != 1 { - t.Fatalf("n1 did not become leader") - } - - // NB: TransferLeader already checks that the recipient is not a learner, but - // the check could have happened by the time the recipient becomes a learner, - // in which case it will receive MsgTimeoutNow as in this test case and we - // verify that it's ignored. - nt.send(pb.Message{From: 1, To: 2, Type: pb.MsgTimeoutNow}) - - if n2.state != StateFollower { - t.Fatalf("n2 accepted leadership transfer despite being learner") - } -} - -// simulate rolling update a cluster for Pre-Vote. cluster has 3 nodes [n1, n2, n3]. 
-// n1 is leader with term 2 -// n2 is follower with term 2 -// n3 is partitioned, with term 4 and less log, state is candidate -func newPreVoteMigrationCluster(t *testing.T) *network { - n1 := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n2 := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - n3 := newTestRaft(3, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - - n1.becomeFollower(1, None) - n2.becomeFollower(1, None) - n3.becomeFollower(1, None) - - n1.preVote = true - n2.preVote = true - // We intentionally do not enable PreVote for n3, this is done so in order - // to simulate a rolling restart process where it's possible to have a mixed - // version cluster with replicas with PreVote enabled, and replicas without. - - nt := newNetwork(n1, n2, n3) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - - // Cause a network partition to isolate n3. - nt.isolate(3) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}}) - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - // check state - // n1.state == StateLeader - // n2.state == StateFollower - // n3.state == StateCandidate - if n1.state != StateLeader { - t.Fatalf("node 1 state: %s, want %s", n1.state, StateLeader) - } - if n2.state != StateFollower { - t.Fatalf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StateCandidate { - t.Fatalf("node 3 state: %s, want %s", n3.state, StateCandidate) - } - - // check term - // n1.Term == 2 - // n2.Term == 2 - // n3.Term == 4 - if n1.Term != 2 { - t.Fatalf("node 1 term: %d, want %d", n1.Term, 2) - } - if n2.Term != 2 { - t.Fatalf("node 2 term: %d, want %d", n2.Term, 2) - } - if n3.Term != 4 { - t.Fatalf("node 3 term: %d, want %d", n3.Term, 4) - } - - // Enable prevote on n3, then recover the network - n3.preVote = true - nt.recover() - - return nt -} - -func 
TestPreVoteMigrationCanCompleteElection(t *testing.T) { - nt := newPreVoteMigrationCluster(t) - - // n1 is leader with term 2 - // n2 is follower with term 2 - // n3 is pre-candidate with term 4, and less log - n2 := nt.peers[2].(*raft) - n3 := nt.peers[3].(*raft) - - // simulate leader down - nt.isolate(1) - - // Call for elections from both n2 and n3. - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - - // check state - // n2.state == Follower - // n3.state == PreCandidate - if n2.state != StateFollower { - t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StatePreCandidate { - t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate) - } - - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) - - // Do we have a leader? - if n2.state != StateLeader && n3.state != StateFollower { - t.Errorf("no leader") - } -} - -func TestPreVoteMigrationWithFreeStuckPreCandidate(t *testing.T) { - nt := newPreVoteMigrationCluster(t) - - // n1 is leader with term 2 - // n2 is follower with term 2 - // n3 is pre-candidate with term 4, and less log - n1 := nt.peers[1].(*raft) - n2 := nt.peers[2].(*raft) - n3 := nt.peers[3].(*raft) - - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - if n1.state != StateLeader { - t.Errorf("node 1 state: %s, want %s", n1.state, StateLeader) - } - if n2.state != StateFollower { - t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StatePreCandidate { - t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate) - } - - // Pre-Vote again for safety - nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup}) - - if n1.state != StateLeader { - t.Errorf("node 1 state: %s, want %s", n1.state, StateLeader) - } - if n2.state != StateFollower { - t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower) - } - if n3.state != StatePreCandidate { - 
t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate) - } - - nt.send(pb.Message{From: 1, To: 3, Type: pb.MsgHeartbeat, Term: n1.Term}) - - // Disrupt the leader so that the stuck peer is freed - if n1.state != StateFollower { - t.Errorf("state = %s, want %s", n1.state, StateFollower) - } - if n3.Term != n1.Term { - t.Errorf("term = %d, want %d", n3.Term, n1.Term) - } -} - -func testConfChangeCheckBeforeCampaign(t *testing.T, v2 bool) { - nt := newNetwork(nil, nil, nil) - n1 := nt.peers[1].(*raft) - n2 := nt.peers[2].(*raft) - nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) - if n1.state != StateLeader { - t.Errorf("node 1 state: %s, want %s", n1.state, StateLeader) - } - - // Begin to remove the third node. - cc := pb.ConfChange{ - Type: pb.ConfChangeRemoveNode, - NodeID: 2, - } - var ccData []byte - var err error - var ty pb.EntryType - if v2 { - ccv2 := cc.AsV2() - ccData, err = ccv2.Marshal() - ty = pb.EntryConfChangeV2 - } else { - ccData, err = cc.Marshal() - ty = pb.EntryConfChange - } - if err != nil { - t.Fatal(err) - } - nt.send(pb.Message{ - From: 1, - To: 1, - Type: pb.MsgProp, - Entries: []pb.Entry{ - {Type: ty, Data: ccData}, - }, - }) - - // Trigger campaign in node 2 - for i := 0; i < n2.randomizedElectionTimeout; i++ { - n2.tick() - } - // It's still follower because committed conf change is not applied. - if n2.state != StateFollower { - t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower) - } - - // Transfer leadership to peer 2. - nt.send(pb.Message{From: 2, To: 1, Type: pb.MsgTransferLeader}) - if n1.state != StateLeader { - t.Errorf("node 1 state: %s, want %s", n1.state, StateLeader) - } - // It's still follower because committed conf change is not applied. 
- if n2.state != StateFollower { - t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower) - } - // Abort transfer leader - for i := 0; i < n1.electionTimeout; i++ { - n1.tick() - } - - // Advance apply - nextEnts(n2, nt.storage[2]) - - // Transfer leadership to peer 2 again. - nt.send(pb.Message{From: 2, To: 1, Type: pb.MsgTransferLeader}) - if n1.state != StateFollower { - t.Errorf("node 1 state: %s, want %s", n1.state, StateFollower) - } - if n2.state != StateLeader { - t.Errorf("node 2 state: %s, want %s", n2.state, StateLeader) - } - - nextEnts(n1, nt.storage[1]) - // Trigger campaign in node 2 - for i := 0; i < n1.randomizedElectionTimeout; i++ { - n1.tick() - } - if n1.state != StateCandidate { - t.Errorf("node 1 state: %s, want %s", n1.state, StateCandidate) - } -} - -// TestConfChangeCheckBeforeCampaign tests if unapplied ConfChange is checked before campaign. -func TestConfChangeCheckBeforeCampaign(t *testing.T) { - testConfChangeCheckBeforeCampaign(t, false) -} - -// TestConfChangeV2CheckBeforeCampaign tests if unapplied ConfChangeV2 is checked before campaign. -func TestConfChangeV2CheckBeforeCampaign(t *testing.T) { - testConfChangeCheckBeforeCampaign(t, true) -} - -func TestFastLogRejection(t *testing.T) { - tests := []struct { - leaderLog []pb.Entry // Logs on the leader - followerLog []pb.Entry // Logs on the follower - rejectHintTerm uint64 // Expected term included in rejected MsgAppResp. - rejectHintIndex uint64 // Expected index included in rejected MsgAppResp. - nextAppendTerm uint64 // Expected term when leader appends after rejected. - nextAppendIndex uint64 // Expected index when leader appends after rejected. - }{ - // This case tests that leader can find the conflict index quickly. - // Firstly leader appends (type=MsgApp,index=7,logTerm=4, entries=...); - // After rejected leader appends (type=MsgApp,index=3,logTerm=2). 
- { - leaderLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 2}, - {Term: 2, Index: 3}, - {Term: 4, Index: 4}, - {Term: 4, Index: 5}, - {Term: 4, Index: 6}, - {Term: 4, Index: 7}, - }, - followerLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 2}, - {Term: 2, Index: 3}, - {Term: 3, Index: 4}, - {Term: 3, Index: 5}, - {Term: 3, Index: 6}, - {Term: 3, Index: 7}, - {Term: 3, Index: 8}, - {Term: 3, Index: 9}, - {Term: 3, Index: 10}, - {Term: 3, Index: 11}, - }, - rejectHintTerm: 3, - rejectHintIndex: 7, - nextAppendTerm: 2, - nextAppendIndex: 3, - }, - // This case tests that leader can find the conflict index quickly. - // Firstly leader appends (type=MsgApp,index=8,logTerm=5, entries=...); - // After rejected leader appends (type=MsgApp,index=4,logTerm=3). - { - leaderLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 2}, - {Term: 2, Index: 3}, - {Term: 3, Index: 4}, - {Term: 4, Index: 5}, - {Term: 4, Index: 6}, - {Term: 4, Index: 7}, - {Term: 5, Index: 8}, - }, - followerLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 2}, - {Term: 2, Index: 3}, - {Term: 3, Index: 4}, - {Term: 3, Index: 5}, - {Term: 3, Index: 6}, - {Term: 3, Index: 7}, - {Term: 3, Index: 8}, - {Term: 3, Index: 9}, - {Term: 3, Index: 10}, - {Term: 3, Index: 11}, - }, - rejectHintTerm: 3, - rejectHintIndex: 8, - nextAppendTerm: 3, - nextAppendIndex: 4, - }, - // This case tests that follower can find the conflict index quickly. - // Firstly leader appends (type=MsgApp,index=4,logTerm=1, entries=...); - // After rejected leader appends (type=MsgApp,index=1,logTerm=1). - { - leaderLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 1, Index: 2}, - {Term: 1, Index: 3}, - {Term: 1, Index: 4}, - }, - followerLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 2}, - {Term: 2, Index: 3}, - {Term: 4, Index: 4}, - }, - rejectHintTerm: 1, - rejectHintIndex: 1, - nextAppendTerm: 1, - nextAppendIndex: 1, - }, - // This case is similar to the previous case. 
However, this time, the - // leader has a longer uncommitted log tail than the follower. - // Firstly leader appends (type=MsgApp,index=6,logTerm=1, entries=...); - // After rejected leader appends (type=MsgApp,index=1,logTerm=1). - { - leaderLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 1, Index: 2}, - {Term: 1, Index: 3}, - {Term: 1, Index: 4}, - {Term: 1, Index: 5}, - {Term: 1, Index: 6}, - }, - followerLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 2}, - {Term: 2, Index: 3}, - {Term: 4, Index: 4}, - }, - rejectHintTerm: 1, - rejectHintIndex: 1, - nextAppendTerm: 1, - nextAppendIndex: 1, - }, - // This case is similar to the previous case. However, this time, the - // follower has a longer uncommitted log tail than the leader. - // Firstly leader appends (type=MsgApp,index=4,logTerm=1, entries=...); - // After rejected leader appends (type=MsgApp,index=1,logTerm=1). - { - leaderLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 1, Index: 2}, - {Term: 1, Index: 3}, - {Term: 1, Index: 4}, - }, - followerLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 2}, - {Term: 2, Index: 3}, - {Term: 4, Index: 4}, - {Term: 4, Index: 5}, - {Term: 4, Index: 6}, - }, - rejectHintTerm: 1, - rejectHintIndex: 1, - nextAppendTerm: 1, - nextAppendIndex: 1, - }, - // An normal case that there are no log conflicts. - // Firstly leader appends (type=MsgApp,index=5,logTerm=5, entries=...); - // After rejected leader appends (type=MsgApp,index=4,logTerm=4). - { - leaderLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 1, Index: 2}, - {Term: 1, Index: 3}, - {Term: 4, Index: 4}, - {Term: 5, Index: 5}, - }, - followerLog: []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 1, Index: 2}, - {Term: 1, Index: 3}, - {Term: 4, Index: 4}, - }, - rejectHintTerm: 4, - rejectHintIndex: 4, - nextAppendTerm: 4, - nextAppendIndex: 4, - }, - // Test case from example comment in stepLeader (on leader). 
- { - leaderLog: []pb.Entry{ - {Term: 2, Index: 1}, - {Term: 5, Index: 2}, - {Term: 5, Index: 3}, - {Term: 5, Index: 4}, - {Term: 5, Index: 5}, - {Term: 5, Index: 6}, - {Term: 5, Index: 7}, - {Term: 5, Index: 8}, - {Term: 5, Index: 9}, - }, - followerLog: []pb.Entry{ - {Term: 2, Index: 1}, - {Term: 4, Index: 2}, - {Term: 4, Index: 3}, - {Term: 4, Index: 4}, - {Term: 4, Index: 5}, - {Term: 4, Index: 6}, - }, - rejectHintTerm: 4, - rejectHintIndex: 6, - nextAppendTerm: 2, - nextAppendIndex: 1, - }, - // Test case from example comment in handleAppendEntries (on follower). - { - leaderLog: []pb.Entry{ - {Term: 2, Index: 1}, - {Term: 2, Index: 2}, - {Term: 2, Index: 3}, - {Term: 2, Index: 4}, - {Term: 2, Index: 5}, - }, - followerLog: []pb.Entry{ - {Term: 2, Index: 1}, - {Term: 4, Index: 2}, - {Term: 4, Index: 3}, - {Term: 4, Index: 4}, - {Term: 4, Index: 5}, - {Term: 4, Index: 6}, - {Term: 4, Index: 7}, - {Term: 4, Index: 8}, - }, - nextAppendTerm: 2, - nextAppendIndex: 1, - rejectHintTerm: 2, - rejectHintIndex: 1, - }, - } - - for i, test := range tests { - t.Run("", func(t *testing.T) { - s1 := NewMemoryStorage() - s1.snapshot.Metadata.ConfState = pb.ConfState{Voters: []uint64{1, 2, 3}} - s1.Append(test.leaderLog) - s2 := NewMemoryStorage() - s2.snapshot.Metadata.ConfState = pb.ConfState{Voters: []uint64{1, 2, 3}} - s2.Append(test.followerLog) - - n1 := newTestRaft(1, 10, 1, s1) - n2 := newTestRaft(2, 10, 1, s2) - - n1.becomeCandidate() - n1.becomeLeader() - - n2.Step(pb.Message{From: 1, To: 1, Type: pb.MsgHeartbeat}) - - msgs := n2.readMessages() - if len(msgs) != 1 { - t.Errorf("can't read 1 message from peer 2") - } - if msgs[0].Type != pb.MsgHeartbeatResp { - t.Errorf("can't read heartbeat response from peer 2") - } - if n1.Step(msgs[0]) != nil { - t.Errorf("peer 1 step heartbeat response fail") - } - - msgs = n1.readMessages() - if len(msgs) != 1 { - t.Errorf("can't read 1 message from peer 1") - } - if msgs[0].Type != pb.MsgApp { - t.Errorf("can't read append 
from peer 1") - } - - if n2.Step(msgs[0]) != nil { - t.Errorf("peer 2 step append fail") - } - msgs = n2.readMessages() - if len(msgs) != 1 { - t.Errorf("can't read 1 message from peer 2") - } - if msgs[0].Type != pb.MsgAppResp { - t.Errorf("can't read append response from peer 2") - } - if !msgs[0].Reject { - t.Errorf("expected rejected append response from peer 2") - } - if msgs[0].LogTerm != test.rejectHintTerm { - t.Fatalf("#%d expected hint log term = %d, but got %d", i, test.rejectHintTerm, msgs[0].LogTerm) - } - if msgs[0].RejectHint != test.rejectHintIndex { - t.Fatalf("#%d expected hint index = %d, but got %d", i, test.rejectHintIndex, msgs[0].RejectHint) - } - - if n1.Step(msgs[0]) != nil { - t.Errorf("peer 1 step append fail") - } - msgs = n1.readMessages() - if msgs[0].LogTerm != test.nextAppendTerm { - t.Fatalf("#%d expected log term = %d, but got %d", i, test.nextAppendTerm, msgs[0].LogTerm) - } - if msgs[0].Index != test.nextAppendIndex { - t.Fatalf("#%d expected index = %d, but got %d", i, test.nextAppendIndex, msgs[0].Index) - } - }) - } -} - -func entsWithConfig(configFunc func(*Config), terms ...uint64) *raft { - storage := NewMemoryStorage() - for i, term := range terms { - storage.Append([]pb.Entry{{Index: uint64(i + 1), Term: term}}) - } - cfg := newTestConfig(1, 5, 1, storage) - if configFunc != nil { - configFunc(cfg) - } - sm := newRaft(cfg) - sm.reset(terms[len(terms)-1]) - return sm -} - -// votedWithConfig creates a raft state machine with Vote and Term set -// to the given value but no log entries (indicating that it voted in -// the given term but has not received any logs). 
-func votedWithConfig(configFunc func(*Config), vote, term uint64) *raft { - storage := NewMemoryStorage() - storage.SetHardState(pb.HardState{Vote: vote, Term: term}) - cfg := newTestConfig(1, 5, 1, storage) - if configFunc != nil { - configFunc(cfg) - } - sm := newRaft(cfg) - sm.reset(term) - return sm -} - -type network struct { - t *testing.T // optional - - peers map[uint64]stateMachine - storage map[uint64]*MemoryStorage - dropm map[connem]float64 - ignorem map[pb.MessageType]bool - - // msgHook is called for each message sent. It may inspect the - // message and return true to send it or false to drop it. - msgHook func(pb.Message) bool -} - -// newNetwork initializes a network from peers. -// A nil node will be replaced with a new *stateMachine. -// A *stateMachine will get its k, id. -// When using stateMachine, the address list is always [1, n]. -func newNetwork(peers ...stateMachine) *network { - return newNetworkWithConfig(nil, peers...) -} - -// newNetworkWithConfig is like newNetwork but calls the given func to -// modify the configuration of any state machines it creates. -func newNetworkWithConfig(configFunc func(*Config), peers ...stateMachine) *network { - size := len(peers) - peerAddrs := idsBySize(size) - - npeers := make(map[uint64]stateMachine, size) - nstorage := make(map[uint64]*MemoryStorage, size) - - for j, p := range peers { - id := peerAddrs[j] - switch v := p.(type) { - case nil: - nstorage[id] = newTestMemoryStorage(withPeers(peerAddrs...)) - cfg := newTestConfig(id, 10, 1, nstorage[id]) - if configFunc != nil { - configFunc(cfg) - } - sm := newRaft(cfg) - npeers[id] = sm - case *raft: - // TODO(tbg): this is all pretty confused. Clean this up. 
- learners := make(map[uint64]bool, len(v.prs.Learners)) - for i := range v.prs.Learners { - learners[i] = true - } - v.id = id - v.prs = tracker.MakeProgressTracker(v.prs.MaxInflight, v.prs.MaxInflightBytes) - if len(learners) > 0 { - v.prs.Learners = map[uint64]struct{}{} - } - for i := 0; i < size; i++ { - pr := &tracker.Progress{} - if _, ok := learners[peerAddrs[i]]; ok { - pr.IsLearner = true - v.prs.Learners[peerAddrs[i]] = struct{}{} - } else { - v.prs.Voters[0][peerAddrs[i]] = struct{}{} - } - v.prs.Progress[peerAddrs[i]] = pr - } - v.reset(v.Term) - npeers[id] = v - case *blackHole: - npeers[id] = v - default: - panic(fmt.Sprintf("unexpected state machine type: %T", p)) - } - } - return &network{ - peers: npeers, - storage: nstorage, - dropm: make(map[connem]float64), - ignorem: make(map[pb.MessageType]bool), - } -} - -func preVoteConfig(c *Config) { - c.PreVote = true -} - -func (nw *network) send(msgs ...pb.Message) { - for len(msgs) > 0 { - m := msgs[0] - p := nw.peers[m.To] - if nw.t != nil { - nw.t.Log(DescribeMessage(m, nil)) - } - p.Step(m) - msgs = append(msgs[1:], nw.filter(p.readMessages())...) 
- } -} - -func (nw *network) drop(from, to uint64, perc float64) { - nw.dropm[connem{from, to}] = perc -} - -func (nw *network) cut(one, other uint64) { - nw.drop(one, other, 2.0) // always drop - nw.drop(other, one, 2.0) // always drop -} - -func (nw *network) isolate(id uint64) { - for i := 0; i < len(nw.peers); i++ { - nid := uint64(i) + 1 - if nid != id { - nw.drop(id, nid, 1.0) // always drop - nw.drop(nid, id, 1.0) // always drop - } - } -} - -func (nw *network) ignore(t pb.MessageType) { - nw.ignorem[t] = true -} - -func (nw *network) recover() { - nw.dropm = make(map[connem]float64) - nw.ignorem = make(map[pb.MessageType]bool) -} - -func (nw *network) filter(msgs []pb.Message) []pb.Message { - var mm []pb.Message - for _, m := range msgs { - if nw.ignorem[m.Type] { - continue - } - switch m.Type { - case pb.MsgHup: - // hups never go over the network, so don't drop them but panic - panic("unexpected msgHup") - default: - perc := nw.dropm[connem{m.From, m.To}] - if n := rand.Float64(); n < perc { - continue - } - } - if nw.msgHook != nil { - if !nw.msgHook(m) { - continue - } - } - mm = append(mm, m) - } - return mm -} - -type connem struct { - from, to uint64 -} - -type blackHole struct{} - -func (blackHole) Step(pb.Message) error { return nil } -func (blackHole) readMessages() []pb.Message { return nil } - -var nopStepper = &blackHole{} - -func idsBySize(size int) []uint64 { - ids := make([]uint64, size) - for i := 0; i < size; i++ { - ids[i] = 1 + uint64(i) - } - return ids -} - -// setRandomizedElectionTimeout set up the value by caller instead of choosing -// by system, in some test scenario we need to fill in some expected value to -// ensure the certainty -func setRandomizedElectionTimeout(r *raft, v int) { - r.randomizedElectionTimeout = v -} - -func newTestConfig(id uint64, election, heartbeat int, storage Storage) *Config { - return &Config{ - ID: id, - ElectionTick: election, - HeartbeatTick: heartbeat, - Storage: storage, - MaxSizePerMsg: 
noLimit, - MaxInflightMsgs: 256, - } -} - -type testMemoryStorageOptions func(*MemoryStorage) - -func withPeers(peers ...uint64) testMemoryStorageOptions { - return func(ms *MemoryStorage) { - ms.snapshot.Metadata.ConfState.Voters = peers - } -} - -func withLearners(learners ...uint64) testMemoryStorageOptions { - return func(ms *MemoryStorage) { - ms.snapshot.Metadata.ConfState.Learners = learners - } -} - -func newTestMemoryStorage(opts ...testMemoryStorageOptions) *MemoryStorage { - ms := NewMemoryStorage() - for _, o := range opts { - o(ms) - } - return ms -} - -func newTestRaft(id uint64, election, heartbeat int, storage Storage) *raft { - return newRaft(newTestConfig(id, election, heartbeat, storage)) -} - -func newTestLearnerRaft(id uint64, election, heartbeat int, storage Storage) *raft { - cfg := newTestConfig(id, election, heartbeat, storage) - return newRaft(cfg) -} - -// newTestRawNode sets up a RawNode with the given peers. The configuration will -// not be reflected in the Storage. -func newTestRawNode(id uint64, election, heartbeat int, storage Storage) *RawNode { - cfg := newTestConfig(id, election, heartbeat, storage) - rn, err := NewRawNode(cfg) - if err != nil { - panic(err) - } - return rn -} diff --git a/raft/raftpb/confchange.go b/raft/raftpb/confchange.go deleted file mode 100644 index a3ddff62fd0d..000000000000 --- a/raft/raftpb/confchange.go +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package raftpb - -import ( - "fmt" - "strconv" - "strings" - - "github.com/gogo/protobuf/proto" -) - -// ConfChangeI abstracts over ConfChangeV2 and (legacy) ConfChange to allow -// treating them in a unified manner. -type ConfChangeI interface { - AsV2() ConfChangeV2 - AsV1() (ConfChange, bool) -} - -// MarshalConfChange calls Marshal on the underlying ConfChange or ConfChangeV2 -// and returns the result along with the corresponding EntryType. -func MarshalConfChange(c ConfChangeI) (EntryType, []byte, error) { - var typ EntryType - var ccdata []byte - var err error - if c == nil { - // A nil data unmarshals into an empty ConfChangeV2 and has the benefit - // that appendEntry can never refuse it based on its size (which - // registers as zero). - typ = EntryConfChangeV2 - ccdata = nil - } else if ccv1, ok := c.AsV1(); ok { - typ = EntryConfChange - ccdata, err = ccv1.Marshal() - } else { - ccv2 := c.AsV2() - typ = EntryConfChangeV2 - ccdata, err = ccv2.Marshal() - } - return typ, ccdata, err -} - -// AsV2 returns a V2 configuration change carrying out the same operation. -func (c ConfChange) AsV2() ConfChangeV2 { - return ConfChangeV2{ - Changes: []ConfChangeSingle{{ - Type: c.Type, - NodeID: c.NodeID, - }}, - Context: c.Context, - } -} - -// AsV1 returns the ConfChange and true. -func (c ConfChange) AsV1() (ConfChange, bool) { - return c, true -} - -// AsV2 is the identity. -func (c ConfChangeV2) AsV2() ConfChangeV2 { return c } - -// AsV1 returns ConfChange{} and false. -func (c ConfChangeV2) AsV1() (ConfChange, bool) { return ConfChange{}, false } - -// EnterJoint returns two bools. The second bool is true if and only if this -// config change will use Joint Consensus, which is the case if it contains more -// than one change or if the use of Joint Consensus was requested explicitly. 
-// The first bool can only be true if second one is, and indicates whether the -// Joint State will be left automatically. -func (c ConfChangeV2) EnterJoint() (autoLeave bool, ok bool) { - // NB: in theory, more config changes could qualify for the "simple" - // protocol but it depends on the config on top of which the changes apply. - // For example, adding two learners is not OK if both nodes are part of the - // base config (i.e. two voters are turned into learners in the process of - // applying the conf change). In practice, these distinctions should not - // matter, so we keep it simple and use Joint Consensus liberally. - if c.Transition != ConfChangeTransitionAuto || len(c.Changes) > 1 { - // Use Joint Consensus. - var autoLeave bool - switch c.Transition { - case ConfChangeTransitionAuto: - autoLeave = true - case ConfChangeTransitionJointImplicit: - autoLeave = true - case ConfChangeTransitionJointExplicit: - default: - panic(fmt.Sprintf("unknown transition: %+v", c)) - } - return autoLeave, true - } - return false, false -} - -// LeaveJoint is true if the configuration change leaves a joint configuration. -// This is the case if the ConfChangeV2 is zero, with the possible exception of -// the Context field. -func (c ConfChangeV2) LeaveJoint() bool { - // NB: c is already a copy. - c.Context = nil - return proto.Equal(&c, &ConfChangeV2{}) -} - -// ConfChangesFromString parses a Space-delimited sequence of operations into a -// slice of ConfChangeSingle. The supported operations are: -// - vn: make n a voter, -// - ln: make n a learner, -// - rn: remove n, and -// - un: update n. 
-func ConfChangesFromString(s string) ([]ConfChangeSingle, error) { - var ccs []ConfChangeSingle - toks := strings.Split(strings.TrimSpace(s), " ") - if toks[0] == "" { - toks = nil - } - for _, tok := range toks { - if len(tok) < 2 { - return nil, fmt.Errorf("unknown token %s", tok) - } - var cc ConfChangeSingle - switch tok[0] { - case 'v': - cc.Type = ConfChangeAddNode - case 'l': - cc.Type = ConfChangeAddLearnerNode - case 'r': - cc.Type = ConfChangeRemoveNode - case 'u': - cc.Type = ConfChangeUpdateNode - default: - return nil, fmt.Errorf("unknown input: %s", tok) - } - id, err := strconv.ParseUint(tok[1:], 10, 64) - if err != nil { - return nil, err - } - cc.NodeID = id - ccs = append(ccs, cc) - } - return ccs, nil -} - -// ConfChangesToString is the inverse to ConfChangesFromString. -func ConfChangesToString(ccs []ConfChangeSingle) string { - var buf strings.Builder - for i, cc := range ccs { - if i > 0 { - buf.WriteByte(' ') - } - switch cc.Type { - case ConfChangeAddNode: - buf.WriteByte('v') - case ConfChangeAddLearnerNode: - buf.WriteByte('l') - case ConfChangeRemoveNode: - buf.WriteByte('r') - case ConfChangeUpdateNode: - buf.WriteByte('u') - default: - buf.WriteString("unknown") - } - fmt.Fprintf(&buf, "%d", cc.NodeID) - } - return buf.String() -} diff --git a/raft/raftpb/confstate.go b/raft/raftpb/confstate.go deleted file mode 100644 index 39b9dd70004d..000000000000 --- a/raft/raftpb/confstate.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package raftpb - -import ( - "fmt" - "reflect" - "sort" -) - -// Equivalent returns a nil error if the inputs describe the same configuration. -// On mismatch, returns a descriptive error showing the differences. -func (cs ConfState) Equivalent(cs2 ConfState) error { - cs1 := cs - orig1, orig2 := cs1, cs2 - s := func(sl *[]uint64) { - *sl = append([]uint64(nil), *sl...) - sort.Slice(*sl, func(i, j int) bool { return (*sl)[i] < (*sl)[j] }) - } - - for _, cs := range []*ConfState{&cs1, &cs2} { - s(&cs.Voters) - s(&cs.Learners) - s(&cs.VotersOutgoing) - s(&cs.LearnersNext) - } - - if !reflect.DeepEqual(cs1, cs2) { - return fmt.Errorf("ConfStates not equivalent after sorting:\n%+#v\n%+#v\nInputs were:\n%+#v\n%+#v", cs1, cs2, orig1, orig2) - } - return nil -} diff --git a/raft/raftpb/confstate_test.go b/raft/raftpb/confstate_test.go deleted file mode 100644 index 712d71583171..000000000000 --- a/raft/raftpb/confstate_test.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raftpb - -import ( - "testing" -) - -func TestConfState_Equivalent(t *testing.T) { - type testCase struct { - cs, cs2 ConfState - ok bool - } - - testCases := []testCase{ - // Reordered voters and learners. 
- {ConfState{ - Voters: []uint64{1, 2, 3}, - Learners: []uint64{5, 4, 6}, - VotersOutgoing: []uint64{9, 8, 7}, - LearnersNext: []uint64{10, 20, 15}, - }, ConfState{ - Voters: []uint64{1, 2, 3}, - Learners: []uint64{4, 5, 6}, - VotersOutgoing: []uint64{7, 9, 8}, - LearnersNext: []uint64{20, 10, 15}, - }, true}, - // Not sensitive to nil vs empty slice. - {ConfState{Voters: []uint64{}}, ConfState{Voters: []uint64(nil)}, true}, - // Non-equivalent voters. - {ConfState{Voters: []uint64{1, 2, 3, 4}}, ConfState{Voters: []uint64{2, 1, 3}}, false}, - {ConfState{Voters: []uint64{1, 4, 3}}, ConfState{Voters: []uint64{2, 1, 3}}, false}, - // Non-equivalent learners. - {ConfState{Voters: []uint64{1, 2, 3, 4}}, ConfState{Voters: []uint64{2, 1, 3}}, false}, - // Sensitive to AutoLeave flag. - {ConfState{AutoLeave: true}, ConfState{}, false}, - } - - for _, tc := range testCases { - t.Run("", func(t *testing.T) { - if err := tc.cs.Equivalent(tc.cs2); (err == nil) != tc.ok { - t.Fatalf("wanted error: %t, got:\n%s", tc.ok, err) - } - }) - } -} diff --git a/raft/raftpb/raft.pb.go b/raft/raftpb/raft.pb.go deleted file mode 100644 index cdbfe3aa5631..000000000000 --- a/raft/raftpb/raft.pb.go +++ /dev/null @@ -1,2999 +0,0 @@ -// Code generated by protoc-gen-gogo. DO NOT EDIT. -// source: raft.proto - -package raftpb - -import ( - fmt "fmt" - io "io" - math "math" - math_bits "math/bits" - - _ "github.com/gogo/protobuf/gogoproto" - proto "github.com/golang/protobuf/proto" -) - -// Reference imports to suppress errors if they are not otherwise used. -var _ = proto.Marshal -var _ = fmt.Errorf -var _ = math.Inf - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the proto package it is being compiled against. -// A compilation error at this line likely means your copy of the -// proto package needs to be updated. 
-const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package - -type EntryType int32 - -const ( - EntryNormal EntryType = 0 - EntryConfChange EntryType = 1 - EntryConfChangeV2 EntryType = 2 -) - -var EntryType_name = map[int32]string{ - 0: "EntryNormal", - 1: "EntryConfChange", - 2: "EntryConfChangeV2", -} - -var EntryType_value = map[string]int32{ - "EntryNormal": 0, - "EntryConfChange": 1, - "EntryConfChangeV2": 2, -} - -func (x EntryType) Enum() *EntryType { - p := new(EntryType) - *p = x - return p -} - -func (x EntryType) String() string { - return proto.EnumName(EntryType_name, int32(x)) -} - -func (x *EntryType) UnmarshalJSON(data []byte) error { - value, err := proto.UnmarshalJSONEnum(EntryType_value, data, "EntryType") - if err != nil { - return err - } - *x = EntryType(value) - return nil -} - -func (EntryType) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{0} -} - -// For description of different message types, see: -// https://pkg.go.dev/go.etcd.io/etcd/raft/v3#hdr-MessageType -type MessageType int32 - -const ( - MsgHup MessageType = 0 - MsgBeat MessageType = 1 - MsgProp MessageType = 2 - MsgApp MessageType = 3 - MsgAppResp MessageType = 4 - MsgVote MessageType = 5 - MsgVoteResp MessageType = 6 - MsgSnap MessageType = 7 - MsgHeartbeat MessageType = 8 - MsgHeartbeatResp MessageType = 9 - MsgUnreachable MessageType = 10 - MsgSnapStatus MessageType = 11 - MsgCheckQuorum MessageType = 12 - MsgTransferLeader MessageType = 13 - MsgTimeoutNow MessageType = 14 - MsgReadIndex MessageType = 15 - MsgReadIndexResp MessageType = 16 - MsgPreVote MessageType = 17 - MsgPreVoteResp MessageType = 18 -) - -var MessageType_name = map[int32]string{ - 0: "MsgHup", - 1: "MsgBeat", - 2: "MsgProp", - 3: "MsgApp", - 4: "MsgAppResp", - 5: "MsgVote", - 6: "MsgVoteResp", - 7: "MsgSnap", - 8: "MsgHeartbeat", - 9: "MsgHeartbeatResp", - 10: "MsgUnreachable", - 11: "MsgSnapStatus", - 12: "MsgCheckQuorum", - 13: "MsgTransferLeader", - 
14: "MsgTimeoutNow", - 15: "MsgReadIndex", - 16: "MsgReadIndexResp", - 17: "MsgPreVote", - 18: "MsgPreVoteResp", -} - -var MessageType_value = map[string]int32{ - "MsgHup": 0, - "MsgBeat": 1, - "MsgProp": 2, - "MsgApp": 3, - "MsgAppResp": 4, - "MsgVote": 5, - "MsgVoteResp": 6, - "MsgSnap": 7, - "MsgHeartbeat": 8, - "MsgHeartbeatResp": 9, - "MsgUnreachable": 10, - "MsgSnapStatus": 11, - "MsgCheckQuorum": 12, - "MsgTransferLeader": 13, - "MsgTimeoutNow": 14, - "MsgReadIndex": 15, - "MsgReadIndexResp": 16, - "MsgPreVote": 17, - "MsgPreVoteResp": 18, -} - -func (x MessageType) Enum() *MessageType { - p := new(MessageType) - *p = x - return p -} - -func (x MessageType) String() string { - return proto.EnumName(MessageType_name, int32(x)) -} - -func (x *MessageType) UnmarshalJSON(data []byte) error { - value, err := proto.UnmarshalJSONEnum(MessageType_value, data, "MessageType") - if err != nil { - return err - } - *x = MessageType(value) - return nil -} - -func (MessageType) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{1} -} - -// ConfChangeTransition specifies the behavior of a configuration change with -// respect to joint consensus. -type ConfChangeTransition int32 - -const ( - // Automatically use the simple protocol if possible, otherwise fall back - // to ConfChangeJointImplicit. Most applications will want to use this. - ConfChangeTransitionAuto ConfChangeTransition = 0 - // Use joint consensus unconditionally, and transition out of them - // automatically (by proposing a zero configuration change). - // - // This option is suitable for applications that want to minimize the time - // spent in the joint configuration and do not store the joint configuration - // in the state machine (outside of InitialState). - ConfChangeTransitionJointImplicit ConfChangeTransition = 1 - // Use joint consensus and remain in the joint configuration until the - // application proposes a no-op configuration change. 
This is suitable for - // applications that want to explicitly control the transitions, for example - // to use a custom payload (via the Context field). - ConfChangeTransitionJointExplicit ConfChangeTransition = 2 -) - -var ConfChangeTransition_name = map[int32]string{ - 0: "ConfChangeTransitionAuto", - 1: "ConfChangeTransitionJointImplicit", - 2: "ConfChangeTransitionJointExplicit", -} - -var ConfChangeTransition_value = map[string]int32{ - "ConfChangeTransitionAuto": 0, - "ConfChangeTransitionJointImplicit": 1, - "ConfChangeTransitionJointExplicit": 2, -} - -func (x ConfChangeTransition) Enum() *ConfChangeTransition { - p := new(ConfChangeTransition) - *p = x - return p -} - -func (x ConfChangeTransition) String() string { - return proto.EnumName(ConfChangeTransition_name, int32(x)) -} - -func (x *ConfChangeTransition) UnmarshalJSON(data []byte) error { - value, err := proto.UnmarshalJSONEnum(ConfChangeTransition_value, data, "ConfChangeTransition") - if err != nil { - return err - } - *x = ConfChangeTransition(value) - return nil -} - -func (ConfChangeTransition) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{2} -} - -type ConfChangeType int32 - -const ( - ConfChangeAddNode ConfChangeType = 0 - ConfChangeRemoveNode ConfChangeType = 1 - ConfChangeUpdateNode ConfChangeType = 2 - ConfChangeAddLearnerNode ConfChangeType = 3 -) - -var ConfChangeType_name = map[int32]string{ - 0: "ConfChangeAddNode", - 1: "ConfChangeRemoveNode", - 2: "ConfChangeUpdateNode", - 3: "ConfChangeAddLearnerNode", -} - -var ConfChangeType_value = map[string]int32{ - "ConfChangeAddNode": 0, - "ConfChangeRemoveNode": 1, - "ConfChangeUpdateNode": 2, - "ConfChangeAddLearnerNode": 3, -} - -func (x ConfChangeType) Enum() *ConfChangeType { - p := new(ConfChangeType) - *p = x - return p -} - -func (x ConfChangeType) String() string { - return proto.EnumName(ConfChangeType_name, int32(x)) -} - -func (x *ConfChangeType) UnmarshalJSON(data []byte) error { - value, 
err := proto.UnmarshalJSONEnum(ConfChangeType_value, data, "ConfChangeType") - if err != nil { - return err - } - *x = ConfChangeType(value) - return nil -} - -func (ConfChangeType) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{3} -} - -type Entry struct { - Term uint64 `protobuf:"varint,2,opt,name=Term" json:"Term"` - Index uint64 `protobuf:"varint,3,opt,name=Index" json:"Index"` - Type EntryType `protobuf:"varint,1,opt,name=Type,enum=raftpb.EntryType" json:"Type"` - Data []byte `protobuf:"bytes,4,opt,name=Data" json:"Data,omitempty"` -} - -func (m *Entry) Reset() { *m = Entry{} } -func (m *Entry) String() string { return proto.CompactTextString(m) } -func (*Entry) ProtoMessage() {} -func (*Entry) Descriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{0} -} -func (m *Entry) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *Entry) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_Entry.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *Entry) XXX_Merge(src proto.Message) { - xxx_messageInfo_Entry.Merge(m, src) -} -func (m *Entry) XXX_Size() int { - return m.Size() -} -func (m *Entry) XXX_DiscardUnknown() { - xxx_messageInfo_Entry.DiscardUnknown(m) -} - -var xxx_messageInfo_Entry proto.InternalMessageInfo - -type SnapshotMetadata struct { - ConfState ConfState `protobuf:"bytes,1,opt,name=conf_state,json=confState" json:"conf_state"` - Index uint64 `protobuf:"varint,2,opt,name=index" json:"index"` - Term uint64 `protobuf:"varint,3,opt,name=term" json:"term"` -} - -func (m *SnapshotMetadata) Reset() { *m = SnapshotMetadata{} } -func (m *SnapshotMetadata) String() string { return proto.CompactTextString(m) } -func (*SnapshotMetadata) ProtoMessage() {} -func (*SnapshotMetadata) Descriptor() ([]byte, []int) { - return 
fileDescriptor_b042552c306ae59b, []int{1} -} -func (m *SnapshotMetadata) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *SnapshotMetadata) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_SnapshotMetadata.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *SnapshotMetadata) XXX_Merge(src proto.Message) { - xxx_messageInfo_SnapshotMetadata.Merge(m, src) -} -func (m *SnapshotMetadata) XXX_Size() int { - return m.Size() -} -func (m *SnapshotMetadata) XXX_DiscardUnknown() { - xxx_messageInfo_SnapshotMetadata.DiscardUnknown(m) -} - -var xxx_messageInfo_SnapshotMetadata proto.InternalMessageInfo - -type Snapshot struct { - Data []byte `protobuf:"bytes,1,opt,name=data" json:"data,omitempty"` - Metadata SnapshotMetadata `protobuf:"bytes,2,opt,name=metadata" json:"metadata"` -} - -func (m *Snapshot) Reset() { *m = Snapshot{} } -func (m *Snapshot) String() string { return proto.CompactTextString(m) } -func (*Snapshot) ProtoMessage() {} -func (*Snapshot) Descriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{2} -} -func (m *Snapshot) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *Snapshot) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_Snapshot.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *Snapshot) XXX_Merge(src proto.Message) { - xxx_messageInfo_Snapshot.Merge(m, src) -} -func (m *Snapshot) XXX_Size() int { - return m.Size() -} -func (m *Snapshot) XXX_DiscardUnknown() { - xxx_messageInfo_Snapshot.DiscardUnknown(m) -} - -var xxx_messageInfo_Snapshot proto.InternalMessageInfo - -type Message struct { - Type MessageType 
`protobuf:"varint,1,opt,name=type,enum=raftpb.MessageType" json:"type"` - To uint64 `protobuf:"varint,2,opt,name=to" json:"to"` - From uint64 `protobuf:"varint,3,opt,name=from" json:"from"` - Term uint64 `protobuf:"varint,4,opt,name=term" json:"term"` - // logTerm is generally used for appending Raft logs to followers. For example, - // (type=MsgApp,index=100,logTerm=5) means leader appends entries starting at - // index=101, and the term of entry at index 100 is 5. - // (type=MsgAppResp,reject=true,index=100,logTerm=5) means follower rejects some - // entries from its leader as it already has an entry with term 5 at index 100. - LogTerm uint64 `protobuf:"varint,5,opt,name=logTerm" json:"logTerm"` - Index uint64 `protobuf:"varint,6,opt,name=index" json:"index"` - Entries []Entry `protobuf:"bytes,7,rep,name=entries" json:"entries"` - Commit uint64 `protobuf:"varint,8,opt,name=commit" json:"commit"` - // snapshot is non-nil and non-empty for MsgSnap messages and nil for all other - // message types. However, peer nodes running older binary versions may send a - // non-nil, empty value for the snapshot field of non-MsgSnap messages. Code - // should be prepared to handle such messages. 
- Snapshot *Snapshot `protobuf:"bytes,9,opt,name=snapshot" json:"snapshot,omitempty"` - Reject bool `protobuf:"varint,10,opt,name=reject" json:"reject"` - RejectHint uint64 `protobuf:"varint,11,opt,name=rejectHint" json:"rejectHint"` - Context []byte `protobuf:"bytes,12,opt,name=context" json:"context,omitempty"` -} - -func (m *Message) Reset() { *m = Message{} } -func (m *Message) String() string { return proto.CompactTextString(m) } -func (*Message) ProtoMessage() {} -func (*Message) Descriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{3} -} -func (m *Message) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *Message) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_Message.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *Message) XXX_Merge(src proto.Message) { - xxx_messageInfo_Message.Merge(m, src) -} -func (m *Message) XXX_Size() int { - return m.Size() -} -func (m *Message) XXX_DiscardUnknown() { - xxx_messageInfo_Message.DiscardUnknown(m) -} - -var xxx_messageInfo_Message proto.InternalMessageInfo - -type HardState struct { - Term uint64 `protobuf:"varint,1,opt,name=term" json:"term"` - Vote uint64 `protobuf:"varint,2,opt,name=vote" json:"vote"` - Commit uint64 `protobuf:"varint,3,opt,name=commit" json:"commit"` -} - -func (m *HardState) Reset() { *m = HardState{} } -func (m *HardState) String() string { return proto.CompactTextString(m) } -func (*HardState) ProtoMessage() {} -func (*HardState) Descriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{4} -} -func (m *HardState) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *HardState) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_HardState.Marshal(b, m, deterministic) - } else { - b = 
b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *HardState) XXX_Merge(src proto.Message) { - xxx_messageInfo_HardState.Merge(m, src) -} -func (m *HardState) XXX_Size() int { - return m.Size() -} -func (m *HardState) XXX_DiscardUnknown() { - xxx_messageInfo_HardState.DiscardUnknown(m) -} - -var xxx_messageInfo_HardState proto.InternalMessageInfo - -type ConfState struct { - // The voters in the incoming config. (If the configuration is not joint, - // then the outgoing config is empty). - Voters []uint64 `protobuf:"varint,1,rep,name=voters" json:"voters,omitempty"` - // The learners in the incoming config. - Learners []uint64 `protobuf:"varint,2,rep,name=learners" json:"learners,omitempty"` - // The voters in the outgoing config. - VotersOutgoing []uint64 `protobuf:"varint,3,rep,name=voters_outgoing,json=votersOutgoing" json:"voters_outgoing,omitempty"` - // The nodes that will become learners when the outgoing config is removed. - // These nodes are necessarily currently in nodes_joint (or they would have - // been added to the incoming config right away). - LearnersNext []uint64 `protobuf:"varint,4,rep,name=learners_next,json=learnersNext" json:"learners_next,omitempty"` - // If set, the config is joint and Raft will automatically transition into - // the final config (i.e. remove the outgoing config) when this is safe. 
- AutoLeave bool `protobuf:"varint,5,opt,name=auto_leave,json=autoLeave" json:"auto_leave"` -} - -func (m *ConfState) Reset() { *m = ConfState{} } -func (m *ConfState) String() string { return proto.CompactTextString(m) } -func (*ConfState) ProtoMessage() {} -func (*ConfState) Descriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{5} -} -func (m *ConfState) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *ConfState) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_ConfState.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *ConfState) XXX_Merge(src proto.Message) { - xxx_messageInfo_ConfState.Merge(m, src) -} -func (m *ConfState) XXX_Size() int { - return m.Size() -} -func (m *ConfState) XXX_DiscardUnknown() { - xxx_messageInfo_ConfState.DiscardUnknown(m) -} - -var xxx_messageInfo_ConfState proto.InternalMessageInfo - -type ConfChange struct { - Type ConfChangeType `protobuf:"varint,2,opt,name=type,enum=raftpb.ConfChangeType" json:"type"` - NodeID uint64 `protobuf:"varint,3,opt,name=node_id,json=nodeId" json:"node_id"` - Context []byte `protobuf:"bytes,4,opt,name=context" json:"context,omitempty"` - // NB: this is used only by etcd to thread through a unique identifier. - // Ideally it should really use the Context instead. No counterpart to - // this field exists in ConfChangeV2. 
- ID uint64 `protobuf:"varint,1,opt,name=id" json:"id"` -} - -func (m *ConfChange) Reset() { *m = ConfChange{} } -func (m *ConfChange) String() string { return proto.CompactTextString(m) } -func (*ConfChange) ProtoMessage() {} -func (*ConfChange) Descriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{6} -} -func (m *ConfChange) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *ConfChange) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_ConfChange.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *ConfChange) XXX_Merge(src proto.Message) { - xxx_messageInfo_ConfChange.Merge(m, src) -} -func (m *ConfChange) XXX_Size() int { - return m.Size() -} -func (m *ConfChange) XXX_DiscardUnknown() { - xxx_messageInfo_ConfChange.DiscardUnknown(m) -} - -var xxx_messageInfo_ConfChange proto.InternalMessageInfo - -// ConfChangeSingle is an individual configuration change operation. Multiple -// such operations can be carried out atomically via a ConfChangeV2. 
-type ConfChangeSingle struct { - Type ConfChangeType `protobuf:"varint,1,opt,name=type,enum=raftpb.ConfChangeType" json:"type"` - NodeID uint64 `protobuf:"varint,2,opt,name=node_id,json=nodeId" json:"node_id"` -} - -func (m *ConfChangeSingle) Reset() { *m = ConfChangeSingle{} } -func (m *ConfChangeSingle) String() string { return proto.CompactTextString(m) } -func (*ConfChangeSingle) ProtoMessage() {} -func (*ConfChangeSingle) Descriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{7} -} -func (m *ConfChangeSingle) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *ConfChangeSingle) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_ConfChangeSingle.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *ConfChangeSingle) XXX_Merge(src proto.Message) { - xxx_messageInfo_ConfChangeSingle.Merge(m, src) -} -func (m *ConfChangeSingle) XXX_Size() int { - return m.Size() -} -func (m *ConfChangeSingle) XXX_DiscardUnknown() { - xxx_messageInfo_ConfChangeSingle.DiscardUnknown(m) -} - -var xxx_messageInfo_ConfChangeSingle proto.InternalMessageInfo - -// ConfChangeV2 messages initiate configuration changes. They support both the -// simple "one at a time" membership change protocol and full Joint Consensus -// allowing for arbitrary changes in membership. -// -// The supplied context is treated as an opaque payload and can be used to -// attach an action on the state machine to the application of the config change -// proposal. Note that contrary to Joint Consensus as outlined in the Raft -// paper[1], configuration changes become active when they are *applied* to the -// state machine (not when they are appended to the log). -// -// The simple protocol can be used whenever only a single change is made. 
-// -// Non-simple changes require the use of Joint Consensus, for which two -// configuration changes are run. The first configuration change specifies the -// desired changes and transitions the Raft group into the joint configuration, -// in which quorum requires a majority of both the pre-changes and post-changes -// configuration. Joint Consensus avoids entering fragile intermediate -// configurations that could compromise survivability. For example, without the -// use of Joint Consensus and running across three availability zones with a -// replication factor of three, it is not possible to replace a voter without -// entering an intermediate configuration that does not survive the outage of -// one availability zone. -// -// The provided ConfChangeTransition specifies how (and whether) Joint Consensus -// is used, and assigns the task of leaving the joint configuration either to -// Raft or the application. Leaving the joint configuration is accomplished by -// proposing a ConfChangeV2 with only and optionally the Context field -// populated. 
-// -// For details on Raft membership changes, see: -// -// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf -type ConfChangeV2 struct { - Transition ConfChangeTransition `protobuf:"varint,1,opt,name=transition,enum=raftpb.ConfChangeTransition" json:"transition"` - Changes []ConfChangeSingle `protobuf:"bytes,2,rep,name=changes" json:"changes"` - Context []byte `protobuf:"bytes,3,opt,name=context" json:"context,omitempty"` -} - -func (m *ConfChangeV2) Reset() { *m = ConfChangeV2{} } -func (m *ConfChangeV2) String() string { return proto.CompactTextString(m) } -func (*ConfChangeV2) ProtoMessage() {} -func (*ConfChangeV2) Descriptor() ([]byte, []int) { - return fileDescriptor_b042552c306ae59b, []int{8} -} -func (m *ConfChangeV2) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *ConfChangeV2) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_ConfChangeV2.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *ConfChangeV2) XXX_Merge(src proto.Message) { - xxx_messageInfo_ConfChangeV2.Merge(m, src) -} -func (m *ConfChangeV2) XXX_Size() int { - return m.Size() -} -func (m *ConfChangeV2) XXX_DiscardUnknown() { - xxx_messageInfo_ConfChangeV2.DiscardUnknown(m) -} - -var xxx_messageInfo_ConfChangeV2 proto.InternalMessageInfo - -func init() { - proto.RegisterEnum("raftpb.EntryType", EntryType_name, EntryType_value) - proto.RegisterEnum("raftpb.MessageType", MessageType_name, MessageType_value) - proto.RegisterEnum("raftpb.ConfChangeTransition", ConfChangeTransition_name, ConfChangeTransition_value) - proto.RegisterEnum("raftpb.ConfChangeType", ConfChangeType_name, ConfChangeType_value) - proto.RegisterType((*Entry)(nil), "raftpb.Entry") - proto.RegisterType((*SnapshotMetadata)(nil), "raftpb.SnapshotMetadata") - proto.RegisterType((*Snapshot)(nil), 
"raftpb.Snapshot") - proto.RegisterType((*Message)(nil), "raftpb.Message") - proto.RegisterType((*HardState)(nil), "raftpb.HardState") - proto.RegisterType((*ConfState)(nil), "raftpb.ConfState") - proto.RegisterType((*ConfChange)(nil), "raftpb.ConfChange") - proto.RegisterType((*ConfChangeSingle)(nil), "raftpb.ConfChangeSingle") - proto.RegisterType((*ConfChangeV2)(nil), "raftpb.ConfChangeV2") -} - -func init() { proto.RegisterFile("raft.proto", fileDescriptor_b042552c306ae59b) } - -var fileDescriptor_b042552c306ae59b = []byte{ - // 1028 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x55, 0xcd, 0x6e, 0xdb, 0x46, - 0x17, 0xe5, 0x50, 0xb4, 0x7e, 0xae, 0x64, 0x79, 0x7c, 0xe3, 0x2f, 0x20, 0x0c, 0x43, 0xd1, 0xa7, - 0xa4, 0x88, 0xe0, 0x22, 0x6e, 0xa1, 0x45, 0x51, 0x74, 0xe7, 0x9f, 0x00, 0x76, 0x61, 0xb9, 0xa9, - 0xec, 0x78, 0x51, 0xa0, 0x30, 0xc6, 0xe2, 0x88, 0x66, 0x2b, 0x72, 0x08, 0x72, 0xe4, 0xda, 0x9b, - 0xa2, 0xe8, 0x13, 0x74, 0xd9, 0x4d, 0xb6, 0x7d, 0x80, 0x3e, 0x85, 0x97, 0x06, 0xba, 0xe9, 0x2a, - 0x68, 0xec, 0x17, 0x29, 0x66, 0x38, 0x94, 0x28, 0xd9, 0xc8, 0xa2, 0xbb, 0x99, 0x73, 0xcf, 0xdc, - 0x39, 0xe7, 0xde, 0xcb, 0x21, 0x40, 0xc2, 0x46, 0x72, 0x2b, 0x4e, 0x84, 0x14, 0x58, 0x56, 0xeb, - 0xf8, 0x7c, 0x7d, 0xcd, 0x17, 0xbe, 0xd0, 0xd0, 0x67, 0x6a, 0x95, 0x45, 0x3b, 0x3f, 0xc3, 0xd2, - 0xeb, 0x48, 0x26, 0xd7, 0xe8, 0x82, 0x73, 0xc2, 0x93, 0xd0, 0xb5, 0xdb, 0xa4, 0xeb, 0xec, 0x38, - 0x37, 0xef, 0x9f, 0x59, 0x03, 0x8d, 0xe0, 0x3a, 0x2c, 0x1d, 0x44, 0x1e, 0xbf, 0x72, 0x4b, 0x85, - 0x50, 0x06, 0xe1, 0xa7, 0xe0, 0x9c, 0x5c, 0xc7, 0xdc, 0x25, 0x6d, 0xd2, 0x6d, 0xf6, 0x56, 0xb7, - 0xb2, 0xbb, 0xb6, 0x74, 0x4a, 0x15, 0x98, 0x26, 0xba, 0x8e, 0x39, 0x22, 0x38, 0x7b, 0x4c, 0x32, - 0xd7, 0x69, 0x93, 0x6e, 0x63, 0xa0, 0xd7, 0x9d, 0x5f, 0x08, 0xd0, 0xe3, 0x88, 0xc5, 0xe9, 0x85, - 0x90, 0x7d, 0x2e, 0x99, 0xc7, 0x24, 0xc3, 0x2f, 0x00, 0x86, 0x22, 0x1a, 0x9d, 0xa5, 0x92, 0xc9, - 0x2c, 0x77, 0x7d, 0x96, 0x7b, 0x57, 0x44, 
0xa3, 0x63, 0x15, 0x30, 0xb9, 0x6b, 0xc3, 0x1c, 0x50, - 0x4a, 0x03, 0xad, 0xb4, 0x68, 0x22, 0x83, 0x94, 0x3f, 0xa9, 0xfc, 0x15, 0x4d, 0x68, 0xa4, 0xf3, - 0x1d, 0x54, 0x73, 0x05, 0x4a, 0xa2, 0x52, 0xa0, 0xef, 0x6c, 0x0c, 0xf4, 0x1a, 0xbf, 0x82, 0x6a, - 0x68, 0x94, 0xe9, 0xc4, 0xf5, 0x9e, 0x9b, 0x6b, 0x59, 0x54, 0x6e, 0xf2, 0x4e, 0xf9, 0x9d, 0x77, - 0x25, 0xa8, 0xf4, 0x79, 0x9a, 0x32, 0x9f, 0xe3, 0x2b, 0x70, 0xe4, 0xac, 0x56, 0x4f, 0xf2, 0x1c, - 0x26, 0x5c, 0xac, 0x96, 0xa2, 0xe1, 0x1a, 0xd8, 0x52, 0xcc, 0x39, 0xb1, 0xa5, 0x50, 0x36, 0x46, - 0x89, 0x58, 0xb0, 0xa1, 0x90, 0xa9, 0x41, 0x67, 0xd1, 0x20, 0xb6, 0xa0, 0x32, 0x16, 0xbe, 0xee, - 0xee, 0x52, 0x21, 0x98, 0x83, 0xb3, 0xb2, 0x95, 0x1f, 0x96, 0xed, 0x15, 0x54, 0x78, 0x24, 0x93, - 0x80, 0xa7, 0x6e, 0xa5, 0x5d, 0xea, 0xd6, 0x7b, 0xcb, 0x73, 0x3d, 0xce, 0x53, 0x19, 0x0e, 0x6e, - 0x40, 0x79, 0x28, 0xc2, 0x30, 0x90, 0x6e, 0xb5, 0x90, 0xcb, 0x60, 0xd8, 0x83, 0x6a, 0x6a, 0x2a, - 0xe6, 0xd6, 0x74, 0x25, 0xe9, 0x62, 0x25, 0xf5, 0x09, 0x32, 0x98, 0xf2, 0x54, 0xc6, 0x84, 0xff, - 0xc0, 0x87, 0xd2, 0x85, 0x36, 0xe9, 0x56, 0xf3, 0x8c, 0x19, 0x86, 0x2f, 0x00, 0xb2, 0xd5, 0x7e, - 0x10, 0x49, 0xb7, 0x5e, 0xb8, 0xb3, 0x80, 0xa3, 0x0b, 0x95, 0xa1, 0x88, 0x24, 0xbf, 0x92, 0x6e, - 0x43, 0x37, 0x36, 0xdf, 0x76, 0xbe, 0x87, 0xda, 0x3e, 0x4b, 0xbc, 0x6c, 0x7c, 0xf2, 0x0a, 0x92, - 0x07, 0x15, 0x74, 0xc1, 0xb9, 0x14, 0x92, 0xcf, 0x7f, 0x1c, 0x0a, 0x29, 0x18, 0x2e, 0x3d, 0x34, - 0xdc, 0xf9, 0x93, 0x40, 0x6d, 0x3a, 0xaf, 0xf8, 0x14, 0xca, 0xea, 0x4c, 0x92, 0xba, 0xa4, 0x5d, - 0xea, 0x3a, 0x03, 0xb3, 0xc3, 0x75, 0xa8, 0x8e, 0x39, 0x4b, 0x22, 0x15, 0xb1, 0x75, 0x64, 0xba, - 0xc7, 0x97, 0xb0, 0x92, 0xb1, 0xce, 0xc4, 0x44, 0xfa, 0x22, 0x88, 0x7c, 0xb7, 0xa4, 0x29, 0xcd, - 0x0c, 0xfe, 0xc6, 0xa0, 0xf8, 0x1c, 0x96, 0xf3, 0x43, 0x67, 0x91, 0x72, 0xea, 0x68, 0x5a, 0x23, - 0x07, 0x8f, 0xf8, 0x95, 0xc4, 0xe7, 0x00, 0x6c, 0x22, 0xc5, 0xd9, 0x98, 0xb3, 0x4b, 0xae, 0x87, - 0x21, 0x2f, 0x68, 0x4d, 0xe1, 0x87, 0x0a, 0xee, 0xbc, 0x23, 0x00, 0x4a, 0xf4, 
0xee, 0x05, 0x8b, - 0x7c, 0x8e, 0x9f, 0x9b, 0xb1, 0xb5, 0xf5, 0xd8, 0x3e, 0x2d, 0x7e, 0x86, 0x19, 0xe3, 0xc1, 0xe4, - 0xbe, 0x84, 0x4a, 0x24, 0x3c, 0x7e, 0x16, 0x78, 0xa6, 0x28, 0x4d, 0x15, 0xbc, 0x7b, 0xff, 0xac, - 0x7c, 0x24, 0x3c, 0x7e, 0xb0, 0x37, 0x28, 0xab, 0xf0, 0x81, 0x57, 0xec, 0x8b, 0x33, 0xd7, 0x17, - 0x5c, 0x07, 0x3b, 0xf0, 0x4c, 0x23, 0xc0, 0x9c, 0xb6, 0x0f, 0xf6, 0x06, 0x76, 0xe0, 0x75, 0x42, - 0xa0, 0xb3, 0xcb, 0x8f, 0x83, 0xc8, 0x1f, 0xcf, 0x44, 0x92, 0xff, 0x22, 0xd2, 0xfe, 0x98, 0xc8, - 0xce, 0x1f, 0x04, 0x1a, 0xb3, 0x3c, 0xa7, 0x3d, 0xdc, 0x01, 0x90, 0x09, 0x8b, 0xd2, 0x40, 0x06, - 0x22, 0x32, 0x37, 0x6e, 0x3c, 0x72, 0xe3, 0x94, 0x93, 0x4f, 0xe4, 0xec, 0x14, 0x7e, 0x09, 0x95, - 0xa1, 0x66, 0x65, 0x1d, 0x2f, 0x3c, 0x29, 0x8b, 0xd6, 0xf2, 0x2f, 0xcc, 0xd0, 0x8b, 0x35, 0x2b, - 0xcd, 0xd5, 0x6c, 0x73, 0x1f, 0x6a, 0xd3, 0x77, 0x17, 0x57, 0xa0, 0xae, 0x37, 0x47, 0x22, 0x09, - 0xd9, 0x98, 0x5a, 0xf8, 0x04, 0x56, 0x34, 0x30, 0xcb, 0x4f, 0x09, 0xfe, 0x0f, 0x56, 0x17, 0xc0, - 0xd3, 0x1e, 0xb5, 0x37, 0xff, 0xb2, 0xa1, 0x5e, 0x78, 0x96, 0x10, 0xa0, 0xdc, 0x4f, 0xfd, 0xfd, - 0x49, 0x4c, 0x2d, 0xac, 0x43, 0xa5, 0x9f, 0xfa, 0x3b, 0x9c, 0x49, 0x4a, 0xcc, 0xe6, 0x4d, 0x22, - 0x62, 0x6a, 0x1b, 0xd6, 0x76, 0x1c, 0xd3, 0x12, 0x36, 0x01, 0xb2, 0xf5, 0x80, 0xa7, 0x31, 0x75, - 0x0c, 0xf1, 0x54, 0x48, 0x4e, 0x97, 0x94, 0x36, 0xb3, 0xd1, 0xd1, 0xb2, 0x89, 0xaa, 0x27, 0x80, - 0x56, 0x90, 0x42, 0x43, 0x5d, 0xc6, 0x59, 0x22, 0xcf, 0xd5, 0x2d, 0x55, 0x5c, 0x03, 0x5a, 0x44, - 0xf4, 0xa1, 0x1a, 0x22, 0x34, 0xfb, 0xa9, 0xff, 0x36, 0x4a, 0x38, 0x1b, 0x5e, 0xb0, 0xf3, 0x31, - 0xa7, 0x80, 0xab, 0xb0, 0x6c, 0x12, 0xa9, 0x2f, 0x6e, 0x92, 0xd2, 0xba, 0xa1, 0xed, 0x5e, 0xf0, - 0xe1, 0x8f, 0xdf, 0x4e, 0x44, 0x32, 0x09, 0x69, 0x43, 0xd9, 0xee, 0xa7, 0xbe, 0x6e, 0xd0, 0x88, - 0x27, 0x87, 0x9c, 0x79, 0x3c, 0xa1, 0xcb, 0xe6, 0xf4, 0x49, 0x10, 0x72, 0x31, 0x91, 0x47, 0xe2, - 0x27, 0xda, 0x34, 0x62, 0x06, 0x9c, 0x79, 0xfa, 0x7f, 0x47, 0x57, 0x8c, 0x98, 0x29, 0xa2, 0xc5, - 0x50, 0xe3, 0xf7, 
0x4d, 0xc2, 0xb5, 0xc5, 0x55, 0x73, 0xab, 0xd9, 0x6b, 0x0e, 0x6e, 0xfe, 0x4a, - 0x60, 0xed, 0xb1, 0xf1, 0xc0, 0x0d, 0x70, 0x1f, 0xc3, 0xb7, 0x27, 0x52, 0x50, 0x0b, 0x3f, 0x81, - 0xff, 0x3f, 0x16, 0xfd, 0x5a, 0x04, 0x91, 0x3c, 0x08, 0xe3, 0x71, 0x30, 0x0c, 0x54, 0x2b, 0x3e, - 0x46, 0x7b, 0x7d, 0x65, 0x68, 0xf6, 0xe6, 0x35, 0x34, 0xe7, 0x3f, 0x0a, 0x55, 0x8c, 0x19, 0xb2, - 0xed, 0x79, 0x6a, 0xfc, 0xa9, 0x85, 0x6e, 0x51, 0xec, 0x80, 0x87, 0xe2, 0x92, 0xeb, 0x08, 0x99, - 0x8f, 0xbc, 0x8d, 0x3d, 0x26, 0xb3, 0x88, 0x3d, 0x6f, 0x64, 0xdb, 0xf3, 0x0e, 0xb3, 0xb7, 0x47, - 0x47, 0x4b, 0x3b, 0x2f, 0x6e, 0x3e, 0xb4, 0xac, 0xdb, 0x0f, 0x2d, 0xeb, 0xe6, 0xae, 0x45, 0x6e, - 0xef, 0x5a, 0xe4, 0x9f, 0xbb, 0x16, 0xf9, 0xed, 0xbe, 0x65, 0xfd, 0x7e, 0xdf, 0xb2, 0x6e, 0xef, - 0x5b, 0xd6, 0xdf, 0xf7, 0x2d, 0xeb, 0xdf, 0x00, 0x00, 0x00, 0xff, 0xff, 0xc9, 0x35, 0x94, 0xd2, - 0xbb, 0x08, 0x00, 0x00, -} - -func (m *Entry) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *Entry) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *Entry) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - if m.Data != nil { - i -= len(m.Data) - copy(dAtA[i:], m.Data) - i = encodeVarintRaft(dAtA, i, uint64(len(m.Data))) - i-- - dAtA[i] = 0x22 - } - i = encodeVarintRaft(dAtA, i, uint64(m.Index)) - i-- - dAtA[i] = 0x18 - i = encodeVarintRaft(dAtA, i, uint64(m.Term)) - i-- - dAtA[i] = 0x10 - i = encodeVarintRaft(dAtA, i, uint64(m.Type)) - i-- - dAtA[i] = 0x8 - return len(dAtA) - i, nil -} - -func (m *SnapshotMetadata) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *SnapshotMetadata) 
MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *SnapshotMetadata) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - i = encodeVarintRaft(dAtA, i, uint64(m.Term)) - i-- - dAtA[i] = 0x18 - i = encodeVarintRaft(dAtA, i, uint64(m.Index)) - i-- - dAtA[i] = 0x10 - { - size, err := m.ConfState.MarshalToSizedBuffer(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = encodeVarintRaft(dAtA, i, uint64(size)) - } - i-- - dAtA[i] = 0xa - return len(dAtA) - i, nil -} - -func (m *Snapshot) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *Snapshot) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *Snapshot) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - { - size, err := m.Metadata.MarshalToSizedBuffer(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = encodeVarintRaft(dAtA, i, uint64(size)) - } - i-- - dAtA[i] = 0x12 - if m.Data != nil { - i -= len(m.Data) - copy(dAtA[i:], m.Data) - i = encodeVarintRaft(dAtA, i, uint64(len(m.Data))) - i-- - dAtA[i] = 0xa - } - return len(dAtA) - i, nil -} - -func (m *Message) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *Message) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *Message) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - if m.Context != nil { - i -= len(m.Context) - copy(dAtA[i:], m.Context) - i = encodeVarintRaft(dAtA, i, uint64(len(m.Context))) 
- i-- - dAtA[i] = 0x62 - } - i = encodeVarintRaft(dAtA, i, uint64(m.RejectHint)) - i-- - dAtA[i] = 0x58 - i-- - if m.Reject { - dAtA[i] = 1 - } else { - dAtA[i] = 0 - } - i-- - dAtA[i] = 0x50 - if m.Snapshot != nil { - { - size, err := m.Snapshot.MarshalToSizedBuffer(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = encodeVarintRaft(dAtA, i, uint64(size)) - } - i-- - dAtA[i] = 0x4a - } - i = encodeVarintRaft(dAtA, i, uint64(m.Commit)) - i-- - dAtA[i] = 0x40 - if len(m.Entries) > 0 { - for iNdEx := len(m.Entries) - 1; iNdEx >= 0; iNdEx-- { - { - size, err := m.Entries[iNdEx].MarshalToSizedBuffer(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = encodeVarintRaft(dAtA, i, uint64(size)) - } - i-- - dAtA[i] = 0x3a - } - } - i = encodeVarintRaft(dAtA, i, uint64(m.Index)) - i-- - dAtA[i] = 0x30 - i = encodeVarintRaft(dAtA, i, uint64(m.LogTerm)) - i-- - dAtA[i] = 0x28 - i = encodeVarintRaft(dAtA, i, uint64(m.Term)) - i-- - dAtA[i] = 0x20 - i = encodeVarintRaft(dAtA, i, uint64(m.From)) - i-- - dAtA[i] = 0x18 - i = encodeVarintRaft(dAtA, i, uint64(m.To)) - i-- - dAtA[i] = 0x10 - i = encodeVarintRaft(dAtA, i, uint64(m.Type)) - i-- - dAtA[i] = 0x8 - return len(dAtA) - i, nil -} - -func (m *HardState) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *HardState) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *HardState) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - i = encodeVarintRaft(dAtA, i, uint64(m.Commit)) - i-- - dAtA[i] = 0x18 - i = encodeVarintRaft(dAtA, i, uint64(m.Vote)) - i-- - dAtA[i] = 0x10 - i = encodeVarintRaft(dAtA, i, uint64(m.Term)) - i-- - dAtA[i] = 0x8 - return len(dAtA) - i, nil -} - -func (m *ConfState) Marshal() (dAtA []byte, err error) { - size := 
m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *ConfState) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *ConfState) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - i-- - if m.AutoLeave { - dAtA[i] = 1 - } else { - dAtA[i] = 0 - } - i-- - dAtA[i] = 0x28 - if len(m.LearnersNext) > 0 { - for iNdEx := len(m.LearnersNext) - 1; iNdEx >= 0; iNdEx-- { - i = encodeVarintRaft(dAtA, i, uint64(m.LearnersNext[iNdEx])) - i-- - dAtA[i] = 0x20 - } - } - if len(m.VotersOutgoing) > 0 { - for iNdEx := len(m.VotersOutgoing) - 1; iNdEx >= 0; iNdEx-- { - i = encodeVarintRaft(dAtA, i, uint64(m.VotersOutgoing[iNdEx])) - i-- - dAtA[i] = 0x18 - } - } - if len(m.Learners) > 0 { - for iNdEx := len(m.Learners) - 1; iNdEx >= 0; iNdEx-- { - i = encodeVarintRaft(dAtA, i, uint64(m.Learners[iNdEx])) - i-- - dAtA[i] = 0x10 - } - } - if len(m.Voters) > 0 { - for iNdEx := len(m.Voters) - 1; iNdEx >= 0; iNdEx-- { - i = encodeVarintRaft(dAtA, i, uint64(m.Voters[iNdEx])) - i-- - dAtA[i] = 0x8 - } - } - return len(dAtA) - i, nil -} - -func (m *ConfChange) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *ConfChange) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *ConfChange) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - if m.Context != nil { - i -= len(m.Context) - copy(dAtA[i:], m.Context) - i = encodeVarintRaft(dAtA, i, uint64(len(m.Context))) - i-- - dAtA[i] = 0x22 - } - i = encodeVarintRaft(dAtA, i, uint64(m.NodeID)) - i-- - dAtA[i] = 0x18 - i = encodeVarintRaft(dAtA, i, uint64(m.Type)) - i-- - 
dAtA[i] = 0x10 - i = encodeVarintRaft(dAtA, i, uint64(m.ID)) - i-- - dAtA[i] = 0x8 - return len(dAtA) - i, nil -} - -func (m *ConfChangeSingle) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *ConfChangeSingle) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *ConfChangeSingle) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - i = encodeVarintRaft(dAtA, i, uint64(m.NodeID)) - i-- - dAtA[i] = 0x10 - i = encodeVarintRaft(dAtA, i, uint64(m.Type)) - i-- - dAtA[i] = 0x8 - return len(dAtA) - i, nil -} - -func (m *ConfChangeV2) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *ConfChangeV2) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *ConfChangeV2) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - if m.Context != nil { - i -= len(m.Context) - copy(dAtA[i:], m.Context) - i = encodeVarintRaft(dAtA, i, uint64(len(m.Context))) - i-- - dAtA[i] = 0x1a - } - if len(m.Changes) > 0 { - for iNdEx := len(m.Changes) - 1; iNdEx >= 0; iNdEx-- { - { - size, err := m.Changes[iNdEx].MarshalToSizedBuffer(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = encodeVarintRaft(dAtA, i, uint64(size)) - } - i-- - dAtA[i] = 0x12 - } - } - i = encodeVarintRaft(dAtA, i, uint64(m.Transition)) - i-- - dAtA[i] = 0x8 - return len(dAtA) - i, nil -} - -func encodeVarintRaft(dAtA []byte, offset int, v uint64) int { - offset -= sovRaft(v) - base := offset - for v >= 1<<7 { - dAtA[offset] = uint8(v&0x7f | 0x80) - v >>= 7 - offset++ - } - dAtA[offset] = 
uint8(v) - return base -} -func (m *Entry) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - n += 1 + sovRaft(uint64(m.Type)) - n += 1 + sovRaft(uint64(m.Term)) - n += 1 + sovRaft(uint64(m.Index)) - if m.Data != nil { - l = len(m.Data) - n += 1 + l + sovRaft(uint64(l)) - } - return n -} - -func (m *SnapshotMetadata) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - l = m.ConfState.Size() - n += 1 + l + sovRaft(uint64(l)) - n += 1 + sovRaft(uint64(m.Index)) - n += 1 + sovRaft(uint64(m.Term)) - return n -} - -func (m *Snapshot) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - if m.Data != nil { - l = len(m.Data) - n += 1 + l + sovRaft(uint64(l)) - } - l = m.Metadata.Size() - n += 1 + l + sovRaft(uint64(l)) - return n -} - -func (m *Message) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - n += 1 + sovRaft(uint64(m.Type)) - n += 1 + sovRaft(uint64(m.To)) - n += 1 + sovRaft(uint64(m.From)) - n += 1 + sovRaft(uint64(m.Term)) - n += 1 + sovRaft(uint64(m.LogTerm)) - n += 1 + sovRaft(uint64(m.Index)) - if len(m.Entries) > 0 { - for _, e := range m.Entries { - l = e.Size() - n += 1 + l + sovRaft(uint64(l)) - } - } - n += 1 + sovRaft(uint64(m.Commit)) - if m.Snapshot != nil { - l = m.Snapshot.Size() - n += 1 + l + sovRaft(uint64(l)) - } - n += 2 - n += 1 + sovRaft(uint64(m.RejectHint)) - if m.Context != nil { - l = len(m.Context) - n += 1 + l + sovRaft(uint64(l)) - } - return n -} - -func (m *HardState) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - n += 1 + sovRaft(uint64(m.Term)) - n += 1 + sovRaft(uint64(m.Vote)) - n += 1 + sovRaft(uint64(m.Commit)) - return n -} - -func (m *ConfState) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - if len(m.Voters) > 0 { - for _, e := range m.Voters { - n += 1 + sovRaft(uint64(e)) - } - } - if len(m.Learners) > 0 { - for _, e := range m.Learners { - n += 1 + sovRaft(uint64(e)) - } - } - if len(m.VotersOutgoing) > 
0 { - for _, e := range m.VotersOutgoing { - n += 1 + sovRaft(uint64(e)) - } - } - if len(m.LearnersNext) > 0 { - for _, e := range m.LearnersNext { - n += 1 + sovRaft(uint64(e)) - } - } - n += 2 - return n -} - -func (m *ConfChange) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - n += 1 + sovRaft(uint64(m.ID)) - n += 1 + sovRaft(uint64(m.Type)) - n += 1 + sovRaft(uint64(m.NodeID)) - if m.Context != nil { - l = len(m.Context) - n += 1 + l + sovRaft(uint64(l)) - } - return n -} - -func (m *ConfChangeSingle) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - n += 1 + sovRaft(uint64(m.Type)) - n += 1 + sovRaft(uint64(m.NodeID)) - return n -} - -func (m *ConfChangeV2) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - n += 1 + sovRaft(uint64(m.Transition)) - if len(m.Changes) > 0 { - for _, e := range m.Changes { - l = e.Size() - n += 1 + l + sovRaft(uint64(l)) - } - } - if m.Context != nil { - l = len(m.Context) - n += 1 + l + sovRaft(uint64(l)) - } - return n -} - -func sovRaft(x uint64) (n int) { - return (math_bits.Len64(x|1) + 6) / 7 -} -func sozRaft(x uint64) (n int) { - return sovRaft(uint64((x << 1) ^ uint64((int64(x) >> 63)))) -} -func (m *Entry) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: Entry: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: Entry: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) - } - m.Type = 0 - for 
shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Type |= EntryType(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 2: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) - } - m.Term = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Term |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 3: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Index", wireType) - } - m.Index = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Index |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 4: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Data", wireType) - } - var byteLen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - byteLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if byteLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + byteLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Data = append(m.Data[:0], dAtA[iNdEx:postIndex]...) 
- if m.Data == nil { - m.Data = []byte{} - } - iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *SnapshotMetadata) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: SnapshotMetadata: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: SnapshotMetadata: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field ConfState", wireType) - } - var msglen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - msglen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if msglen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + msglen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - if err := m.ConfState.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { - return err - } - iNdEx = postIndex - case 2: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Index", wireType) - } - m.Index = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return 
io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Index |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 3: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) - } - m.Term = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Term |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *Snapshot) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: Snapshot: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: Snapshot: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Data", wireType) - } - var byteLen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - byteLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if byteLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + byteLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex 
> l { - return io.ErrUnexpectedEOF - } - m.Data = append(m.Data[:0], dAtA[iNdEx:postIndex]...) - if m.Data == nil { - m.Data = []byte{} - } - iNdEx = postIndex - case 2: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Metadata", wireType) - } - var msglen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - msglen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if msglen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + msglen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - if err := m.Metadata.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { - return err - } - iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *Message) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: Message: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: Message: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) - } - m.Type = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 
{ - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Type |= MessageType(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 2: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field To", wireType) - } - m.To = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.To |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 3: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field From", wireType) - } - m.From = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.From |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 4: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) - } - m.Term = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Term |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 5: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field LogTerm", wireType) - } - m.LogTerm = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.LogTerm |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 6: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Index", wireType) - } - m.Index = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Index |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - 
case 7: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Entries", wireType) - } - var msglen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - msglen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if msglen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + msglen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Entries = append(m.Entries, Entry{}) - if err := m.Entries[len(m.Entries)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { - return err - } - iNdEx = postIndex - case 8: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Commit", wireType) - } - m.Commit = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Commit |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 9: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Snapshot", wireType) - } - var msglen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - msglen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if msglen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + msglen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - if m.Snapshot == nil { - m.Snapshot = &Snapshot{} - } - if err := m.Snapshot.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { - return err - } - iNdEx = postIndex - case 10: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Reject", wireType) - } - var v int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 
ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.Reject = bool(v != 0) - case 11: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field RejectHint", wireType) - } - m.RejectHint = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.RejectHint |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 12: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Context", wireType) - } - var byteLen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - byteLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if byteLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + byteLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Context = append(m.Context[:0], dAtA[iNdEx:postIndex]...) 
- if m.Context == nil { - m.Context = []byte{} - } - iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *HardState) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: HardState: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: HardState: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) - } - m.Term = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Term |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 2: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Vote", wireType) - } - m.Vote = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Vote |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 3: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Commit", wireType) - } - m.Commit = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 
ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Commit |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *ConfState) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: ConfState: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: ConfState: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType == 0 { - var v uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.Voters = append(m.Voters, v) - } else if wireType == 2 { - var packedLen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - packedLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if packedLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + packedLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - var elementCount int - var 
count int - for _, integer := range dAtA[iNdEx:postIndex] { - if integer < 128 { - count++ - } - } - elementCount = count - if elementCount != 0 && len(m.Voters) == 0 { - m.Voters = make([]uint64, 0, elementCount) - } - for iNdEx < postIndex { - var v uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.Voters = append(m.Voters, v) - } - } else { - return fmt.Errorf("proto: wrong wireType = %d for field Voters", wireType) - } - case 2: - if wireType == 0 { - var v uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.Learners = append(m.Learners, v) - } else if wireType == 2 { - var packedLen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - packedLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if packedLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + packedLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - var elementCount int - var count int - for _, integer := range dAtA[iNdEx:postIndex] { - if integer < 128 { - count++ - } - } - elementCount = count - if elementCount != 0 && len(m.Learners) == 0 { - m.Learners = make([]uint64, 0, elementCount) - } - for iNdEx < postIndex { - var v uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.Learners = append(m.Learners, v) - } - } else { - 
return fmt.Errorf("proto: wrong wireType = %d for field Learners", wireType) - } - case 3: - if wireType == 0 { - var v uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.VotersOutgoing = append(m.VotersOutgoing, v) - } else if wireType == 2 { - var packedLen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - packedLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if packedLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + packedLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - var elementCount int - var count int - for _, integer := range dAtA[iNdEx:postIndex] { - if integer < 128 { - count++ - } - } - elementCount = count - if elementCount != 0 && len(m.VotersOutgoing) == 0 { - m.VotersOutgoing = make([]uint64, 0, elementCount) - } - for iNdEx < postIndex { - var v uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.VotersOutgoing = append(m.VotersOutgoing, v) - } - } else { - return fmt.Errorf("proto: wrong wireType = %d for field VotersOutgoing", wireType) - } - case 4: - if wireType == 0 { - var v uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.LearnersNext = append(m.LearnersNext, v) - } else if wireType == 2 { - var packedLen int - for shift := uint(0); ; shift += 7 { - if 
shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - packedLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if packedLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + packedLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - var elementCount int - var count int - for _, integer := range dAtA[iNdEx:postIndex] { - if integer < 128 { - count++ - } - } - elementCount = count - if elementCount != 0 && len(m.LearnersNext) == 0 { - m.LearnersNext = make([]uint64, 0, elementCount) - } - for iNdEx < postIndex { - var v uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.LearnersNext = append(m.LearnersNext, v) - } - } else { - return fmt.Errorf("proto: wrong wireType = %d for field LearnersNext", wireType) - } - case 5: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field AutoLeave", wireType) - } - var v int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - v |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - m.AutoLeave = bool(v != 0) - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *ConfChange) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 
ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: ConfChange: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: ConfChange: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) - } - m.ID = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.ID |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 2: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) - } - m.Type = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Type |= ConfChangeType(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 3: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field NodeID", wireType) - } - m.NodeID = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.NodeID |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 4: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Context", wireType) - } - var byteLen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - byteLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if byteLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + 
byteLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Context = append(m.Context[:0], dAtA[iNdEx:postIndex]...) - if m.Context == nil { - m.Context = []byte{} - } - iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *ConfChangeSingle) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: ConfChangeSingle: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: ConfChangeSingle: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) - } - m.Type = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Type |= ConfChangeType(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 2: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field NodeID", wireType) - } - m.NodeID = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.NodeID |= uint64(b&0x7F) << shift - if b < 0x80 { - 
break - } - } - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func (m *ConfChangeV2) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: ConfChangeV2: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: ConfChangeV2: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Transition", wireType) - } - m.Transition = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Transition |= ConfChangeTransition(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 2: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Changes", wireType) - } - var msglen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - msglen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if msglen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + msglen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Changes = 
append(m.Changes, ConfChangeSingle{}) - if err := m.Changes[len(m.Changes)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { - return err - } - iNdEx = postIndex - case 3: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Context", wireType) - } - var byteLen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowRaft - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - byteLen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if byteLen < 0 { - return ErrInvalidLengthRaft - } - postIndex := iNdEx + byteLen - if postIndex < 0 { - return ErrInvalidLengthRaft - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Context = append(m.Context[:0], dAtA[iNdEx:postIndex]...) - if m.Context == nil { - m.Context = []byte{} - } - iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipRaft(dAtA[iNdEx:]) - if err != nil { - return err - } - if (skippy < 0) || (iNdEx+skippy) < 0 { - return ErrInvalidLengthRaft - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func skipRaft(dAtA []byte) (n int, err error) { - l := len(dAtA) - iNdEx := 0 - depth := 0 - for iNdEx < l { - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowRaft - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - wireType := int(wire & 0x7) - switch wireType { - case 0: - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowRaft - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - iNdEx++ - if dAtA[iNdEx-1] < 0x80 { - break - } - } - case 1: - iNdEx += 8 - case 2: - var length int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowRaft - } - if iNdEx >= l { - return 0, 
io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - length |= (int(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - if length < 0 { - return 0, ErrInvalidLengthRaft - } - iNdEx += length - case 3: - depth++ - case 4: - if depth == 0 { - return 0, ErrUnexpectedEndOfGroupRaft - } - depth-- - case 5: - iNdEx += 4 - default: - return 0, fmt.Errorf("proto: illegal wireType %d", wireType) - } - if iNdEx < 0 { - return 0, ErrInvalidLengthRaft - } - if depth == 0 { - return iNdEx, nil - } - } - return 0, io.ErrUnexpectedEOF -} - -var ( - ErrInvalidLengthRaft = fmt.Errorf("proto: negative length found during unmarshaling") - ErrIntOverflowRaft = fmt.Errorf("proto: integer overflow") - ErrUnexpectedEndOfGroupRaft = fmt.Errorf("proto: unexpected end of group") -) diff --git a/raft/raftpb/raft.proto b/raft/raftpb/raft.proto deleted file mode 100644 index 3b058ba6281b..000000000000 --- a/raft/raftpb/raft.proto +++ /dev/null @@ -1,197 +0,0 @@ -syntax = "proto2"; -package raftpb; - -import "gogoproto/gogo.proto"; - -option (gogoproto.marshaler_all) = true; -option (gogoproto.sizer_all) = true; -option (gogoproto.unmarshaler_all) = true; -option (gogoproto.goproto_getters_all) = false; -option (gogoproto.goproto_enum_prefix_all) = false; -option (gogoproto.goproto_unkeyed_all) = false; -option (gogoproto.goproto_unrecognized_all) = false; -option (gogoproto.goproto_sizecache_all) = false; - -enum EntryType { - - EntryNormal = 0; - EntryConfChange = 1; // corresponds to pb.ConfChange - EntryConfChangeV2 = 2; // corresponds to pb.ConfChangeV2 -} - -message Entry { - - optional uint64 Term = 2 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations - optional uint64 Index = 3 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations - optional EntryType Type = 1 [(gogoproto.nullable) = false]; - optional bytes Data = 4; -} - -message SnapshotMetadata { - optional ConfState conf_state = 1 [(gogoproto.nullable) = false]; - optional 
uint64 index = 2 [(gogoproto.nullable) = false]; - optional uint64 term = 3 [(gogoproto.nullable) = false]; -} - -message Snapshot { - optional bytes data = 1; - optional SnapshotMetadata metadata = 2 [(gogoproto.nullable) = false]; -} - -// For description of different message types, see: -// https://pkg.go.dev/go.etcd.io/etcd/raft/v3#hdr-MessageType -enum MessageType { - MsgHup = 0; - MsgBeat = 1; - MsgProp = 2; - MsgApp = 3; - MsgAppResp = 4; - MsgVote = 5; - MsgVoteResp = 6; - MsgSnap = 7; - MsgHeartbeat = 8; - MsgHeartbeatResp = 9; - MsgUnreachable = 10; - MsgSnapStatus = 11; - MsgCheckQuorum = 12; - MsgTransferLeader = 13; - MsgTimeoutNow = 14; - MsgReadIndex = 15; - MsgReadIndexResp = 16; - MsgPreVote = 17; - MsgPreVoteResp = 18; - // NOTE: when adding new message types, remember to update the isLocalMsg and - // isResponseMsg arrays in raft/util.go and update the corresponding tests in - // raft/util_test.go. -} - -message Message { - optional MessageType type = 1 [(gogoproto.nullable) = false]; - optional uint64 to = 2 [(gogoproto.nullable) = false]; - optional uint64 from = 3 [(gogoproto.nullable) = false]; - optional uint64 term = 4 [(gogoproto.nullable) = false]; - // logTerm is generally used for appending Raft logs to followers. For example, - // (type=MsgApp,index=100,logTerm=5) means leader appends entries starting at - // index=101, and the term of entry at index 100 is 5. - // (type=MsgAppResp,reject=true,index=100,logTerm=5) means follower rejects some - // entries from its leader as it already has an entry with term 5 at index 100. - optional uint64 logTerm = 5 [(gogoproto.nullable) = false]; - optional uint64 index = 6 [(gogoproto.nullable) = false]; - repeated Entry entries = 7 [(gogoproto.nullable) = false]; - optional uint64 commit = 8 [(gogoproto.nullable) = false]; - // snapshot is non-nil and non-empty for MsgSnap messages and nil for all other - // message types. 
However, peer nodes running older binary versions may send a - // non-nil, empty value for the snapshot field of non-MsgSnap messages. Code - // should be prepared to handle such messages. - optional Snapshot snapshot = 9 [(gogoproto.nullable) = true]; - optional bool reject = 10 [(gogoproto.nullable) = false]; - optional uint64 rejectHint = 11 [(gogoproto.nullable) = false]; - optional bytes context = 12; -} - -message HardState { - optional uint64 term = 1 [(gogoproto.nullable) = false]; - optional uint64 vote = 2 [(gogoproto.nullable) = false]; - optional uint64 commit = 3 [(gogoproto.nullable) = false]; -} - -// ConfChangeTransition specifies the behavior of a configuration change with -// respect to joint consensus. -enum ConfChangeTransition { - // Automatically use the simple protocol if possible, otherwise fall back - // to ConfChangeJointImplicit. Most applications will want to use this. - ConfChangeTransitionAuto = 0; - // Use joint consensus unconditionally, and transition out of them - // automatically (by proposing a zero configuration change). - // - // This option is suitable for applications that want to minimize the time - // spent in the joint configuration and do not store the joint configuration - // in the state machine (outside of InitialState). - ConfChangeTransitionJointImplicit = 1; - // Use joint consensus and remain in the joint configuration until the - // application proposes a no-op configuration change. This is suitable for - // applications that want to explicitly control the transitions, for example - // to use a custom payload (via the Context field). - ConfChangeTransitionJointExplicit = 2; -} - -message ConfState { - // The voters in the incoming config. (If the configuration is not joint, - // then the outgoing config is empty). - repeated uint64 voters = 1; - // The learners in the incoming config. - repeated uint64 learners = 2; - // The voters in the outgoing config. 
- repeated uint64 voters_outgoing = 3; - // The nodes that will become learners when the outgoing config is removed. - // These nodes are necessarily currently in nodes_joint (or they would have - // been added to the incoming config right away). - repeated uint64 learners_next = 4; - // If set, the config is joint and Raft will automatically transition into - // the final config (i.e. remove the outgoing config) when this is safe. - optional bool auto_leave = 5 [(gogoproto.nullable) = false]; -} - -enum ConfChangeType { - ConfChangeAddNode = 0; - ConfChangeRemoveNode = 1; - ConfChangeUpdateNode = 2; - ConfChangeAddLearnerNode = 3; -} - -message ConfChange { - optional ConfChangeType type = 2 [(gogoproto.nullable) = false]; - optional uint64 node_id = 3 [(gogoproto.nullable) = false, (gogoproto.customname) = "NodeID"]; - optional bytes context = 4; - - // NB: this is used only by etcd to thread through a unique identifier. - // Ideally it should really use the Context instead. No counterpart to - // this field exists in ConfChangeV2. - optional uint64 id = 1 [(gogoproto.nullable) = false, (gogoproto.customname) = "ID"]; -} - -// ConfChangeSingle is an individual configuration change operation. Multiple -// such operations can be carried out atomically via a ConfChangeV2. -message ConfChangeSingle { - optional ConfChangeType type = 1 [(gogoproto.nullable) = false]; - optional uint64 node_id = 2 [(gogoproto.nullable) = false, (gogoproto.customname) = "NodeID"]; -} - -// ConfChangeV2 messages initiate configuration changes. They support both the -// simple "one at a time" membership change protocol and full Joint Consensus -// allowing for arbitrary changes in membership. -// -// The supplied context is treated as an opaque payload and can be used to -// attach an action on the state machine to the application of the config change -// proposal. 
Note that contrary to Joint Consensus as outlined in the Raft -// paper[1], configuration changes become active when they are *applied* to the -// state machine (not when they are appended to the log). -// -// The simple protocol can be used whenever only a single change is made. -// -// Non-simple changes require the use of Joint Consensus, for which two -// configuration changes are run. The first configuration change specifies the -// desired changes and transitions the Raft group into the joint configuration, -// in which quorum requires a majority of both the pre-changes and post-changes -// configuration. Joint Consensus avoids entering fragile intermediate -// configurations that could compromise survivability. For example, without the -// use of Joint Consensus and running across three availability zones with a -// replication factor of three, it is not possible to replace a voter without -// entering an intermediate configuration that does not survive the outage of -// one availability zone. -// -// The provided ConfChangeTransition specifies how (and whether) Joint Consensus -// is used, and assigns the task of leaving the joint configuration either to -// Raft or the application. Leaving the joint configuration is accomplished by -// proposing a ConfChangeV2 with only and optionally the Context field -// populated. 
-// -// For details on Raft membership changes, see: -// -// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf -message ConfChangeV2 { - - optional ConfChangeTransition transition = 1 [(gogoproto.nullable) = false]; - repeated ConfChangeSingle changes = 2 [(gogoproto.nullable) = false]; - optional bytes context = 3; -} diff --git a/raft/raftpb/raft_test.go b/raft/raftpb/raft_test.go deleted file mode 100644 index 7057446c223d..000000000000 --- a/raft/raftpb/raft_test.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2021 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package raftpb - -import ( - "math/bits" - "testing" - "unsafe" -) - -func TestProtoMemorySizes(t *testing.T) { - assert := func(size, exp uintptr, name string) { - t.Helper() - if size != exp { - t.Errorf("expected size of %s proto to be %d bytes, found %d bytes", name, exp, size) - } - } - - if64Bit := func(yes, no uintptr) uintptr { - if bits.UintSize == 64 { - return yes - } - return no - } - - var e Entry - assert(unsafe.Sizeof(e), if64Bit(48, 32), "Entry") - - var sm SnapshotMetadata - assert(unsafe.Sizeof(sm), if64Bit(120, 68), "SnapshotMetadata") - - var s Snapshot - assert(unsafe.Sizeof(s), if64Bit(144, 80), "Snapshot") - - var m Message - assert(unsafe.Sizeof(m), if64Bit(128, 92), "Message") - - var hs HardState - assert(unsafe.Sizeof(hs), 24, "HardState") - - var cs ConfState - assert(unsafe.Sizeof(cs), if64Bit(104, 52), "ConfState") - - var cc ConfChange - assert(unsafe.Sizeof(cc), if64Bit(48, 32), "ConfChange") - - var ccs ConfChangeSingle - assert(unsafe.Sizeof(ccs), if64Bit(16, 12), "ConfChangeSingle") - - var ccv2 ConfChangeV2 - assert(unsafe.Sizeof(ccv2), if64Bit(56, 28), "ConfChangeV2") -} diff --git a/raft/rafttest/doc.go b/raft/rafttest/doc.go deleted file mode 100644 index bba9a1a38681..000000000000 --- a/raft/rafttest/doc.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package rafttest provides functional tests for etcd's raft implementation. 
-package rafttest diff --git a/raft/rafttest/interaction_env.go b/raft/rafttest/interaction_env.go deleted file mode 100644 index 75c223837bf4..000000000000 --- a/raft/rafttest/interaction_env.go +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "bufio" - "fmt" - "math" - "strings" - - "go.etcd.io/etcd/raft/v3" - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -// InteractionOpts groups the options for an InteractionEnv. -type InteractionOpts struct { - OnConfig func(*raft.Config) -} - -// Node is a member of a raft group tested via an InteractionEnv. -type Node struct { - *raft.RawNode - Storage - - Config *raft.Config - History []pb.Snapshot -} - -// InteractionEnv facilitates testing of complex interactions between the -// members of a raft group. -type InteractionEnv struct { - Options *InteractionOpts - Nodes []Node - Messages []pb.Message // in-flight messages - - Output *RedirectLogger -} - -// NewInteractionEnv initializes an InteractionEnv. opts may be nil. 
-func NewInteractionEnv(opts *InteractionOpts) *InteractionEnv { - if opts == nil { - opts = &InteractionOpts{} - } - return &InteractionEnv{ - Options: opts, - Output: &RedirectLogger{ - Builder: &strings.Builder{}, - }, - } -} - -func (env *InteractionEnv) withIndent(f func()) { - orig := env.Output.Builder - env.Output.Builder = &strings.Builder{} - f() - - scanner := bufio.NewScanner(strings.NewReader(env.Output.Builder.String())) - for scanner.Scan() { - orig.WriteString(" " + scanner.Text() + "\n") - } - env.Output.Builder = orig -} - -// Storage is the interface used by InteractionEnv. It is comprised of raft's -// Storage interface plus access to operations that maintain the log and drive -// the Ready handling loop. -type Storage interface { - raft.Storage - SetHardState(state pb.HardState) error - ApplySnapshot(pb.Snapshot) error - Compact(newFirstIndex uint64) error - Append([]pb.Entry) error -} - -// raftConfigStub sets up a raft.Config stub with reasonable testing defaults. -// In particular, no limits are set. It is not a complete config: ID and Storage -// must be set for each node using the stub as a template. -func raftConfigStub() raft.Config { - return raft.Config{ - ElectionTick: 3, - HeartbeatTick: 1, - MaxSizePerMsg: math.MaxUint64, - MaxInflightMsgs: math.MaxInt32, - } -} - -func defaultEntryFormatter(b []byte) string { - return fmt.Sprintf("%q", b) -} diff --git a/raft/rafttest/interaction_env_handler.go b/raft/rafttest/interaction_env_handler.go deleted file mode 100644 index 73e706feff76..000000000000 --- a/raft/rafttest/interaction_env_handler.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "fmt" - "strconv" - "testing" - - "github.com/cockroachdb/datadriven" -) - -// Handle is the entrypoint for data-driven interaction testing. Commands and -// parameters are parsed from the supplied TestData. Errors during data parsing -// are reported via the supplied *testing.T; errors from the raft nodes and the -// storage engine are reported to the output buffer. -func (env *InteractionEnv) Handle(t *testing.T, d datadriven.TestData) string { - env.Output.Reset() - var err error - switch d.Cmd { - case "_breakpoint": - // This is a helper case to attach a debugger to when a problem needs - // to be investigated in a longer test file. In such a case, add the - // following stanza immediately before the interesting behavior starts: - // - // _breakpoint: - // ---- - // ok - // - // and set a breakpoint on the `case` above. - case "add-nodes": - // Example: - // - // add-nodes voters=(1 2 3) learners=(4 5) index=2 content=foo - err = env.handleAddNodes(t, d) - case "campaign": - // Example: - // - // campaign - err = env.handleCampaign(t, d) - case "compact": - // Example: - // - // compact - err = env.handleCompact(t, d) - case "deliver-msgs": - // Deliver the messages for a given recipient. - // - // Example: - // - // deliver-msgs - err = env.handleDeliverMsgs(t, d) - case "process-ready": - // Example: - // - // process-ready 3 - err = env.handleProcessReady(t, d) - case "log-level": - // Set the log level. NONE disables all output, including from the test - // harness (except errors). 
- // - // Example: - // - // log-level WARN - err = env.handleLogLevel(t, d) - case "raft-log": - // Print the Raft log. - // - // Example: - // - // raft-log 3 - err = env.handleRaftLog(t, d) - case "raft-state": - // Print Raft state of all nodes (whether the node is leading, - // following, etc.). The information for node n is based on - // n's view. - err = env.handleRaftState() - case "stabilize": - // Deliver messages to and run process-ready on the set of IDs until - // no more work is to be done. If no ids are given, all nodes are used. - // - // Example: - // - // stabilize 1 4 - err = env.handleStabilize(t, d) - case "status": - // Print Raft status. - // - // Example: - // - // status 5 - err = env.handleStatus(t, d) - case "tick-heartbeat": - // Tick a heartbeat interval. - // - // Example: - // - // tick-heartbeat 3 - err = env.handleTickHeartbeat(t, d) - case "transfer-leadership": - // Transfer the Raft leader. - // - // Example: - // - // transfer-leadership from=1 to=4 - err = env.handleTransferLeadership(t, d) - case "propose": - // Propose an entry. - // - // Example: - // - // propose 1 foo - err = env.handlePropose(t, d) - case "propose-conf-change": - // Propose a configuration change, or transition out of a previously - // proposed joint configuration change that requested explicit - // transitions. When adding nodes, this command can be used to - // logically add nodes to the configuration, but add-nodes is needed - // to "create" the nodes. - // - // propose-conf-change node_id [v1=] [transition=] - // command string - // See ConfChangesFromString for command string format. - // Arguments are: - // node_id - the node proposing the configuration change. - // v1 - make one change at a time, false by default. - // transition - "auto" (the default), "explicit" or "implicit". 
- // Example: - // - // propose-conf-change 1 transition=explicit - // v1 v3 l4 r5 - // - // Example: - // - // propose-conf-change 2 v1=true - // v5 - err = env.handleProposeConfChange(t, d) - default: - err = fmt.Errorf("unknown command") - } - if err != nil { - env.Output.WriteString(err.Error()) - } - // NB: the highest log level suppresses all output, including that of the - // handlers. This comes in useful during setup which can be chatty. - // However, errors are always logged. - if env.Output.Len() == 0 { - return "ok" - } - if env.Output.Lvl == len(lvlNames)-1 { - if err != nil { - return err.Error() - } - return "ok (quiet)" - } - return env.Output.String() -} - -func firstAsInt(t *testing.T, d datadriven.TestData) int { - t.Helper() - n, err := strconv.Atoi(d.CmdArgs[0].Key) - if err != nil { - t.Fatal(err) - } - return n -} - -func firstAsNodeIdx(t *testing.T, d datadriven.TestData) int { - t.Helper() - n := firstAsInt(t, d) - return n - 1 -} - -func nodeIdxs(t *testing.T, d datadriven.TestData) []int { - var ints []int - for i := 0; i < len(d.CmdArgs); i++ { - if len(d.CmdArgs[i].Vals) != 0 { - continue - } - n, err := strconv.Atoi(d.CmdArgs[i].Key) - if err != nil { - t.Fatal(err) - } - ints = append(ints, n-1) - } - return ints -} diff --git a/raft/rafttest/interaction_env_handler_add_nodes.go b/raft/rafttest/interaction_env_handler_add_nodes.go deleted file mode 100644 index b72c96505217..000000000000 --- a/raft/rafttest/interaction_env_handler_add_nodes.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "errors" - "fmt" - "reflect" - "testing" - - "github.com/cockroachdb/datadriven" - "go.etcd.io/etcd/raft/v3" - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -func (env *InteractionEnv) handleAddNodes(t *testing.T, d datadriven.TestData) error { - n := firstAsInt(t, d) - var snap pb.Snapshot - cfg := raftConfigStub() - for _, arg := range d.CmdArgs[1:] { - for i := range arg.Vals { - switch arg.Key { - case "voters": - var id uint64 - arg.Scan(t, i, &id) - snap.Metadata.ConfState.Voters = append(snap.Metadata.ConfState.Voters, id) - case "learners": - var id uint64 - arg.Scan(t, i, &id) - snap.Metadata.ConfState.Learners = append(snap.Metadata.ConfState.Learners, id) - case "inflight": - arg.Scan(t, i, &cfg.MaxInflightMsgs) - case "index": - arg.Scan(t, i, &snap.Metadata.Index) - cfg.Applied = snap.Metadata.Index - case "content": - arg.Scan(t, i, &snap.Data) - } - } - } - return env.AddNodes(n, cfg, snap) -} - -type snapOverrideStorage struct { - Storage - snapshotOverride func() (pb.Snapshot, error) -} - -func (s snapOverrideStorage) Snapshot() (pb.Snapshot, error) { - if s.snapshotOverride != nil { - return s.snapshotOverride() - } - return s.Storage.Snapshot() -} - -var _ raft.Storage = snapOverrideStorage{} - -// AddNodes adds n new nodes initialized from the given snapshot (which may be -// empty), and using the cfg as template. They will be assigned consecutive IDs. 
-func (env *InteractionEnv) AddNodes(n int, cfg raft.Config, snap pb.Snapshot) error { - bootstrap := !reflect.DeepEqual(snap, pb.Snapshot{}) - for i := 0; i < n; i++ { - id := uint64(1 + len(env.Nodes)) - s := snapOverrideStorage{ - Storage: raft.NewMemoryStorage(), - // When you ask for a snapshot, you get the most recent snapshot. - // - // TODO(tbg): this is sort of clunky, but MemoryStorage itself will - // give you some fixed snapshot and also the snapshot changes - // whenever you compact the logs and vice versa, so it's all a bit - // awkward to use. - snapshotOverride: func() (pb.Snapshot, error) { - snaps := env.Nodes[int(id-1)].History - return snaps[len(snaps)-1], nil - }, - } - if bootstrap { - // NB: we could make this work with 1, but MemoryStorage just - // doesn't play well with that and it's not a loss of generality. - if snap.Metadata.Index <= 1 { - return errors.New("index must be specified as > 1 due to bootstrap") - } - snap.Metadata.Term = 1 - if err := s.ApplySnapshot(snap); err != nil { - return err - } - fi, err := s.FirstIndex() - if err != nil { - return err - } - // At the time of writing and for *MemoryStorage, applying a - // snapshot also truncates appropriately, but this would change with - // other storage engines potentially. - if exp := snap.Metadata.Index + 1; fi != exp { - return fmt.Errorf("failed to establish first index %d; got %d", exp, fi) - } - } - cfg := cfg // fork the config stub - cfg.ID, cfg.Storage = id, s - if env.Options.OnConfig != nil { - env.Options.OnConfig(&cfg) - if cfg.ID != id { - // This could be supported but then we need to do more work - // translating back and forth -- not worth it. 
- return errors.New("OnConfig must not change the ID") - } - } - if cfg.Logger != nil { - return errors.New("OnConfig must not set Logger") - } - cfg.Logger = env.Output - - rn, err := raft.NewRawNode(&cfg) - if err != nil { - return err - } - - node := Node{ - RawNode: rn, - // TODO(tbg): allow a more general Storage, as long as it also allows - // us to apply snapshots, append entries, and update the HardState. - Storage: s, - Config: &cfg, - History: []pb.Snapshot{snap}, - } - env.Nodes = append(env.Nodes, node) - } - return nil -} diff --git a/raft/rafttest/interaction_env_handler_campaign.go b/raft/rafttest/interaction_env_handler_campaign.go deleted file mode 100644 index bde5cc42e1f1..000000000000 --- a/raft/rafttest/interaction_env_handler_campaign.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "testing" - - "github.com/cockroachdb/datadriven" -) - -func (env *InteractionEnv) handleCampaign(t *testing.T, d datadriven.TestData) error { - idx := firstAsNodeIdx(t, d) - return env.Campaign(t, idx) -} - -// Campaign the node at the given index. 
-func (env *InteractionEnv) Campaign(t *testing.T, idx int) error { - return env.Nodes[idx].Campaign() -} diff --git a/raft/rafttest/interaction_env_handler_compact.go b/raft/rafttest/interaction_env_handler_compact.go deleted file mode 100644 index 25fa1d22c91e..000000000000 --- a/raft/rafttest/interaction_env_handler_compact.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "strconv" - "testing" - - "github.com/cockroachdb/datadriven" -) - -func (env *InteractionEnv) handleCompact(t *testing.T, d datadriven.TestData) error { - idx := firstAsNodeIdx(t, d) - newFirstIndex, err := strconv.ParseUint(d.CmdArgs[1].Key, 10, 64) - if err != nil { - return err - } - return env.Compact(idx, newFirstIndex) -} - -// Compact truncates the log on the node at index idx so that the supplied new -// first index results. 
-func (env *InteractionEnv) Compact(idx int, newFirstIndex uint64) error { - if err := env.Nodes[idx].Compact(newFirstIndex); err != nil { - return err - } - return env.RaftLog(idx) -} diff --git a/raft/rafttest/interaction_env_handler_deliver_msgs.go b/raft/rafttest/interaction_env_handler_deliver_msgs.go deleted file mode 100644 index 8072e876ffd6..000000000000 --- a/raft/rafttest/interaction_env_handler_deliver_msgs.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package rafttest - -import ( - "fmt" - "strconv" - "testing" - - "github.com/cockroachdb/datadriven" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" -) - -func (env *InteractionEnv) handleDeliverMsgs(t *testing.T, d datadriven.TestData) error { - var rs []Recipient - for _, arg := range d.CmdArgs { - if len(arg.Vals) == 0 { - id, err := strconv.ParseUint(arg.Key, 10, 64) - if err != nil { - t.Fatal(err) - } - rs = append(rs, Recipient{ID: id}) - } - for i := range arg.Vals { - switch arg.Key { - case "drop": - var id uint64 - arg.Scan(t, i, &id) - var found bool - for _, r := range rs { - if r.ID == id { - found = true - } - } - if found { - t.Fatalf("can't both deliver and drop msgs to %d", id) - } - rs = append(rs, Recipient{ID: id, Drop: true}) - } - } - } - - if n := env.DeliverMsgs(rs...); n == 0 { - env.Output.WriteString("no messages\n") - } - return nil -} - -type Recipient struct { - ID uint64 - Drop bool -} - -// DeliverMsgs goes through env.Messages and, depending on the Drop flag, -// delivers or drops messages to the specified Recipients. Returns the -// number of messages handled (i.e. delivered or dropped). A handled message -// is removed from env.Messages. -func (env *InteractionEnv) DeliverMsgs(rs ...Recipient) int { - var n int - for _, r := range rs { - var msgs []raftpb.Message - msgs, env.Messages = splitMsgs(env.Messages, r.ID) - n += len(msgs) - for _, msg := range msgs { - if r.Drop { - fmt.Fprint(env.Output, "dropped: ") - } - fmt.Fprintln(env.Output, raft.DescribeMessage(msg, defaultEntryFormatter)) - if r.Drop { - // NB: it's allowed to drop messages to nodes that haven't been instantiated yet, - // we haven't used msg.To yet. 
- continue - } - toIdx := int(msg.To - 1) - if err := env.Nodes[toIdx].Step(msg); err != nil { - fmt.Fprintln(env.Output, err) - } - } - } - return n -} diff --git a/raft/rafttest/interaction_env_handler_process_ready.go b/raft/rafttest/interaction_env_handler_process_ready.go deleted file mode 100644 index d94ac60334f1..000000000000 --- a/raft/rafttest/interaction_env_handler_process_ready.go +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "fmt" - "testing" - - "github.com/cockroachdb/datadriven" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" -) - -func (env *InteractionEnv) handleProcessReady(t *testing.T, d datadriven.TestData) error { - idxs := nodeIdxs(t, d) - for _, idx := range idxs { - var err error - if len(idxs) > 1 { - fmt.Fprintf(env.Output, "> %d handling Ready\n", idx+1) - env.withIndent(func() { err = env.ProcessReady(idx) }) - } else { - err = env.ProcessReady(idx) - } - if err != nil { - return err - } - } - return nil -} - -// ProcessReady runs Ready handling on the node with the given index. -func (env *InteractionEnv) ProcessReady(idx int) error { - // TODO(tbg): Allow simulating crashes here. - rn, s := env.Nodes[idx].RawNode, env.Nodes[idx].Storage - rd := rn.Ready() - env.Output.WriteString(raft.DescribeReady(rd, defaultEntryFormatter)) - // TODO(tbg): the order of operations here is not necessarily safe. 
See: - // https://github.com/etcd-io/etcd/pull/10861 - if !raft.IsEmptyHardState(rd.HardState) { - if err := s.SetHardState(rd.HardState); err != nil { - return err - } - } - if err := s.Append(rd.Entries); err != nil { - return err - } - if !raft.IsEmptySnap(rd.Snapshot) { - if err := s.ApplySnapshot(rd.Snapshot); err != nil { - return err - } - } - for _, ent := range rd.CommittedEntries { - var update []byte - var cs *raftpb.ConfState - switch ent.Type { - case raftpb.EntryConfChange: - var cc raftpb.ConfChange - if err := cc.Unmarshal(ent.Data); err != nil { - return err - } - update = cc.Context - cs = rn.ApplyConfChange(cc) - case raftpb.EntryConfChangeV2: - var cc raftpb.ConfChangeV2 - if err := cc.Unmarshal(ent.Data); err != nil { - return err - } - cs = rn.ApplyConfChange(cc) - update = cc.Context - default: - update = ent.Data - } - - // Record the new state by starting with the current state and applying - // the command. - lastSnap := env.Nodes[idx].History[len(env.Nodes[idx].History)-1] - var snap raftpb.Snapshot - snap.Data = append(snap.Data, lastSnap.Data...) - // NB: this hard-codes an "appender" state machine. - snap.Data = append(snap.Data, update...) - snap.Metadata.Index = ent.Index - snap.Metadata.Term = ent.Term - if cs == nil { - sl := env.Nodes[idx].History - cs = &sl[len(sl)-1].Metadata.ConfState - } - snap.Metadata.ConfState = *cs - env.Nodes[idx].History = append(env.Nodes[idx].History, snap) - } - - env.Messages = append(env.Messages, rd.Messages...) - - rn.Advance(rd) - return nil -} diff --git a/raft/rafttest/interaction_env_handler_propose.go b/raft/rafttest/interaction_env_handler_propose.go deleted file mode 100644 index 7e8832340cf9..000000000000 --- a/raft/rafttest/interaction_env_handler_propose.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "testing" - - "github.com/cockroachdb/datadriven" -) - -func (env *InteractionEnv) handlePropose(t *testing.T, d datadriven.TestData) error { - idx := firstAsNodeIdx(t, d) - if len(d.CmdArgs) != 2 || len(d.CmdArgs[1].Vals) > 0 { - t.Fatalf("expected exactly one key with no vals: %+v", d.CmdArgs[1:]) - } - return env.Propose(idx, []byte(d.CmdArgs[1].Key)) -} - -// Propose a regular entry. -func (env *InteractionEnv) Propose(idx int, data []byte) error { - return env.Nodes[idx].Propose(data) -} diff --git a/raft/rafttest/interaction_env_handler_propose_conf_change.go b/raft/rafttest/interaction_env_handler_propose_conf_change.go deleted file mode 100644 index ddc8ffcbd083..000000000000 --- a/raft/rafttest/interaction_env_handler_propose_conf_change.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package rafttest - -import ( - "fmt" - "strconv" - "testing" - - "github.com/cockroachdb/datadriven" - "go.etcd.io/etcd/raft/v3/raftpb" -) - -func (env *InteractionEnv) handleProposeConfChange(t *testing.T, d datadriven.TestData) error { - idx := firstAsNodeIdx(t, d) - var v1 bool - transition := raftpb.ConfChangeTransitionAuto - for _, arg := range d.CmdArgs[1:] { - for _, val := range arg.Vals { - switch arg.Key { - case "v1": - var err error - v1, err = strconv.ParseBool(val) - if err != nil { - return err - } - case "transition": - switch val { - case "auto": - transition = raftpb.ConfChangeTransitionAuto - case "implicit": - transition = raftpb.ConfChangeTransitionJointImplicit - case "explicit": - transition = raftpb.ConfChangeTransitionJointExplicit - default: - return fmt.Errorf("unknown transition %s", val) - } - default: - return fmt.Errorf("unknown command %s", arg.Key) - } - } - } - - ccs, err := raftpb.ConfChangesFromString(d.Input) - if err != nil { - return err - } - - var c raftpb.ConfChangeI - if v1 { - if len(ccs) > 1 || transition != raftpb.ConfChangeTransitionAuto { - return fmt.Errorf("v1 conf change can only have one operation and no transition") - } - c = raftpb.ConfChange{ - Type: ccs[0].Type, - NodeID: ccs[0].NodeID, - } - } else { - c = raftpb.ConfChangeV2{ - Transition: transition, - Changes: ccs, - } - } - return env.ProposeConfChange(idx, c) -} - -// ProposeConfChange proposes a configuration change on the node with the given index. 
-func (env *InteractionEnv) ProposeConfChange(idx int, c raftpb.ConfChangeI) error { - return env.Nodes[idx].ProposeConfChange(c) -} diff --git a/raft/rafttest/interaction_env_handler_raft_log.go b/raft/rafttest/interaction_env_handler_raft_log.go deleted file mode 100644 index 5a99e3e180d8..000000000000 --- a/raft/rafttest/interaction_env_handler_raft_log.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "fmt" - "math" - "testing" - - "github.com/cockroachdb/datadriven" - "go.etcd.io/etcd/raft/v3" -) - -func (env *InteractionEnv) handleRaftLog(t *testing.T, d datadriven.TestData) error { - idx := firstAsNodeIdx(t, d) - return env.RaftLog(idx) -} - -// RaftLog pretty prints the raft log to the output buffer. -func (env *InteractionEnv) RaftLog(idx int) error { - s := env.Nodes[idx].Storage - fi, err := s.FirstIndex() - if err != nil { - return err - } - li, err := s.LastIndex() - if err != nil { - return err - } - if li < fi { - // TODO(tbg): this is what MemoryStorage returns, but unclear if it's - // the "correct" thing to do. 
- fmt.Fprintf(env.Output, "log is empty: first index=%d, last index=%d", fi, li) - return nil - } - ents, err := s.Entries(fi, li+1, math.MaxUint64) - if err != nil { - return err - } - env.Output.WriteString(raft.DescribeEntries(ents, defaultEntryFormatter)) - return err -} diff --git a/raft/rafttest/interaction_env_handler_raftstate.go b/raft/rafttest/interaction_env_handler_raftstate.go deleted file mode 100644 index 4e8610f1a819..000000000000 --- a/raft/rafttest/interaction_env_handler_raftstate.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2021 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "fmt" - - "go.etcd.io/etcd/raft/v3" -) - -// isVoter checks whether node id is in the voter list within st. -func isVoter(id uint64, st raft.Status) bool { - idMap := st.Config.Voters.IDs() - for idx := range idMap { - if id == idx { - return true - } - } - return false -} - -// handleRaftState pretty-prints the raft state for all nodes to the output buffer. -// For each node, the information is based on its own configuration view. 
-func (env *InteractionEnv) handleRaftState() error { - for _, n := range env.Nodes { - st := n.Status() - var voterStatus string - if isVoter(st.ID, st) { - voterStatus = "(Voter)" - } else { - voterStatus = "(Non-Voter)" - } - fmt.Fprintf(env.Output, "%d: %s %s\n", st.ID, st.RaftState, voterStatus) - } - return nil -} diff --git a/raft/rafttest/interaction_env_handler_stabilize.go b/raft/rafttest/interaction_env_handler_stabilize.go deleted file mode 100644 index 573e215f4c35..000000000000 --- a/raft/rafttest/interaction_env_handler_stabilize.go +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "fmt" - "testing" - - "github.com/cockroachdb/datadriven" - "go.etcd.io/etcd/raft/v3/raftpb" -) - -func (env *InteractionEnv) handleStabilize(t *testing.T, d datadriven.TestData) error { - idxs := nodeIdxs(t, d) - return env.Stabilize(idxs...) -} - -// Stabilize repeatedly runs Ready handling on and message delivery to the set -// of nodes specified via the idxs slice until reaching a fixed point. 
-func (env *InteractionEnv) Stabilize(idxs ...int) error { - var nodes []Node - for _, idx := range idxs { - nodes = append(nodes, env.Nodes[idx]) - } - if len(nodes) == 0 { - nodes = env.Nodes - } - - for { - done := true - for _, rn := range nodes { - if rn.HasReady() { - done = false - idx := int(rn.Status().ID - 1) - fmt.Fprintf(env.Output, "> %d handling Ready\n", idx+1) - env.withIndent(func() { env.ProcessReady(idx) }) - } - } - for _, rn := range nodes { - id := rn.Status().ID - // NB: we grab the messages just to see whether to print the header. - // DeliverMsgs will do it again. - if msgs, _ := splitMsgs(env.Messages, id); len(msgs) > 0 { - fmt.Fprintf(env.Output, "> %d receiving messages\n", id) - env.withIndent(func() { env.DeliverMsgs(Recipient{ID: id}) }) - done = false - } - } - if done { - return nil - } - } -} - -func splitMsgs(msgs []raftpb.Message, to uint64) (toMsgs []raftpb.Message, rmdr []raftpb.Message) { - // NB: this method does not reorder messages. - for _, msg := range msgs { - if msg.To == to { - toMsgs = append(toMsgs, msg) - } else { - rmdr = append(rmdr, msg) - } - } - return toMsgs, rmdr -} diff --git a/raft/rafttest/interaction_env_handler_status.go b/raft/rafttest/interaction_env_handler_status.go deleted file mode 100644 index bf5973a3b56a..000000000000 --- a/raft/rafttest/interaction_env_handler_status.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package rafttest - -import ( - "fmt" - "testing" - - "github.com/cockroachdb/datadriven" - "go.etcd.io/etcd/raft/v3/tracker" -) - -func (env *InteractionEnv) handleStatus(t *testing.T, d datadriven.TestData) error { - idx := firstAsNodeIdx(t, d) - return env.Status(idx) -} - -// Status pretty-prints the raft status for the node at the given index to the output -// buffer. -func (env *InteractionEnv) Status(idx int) error { - // TODO(tbg): actually print the full status. - st := env.Nodes[idx].Status() - m := tracker.ProgressMap{} - for id, pr := range st.Progress { - pr := pr // loop-local copy - m[id] = &pr - } - fmt.Fprint(env.Output, m) - return nil -} diff --git a/raft/rafttest/interaction_env_handler_tick_heartbeat.go b/raft/rafttest/interaction_env_handler_tick_heartbeat.go deleted file mode 100644 index 349ca78efad2..000000000000 --- a/raft/rafttest/interaction_env_handler_tick_heartbeat.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "testing" - - "github.com/cockroachdb/datadriven" -) - -func (env *InteractionEnv) handleTickHeartbeat(t *testing.T, d datadriven.TestData) error { - idx := firstAsNodeIdx(t, d) - return env.Tick(idx, env.Nodes[idx].Config.HeartbeatTick) -} - -// Tick the node at the given index the given number of times. 
-func (env *InteractionEnv) Tick(idx int, num int) error { - for i := 0; i < num; i++ { - env.Nodes[idx].Tick() - } - return nil -} diff --git a/raft/rafttest/interaction_env_handler_transfer_leadership.go b/raft/rafttest/interaction_env_handler_transfer_leadership.go deleted file mode 100644 index dc7f366d1f5e..000000000000 --- a/raft/rafttest/interaction_env_handler_transfer_leadership.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2021 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "testing" - - "github.com/cockroachdb/datadriven" -) - -func (env *InteractionEnv) handleTransferLeadership(t *testing.T, d datadriven.TestData) error { - var from, to uint64 - d.ScanArgs(t, "from", &from) - d.ScanArgs(t, "to", &to) - if from == 0 || from > uint64(len(env.Nodes)) { - t.Fatalf(`expected valid "from" argument`) - } - if to == 0 || to > uint64(len(env.Nodes)) { - t.Fatalf(`expected valid "to" argument`) - } - return env.transferLeadership(from, to) -} - -// Initiate leadership transfer. 
-func (env *InteractionEnv) transferLeadership(from, to uint64) error { - fromIdx := from - 1 - env.Nodes[fromIdx].TransferLeader(to) - return nil -} diff --git a/raft/rafttest/interaction_env_logger.go b/raft/rafttest/interaction_env_logger.go deleted file mode 100644 index 1b883d559dfe..000000000000 --- a/raft/rafttest/interaction_env_logger.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rafttest - -import ( - "fmt" - "strings" - - "go.etcd.io/etcd/raft/v3" -) - -type logLevels [6]string - -var lvlNames logLevels = [...]string{"DEBUG", "INFO", "WARN", "ERROR", "FATAL", "NONE"} - -type RedirectLogger struct { - *strings.Builder - Lvl int // 0 = DEBUG, 1 = INFO, 2 = WARNING, 3 = ERROR, 4 = FATAL, 5 = NONE -} - -var _ raft.Logger = (*RedirectLogger)(nil) - -func (l *RedirectLogger) printf(lvl int, format string, args ...interface{}) { - if l.Lvl <= lvl { - fmt.Fprint(l, lvlNames[lvl], " ") - fmt.Fprintf(l, format, args...) - if n := len(format); n > 0 && format[n-1] != '\n' { - l.WriteByte('\n') - } - } -} -func (l *RedirectLogger) print(lvl int, args ...interface{}) { - if l.Lvl <= lvl { - fmt.Fprint(l, lvlNames[lvl], " ") - fmt.Fprintln(l, args...) - } -} - -func (l *RedirectLogger) Debug(v ...interface{}) { - l.print(0, v...) -} - -func (l *RedirectLogger) Debugf(format string, v ...interface{}) { - l.printf(0, format, v...) 
-} - -func (l *RedirectLogger) Info(v ...interface{}) { - l.print(1, v...) -} - -func (l *RedirectLogger) Infof(format string, v ...interface{}) { - l.printf(1, format, v...) -} - -func (l *RedirectLogger) Warning(v ...interface{}) { - l.print(2, v...) -} - -func (l *RedirectLogger) Warningf(format string, v ...interface{}) { - l.printf(2, format, v...) -} - -func (l *RedirectLogger) Error(v ...interface{}) { - l.print(3, v...) -} - -func (l *RedirectLogger) Errorf(format string, v ...interface{}) { - l.printf(3, format, v...) -} - -func (l *RedirectLogger) Fatal(v ...interface{}) { - l.print(4, v...) -} - -func (l *RedirectLogger) Fatalf(format string, v ...interface{}) { - - l.printf(4, format, v...) -} - -func (l *RedirectLogger) Panic(v ...interface{}) { - l.print(4, v...) -} - -func (l *RedirectLogger) Panicf(format string, v ...interface{}) { - l.printf(4, format, v...) -} diff --git a/raft/rafttest/network.go b/raft/rafttest/network.go deleted file mode 100644 index 0e86bf3a548b..000000000000 --- a/raft/rafttest/network.go +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package rafttest - -import ( - "math/rand" - "sync" - "time" - - "go.etcd.io/etcd/raft/v3/raftpb" -) - -// a network interface -type iface interface { - send(m raftpb.Message) - recv() chan raftpb.Message - disconnect() - connect() -} - -type raftNetwork struct { - rand *rand.Rand - mu sync.Mutex - disconnected map[uint64]bool - dropmap map[conn]float64 - delaymap map[conn]delay - recvQueues map[uint64]chan raftpb.Message -} - -type conn struct { - from, to uint64 -} - -type delay struct { - d time.Duration - rate float64 -} - -func newRaftNetwork(nodes ...uint64) *raftNetwork { - pn := &raftNetwork{ - rand: rand.New(rand.NewSource(1)), - recvQueues: make(map[uint64]chan raftpb.Message), - dropmap: make(map[conn]float64), - delaymap: make(map[conn]delay), - disconnected: make(map[uint64]bool), - } - - for _, n := range nodes { - pn.recvQueues[n] = make(chan raftpb.Message, 1024) - } - return pn -} - -func (rn *raftNetwork) nodeNetwork(id uint64) iface { - return &nodeNetwork{id: id, raftNetwork: rn} -} - -func (rn *raftNetwork) send(m raftpb.Message) { - rn.mu.Lock() - to := rn.recvQueues[m.To] - if rn.disconnected[m.To] { - to = nil - } - drop := rn.dropmap[conn{m.From, m.To}] - dl := rn.delaymap[conn{m.From, m.To}] - rn.mu.Unlock() - - if to == nil { - return - } - if drop != 0 && rn.rand.Float64() < drop { - return - } - // TODO: shall we dl without blocking the send call? - if dl.d != 0 && rn.rand.Float64() < dl.rate { - rd := rn.rand.Int63n(int64(dl.d)) - time.Sleep(time.Duration(rd)) - } - - // use marshal/unmarshal to copy message to avoid data race. - b, err := m.Marshal() - if err != nil { - panic(err) - } - - var cm raftpb.Message - err = cm.Unmarshal(b) - if err != nil { - panic(err) - } - - select { - case to <- cm: - default: - // drop messages when the receiver queue is full. 
- } -} - -func (rn *raftNetwork) recvFrom(from uint64) chan raftpb.Message { - rn.mu.Lock() - fromc := rn.recvQueues[from] - if rn.disconnected[from] { - fromc = nil - } - rn.mu.Unlock() - - return fromc -} - -func (rn *raftNetwork) drop(from, to uint64, rate float64) { - rn.mu.Lock() - defer rn.mu.Unlock() - rn.dropmap[conn{from, to}] = rate -} - -func (rn *raftNetwork) delay(from, to uint64, d time.Duration, rate float64) { - rn.mu.Lock() - defer rn.mu.Unlock() - rn.delaymap[conn{from, to}] = delay{d, rate} -} - -func (rn *raftNetwork) disconnect(id uint64) { - rn.mu.Lock() - defer rn.mu.Unlock() - rn.disconnected[id] = true -} - -func (rn *raftNetwork) connect(id uint64) { - rn.mu.Lock() - defer rn.mu.Unlock() - rn.disconnected[id] = false -} - -type nodeNetwork struct { - id uint64 - *raftNetwork -} - -func (nt *nodeNetwork) connect() { - nt.raftNetwork.connect(nt.id) -} - -func (nt *nodeNetwork) disconnect() { - nt.raftNetwork.disconnect(nt.id) -} - -func (nt *nodeNetwork) send(m raftpb.Message) { - nt.raftNetwork.send(m) -} - -func (nt *nodeNetwork) recv() chan raftpb.Message { - return nt.recvFrom(nt.id) -} diff --git a/raft/rafttest/network_test.go b/raft/rafttest/network_test.go deleted file mode 100644 index 39447476e4d8..000000000000 --- a/raft/rafttest/network_test.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package rafttest - -import ( - "testing" - "time" - - "go.etcd.io/etcd/raft/v3/raftpb" -) - -func TestNetworkDrop(t *testing.T) { - // drop around 10% messages - sent := 1000 - droprate := 0.1 - nt := newRaftNetwork(1, 2) - nt.drop(1, 2, droprate) - for i := 0; i < sent; i++ { - nt.send(raftpb.Message{From: 1, To: 2}) - } - - c := nt.recvFrom(2) - - received := 0 - done := false - for !done { - select { - case <-c: - received++ - default: - done = true - } - } - - drop := sent - received - if drop > int((droprate+0.1)*float64(sent)) || drop < int((droprate-0.1)*float64(sent)) { - t.Errorf("drop = %d, want around %.2f", drop, droprate*float64(sent)) - } -} - -func TestNetworkDelay(t *testing.T) { - sent := 1000 - delay := time.Millisecond - delayrate := 0.1 - nt := newRaftNetwork(1, 2) - - nt.delay(1, 2, delay, delayrate) - var total time.Duration - for i := 0; i < sent; i++ { - s := time.Now() - nt.send(raftpb.Message{From: 1, To: 2}) - total += time.Since(s) - } - - w := time.Duration(float64(sent)*delayrate/2) * delay - // there is some overhead in the send call since it generates random numbers. - if total < w { - t.Errorf("total = %v, want > %v", total, w) - } -} diff --git a/raft/rafttest/node.go b/raft/rafttest/node.go deleted file mode 100644 index cb1a1241fcd4..000000000000 --- a/raft/rafttest/node.go +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package rafttest - -import ( - "context" - "log" - "math/rand" - "sync" - "time" - - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" -) - -type node struct { - raft.Node - id uint64 - iface iface - stopc chan struct{} - pausec chan bool - - // stable - storage *raft.MemoryStorage - - mu sync.Mutex // guards state - state raftpb.HardState -} - -func startNode(id uint64, peers []raft.Peer, iface iface) *node { - st := raft.NewMemoryStorage() - c := &raft.Config{ - ID: id, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: st, - MaxSizePerMsg: 1024 * 1024, - MaxInflightMsgs: 256, - MaxUncommittedEntriesSize: 1 << 30, - } - rn := raft.StartNode(c, peers) - n := &node{ - Node: rn, - id: id, - storage: st, - iface: iface, - pausec: make(chan bool), - } - n.start() - return n -} - -func (n *node) start() { - n.stopc = make(chan struct{}) - ticker := time.NewTicker(5 * time.Millisecond).C - - go func() { - for { - select { - case <-ticker: - n.Tick() - case rd := <-n.Ready(): - if !raft.IsEmptyHardState(rd.HardState) { - n.mu.Lock() - n.state = rd.HardState - n.mu.Unlock() - n.storage.SetHardState(n.state) - } - n.storage.Append(rd.Entries) - time.Sleep(time.Millisecond) - - // simulate async send, more like real world... - for _, m := range rd.Messages { - mlocal := m - go func() { - time.Sleep(time.Duration(rand.Int63n(10)) * time.Millisecond) - n.iface.send(mlocal) - }() - } - n.Advance() - case m := <-n.iface.recv(): - go n.Step(context.TODO(), m) - case <-n.stopc: - n.Stop() - log.Printf("raft.%d: stop", n.id) - n.Node = nil - close(n.stopc) - return - case p := <-n.pausec: - recvms := make([]raftpb.Message, 0) - for p { - select { - case m := <-n.iface.recv(): - recvms = append(recvms, m) - case p = <-n.pausec: - } - } - // step all pending messages - for _, m := range recvms { - n.Step(context.TODO(), m) - } - } - } - }() -} - -// stop stops the node. stop a stopped node might panic. -// All in memory state of node is discarded. 
-// All stable MUST be unchanged. -func (n *node) stop() { - n.iface.disconnect() - n.stopc <- struct{}{} - // wait for the shutdown - <-n.stopc -} - -// restart restarts the node. restart a started node -// blocks and might affect the future stop operation. -func (n *node) restart() { - // wait for the shutdown - <-n.stopc - c := &raft.Config{ - ID: n.id, - ElectionTick: 10, - HeartbeatTick: 1, - Storage: n.storage, - MaxSizePerMsg: 1024 * 1024, - MaxInflightMsgs: 256, - MaxUncommittedEntriesSize: 1 << 30, - } - n.Node = raft.RestartNode(c) - n.start() - n.iface.connect() -} - -// pause pauses the node. -// The paused node buffers the received messages and replies -// all of them when it resumes. -func (n *node) pause() { - n.pausec <- true -} - -// resume resumes the paused node. -func (n *node) resume() { - n.pausec <- false -} diff --git a/raft/rafttest/node_bench_test.go b/raft/rafttest/node_bench_test.go deleted file mode 100644 index 6d69003a6290..000000000000 --- a/raft/rafttest/node_bench_test.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package rafttest - -import ( - "context" - "testing" - "time" - - "go.etcd.io/etcd/raft/v3" -) - -func BenchmarkProposal3Nodes(b *testing.B) { - peers := []raft.Peer{{ID: 1, Context: nil}, {ID: 2, Context: nil}, {ID: 3, Context: nil}} - nt := newRaftNetwork(1, 2, 3) - - nodes := make([]*node, 0) - - for i := 1; i <= 3; i++ { - n := startNode(uint64(i), peers, nt.nodeNetwork(uint64(i))) - nodes = append(nodes, n) - } - // get ready and warm up - time.Sleep(50 * time.Millisecond) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - nodes[0].Propose(context.TODO(), []byte("somedata")) - } - - for _, n := range nodes { - if n.state.Commit != uint64(b.N+4) { - continue - } - } - b.StopTimer() - - for _, n := range nodes { - n.stop() - } -} diff --git a/raft/rafttest/node_test.go b/raft/rafttest/node_test.go deleted file mode 100644 index caa0b570b1ce..000000000000 --- a/raft/rafttest/node_test.go +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package rafttest - -import ( - "context" - "testing" - "time" - - "go.etcd.io/etcd/raft/v3" -) - -func TestBasicProgress(t *testing.T) { - peers := []raft.Peer{{ID: 1, Context: nil}, {ID: 2, Context: nil}, {ID: 3, Context: nil}, {ID: 4, Context: nil}, {ID: 5, Context: nil}} - nt := newRaftNetwork(1, 2, 3, 4, 5) - - nodes := make([]*node, 0) - - for i := 1; i <= 5; i++ { - n := startNode(uint64(i), peers, nt.nodeNetwork(uint64(i))) - nodes = append(nodes, n) - } - - waitLeader(nodes) - - for i := 0; i < 100; i++ { - nodes[0].Propose(context.TODO(), []byte("somedata")) - } - - if !waitCommitConverge(nodes, 100) { - t.Errorf("commits failed to converge!") - } - - for _, n := range nodes { - n.stop() - } -} - -func TestRestart(t *testing.T) { - peers := []raft.Peer{{ID: 1, Context: nil}, {ID: 2, Context: nil}, {ID: 3, Context: nil}, {ID: 4, Context: nil}, {ID: 5, Context: nil}} - nt := newRaftNetwork(1, 2, 3, 4, 5) - - nodes := make([]*node, 0) - - for i := 1; i <= 5; i++ { - n := startNode(uint64(i), peers, nt.nodeNetwork(uint64(i))) - nodes = append(nodes, n) - } - - l := waitLeader(nodes) - k1, k2 := (l+1)%5, (l+2)%5 - - for i := 0; i < 30; i++ { - nodes[l].Propose(context.TODO(), []byte("somedata")) - } - nodes[k1].stop() - for i := 0; i < 30; i++ { - nodes[(l+3)%5].Propose(context.TODO(), []byte("somedata")) - } - nodes[k2].stop() - for i := 0; i < 30; i++ { - nodes[(l+4)%5].Propose(context.TODO(), []byte("somedata")) - } - nodes[k2].restart() - for i := 0; i < 30; i++ { - nodes[l].Propose(context.TODO(), []byte("somedata")) - } - nodes[k1].restart() - - if !waitCommitConverge(nodes, 120) { - t.Errorf("commits failed to converge!") - } - - for _, n := range nodes { - n.stop() - } -} - -func TestPause(t *testing.T) { - peers := []raft.Peer{{ID: 1, Context: nil}, {ID: 2, Context: nil}, {ID: 3, Context: nil}, {ID: 4, Context: nil}, {ID: 5, Context: nil}} - nt := newRaftNetwork(1, 2, 3, 4, 5) - - nodes := make([]*node, 0) - - for i := 1; i <= 5; i++ { - n := 
startNode(uint64(i), peers, nt.nodeNetwork(uint64(i))) - nodes = append(nodes, n) - } - - waitLeader(nodes) - - for i := 0; i < 30; i++ { - nodes[0].Propose(context.TODO(), []byte("somedata")) - } - nodes[1].pause() - for i := 0; i < 30; i++ { - nodes[0].Propose(context.TODO(), []byte("somedata")) - } - nodes[2].pause() - for i := 0; i < 30; i++ { - nodes[0].Propose(context.TODO(), []byte("somedata")) - } - nodes[2].resume() - for i := 0; i < 30; i++ { - nodes[0].Propose(context.TODO(), []byte("somedata")) - } - nodes[1].resume() - - if !waitCommitConverge(nodes, 120) { - t.Errorf("commits failed to converge!") - } - - for _, n := range nodes { - n.stop() - } -} - -func waitLeader(ns []*node) int { - var l map[uint64]struct{} - var lindex int - - for { - l = make(map[uint64]struct{}) - - for i, n := range ns { - lead := n.Status().SoftState.Lead - if lead != 0 { - l[lead] = struct{}{} - if n.id == lead { - lindex = i - } - } - } - - if len(l) == 1 { - return lindex - } - } -} - -func waitCommitConverge(ns []*node, target uint64) bool { - var c map[uint64]struct{} - - for i := 0; i < 50; i++ { - c = make(map[uint64]struct{}) - var good int - - for _, n := range ns { - commit := n.Node.Status().HardState.Commit - c[commit] = struct{}{} - if commit > target { - good++ - } - } - - if len(c) == 1 && good == len(ns) { - return true - } - time.Sleep(100 * time.Millisecond) - } - - return false -} diff --git a/raft/rawnode.go b/raft/rawnode.go deleted file mode 100644 index 112d9f704b95..000000000000 --- a/raft/rawnode.go +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "errors" - - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -// ErrStepLocalMsg is returned when try to step a local raft message -var ErrStepLocalMsg = errors.New("raft: cannot step raft local message") - -// ErrStepPeerNotFound is returned when try to step a response message -// but there is no peer found in raft.prs for that node. -var ErrStepPeerNotFound = errors.New("raft: cannot step as peer not found") - -// RawNode is a thread-unsafe Node. -// The methods of this struct correspond to the methods of Node and are described -// more fully there. -type RawNode struct { - raft *raft - prevSoftSt *SoftState - prevHardSt pb.HardState -} - -// NewRawNode instantiates a RawNode from the given configuration. -// -// See Bootstrap() for bootstrapping an initial state; this replaces the former -// 'peers' argument to this method (with identical behavior). However, It is -// recommended that instead of calling Bootstrap, applications bootstrap their -// state manually by setting up a Storage that has a first index > 1 and which -// stores the desired ConfState as its InitialState. -func NewRawNode(config *Config) (*RawNode, error) { - r := newRaft(config) - rn := &RawNode{ - raft: r, - } - rn.prevSoftSt = r.softState() - rn.prevHardSt = r.hardState() - return rn, nil -} - -// Tick advances the internal logical clock by a single tick. 
-func (rn *RawNode) Tick() { - rn.raft.tick() -} - -// TickQuiesced advances the internal logical clock by a single tick without -// performing any other state machine processing. It allows the caller to avoid -// periodic heartbeats and elections when all of the peers in a Raft group are -// known to be at the same state. Expected usage is to periodically invoke Tick -// or TickQuiesced depending on whether the group is "active" or "quiesced". -// -// WARNING: Be very careful about using this method as it subverts the Raft -// state machine. You should probably be using Tick instead. -func (rn *RawNode) TickQuiesced() { - rn.raft.electionElapsed++ -} - -// Campaign causes this RawNode to transition to candidate state. -func (rn *RawNode) Campaign() error { - return rn.raft.Step(pb.Message{ - Type: pb.MsgHup, - }) -} - -// Propose proposes data be appended to the raft log. -func (rn *RawNode) Propose(data []byte) error { - return rn.raft.Step(pb.Message{ - Type: pb.MsgProp, - From: rn.raft.id, - Entries: []pb.Entry{ - {Data: data}, - }}) -} - -// ProposeConfChange proposes a config change. See (Node).ProposeConfChange for -// details. -func (rn *RawNode) ProposeConfChange(cc pb.ConfChangeI) error { - m, err := confChangeToMsg(cc) - if err != nil { - return err - } - return rn.raft.Step(m) -} - -// ApplyConfChange applies a config change to the local node. The app must call -// this when it applies a configuration change, except when it decides to reject -// the configuration change, in which case no call must take place. -func (rn *RawNode) ApplyConfChange(cc pb.ConfChangeI) *pb.ConfState { - cs := rn.raft.applyConfChange(cc.AsV2()) - return &cs -} - -// Step advances the state machine using the given message. 
-func (rn *RawNode) Step(m pb.Message) error { - // ignore unexpected local messages receiving over network - if IsLocalMsg(m.Type) { - return ErrStepLocalMsg - } - if pr := rn.raft.prs.Progress[m.From]; pr != nil || !IsResponseMsg(m.Type) { - return rn.raft.Step(m) - } - return ErrStepPeerNotFound -} - -// Ready returns the outstanding work that the application needs to handle. This -// includes appending and applying entries or a snapshot, updating the HardState, -// and sending messages. The returned Ready() *must* be handled and subsequently -// passed back via Advance(). -func (rn *RawNode) Ready() Ready { - rd := rn.readyWithoutAccept() - rn.acceptReady(rd) - return rd -} - -// readyWithoutAccept returns a Ready. This is a read-only operation, i.e. there -// is no obligation that the Ready must be handled. -func (rn *RawNode) readyWithoutAccept() Ready { - return newReady(rn.raft, rn.prevSoftSt, rn.prevHardSt) -} - -// acceptReady is called when the consumer of the RawNode has decided to go -// ahead and handle a Ready. Nothing must alter the state of the RawNode between -// this call and the prior call to Ready(). -func (rn *RawNode) acceptReady(rd Ready) { - if rd.SoftState != nil { - rn.prevSoftSt = rd.SoftState - } - if !IsEmptyHardState(rd.HardState) { - rn.prevHardSt = rd.HardState - } - if len(rd.ReadStates) != 0 { - rn.raft.readStates = nil - } - rn.raft.msgs = nil -} - -// HasReady called when RawNode user need to check if any Ready pending. 
-func (rn *RawNode) HasReady() bool { - r := rn.raft - if !r.softState().equal(rn.prevSoftSt) { - return true - } - if hardSt := r.hardState(); !IsEmptyHardState(hardSt) && !isHardStateEqual(hardSt, rn.prevHardSt) { - return true - } - if r.raftLog.hasPendingSnapshot() { - return true - } - if len(r.msgs) > 0 || len(r.raftLog.unstableEntries()) > 0 || r.raftLog.hasNextCommittedEnts() { - return true - } - if len(r.readStates) != 0 { - return true - } - return false -} - -// Advance notifies the RawNode that the application has applied and saved progress in the -// last Ready results. -func (rn *RawNode) Advance(rd Ready) { - rn.raft.advance(rd) -} - -// Status returns the current status of the given group. This allocates, see -// BasicStatus and WithProgress for allocation-friendlier choices. -func (rn *RawNode) Status() Status { - status := getStatus(rn.raft) - return status -} - -// BasicStatus returns a BasicStatus. Notably this does not contain the -// Progress map; see WithProgress for an allocation-free way to inspect it. -func (rn *RawNode) BasicStatus() BasicStatus { - return getBasicStatus(rn.raft) -} - -// ProgressType indicates the type of replica a Progress corresponds to. -type ProgressType byte - -const ( - // ProgressTypePeer accompanies a Progress for a regular peer replica. - ProgressTypePeer ProgressType = iota - // ProgressTypeLearner accompanies a Progress for a learner replica. - ProgressTypeLearner -) - -// WithProgress is a helper to introspect the Progress for this node and its -// peers. -func (rn *RawNode) WithProgress(visitor func(id uint64, typ ProgressType, pr tracker.Progress)) { - rn.raft.prs.Visit(func(id uint64, pr *tracker.Progress) { - typ := ProgressTypePeer - if pr.IsLearner { - typ = ProgressTypeLearner - } - p := *pr - p.Inflights = nil - visitor(id, typ, p) - }) -} - -// ReportUnreachable reports the given node is not reachable for the last send. 
-func (rn *RawNode) ReportUnreachable(id uint64) { - _ = rn.raft.Step(pb.Message{Type: pb.MsgUnreachable, From: id}) -} - -// ReportSnapshot reports the status of the sent snapshot. -func (rn *RawNode) ReportSnapshot(id uint64, status SnapshotStatus) { - rej := status == SnapshotFailure - - _ = rn.raft.Step(pb.Message{Type: pb.MsgSnapStatus, From: id, Reject: rej}) -} - -// TransferLeader tries to transfer leadership to the given transferee. -func (rn *RawNode) TransferLeader(transferee uint64) { - _ = rn.raft.Step(pb.Message{Type: pb.MsgTransferLeader, From: transferee}) -} - -// ReadIndex requests a read state. The read state will be set in ready. -// Read State has a read index. Once the application advances further than the read -// index, any linearizable read requests issued before the read request can be -// processed safely. The read state will have the same rctx attached. -func (rn *RawNode) ReadIndex(rctx []byte) { - _ = rn.raft.Step(pb.Message{Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: rctx}}}) -} diff --git a/raft/rawnode_test.go b/raft/rawnode_test.go deleted file mode 100644 index 61448a171ac7..000000000000 --- a/raft/rawnode_test.go +++ /dev/null @@ -1,1234 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package raft - -import ( - "bytes" - "context" - "fmt" - "math" - "reflect" - "testing" - - "go.etcd.io/etcd/raft/v3/quorum" - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -// rawNodeAdapter is essentially a lint that makes sure that RawNode implements -// "most of" Node. The exceptions (some of which are easy to fix) are listed -// below. -type rawNodeAdapter struct { - *RawNode -} - -var _ Node = (*rawNodeAdapter)(nil) - -// TransferLeadership is to test when node specifies lead, which is pointless, can just be filled in. -func (a *rawNodeAdapter) TransferLeadership(ctx context.Context, lead, transferee uint64) { - a.RawNode.TransferLeader(transferee) -} - -// Stop when node has a goroutine, RawNode doesn't need this. -func (a *rawNodeAdapter) Stop() {} - -// Status retirns RawNode's status as *Status. -func (a *rawNodeAdapter) Status() Status { return a.RawNode.Status() } - -// Advance is when RawNode takes a Ready. It doesn't really have to do that I think? It can hold on -// to it internally. But maybe that approach is frail. -func (a *rawNodeAdapter) Advance() { a.RawNode.Advance(Ready{}) } - -// Ready when RawNode returns a Ready, not a chan of one. -func (a *rawNodeAdapter) Ready() <-chan Ready { return nil } - -// Node takes more contexts. Easy enough to fix. - -func (a *rawNodeAdapter) Campaign(context.Context) error { return a.RawNode.Campaign() } -func (a *rawNodeAdapter) ReadIndex(_ context.Context, rctx []byte) error { - a.RawNode.ReadIndex(rctx) - // RawNode swallowed the error in ReadIndex, it probably should not do that. 
- return nil -} -func (a *rawNodeAdapter) Step(_ context.Context, m pb.Message) error { return a.RawNode.Step(m) } -func (a *rawNodeAdapter) Propose(_ context.Context, data []byte) error { - return a.RawNode.Propose(data) -} -func (a *rawNodeAdapter) ProposeConfChange(_ context.Context, cc pb.ConfChangeI) error { - return a.RawNode.ProposeConfChange(cc) -} - -// TestRawNodeStep ensures that RawNode.Step ignore local message. -func TestRawNodeStep(t *testing.T) { - for i, msgn := range pb.MessageType_name { - t.Run(msgn, func(t *testing.T) { - s := NewMemoryStorage() - s.SetHardState(pb.HardState{Term: 1, Commit: 1}) - s.Append([]pb.Entry{{Term: 1, Index: 1}}) - if err := s.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{ - ConfState: pb.ConfState{ - Voters: []uint64{1}, - }, - Index: 1, - Term: 1, - }}); err != nil { - t.Fatal(err) - } - // Append an empty entry to make sure the non-local messages (like - // vote requests) are ignored and don't trigger assertions. - rawNode, err := NewRawNode(newTestConfig(1, 10, 1, s)) - if err != nil { - t.Fatal(err) - } - msgt := pb.MessageType(i) - err = rawNode.Step(pb.Message{Type: msgt}) - // LocalMsg should be ignored. - if IsLocalMsg(msgt) { - if err != ErrStepLocalMsg { - t.Errorf("%d: step should ignore %s", msgt, msgn) - } - } - }) - } -} - -// TestNodeStepUnblock from node_test.go has no equivalent in rawNode because there is -// no goroutine in RawNode. - -// TestRawNodeProposeAndConfChange tests the configuration change mechanism. Each -// test case sends a configuration change which is either simple or joint, verifies -// that it applies and that the resulting ConfState matches expectations, and for -// joint configurations makes sure that they are exited successfully. -func TestRawNodeProposeAndConfChange(t *testing.T) { - testCases := []struct { - cc pb.ConfChangeI - exp pb.ConfState - exp2 *pb.ConfState - }{ - // V1 config change. 
- { - pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: 2}, - pb.ConfState{Voters: []uint64{1, 2}}, - nil, - }, - // Proposing the same as a V2 change works just the same, without entering - // a joint config. - { - pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ - {Type: pb.ConfChangeAddNode, NodeID: 2}, - }, - }, - pb.ConfState{Voters: []uint64{1, 2}}, - nil, - }, - // Ditto if we add it as a learner instead. - { - pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ - {Type: pb.ConfChangeAddLearnerNode, NodeID: 2}, - }, - }, - pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}}, - nil, - }, - // We can ask explicitly for joint consensus if we want it. - { - pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ - {Type: pb.ConfChangeAddLearnerNode, NodeID: 2}, - }, - Transition: pb.ConfChangeTransitionJointExplicit, - }, - pb.ConfState{Voters: []uint64{1}, VotersOutgoing: []uint64{1}, Learners: []uint64{2}}, - &pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}}, - }, - // Ditto, but with implicit transition (the harness checks this). - { - pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ - {Type: pb.ConfChangeAddLearnerNode, NodeID: 2}, - }, - Transition: pb.ConfChangeTransitionJointImplicit, - }, - pb.ConfState{ - Voters: []uint64{1}, VotersOutgoing: []uint64{1}, Learners: []uint64{2}, - AutoLeave: true, - }, - &pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}}, - }, - // Add a new node and demote n1. This exercises the interesting case in - // which we really need joint config changes and also need LearnersNext. - { - pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ - {NodeID: 2, Type: pb.ConfChangeAddNode}, - {NodeID: 1, Type: pb.ConfChangeAddLearnerNode}, - {NodeID: 3, Type: pb.ConfChangeAddLearnerNode}, - }, - }, - pb.ConfState{ - Voters: []uint64{2}, - VotersOutgoing: []uint64{1}, - Learners: []uint64{3}, - LearnersNext: []uint64{1}, - AutoLeave: true, - }, - &pb.ConfState{Voters: []uint64{2}, Learners: []uint64{1, 3}}, - }, - // Ditto explicit. 
- { - pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ - {NodeID: 2, Type: pb.ConfChangeAddNode}, - {NodeID: 1, Type: pb.ConfChangeAddLearnerNode}, - {NodeID: 3, Type: pb.ConfChangeAddLearnerNode}, - }, - Transition: pb.ConfChangeTransitionJointExplicit, - }, - pb.ConfState{ - Voters: []uint64{2}, - VotersOutgoing: []uint64{1}, - Learners: []uint64{3}, - LearnersNext: []uint64{1}, - }, - &pb.ConfState{Voters: []uint64{2}, Learners: []uint64{1, 3}}, - }, - // Ditto implicit. - { - pb.ConfChangeV2{ - Changes: []pb.ConfChangeSingle{ - {NodeID: 2, Type: pb.ConfChangeAddNode}, - {NodeID: 1, Type: pb.ConfChangeAddLearnerNode}, - {NodeID: 3, Type: pb.ConfChangeAddLearnerNode}, - }, - Transition: pb.ConfChangeTransitionJointImplicit, - }, - pb.ConfState{ - Voters: []uint64{2}, - VotersOutgoing: []uint64{1}, - Learners: []uint64{3}, - LearnersNext: []uint64{1}, - AutoLeave: true, - }, - &pb.ConfState{Voters: []uint64{2}, Learners: []uint64{1, 3}}, - }, - } - - for _, tc := range testCases { - t.Run("", func(t *testing.T) { - s := newTestMemoryStorage(withPeers(1)) - rawNode, err := NewRawNode(newTestConfig(1, 10, 1, s)) - if err != nil { - t.Fatal(err) - } - - rawNode.Campaign() - proposed := false - var ( - lastIndex uint64 - ccdata []byte - ) - // Propose the ConfChange, wait until it applies, save the resulting - // ConfState. - var cs *pb.ConfState - for cs == nil { - rd := rawNode.Ready() - s.Append(rd.Entries) - for _, ent := range rd.CommittedEntries { - var cc pb.ConfChangeI - if ent.Type == pb.EntryConfChange { - var ccc pb.ConfChange - if err = ccc.Unmarshal(ent.Data); err != nil { - t.Fatal(err) - } - cc = ccc - } else if ent.Type == pb.EntryConfChangeV2 { - var ccc pb.ConfChangeV2 - if err = ccc.Unmarshal(ent.Data); err != nil { - t.Fatal(err) - } - cc = ccc - } - if cc != nil { - cs = rawNode.ApplyConfChange(cc) - } - } - rawNode.Advance(rd) - // Once we are the leader, propose a command and a ConfChange. 
- if !proposed && rd.SoftState.Lead == rawNode.raft.id { - if err = rawNode.Propose([]byte("somedata")); err != nil { - t.Fatal(err) - } - if ccv1, ok := tc.cc.AsV1(); ok { - ccdata, err = ccv1.Marshal() - if err != nil { - t.Fatal(err) - } - rawNode.ProposeConfChange(ccv1) - } else { - ccv2 := tc.cc.AsV2() - ccdata, err = ccv2.Marshal() - if err != nil { - t.Fatal(err) - } - rawNode.ProposeConfChange(ccv2) - } - proposed = true - } - } - - // Check that the last index is exactly the conf change we put in, - // down to the bits. Note that this comes from the Storage, which - // will not reflect any unstable entries that we'll only be presented - // with in the next Ready. - lastIndex, err = s.LastIndex() - if err != nil { - t.Fatal(err) - } - - entries, err := s.Entries(lastIndex-1, lastIndex+1, noLimit) - if err != nil { - t.Fatal(err) - } - if len(entries) != 2 { - t.Fatalf("len(entries) = %d, want %d", len(entries), 2) - } - if !bytes.Equal(entries[0].Data, []byte("somedata")) { - t.Errorf("entries[0].Data = %v, want %v", entries[0].Data, []byte("somedata")) - } - typ := pb.EntryConfChange - if _, ok := tc.cc.AsV1(); !ok { - typ = pb.EntryConfChangeV2 - } - if entries[1].Type != typ { - t.Fatalf("type = %v, want %v", entries[1].Type, typ) - } - if !bytes.Equal(entries[1].Data, ccdata) { - t.Errorf("data = %v, want %v", entries[1].Data, ccdata) - } - - if exp := &tc.exp; !reflect.DeepEqual(exp, cs) { - t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs) - } - - var maybePlusOne uint64 - if autoLeave, ok := tc.cc.AsV2().EnterJoint(); ok && autoLeave { - // If this is an auto-leaving joint conf change, it will have - // appended the entry that auto-leaves, so add one to the last - // index that forms the basis of our expectations on - // pendingConfIndex. (Recall that lastIndex was taken from stable - // storage, but this auto-leaving entry isn't on stable storage - // yet). 
- maybePlusOne = 1 - } - if exp, act := lastIndex+maybePlusOne, rawNode.raft.pendingConfIndex; exp != act { - t.Fatalf("pendingConfIndex: expected %d, got %d", exp, act) - } - - // Move the RawNode along. If the ConfChange was simple, nothing else - // should happen. Otherwise, we're in a joint state, which is either - // left automatically or not. If not, we add the proposal that leaves - // it manually. - rd := rawNode.Ready() - var context []byte - if !tc.exp.AutoLeave { - if len(rd.Entries) > 0 { - t.Fatal("expected no more entries") - } - if tc.exp2 == nil { - return - } - context = []byte("manual") - t.Log("leaving joint state manually") - if err := rawNode.ProposeConfChange(pb.ConfChangeV2{Context: context}); err != nil { - t.Fatal(err) - } - rd = rawNode.Ready() - } - - // Check that the right ConfChange comes out. - if len(rd.Entries) != 1 || rd.Entries[0].Type != pb.EntryConfChangeV2 { - t.Fatalf("expected exactly one more entry, got %+v", rd) - } - var cc pb.ConfChangeV2 - if err := cc.Unmarshal(rd.Entries[0].Data); err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(cc, pb.ConfChangeV2{Context: context}) { - t.Fatalf("expected zero ConfChangeV2, got %+v", cc) - } - // Lie and pretend the ConfChange applied. It won't do so because now - // we require the joint quorum and we're only running one node. - cs = rawNode.ApplyConfChange(cc) - if exp := tc.exp2; !reflect.DeepEqual(exp, cs) { - t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs) - } - }) - } -} - -// TestRawNodeJointAutoLeave tests the configuration change auto leave even leader -// lost leadership. 
-func TestRawNodeJointAutoLeave(t *testing.T) { - testCc := pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ - {Type: pb.ConfChangeAddLearnerNode, NodeID: 2}, - }, - Transition: pb.ConfChangeTransitionJointImplicit, - } - expCs := pb.ConfState{ - Voters: []uint64{1}, VotersOutgoing: []uint64{1}, Learners: []uint64{2}, - AutoLeave: true, - } - exp2Cs := pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}} - - s := newTestMemoryStorage(withPeers(1)) - rawNode, err := NewRawNode(newTestConfig(1, 10, 1, s)) - if err != nil { - t.Fatal(err) - } - - rawNode.Campaign() - proposed := false - var ( - lastIndex uint64 - ccdata []byte - ) - // Propose the ConfChange, wait until it applies, save the resulting - // ConfState. - var cs *pb.ConfState - for cs == nil { - rd := rawNode.Ready() - s.Append(rd.Entries) - for _, ent := range rd.CommittedEntries { - var cc pb.ConfChangeI - if ent.Type == pb.EntryConfChangeV2 { - var ccc pb.ConfChangeV2 - if err = ccc.Unmarshal(ent.Data); err != nil { - t.Fatal(err) - } - cc = &ccc - } - if cc != nil { - // Force it step down. - rawNode.Step(pb.Message{Type: pb.MsgHeartbeatResp, From: 1, Term: rawNode.raft.Term + 1}) - cs = rawNode.ApplyConfChange(cc) - } - } - rawNode.Advance(rd) - // Once we are the leader, propose a command and a ConfChange. - if !proposed && rd.SoftState.Lead == rawNode.raft.id { - if err = rawNode.Propose([]byte("somedata")); err != nil { - t.Fatal(err) - } - ccdata, err = testCc.Marshal() - if err != nil { - t.Fatal(err) - } - rawNode.ProposeConfChange(testCc) - proposed = true - } - } - - // Check that the last index is exactly the conf change we put in, - // down to the bits. Note that this comes from the Storage, which - // will not reflect any unstable entries that we'll only be presented - // with in the next Ready. 
- lastIndex, err = s.LastIndex() - if err != nil { - t.Fatal(err) - } - - entries, err := s.Entries(lastIndex-1, lastIndex+1, noLimit) - if err != nil { - t.Fatal(err) - } - if len(entries) != 2 { - t.Fatalf("len(entries) = %d, want %d", len(entries), 2) - } - if !bytes.Equal(entries[0].Data, []byte("somedata")) { - t.Errorf("entries[0].Data = %v, want %v", entries[0].Data, []byte("somedata")) - } - if entries[1].Type != pb.EntryConfChangeV2 { - t.Fatalf("type = %v, want %v", entries[1].Type, pb.EntryConfChangeV2) - } - if !bytes.Equal(entries[1].Data, ccdata) { - t.Errorf("data = %v, want %v", entries[1].Data, ccdata) - } - - if !reflect.DeepEqual(&expCs, cs) { - t.Fatalf("exp:\n%+v\nact:\n%+v", expCs, cs) - } - - if rawNode.raft.pendingConfIndex != 0 { - t.Fatalf("pendingConfIndex: expected %d, got %d", 0, rawNode.raft.pendingConfIndex) - } - - // Move the RawNode along. It should not leave joint because it's follower. - rd := rawNode.readyWithoutAccept() - // Check that the right ConfChange comes out. - if len(rd.Entries) != 0 { - t.Fatalf("expected zero entry, got %+v", rd) - } - - // Make it leader again. It should leave joint automatically after moving apply index. - rawNode.Campaign() - rd = rawNode.Ready() - t.Log(DescribeReady(rd, nil)) - s.Append(rd.Entries) - rawNode.Advance(rd) - rd = rawNode.Ready() - t.Log(DescribeReady(rd, nil)) - s.Append(rd.Entries) - rawNode.Advance(rd) - rd = rawNode.Ready() - t.Log(DescribeReady(rd, nil)) - s.Append(rd.Entries) - // Check that the right ConfChange comes out. - if len(rd.Entries) != 1 || rd.Entries[0].Type != pb.EntryConfChangeV2 { - t.Fatalf("expected exactly one more entry, got %+v", rd) - } - var cc pb.ConfChangeV2 - if err := cc.Unmarshal(rd.Entries[0].Data); err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(cc, pb.ConfChangeV2{Context: nil}) { - t.Fatalf("expected zero ConfChangeV2, got %+v", cc) - } - // Lie and pretend the ConfChange applied. 
It won't do so because now - // we require the joint quorum and we're only running one node. - cs = rawNode.ApplyConfChange(cc) - if exp := exp2Cs; !reflect.DeepEqual(&exp, cs) { - t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs) - } -} - -// TestRawNodeProposeAddDuplicateNode ensures that two proposes to add the same node should -// not affect the later propose to add new node. -func TestRawNodeProposeAddDuplicateNode(t *testing.T) { - s := newTestMemoryStorage(withPeers(1)) - rawNode, err := NewRawNode(newTestConfig(1, 10, 1, s)) - if err != nil { - t.Fatal(err) - } - rd := rawNode.Ready() - s.Append(rd.Entries) - rawNode.Advance(rd) - - rawNode.Campaign() - for { - rd = rawNode.Ready() - s.Append(rd.Entries) - if rd.SoftState.Lead == rawNode.raft.id { - rawNode.Advance(rd) - break - } - rawNode.Advance(rd) - } - - proposeConfChangeAndApply := func(cc pb.ConfChange) { - rawNode.ProposeConfChange(cc) - rd = rawNode.Ready() - s.Append(rd.Entries) - for _, entry := range rd.CommittedEntries { - if entry.Type == pb.EntryConfChange { - var cc pb.ConfChange - cc.Unmarshal(entry.Data) - rawNode.ApplyConfChange(cc) - } - } - rawNode.Advance(rd) - } - - cc1 := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: 1} - ccdata1, err := cc1.Marshal() - if err != nil { - t.Fatal(err) - } - proposeConfChangeAndApply(cc1) - - // try to add the same node again - proposeConfChangeAndApply(cc1) - - // the new node join should be ok - cc2 := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: 2} - ccdata2, err := cc2.Marshal() - if err != nil { - t.Fatal(err) - } - proposeConfChangeAndApply(cc2) - - lastIndex, err := s.LastIndex() - if err != nil { - t.Fatal(err) - } - - // the last three entries should be: ConfChange cc1, cc1, cc2 - entries, err := s.Entries(lastIndex-2, lastIndex+1, noLimit) - if err != nil { - t.Fatal(err) - } - if len(entries) != 3 { - t.Fatalf("len(entries) = %d, want %d", len(entries), 3) - } - if !bytes.Equal(entries[0].Data, ccdata1) { - t.Errorf("entries[0].Data = %v, 
want %v", entries[0].Data, ccdata1) - } - if !bytes.Equal(entries[2].Data, ccdata2) { - t.Errorf("entries[2].Data = %v, want %v", entries[2].Data, ccdata2) - } -} - -// TestRawNodeReadIndex ensures that Rawnode.ReadIndex sends the MsgReadIndex message -// to the underlying raft. It also ensures that ReadState can be read out. -func TestRawNodeReadIndex(t *testing.T) { - var msgs []pb.Message - appendStep := func(r *raft, m pb.Message) error { - msgs = append(msgs, m) - return nil - } - wrs := []ReadState{{Index: uint64(1), RequestCtx: []byte("somedata")}} - - s := newTestMemoryStorage(withPeers(1)) - c := newTestConfig(1, 10, 1, s) - rawNode, err := NewRawNode(c) - if err != nil { - t.Fatal(err) - } - rawNode.raft.readStates = wrs - // ensure the ReadStates can be read out - hasReady := rawNode.HasReady() - if !hasReady { - t.Errorf("HasReady() returns %t, want %t", hasReady, true) - } - rd := rawNode.Ready() - if !reflect.DeepEqual(rd.ReadStates, wrs) { - t.Errorf("ReadStates = %d, want %d", rd.ReadStates, wrs) - } - s.Append(rd.Entries) - rawNode.Advance(rd) - // ensure raft.readStates is reset after advance - if rawNode.raft.readStates != nil { - t.Errorf("readStates = %v, want %v", rawNode.raft.readStates, nil) - } - - wrequestCtx := []byte("somedata2") - rawNode.Campaign() - for { - rd = rawNode.Ready() - s.Append(rd.Entries) - - if rd.SoftState.Lead == rawNode.raft.id { - rawNode.Advance(rd) - - // Once we are the leader, issue a ReadIndex request - rawNode.raft.step = appendStep - rawNode.ReadIndex(wrequestCtx) - break - } - rawNode.Advance(rd) - } - // ensure that MsgReadIndex message is sent to the underlying raft - if len(msgs) != 1 { - t.Fatalf("len(msgs) = %d, want %d", len(msgs), 1) - } - if msgs[0].Type != pb.MsgReadIndex { - t.Errorf("msg type = %d, want %d", msgs[0].Type, pb.MsgReadIndex) - } - if !bytes.Equal(msgs[0].Entries[0].Data, wrequestCtx) { - t.Errorf("data = %v, want %v", msgs[0].Entries[0].Data, wrequestCtx) - } -} - -// TestBlockProposal 
from node_test.go has no equivalent in rawNode because there is -// no leader check in RawNode. - -// TestNodeTick from node_test.go has no equivalent in rawNode because -// it reaches into the raft object which is not exposed. - -// TestNodeStop from node_test.go has no equivalent in rawNode because there is -// no goroutine in RawNode. - -// TestRawNodeStart ensures that a node can be started correctly. Note that RawNode -// requires the application to bootstrap the state, i.e. it does not accept peers -// and will not create faux configuration change entries. -func TestRawNodeStart(t *testing.T) { - entries := []pb.Entry{ - {Term: 1, Index: 2, Data: nil}, // empty entry - {Term: 1, Index: 3, Data: []byte("foo")}, // empty entry - } - want := Ready{ - SoftState: &SoftState{Lead: 1, RaftState: StateLeader}, - HardState: pb.HardState{Term: 1, Commit: 3, Vote: 1}, - Entries: nil, // emitted & checked in intermediate Ready cycle - CommittedEntries: entries, - MustSync: false, // since we're only applying, not appending - } - - storage := NewMemoryStorage() - storage.ents[0].Index = 1 - - // TODO(tbg): this is a first prototype of what bootstrapping could look - // like (without the annoying faux ConfChanges). We want to persist a - // ConfState at some index and make sure that this index can't be reached - // from log position 1, so that followers are forced to pick up the - // ConfState in order to move away from log position 1 (unless they got - // bootstrapped in the same way already). Failing to do so would mean that - // followers diverge from the bootstrapped nodes and don't learn about the - // initial config. - // - // NB: this is exactly what CockroachDB does. The Raft log really begins at - // index 10, so empty followers (at index 1) always need a snapshot first. 
- type appenderStorage interface { - Storage - ApplySnapshot(pb.Snapshot) error - } - bootstrap := func(storage appenderStorage, cs pb.ConfState) error { - if len(cs.Voters) == 0 { - return fmt.Errorf("no voters specified") - } - fi, err := storage.FirstIndex() - if err != nil { - return err - } - if fi < 2 { - return fmt.Errorf("FirstIndex >= 2 is prerequisite for bootstrap") - } - if _, err = storage.Entries(fi, fi, math.MaxUint64); err == nil { - // TODO(tbg): match exact error - return fmt.Errorf("should not have been able to load first index") - } - li, err := storage.LastIndex() - if err != nil { - return err - } - if _, err = storage.Entries(li, li, math.MaxUint64); err == nil { - return fmt.Errorf("should not have been able to load last index") - } - hs, ics, err := storage.InitialState() - if err != nil { - return err - } - if !IsEmptyHardState(hs) { - return fmt.Errorf("HardState not empty") - } - if len(ics.Voters) != 0 { - return fmt.Errorf("ConfState not empty") - } - - meta := pb.SnapshotMetadata{ - Index: 1, - Term: 0, - ConfState: cs, - } - snap := pb.Snapshot{Metadata: meta} - return storage.ApplySnapshot(snap) - } - - if err := bootstrap(storage, pb.ConfState{Voters: []uint64{1}}); err != nil { - t.Fatal(err) - } - - rawNode, err := NewRawNode(newTestConfig(1, 10, 1, storage)) - if err != nil { - t.Fatal(err) - } - if rawNode.HasReady() { - t.Fatalf("unexpected ready: %+v", rawNode.Ready()) - } - rawNode.Campaign() - rawNode.Propose([]byte("foo")) - if !rawNode.HasReady() { - t.Fatal("expected a Ready") - } - rd := rawNode.Ready() - if !reflect.DeepEqual(entries, rd.Entries) { - t.Fatalf("expected to see entries\n%s, not\n%s", DescribeEntries(entries, nil), DescribeEntries(rd.Entries, nil)) - } - storage.Append(rd.Entries) - rawNode.Advance(rd) - - if !rawNode.HasReady() { - t.Fatal("expected a Ready") - } - rd = rawNode.Ready() - if len(rd.Entries) != 0 { - t.Fatalf("unexpected entries: %s", DescribeEntries(rd.Entries, nil)) - } - if rd.MustSync 
{ - t.Fatalf("should not need to sync") - } - rawNode.Advance(rd) - - rd.SoftState, want.SoftState = nil, nil - - if !reflect.DeepEqual(rd, want) { - t.Fatalf("unexpected Ready:\n%+v\nvs\n%+v", rd, want) - } - - if rawNode.HasReady() { - t.Errorf("unexpected Ready: %+v", rawNode.Ready()) - } -} - -func TestRawNodeRestart(t *testing.T) { - entries := []pb.Entry{ - {Term: 1, Index: 1}, - {Term: 1, Index: 2, Data: []byte("foo")}, - } - st := pb.HardState{Term: 1, Commit: 1} - - want := Ready{ - HardState: emptyState, - // commit up to commit index in st - CommittedEntries: entries[:st.Commit], - MustSync: false, - } - - storage := newTestMemoryStorage(withPeers(1)) - storage.SetHardState(st) - storage.Append(entries) - rawNode, err := NewRawNode(newTestConfig(1, 10, 1, storage)) - if err != nil { - t.Fatal(err) - } - rd := rawNode.Ready() - if !reflect.DeepEqual(rd, want) { - t.Errorf("g = %+v,\n w %+v", rd, want) - } - rawNode.Advance(rd) - if rawNode.HasReady() { - t.Errorf("unexpected Ready: %+v", rawNode.Ready()) - } -} - -func TestRawNodeRestartFromSnapshot(t *testing.T) { - snap := pb.Snapshot{ - Metadata: pb.SnapshotMetadata{ - ConfState: pb.ConfState{Voters: []uint64{1, 2}}, - Index: 2, - Term: 1, - }, - } - entries := []pb.Entry{ - {Term: 1, Index: 3, Data: []byte("foo")}, - } - st := pb.HardState{Term: 1, Commit: 3} - - want := Ready{ - HardState: emptyState, - // commit up to commit index in st - CommittedEntries: entries, - MustSync: false, - } - - s := NewMemoryStorage() - s.SetHardState(st) - s.ApplySnapshot(snap) - s.Append(entries) - rawNode, err := NewRawNode(newTestConfig(1, 10, 1, s)) - if err != nil { - t.Fatal(err) - } - if rd := rawNode.Ready(); !reflect.DeepEqual(rd, want) { - t.Errorf("g = %+v,\n w %+v", rd, want) - } else { - rawNode.Advance(rd) - } - if rawNode.HasReady() { - t.Errorf("unexpected Ready: %+v", rawNode.HasReady()) - } -} - -// TestNodeAdvance from node_test.go has no equivalent in rawNode because there is -// no dependency 
check between Ready() and Advance() - -func TestRawNodeStatus(t *testing.T) { - s := newTestMemoryStorage(withPeers(1)) - rn, err := NewRawNode(newTestConfig(1, 10, 1, s)) - if err != nil { - t.Fatal(err) - } - if status := rn.Status(); status.Progress != nil { - t.Fatalf("expected no Progress because not leader: %+v", status.Progress) - } - if err := rn.Campaign(); err != nil { - t.Fatal(err) - } - status := rn.Status() - if status.Lead != 1 { - t.Fatal("not lead") - } - if status.RaftState != StateLeader { - t.Fatal("not leader") - } - if exp, act := *rn.raft.prs.Progress[1], status.Progress[1]; !reflect.DeepEqual(exp, act) { - t.Fatalf("want: %+v\ngot: %+v", exp, act) - } - expCfg := tracker.Config{Voters: quorum.JointConfig{ - quorum.MajorityConfig{1: {}}, - nil, - }} - if !reflect.DeepEqual(expCfg, status.Config) { - t.Fatalf("want: %+v\ngot: %+v", expCfg, status.Config) - } -} - -// TestRawNodeCommitPaginationAfterRestart is the RawNode version of -// TestNodeCommitPaginationAfterRestart. The anomaly here was even worse as the -// Raft group would forget to apply entries: -// -// - node learns that index 11 is committed -// - nextCommittedEnts returns index 1..10 in CommittedEntries (but index 10 -// already exceeds maxBytes), which isn't noticed internally by Raft -// - Commit index gets bumped to 10 -// - the node persists the HardState, but crashes before applying the entries -// - upon restart, the storage returns the same entries, but `slice` takes a -// different code path and removes the last entry. -// - Raft does not emit a HardState, but when the app calls Advance(), it bumps -// its internal applied index cursor to 10 (when it should be 9) -// - the next Ready asks the app to apply index 11 (omitting index 10), losing a -// write. 
-func TestRawNodeCommitPaginationAfterRestart(t *testing.T) { - s := &ignoreSizeHintMemStorage{ - MemoryStorage: newTestMemoryStorage(withPeers(1)), - } - persistedHardState := pb.HardState{ - Term: 1, - Vote: 1, - Commit: 10, - } - - s.hardState = persistedHardState - s.ents = make([]pb.Entry, 10) - var size uint64 - for i := range s.ents { - ent := pb.Entry{ - Term: 1, - Index: uint64(i + 1), - Type: pb.EntryNormal, - Data: []byte("a"), - } - - s.ents[i] = ent - size += uint64(ent.Size()) - } - - cfg := newTestConfig(1, 10, 1, s) - // Set a MaxSizePerMsg that would suggest to Raft that the last committed entry should - // not be included in the initial rd.CommittedEntries. However, our storage will ignore - // this and *will* return it (which is how the Commit index ended up being 10 initially). - cfg.MaxSizePerMsg = size - uint64(s.ents[len(s.ents)-1].Size()) - 1 - - s.ents = append(s.ents, pb.Entry{ - Term: 1, - Index: uint64(11), - Type: pb.EntryNormal, - Data: []byte("boom"), - }) - - rawNode, err := NewRawNode(cfg) - if err != nil { - t.Fatal(err) - } - - for highestApplied := uint64(0); highestApplied != 11; { - rd := rawNode.Ready() - n := len(rd.CommittedEntries) - if n == 0 { - t.Fatalf("stopped applying entries at index %d", highestApplied) - } - if next := rd.CommittedEntries[0].Index; highestApplied != 0 && highestApplied+1 != next { - t.Fatalf("attempting to apply index %d after index %d, leaving a gap", next, highestApplied) - } - highestApplied = rd.CommittedEntries[n-1].Index - rawNode.Advance(rd) - rawNode.Step(pb.Message{ - Type: pb.MsgHeartbeat, - To: 1, - From: 2, // illegal, but we get away with it - Term: 1, - Commit: 11, - }) - } -} - -// TestRawNodeBoundedLogGrowthWithPartition tests a scenario where a leader is -// partitioned from a quorum of nodes. It verifies that the leader's log is -// protected from unbounded growth even as new entries continue to be proposed. 
-// This protection is provided by the MaxUncommittedEntriesSize configuration. -func TestRawNodeBoundedLogGrowthWithPartition(t *testing.T) { - const maxEntries = 16 - data := []byte("testdata") - testEntry := pb.Entry{Data: data} - maxEntrySize := uint64(maxEntries * PayloadSize(testEntry)) - t.Log("maxEntrySize", maxEntrySize) - - s := newTestMemoryStorage(withPeers(1)) - cfg := newTestConfig(1, 10, 1, s) - cfg.MaxUncommittedEntriesSize = maxEntrySize - rawNode, err := NewRawNode(cfg) - if err != nil { - t.Fatal(err) - } - - // Become the leader and apply empty entry. - rawNode.Campaign() - for { - rd := rawNode.Ready() - s.Append(rd.Entries) - rawNode.Advance(rd) - if len(rd.CommittedEntries) > 0 { - break - } - } - - // Simulate a network partition while we make our proposals by never - // committing anything. These proposals should not cause the leader's - // log to grow indefinitely. - for i := 0; i < 1024; i++ { - rawNode.Propose(data) - } - - // Check the size of leader's uncommitted log tail. It should not exceed the - // MaxUncommittedEntriesSize limit. - checkUncommitted := func(exp uint64) { - t.Helper() - if a := rawNode.raft.uncommittedSize; exp != a { - t.Fatalf("expected %d uncommitted entry bytes, found %d", exp, a) - } - } - checkUncommitted(maxEntrySize) - - // Recover from the partition. The uncommitted tail of the Raft log should - // disappear as entries are committed. - rd := rawNode.Ready() - if len(rd.Entries) != maxEntries { - t.Fatalf("expected %d entries, got %d", maxEntries, len(rd.Entries)) - } - s.Append(rd.Entries) - rawNode.Advance(rd) - - // Entries are appended, but not applied. 
- checkUncommitted(maxEntrySize) - - rd = rawNode.Ready() - if len(rd.Entries) != 0 { - t.Fatalf("unexpected entries: %s", DescribeEntries(rd.Entries, nil)) - } - if len(rd.CommittedEntries) != maxEntries { - t.Fatalf("expected %d entries, got %d", maxEntries, len(rd.CommittedEntries)) - } - rawNode.Advance(rd) - - checkUncommitted(0) -} - -func BenchmarkStatus(b *testing.B) { - setup := func(members int) *RawNode { - peers := make([]uint64, members) - for i := range peers { - peers[i] = uint64(i + 1) - } - cfg := newTestConfig(1, 3, 1, newTestMemoryStorage(withPeers(peers...))) - cfg.Logger = discardLogger - r := newRaft(cfg) - r.becomeFollower(1, 1) - r.becomeCandidate() - r.becomeLeader() - return &RawNode{raft: r} - } - - for _, members := range []int{1, 3, 5, 100} { - b.Run(fmt.Sprintf("members=%d", members), func(b *testing.B) { - rn := setup(members) - - b.Run("Status", func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - _ = rn.Status() - } - }) - - b.Run("Status-example", func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - s := rn.Status() - var n uint64 - for _, pr := range s.Progress { - n += pr.Match - } - _ = n - } - }) - - b.Run("BasicStatus", func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - _ = rn.BasicStatus() - } - }) - - b.Run("WithProgress", func(b *testing.B) { - b.ReportAllocs() - visit := func(uint64, ProgressType, tracker.Progress) {} - - for i := 0; i < b.N; i++ { - rn.WithProgress(visit) - } - }) - b.Run("WithProgress-example", func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - var n uint64 - visit := func(_ uint64, _ ProgressType, pr tracker.Progress) { - n += pr.Match - } - rn.WithProgress(visit) - _ = n - } - }) - }) - } -} - -func TestRawNodeConsumeReady(t *testing.T) { - // Check that readyWithoutAccept() does not call acceptReady (which resets - // the messages) but Ready() does. 
- s := newTestMemoryStorage(withPeers(1)) - rn := newTestRawNode(1, 3, 1, s) - m1 := pb.Message{Context: []byte("foo")} - m2 := pb.Message{Context: []byte("bar")} - - // Inject first message, make sure it's visible via readyWithoutAccept. - rn.raft.msgs = append(rn.raft.msgs, m1) - rd := rn.readyWithoutAccept() - if len(rd.Messages) != 1 || !reflect.DeepEqual(rd.Messages[0], m1) { - t.Fatalf("expected only m1 sent, got %+v", rd.Messages) - } - if len(rn.raft.msgs) != 1 || !reflect.DeepEqual(rn.raft.msgs[0], m1) { - t.Fatalf("expected only m1 in raft.msgs, got %+v", rn.raft.msgs) - } - // Now call Ready() which should move the message into the Ready (as opposed - // to leaving it in both places). - rd = rn.Ready() - if len(rn.raft.msgs) > 0 { - t.Fatalf("messages not reset: %+v", rn.raft.msgs) - } - if len(rd.Messages) != 1 || !reflect.DeepEqual(rd.Messages[0], m1) { - t.Fatalf("expected only m1 sent, got %+v", rd.Messages) - } - // Add a message to raft to make sure that Advance() doesn't drop it. - rn.raft.msgs = append(rn.raft.msgs, m2) - rn.Advance(rd) - if len(rn.raft.msgs) != 1 || !reflect.DeepEqual(rn.raft.msgs[0], m2) { - t.Fatalf("expected only m2 in raft.msgs, got %+v", rn.raft.msgs) - } -} - -func BenchmarkRawNode(b *testing.B) { - cases := []struct { - name string - peers []uint64 - }{ - { - name: "single-voter", - peers: []uint64{1}, - }, - { - name: "two-voters", - peers: []uint64{1, 2}, - }, - // You can easily add more cases here. - } - - for _, tc := range cases { - b.Run(tc.name, func(b *testing.B) { - benchmarkRawNodeImpl(b, tc.peers...) 
- }) - } -} - -func benchmarkRawNodeImpl(b *testing.B, peers ...uint64) { - - const debug = false - - s := newTestMemoryStorage(withPeers(peers...)) - cfg := newTestConfig(1, 10, 1, s) - if !debug { - cfg.Logger = discardLogger // avoid distorting benchmark output - } - rn, err := NewRawNode(cfg) - if err != nil { - b.Fatal(err) - } - - run := make(chan struct{}, 1) - defer close(run) - - var numReady uint64 - stabilize := func() (applied uint64) { - for rn.HasReady() { - numReady++ - rd := rn.Ready() - if debug { - b.Log(DescribeReady(rd, nil)) - } - if n := len(rd.CommittedEntries); n > 0 { - applied = rd.CommittedEntries[n-1].Index - } - s.Append(rd.Entries) - for _, m := range rd.Messages { - if m.Type == pb.MsgVote { - resp := pb.Message{To: m.From, From: m.To, Term: m.Term, Type: pb.MsgVoteResp} - if debug { - b.Log(DescribeMessage(resp, nil)) - } - rn.Step(resp) - } - if m.Type == pb.MsgApp { - idx := m.Index - if n := len(m.Entries); n > 0 { - idx = m.Entries[n-1].Index - } - resp := pb.Message{To: m.From, From: m.To, Type: pb.MsgAppResp, Term: m.Term, Index: idx} - if debug { - b.Log(DescribeMessage(resp, nil)) - } - rn.Step(resp) - } - } - rn.Advance(rd) - } - return applied - } - - rn.Campaign() - stabilize() - - if debug { - b.N = 1 - } - - var applied uint64 - for i := 0; i < b.N; i++ { - if err := rn.Propose([]byte("foo")); err != nil { - b.Fatal(err) - } - applied = stabilize() - } - if applied < uint64(b.N) { - b.Fatalf("did not apply everything: %d < %d", applied, b.N) - } - b.ReportMetric(float64(s.callStats.firstIndex)/float64(b.N), "firstIndex/op") - b.ReportMetric(float64(s.callStats.lastIndex)/float64(b.N), "lastIndex/op") - b.ReportMetric(float64(s.callStats.term)/float64(b.N), "term/op") - b.ReportMetric(float64(numReady)/float64(b.N), "ready/op") - b.Logf("storage access stats: %+v", s.callStats) -} diff --git a/raft/read_only.go b/raft/read_only.go deleted file mode 100644 index 656414894831..000000000000 --- a/raft/read_only.go +++ 
/dev/null @@ -1,121 +0,0 @@ -// Copyright 2016 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import pb "go.etcd.io/etcd/raft/v3/raftpb" - -// ReadState provides state for read only query. -// It's caller's responsibility to call ReadIndex first before getting -// this state from ready, it's also caller's duty to differentiate if this -// state is what it requests through RequestCtx, eg. given a unique id as -// RequestCtx -type ReadState struct { - Index uint64 - RequestCtx []byte -} - -type readIndexStatus struct { - req pb.Message - index uint64 - // NB: this never records 'false', but it's more convenient to use this - // instead of a map[uint64]struct{} due to the API of quorum.VoteResult. If - // this becomes performance sensitive enough (doubtful), quorum.VoteResult - // can change to an API that is closer to that of CommittedIndex. - acks map[uint64]bool -} - -type readOnly struct { - option ReadOnlyOption - pendingReadIndex map[string]*readIndexStatus - readIndexQueue []string -} - -func newReadOnly(option ReadOnlyOption) *readOnly { - return &readOnly{ - option: option, - pendingReadIndex: make(map[string]*readIndexStatus), - } -} - -// addRequest adds a read only request into readonly struct. -// `index` is the commit index of the raft state machine when it received -// the read only request. -// `m` is the original read only request message from the local or remote node. 
-func (ro *readOnly) addRequest(index uint64, m pb.Message) { - s := string(m.Entries[0].Data) - if _, ok := ro.pendingReadIndex[s]; ok { - return - } - ro.pendingReadIndex[s] = &readIndexStatus{index: index, req: m, acks: make(map[uint64]bool)} - ro.readIndexQueue = append(ro.readIndexQueue, s) -} - -// recvAck notifies the readonly struct that the raft state machine received -// an acknowledgment of the heartbeat that attached with the read only request -// context. -func (ro *readOnly) recvAck(id uint64, context []byte) map[uint64]bool { - rs, ok := ro.pendingReadIndex[string(context)] - if !ok { - return nil - } - - rs.acks[id] = true - return rs.acks -} - -// advance advances the read only request queue kept by the readonly struct. -// It dequeues the requests until it finds the read only request that has -// the same context as the given `m`. -func (ro *readOnly) advance(m pb.Message) []*readIndexStatus { - var ( - i int - found bool - ) - - ctx := string(m.Context) - var rss []*readIndexStatus - - for _, okctx := range ro.readIndexQueue { - i++ - rs, ok := ro.pendingReadIndex[okctx] - if !ok { - panic("cannot find corresponding read state from pending map") - } - rss = append(rss, rs) - if okctx == ctx { - found = true - break - } - } - - if found { - ro.readIndexQueue = ro.readIndexQueue[i:] - for _, rs := range rss { - delete(ro.pendingReadIndex, string(rs.req.Entries[0].Data)) - } - return rss - } - - return nil -} - -// lastPendingRequestCtx returns the context of the last pending read only -// request in readonly struct. 
-func (ro *readOnly) lastPendingRequestCtx() string { - if len(ro.readIndexQueue) == 0 { - return "" - } - return ro.readIndexQueue[len(ro.readIndexQueue)-1] -} diff --git a/raft/status.go b/raft/status.go deleted file mode 100644 index acfb56c3915d..000000000000 --- a/raft/status.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "fmt" - - pb "go.etcd.io/etcd/raft/v3/raftpb" - "go.etcd.io/etcd/raft/v3/tracker" -) - -// Status contains information about this Raft peer and its view of the system. -// The Progress is only populated on the leader. -type Status struct { - BasicStatus - Config tracker.Config - Progress map[uint64]tracker.Progress -} - -// BasicStatus contains basic information about the Raft peer. It does not allocate. 
-type BasicStatus struct { - ID uint64 - - pb.HardState - SoftState - - Applied uint64 - - LeadTransferee uint64 -} - -func getProgressCopy(r *raft) map[uint64]tracker.Progress { - m := make(map[uint64]tracker.Progress) - r.prs.Visit(func(id uint64, pr *tracker.Progress) { - p := *pr - p.Inflights = pr.Inflights.Clone() - pr = nil - - m[id] = p - }) - return m -} - -func getBasicStatus(r *raft) BasicStatus { - s := BasicStatus{ - ID: r.id, - LeadTransferee: r.leadTransferee, - } - s.HardState = r.hardState() - s.SoftState = *r.softState() - s.Applied = r.raftLog.applied - return s -} - -// getStatus gets a copy of the current raft status. -func getStatus(r *raft) Status { - var s Status - s.BasicStatus = getBasicStatus(r) - if s.RaftState == StateLeader { - s.Progress = getProgressCopy(r) - } - s.Config = r.prs.Config.Clone() - return s -} - -// MarshalJSON translates the raft status into JSON. -// TODO: try to simplify this by introducing ID type into raft -func (s Status) MarshalJSON() ([]byte, error) { - j := fmt.Sprintf(`{"id":"%x","term":%d,"vote":"%x","commit":%d,"lead":"%x","raftState":%q,"applied":%d,"progress":{`, - s.ID, s.Term, s.Vote, s.Commit, s.Lead, s.RaftState, s.Applied) - - if len(s.Progress) == 0 { - j += "}," - } else { - for k, v := range s.Progress { - subj := fmt.Sprintf(`"%x":{"match":%d,"next":%d,"state":%q},`, k, v.Match, v.Next, v.State) - j += subj - } - // remove the trailing "," - j = j[:len(j)-1] + "}," - } - - j += fmt.Sprintf(`"leadtransferee":"%x"}`, s.LeadTransferee) - return []byte(j), nil -} - -func (s Status) String() string { - b, err := s.MarshalJSON() - if err != nil { - getLogger().Panicf("unexpected error: %v", err) - } - return string(b) -} diff --git a/raft/storage.go b/raft/storage.go deleted file mode 100644 index 67ec16b13aa9..000000000000 --- a/raft/storage.go +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use 
this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "errors" - "sync" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -// ErrCompacted is returned by Storage.Entries/Compact when a requested -// index is unavailable because it predates the last snapshot. -var ErrCompacted = errors.New("requested index is unavailable due to compaction") - -// ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested -// index is older than the existing snapshot. -var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot") - -// ErrUnavailable is returned by Storage interface when the requested log entries -// are unavailable. -var ErrUnavailable = errors.New("requested entry at index is unavailable") - -// ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required -// snapshot is temporarily unavailable. -var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable") - -// Storage is an interface that may be implemented by the application -// to retrieve log entries from storage. -// -// If any Storage method returns an error, the raft instance will -// become inoperable and refuse to participate in elections; the -// application is responsible for cleanup and recovery in this case. -type Storage interface { - // TODO(tbg): split this into two interfaces, LogStorage and StateStorage. - - // InitialState returns the saved HardState and ConfState information. 
- InitialState() (pb.HardState, pb.ConfState, error) - // Entries returns a slice of log entries in the range [lo,hi). - // MaxSize limits the total size of the log entries returned, but - // Entries returns at least one entry if any. - Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) - // Term returns the term of entry i, which must be in the range - // [FirstIndex()-1, LastIndex()]. The term of the entry before - // FirstIndex is retained for matching purposes even though the - // rest of that entry may not be available. - Term(i uint64) (uint64, error) - // LastIndex returns the index of the last entry in the log. - LastIndex() (uint64, error) - // FirstIndex returns the index of the first log entry that is - // possibly available via Entries (older entries have been incorporated - // into the latest Snapshot; if storage only contains the dummy entry the - // first log entry is not available). - FirstIndex() (uint64, error) - // Snapshot returns the most recent snapshot. - // If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable, - // so raft state machine could know that Storage needs some time to prepare - // snapshot and call Snapshot later. - Snapshot() (pb.Snapshot, error) -} - -type inMemStorageCallStats struct { - initialState, firstIndex, lastIndex, entries, term, snapshot int -} - -// MemoryStorage implements the Storage interface backed by an -// in-memory array. -type MemoryStorage struct { - // Protects access to all fields. Most methods of MemoryStorage are - // run on the raft goroutine, but Append() is run on an application - // goroutine. - sync.Mutex - - hardState pb.HardState - snapshot pb.Snapshot - // ents[i] has raft log position i+snapshot.Metadata.Index - ents []pb.Entry - - callStats inMemStorageCallStats -} - -// NewMemoryStorage creates an empty MemoryStorage. 
-func NewMemoryStorage() *MemoryStorage { - return &MemoryStorage{ - // When starting from scratch populate the list with a dummy entry at term zero. - ents: make([]pb.Entry, 1), - } -} - -// InitialState implements the Storage interface. -func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) { - ms.callStats.initialState++ - return ms.hardState, ms.snapshot.Metadata.ConfState, nil -} - -// SetHardState saves the current HardState. -func (ms *MemoryStorage) SetHardState(st pb.HardState) error { - ms.Lock() - defer ms.Unlock() - ms.hardState = st - return nil -} - -// Entries implements the Storage interface. -func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) { - ms.Lock() - defer ms.Unlock() - ms.callStats.entries++ - offset := ms.ents[0].Index - if lo <= offset { - return nil, ErrCompacted - } - if hi > ms.lastIndex()+1 { - getLogger().Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex()) - } - // only contains dummy entries. - if len(ms.ents) == 1 { - return nil, ErrUnavailable - } - - ents := ms.ents[lo-offset : hi-offset] - return limitSize(ents, maxSize), nil -} - -// Term implements the Storage interface. -func (ms *MemoryStorage) Term(i uint64) (uint64, error) { - ms.Lock() - defer ms.Unlock() - ms.callStats.term++ - offset := ms.ents[0].Index - if i < offset { - return 0, ErrCompacted - } - if int(i-offset) >= len(ms.ents) { - return 0, ErrUnavailable - } - return ms.ents[i-offset].Term, nil -} - -// LastIndex implements the Storage interface. -func (ms *MemoryStorage) LastIndex() (uint64, error) { - ms.Lock() - defer ms.Unlock() - ms.callStats.lastIndex++ - return ms.lastIndex(), nil -} - -func (ms *MemoryStorage) lastIndex() uint64 { - return ms.ents[0].Index + uint64(len(ms.ents)) - 1 -} - -// FirstIndex implements the Storage interface. 
-func (ms *MemoryStorage) FirstIndex() (uint64, error) { - ms.Lock() - defer ms.Unlock() - ms.callStats.firstIndex++ - return ms.firstIndex(), nil -} - -func (ms *MemoryStorage) firstIndex() uint64 { - return ms.ents[0].Index + 1 -} - -// Snapshot implements the Storage interface. -func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) { - ms.Lock() - defer ms.Unlock() - ms.callStats.snapshot++ - return ms.snapshot, nil -} - -// ApplySnapshot overwrites the contents of this Storage object with -// those of the given snapshot. -func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error { - ms.Lock() - defer ms.Unlock() - - //handle check for old snapshot being applied - msIndex := ms.snapshot.Metadata.Index - snapIndex := snap.Metadata.Index - if msIndex >= snapIndex { - return ErrSnapOutOfDate - } - - ms.snapshot = snap - ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}} - return nil -} - -// CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and -// can be used to reconstruct the state at that point. -// If any configuration changes have been made since the last compaction, -// the result of the last ApplyConfChange must be passed in. -func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) { - ms.Lock() - defer ms.Unlock() - if i <= ms.snapshot.Metadata.Index { - return pb.Snapshot{}, ErrSnapOutOfDate - } - - offset := ms.ents[0].Index - if i > ms.lastIndex() { - getLogger().Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex()) - } - - ms.snapshot.Metadata.Index = i - ms.snapshot.Metadata.Term = ms.ents[i-offset].Term - if cs != nil { - ms.snapshot.Metadata.ConfState = *cs - } - ms.snapshot.Data = data - return ms.snapshot, nil -} - -// Compact discards all log entries prior to compactIndex. -// It is the application's responsibility to not attempt to compact an index -// greater than raftLog.applied. 
-func (ms *MemoryStorage) Compact(compactIndex uint64) error { - ms.Lock() - defer ms.Unlock() - offset := ms.ents[0].Index - if compactIndex <= offset { - return ErrCompacted - } - if compactIndex > ms.lastIndex() { - getLogger().Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex()) - } - - i := compactIndex - offset - ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i) - ents[0].Index = ms.ents[i].Index - ents[0].Term = ms.ents[i].Term - ents = append(ents, ms.ents[i+1:]...) - ms.ents = ents - return nil -} - -// Append the new entries to storage. -// TODO (xiangli): ensure the entries are continuous and -// entries[0].Index > ms.entries[0].Index -func (ms *MemoryStorage) Append(entries []pb.Entry) error { - if len(entries) == 0 { - return nil - } - - ms.Lock() - defer ms.Unlock() - - first := ms.firstIndex() - last := entries[0].Index + uint64(len(entries)) - 1 - - // shortcut if there is no new entry. - if last < first { - return nil - } - // truncate compacted entries - if first > entries[0].Index { - entries = entries[first-entries[0].Index:] - } - - offset := entries[0].Index - ms.ents[0].Index - switch { - case uint64(len(ms.ents)) > offset: - ms.ents = append([]pb.Entry{}, ms.ents[:offset]...) - ms.ents = append(ms.ents, entries...) - case uint64(len(ms.ents)) == offset: - ms.ents = append(ms.ents, entries...) - default: - getLogger().Panicf("missing log entry [last: %d, append at: %d]", - ms.lastIndex(), entries[0].Index) - } - return nil -} diff --git a/raft/storage_test.go b/raft/storage_test.go deleted file mode 100644 index b7b8ed4e0ea2..000000000000 --- a/raft/storage_test.go +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "math" - "testing" - - "github.com/stretchr/testify/require" - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -func TestStorageTerm(t *testing.T) { - ents := []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}} - tests := []struct { - i uint64 - - werr error - wterm uint64 - wpanic bool - }{ - {2, ErrCompacted, 0, false}, - {3, nil, 3, false}, - {4, nil, 4, false}, - {5, nil, 5, false}, - {6, ErrUnavailable, 0, false}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - s := &MemoryStorage{ents: ents} - - if tt.wpanic { - require.Panics(t, func() { - _, _ = s.Term(tt.i) - }) - } - term, err := s.Term(tt.i) - require.Equal(t, tt.werr, err) - require.Equal(t, tt.wterm, term) - }) - } -} - -func TestStorageEntries(t *testing.T) { - ents := []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 6}} - tests := []struct { - lo, hi, maxsize uint64 - - werr error - wentries []pb.Entry - }{ - {2, 6, math.MaxUint64, ErrCompacted, nil}, - {3, 4, math.MaxUint64, ErrCompacted, nil}, - {4, 5, math.MaxUint64, nil, []pb.Entry{{Index: 4, Term: 4}}}, - {4, 6, math.MaxUint64, nil, []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}}}, - {4, 7, math.MaxUint64, nil, []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 6}}}, - // even if maxsize is zero, the first entry should be returned - {4, 7, 0, nil, []pb.Entry{{Index: 4, Term: 4}}}, - // limit to 2 - {4, 7, uint64(ents[1].Size() + ents[2].Size()), nil, []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 
5}}}, - // limit to 2 - {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size()/2), nil, []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}}}, - {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size() - 1), nil, []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}}}, - // all - {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size()), nil, []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 6}}}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - s := &MemoryStorage{ents: ents} - entries, err := s.Entries(tt.lo, tt.hi, tt.maxsize) - require.Equal(t, tt.werr, err) - require.Equal(t, tt.wentries, entries) - }) - } -} - -func TestStorageLastIndex(t *testing.T) { - ents := []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}} - s := &MemoryStorage{ents: ents} - - last, err := s.LastIndex() - require.NoError(t, err) - require.Equal(t, uint64(5), last) - - require.NoError(t, s.Append([]pb.Entry{{Index: 6, Term: 5}})) - last, err = s.LastIndex() - require.NoError(t, err) - require.Equal(t, uint64(6), last) -} - -func TestStorageFirstIndex(t *testing.T) { - ents := []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}} - s := &MemoryStorage{ents: ents} - - first, err := s.FirstIndex() - require.NoError(t, err) - require.Equal(t, uint64(4), first) - - require.NoError(t, s.Compact(4)) - first, err = s.FirstIndex() - require.NoError(t, err) - require.Equal(t, uint64(5), first) -} - -func TestStorageCompact(t *testing.T) { - ents := []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}} - tests := []struct { - i uint64 - - werr error - windex uint64 - wterm uint64 - wlen int - }{ - {2, ErrCompacted, 3, 3, 3}, - {3, ErrCompacted, 3, 3, 3}, - {4, nil, 4, 4, 2}, - {5, nil, 5, 5, 1}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - s := &MemoryStorage{ents: ents} - require.Equal(t, tt.werr, s.Compact(tt.i)) - require.Equal(t, tt.windex, 
s.ents[0].Index) - require.Equal(t, tt.wterm, s.ents[0].Term) - require.Equal(t, tt.wlen, len(s.ents)) - }) - } -} - -func TestStorageCreateSnapshot(t *testing.T) { - ents := []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}} - cs := &pb.ConfState{Voters: []uint64{1, 2, 3}} - data := []byte("data") - - tests := []struct { - i uint64 - - werr error - wsnap pb.Snapshot - }{ - {4, nil, pb.Snapshot{Data: data, Metadata: pb.SnapshotMetadata{Index: 4, Term: 4, ConfState: *cs}}}, - {5, nil, pb.Snapshot{Data: data, Metadata: pb.SnapshotMetadata{Index: 5, Term: 5, ConfState: *cs}}}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - s := &MemoryStorage{ents: ents} - snap, err := s.CreateSnapshot(tt.i, cs, data) - require.Equal(t, tt.werr, err) - require.Equal(t, tt.wsnap, snap) - }) - } -} - -func TestStorageAppend(t *testing.T) { - ents := []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}} - tests := []struct { - entries []pb.Entry - - werr error - wentries []pb.Entry - }{ - { - []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}}, - nil, - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}}, - }, - { - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}}, - nil, - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}}, - }, - { - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 6}, {Index: 5, Term: 6}}, - nil, - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 6}, {Index: 5, Term: 6}}, - }, - { - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 5}}, - nil, - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 5}}, - }, - // Truncate incoming entries, truncate the existing entries and append. 
- { - []pb.Entry{{Index: 2, Term: 3}, {Index: 3, Term: 3}, {Index: 4, Term: 5}}, - nil, - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 5}}, - }, - // Truncate the existing entries and append. - { - []pb.Entry{{Index: 4, Term: 5}}, - nil, - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 5}}, - }, - // Direct append. - { - []pb.Entry{{Index: 6, Term: 5}}, - nil, - []pb.Entry{{Index: 3, Term: 3}, {Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 5}}, - }, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - s := &MemoryStorage{ents: ents} - require.Equal(t, tt.werr, s.Append(tt.entries)) - require.Equal(t, tt.wentries, s.ents) - }) - } -} - -func TestStorageApplySnapshot(t *testing.T) { - cs := &pb.ConfState{Voters: []uint64{1, 2, 3}} - data := []byte("data") - - tests := []pb.Snapshot{{Data: data, Metadata: pb.SnapshotMetadata{Index: 4, Term: 4, ConfState: *cs}}, - {Data: data, Metadata: pb.SnapshotMetadata{Index: 3, Term: 3, ConfState: *cs}}, - } - - s := NewMemoryStorage() - - i := 0 - tt := tests[i] - require.NoError(t, s.ApplySnapshot(tt)) - - // ApplySnapshot fails due to ErrSnapOutOfDate. 
- i = 1 - tt = tests[i] - require.Equal(t, ErrSnapOutOfDate, s.ApplySnapshot(tt)) -} diff --git a/raft/testdata/campaign.txt b/raft/testdata/campaign.txt deleted file mode 100644 index c5deb2dc1825..000000000000 --- a/raft/testdata/campaign.txt +++ /dev/null @@ -1,117 +0,0 @@ -log-level info ----- -ok - -add-nodes 3 voters=(1,2,3) index=2 ----- -INFO 1 switched to configuration voters=(1 2 3) -INFO 1 became follower at term 0 -INFO newRaft 1 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] -INFO 2 switched to configuration voters=(1 2 3) -INFO 2 became follower at term 0 -INFO newRaft 2 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] -INFO 3 switched to configuration voters=(1 2 3) -INFO 3 became follower at term 0 -INFO newRaft 3 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] - -campaign 1 ----- -INFO 1 is starting a new election at term 0 -INFO 1 became candidate at term 1 -INFO 1 received MsgVoteResp from 1 at term 1 -INFO 1 [logterm: 1, index: 2] sent MsgVote request to 2 at term 1 -INFO 1 [logterm: 1, index: 2] sent MsgVote request to 3 at term 1 - -stabilize ----- -> 1 handling Ready - Ready MustSync=true: - Lead:0 State:StateCandidate - HardState Term:1 Vote:1 Commit:2 - Messages: - 1->2 MsgVote Term:1 Log:1/2 - 1->3 MsgVote Term:1 Log:1/2 -> 2 receiving messages - 1->2 MsgVote Term:1 Log:1/2 - INFO 2 [term: 0] received a MsgVote message with higher term from 1 [term: 1] - INFO 2 became follower at term 1 - INFO 2 [logterm: 1, index: 2, vote: 0] cast MsgVote for 1 [logterm: 1, index: 2] at term 1 -> 3 receiving messages - 1->3 MsgVote Term:1 Log:1/2 - INFO 3 [term: 0] received a MsgVote message with higher term from 1 [term: 1] - INFO 3 became follower at term 1 - INFO 3 [logterm: 1, index: 2, vote: 0] cast MsgVote for 1 [logterm: 1, index: 2] at term 1 -> 2 handling Ready - Ready MustSync=true: - HardState Term:1 Vote:1 Commit:2 - Messages: - 2->1 MsgVoteResp Term:1 Log:0/0 
-> 3 handling Ready - Ready MustSync=true: - HardState Term:1 Vote:1 Commit:2 - Messages: - 3->1 MsgVoteResp Term:1 Log:0/0 -> 1 receiving messages - 2->1 MsgVoteResp Term:1 Log:0/0 - INFO 1 received MsgVoteResp from 2 at term 1 - INFO 1 has received 2 MsgVoteResp votes and 0 vote rejections - INFO 1 became leader at term 1 - 3->1 MsgVoteResp Term:1 Log:0/0 -> 1 handling Ready - Ready MustSync=true: - Lead:1 State:StateLeader - Entries: - 1/3 EntryNormal "" - Messages: - 1->2 MsgApp Term:1 Log:1/2 Commit:2 Entries:[1/3 EntryNormal ""] - 1->3 MsgApp Term:1 Log:1/2 Commit:2 Entries:[1/3 EntryNormal ""] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/2 Commit:2 Entries:[1/3 EntryNormal ""] -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/2 Commit:2 Entries:[1/3 EntryNormal ""] -> 2 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - Entries: - 1/3 EntryNormal "" - Messages: - 2->1 MsgAppResp Term:1 Log:0/3 -> 3 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - Entries: - 1/3 EntryNormal "" - Messages: - 3->1 MsgAppResp Term:1 Log:0/3 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/3 - 3->1 MsgAppResp Term:1 Log:0/3 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:3 - CommittedEntries: - 1/3 EntryNormal "" - Messages: - 1->2 MsgApp Term:1 Log:1/3 Commit:3 - 1->3 MsgApp Term:1 Log:1/3 Commit:3 -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:3 -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/3 Commit:3 -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:3 - CommittedEntries: - 1/3 EntryNormal "" - Messages: - 2->1 MsgAppResp Term:1 Log:0/3 -> 3 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:3 - CommittedEntries: - 1/3 EntryNormal "" - Messages: - 3->1 MsgAppResp Term:1 Log:0/3 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/3 - 3->1 MsgAppResp Term:1 Log:0/3 diff --git a/raft/testdata/campaign_learner_must_vote.txt 
b/raft/testdata/campaign_learner_must_vote.txt deleted file mode 100644 index 55d42aa436e6..000000000000 --- a/raft/testdata/campaign_learner_must_vote.txt +++ /dev/null @@ -1,152 +0,0 @@ -# Regression test that verifies that learners can vote. This holds only in the -# sense that if a learner is asked to vote, a candidate believes that they are a -# voter based on its current config, which may be more recent than that of the -# learner. If learners which are actually voters but don't know it yet don't -# vote in that situation, the raft group may end up unavailable despite a quorum -# of voters (as of the latest config) being available. -# -# See: -# https://github.com/etcd-io/etcd/pull/10998 - -# Turn output off during boilerplate. -log-level none ----- -ok - -# Bootstrap three nodes. -add-nodes 3 voters=(1,2) learners=(3) index=2 ----- -ok - -# n1 gets to be leader. -campaign 1 ----- -ok - -stabilize ----- -ok (quiet) - -# Propose a conf change on n1 that promotes n3 to voter. -propose-conf-change 1 -v3 ----- -ok - -# Commit and fully apply said conf change. n1 and n2 now consider n3 a voter. -stabilize 1 2 ----- -ok (quiet) - -# Drop all inflight messages to 3. We don't want it to be caught up when it is -# asked to vote. -deliver-msgs drop=(3) ----- -ok (quiet) - -# We now pretend that n1 is dead, and n2 is trying to become leader. - -log-level debug ----- -ok - -campaign 2 ----- -INFO 2 is starting a new election at term 1 -INFO 2 became candidate at term 2 -INFO 2 received MsgVoteResp from 2 at term 2 -INFO 2 [logterm: 1, index: 4] sent MsgVote request to 1 at term 2 -INFO 2 [logterm: 1, index: 4] sent MsgVote request to 3 at term 2 - -# Send out the MsgVote requests. -process-ready 2 ----- -Ready MustSync=true: -Lead:0 State:StateCandidate -HardState Term:2 Vote:2 Commit:4 -Messages: -2->1 MsgVote Term:2 Log:1/4 -2->3 MsgVote Term:2 Log:1/4 - -# n2 is now campaigning while n1 is down (does not respond). 
The latest config -# has n3 as a voter, but n3 doesn't even have the corresponding conf change in -# its log. Still, it casts a vote for n2 which can in turn become leader and -# catches up n3. -stabilize 3 ----- -> 3 receiving messages - 2->3 MsgVote Term:2 Log:1/4 - INFO 3 [term: 1] received a MsgVote message with higher term from 2 [term: 2] - INFO 3 became follower at term 2 - INFO 3 [logterm: 1, index: 3, vote: 0] cast MsgVote for 2 [logterm: 1, index: 4] at term 2 -> 3 handling Ready - Ready MustSync=true: - Lead:0 State:StateFollower - HardState Term:2 Vote:2 Commit:3 - Messages: - 3->2 MsgVoteResp Term:2 Log:0/0 - -stabilize 2 3 ----- -> 2 receiving messages - 3->2 MsgVoteResp Term:2 Log:0/0 - INFO 2 received MsgVoteResp from 3 at term 2 - INFO 2 has received 2 MsgVoteResp votes and 0 vote rejections - INFO 2 became leader at term 2 -> 2 handling Ready - Ready MustSync=true: - Lead:2 State:StateLeader - Entries: - 2/5 EntryNormal "" - Messages: - 2->1 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] - 2->3 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] -> 3 receiving messages - 2->3 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] - DEBUG 3 [logterm: 0, index: 4] rejected MsgApp [logterm: 1, index: 4] from 2 -> 3 handling Ready - Ready MustSync=false: - Lead:2 State:StateFollower - Messages: - 3->2 MsgAppResp Term:2 Log:1/4 Rejected (Hint: 3) -> 2 receiving messages - 3->2 MsgAppResp Term:2 Log:1/4 Rejected (Hint: 3) - DEBUG 2 received MsgAppResp(rejected, hint: (index 3, term 1)) from 3 for index 4 - DEBUG 2 decreased progress of 3 to [StateProbe match=0 next=4] -> 2 handling Ready - Ready MustSync=false: - Messages: - 2->3 MsgApp Term:2 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v3, 2/5 EntryNormal ""] -> 3 receiving messages - 2->3 MsgApp Term:2 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v3, 2/5 EntryNormal ""] -> 3 handling Ready - Ready MustSync=true: - HardState Term:2 Vote:2 Commit:4 - Entries: - 1/4 
EntryConfChangeV2 v3 - 2/5 EntryNormal "" - CommittedEntries: - 1/4 EntryConfChangeV2 v3 - Messages: - 3->2 MsgAppResp Term:2 Log:0/5 - INFO 3 switched to configuration voters=(1 2 3) -> 2 receiving messages - 3->2 MsgAppResp Term:2 Log:0/5 -> 2 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:2 Commit:5 - CommittedEntries: - 2/5 EntryNormal "" - Messages: - 2->3 MsgApp Term:2 Log:2/5 Commit:5 -> 3 receiving messages - 2->3 MsgApp Term:2 Log:2/5 Commit:5 -> 3 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:2 Commit:5 - CommittedEntries: - 2/5 EntryNormal "" - Messages: - 3->2 MsgAppResp Term:2 Log:0/5 -> 2 receiving messages - 3->2 MsgAppResp Term:2 Log:0/5 diff --git a/raft/testdata/confchange_v1_add_single.txt b/raft/testdata/confchange_v1_add_single.txt deleted file mode 100644 index cd07af47944e..000000000000 --- a/raft/testdata/confchange_v1_add_single.txt +++ /dev/null @@ -1,100 +0,0 @@ -# Run a V1 membership change that adds a single voter. - -# Bootstrap n1. -add-nodes 1 voters=(1) index=2 ----- -INFO 1 switched to configuration voters=(1) -INFO 1 became follower at term 0 -INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] - -campaign 1 ----- -INFO 1 is starting a new election at term 0 -INFO 1 became candidate at term 1 -INFO 1 received MsgVoteResp from 1 at term 1 -INFO 1 became leader at term 1 - -# Add v2 (with an auto transition). -propose-conf-change 1 v1=true -v2 ----- -ok - -# Pull n2 out of thin air. -add-nodes 1 ----- -INFO 2 switched to configuration voters=() -INFO 2 became follower at term 0 -INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] - -# n1 commits the conf change using itself as commit quorum, immediately transitions into -# the final config, and catches up n2. Note that it's using an EntryConfChange, not an -# EntryConfChangeV2, so this is compatible with nodes that don't know about V2 conf changes. 
-stabilize ----- -> 1 handling Ready - Ready MustSync=true: - Lead:1 State:StateLeader - HardState Term:1 Vote:1 Commit:2 - Entries: - 1/3 EntryNormal "" - 1/4 EntryConfChange v2 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:4 - CommittedEntries: - 1/3 EntryNormal "" - 1/4 EntryConfChange v2 - INFO 1 switched to configuration voters=(1 2) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChange v2] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChange v2] - INFO 2 [term: 0] received a MsgApp message with higher term from 1 [term: 1] - INFO 2 became follower at term 1 - DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 -> 2 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - HardState Term:1 Commit:0 - Messages: - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) - DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3 - DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1] - DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1] - DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false -> 2 receiving messages - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1] - INFO 2 switched to configuration voters=(1 2) - INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 
1] - INFO 2 [commit: 4] restored snapshot [index: 4, term: 1] -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:4 - Snapshot Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/4 Commit:4 -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/4 Commit:4 -> 2 handling Ready - Ready MustSync=false: - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 diff --git a/raft/testdata/confchange_v1_remove_leader.txt b/raft/testdata/confchange_v1_remove_leader.txt deleted file mode 100644 index 68aab846e0b2..000000000000 --- a/raft/testdata/confchange_v1_remove_leader.txt +++ /dev/null @@ -1,249 +0,0 @@ -# We'll turn this back on after the boilerplate. -log-level none ----- -ok - -# Run a V1 membership change that removes the leader. -# Bootstrap n1, n2, n3. -add-nodes 3 voters=(1,2,3) index=2 ----- -ok - -campaign 1 ----- -ok - -stabilize ----- -ok (quiet) - -log-level debug ----- -ok - -raft-state ----- -1: StateLeader (Voter) -2: StateFollower (Voter) -3: StateFollower (Voter) - -# Start removing n1. -propose-conf-change 1 v1=true -r1 ----- -ok - -raft-state ----- -1: StateLeader (Voter) -2: StateFollower (Voter) -3: StateFollower (Voter) - -# Propose an extra entry which will be sent out together with the conf change. -propose 1 foo ----- -ok - -# Send out the corresponding appends. 
-process-ready 1 ----- -Ready MustSync=true: -Entries: -1/4 EntryConfChange r1 -1/5 EntryNormal "foo" -Messages: -1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1] -1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1] -1->2 MsgApp Term:1 Log:1/4 Commit:3 Entries:[1/5 EntryNormal "foo"] -1->3 MsgApp Term:1 Log:1/4 Commit:3 Entries:[1/5 EntryNormal "foo"] - -# Send response from n2 (which is enough to commit the entries so far next time -# n1 runs). -stabilize 2 ----- -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1] - 1->2 MsgApp Term:1 Log:1/4 Commit:3 Entries:[1/5 EntryNormal "foo"] -> 2 handling Ready - Ready MustSync=true: - Entries: - 1/4 EntryConfChange r1 - 1/5 EntryNormal "foo" - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 - 2->1 MsgAppResp Term:1 Log:0/5 - -# Put another entry in n1's log. -propose 1 bar ----- -ok - -# n1 applies the conf change, so it has now removed itself. But it still has -# an uncommitted entry in the log. If the leader unconditionally counted itself -# as part of the commit quorum, we'd be in trouble. In the block below, we see -# it send out appends to the other nodes for the 'bar' entry. 
-stabilize 1 ----- -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/6 EntryNormal "bar" - Messages: - 1->2 MsgApp Term:1 Log:1/5 Commit:3 Entries:[1/6 EntryNormal "bar"] - 1->3 MsgApp Term:1 Log:1/5 Commit:3 Entries:[1/6 EntryNormal "bar"] -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - 2->1 MsgAppResp Term:1 Log:0/5 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:5 - CommittedEntries: - 1/4 EntryConfChange r1 - 1/5 EntryNormal "foo" - Messages: - 1->2 MsgApp Term:1 Log:1/6 Commit:4 - 1->3 MsgApp Term:1 Log:1/6 Commit:4 - 1->2 MsgApp Term:1 Log:1/6 Commit:5 - 1->3 MsgApp Term:1 Log:1/6 Commit:5 - INFO 1 switched to configuration voters=(2 3) - -raft-state ----- -1: StateLeader (Non-Voter) -2: StateFollower (Voter) -3: StateFollower (Voter) - -# n2 responds, n3 doesn't yet. Quorum for 'bar' should not be reached... -stabilize 2 ----- -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/5 Commit:3 Entries:[1/6 EntryNormal "bar"] - 1->2 MsgApp Term:1 Log:1/6 Commit:4 - 1->2 MsgApp Term:1 Log:1/6 Commit:5 -> 2 handling Ready - Ready MustSync=true: - HardState Term:1 Vote:1 Commit:5 - Entries: - 1/6 EntryNormal "bar" - CommittedEntries: - 1/4 EntryConfChange r1 - 1/5 EntryNormal "foo" - Messages: - 2->1 MsgAppResp Term:1 Log:0/6 - 2->1 MsgAppResp Term:1 Log:0/6 - 2->1 MsgAppResp Term:1 Log:0/6 - INFO 2 switched to configuration voters=(2 3) - -# ... which thankfully is what we see on the leader. -stabilize 1 ----- -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/6 - 2->1 MsgAppResp Term:1 Log:0/6 - 2->1 MsgAppResp Term:1 Log:0/6 - -# When n3 responds, quorum is reached and everything falls into place. 
-stabilize ----- -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1] - 1->3 MsgApp Term:1 Log:1/4 Commit:3 Entries:[1/5 EntryNormal "foo"] - 1->3 MsgApp Term:1 Log:1/5 Commit:3 Entries:[1/6 EntryNormal "bar"] - 1->3 MsgApp Term:1 Log:1/6 Commit:4 - 1->3 MsgApp Term:1 Log:1/6 Commit:5 -> 3 handling Ready - Ready MustSync=true: - HardState Term:1 Vote:1 Commit:5 - Entries: - 1/4 EntryConfChange r1 - 1/5 EntryNormal "foo" - 1/6 EntryNormal "bar" - CommittedEntries: - 1/4 EntryConfChange r1 - 1/5 EntryNormal "foo" - Messages: - 3->1 MsgAppResp Term:1 Log:0/4 - 3->1 MsgAppResp Term:1 Log:0/5 - 3->1 MsgAppResp Term:1 Log:0/6 - 3->1 MsgAppResp Term:1 Log:0/6 - 3->1 MsgAppResp Term:1 Log:0/6 - INFO 3 switched to configuration voters=(2 3) -> 1 receiving messages - 3->1 MsgAppResp Term:1 Log:0/4 - 3->1 MsgAppResp Term:1 Log:0/5 - 3->1 MsgAppResp Term:1 Log:0/6 - 3->1 MsgAppResp Term:1 Log:0/6 - 3->1 MsgAppResp Term:1 Log:0/6 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:6 - CommittedEntries: - 1/6 EntryNormal "bar" - Messages: - 1->2 MsgApp Term:1 Log:1/6 Commit:6 - 1->3 MsgApp Term:1 Log:1/6 Commit:6 -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/6 Commit:6 -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/6 Commit:6 -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:6 - CommittedEntries: - 1/6 EntryNormal "bar" - Messages: - 2->1 MsgAppResp Term:1 Log:0/6 -> 3 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:6 - CommittedEntries: - 1/6 EntryNormal "bar" - Messages: - 3->1 MsgAppResp Term:1 Log:0/6 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/6 - 3->1 MsgAppResp Term:1 Log:0/6 - -# However not all is well. n1 is still leader but unconditionally drops all -# proposals on the floor, so we're effectively stuck if it still heartbeats -# its followers... -propose 1 baz ----- -raft proposal dropped - -tick-heartbeat 1 ----- -ok - -# ... 
which, uh oh, it does. -# TODO(tbg): change behavior so that a leader that is removed immediately steps -# down, and initiates an optimistic handover. -stabilize ----- -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgHeartbeat Term:1 Log:0/0 Commit:6 - 1->3 MsgHeartbeat Term:1 Log:0/0 Commit:6 -> 2 receiving messages - 1->2 MsgHeartbeat Term:1 Log:0/0 Commit:6 -> 3 receiving messages - 1->3 MsgHeartbeat Term:1 Log:0/0 Commit:6 -> 2 handling Ready - Ready MustSync=false: - Messages: - 2->1 MsgHeartbeatResp Term:1 Log:0/0 -> 3 handling Ready - Ready MustSync=false: - Messages: - 3->1 MsgHeartbeatResp Term:1 Log:0/0 -> 1 receiving messages - 2->1 MsgHeartbeatResp Term:1 Log:0/0 - 3->1 MsgHeartbeatResp Term:1 Log:0/0 - -# Just confirming the issue above - leader does not automatically step down. -raft-state ----- -1: StateLeader (Non-Voter) -2: StateFollower (Voter) -3: StateFollower (Voter) diff --git a/raft/testdata/confchange_v2_add_double_auto.txt b/raft/testdata/confchange_v2_add_double_auto.txt deleted file mode 100644 index 0979bdd6abf4..000000000000 --- a/raft/testdata/confchange_v2_add_double_auto.txt +++ /dev/null @@ -1,406 +0,0 @@ -# Run a V2 membership change that adds two voters at once and auto-leaves the -# joint configuration. (This is the same as specifying an explicit transition -# since more than one change is being made atomically). - -# Bootstrap n1. -add-nodes 1 voters=(1) index=2 ----- -INFO 1 switched to configuration voters=(1) -INFO 1 became follower at term 0 -INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] - -campaign 1 ----- -INFO 1 is starting a new election at term 0 -INFO 1 became candidate at term 1 -INFO 1 received MsgVoteResp from 1 at term 1 -INFO 1 became leader at term 1 - -propose-conf-change 1 transition=auto -v2 v3 ----- -ok - -# Add two "empty" nodes to the cluster, n2 and n3. 
-add-nodes 2 ----- -INFO 2 switched to configuration voters=() -INFO 2 became follower at term 0 -INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] -INFO 3 switched to configuration voters=() -INFO 3 became follower at term 0 -INFO newRaft 3 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] - -# Process n1 once, so that it can append the entry. -process-ready 1 ----- -Ready MustSync=true: -Lead:1 State:StateLeader -HardState Term:1 Vote:1 Commit:2 -Entries: -1/3 EntryNormal "" -1/4 EntryConfChangeV2 v2 v3 - -# Now n1 applies the conf change. We see that it starts transitioning out of that joint -# configuration (though we will only see that proposal in the next ready handling -# loop, when it is emitted). We also see that this is using joint consensus, which -# it has to since we're carrying out two additions at once. -process-ready 1 ----- -Ready MustSync=false: -HardState Term:1 Vote:1 Commit:4 -CommittedEntries: -1/3 EntryNormal "" -1/4 EntryConfChangeV2 v2 v3 -INFO 1 switched to configuration voters=(1 2 3)&&(1) autoleave -INFO initiating automatic transition out of joint configuration voters=(1 2 3)&&(1) autoleave - -# n1 immediately probes n2 and n3. -stabilize 1 ----- -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/5 EntryConfChangeV2 - Messages: - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3] - 1->3 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3] - -# First, play out the whole interaction between n1 and n2. We see n1's probe to -# n2 get rejected (since n2 needs a snapshot); the snapshot is delivered at which -# point n2 switches to the correct config, and n1 catches it up. This notably -# includes the empty conf change which gets committed and applied by both and -# which transitions them out of their joint configuration into the final one (1 2 3). 
-stabilize 1 2 ----- -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3] - INFO 2 [term: 0] received a MsgApp message with higher term from 1 [term: 1] - INFO 2 became follower at term 1 - DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 -> 2 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - HardState Term:1 Commit:0 - Messages: - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) - DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3 - DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1] - DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1] - DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true -> 2 receiving messages - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true - INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1] - INFO 2 switched to configuration voters=(1 2 3)&&(1) autoleave - INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1] - INFO 2 [commit: 4] restored snapshot [index: 4, term: 1] -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:4 - Snapshot Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot 
match=4 next=5 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2] -> 2 handling Ready - Ready MustSync=true: - Entries: - 1/5 EntryConfChangeV2 - Messages: - 2->1 MsgAppResp Term:1 Log:0/5 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/5 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:5 - CommittedEntries: - 1/5 EntryConfChangeV2 - Messages: - 1->2 MsgApp Term:1 Log:1/5 Commit:5 - INFO 1 switched to configuration voters=(1 2 3) -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/5 Commit:5 -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:5 - CommittedEntries: - 1/5 EntryConfChangeV2 - Messages: - 2->1 MsgAppResp Term:1 Log:0/5 - INFO 2 switched to configuration voters=(1 2 3) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/5 - -# n3 immediately receives a snapshot in the final configuration. 
-stabilize 1 3 ----- -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2 v3] - INFO 3 [term: 0] received a MsgApp message with higher term from 1 [term: 1] - INFO 3 became follower at term 1 - DEBUG 3 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 -> 3 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - HardState Term:1 Commit:0 - Messages: - 3->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) -> 1 receiving messages - 3->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) - DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 3 for index 3 - DEBUG 1 decreased progress of 3 to [StateProbe match=0 next=1] - DEBUG 1 [firstindex: 3, commit: 5] sent snapshot[index: 5, term: 1] to 3 [StateProbe match=0 next=1] - DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=1 paused pendingSnap=5] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false -> 3 receiving messages - 1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 5, term: 1] - INFO 3 switched to configuration voters=(1 2 3) - INFO 3 [commit: 5, lastindex: 5, lastterm: 1] restored snapshot [index: 5, term: 1] - INFO 3 [commit: 5] restored snapshot [index: 5, term: 1] -> 3 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:5 - Snapshot Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - Messages: - 3->1 MsgAppResp Term:1 Log:0/5 -> 1 receiving messages - 3->1 MsgAppResp Term:1 Log:0/5 - DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 3 [StateSnapshot match=5 next=6 
paused pendingSnap=5] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->3 MsgApp Term:1 Log:1/5 Commit:5 -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/5 Commit:5 -> 3 handling Ready - Ready MustSync=false: - Messages: - 3->1 MsgAppResp Term:1 Log:0/5 -> 1 receiving messages - 3->1 MsgAppResp Term:1 Log:0/5 - -# Nothing else happens. -stabilize ----- -ok - -# Now remove two nodes. What's new here is that the leader will actually have -# to go to a quorum to commit the transition into the joint config. - -propose-conf-change 1 -r2 r3 ----- -ok - -# n1 sends out MsgApps. -stabilize 1 ----- -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/6 EntryConfChangeV2 r2 r3 - Messages: - 1->2 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2 r2 r3] - 1->3 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2 r2 r3] - -# n2, n3 ack them. -stabilize 2 3 ----- -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2 r2 r3] -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2 r2 r3] -> 2 handling Ready - Ready MustSync=true: - Entries: - 1/6 EntryConfChangeV2 r2 r3 - Messages: - 2->1 MsgAppResp Term:1 Log:0/6 -> 3 handling Ready - Ready MustSync=true: - Entries: - 1/6 EntryConfChangeV2 r2 r3 - Messages: - 3->1 MsgAppResp Term:1 Log:0/6 - -# n1 gets some more proposals. This is part of a regression test: There used to -# be a bug in which these proposals would prompt the leader to transition out of -# the same joint state multiple times, which would cause a panic. -propose 1 foo ----- -ok - -propose 1 bar ----- -ok - -# n1 switches to the joint config, then initiates a transition into the final -# config. 
-stabilize 1 ----- -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/7 EntryNormal "foo" - 1/8 EntryNormal "bar" - Messages: - 1->2 MsgApp Term:1 Log:1/6 Commit:5 Entries:[1/7 EntryNormal "foo"] - 1->3 MsgApp Term:1 Log:1/6 Commit:5 Entries:[1/7 EntryNormal "foo"] - 1->2 MsgApp Term:1 Log:1/7 Commit:5 Entries:[1/8 EntryNormal "bar"] - 1->3 MsgApp Term:1 Log:1/7 Commit:5 Entries:[1/8 EntryNormal "bar"] -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/6 - 3->1 MsgAppResp Term:1 Log:0/6 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:6 - CommittedEntries: - 1/6 EntryConfChangeV2 r2 r3 - Messages: - 1->2 MsgApp Term:1 Log:1/8 Commit:6 - 1->3 MsgApp Term:1 Log:1/8 Commit:6 - INFO 1 switched to configuration voters=(1)&&(1 2 3) autoleave - INFO initiating automatic transition out of joint configuration voters=(1)&&(1 2 3) autoleave -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/9 EntryConfChangeV2 - Messages: - 1->2 MsgApp Term:1 Log:1/8 Commit:6 Entries:[1/9 EntryConfChangeV2] - 1->3 MsgApp Term:1 Log:1/8 Commit:6 Entries:[1/9 EntryConfChangeV2] - -# n2 and n3 also switch to the joint config, and ack the transition out of it. 
-stabilize 2 3 ----- -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/6 Commit:5 Entries:[1/7 EntryNormal "foo"] - 1->2 MsgApp Term:1 Log:1/7 Commit:5 Entries:[1/8 EntryNormal "bar"] - 1->2 MsgApp Term:1 Log:1/8 Commit:6 - 1->2 MsgApp Term:1 Log:1/8 Commit:6 Entries:[1/9 EntryConfChangeV2] -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/6 Commit:5 Entries:[1/7 EntryNormal "foo"] - 1->3 MsgApp Term:1 Log:1/7 Commit:5 Entries:[1/8 EntryNormal "bar"] - 1->3 MsgApp Term:1 Log:1/8 Commit:6 - 1->3 MsgApp Term:1 Log:1/8 Commit:6 Entries:[1/9 EntryConfChangeV2] -> 2 handling Ready - Ready MustSync=true: - HardState Term:1 Commit:6 - Entries: - 1/7 EntryNormal "foo" - 1/8 EntryNormal "bar" - 1/9 EntryConfChangeV2 - CommittedEntries: - 1/6 EntryConfChangeV2 r2 r3 - Messages: - 2->1 MsgAppResp Term:1 Log:0/7 - 2->1 MsgAppResp Term:1 Log:0/8 - 2->1 MsgAppResp Term:1 Log:0/8 - 2->1 MsgAppResp Term:1 Log:0/9 - INFO 2 switched to configuration voters=(1)&&(1 2 3) autoleave -> 3 handling Ready - Ready MustSync=true: - HardState Term:1 Commit:6 - Entries: - 1/7 EntryNormal "foo" - 1/8 EntryNormal "bar" - 1/9 EntryConfChangeV2 - CommittedEntries: - 1/6 EntryConfChangeV2 r2 r3 - Messages: - 3->1 MsgAppResp Term:1 Log:0/7 - 3->1 MsgAppResp Term:1 Log:0/8 - 3->1 MsgAppResp Term:1 Log:0/8 - 3->1 MsgAppResp Term:1 Log:0/9 - INFO 3 switched to configuration voters=(1)&&(1 2 3) autoleave - -# n2 and n3 also leave the joint config and the dust settles. We see at the very -# end that n1 receives some messages from them that it refuses because it does -# not have them in its config any more. 
-stabilize ----- -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/7 - 2->1 MsgAppResp Term:1 Log:0/8 - 2->1 MsgAppResp Term:1 Log:0/8 - 2->1 MsgAppResp Term:1 Log:0/9 - 3->1 MsgAppResp Term:1 Log:0/7 - 3->1 MsgAppResp Term:1 Log:0/8 - 3->1 MsgAppResp Term:1 Log:0/8 - 3->1 MsgAppResp Term:1 Log:0/9 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:9 - CommittedEntries: - 1/7 EntryNormal "foo" - 1/8 EntryNormal "bar" - 1/9 EntryConfChangeV2 - Messages: - 1->2 MsgApp Term:1 Log:1/9 Commit:7 - 1->3 MsgApp Term:1 Log:1/9 Commit:7 - 1->2 MsgApp Term:1 Log:1/9 Commit:8 - 1->3 MsgApp Term:1 Log:1/9 Commit:8 - 1->2 MsgApp Term:1 Log:1/9 Commit:9 - 1->3 MsgApp Term:1 Log:1/9 Commit:9 - INFO 1 switched to configuration voters=(1) -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/9 Commit:7 - 1->2 MsgApp Term:1 Log:1/9 Commit:8 - 1->2 MsgApp Term:1 Log:1/9 Commit:9 -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/9 Commit:7 - 1->3 MsgApp Term:1 Log:1/9 Commit:8 - 1->3 MsgApp Term:1 Log:1/9 Commit:9 -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:9 - CommittedEntries: - 1/7 EntryNormal "foo" - 1/8 EntryNormal "bar" - 1/9 EntryConfChangeV2 - Messages: - 2->1 MsgAppResp Term:1 Log:0/9 - 2->1 MsgAppResp Term:1 Log:0/9 - 2->1 MsgAppResp Term:1 Log:0/9 - INFO 2 switched to configuration voters=(1) -> 3 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:9 - CommittedEntries: - 1/7 EntryNormal "foo" - 1/8 EntryNormal "bar" - 1/9 EntryConfChangeV2 - Messages: - 3->1 MsgAppResp Term:1 Log:0/9 - 3->1 MsgAppResp Term:1 Log:0/9 - 3->1 MsgAppResp Term:1 Log:0/9 - INFO 3 switched to configuration voters=(1) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/9 - raft: cannot step as peer not found - 2->1 MsgAppResp Term:1 Log:0/9 - raft: cannot step as peer not found - 2->1 MsgAppResp Term:1 Log:0/9 - raft: cannot step as peer not found - 3->1 MsgAppResp Term:1 Log:0/9 - raft: cannot step as peer not found - 3->1 
MsgAppResp Term:1 Log:0/9 - raft: cannot step as peer not found - 3->1 MsgAppResp Term:1 Log:0/9 - raft: cannot step as peer not found diff --git a/raft/testdata/confchange_v2_add_double_implicit.txt b/raft/testdata/confchange_v2_add_double_implicit.txt deleted file mode 100644 index 45dfc5099b98..000000000000 --- a/raft/testdata/confchange_v2_add_double_implicit.txt +++ /dev/null @@ -1,128 +0,0 @@ -# Run a V2 membership change that adds a single voter but explicitly asks for the -# use of joint consensus (with auto-leaving). - -# TODO(tbg): also verify that if the leader changes while in the joint state, the -# new leader will auto-transition out of the joint state just the same. - -# Bootstrap n1. -add-nodes 1 voters=(1) index=2 ----- -INFO 1 switched to configuration voters=(1) -INFO 1 became follower at term 0 -INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] - -campaign 1 ----- -INFO 1 is starting a new election at term 0 -INFO 1 became candidate at term 1 -INFO 1 received MsgVoteResp from 1 at term 1 -INFO 1 became leader at term 1 - -propose-conf-change 1 transition=implicit -v2 ----- -ok - -# Add n2. -add-nodes 1 ----- -INFO 2 switched to configuration voters=() -INFO 2 became follower at term 0 -INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] - -# n1 commits the conf change using itself as commit quorum, then starts catching up n2. -# When that's done, it starts auto-transitioning out. Note that the snapshots propagating -# the joint config have the AutoLeave flag set in their config. 
-stabilize 1 2 ----- -> 1 handling Ready - Ready MustSync=true: - Lead:1 State:StateLeader - HardState Term:1 Vote:1 Commit:2 - Entries: - 1/3 EntryNormal "" - 1/4 EntryConfChangeV2 v2 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:4 - CommittedEntries: - 1/3 EntryNormal "" - 1/4 EntryConfChangeV2 v2 - INFO 1 switched to configuration voters=(1 2)&&(1) autoleave - INFO initiating automatic transition out of joint configuration voters=(1 2)&&(1) autoleave -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/5 EntryConfChangeV2 - Messages: - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2] - INFO 2 [term: 0] received a MsgApp message with higher term from 1 [term: 1] - INFO 2 became follower at term 1 - DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 -> 2 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - HardState Term:1 Commit:0 - Messages: - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) - DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3 - DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1] - DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1] - DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true -> 2 receiving messages - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true - INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore 
snapshot [index: 4, term: 1] - INFO 2 switched to configuration voters=(1 2)&&(1) autoleave - INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1] - INFO 2 [commit: 4] restored snapshot [index: 4, term: 1] -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:4 - Snapshot Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2] -> 2 handling Ready - Ready MustSync=true: - Entries: - 1/5 EntryConfChangeV2 - Messages: - 2->1 MsgAppResp Term:1 Log:0/5 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/5 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:5 - CommittedEntries: - 1/5 EntryConfChangeV2 - Messages: - 1->2 MsgApp Term:1 Log:1/5 Commit:5 - INFO 1 switched to configuration voters=(1 2) -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/5 Commit:5 -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:5 - CommittedEntries: - 1/5 EntryConfChangeV2 - Messages: - 2->1 MsgAppResp Term:1 Log:0/5 - INFO 2 switched to configuration voters=(1 2) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/5 diff --git a/raft/testdata/confchange_v2_add_single_auto.txt b/raft/testdata/confchange_v2_add_single_auto.txt deleted file mode 100644 index 7ee3ab6c33c7..000000000000 --- a/raft/testdata/confchange_v2_add_single_auto.txt +++ /dev/null @@ -1,101 +0,0 @@ -# Run a V2 membership change that adds a single voter in auto mode, which means -# that joint consensus is not used but a direct 
transition into the new config -# takes place. - -# Bootstrap n1. -add-nodes 1 voters=(1) index=2 ----- -INFO 1 switched to configuration voters=(1) -INFO 1 became follower at term 0 -INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] - -campaign 1 ----- -INFO 1 is starting a new election at term 0 -INFO 1 became candidate at term 1 -INFO 1 received MsgVoteResp from 1 at term 1 -INFO 1 became leader at term 1 - -# Add v2 (with an auto transition). -propose-conf-change 1 -v2 ----- -ok - -# Pull n2 out of thin air. -add-nodes 1 ----- -INFO 2 switched to configuration voters=() -INFO 2 became follower at term 0 -INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] - -# n1 commits the conf change using itself as commit quorum, immediately transitions into -# the final config, and catches up n2. -stabilize ----- -> 1 handling Ready - Ready MustSync=true: - Lead:1 State:StateLeader - HardState Term:1 Vote:1 Commit:2 - Entries: - 1/3 EntryNormal "" - 1/4 EntryConfChangeV2 v2 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:4 - CommittedEntries: - 1/3 EntryNormal "" - 1/4 EntryConfChangeV2 v2 - INFO 1 switched to configuration voters=(1 2) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2] - INFO 2 [term: 0] received a MsgApp message with higher term from 1 [term: 1] - INFO 2 became follower at term 1 - DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 -> 2 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - HardState Term:1 Commit:0 - Messages: - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) - DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3 - DEBUG 1 
decreased progress of 2 to [StateProbe match=0 next=1] - DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1] - DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false -> 2 receiving messages - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1] - INFO 2 switched to configuration voters=(1 2) - INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1] - INFO 2 [commit: 4] restored snapshot [index: 4, term: 1] -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:4 - Snapshot Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/4 Commit:4 -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/4 Commit:4 -> 2 handling Ready - Ready MustSync=false: - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 diff --git a/raft/testdata/confchange_v2_add_single_explicit.txt b/raft/testdata/confchange_v2_add_single_explicit.txt deleted file mode 100644 index b4e6e3a83cda..000000000000 --- a/raft/testdata/confchange_v2_add_single_explicit.txt +++ /dev/null @@ -1,209 +0,0 @@ -# Run a V2 membership change that adds a single voter but 
explicitly asks for the -# use of joint consensus, including wanting to transition out of the joint config -# manually. - -# Bootstrap n1. -add-nodes 1 voters=(1) index=2 ----- -INFO 1 switched to configuration voters=(1) -INFO 1 became follower at term 0 -INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] - -campaign 1 ----- -INFO 1 is starting a new election at term 0 -INFO 1 became candidate at term 1 -INFO 1 received MsgVoteResp from 1 at term 1 -INFO 1 became leader at term 1 - -# Add v2 with an explicit transition. -propose-conf-change 1 transition=explicit -v2 ----- -ok - -# Pull n2 out of thin air. -add-nodes 1 ----- -INFO 2 switched to configuration voters=() -INFO 2 became follower at term 0 -INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] - -# n1 commits the conf change using itself as commit quorum, then starts catching up n2. -# Everyone remains in the joint config. Note that the snapshot below has AutoLeave unset. 
-stabilize 1 2 ----- -> 1 handling Ready - Ready MustSync=true: - Lead:1 State:StateLeader - HardState Term:1 Vote:1 Commit:2 - Entries: - 1/3 EntryNormal "" - 1/4 EntryConfChangeV2 v2 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:4 - CommittedEntries: - 1/3 EntryNormal "" - 1/4 EntryConfChangeV2 v2 - INFO 1 switched to configuration voters=(1 2)&&(1) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2] - INFO 2 [term: 0] received a MsgApp message with higher term from 1 [term: 1] - INFO 2 became follower at term 1 - DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 -> 2 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - HardState Term:1 Commit:0 - Messages: - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) - DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3 - DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1] - DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1] - DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:false -> 2 receiving messages - 1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:false - INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1] - INFO 2 switched to configuration voters=(1 2)&&(1) - INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored 
snapshot [index: 4, term: 1] - INFO 2 [commit: 4] restored snapshot [index: 4, term: 1] -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:4 - Snapshot Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:false - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/4 Commit:4 -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/4 Commit:4 -> 2 handling Ready - Ready MustSync=false: - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - -# Check that we're not allowed to change membership again while in the joint state. -# This leads to an empty entry being proposed instead (index 5 in the stabilize block -# below). -propose-conf-change 1 -v3 v4 v5 ----- -INFO 1 ignoring conf change {ConfChangeTransitionAuto [{ConfChangeAddNode 3} {ConfChangeAddNode 4} {ConfChangeAddNode 5}] []} at config voters=(1 2)&&(1): must transition out of joint config first - -# Propose a transition out of the joint config. We'll see this at index 6 below. -propose-conf-change 1 ----- -ok - -# The group commits the command and everyone switches to the final config. 
-stabilize ----- -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/5 EntryNormal "" - 1/6 EntryConfChangeV2 - Messages: - 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] - 1->2 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] - 1->2 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2] -> 2 handling Ready - Ready MustSync=true: - Entries: - 1/5 EntryNormal "" - 1/6 EntryConfChangeV2 - Messages: - 2->1 MsgAppResp Term:1 Log:0/5 - 2->1 MsgAppResp Term:1 Log:0/6 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/5 - 2->1 MsgAppResp Term:1 Log:0/6 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:6 - CommittedEntries: - 1/5 EntryNormal "" - 1/6 EntryConfChangeV2 - Messages: - 1->2 MsgApp Term:1 Log:1/6 Commit:5 - 1->2 MsgApp Term:1 Log:1/6 Commit:6 - INFO 1 switched to configuration voters=(1 2) -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/6 Commit:5 - 1->2 MsgApp Term:1 Log:1/6 Commit:6 -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:6 - CommittedEntries: - 1/5 EntryNormal "" - 1/6 EntryConfChangeV2 - Messages: - 2->1 MsgAppResp Term:1 Log:0/6 - 2->1 MsgAppResp Term:1 Log:0/6 - INFO 2 switched to configuration voters=(1 2) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/6 - 2->1 MsgAppResp Term:1 Log:0/6 - -# Check that trying to transition out again won't do anything. -propose-conf-change 1 ----- -INFO 1 ignoring conf change {ConfChangeTransitionAuto [] []} at config voters=(1 2): not in joint state; refusing empty conf change - -# Finishes work for the empty entry we just proposed. 
-stabilize ----- -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/7 EntryNormal "" - Messages: - 1->2 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryNormal ""] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryNormal ""] -> 2 handling Ready - Ready MustSync=true: - Entries: - 1/7 EntryNormal "" - Messages: - 2->1 MsgAppResp Term:1 Log:0/7 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/7 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:7 - CommittedEntries: - 1/7 EntryNormal "" - Messages: - 1->2 MsgApp Term:1 Log:1/7 Commit:7 -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/7 Commit:7 -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:7 - CommittedEntries: - 1/7 EntryNormal "" - Messages: - 2->1 MsgAppResp Term:1 Log:0/7 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/7 diff --git a/raft/testdata/confchange_v2_replace_leader.txt b/raft/testdata/confchange_v2_replace_leader.txt deleted file mode 100644 index be7b573cbaa5..000000000000 --- a/raft/testdata/confchange_v2_replace_leader.txt +++ /dev/null @@ -1,436 +0,0 @@ -# Run a V2 membership change that removes the leader and adds another voter as -# a single operation, using joint consensus and explicitly determining when to -# transition out of the joint config. Leadership is transferred to new joiner -# while in the joint config. After the reconfiguration completes, we verify -# that the removed leader cannot campaign to become leader. - -# We'll turn this back on after the boilerplate. -log-level none ----- -ok - -# Bootstrap n1, n2, n3. -add-nodes 3 voters=(1,2,3) index=2 ----- -ok - -# n1 campaigns to become leader. 
-campaign 1 ----- -ok - -stabilize ----- -ok (quiet) - -log-level info ----- -ok - -raft-state ----- -1: StateLeader (Voter) -2: StateFollower (Voter) -3: StateFollower (Voter) - -log-level info ----- -ok - -# create n4 -add-nodes 1 ----- -INFO 4 switched to configuration voters=() -INFO 4 became follower at term 0 -INFO newRaft 4 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] - -# Start reconfiguration to remove n1 and add n4. -propose-conf-change 1 v1=false transition=explicit -r1 v4 ----- -ok - -# Enter joint config. -stabilize ----- -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/4 EntryConfChangeV2 r1 v4 - Messages: - 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 r1 v4] - 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 r1 v4] -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 r1 v4] -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 r1 v4] -> 2 handling Ready - Ready MustSync=true: - Entries: - 1/4 EntryConfChangeV2 r1 v4 - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 -> 3 handling Ready - Ready MustSync=true: - Entries: - 1/4 EntryConfChangeV2 r1 v4 - Messages: - 3->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - 3->1 MsgAppResp Term:1 Log:0/4 -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:4 - CommittedEntries: - 1/4 EntryConfChangeV2 r1 v4 - Messages: - 1->2 MsgApp Term:1 Log:1/4 Commit:4 - 1->3 MsgApp Term:1 Log:1/4 Commit:4 - INFO 1 switched to configuration voters=(2 3 4)&&(1 2 3) -> 2 receiving messages - 1->2 MsgApp Term:1 Log:1/4 Commit:4 -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/4 Commit:4 -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 r1 v4] -> 2 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:4 - CommittedEntries: - 1/4 
EntryConfChangeV2 r1 v4 - Messages: - 2->1 MsgAppResp Term:1 Log:0/4 - INFO 2 switched to configuration voters=(2 3 4)&&(1 2 3) -> 3 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:4 - CommittedEntries: - 1/4 EntryConfChangeV2 r1 v4 - Messages: - 3->1 MsgAppResp Term:1 Log:0/4 - INFO 3 switched to configuration voters=(2 3 4)&&(1 2 3) -> 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/4 - 3->1 MsgAppResp Term:1 Log:0/4 -> 4 receiving messages - 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 r1 v4] - INFO 4 [term: 0] received a MsgApp message with higher term from 1 [term: 1] - INFO 4 became follower at term 1 -> 4 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - HardState Term:1 Commit:0 - Messages: - 4->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) -> 1 receiving messages - 4->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->4 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[2 3 4] VotersOutgoing:[1 2 3] Learners:[] LearnersNext:[] AutoLeave:false -> 4 receiving messages - 1->4 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[2 3 4] VotersOutgoing:[1 2 3] Learners:[] LearnersNext:[] AutoLeave:false - INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1] - INFO 4 switched to configuration voters=(2 3 4)&&(1 2 3) - INFO 4 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1] - INFO 4 [commit: 4] restored snapshot [index: 4, term: 1] -> 4 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:4 - Snapshot Index:4 Term:1 ConfState:Voters:[2 3 4] VotersOutgoing:[1 2 3] Learners:[] LearnersNext:[] AutoLeave:false - Messages: - 4->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 4->1 MsgAppResp Term:1 Log:0/4 -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->4 MsgApp Term:1 Log:1/4 Commit:4 -> 
4 receiving messages - 1->4 MsgApp Term:1 Log:1/4 Commit:4 -> 4 handling Ready - Ready MustSync=false: - Messages: - 4->1 MsgAppResp Term:1 Log:0/4 -> 1 receiving messages - 4->1 MsgAppResp Term:1 Log:0/4 - - -# Transfer leadership while in the joint config. -transfer-leadership from=1 to=4 ----- -INFO 1 [term 1] starts to transfer leadership to 4 -INFO 1 sends MsgTimeoutNow to 4 immediately as 4 already has up-to-date log - -# Leadership transfer wasn't processed yet. -raft-state ----- -1: StateLeader (Voter) -2: StateFollower (Voter) -3: StateFollower (Voter) -4: StateFollower (Voter) - -# Leadership transfer is happening here. -stabilize ----- -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->4 MsgTimeoutNow Term:1 Log:0/0 -> 4 receiving messages - 1->4 MsgTimeoutNow Term:1 Log:0/0 - INFO 4 [term 1] received MsgTimeoutNow from 1 and starts an election to get leadership. - INFO 4 is starting a new election at term 1 - INFO 4 became candidate at term 2 - INFO 4 received MsgVoteResp from 4 at term 2 - INFO 4 [logterm: 1, index: 4] sent MsgVote request to 1 at term 2 - INFO 4 [logterm: 1, index: 4] sent MsgVote request to 2 at term 2 - INFO 4 [logterm: 1, index: 4] sent MsgVote request to 3 at term 2 -> 4 handling Ready - Ready MustSync=true: - Lead:0 State:StateCandidate - HardState Term:2 Vote:4 Commit:4 - Messages: - 4->1 MsgVote Term:2 Log:1/4 - 4->2 MsgVote Term:2 Log:1/4 - 4->3 MsgVote Term:2 Log:1/4 -> 1 receiving messages - 4->1 MsgVote Term:2 Log:1/4 - INFO 1 [term: 1] received a MsgVote message with higher term from 4 [term: 2] - INFO 1 became follower at term 2 - INFO 1 [logterm: 1, index: 4, vote: 0] cast MsgVote for 4 [logterm: 1, index: 4] at term 2 -> 2 receiving messages - 4->2 MsgVote Term:2 Log:1/4 - INFO 2 [term: 1] received a MsgVote message with higher term from 4 [term: 2] - INFO 2 became follower at term 2 - INFO 2 [logterm: 1, index: 4, vote: 0] cast MsgVote for 4 [logterm: 1, index: 4] at term 2 -> 3 receiving messages - 4->3 
MsgVote Term:2 Log:1/4 - INFO 3 [term: 1] received a MsgVote message with higher term from 4 [term: 2] - INFO 3 became follower at term 2 - INFO 3 [logterm: 1, index: 4, vote: 0] cast MsgVote for 4 [logterm: 1, index: 4] at term 2 -> 1 handling Ready - Ready MustSync=true: - Lead:0 State:StateFollower - HardState Term:2 Vote:4 Commit:4 - Messages: - 1->4 MsgVoteResp Term:2 Log:0/0 -> 2 handling Ready - Ready MustSync=true: - Lead:0 State:StateFollower - HardState Term:2 Vote:4 Commit:4 - Messages: - 2->4 MsgVoteResp Term:2 Log:0/0 -> 3 handling Ready - Ready MustSync=true: - Lead:0 State:StateFollower - HardState Term:2 Vote:4 Commit:4 - Messages: - 3->4 MsgVoteResp Term:2 Log:0/0 -> 4 receiving messages - 1->4 MsgVoteResp Term:2 Log:0/0 - INFO 4 received MsgVoteResp from 1 at term 2 - INFO 4 has received 2 MsgVoteResp votes and 0 vote rejections - 2->4 MsgVoteResp Term:2 Log:0/0 - INFO 4 received MsgVoteResp from 2 at term 2 - INFO 4 has received 3 MsgVoteResp votes and 0 vote rejections - INFO 4 became leader at term 2 - 3->4 MsgVoteResp Term:2 Log:0/0 -> 4 handling Ready - Ready MustSync=true: - Lead:4 State:StateLeader - Entries: - 2/5 EntryNormal "" - Messages: - 4->1 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] - 4->2 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] - 4->3 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] -> 1 receiving messages - 4->1 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] -> 2 receiving messages - 4->2 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] -> 3 receiving messages - 4->3 MsgApp Term:2 Log:1/4 Commit:4 Entries:[2/5 EntryNormal ""] -> 1 handling Ready - Ready MustSync=true: - Lead:4 State:StateFollower - Entries: - 2/5 EntryNormal "" - Messages: - 1->4 MsgAppResp Term:2 Log:0/5 -> 2 handling Ready - Ready MustSync=true: - Lead:4 State:StateFollower - Entries: - 2/5 EntryNormal "" - Messages: - 2->4 MsgAppResp Term:2 Log:0/5 -> 3 handling Ready - Ready MustSync=true: 
- Lead:4 State:StateFollower - Entries: - 2/5 EntryNormal "" - Messages: - 3->4 MsgAppResp Term:2 Log:0/5 -> 4 receiving messages - 1->4 MsgAppResp Term:2 Log:0/5 - 2->4 MsgAppResp Term:2 Log:0/5 - 3->4 MsgAppResp Term:2 Log:0/5 -> 4 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:4 Commit:5 - CommittedEntries: - 2/5 EntryNormal "" - Messages: - 4->1 MsgApp Term:2 Log:2/5 Commit:4 - 4->1 MsgApp Term:2 Log:2/5 Commit:5 - 4->2 MsgApp Term:2 Log:2/5 Commit:5 - 4->3 MsgApp Term:2 Log:2/5 Commit:5 -> 1 receiving messages - 4->1 MsgApp Term:2 Log:2/5 Commit:4 - 4->1 MsgApp Term:2 Log:2/5 Commit:5 -> 2 receiving messages - 4->2 MsgApp Term:2 Log:2/5 Commit:5 -> 3 receiving messages - 4->3 MsgApp Term:2 Log:2/5 Commit:5 -> 1 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:4 Commit:5 - CommittedEntries: - 2/5 EntryNormal "" - Messages: - 1->4 MsgAppResp Term:2 Log:0/5 - 1->4 MsgAppResp Term:2 Log:0/5 -> 2 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:4 Commit:5 - CommittedEntries: - 2/5 EntryNormal "" - Messages: - 2->4 MsgAppResp Term:2 Log:0/5 -> 3 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:4 Commit:5 - CommittedEntries: - 2/5 EntryNormal "" - Messages: - 3->4 MsgAppResp Term:2 Log:0/5 -> 4 receiving messages - 1->4 MsgAppResp Term:2 Log:0/5 - 1->4 MsgAppResp Term:2 Log:0/5 - 2->4 MsgAppResp Term:2 Log:0/5 - 3->4 MsgAppResp Term:2 Log:0/5 - -# Leadership transfer succeeded. -raft-state ----- -1: StateFollower (Voter) -2: StateFollower (Voter) -3: StateFollower (Voter) -4: StateLeader (Voter) - -# n4 will propose a transition out of the joint config. -propose-conf-change 4 ----- -ok - -# The group commits the command and everyone switches to the final config. 
-stabilize ----- -> 4 handling Ready - Ready MustSync=true: - Entries: - 2/6 EntryConfChangeV2 - Messages: - 4->1 MsgApp Term:2 Log:2/5 Commit:5 Entries:[2/6 EntryConfChangeV2] - 4->2 MsgApp Term:2 Log:2/5 Commit:5 Entries:[2/6 EntryConfChangeV2] - 4->3 MsgApp Term:2 Log:2/5 Commit:5 Entries:[2/6 EntryConfChangeV2] -> 1 receiving messages - 4->1 MsgApp Term:2 Log:2/5 Commit:5 Entries:[2/6 EntryConfChangeV2] -> 2 receiving messages - 4->2 MsgApp Term:2 Log:2/5 Commit:5 Entries:[2/6 EntryConfChangeV2] -> 3 receiving messages - 4->3 MsgApp Term:2 Log:2/5 Commit:5 Entries:[2/6 EntryConfChangeV2] -> 1 handling Ready - Ready MustSync=true: - Entries: - 2/6 EntryConfChangeV2 - Messages: - 1->4 MsgAppResp Term:2 Log:0/6 -> 2 handling Ready - Ready MustSync=true: - Entries: - 2/6 EntryConfChangeV2 - Messages: - 2->4 MsgAppResp Term:2 Log:0/6 -> 3 handling Ready - Ready MustSync=true: - Entries: - 2/6 EntryConfChangeV2 - Messages: - 3->4 MsgAppResp Term:2 Log:0/6 -> 4 receiving messages - 1->4 MsgAppResp Term:2 Log:0/6 - 2->4 MsgAppResp Term:2 Log:0/6 - 3->4 MsgAppResp Term:2 Log:0/6 -> 4 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:4 Commit:6 - CommittedEntries: - 2/6 EntryConfChangeV2 - Messages: - 4->1 MsgApp Term:2 Log:2/6 Commit:6 - 4->2 MsgApp Term:2 Log:2/6 Commit:6 - 4->3 MsgApp Term:2 Log:2/6 Commit:6 - INFO 4 switched to configuration voters=(2 3 4) -> 1 receiving messages - 4->1 MsgApp Term:2 Log:2/6 Commit:6 -> 2 receiving messages - 4->2 MsgApp Term:2 Log:2/6 Commit:6 -> 3 receiving messages - 4->3 MsgApp Term:2 Log:2/6 Commit:6 -> 1 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:4 Commit:6 - CommittedEntries: - 2/6 EntryConfChangeV2 - Messages: - 1->4 MsgAppResp Term:2 Log:0/6 - INFO 1 switched to configuration voters=(2 3 4) -> 2 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:4 Commit:6 - CommittedEntries: - 2/6 EntryConfChangeV2 - Messages: - 2->4 MsgAppResp Term:2 Log:0/6 - INFO 2 switched to configuration 
voters=(2 3 4) -> 3 handling Ready - Ready MustSync=false: - HardState Term:2 Vote:4 Commit:6 - CommittedEntries: - 2/6 EntryConfChangeV2 - Messages: - 3->4 MsgAppResp Term:2 Log:0/6 - INFO 3 switched to configuration voters=(2 3 4) -> 4 receiving messages - 1->4 MsgAppResp Term:2 Log:0/6 - raft: cannot step as peer not found - 2->4 MsgAppResp Term:2 Log:0/6 - 3->4 MsgAppResp Term:2 Log:0/6 - -# n1 is out of the configuration. -raft-state ----- -1: StateFollower (Non-Voter) -2: StateFollower (Voter) -3: StateFollower (Voter) -4: StateLeader (Voter) - -# Make sure n1 cannot campaign to become leader. -campaign 1 ----- -WARN 1 is unpromotable and can not campaign diff --git a/raft/testdata/probe_and_replicate.txt b/raft/testdata/probe_and_replicate.txt deleted file mode 100644 index bebae6ef9c85..000000000000 --- a/raft/testdata/probe_and_replicate.txt +++ /dev/null @@ -1,767 +0,0 @@ -# This test creates a complete Raft log configuration and demonstrates how a -# leader probes and replicates to each of its followers. The log configuration -# constructed is almost[*] identical to the one present in Figure 7 of the raft -# paper (https://raft.github.io/raft.pdf), which looks like: -# -# 1 2 3 4 5 6 7 8 9 10 11 12 -# n1: [1][1][1][4][4][5][5][6][6][6] -# n2: [1][1][1][4][4][5][5][6][6] -# n3: [1][1][1][4] -# n4: [1][1][1][4][4][5][5][6][6][6][6] -# n5: [1][1][1][4][4][5][5][6][7][7][7][7] -# n6: [1][1][1][4][4][4][4] -# n7: [1][1][1][2][2][2][3][3][3][3][3] -# -# Once in this state, we then elect node 1 as the leader and stabilize the -# entire raft group. This demonstrates how a newly elected leader probes for -# matching indexes, overwrites conflicting entries, and catches up all -# followers. -# -# [*] the only differences are: -# 1. n5 is given a larger uncommitted log tail, which is used to demonstrate a -# follower-side probing optimization. -# 2. the log indexes are shifted by 10 in this test because add-nodes wants to -# start with an index > 1. 
-# - - -# Set up the log configuration. This is mostly unintersting, but the order of -# each leadership change and the nodes that are allowed to hear about them is -# very important. Most readers of this test can skip this section. -log-level none ----- -ok - -## Start with seven nodes. -add-nodes 7 voters=(1,2,3,4,5,6,7) index=10 ----- -ok - -## Create term 1 entries. -campaign 1 ----- -ok - -stabilize ----- -ok (quiet) - -propose 1 prop_1_12 ----- -ok - -propose 1 prop_1_13 ----- -ok - -stabilize ----- -ok (quiet) - -## Create term 2 entries. -campaign 2 ----- -ok - -stabilize 2 ----- -ok (quiet) - -stabilize 6 ----- -ok (quiet) - -stabilize 2 5 7 ----- -ok (quiet) - -propose 2 prop_2_15 ----- -ok - -propose 2 prop_2_16 ----- -ok - -stabilize 2 7 ----- -ok (quiet) - -deliver-msgs drop=(1,2,3,4,5,6,7) ----- -ok (quiet) - -## Create term 3 entries. -campaign 7 ----- -ok - -stabilize 7 ----- -ok (quiet) - -stabilize 1 2 3 4 5 6 ----- -ok (quiet) - -stabilize 7 ----- -ok (quiet) - -propose 7 prop_3_18 ----- -ok - -propose 7 prop_3_19 ----- -ok - -propose 7 prop_3_20 ----- -ok - -propose 7 prop_3_21 ----- -ok - -stabilize 7 ----- -ok (quiet) - -deliver-msgs drop=(1,2,3,4,5,6,7) ----- -ok (quiet) - -## Create term 4 entries. -campaign 6 ----- -ok - -stabilize 1 2 3 4 5 6 ----- -ok (quiet) - -propose 6 prop_4_15 ----- -ok - -stabilize 1 2 4 5 6 ----- -ok (quiet) - -propose 6 prop_4_16 ----- -ok - -propose 6 prop_4_17 ----- -ok - -stabilize 6 ----- -ok (quiet) - -deliver-msgs drop=(1,2,3,4,5,6,7) ----- -ok (quiet) - -## Create term 5 entries. -campaign 5 ----- -ok - -stabilize 1 2 4 5 ----- -ok (quiet) - -propose 5 prop_5_17 ----- -ok - -stabilize 1 2 4 5 ----- -ok (quiet) - -deliver-msgs drop=(1,2,3,4,5,6,7) ----- -ok (quiet) - -## Create term 6 entries. 
-campaign 4 ----- -ok - -stabilize 1 2 4 5 ----- -ok (quiet) - -propose 4 prop_6_19 ----- -ok - -stabilize 1 2 4 ----- -ok (quiet) - -propose 4 prop_6_20 ----- -ok - -stabilize 1 4 ----- -ok (quiet) - -propose 4 prop_6_21 ----- -ok - -stabilize 4 ----- -ok (quiet) - -deliver-msgs drop=(1,2,3,4,5,6,7) ----- -ok (quiet) - -## Create term 7 entries. -campaign 5 ----- -ok - -stabilize 5 ----- -ok (quiet) - -stabilize 1 3 6 7 ----- -ok (quiet) - -stabilize 5 ----- -ok (quiet) - -propose 5 prop_7_20 ----- -ok - -propose 5 prop_7_21 ----- -ok - -propose 5 prop_7_22 ----- -ok - -stabilize 5 ----- -ok (quiet) - -deliver-msgs drop=(1,2,3,4,5,6,7) ----- -ok (quiet) - - -# Show the Raft log from each node. -log-level info ----- -ok - -raft-log 1 ----- -1/11 EntryNormal "" -1/12 EntryNormal "prop_1_12" -1/13 EntryNormal "prop_1_13" -4/14 EntryNormal "" -4/15 EntryNormal "prop_4_15" -5/16 EntryNormal "" -5/17 EntryNormal "prop_5_17" -6/18 EntryNormal "" -6/19 EntryNormal "prop_6_19" -6/20 EntryNormal "prop_6_20" - -raft-log 2 ----- -1/11 EntryNormal "" -1/12 EntryNormal "prop_1_12" -1/13 EntryNormal "prop_1_13" -4/14 EntryNormal "" -4/15 EntryNormal "prop_4_15" -5/16 EntryNormal "" -5/17 EntryNormal "prop_5_17" -6/18 EntryNormal "" -6/19 EntryNormal "prop_6_19" - -raft-log 3 ----- -1/11 EntryNormal "" -1/12 EntryNormal "prop_1_12" -1/13 EntryNormal "prop_1_13" -4/14 EntryNormal "" - -raft-log 4 ----- -1/11 EntryNormal "" -1/12 EntryNormal "prop_1_12" -1/13 EntryNormal "prop_1_13" -4/14 EntryNormal "" -4/15 EntryNormal "prop_4_15" -5/16 EntryNormal "" -5/17 EntryNormal "prop_5_17" -6/18 EntryNormal "" -6/19 EntryNormal "prop_6_19" -6/20 EntryNormal "prop_6_20" -6/21 EntryNormal "prop_6_21" - -raft-log 5 ----- -1/11 EntryNormal "" -1/12 EntryNormal "prop_1_12" -1/13 EntryNormal "prop_1_13" -4/14 EntryNormal "" -4/15 EntryNormal "prop_4_15" -5/16 EntryNormal "" -5/17 EntryNormal "prop_5_17" -6/18 EntryNormal "" -7/19 EntryNormal "" -7/20 EntryNormal "prop_7_20" -7/21 EntryNormal 
"prop_7_21" -7/22 EntryNormal "prop_7_22" - -raft-log 6 ----- -1/11 EntryNormal "" -1/12 EntryNormal "prop_1_12" -1/13 EntryNormal "prop_1_13" -4/14 EntryNormal "" -4/15 EntryNormal "prop_4_15" -4/16 EntryNormal "prop_4_16" -4/17 EntryNormal "prop_4_17" - -raft-log 7 ----- -1/11 EntryNormal "" -1/12 EntryNormal "prop_1_12" -1/13 EntryNormal "prop_1_13" -2/14 EntryNormal "" -2/15 EntryNormal "prop_2_15" -2/16 EntryNormal "prop_2_16" -3/17 EntryNormal "" -3/18 EntryNormal "prop_3_18" -3/19 EntryNormal "prop_3_19" -3/20 EntryNormal "prop_3_20" -3/21 EntryNormal "prop_3_21" - - -# Elect node 1 as leader and stabilize. -campaign 1 ----- -INFO 1 is starting a new election at term 7 -INFO 1 became candidate at term 8 -INFO 1 received MsgVoteResp from 1 at term 8 -INFO 1 [logterm: 6, index: 20] sent MsgVote request to 2 at term 8 -INFO 1 [logterm: 6, index: 20] sent MsgVote request to 3 at term 8 -INFO 1 [logterm: 6, index: 20] sent MsgVote request to 4 at term 8 -INFO 1 [logterm: 6, index: 20] sent MsgVote request to 5 at term 8 -INFO 1 [logterm: 6, index: 20] sent MsgVote request to 6 at term 8 -INFO 1 [logterm: 6, index: 20] sent MsgVote request to 7 at term 8 - -## Get elected. 
-stabilize 1 ----- -> 1 handling Ready - Ready MustSync=true: - Lead:0 State:StateCandidate - HardState Term:8 Vote:1 Commit:18 - Messages: - 1->2 MsgVote Term:8 Log:6/20 - 1->3 MsgVote Term:8 Log:6/20 - 1->4 MsgVote Term:8 Log:6/20 - 1->5 MsgVote Term:8 Log:6/20 - 1->6 MsgVote Term:8 Log:6/20 - 1->7 MsgVote Term:8 Log:6/20 - -stabilize 2 3 4 5 6 7 ----- -> 2 receiving messages - 1->2 MsgVote Term:8 Log:6/20 - INFO 2 [term: 6] received a MsgVote message with higher term from 1 [term: 8] - INFO 2 became follower at term 8 - INFO 2 [logterm: 6, index: 19, vote: 0] cast MsgVote for 1 [logterm: 6, index: 20] at term 8 -> 3 receiving messages - 1->3 MsgVote Term:8 Log:6/20 - INFO 3 [term: 7] received a MsgVote message with higher term from 1 [term: 8] - INFO 3 became follower at term 8 - INFO 3 [logterm: 4, index: 14, vote: 0] cast MsgVote for 1 [logterm: 6, index: 20] at term 8 -> 4 receiving messages - 1->4 MsgVote Term:8 Log:6/20 - INFO 4 [term: 6] received a MsgVote message with higher term from 1 [term: 8] - INFO 4 became follower at term 8 - INFO 4 [logterm: 6, index: 21, vote: 0] rejected MsgVote from 1 [logterm: 6, index: 20] at term 8 -> 5 receiving messages - 1->5 MsgVote Term:8 Log:6/20 - INFO 5 [term: 7] received a MsgVote message with higher term from 1 [term: 8] - INFO 5 became follower at term 8 - INFO 5 [logterm: 7, index: 22, vote: 0] rejected MsgVote from 1 [logterm: 6, index: 20] at term 8 -> 6 receiving messages - 1->6 MsgVote Term:8 Log:6/20 - INFO 6 [term: 7] received a MsgVote message with higher term from 1 [term: 8] - INFO 6 became follower at term 8 - INFO 6 [logterm: 4, index: 17, vote: 0] cast MsgVote for 1 [logterm: 6, index: 20] at term 8 -> 7 receiving messages - 1->7 MsgVote Term:8 Log:6/20 - INFO 7 [term: 7] received a MsgVote message with higher term from 1 [term: 8] - INFO 7 became follower at term 8 - INFO 7 [logterm: 3, index: 21, vote: 0] cast MsgVote for 1 [logterm: 6, index: 20] at term 8 -> 2 handling Ready - Ready MustSync=true: 
- Lead:0 State:StateFollower - HardState Term:8 Vote:1 Commit:18 - Messages: - 2->1 MsgVoteResp Term:8 Log:0/0 -> 3 handling Ready - Ready MustSync=true: - HardState Term:8 Vote:1 Commit:14 - Messages: - 3->1 MsgVoteResp Term:8 Log:0/0 -> 4 handling Ready - Ready MustSync=true: - Lead:0 State:StateFollower - HardState Term:8 Commit:18 - Messages: - 4->1 MsgVoteResp Term:8 Log:0/0 Rejected (Hint: 0) -> 5 handling Ready - Ready MustSync=true: - Lead:0 State:StateFollower - HardState Term:8 Commit:18 - Messages: - 5->1 MsgVoteResp Term:8 Log:0/0 Rejected (Hint: 0) -> 6 handling Ready - Ready MustSync=true: - HardState Term:8 Vote:1 Commit:15 - Messages: - 6->1 MsgVoteResp Term:8 Log:0/0 -> 7 handling Ready - Ready MustSync=true: - HardState Term:8 Vote:1 Commit:13 - Messages: - 7->1 MsgVoteResp Term:8 Log:0/0 - -stabilize 1 ----- -> 1 receiving messages - 2->1 MsgVoteResp Term:8 Log:0/0 - INFO 1 received MsgVoteResp from 2 at term 8 - INFO 1 has received 2 MsgVoteResp votes and 0 vote rejections - 3->1 MsgVoteResp Term:8 Log:0/0 - INFO 1 received MsgVoteResp from 3 at term 8 - INFO 1 has received 3 MsgVoteResp votes and 0 vote rejections - 4->1 MsgVoteResp Term:8 Log:0/0 Rejected (Hint: 0) - INFO 1 received MsgVoteResp rejection from 4 at term 8 - INFO 1 has received 3 MsgVoteResp votes and 1 vote rejections - 5->1 MsgVoteResp Term:8 Log:0/0 Rejected (Hint: 0) - INFO 1 received MsgVoteResp rejection from 5 at term 8 - INFO 1 has received 3 MsgVoteResp votes and 2 vote rejections - 6->1 MsgVoteResp Term:8 Log:0/0 - INFO 1 received MsgVoteResp from 6 at term 8 - INFO 1 has received 4 MsgVoteResp votes and 2 vote rejections - INFO 1 became leader at term 8 - 7->1 MsgVoteResp Term:8 Log:0/0 -> 1 handling Ready - Ready MustSync=true: - Lead:1 State:StateLeader - Entries: - 8/21 EntryNormal "" - Messages: - 1->2 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] - 1->3 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] - 1->4 MsgApp Term:8 Log:6/20 
Commit:18 Entries:[8/21 EntryNormal ""] - 1->5 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] - 1->6 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] - 1->7 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] - -## Recover each follower, one by one. -stabilize 1 2 ----- -> 2 receiving messages - 1->2 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] -> 2 handling Ready - Ready MustSync=false: - Lead:1 State:StateFollower - Messages: - 2->1 MsgAppResp Term:8 Log:6/20 Rejected (Hint: 19) -> 1 receiving messages - 2->1 MsgAppResp Term:8 Log:6/20 Rejected (Hint: 19) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:8 Log:6/19 Commit:18 Entries:[6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] -> 2 receiving messages - 1->2 MsgApp Term:8 Log:6/19 Commit:18 Entries:[6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] -> 2 handling Ready - Ready MustSync=true: - Entries: - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - Messages: - 2->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 2->1 MsgAppResp Term:8 Log:0/21 -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:8 Log:8/21 Commit:18 -> 2 receiving messages - 1->2 MsgApp Term:8 Log:8/21 Commit:18 -> 2 handling Ready - Ready MustSync=false: - Messages: - 2->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 2->1 MsgAppResp Term:8 Log:0/21 - -stabilize 1 3 ----- -> 3 receiving messages - 1->3 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] -> 3 handling Ready - Ready MustSync=false: - Lead:1 State:StateFollower - Messages: - 3->1 MsgAppResp Term:8 Log:4/20 Rejected (Hint: 14) -> 1 receiving messages - 3->1 MsgAppResp Term:8 Log:4/20 Rejected (Hint: 14) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->3 MsgApp Term:8 Log:4/14 Commit:18 Entries:[4/15 EntryNormal "prop_4_15", 5/16 EntryNormal "", 5/17 EntryNormal "prop_5_17", 6/18 EntryNormal "", 6/19 EntryNormal "prop_6_19", 6/20 
EntryNormal "prop_6_20", 8/21 EntryNormal ""] -> 3 receiving messages - 1->3 MsgApp Term:8 Log:4/14 Commit:18 Entries:[4/15 EntryNormal "prop_4_15", 5/16 EntryNormal "", 5/17 EntryNormal "prop_5_17", 6/18 EntryNormal "", 6/19 EntryNormal "prop_6_19", 6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] -> 3 handling Ready - Ready MustSync=true: - HardState Term:8 Vote:1 Commit:18 - Entries: - 4/15 EntryNormal "prop_4_15" - 5/16 EntryNormal "" - 5/17 EntryNormal "prop_5_17" - 6/18 EntryNormal "" - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - CommittedEntries: - 4/15 EntryNormal "prop_4_15" - 5/16 EntryNormal "" - 5/17 EntryNormal "prop_5_17" - 6/18 EntryNormal "" - Messages: - 3->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 3->1 MsgAppResp Term:8 Log:0/21 -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->3 MsgApp Term:8 Log:8/21 Commit:18 -> 3 receiving messages - 1->3 MsgApp Term:8 Log:8/21 Commit:18 -> 3 handling Ready - Ready MustSync=false: - Messages: - 3->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 3->1 MsgAppResp Term:8 Log:0/21 - -stabilize 1 4 ----- -> 4 receiving messages - 1->4 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] - INFO found conflict at index 21 [existing term: 6, conflicting term: 8] - INFO replace the unstable entries from index 21 -> 4 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - Entries: - 8/21 EntryNormal "" - Messages: - 4->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 4->1 MsgAppResp Term:8 Log:0/21 -> 1 handling Ready - Ready MustSync=false: - HardState Term:8 Vote:1 Commit:21 - CommittedEntries: - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - Messages: - 1->2 MsgApp Term:8 Log:8/21 Commit:21 - 1->3 MsgApp Term:8 Log:8/21 Commit:21 - 1->4 MsgApp Term:8 Log:8/21 Commit:21 -> 4 receiving messages - 1->4 MsgApp Term:8 Log:8/21 Commit:21 -> 4 handling Ready - Ready MustSync=false: - 
HardState Term:8 Commit:21 - CommittedEntries: - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - Messages: - 4->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 4->1 MsgAppResp Term:8 Log:0/21 - -stabilize 1 5 ----- -> 5 receiving messages - 1->5 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] -> 5 handling Ready - Ready MustSync=false: - Lead:1 State:StateFollower - Messages: - 5->1 MsgAppResp Term:8 Log:6/20 Rejected (Hint: 18) -> 1 receiving messages - 5->1 MsgAppResp Term:8 Log:6/20 Rejected (Hint: 18) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->5 MsgApp Term:8 Log:6/18 Commit:21 Entries:[6/19 EntryNormal "prop_6_19", 6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] -> 5 receiving messages - 1->5 MsgApp Term:8 Log:6/18 Commit:21 Entries:[6/19 EntryNormal "prop_6_19", 6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] - INFO found conflict at index 19 [existing term: 7, conflicting term: 6] - INFO replace the unstable entries from index 19 -> 5 handling Ready - Ready MustSync=true: - HardState Term:8 Commit:21 - Entries: - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - CommittedEntries: - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - Messages: - 5->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 5->1 MsgAppResp Term:8 Log:0/21 -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->5 MsgApp Term:8 Log:8/21 Commit:21 -> 5 receiving messages - 1->5 MsgApp Term:8 Log:8/21 Commit:21 -> 5 handling Ready - Ready MustSync=false: - Messages: - 5->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 5->1 MsgAppResp Term:8 Log:0/21 - -stabilize 1 6 ----- -> 6 receiving messages - 1->6 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] -> 6 handling Ready - Ready MustSync=false: - Lead:1 State:StateFollower - Messages: - 6->1 MsgAppResp Term:8 Log:4/20 Rejected (Hint: 17) -> 1 receiving messages - 
6->1 MsgAppResp Term:8 Log:4/20 Rejected (Hint: 17) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->6 MsgApp Term:8 Log:4/15 Commit:21 Entries:[5/16 EntryNormal "", 5/17 EntryNormal "prop_5_17", 6/18 EntryNormal "", 6/19 EntryNormal "prop_6_19", 6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] -> 6 receiving messages - 1->6 MsgApp Term:8 Log:4/15 Commit:21 Entries:[5/16 EntryNormal "", 5/17 EntryNormal "prop_5_17", 6/18 EntryNormal "", 6/19 EntryNormal "prop_6_19", 6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] - INFO found conflict at index 16 [existing term: 4, conflicting term: 5] - INFO replace the unstable entries from index 16 -> 6 handling Ready - Ready MustSync=true: - HardState Term:8 Vote:1 Commit:21 - Entries: - 5/16 EntryNormal "" - 5/17 EntryNormal "prop_5_17" - 6/18 EntryNormal "" - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - CommittedEntries: - 5/16 EntryNormal "" - 5/17 EntryNormal "prop_5_17" - 6/18 EntryNormal "" - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - Messages: - 6->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 6->1 MsgAppResp Term:8 Log:0/21 -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->6 MsgApp Term:8 Log:8/21 Commit:21 -> 6 receiving messages - 1->6 MsgApp Term:8 Log:8/21 Commit:21 -> 6 handling Ready - Ready MustSync=false: - Messages: - 6->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 6->1 MsgAppResp Term:8 Log:0/21 - -stabilize 1 7 ----- -> 7 receiving messages - 1->7 MsgApp Term:8 Log:6/20 Commit:18 Entries:[8/21 EntryNormal ""] -> 7 handling Ready - Ready MustSync=false: - Lead:1 State:StateFollower - Messages: - 7->1 MsgAppResp Term:8 Log:3/20 Rejected (Hint: 20) -> 1 receiving messages - 7->1 MsgAppResp Term:8 Log:3/20 Rejected (Hint: 20) -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->7 MsgApp Term:8 Log:1/13 Commit:21 Entries:[4/14 EntryNormal "", 4/15 EntryNormal "prop_4_15", 5/16 
EntryNormal "", 5/17 EntryNormal "prop_5_17", 6/18 EntryNormal "", 6/19 EntryNormal "prop_6_19", 6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] -> 7 receiving messages - 1->7 MsgApp Term:8 Log:1/13 Commit:21 Entries:[4/14 EntryNormal "", 4/15 EntryNormal "prop_4_15", 5/16 EntryNormal "", 5/17 EntryNormal "prop_5_17", 6/18 EntryNormal "", 6/19 EntryNormal "prop_6_19", 6/20 EntryNormal "prop_6_20", 8/21 EntryNormal ""] - INFO found conflict at index 14 [existing term: 2, conflicting term: 4] - INFO replace the unstable entries from index 14 -> 7 handling Ready - Ready MustSync=true: - HardState Term:8 Vote:1 Commit:21 - Entries: - 4/14 EntryNormal "" - 4/15 EntryNormal "prop_4_15" - 5/16 EntryNormal "" - 5/17 EntryNormal "prop_5_17" - 6/18 EntryNormal "" - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - CommittedEntries: - 4/14 EntryNormal "" - 4/15 EntryNormal "prop_4_15" - 5/16 EntryNormal "" - 5/17 EntryNormal "prop_5_17" - 6/18 EntryNormal "" - 6/19 EntryNormal "prop_6_19" - 6/20 EntryNormal "prop_6_20" - 8/21 EntryNormal "" - Messages: - 7->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 7->1 MsgAppResp Term:8 Log:0/21 -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->7 MsgApp Term:8 Log:8/21 Commit:21 -> 7 receiving messages - 1->7 MsgApp Term:8 Log:8/21 Commit:21 -> 7 handling Ready - Ready MustSync=false: - Messages: - 7->1 MsgAppResp Term:8 Log:0/21 -> 1 receiving messages - 7->1 MsgAppResp Term:8 Log:0/21 diff --git a/raft/testdata/replicate_pause.txt b/raft/testdata/replicate_pause.txt deleted file mode 100644 index e7333cccb413..000000000000 --- a/raft/testdata/replicate_pause.txt +++ /dev/null @@ -1,190 +0,0 @@ -# This test ensures that MsgApp stream to a follower is paused when the -# in-flight state exceeds the configured limits. This is a regression test for -# the issue fixed by https://github.com/etcd-io/etcd/pull/14633. - -# Turn off output during the setup of the test. 
-log-level none ----- -ok - -# Start with 3 nodes, with a limited in-flight capacity. -add-nodes 3 voters=(1,2,3) index=10 inflight=3 ----- -ok - -campaign 1 ----- -ok - -stabilize ----- -ok (quiet) - -# Propose 3 entries. -propose 1 prop_1_12 ----- -ok - -propose 1 prop_1_13 ----- -ok - -propose 1 prop_1_14 ----- -ok - -# Store entries and send proposals. -process-ready 1 ----- -ok (quiet) - -# Re-enable log messages. -log-level debug ----- -ok - -# Expect that in-flight tracking to nodes 2 and 3 is saturated. -status 1 ----- -1: StateReplicate match=14 next=15 -2: StateReplicate match=11 next=15 paused inflight=3[full] -3: StateReplicate match=11 next=15 paused inflight=3[full] - -log-level none ----- -ok - -# Commit entries between nodes 1 and 2. -stabilize 1 2 ----- -ok (quiet) - -log-level debug ----- -ok - -# Expect that the entries are committed and stored on nodes 1 and 2. -status 1 ----- -1: StateReplicate match=14 next=15 -2: StateReplicate match=14 next=15 -3: StateReplicate match=11 next=15 paused inflight=3[full] - -# Drop append messages to node 3. -deliver-msgs drop=3 ----- -dropped: 1->3 MsgApp Term:1 Log:1/11 Commit:11 Entries:[1/12 EntryNormal "prop_1_12"] -dropped: 1->3 MsgApp Term:1 Log:1/12 Commit:11 Entries:[1/13 EntryNormal "prop_1_13"] -dropped: 1->3 MsgApp Term:1 Log:1/13 Commit:11 Entries:[1/14 EntryNormal "prop_1_14"] - - -# Repeat committing 3 entries. -propose 1 prop_1_15 ----- -ok - -propose 1 prop_1_16 ----- -ok - -propose 1 prop_1_17 ----- -ok - -# In-flight tracking to nodes 2 and 3 is saturated, but node 3 is behind. -status 1 ----- -1: StateReplicate match=14 next=15 -2: StateReplicate match=14 next=18 paused inflight=3[full] -3: StateReplicate match=11 next=15 paused inflight=3[full] - -log-level none ----- -ok - -# Commit entries between nodes 1 and 2 again. -stabilize 1 2 ----- -ok (quiet) - -log-level debug ----- -ok - -# Expect that the entries are committed and stored only on nodes 1 and 2. 
-status 1 ----- -1: StateReplicate match=17 next=18 -2: StateReplicate match=17 next=18 -3: StateReplicate match=11 next=15 paused inflight=3[full] - -# Make a heartbeat roundtrip. -tick-heartbeat 1 ----- -ok - -stabilize 1 ----- -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgHeartbeat Term:1 Log:0/0 Commit:17 - 1->3 MsgHeartbeat Term:1 Log:0/0 Commit:11 - -stabilize 2 3 ----- -> 2 receiving messages - 1->2 MsgHeartbeat Term:1 Log:0/0 Commit:17 -> 3 receiving messages - 1->3 MsgHeartbeat Term:1 Log:0/0 Commit:11 -> 2 handling Ready - Ready MustSync=false: - Messages: - 2->1 MsgHeartbeatResp Term:1 Log:0/0 -> 3 handling Ready - Ready MustSync=false: - Messages: - 3->1 MsgHeartbeatResp Term:1 Log:0/0 - -# After handling heartbeat responses, node 1 sends an empty MsgApp to a -# throttled node 3 because it hasn't yet replied to a single MsgApp, and the -# in-flight tracker is still saturated. -stabilize 1 ----- -> 1 receiving messages - 2->1 MsgHeartbeatResp Term:1 Log:0/0 - 3->1 MsgHeartbeatResp Term:1 Log:0/0 -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->3 MsgApp Term:1 Log:1/14 Commit:17 - -# Node 3 finally receives a MsgApp, but there was a gap, so it rejects it. -stabilize 3 ----- -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/14 Commit:17 - DEBUG 3 [logterm: 0, index: 14] rejected MsgApp [logterm: 1, index: 14] from 1 -> 3 handling Ready - Ready MustSync=false: - Messages: - 3->1 MsgAppResp Term:1 Log:1/14 Rejected (Hint: 11) - -log-level none ----- -ok - -stabilize ----- -ok (quiet) - -log-level debug ----- -ok - -# Eventually all nodes catch up on the committed state. 
-status 1 ----- -1: StateReplicate match=17 next=18 -2: StateReplicate match=17 next=18 -3: StateReplicate match=17 next=18 \ No newline at end of file diff --git a/raft/testdata/single_node.txt b/raft/testdata/single_node.txt deleted file mode 100644 index 3b6e4f4c1dd9..000000000000 --- a/raft/testdata/single_node.txt +++ /dev/null @@ -1,30 +0,0 @@ -log-level info ----- -ok - -add-nodes 1 voters=(1) index=3 ----- -INFO 1 switched to configuration voters=(1) -INFO 1 became follower at term 0 -INFO newRaft 1 [peers: [1], term: 0, commit: 3, applied: 3, lastindex: 3, lastterm: 1] - -campaign 1 ----- -INFO 1 is starting a new election at term 0 -INFO 1 became candidate at term 1 -INFO 1 received MsgVoteResp from 1 at term 1 -INFO 1 became leader at term 1 - -stabilize ----- -> 1 handling Ready - Ready MustSync=true: - Lead:1 State:StateLeader - HardState Term:1 Vote:1 Commit:3 - Entries: - 1/4 EntryNormal "" -> 1 handling Ready - Ready MustSync=false: - HardState Term:1 Vote:1 Commit:4 - CommittedEntries: - 1/4 EntryNormal "" diff --git a/raft/testdata/snapshot_succeed_via_app_resp.txt b/raft/testdata/snapshot_succeed_via_app_resp.txt deleted file mode 100644 index dbbd5ce11d98..000000000000 --- a/raft/testdata/snapshot_succeed_via_app_resp.txt +++ /dev/null @@ -1,156 +0,0 @@ -# TestSnapshotSucceedViaAppResp regression tests the situation in which a snap- -# shot is sent to a follower at the most recent index (i.e. the snapshot index -# is the leader's last index is the committed index). In that situation, a bug -# in the past left the follower in probing status until the next log entry was -# committed. -# -# See https://github.com/etcd-io/etcd/pull/10308 for additional background. - -# Turn off output during the setup of the test. -log-level none ----- -ok - -# Start with two nodes, but the config already has a third. -add-nodes 2 voters=(1,2,3) index=10 ----- -ok - -campaign 1 ----- -ok - -# Fully replicate everything, including the leader's empty index. 
-stabilize ----- -ok (quiet) - -compact 1 11 ----- -ok (quiet) - -# Drop inflight messages to n3. -deliver-msgs drop=(3) ----- -ok (quiet) - -# Show the Raft log messages from now on. -log-level debug ----- -ok - -status 1 ----- -1: StateReplicate match=11 next=12 -2: StateReplicate match=11 next=12 -3: StateProbe match=0 next=11 paused inactive - -# Add the node that will receive a snapshot (it has no state at all, does not -# even have a config). -add-nodes 1 ----- -INFO 3 switched to configuration voters=() -INFO 3 became follower at term 0 -INFO newRaft 3 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] - -# Time passes on the leader so that it will try the previously missing follower -# again. -tick-heartbeat 1 ----- -ok - -process-ready 1 ----- -Ready MustSync=false: -Messages: -1->2 MsgHeartbeat Term:1 Log:0/0 Commit:11 -1->3 MsgHeartbeat Term:1 Log:0/0 - -# Iterate until no more work is done by the new peer. It receives the heartbeat -# and responds. -stabilize 3 ----- -> 3 receiving messages - 1->3 MsgHeartbeat Term:1 Log:0/0 - INFO 3 [term: 0] received a MsgHeartbeat message with higher term from 1 [term: 1] - INFO 3 became follower at term 1 -> 3 handling Ready - Ready MustSync=true: - Lead:1 State:StateFollower - HardState Term:1 Commit:0 - Messages: - 3->1 MsgHeartbeatResp Term:1 Log:0/0 - -# The leader in turn will realize that n3 needs a snapshot, which it initiates. 
-stabilize 1 ----- -> 1 receiving messages - 3->1 MsgHeartbeatResp Term:1 Log:0/0 - DEBUG 1 [firstindex: 12, commit: 11] sent snapshot[index: 11, term: 1] to 3 [StateProbe match=0 next=11] - DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=11 paused pendingSnap=11] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:11 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - -status 1 ----- -1: StateReplicate match=11 next=12 -2: StateReplicate match=11 next=12 -3: StateSnapshot match=0 next=11 paused pendingSnap=11 - -# Follower applies the snapshot. Note how it reacts with a MsgAppResp upon completion. -# The snapshot fully catches the follower up (i.e. there are no more log entries it -# needs to apply after). The bug was that the leader failed to realize that the follower -# was now fully caught up. -stabilize 3 ----- -> 3 receiving messages - 1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:11 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 11, term: 1] - INFO 3 switched to configuration voters=(1 2 3) - INFO 3 [commit: 11, lastindex: 11, lastterm: 1] restored snapshot [index: 11, term: 1] - INFO 3 [commit: 11] restored snapshot [index: 11, term: 1] -> 3 handling Ready - Ready MustSync=false: - HardState Term:1 Commit:11 - Snapshot Index:11 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false - Messages: - 3->1 MsgAppResp Term:1 Log:0/11 - -# The MsgAppResp lets the leader move the follower back to replicating state. -# Leader sends another MsgAppResp, to communicate the updated commit index. 
-stabilize 1 ----- -> 1 receiving messages - 3->1 MsgAppResp Term:1 Log:0/11 - DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 3 [StateSnapshot match=11 next=12 paused pendingSnap=11] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->3 MsgApp Term:1 Log:1/11 Commit:11 - -status 1 ----- -1: StateReplicate match=11 next=12 -2: StateReplicate match=11 next=12 -3: StateReplicate match=11 next=12 - -# Let things settle. -stabilize ----- -> 2 receiving messages - 1->2 MsgHeartbeat Term:1 Log:0/0 Commit:11 -> 3 receiving messages - 1->3 MsgApp Term:1 Log:1/11 Commit:11 -> 2 handling Ready - Ready MustSync=false: - Messages: - 2->1 MsgHeartbeatResp Term:1 Log:0/0 -> 3 handling Ready - Ready MustSync=false: - Messages: - 3->1 MsgAppResp Term:1 Log:0/11 -> 1 receiving messages - 2->1 MsgHeartbeatResp Term:1 Log:0/0 - 3->1 MsgAppResp Term:1 Log:0/11 diff --git a/raft/tracker/inflights.go b/raft/tracker/inflights.go deleted file mode 100644 index 350728aec7dc..000000000000 --- a/raft/tracker/inflights.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tracker - -// inflight describes an in-flight MsgApp message. 
-type inflight struct { - index uint64 // the index of the last entry inside the message - bytes uint64 // the total byte size of the entries in the message -} - -// Inflights limits the number of MsgApp (represented by the largest index -// contained within) sent to followers but not yet acknowledged by them. Callers -// use Full() to check whether more messages can be sent, call Add() whenever -// they are sending a new append, and release "quota" via FreeLE() whenever an -// ack is received. -type Inflights struct { - // the starting index in the buffer - start int - - count int // number of inflight messages in the buffer - bytes uint64 // number of inflight bytes - - size int // the max number of inflight messages - maxBytes uint64 // the max total byte size of inflight messages - - // buffer is a ring buffer containing info about all in-flight messages. - buffer []inflight -} - -// NewInflights sets up an Inflights that allows up to size inflight messages, -// with the total byte size up to maxBytes. If maxBytes is 0 then there is no -// byte size limit. The maxBytes limit is soft, i.e. we accept a single message -// that brings it from size < maxBytes to size >= maxBytes. -func NewInflights(size int, maxBytes uint64) *Inflights { - return &Inflights{ - size: size, - maxBytes: maxBytes, - } -} - -// Clone returns an *Inflights that is identical to but shares no memory with -// the receiver. -func (in *Inflights) Clone() *Inflights { - ins := *in - ins.buffer = append([]inflight(nil), in.buffer...) - return &ins -} - -// Add notifies the Inflights that a new message with the given index and byte -// size is being dispatched. Full() must be called prior to Add() to verify that -// there is room for one more message, and consecutive calls to Add() must -// provide a monotonic sequence of indexes. 
-func (in *Inflights) Add(index, bytes uint64) { - if in.Full() { - panic("cannot add into a Full inflights") - } - next := in.start + in.count - size := in.size - if next >= size { - next -= size - } - if next >= len(in.buffer) { - in.grow() - } - in.buffer[next] = inflight{index: index, bytes: bytes} - in.count++ - in.bytes += bytes -} - -// grow the inflight buffer by doubling up to inflights.size. We grow on demand -// instead of preallocating to inflights.size to handle systems which have -// thousands of Raft groups per process. -func (in *Inflights) grow() { - newSize := len(in.buffer) * 2 - if newSize == 0 { - newSize = 1 - } else if newSize > in.size { - newSize = in.size - } - newBuffer := make([]inflight, newSize) - copy(newBuffer, in.buffer) - in.buffer = newBuffer -} - -// FreeLE frees the inflights smaller or equal to the given `to` flight. -func (in *Inflights) FreeLE(to uint64) { - if in.count == 0 || to < in.buffer[in.start].index { - // out of the left side of the window - return - } - - idx := in.start - var i int - var bytes uint64 - for i = 0; i < in.count; i++ { - if to < in.buffer[idx].index { // found the first large inflight - break - } - bytes += in.buffer[idx].bytes - - // increase index and maybe rotate - size := in.size - if idx++; idx >= size { - idx -= size - } - } - // free i inflights and set new start index - in.count -= i - in.bytes -= bytes - in.start = idx - if in.count == 0 { - // inflights is empty, reset the start index so that we don't grow the - // buffer unnecessarily. - in.start = 0 - } -} - -// Full returns true if no more messages can be sent at the moment. -func (in *Inflights) Full() bool { - return in.count == in.size || (in.maxBytes != 0 && in.bytes >= in.maxBytes) -} - -// Count returns the number of inflight messages. -func (in *Inflights) Count() int { return in.count } - -// reset frees all inflights. 
-func (in *Inflights) reset() { - in.count = 0 - in.start = 0 -} diff --git a/raft/tracker/inflights_test.go b/raft/tracker/inflights_test.go deleted file mode 100644 index 3514220df390..000000000000 --- a/raft/tracker/inflights_test.go +++ /dev/null @@ -1,239 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tracker - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestInflightsAdd(t *testing.T) { - // no rotating case - in := &Inflights{ - size: 10, - buffer: make([]inflight, 10), - } - - for i := 0; i < 5; i++ { - in.Add(uint64(i), uint64(100+i)) - } - - wantIn := &Inflights{ - start: 0, - count: 5, - bytes: 510, - size: 10, - buffer: inflightsBuffer( - // ↓------------ - []uint64{0, 1, 2, 3, 4, 0, 0, 0, 0, 0}, - []uint64{100, 101, 102, 103, 104, 0, 0, 0, 0, 0}), - } - require.Equal(t, wantIn, in) - - for i := 5; i < 10; i++ { - in.Add(uint64(i), uint64(100+i)) - } - - wantIn2 := &Inflights{ - start: 0, - count: 10, - bytes: 1045, - size: 10, - buffer: inflightsBuffer( - // ↓--------------------------- - []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - []uint64{100, 101, 102, 103, 104, 105, 106, 107, 108, 109}), - } - require.Equal(t, wantIn2, in) - - // rotating case - in2 := &Inflights{ - start: 5, - size: 10, - buffer: make([]inflight, 10), - } - - for i := 0; i < 5; i++ { - in2.Add(uint64(i), uint64(100+i)) - } - - wantIn21 := &Inflights{ - start: 5, - count: 5, - bytes: 510, - 
size: 10, - buffer: inflightsBuffer( - // ↓------------ - []uint64{0, 0, 0, 0, 0, 0, 1, 2, 3, 4}, - []uint64{0, 0, 0, 0, 0, 100, 101, 102, 103, 104}), - } - require.Equal(t, wantIn21, in2) - - for i := 5; i < 10; i++ { - in2.Add(uint64(i), uint64(100+i)) - } - - wantIn22 := &Inflights{ - start: 5, - count: 10, - bytes: 1045, - size: 10, - buffer: inflightsBuffer( - // -------------- ↓------------ - []uint64{5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, - []uint64{105, 106, 107, 108, 109, 100, 101, 102, 103, 104}), - } - require.Equal(t, wantIn22, in2) -} - -func TestInflightFreeTo(t *testing.T) { - // no rotating case - in := NewInflights(10, 0) - for i := 0; i < 10; i++ { - in.Add(uint64(i), uint64(100+i)) - } - - in.FreeLE(0) - - wantIn0 := &Inflights{ - start: 1, - count: 9, - bytes: 945, - size: 10, - buffer: inflightsBuffer( - // ↓------------------------ - []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - []uint64{100, 101, 102, 103, 104, 105, 106, 107, 108, 109}), - } - require.Equal(t, wantIn0, in) - - in.FreeLE(4) - - wantIn := &Inflights{ - start: 5, - count: 5, - bytes: 535, - size: 10, - buffer: inflightsBuffer( - // ↓------------ - []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - []uint64{100, 101, 102, 103, 104, 105, 106, 107, 108, 109}), - } - require.Equal(t, wantIn, in) - - in.FreeLE(8) - - wantIn2 := &Inflights{ - start: 9, - count: 1, - bytes: 109, - size: 10, - buffer: inflightsBuffer( - // ↓ - []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - []uint64{100, 101, 102, 103, 104, 105, 106, 107, 108, 109}), - } - require.Equal(t, wantIn2, in) - - // rotating case - for i := 10; i < 15; i++ { - in.Add(uint64(i), uint64(100+i)) - } - - in.FreeLE(12) - - wantIn3 := &Inflights{ - start: 3, - count: 2, - bytes: 227, - size: 10, - buffer: inflightsBuffer( - // ↓----- - []uint64{10, 11, 12, 13, 14, 5, 6, 7, 8, 9}, - []uint64{110, 111, 112, 113, 114, 105, 106, 107, 108, 109}), - } - require.Equal(t, wantIn3, in) - - in.FreeLE(14) - - wantIn4 := &Inflights{ - start: 0, - count: 0, - size: 10, - 
buffer: inflightsBuffer( - // ↓ - []uint64{10, 11, 12, 13, 14, 5, 6, 7, 8, 9}, - []uint64{110, 111, 112, 113, 114, 105, 106, 107, 108, 109}), - } - require.Equal(t, wantIn4, in) -} - -func TestInflightsFull(t *testing.T) { - for _, tc := range []struct { - name string - size int - maxBytes uint64 - fullAt int - freeLE uint64 - againAt int - }{ - {name: "always-full", size: 0, fullAt: 0}, - {name: "single-entry", size: 1, fullAt: 1, freeLE: 1, againAt: 2}, - {name: "single-entry-overflow", size: 1, maxBytes: 10, fullAt: 1, freeLE: 1, againAt: 2}, - {name: "multi-entry", size: 15, fullAt: 15, freeLE: 6, againAt: 22}, - {name: "slight-overflow", size: 8, maxBytes: 400, fullAt: 4, freeLE: 2, againAt: 7}, - {name: "exact-max-bytes", size: 8, maxBytes: 406, fullAt: 4, freeLE: 3, againAt: 8}, - {name: "larger-overflow", size: 15, maxBytes: 408, fullAt: 5, freeLE: 1, againAt: 6}, - } { - t.Run(tc.name, func(t *testing.T) { - in := NewInflights(tc.size, tc.maxBytes) - - addUntilFull := func(begin, end int) { - for i := begin; i < end; i++ { - if in.Full() { - t.Fatalf("full at %d, want %d", i, end) - } - in.Add(uint64(i), uint64(100+i)) - } - if !in.Full() { - t.Fatalf("not full at %d", end) - } - } - - addUntilFull(0, tc.fullAt) - in.FreeLE(tc.freeLE) - addUntilFull(tc.fullAt, tc.againAt) - - defer func() { - if r := recover(); r == nil { - t.Errorf("Add() did not panic") - } - }() - in.Add(100, 1024) - }) - } -} - -func inflightsBuffer(indices []uint64, sizes []uint64) []inflight { - if len(indices) != len(sizes) { - panic("len(indices) != len(sizes)") - } - buffer := make([]inflight, 0, len(indices)) - for i, idx := range indices { - buffer = append(buffer, inflight{index: idx, bytes: sizes[i]}) - } - return buffer -} diff --git a/raft/tracker/progress.go b/raft/tracker/progress.go deleted file mode 100644 index 5948fadfdfc6..000000000000 --- a/raft/tracker/progress.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache 
License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tracker - -import ( - "fmt" - "sort" - "strings" -) - -// Progress represents a follower’s progress in the view of the leader. Leader -// maintains progresses of all followers, and sends entries to the follower -// based on its progress. -// -// NB(tbg): Progress is basically a state machine whose transitions are mostly -// strewn around `*raft.raft`. Additionally, some fields are only used when in a -// certain State. All of this isn't ideal. -type Progress struct { - Match, Next uint64 - // State defines how the leader should interact with the follower. - // - // When in StateProbe, leader sends at most one replication message - // per heartbeat interval. It also probes actual progress of the follower. - // - // When in StateReplicate, leader optimistically increases next - // to the latest entry sent after sending replication message. This is - // an optimized state for fast replicating log entries to the follower. - // - // When in StateSnapshot, leader should have sent out snapshot - // before and stops sending any replication message. - State StateType - - // PendingSnapshot is used in StateSnapshot. - // If there is a pending snapshot, the pendingSnapshot will be set to the - // index of the snapshot. If pendingSnapshot is set, the replication process of - // this Progress will be paused. raft will not resend snapshot until the pending one - // is reported to be failed. 
- PendingSnapshot uint64 - - // RecentActive is true if the progress is recently active. Receiving any messages - // from the corresponding follower indicates the progress is active. - // RecentActive can be reset to false after an election timeout. - // This is always true on the leader. - RecentActive bool - - // MsgAppFlowPaused is used when the MsgApp flow to a node is throttled. This - // happens in StateProbe, or StateReplicate with saturated Inflights. In both - // cases, we need to continue sending MsgApp once in a while to guarantee - // progress, but we only do so when MsgAppFlowPaused is false (it is reset on - // receiving a heartbeat response), to not overflow the receiver. See - // IsPaused(). - MsgAppFlowPaused bool - - // Inflights is a sliding window for the inflight messages. - // Each inflight message contains one or more log entries. - // The max number of entries per message is defined in raft config as MaxSizePerMsg. - // Thus inflight effectively limits both the number of inflight messages - // and the bandwidth each Progress can use. - // When inflights is Full, no more message should be sent. - // When a leader sends out a message, the index of the last - // entry should be added to inflights. The index MUST be added - // into inflights in order. - // When a leader receives a reply, the previous inflights should - // be freed by calling inflights.FreeLE with the index of the last - // received entry. - Inflights *Inflights - - // IsLearner is true if this progress is tracked for a learner. - IsLearner bool -} - -// ResetState moves the Progress into the specified State, resetting MsgAppFlowPaused, -// PendingSnapshot, and Inflights. 
-func (pr *Progress) ResetState(state StateType) { - pr.MsgAppFlowPaused = false - pr.PendingSnapshot = 0 - pr.State = state - pr.Inflights.reset() -} - -func max(a, b uint64) uint64 { - if a > b { - return a - } - return b -} - -func min(a, b uint64) uint64 { - if a > b { - return b - } - return a -} - -// BecomeProbe transitions into StateProbe. Next is reset to Match+1 or, -// optionally and if larger, the index of the pending snapshot. -func (pr *Progress) BecomeProbe() { - // If the original state is StateSnapshot, progress knows that - // the pending snapshot has been sent to this peer successfully, then - // probes from pendingSnapshot + 1. - if pr.State == StateSnapshot { - pendingSnapshot := pr.PendingSnapshot - pr.ResetState(StateProbe) - pr.Next = max(pr.Match+1, pendingSnapshot+1) - } else { - pr.ResetState(StateProbe) - pr.Next = pr.Match + 1 - } -} - -// BecomeReplicate transitions into StateReplicate, resetting Next to Match+1. -func (pr *Progress) BecomeReplicate() { - pr.ResetState(StateReplicate) - pr.Next = pr.Match + 1 -} - -// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending -// snapshot index. -func (pr *Progress) BecomeSnapshot(snapshoti uint64) { - pr.ResetState(StateSnapshot) - pr.PendingSnapshot = snapshoti -} - -// UpdateOnEntriesSend updates the progress on the given number of consecutive -// entries being sent in a MsgApp, with the given total bytes size, appended at -// and after the given log index. -func (pr *Progress) UpdateOnEntriesSend(entries int, bytes, nextIndex uint64) error { - switch pr.State { - case StateReplicate: - if entries > 0 { - last := nextIndex + uint64(entries) - 1 - pr.OptimisticUpdate(last) - pr.Inflights.Add(last, bytes) - } - // If this message overflows the in-flights tracker, or it was already full, - // consider this message being a probe, so that the flow is paused. 
- pr.MsgAppFlowPaused = pr.Inflights.Full() - case StateProbe: - // TODO(pavelkalinnikov): this condition captures the previous behaviour, - // but we should set MsgAppFlowPaused unconditionally for simplicity, because any - // MsgApp in StateProbe is a probe, not only non-empty ones. - if entries > 0 { - pr.MsgAppFlowPaused = true - } - default: - return fmt.Errorf("sending append in unhandled state %s", pr.State) - } - return nil -} - -// MaybeUpdate is called when an MsgAppResp arrives from the follower, with the -// index acked by it. The method returns false if the given n index comes from -// an outdated message. Otherwise it updates the progress and returns true. -func (pr *Progress) MaybeUpdate(n uint64) bool { - var updated bool - if pr.Match < n { - pr.Match = n - updated = true - pr.MsgAppFlowPaused = false - } - pr.Next = max(pr.Next, n+1) - return updated -} - -// OptimisticUpdate signals that appends all the way up to and including index n -// are in-flight. As a result, Next is increased to n+1. -func (pr *Progress) OptimisticUpdate(n uint64) { pr.Next = n + 1 } - -// MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The -// arguments are the index of the append message rejected by the follower, and -// the hint that we want to decrease to. -// -// Rejections can happen spuriously as messages are sent out of order or -// duplicated. In such cases, the rejection pertains to an index that the -// Progress already knows were previously acknowledged, and false is returned -// without changing the Progress. -// -// If the rejection is genuine, Next is lowered sensibly, and the Progress is -// cleared for sending log entries. -func (pr *Progress) MaybeDecrTo(rejected, matchHint uint64) bool { - if pr.State == StateReplicate { - // The rejection must be stale if the progress has matched and "rejected" - // is smaller than "match". - if rejected <= pr.Match { - return false - } - // Directly decrease next to match + 1. 
- // - // TODO(tbg): why not use matchHint if it's larger? - pr.Next = pr.Match + 1 - return true - } - - // The rejection must be stale if "rejected" does not match next - 1. This - // is because non-replicating followers are probed one entry at a time. - if pr.Next-1 != rejected { - return false - } - - pr.Next = max(min(rejected, matchHint+1), 1) - pr.MsgAppFlowPaused = false - return true -} - -// IsPaused returns whether sending log entries to this node has been throttled. -// This is done when a node has rejected recent MsgApps, is currently waiting -// for a snapshot, or has reached the MaxInflightMsgs limit. In normal -// operation, this is false. A throttled node will be contacted less frequently -// until it has reached a state in which it's able to accept a steady stream of -// log entries again. -func (pr *Progress) IsPaused() bool { - switch pr.State { - case StateProbe: - return pr.MsgAppFlowPaused - case StateReplicate: - return pr.MsgAppFlowPaused - case StateSnapshot: - return true - default: - panic("unexpected state") - } -} - -func (pr *Progress) String() string { - var buf strings.Builder - fmt.Fprintf(&buf, "%s match=%d next=%d", pr.State, pr.Match, pr.Next) - if pr.IsLearner { - fmt.Fprint(&buf, " learner") - } - if pr.IsPaused() { - fmt.Fprint(&buf, " paused") - } - if pr.PendingSnapshot > 0 { - fmt.Fprintf(&buf, " pendingSnap=%d", pr.PendingSnapshot) - } - if !pr.RecentActive { - fmt.Fprint(&buf, " inactive") - } - if n := pr.Inflights.Count(); n > 0 { - fmt.Fprintf(&buf, " inflight=%d", n) - if pr.Inflights.Full() { - fmt.Fprint(&buf, "[full]") - } - } - return buf.String() -} - -// ProgressMap is a map of *Progress. -type ProgressMap map[uint64]*Progress - -// String prints the ProgressMap in sorted key order, one Progress per line. 
-func (m ProgressMap) String() string { - ids := make([]uint64, 0, len(m)) - for k := range m { - ids = append(ids, k) - } - sort.Slice(ids, func(i, j int) bool { - return ids[i] < ids[j] - }) - var buf strings.Builder - for _, id := range ids { - fmt.Fprintf(&buf, "%d: %s\n", id, m[id]) - } - return buf.String() -} diff --git a/raft/tracker/progress_test.go b/raft/tracker/progress_test.go deleted file mode 100644 index 49dedb536b25..000000000000 --- a/raft/tracker/progress_test.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tracker - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestProgressString(t *testing.T) { - ins := NewInflights(1, 0) - ins.Add(123, 1) - pr := &Progress{ - Match: 1, - Next: 2, - State: StateSnapshot, - PendingSnapshot: 123, - RecentActive: false, - MsgAppFlowPaused: true, - IsLearner: true, - Inflights: ins, - } - const exp = `StateSnapshot match=1 next=2 learner paused pendingSnap=123 inactive inflight=1[full]` - assert.Equal(t, exp, pr.String()) -} - -func TestProgressIsPaused(t *testing.T) { - tests := []struct { - state StateType - paused bool - - w bool - }{ - {StateProbe, false, false}, - {StateProbe, true, true}, - {StateReplicate, false, false}, - {StateReplicate, true, true}, - {StateSnapshot, false, true}, - {StateSnapshot, true, true}, - } - for i, tt := range tests { - p := &Progress{ - State: tt.state, - MsgAppFlowPaused: tt.paused, - Inflights: NewInflights(256, 0), - } - assert.Equal(t, tt.w, p.IsPaused(), i) - } -} - -// TestProgressResume ensures that MaybeUpdate and MaybeDecrTo will reset -// MsgAppFlowPaused. 
-func TestProgressResume(t *testing.T) { - p := &Progress{ - Next: 2, - MsgAppFlowPaused: true, - } - p.MaybeDecrTo(1, 1) - assert.False(t, p.MsgAppFlowPaused) - p.MsgAppFlowPaused = true - p.MaybeUpdate(2) - assert.False(t, p.MsgAppFlowPaused) -} - -func TestProgressBecomeProbe(t *testing.T) { - match := uint64(1) - tests := []struct { - p *Progress - wnext uint64 - }{ - { - &Progress{State: StateReplicate, Match: match, Next: 5, Inflights: NewInflights(256, 0)}, - 2, - }, - { - // snapshot finish - &Progress{State: StateSnapshot, Match: match, Next: 5, PendingSnapshot: 10, Inflights: NewInflights(256, 0)}, - 11, - }, - { - // snapshot failure - &Progress{State: StateSnapshot, Match: match, Next: 5, PendingSnapshot: 0, Inflights: NewInflights(256, 0)}, - 2, - }, - } - for i, tt := range tests { - tt.p.BecomeProbe() - assert.Equal(t, StateProbe, tt.p.State, i) - assert.Equal(t, match, tt.p.Match, i) - assert.Equal(t, tt.wnext, tt.p.Next, i) - } -} - -func TestProgressBecomeReplicate(t *testing.T) { - p := &Progress{State: StateProbe, Match: 1, Next: 5, Inflights: NewInflights(256, 0)} - p.BecomeReplicate() - assert.Equal(t, StateReplicate, p.State) - assert.Equal(t, uint64(1), p.Match) - assert.Equal(t, p.Match+1, p.Next) -} - -func TestProgressBecomeSnapshot(t *testing.T) { - p := &Progress{State: StateProbe, Match: 1, Next: 5, Inflights: NewInflights(256, 0)} - p.BecomeSnapshot(10) - assert.Equal(t, StateSnapshot, p.State) - assert.Equal(t, uint64(1), p.Match) - assert.Equal(t, uint64(10), p.PendingSnapshot) -} - -func TestProgressUpdate(t *testing.T) { - prevM, prevN := uint64(3), uint64(5) - tests := []struct { - update uint64 - - wm uint64 - wn uint64 - wok bool - }{ - {prevM - 1, prevM, prevN, false}, // do not decrease match, next - {prevM, prevM, prevN, false}, // do not decrease next - {prevM + 1, prevM + 1, prevN, true}, // increase match, do not decrease next - {prevM + 2, prevM + 2, prevN + 1, true}, // increase match, next - } - for i, tt := range 
tests { - p := &Progress{ - Match: prevM, - Next: prevN, - } - assert.Equal(t, tt.wok, p.MaybeUpdate(tt.update), i) - assert.Equal(t, tt.wm, p.Match, i) - assert.Equal(t, tt.wn, p.Next, i) - } -} - -func TestProgressMaybeDecr(t *testing.T) { - tests := []struct { - state StateType - m uint64 - n uint64 - rejected uint64 - last uint64 - - w bool - wn uint64 - }{ - { - // state replicate and rejected is not greater than match - StateReplicate, 5, 10, 5, 5, false, 10, - }, - { - // state replicate and rejected is not greater than match - StateReplicate, 5, 10, 4, 4, false, 10, - }, - { - // state replicate and rejected is greater than match - // directly decrease to match+1 - StateReplicate, 5, 10, 9, 9, true, 6, - }, - { - // next-1 != rejected is always false - StateProbe, 0, 0, 0, 0, false, 0, - }, - { - // next-1 != rejected is always false - StateProbe, 0, 10, 5, 5, false, 10, - }, - { - // next>1 = decremented by 1 - StateProbe, 0, 10, 9, 9, true, 9, - }, - { - // next>1 = decremented by 1 - StateProbe, 0, 2, 1, 1, true, 1, - }, - { - // next<=1 = reset to 1 - StateProbe, 0, 1, 0, 0, true, 1, - }, - { - // decrease to min(rejected, last+1) - StateProbe, 0, 10, 9, 2, true, 3, - }, - { - // rejected < 1, reset to 1 - StateProbe, 0, 10, 9, 0, true, 1, - }, - } - for i, tt := range tests { - p := &Progress{ - State: tt.state, - Match: tt.m, - Next: tt.n, - } - assert.Equal(t, tt.w, p.MaybeDecrTo(tt.rejected, tt.last), i) - assert.Equal(t, tt.m, p.Match, i) - assert.Equal(t, tt.wn, p.Next, i) - } -} diff --git a/raft/tracker/state.go b/raft/tracker/state.go deleted file mode 100644 index 7dbdd63fa666..000000000000 --- a/raft/tracker/state.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tracker - -// StateType is the state of a tracked follower. -type StateType uint64 - -const ( - // StateProbe indicates a follower whose last index isn't known. Such a - // follower is "probed" (i.e. an append sent periodically) to narrow down - // its last index. In the ideal (and common) case, only one round of probing - // is necessary as the follower will react with a hint. Followers that are - // probed over extended periods of time are often offline. - StateProbe StateType = iota - // StateReplicate is the state steady in which a follower eagerly receives - // log entries to append to its log. - StateReplicate - // StateSnapshot indicates a follower that needs log entries not available - // from the leader's Raft log. Such a follower needs a full snapshot to - // return to StateReplicate. - StateSnapshot -) - -var prstmap = [...]string{ - "StateProbe", - "StateReplicate", - "StateSnapshot", -} - -func (st StateType) String() string { return prstmap[st] } diff --git a/raft/tracker/tracker.go b/raft/tracker/tracker.go deleted file mode 100644 index cf60c23b7515..000000000000 --- a/raft/tracker/tracker.go +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright 2019 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tracker - -import ( - "fmt" - "sort" - "strings" - - "go.etcd.io/etcd/raft/v3/quorum" - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -// Config reflects the configuration tracked in a ProgressTracker. -type Config struct { - Voters quorum.JointConfig - // AutoLeave is true if the configuration is joint and a transition to the - // incoming configuration should be carried out automatically by Raft when - // this is possible. If false, the configuration will be joint until the - // application initiates the transition manually. - AutoLeave bool - // Learners is a set of IDs corresponding to the learners active in the - // current configuration. - // - // Invariant: Learners and Voters does not intersect, i.e. if a peer is in - // either half of the joint config, it can't be a learner; if it is a - // learner it can't be in either half of the joint config. This invariant - // simplifies the implementation since it allows peers to have clarity about - // its current role without taking into account joint consensus. - Learners map[uint64]struct{} - // When we turn a voter into a learner during a joint consensus transition, - // we cannot add the learner directly when entering the joint state. This is - // because this would violate the invariant that the intersection of - // voters and learners is empty. For example, assume a Voter is removed and - // immediately re-added as a learner (or in other words, it is demoted): - // - // Initially, the configuration will be - // - // voters: {1 2 3} - // learners: {} - // - // and we want to demote 3. 
Entering the joint configuration, we naively get - // - // voters: {1 2} & {1 2 3} - // learners: {3} - // - // but this violates the invariant (3 is both voter and learner). Instead, - // we get - // - // voters: {1 2} & {1 2 3} - // learners: {} - // next_learners: {3} - // - // Where 3 is now still purely a voter, but we are remembering the intention - // to make it a learner upon transitioning into the final configuration: - // - // voters: {1 2} - // learners: {3} - // next_learners: {} - // - // Note that next_learners is not used while adding a learner that is not - // also a voter in the joint config. In this case, the learner is added - // right away when entering the joint configuration, so that it is caught up - // as soon as possible. - LearnersNext map[uint64]struct{} -} - -func (c Config) String() string { - var buf strings.Builder - fmt.Fprintf(&buf, "voters=%s", c.Voters) - if c.Learners != nil { - fmt.Fprintf(&buf, " learners=%s", quorum.MajorityConfig(c.Learners).String()) - } - if c.LearnersNext != nil { - fmt.Fprintf(&buf, " learners_next=%s", quorum.MajorityConfig(c.LearnersNext).String()) - } - if c.AutoLeave { - fmt.Fprint(&buf, " autoleave") - } - return buf.String() -} - -// Clone returns a copy of the Config that shares no memory with the original. -func (c *Config) Clone() Config { - clone := func(m map[uint64]struct{}) map[uint64]struct{} { - if m == nil { - return nil - } - mm := make(map[uint64]struct{}, len(m)) - for k := range m { - mm[k] = struct{}{} - } - return mm - } - return Config{ - Voters: quorum.JointConfig{clone(c.Voters[0]), clone(c.Voters[1])}, - Learners: clone(c.Learners), - LearnersNext: clone(c.LearnersNext), - } -} - -// ProgressTracker tracks the currently active configuration and the information -// known about the nodes and learners in it. In particular, it tracks the match -// index for each peer which in turn allows reasoning about the committed index. 
-type ProgressTracker struct { - Config - - Progress ProgressMap - - Votes map[uint64]bool - - MaxInflight int - MaxInflightBytes uint64 -} - -// MakeProgressTracker initializes a ProgressTracker. -func MakeProgressTracker(maxInflight int, maxBytes uint64) ProgressTracker { - p := ProgressTracker{ - MaxInflight: maxInflight, - MaxInflightBytes: maxBytes, - Config: Config{ - Voters: quorum.JointConfig{ - quorum.MajorityConfig{}, - nil, // only populated when used - }, - Learners: nil, // only populated when used - LearnersNext: nil, // only populated when used - }, - Votes: map[uint64]bool{}, - Progress: map[uint64]*Progress{}, - } - return p -} - -// ConfState returns a ConfState representing the active configuration. -func (p *ProgressTracker) ConfState() pb.ConfState { - return pb.ConfState{ - Voters: p.Voters[0].Slice(), - VotersOutgoing: p.Voters[1].Slice(), - Learners: quorum.MajorityConfig(p.Learners).Slice(), - LearnersNext: quorum.MajorityConfig(p.LearnersNext).Slice(), - AutoLeave: p.AutoLeave, - } -} - -// IsSingleton returns true if (and only if) there is only one voting member -// (i.e. the leader) in the current configuration. -func (p *ProgressTracker) IsSingleton() bool { - return len(p.Voters[0]) == 1 && len(p.Voters[1]) == 0 -} - -type matchAckIndexer map[uint64]*Progress - -var _ quorum.AckedIndexer = matchAckIndexer(nil) - -// AckedIndex implements IndexLookuper. -func (l matchAckIndexer) AckedIndex(id uint64) (quorum.Index, bool) { - pr, ok := l[id] - if !ok { - return 0, false - } - return quorum.Index(pr.Match), true -} - -// Committed returns the largest log index known to be committed based on what -// the voting members of the group have acknowledged. 
-func (p *ProgressTracker) Committed() uint64 { - return uint64(p.Voters.CommittedIndex(matchAckIndexer(p.Progress))) -} - -func insertionSort(sl []uint64) { - a, b := 0, len(sl) - for i := a + 1; i < b; i++ { - for j := i; j > a && sl[j] < sl[j-1]; j-- { - sl[j], sl[j-1] = sl[j-1], sl[j] - } - } -} - -// Visit invokes the supplied closure for all tracked progresses in stable order. -func (p *ProgressTracker) Visit(f func(id uint64, pr *Progress)) { - n := len(p.Progress) - // We need to sort the IDs and don't want to allocate since this is hot code. - // The optimization here mirrors that in `(MajorityConfig).CommittedIndex`, - // see there for details. - var sl [7]uint64 - var ids []uint64 - if len(sl) >= n { - ids = sl[:n] - } else { - ids = make([]uint64, n) - } - for id := range p.Progress { - n-- - ids[n] = id - } - insertionSort(ids) - for _, id := range ids { - f(id, p.Progress[id]) - } -} - -// QuorumActive returns true if the quorum is active from the view of the local -// raft state machine. Otherwise, it returns false. -func (p *ProgressTracker) QuorumActive() bool { - votes := map[uint64]bool{} - p.Visit(func(id uint64, pr *Progress) { - if pr.IsLearner { - return - } - votes[id] = pr.RecentActive - }) - - return p.Voters.VoteResult(votes) == quorum.VoteWon -} - -// VoterNodes returns a sorted slice of voters. -func (p *ProgressTracker) VoterNodes() []uint64 { - m := p.Voters.IDs() - nodes := make([]uint64, 0, len(m)) - for id := range m { - nodes = append(nodes, id) - } - sort.Slice(nodes, func(i, j int) bool { return nodes[i] < nodes[j] }) - return nodes -} - -// LearnerNodes returns a sorted slice of learners. 
-func (p *ProgressTracker) LearnerNodes() []uint64 { - if len(p.Learners) == 0 { - return nil - } - nodes := make([]uint64, 0, len(p.Learners)) - for id := range p.Learners { - nodes = append(nodes, id) - } - sort.Slice(nodes, func(i, j int) bool { return nodes[i] < nodes[j] }) - return nodes -} - -// ResetVotes prepares for a new round of vote counting via recordVote. -func (p *ProgressTracker) ResetVotes() { - p.Votes = map[uint64]bool{} -} - -// RecordVote records that the node with the given id voted for this Raft -// instance if v == true (and declined it otherwise). -func (p *ProgressTracker) RecordVote(id uint64, v bool) { - _, ok := p.Votes[id] - if !ok { - p.Votes[id] = v - } -} - -// TallyVotes returns the number of granted and rejected Votes, and whether the -// election outcome is known. -func (p *ProgressTracker) TallyVotes() (granted int, rejected int, _ quorum.VoteResult) { - // Make sure to populate granted/rejected correctly even if the Votes slice - // contains members no longer part of the configuration. This doesn't really - // matter in the way the numbers are used (they're informational), but might - // as well get it right. - for id, pr := range p.Progress { - if pr.IsLearner { - continue - } - v, voted := p.Votes[id] - if !voted { - continue - } - if v { - granted++ - } else { - rejected++ - } - } - result := p.Voters.VoteResult(p.Votes) - return granted, rejected, result -} diff --git a/raft/util.go b/raft/util.go deleted file mode 100644 index 95591f53400f..000000000000 --- a/raft/util.go +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "bytes" - "fmt" - "strings" - - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -func (st StateType) MarshalJSON() ([]byte, error) { - return []byte(fmt.Sprintf("%q", st.String())), nil -} - -func min(a, b uint64) uint64 { - if a > b { - return b - } - return a -} - -func max(a, b uint64) uint64 { - if a > b { - return a - } - return b -} - -var isLocalMsg = [...]bool{ - pb.MsgHup: true, - pb.MsgBeat: true, - pb.MsgUnreachable: true, - pb.MsgSnapStatus: true, - pb.MsgCheckQuorum: true, -} - -var isResponseMsg = [...]bool{ - pb.MsgAppResp: true, - pb.MsgVoteResp: true, - pb.MsgHeartbeatResp: true, - pb.MsgUnreachable: true, - pb.MsgReadIndexResp: true, - pb.MsgPreVoteResp: true, -} - -func isMsgInArray(msgt pb.MessageType, arr []bool) bool { - i := int(msgt) - return i < len(arr) && arr[i] -} - -func IsLocalMsg(msgt pb.MessageType) bool { - return isMsgInArray(msgt, isLocalMsg[:]) -} - -func IsResponseMsg(msgt pb.MessageType) bool { - return isMsgInArray(msgt, isResponseMsg[:]) -} - -// voteResponseType maps vote and prevote message types to their corresponding responses. 
-func voteRespMsgType(msgt pb.MessageType) pb.MessageType { - switch msgt { - case pb.MsgVote: - return pb.MsgVoteResp - case pb.MsgPreVote: - return pb.MsgPreVoteResp - default: - panic(fmt.Sprintf("not a vote message: %s", msgt)) - } -} - -func DescribeHardState(hs pb.HardState) string { - var buf strings.Builder - fmt.Fprintf(&buf, "Term:%d", hs.Term) - if hs.Vote != 0 { - fmt.Fprintf(&buf, " Vote:%d", hs.Vote) - } - fmt.Fprintf(&buf, " Commit:%d", hs.Commit) - return buf.String() -} - -func DescribeSoftState(ss SoftState) string { - return fmt.Sprintf("Lead:%d State:%s", ss.Lead, ss.RaftState) -} - -func DescribeConfState(state pb.ConfState) string { - return fmt.Sprintf( - "Voters:%v VotersOutgoing:%v Learners:%v LearnersNext:%v AutoLeave:%v", - state.Voters, state.VotersOutgoing, state.Learners, state.LearnersNext, state.AutoLeave, - ) -} - -func DescribeSnapshot(snap pb.Snapshot) string { - m := snap.Metadata - return fmt.Sprintf("Index:%d Term:%d ConfState:%s", m.Index, m.Term, DescribeConfState(m.ConfState)) -} - -func DescribeReady(rd Ready, f EntryFormatter) string { - var buf strings.Builder - if rd.SoftState != nil { - fmt.Fprint(&buf, DescribeSoftState(*rd.SoftState)) - buf.WriteByte('\n') - } - if !IsEmptyHardState(rd.HardState) { - fmt.Fprintf(&buf, "HardState %s", DescribeHardState(rd.HardState)) - buf.WriteByte('\n') - } - if len(rd.ReadStates) > 0 { - fmt.Fprintf(&buf, "ReadStates %v\n", rd.ReadStates) - } - if len(rd.Entries) > 0 { - buf.WriteString("Entries:\n") - fmt.Fprint(&buf, DescribeEntries(rd.Entries, f)) - } - if !IsEmptySnap(rd.Snapshot) { - fmt.Fprintf(&buf, "Snapshot %s\n", DescribeSnapshot(rd.Snapshot)) - } - if len(rd.CommittedEntries) > 0 { - buf.WriteString("CommittedEntries:\n") - fmt.Fprint(&buf, DescribeEntries(rd.CommittedEntries, f)) - } - if len(rd.Messages) > 0 { - buf.WriteString("Messages:\n") - for _, msg := range rd.Messages { - fmt.Fprint(&buf, DescribeMessage(msg, f)) - buf.WriteByte('\n') - } - } - if buf.Len() > 0 
{ - return fmt.Sprintf("Ready MustSync=%t:\n%s", rd.MustSync, buf.String()) - } - return "" -} - -// EntryFormatter can be implemented by the application to provide human-readable formatting -// of entry data. Nil is a valid EntryFormatter and will use a default format. -type EntryFormatter func([]byte) string - -// DescribeMessage returns a concise human-readable description of a -// Message for debugging. -func DescribeMessage(m pb.Message, f EntryFormatter) string { - var buf bytes.Buffer - fmt.Fprintf(&buf, "%x->%x %v Term:%d Log:%d/%d", m.From, m.To, m.Type, m.Term, m.LogTerm, m.Index) - if m.Reject { - fmt.Fprintf(&buf, " Rejected (Hint: %d)", m.RejectHint) - } - if m.Commit != 0 { - fmt.Fprintf(&buf, " Commit:%d", m.Commit) - } - if len(m.Entries) > 0 { - fmt.Fprint(&buf, " Entries:[") - for i, e := range m.Entries { - if i != 0 { - buf.WriteString(", ") - } - buf.WriteString(DescribeEntry(e, f)) - } - fmt.Fprint(&buf, "]") - } - if s := m.Snapshot; s != nil && !IsEmptySnap(*s) { - fmt.Fprintf(&buf, " Snapshot: %s", DescribeSnapshot(*s)) - } - return buf.String() -} - -// PayloadSize is the size of the payload of this Entry. Notably, it does not -// depend on its Index or Term. -func PayloadSize(e pb.Entry) int { - return len(e.Data) -} - -// DescribeEntry returns a concise human-readable description of an -// Entry for debugging. -func DescribeEntry(e pb.Entry, f EntryFormatter) string { - if f == nil { - f = func(data []byte) string { return fmt.Sprintf("%q", data) } - } - - formatConfChange := func(cc pb.ConfChangeI) string { - // TODO(tbg): give the EntryFormatter a type argument so that it gets - // a chance to expose the Context. 
- return pb.ConfChangesToString(cc.AsV2().Changes) - } - - var formatted string - switch e.Type { - case pb.EntryNormal: - formatted = f(e.Data) - case pb.EntryConfChange: - var cc pb.ConfChange - if err := cc.Unmarshal(e.Data); err != nil { - formatted = err.Error() - } else { - formatted = formatConfChange(cc) - } - case pb.EntryConfChangeV2: - var cc pb.ConfChangeV2 - if err := cc.Unmarshal(e.Data); err != nil { - formatted = err.Error() - } else { - formatted = formatConfChange(cc) - } - } - if formatted != "" { - formatted = " " + formatted - } - return fmt.Sprintf("%d/%d %s%s", e.Term, e.Index, e.Type, formatted) -} - -// DescribeEntries calls DescribeEntry for each Entry, adding a newline to -// each. -func DescribeEntries(ents []pb.Entry, f EntryFormatter) string { - var buf bytes.Buffer - for _, e := range ents { - _, _ = buf.WriteString(DescribeEntry(e, f) + "\n") - } - return buf.String() -} - -func limitSize(ents []pb.Entry, maxSize uint64) []pb.Entry { - if len(ents) == 0 { - return ents - } - size := ents[0].Size() - var limit int - for limit = 1; limit < len(ents); limit++ { - size += ents[limit].Size() - if uint64(size) > maxSize { - break - } - } - return ents[:limit] -} - -func assertConfStatesEquivalent(l Logger, cs1, cs2 pb.ConfState) { - err := cs1.Equivalent(cs2) - if err == nil { - return - } - l.Panic(err) -} diff --git a/raft/util_test.go b/raft/util_test.go deleted file mode 100644 index 627bdf676a7d..000000000000 --- a/raft/util_test.go +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2015 The etcd Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package raft - -import ( - "fmt" - "math" - "strings" - "testing" - - "github.com/stretchr/testify/require" - pb "go.etcd.io/etcd/raft/v3/raftpb" -) - -var testFormatter EntryFormatter = func(data []byte) string { - return strings.ToUpper(string(data)) -} - -func TestDescribeEntry(t *testing.T) { - entry := pb.Entry{ - Term: 1, - Index: 2, - Type: pb.EntryNormal, - Data: []byte("hello\x00world"), - } - require.Equal(t, `1/2 EntryNormal "hello\x00world"`, DescribeEntry(entry, nil)) - require.Equal(t, "1/2 EntryNormal HELLO\x00WORLD", DescribeEntry(entry, testFormatter)) -} - -func TestLimitSize(t *testing.T) { - ents := []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 6}} - tests := []struct { - maxsize uint64 - wentries []pb.Entry - }{ - {math.MaxUint64, []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 6}}}, - // Even if maxsize is zero, the first entry should be returned. - {0, []pb.Entry{{Index: 4, Term: 4}}}, - // Limit to 2. - {uint64(ents[0].Size() + ents[1].Size()), []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}}}, - // Limit to 2. - {uint64(ents[0].Size() + ents[1].Size() + ents[2].Size()/2), []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}}}, - {uint64(ents[0].Size() + ents[1].Size() + ents[2].Size() - 1), []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}}}, - // All. 
- {uint64(ents[0].Size() + ents[1].Size() + ents[2].Size()), []pb.Entry{{Index: 4, Term: 4}, {Index: 5, Term: 5}, {Index: 6, Term: 6}}}, - } - - for _, tt := range tests { - t.Run("", func(t *testing.T) { - require.Equal(t, tt.wentries, limitSize(ents, tt.maxsize)) - }) - } -} - -func TestIsLocalMsg(t *testing.T) { - tests := []struct { - msgt pb.MessageType - isLocal bool - }{ - {pb.MsgHup, true}, - {pb.MsgBeat, true}, - {pb.MsgUnreachable, true}, - {pb.MsgSnapStatus, true}, - {pb.MsgCheckQuorum, true}, - {pb.MsgTransferLeader, false}, - {pb.MsgProp, false}, - {pb.MsgApp, false}, - {pb.MsgAppResp, false}, - {pb.MsgVote, false}, - {pb.MsgVoteResp, false}, - {pb.MsgSnap, false}, - {pb.MsgHeartbeat, false}, - {pb.MsgHeartbeatResp, false}, - {pb.MsgTimeoutNow, false}, - {pb.MsgReadIndex, false}, - {pb.MsgReadIndexResp, false}, - {pb.MsgPreVote, false}, - {pb.MsgPreVoteResp, false}, - } - - for _, tt := range tests { - t.Run(fmt.Sprint(tt.msgt), func(t *testing.T) { - require.Equal(t, tt.isLocal, IsLocalMsg(tt.msgt)) - }) - } -} - -func TestIsResponseMsg(t *testing.T) { - tests := []struct { - msgt pb.MessageType - isResponse bool - }{ - {pb.MsgHup, false}, - {pb.MsgBeat, false}, - {pb.MsgUnreachable, true}, - {pb.MsgSnapStatus, false}, - {pb.MsgCheckQuorum, false}, - {pb.MsgTransferLeader, false}, - {pb.MsgProp, false}, - {pb.MsgApp, false}, - {pb.MsgAppResp, true}, - {pb.MsgVote, false}, - {pb.MsgVoteResp, true}, - {pb.MsgSnap, false}, - {pb.MsgHeartbeat, false}, - {pb.MsgHeartbeatResp, true}, - {pb.MsgTimeoutNow, false}, - {pb.MsgReadIndex, false}, - {pb.MsgReadIndexResp, true}, - {pb.MsgPreVote, false}, - {pb.MsgPreVoteResp, true}, - } - - for i, tt := range tests { - got := IsResponseMsg(tt.msgt) - if got != tt.isResponse { - t.Errorf("#%d: got %v, want %v", i, got, tt.isResponse) - } - } -} diff --git a/scripts/genproto.sh b/scripts/genproto.sh index 764c5849eacf..834e1313f419 100755 --- a/scripts/genproto.sh +++ b/scripts/genproto.sh @@ -23,6 +23,7 @@ 
GRPC_GATEWAY_BIN=$(tool_get_bin github.com/grpc-ecosystem/grpc-gateway/protoc-ge SWAGGER_BIN=$(tool_get_bin github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger) GOGOPROTO_ROOT="$(tool_pkg_dir github.com/gogo/protobuf/proto)/.." GRPC_GATEWAY_ROOT="$(tool_pkg_dir github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway)/.." +RAFT_ROOT="$(tool_pkg_dir go.etcd.io/raft/v3/raftpb)/.." echo echo "Resolved binary and packages versions:" @@ -31,20 +32,21 @@ echo " - protoc-gen-grpc-gateway: ${GRPC_GATEWAY_BIN}" echo " - swagger: ${SWAGGER_BIN}" echo " - gogoproto-root: ${GOGOPROTO_ROOT}" echo " - grpc-gateway-root: ${GRPC_GATEWAY_ROOT}" +echo " - raft-root: ${RAFT_ROOT}" GOGOPROTO_PATH="${GOGOPROTO_ROOT}:${GOGOPROTO_ROOT}/protobuf" # directories containing protos to be built -DIRS="./server/storage/wal/walpb ./api/etcdserverpb ./server/etcdserver/api/snap/snappb ./raft/raftpb ./api/mvccpb ./server/lease/leasepb ./api/authpb ./server/etcdserver/api/v3lock/v3lockpb ./server/etcdserver/api/v3election/v3electionpb ./api/membershippb ./tests/functional ./api/versionpb" +DIRS="./server/storage/wal/walpb ./api/etcdserverpb ./server/etcdserver/api/snap/snappb ./api/mvccpb ./server/lease/leasepb ./api/authpb ./server/etcdserver/api/v3lock/v3lockpb ./server/etcdserver/api/v3election/v3electionpb ./api/membershippb ./tests/functional ./api/versionpb" log_callout -e "\\nRunning gofast (gogo) proto generation..." for dir in ${DIRS}; do run pushd "${dir}" - run protoc --gofast_out=plugins=grpc:. -I=".:${GOGOPROTO_PATH}:${ETCD_ROOT_DIR}/..:${ETCD_ROOT_DIR}:${GRPC_GATEWAY_ROOT}/third_party/googleapis" \ + run protoc --gofast_out=plugins=grpc:. 
-I=".:${GOGOPROTO_PATH}:${ETCD_ROOT_DIR}/..:${RAFT_ROOT}:${ETCD_ROOT_DIR}:${GRPC_GATEWAY_ROOT}/third_party/googleapis" \ --plugin="${GOFAST_BIN}" ./**/*.proto run sed -i.bak -E 's|"etcd/api/|"go.etcd.io/etcd/api/v3/|g' ./**/*.pb.go - run sed -i.bak -E 's|"raft/raftpb"|"go.etcd.io/etcd/raft/v3/raftpb"|g' ./**/*.pb.go + run sed -i.bak -E 's|"raftpb"|"go.etcd.io/raft/v3/raftpb"|g' ./**/*.pb.go run sed -i.bak -E 's|"google/protobuf"|"github.com/gogo/protobuf/protoc-gen-gogo/descriptor"|g' ./**/*.pb.go rm -f ./**/*.bak @@ -63,6 +65,7 @@ for pb in api/etcdserverpb/rpc server/etcdserver/api/v3lock/v3lockpb/v3lock serv -I"${GRPC_GATEWAY_ROOT}"/third_party/googleapis \ -I"${GOGOPROTO_PATH}" \ -I"${ETCD_ROOT_DIR}/.." \ + -I"${RAFT_ROOT}" \ --grpc-gateway_out=logtostderr=true,paths=source_relative:. \ --swagger_out=logtostderr=true:./Documentation/dev-guide/apispec/swagger/. \ --plugin="${SWAGGER_BIN}" --plugin="${GRPC_GATEWAY_BIN}" \ diff --git a/scripts/test.sh b/scripts/test.sh index 030c6d80113e..c71560713615 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -53,6 +53,7 @@ fi PASSES=${PASSES:-"fmt bom dep build unit"} PKG=${PKG:-} +SHELLCHECK_VERSION=${SHELLCHECK_VERSION:-"v0.8.0"} if [ -z "$GOARCH" ]; then GOARCH=$(go env GOARCH); @@ -105,7 +106,6 @@ function unit_pass { function integration_extra { if [ -z "${PKG}" ] ; then - run_for_module "." go_test "./contrib/raftexample" "keep_going" : -timeout="${TIMEOUT:-5m}" "${RUN_ARG[@]}" "${COMMON_TEST_FLAGS[@]}" "$@" || return $? run_for_module "tests" go_test "./integration/v2store/..." "keep_going" : -timeout="${TIMEOUT:-5m}" "${RUN_ARG[@]}" "${COMMON_TEST_FLAGS[@]}" "$@" || return $? 
else log_warning "integration_extra ignored when PKG is specified" @@ -371,7 +371,6 @@ function cov_pass { sed --in-place -E "s|go.etcd.io/etcd/etcdctl/v3/|etcdctl/|g" "${cover_out_file}" || true sed --in-place -E "s|go.etcd.io/etcd/etcdutl/v3/|etcdutl/|g" "${cover_out_file}" || true sed --in-place -E "s|go.etcd.io/etcd/pkg/v3/|pkg/|g" "${cover_out_file}" || true - sed --in-place -E "s|go.etcd.io/etcd/raft/v3/|raft/|g" "${cover_out_file}" || true sed --in-place -E "s|go.etcd.io/etcd/server/v3/|server/|g" "${cover_out_file}" || true # held failures to generate the full coverage file, now fail @@ -390,9 +389,15 @@ function cov_pass { ######### Code formatting checkers ############################################# function shellcheck_pass { - if tool_exists "shellcheck" "https://github.com/koalaman/shellcheck#installing"; then - generic_checker run shellcheck -fgcc scripts/*.sh + SHELLCHECK=shellcheck + if ! tool_exists "shellcheck" "https://github.com/koalaman/shellcheck#installing"; then + log_callout "Installing shellcheck $SHELLCHECK_VERSION" + wget -qO- "https://github.com/koalaman/shellcheck/releases/download/${SHELLCHECK_VERSION}/shellcheck-${SHELLCHECK_VERSION}.linux.x86_64.tar.xz" | tar -xJv -C /tmp/ --strip-components=1 + mkdir -p ./bin + mv /tmp/shellcheck ./bin/ + SHELLCHECK=./bin/shellcheck fi + generic_checker run ${SHELLCHECK} -fgcc scripts/*.sh } function shellws_pass { diff --git a/scripts/test_lib.sh b/scripts/test_lib.sh index 8bcc6f7f44fd..475d91dce22a 100644 --- a/scripts/test_lib.sh +++ b/scripts/test_lib.sh @@ -164,7 +164,7 @@ function run_for_module { } function module_dirs() { - echo "api pkg raft client/pkg client/v2 client/v3 server etcdutl etcdctl tests ." + echo "api pkg client/pkg client/v2 client/v3 server etcdutl etcdctl tests ." } # maybe_run [cmd...] runs given command depending on the DRY_RUN flag. 
@@ -180,7 +180,6 @@ function modules() { modules=( "${ROOT_MODULE}/api/v3" "${ROOT_MODULE}/pkg/v3" - "${ROOT_MODULE}/raft/v3" "${ROOT_MODULE}/client/pkg/v3" "${ROOT_MODULE}/client/v2" "${ROOT_MODULE}/client/v3" diff --git a/server/etcdserver/api/etcdhttp/health.go b/server/etcdserver/api/etcdhttp/health.go index 1d29d97bee78..6e4f1e70e3a0 100644 --- a/server/etcdserver/api/etcdhttp/health.go +++ b/server/etcdserver/api/etcdhttp/health.go @@ -24,9 +24,9 @@ import ( "go.etcd.io/etcd/api/v3/etcdserverpb" pb "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/auth" "go.etcd.io/etcd/server/v3/config" + "go.etcd.io/raft/v3" "go.uber.org/zap" ) diff --git a/server/etcdserver/api/etcdhttp/health_test.go b/server/etcdserver/api/etcdhttp/health_test.go index 2cd2c1aad2d7..6dfbd1b19a7e 100644 --- a/server/etcdserver/api/etcdhttp/health_test.go +++ b/server/etcdserver/api/etcdhttp/health_test.go @@ -26,10 +26,10 @@ import ( pb "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/auth" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver" + "go.etcd.io/raft/v3" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/api/membership/cluster.go b/server/etcdserver/api/membership/cluster.go index d5da50ad5ccf..31fb088f8db9 100644 --- a/server/etcdserver/api/membership/cluster.go +++ b/server/etcdserver/api/membership/cluster.go @@ -30,10 +30,10 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/netutil" "go.etcd.io/etcd/pkg/v3/notify" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/v2store" serverversion "go.etcd.io/etcd/server/v3/etcdserver/version" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "github.com/coreos/go-semver/semver" "github.com/prometheus/client_golang/prometheus" 
diff --git a/server/etcdserver/api/membership/cluster_test.go b/server/etcdserver/api/membership/cluster_test.go index 9ccfab4a7107..ce98472df7b8 100644 --- a/server/etcdserver/api/membership/cluster_test.go +++ b/server/etcdserver/api/membership/cluster_test.go @@ -25,9 +25,9 @@ import ( "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/v2store" "go.etcd.io/etcd/server/v3/mock/mockstore" + "go.etcd.io/raft/v3/raftpb" ) func TestClusterMember(t *testing.T) { diff --git a/server/etcdserver/api/rafthttp/coder.go b/server/etcdserver/api/rafthttp/coder.go index cc28249697a2..977442998966 100644 --- a/server/etcdserver/api/rafthttp/coder.go +++ b/server/etcdserver/api/rafthttp/coder.go @@ -14,7 +14,7 @@ package rafthttp -import "go.etcd.io/etcd/raft/v3/raftpb" +import "go.etcd.io/raft/v3/raftpb" type encoder interface { // encode encodes the given message to an output stream. diff --git a/server/etcdserver/api/rafthttp/doc.go b/server/etcdserver/api/rafthttp/doc.go index a9486a8bb664..c45dc8178254 100644 --- a/server/etcdserver/api/rafthttp/doc.go +++ b/server/etcdserver/api/rafthttp/doc.go @@ -12,5 +12,5 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package rafthttp implements HTTP transportation layer for etcd/raft pkg. +// Package rafthttp implements HTTP transportation layer for raft pkg. 
package rafthttp diff --git a/server/etcdserver/api/rafthttp/functional_test.go b/server/etcdserver/api/rafthttp/functional_test.go index 931c247bed5c..55e5f6e0247a 100644 --- a/server/etcdserver/api/rafthttp/functional_test.go +++ b/server/etcdserver/api/rafthttp/functional_test.go @@ -22,9 +22,9 @@ import ( "time" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/api/rafthttp/http.go b/server/etcdserver/api/rafthttp/http.go index 3c784b2bbf24..6e6686b4c87e 100644 --- a/server/etcdserver/api/rafthttp/http.go +++ b/server/etcdserver/api/rafthttp/http.go @@ -27,8 +27,8 @@ import ( "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/client/pkg/v3/types" pioutil "go.etcd.io/etcd/pkg/v3/ioutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" + "go.etcd.io/raft/v3/raftpb" humanize "github.com/dustin/go-humanize" "go.uber.org/zap" diff --git a/server/etcdserver/api/rafthttp/http_test.go b/server/etcdserver/api/rafthttp/http_test.go index ad92c82dc364..699cb507d9da 100644 --- a/server/etcdserver/api/rafthttp/http_test.go +++ b/server/etcdserver/api/rafthttp/http_test.go @@ -29,8 +29,8 @@ import ( "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/api/rafthttp/msg_codec.go b/server/etcdserver/api/rafthttp/msg_codec.go index 7db880baa24f..5444c01f8fdc 100644 --- a/server/etcdserver/api/rafthttp/msg_codec.go +++ b/server/etcdserver/api/rafthttp/msg_codec.go @@ -20,7 +20,7 @@ import ( "io" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) // messageEncoder is 
a encoder that can encode all kinds of messages. diff --git a/server/etcdserver/api/rafthttp/msg_codec_test.go b/server/etcdserver/api/rafthttp/msg_codec_test.go index 671e354d6934..9b14b45095e9 100644 --- a/server/etcdserver/api/rafthttp/msg_codec_test.go +++ b/server/etcdserver/api/rafthttp/msg_codec_test.go @@ -19,7 +19,7 @@ import ( "reflect" "testing" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) func TestMessage(t *testing.T) { diff --git a/server/etcdserver/api/rafthttp/msgappv2_codec.go b/server/etcdserver/api/rafthttp/msgappv2_codec.go index 9a7f4a1770cc..59425aeea69d 100644 --- a/server/etcdserver/api/rafthttp/msgappv2_codec.go +++ b/server/etcdserver/api/rafthttp/msgappv2_codec.go @@ -22,8 +22,8 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3/raftpb" ) const ( diff --git a/server/etcdserver/api/rafthttp/msgappv2_codec_test.go b/server/etcdserver/api/rafthttp/msgappv2_codec_test.go index 15494c68a451..50c7bd25c6d0 100644 --- a/server/etcdserver/api/rafthttp/msgappv2_codec_test.go +++ b/server/etcdserver/api/rafthttp/msgappv2_codec_test.go @@ -20,8 +20,8 @@ import ( "testing" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3/raftpb" ) func TestMsgAppV2(t *testing.T) { diff --git a/server/etcdserver/api/rafthttp/peer.go b/server/etcdserver/api/rafthttp/peer.go index c2f79e08a0e0..11d17cacee8a 100644 --- a/server/etcdserver/api/rafthttp/peer.go +++ b/server/etcdserver/api/rafthttp/peer.go @@ -20,10 +20,10 @@ import ( "time" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" 
"go.uber.org/zap" "golang.org/x/time/rate" diff --git a/server/etcdserver/api/rafthttp/peer_test.go b/server/etcdserver/api/rafthttp/peer_test.go index 1242be0280a2..d1a4f679367a 100644 --- a/server/etcdserver/api/rafthttp/peer_test.go +++ b/server/etcdserver/api/rafthttp/peer_test.go @@ -17,7 +17,7 @@ package rafthttp import ( "testing" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) func TestPeerPick(t *testing.T) { diff --git a/server/etcdserver/api/rafthttp/pipeline.go b/server/etcdserver/api/rafthttp/pipeline.go index 96b35c254499..b8ff3dfcadb8 100644 --- a/server/etcdserver/api/rafthttp/pipeline.go +++ b/server/etcdserver/api/rafthttp/pipeline.go @@ -25,9 +25,9 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/etcdserver/api/rafthttp/pipeline_test.go b/server/etcdserver/api/rafthttp/pipeline_test.go index 83b33e96527d..d1fa1b3a9d84 100644 --- a/server/etcdserver/api/rafthttp/pipeline_test.go +++ b/server/etcdserver/api/rafthttp/pipeline_test.go @@ -26,8 +26,8 @@ import ( "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/api/rafthttp/remote.go b/server/etcdserver/api/rafthttp/remote.go index eddb3f4da1b4..f40acbb9802a 100644 --- a/server/etcdserver/api/rafthttp/remote.go +++ b/server/etcdserver/api/rafthttp/remote.go @@ -16,7 +16,7 @@ package rafthttp import ( "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/etcdserver/api/rafthttp/snapshot_sender.go 
b/server/etcdserver/api/rafthttp/snapshot_sender.go index 9f24a565d4dd..9b98474fe00c 100644 --- a/server/etcdserver/api/rafthttp/snapshot_sender.go +++ b/server/etcdserver/api/rafthttp/snapshot_sender.go @@ -24,8 +24,8 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/httputil" pioutil "go.etcd.io/etcd/pkg/v3/ioutil" - "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" + "go.etcd.io/raft/v3" "github.com/dustin/go-humanize" "go.uber.org/zap" diff --git a/server/etcdserver/api/rafthttp/snapshot_test.go b/server/etcdserver/api/rafthttp/snapshot_test.go index ceb987833d1c..cc9cb5be8d7a 100644 --- a/server/etcdserver/api/rafthttp/snapshot_test.go +++ b/server/etcdserver/api/rafthttp/snapshot_test.go @@ -25,8 +25,8 @@ import ( "time" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/api/rafthttp/stream.go b/server/etcdserver/api/rafthttp/stream.go index 83a5649a7c88..c8a1f1fb5ea0 100644 --- a/server/etcdserver/api/rafthttp/stream.go +++ b/server/etcdserver/api/rafthttp/stream.go @@ -28,8 +28,8 @@ import ( "go.etcd.io/etcd/client/pkg/v3/transport" "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/httputil" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3/raftpb" "github.com/coreos/go-semver/semver" "go.uber.org/zap" diff --git a/server/etcdserver/api/rafthttp/stream_test.go b/server/etcdserver/api/rafthttp/stream_test.go index 9bf58904975e..ffe551c8dce4 100644 --- a/server/etcdserver/api/rafthttp/stream_test.go +++ b/server/etcdserver/api/rafthttp/stream_test.go @@ -28,8 +28,8 @@ import ( "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + 
"go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" "github.com/coreos/go-semver/semver" diff --git a/server/etcdserver/api/rafthttp/transport.go b/server/etcdserver/api/rafthttp/transport.go index fa3011cb39ac..f4af5f3c2a8d 100644 --- a/server/etcdserver/api/rafthttp/transport.go +++ b/server/etcdserver/api/rafthttp/transport.go @@ -22,10 +22,10 @@ import ( "go.etcd.io/etcd/client/pkg/v3/transport" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "github.com/xiang90/probing" "go.uber.org/zap" diff --git a/server/etcdserver/api/rafthttp/transport_bench_test.go b/server/etcdserver/api/rafthttp/transport_bench_test.go index 1e34bdd9f136..6d1e6d7e703a 100644 --- a/server/etcdserver/api/rafthttp/transport_bench_test.go +++ b/server/etcdserver/api/rafthttp/transport_bench_test.go @@ -22,9 +22,9 @@ import ( "time" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/api/rafthttp/transport_test.go b/server/etcdserver/api/rafthttp/transport_test.go index fb520d1eeef8..05ab30203153 100644 --- a/server/etcdserver/api/rafthttp/transport_test.go +++ b/server/etcdserver/api/rafthttp/transport_test.go @@ -22,8 +22,8 @@ import ( "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" "github.com/xiang90/probing" diff --git a/server/etcdserver/api/rafthttp/util_test.go b/server/etcdserver/api/rafthttp/util_test.go index d93d4d91fddd..743333fbbe39 100644 --- 
a/server/etcdserver/api/rafthttp/util_test.go +++ b/server/etcdserver/api/rafthttp/util_test.go @@ -23,7 +23,7 @@ import ( "testing" "go.etcd.io/etcd/api/v3/version" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" "github.com/coreos/go-semver/semver" ) diff --git a/server/etcdserver/api/snap/message.go b/server/etcdserver/api/snap/message.go index 523b52b85c60..2b4090c981d1 100644 --- a/server/etcdserver/api/snap/message.go +++ b/server/etcdserver/api/snap/message.go @@ -18,7 +18,7 @@ import ( "io" "go.etcd.io/etcd/pkg/v3/ioutil" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) // Message is a struct that contains a raft Message and a ReadCloser. The type diff --git a/server/etcdserver/api/snap/snapshotter.go b/server/etcdserver/api/snap/snapshotter.go index 7e563b4c8948..093ab6bc9149 100644 --- a/server/etcdserver/api/snap/snapshotter.go +++ b/server/etcdserver/api/snap/snapshotter.go @@ -28,10 +28,10 @@ import ( "go.etcd.io/etcd/client/pkg/v3/verify" pioutil "go.etcd.io/etcd/pkg/v3/ioutil" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap/snappb" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/etcdserver/api/snap/snapshotter_test.go b/server/etcdserver/api/snap/snapshotter_test.go index 0364c1e73e06..54721624d398 100644 --- a/server/etcdserver/api/snap/snapshotter_test.go +++ b/server/etcdserver/api/snap/snapshotter_test.go @@ -23,8 +23,8 @@ import ( "testing" "go.etcd.io/etcd/client/pkg/v3/fileutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/api/v2stats/server.go b/server/etcdserver/api/v2stats/server.go index 01e8e8fd46af..e8d218a72095 100644 --- a/server/etcdserver/api/v2stats/server.go +++ 
b/server/etcdserver/api/v2stats/server.go @@ -20,7 +20,7 @@ import ( "sync" "time" - "go.etcd.io/etcd/raft/v3" + "go.etcd.io/raft/v3" ) // ServerStats encapsulates various statistics about an EtcdServer and its diff --git a/server/etcdserver/api/v3rpc/interceptor.go b/server/etcdserver/api/v3rpc/interceptor.go index 8057812557c4..2ef5bba91b9c 100644 --- a/server/etcdserver/api/v3rpc/interceptor.go +++ b/server/etcdserver/api/v3rpc/interceptor.go @@ -22,9 +22,9 @@ import ( "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/etcdserver" "go.etcd.io/etcd/server/v3/etcdserver/api" + "go.etcd.io/raft/v3" pb "go.etcd.io/etcd/api/v3/etcdserverpb" "go.uber.org/zap" diff --git a/server/etcdserver/api/v3rpc/maintenance.go b/server/etcdserver/api/v3rpc/maintenance.go index ca266874b032..7ed449a6f3bc 100644 --- a/server/etcdserver/api/v3rpc/maintenance.go +++ b/server/etcdserver/api/v3rpc/maintenance.go @@ -24,7 +24,6 @@ import ( pb "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" "go.etcd.io/etcd/api/v3/version" - "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/etcdserver" "go.etcd.io/etcd/server/v3/etcdserver/apply" "go.etcd.io/etcd/server/v3/etcdserver/errors" @@ -32,6 +31,7 @@ import ( "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/etcd/server/v3/storage/mvcc" "go.etcd.io/etcd/server/v3/storage/schema" + "go.etcd.io/raft/v3" "go.uber.org/zap" ) diff --git a/server/etcdserver/bootstrap.go b/server/etcdserver/bootstrap.go index a1704292bcfd..03764d028df4 100644 --- a/server/etcdserver/bootstrap.go +++ b/server/etcdserver/bootstrap.go @@ -32,8 +32,6 @@ import ( "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver/api" 
"go.etcd.io/etcd/server/v3/etcdserver/api/membership" @@ -48,6 +46,8 @@ import ( "go.etcd.io/etcd/server/v3/storage/schema" "go.etcd.io/etcd/server/v3/storage/wal" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" ) func bootstrap(cfg config.ServerConfig) (b *bootstrappedServer, err error) { diff --git a/server/etcdserver/bootstrap_test.go b/server/etcdserver/bootstrap_test.go index 93cc177d4502..d3a2413856ff 100644 --- a/server/etcdserver/bootstrap_test.go +++ b/server/etcdserver/bootstrap_test.go @@ -37,12 +37,12 @@ import ( "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" "go.etcd.io/etcd/server/v3/etcdserver/api/v2store" serverstorage "go.etcd.io/etcd/server/v3/storage" + "go.etcd.io/raft/v3/raftpb" ) func TestBootstrapExistingClusterNoWALMaxLearner(t *testing.T) { diff --git a/server/etcdserver/raft.go b/server/etcdserver/raft.go index 704e45a7aac3..9496c1b730c2 100644 --- a/server/etcdserver/raft.go +++ b/server/etcdserver/raft.go @@ -23,10 +23,10 @@ import ( "go.etcd.io/etcd/client/pkg/v3/logutil" "go.etcd.io/etcd/pkg/v3/contention" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp" serverstorage "go.etcd.io/etcd/server/v3/storage" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/etcdserver/raft_test.go b/server/etcdserver/raft_test.go index 47a3ece6d0be..7c017ccc2398 100644 --- a/server/etcdserver/raft_test.go +++ b/server/etcdserver/raft_test.go @@ -24,11 +24,11 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" 
"go.etcd.io/etcd/server/v3/mock/mockstorage" serverstorage "go.etcd.io/etcd/server/v3/storage" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 2a5a4c4a5ccf..aee081a0cfd0 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -50,8 +50,6 @@ import ( "go.etcd.io/etcd/pkg/v3/schedule" "go.etcd.io/etcd/pkg/v3/traceutil" "go.etcd.io/etcd/pkg/v3/wait" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/auth" "go.etcd.io/etcd/server/v3/etcdserver/api" httptypes "go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/types" @@ -70,6 +68,8 @@ import ( "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/etcd/server/v3/storage/mvcc" "go.etcd.io/etcd/server/v3/storage/schema" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" ) const ( diff --git a/server/etcdserver/server_test.go b/server/etcdserver/server_test.go index ed08557f8788..8d740dcbb3dc 100644 --- a/server/etcdserver/server_test.go +++ b/server/etcdserver/server_test.go @@ -38,8 +38,6 @@ import ( "go.etcd.io/etcd/pkg/v3/idutil" "go.etcd.io/etcd/pkg/v3/pbutil" "go.etcd.io/etcd/pkg/v3/wait" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/auth" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" @@ -57,6 +55,8 @@ import ( betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" "go.etcd.io/etcd/server/v3/storage/mvcc" "go.etcd.io/etcd/server/v3/storage/schema" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/snapshot_merge.go b/server/etcdserver/snapshot_merge.go index 168ed306ea8c..963ead5a7e24 100644 --- a/server/etcdserver/snapshot_merge.go +++ b/server/etcdserver/snapshot_merge.go @@ -17,9 +17,9 @@ package etcdserver import ( "io" - "go.etcd.io/etcd/raft/v3/raftpb" 
"go.etcd.io/etcd/server/v3/etcdserver/api/snap" "go.etcd.io/etcd/server/v3/storage/backend" + "go.etcd.io/raft/v3/raftpb" humanize "github.com/dustin/go-humanize" "go.uber.org/zap" diff --git a/server/etcdserver/txn/util.go b/server/etcdserver/txn/util.go index 64e2e01bcb57..7c4b58b46cc6 100644 --- a/server/etcdserver/txn/util.go +++ b/server/etcdserver/txn/util.go @@ -63,7 +63,11 @@ func WarnOfExpensiveReadOnlyTxnRequest(lg *zap.Logger, warningApplyDuration time for _, r := range txnResponse.Responses { switch op := r.Response.(type) { case *pb.ResponseOp_ResponseRange: - resps = append(resps, fmt.Sprintf("range_response_count:%d", len(op.ResponseRange.Kvs))) + if op.ResponseRange != nil { + resps = append(resps, fmt.Sprintf("range_response_count:%d", len(op.ResponseRange.Kvs))) + } else { + resps = append(resps, "range_response:nil") + } default: // only range responses should be in a read only txn request } diff --git a/server/etcdserver/txn/util_bench_test.go b/server/etcdserver/txn/util_bench_test.go index bfa799ebfe67..b156590cb520 100644 --- a/server/etcdserver/txn/util_bench_test.go +++ b/server/etcdserver/txn/util_bench_test.go @@ -19,7 +19,7 @@ import ( "testing" "time" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/etcdserver/txn/util_test.go b/server/etcdserver/txn/util_test.go new file mode 100644 index 000000000000..205f35e168e9 --- /dev/null +++ b/server/etcdserver/txn/util_test.go @@ -0,0 +1,125 @@ +// Copyright 2022 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package txn + +import ( + "testing" + "time" + + pb "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.uber.org/zap/zaptest" +) + +// TestWarnOfExpensiveReadOnlyTxnRequest verifies WarnOfExpensiveReadOnlyTxnRequest +// never panic no matter what data the txnResponse contains. +func TestWarnOfExpensiveReadOnlyTxnRequest(t *testing.T) { + testCases := []struct { + name string + txnResp *pb.TxnResponse + }{ + { + name: "all readonly responses", + txnResp: &pb.TxnResponse{ + Responses: []*pb.ResponseOp{ + { + Response: &pb.ResponseOp_ResponseRange{ + ResponseRange: &pb.RangeResponse{}, + }, + }, + { + Response: &pb.ResponseOp_ResponseRange{ + ResponseRange: &pb.RangeResponse{}, + }, + }, + }, + }, + }, + { + name: "all readonly responses with partial nil responses", + txnResp: &pb.TxnResponse{ + Responses: []*pb.ResponseOp{ + { + Response: &pb.ResponseOp_ResponseRange{ + ResponseRange: &pb.RangeResponse{}, + }, + }, + { + Response: &pb.ResponseOp_ResponseRange{ + ResponseRange: nil, + }, + }, + }, + }, + }, + { + name: "all readonly responses with all nil responses", + txnResp: &pb.TxnResponse{ + Responses: []*pb.ResponseOp{ + { + Response: &pb.ResponseOp_ResponseRange{ + ResponseRange: nil, + }, + }, + { + Response: &pb.ResponseOp_ResponseRange{ + ResponseRange: nil, + }, + }, + }, + }, + }, + { + name: "partial non readonly responses", + txnResp: &pb.TxnResponse{ + Responses: []*pb.ResponseOp{ + { + Response: &pb.ResponseOp_ResponseRange{ + ResponseRange: nil, + }, + }, + { + Response: &pb.ResponseOp_ResponsePut{}, + }, + { + Response: &pb.ResponseOp_ResponseDeleteRange{}, + }, + }, + }, + }, + { + name: "all non readonly responses", + txnResp: &pb.TxnResponse{ + Responses: []*pb.ResponseOp{ + { + Response: &pb.ResponseOp_ResponsePut{}, + }, + { + Response: &pb.ResponseOp_ResponseDeleteRange{}, + }, + }, + }, + }, + } + + for _, tc := range testCases { + tc := tc + 
t.Run(tc.name, func(t *testing.T) { + lg := zaptest.NewLogger(t) + start := time.Now().Add(-1 * time.Second) + // WarnOfExpensiveReadOnlyTxnRequest shouldn't panic. + WarnOfExpensiveReadOnlyTxnRequest(lg, 0, start, &pb.TxnRequest{}, tc.txnResp, nil) + }) + } +} diff --git a/server/etcdserver/util_test.go b/server/etcdserver/util_test.go index 06c3e5a7bd0e..cad7c3cf4528 100644 --- a/server/etcdserver/util_test.go +++ b/server/etcdserver/util_test.go @@ -22,10 +22,10 @@ import ( "go.uber.org/zap/zaptest" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" + "go.etcd.io/raft/v3/raftpb" ) func TestLongestConnected(t *testing.T) { diff --git a/server/etcdserver/v3_server.go b/server/etcdserver/v3_server.go index 63a190e6ed69..4f1cd6b13eea 100644 --- a/server/etcdserver/v3_server.go +++ b/server/etcdserver/v3_server.go @@ -25,7 +25,6 @@ import ( pb "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/pkg/v3/traceutil" - "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/auth" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" apply2 "go.etcd.io/etcd/server/v3/etcdserver/apply" @@ -34,6 +33,7 @@ import ( "go.etcd.io/etcd/server/v3/lease" "go.etcd.io/etcd/server/v3/lease/leasehttp" "go.etcd.io/etcd/server/v3/storage/mvcc" + "go.etcd.io/raft/v3" "github.com/gogo/protobuf/proto" "go.uber.org/zap" diff --git a/server/etcdserver/zap_raft.go b/server/etcdserver/zap_raft.go index 55139c04b35c..66dd3caad0d3 100644 --- a/server/etcdserver/zap_raft.go +++ b/server/etcdserver/zap_raft.go @@ -17,7 +17,7 @@ package etcdserver import ( "errors" - "go.etcd.io/etcd/raft/v3" + "go.etcd.io/raft/v3" "go.uber.org/zap" "go.uber.org/zap/zapcore" diff --git a/server/go.mod b/server/go.mod index aea05a55163d..5021a8d92a8f 100644 --- a/server/go.mod +++ b/server/go.mod @@ -28,7 +28,7 
@@ require ( go.etcd.io/etcd/client/v2 v2.306.0-alpha.0 go.etcd.io/etcd/client/v3 v3.6.0-alpha.0 go.etcd.io/etcd/pkg/v3 v3.6.0-alpha.0 - go.etcd.io/etcd/raft/v3 v3.6.0-alpha.0 + go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0 go.opentelemetry.io/otel v1.7.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0 @@ -81,7 +81,6 @@ replace ( go.etcd.io/etcd/client/v2 => ../client/v2 go.etcd.io/etcd/client/v3 => ../client/v3 go.etcd.io/etcd/pkg/v3 => ../pkg - go.etcd.io/etcd/raft/v3 => ../raft ) // Bad imports are sometimes causing attempts to pull that code. diff --git a/server/go.sum b/server/go.sum index bbf858d4aadf..11ee09f0cadd 100644 --- a/server/go.sum +++ b/server/go.sum @@ -289,6 +289,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a h1:Znv2XJyAf/fsJsFNt9toO8uyXwwHQ44wxqsvdSxipj4= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a/go.mod h1:eMshmuwXLWZrjHXN8ZgYrOMQRSbHqi5M84DEZWhG+o4= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= diff --git a/server/mock/mockstorage/storage_recorder.go b/server/mock/mockstorage/storage_recorder.go index 73cad169ff56..16f44e1b0eba 100644 --- a/server/mock/mockstorage/storage_recorder.go +++ b/server/mock/mockstorage/storage_recorder.go @@ -17,8 +17,8 @@ package mockstorage import ( "github.com/coreos/go-semver/semver" "go.etcd.io/etcd/client/pkg/v3/testutil" - "go.etcd.io/etcd/raft/v3" - 
"go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" ) type storageRecorder struct { diff --git a/server/storage/backend.go b/server/storage/backend.go index b1101cfa6fcf..a93fd8a3f11d 100644 --- a/server/storage/backend.go +++ b/server/storage/backend.go @@ -19,11 +19,11 @@ import ( "os" "time" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/etcd/server/v3/storage/schema" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/storage/hooks.go b/server/storage/hooks.go index e9a9f250d419..cf09e06b3a6f 100644 --- a/server/storage/hooks.go +++ b/server/storage/hooks.go @@ -19,10 +19,10 @@ import ( "go.uber.org/zap" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/cindex" "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/etcd/server/v3/storage/schema" + "go.etcd.io/raft/v3/raftpb" ) type BackendHooks struct { diff --git a/server/storage/schema/confstate.go b/server/storage/schema/confstate.go index a0fdad1635e9..21752b79337a 100644 --- a/server/storage/schema/confstate.go +++ b/server/storage/schema/confstate.go @@ -18,8 +18,8 @@ import ( "encoding/json" "log" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/backend" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/storage/schema/confstate_test.go b/server/storage/schema/confstate_test.go index d4134ab3553c..653bf59c5000 100644 --- a/server/storage/schema/confstate_test.go +++ b/server/storage/schema/confstate_test.go @@ -18,8 +18,8 @@ import ( "testing" "github.com/stretchr/testify/assert" - "go.etcd.io/etcd/raft/v3/raftpb" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/storage/schema/schema_test.go b/server/storage/schema/schema_test.go index 59fb3a1af9b3..87d359fa18a9 100644 --- 
a/server/storage/schema/schema_test.go +++ b/server/storage/schema/schema_test.go @@ -23,11 +23,11 @@ import ( "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/api/v3/membershippb" "go.etcd.io/etcd/api/v3/version" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/backend" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" "go.etcd.io/etcd/server/v3/storage/wal" waltesting "go.etcd.io/etcd/server/v3/storage/wal/testing" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/storage/storage.go b/server/storage/storage.go index 9207e1e4d10a..ea815722aac6 100644 --- a/server/storage/storage.go +++ b/server/storage/storage.go @@ -18,10 +18,10 @@ import ( "sync" "github.com/coreos/go-semver/semver" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" "go.etcd.io/etcd/server/v3/storage/wal" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/storage/util.go b/server/storage/util.go index 57a12f0e6df0..e1996cfe5816 100644 --- a/server/storage/util.go +++ b/server/storage/util.go @@ -23,10 +23,10 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/etcdserver/api/membership" "go.etcd.io/etcd/server/v3/etcdserver/api/v2store" + "go.etcd.io/raft/v3/raftpb" ) // AssertNoV2StoreContent -> depending on the deprecation stage, warns or report an error diff --git a/server/storage/wal/decoder.go b/server/storage/wal/decoder.go index ad535de959d6..dde15ed079fa 100644 --- a/server/storage/wal/decoder.go +++ b/server/storage/wal/decoder.go @@ -24,8 +24,8 @@ import ( "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/pkg/v3/crc" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3/raftpb" ) const minSectorSize = 512 diff --git 
a/server/storage/wal/repair_test.go b/server/storage/wal/repair_test.go index cf868a5056e3..2da9142db769 100644 --- a/server/storage/wal/repair_test.go +++ b/server/storage/wal/repair_test.go @@ -20,8 +20,8 @@ import ( "os" "testing" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/storage/wal/testing/waltesting.go b/server/storage/wal/testing/waltesting.go index a4a7790d8f52..d936bf730a51 100644 --- a/server/storage/wal/testing/waltesting.go +++ b/server/storage/wal/testing/waltesting.go @@ -21,9 +21,9 @@ import ( "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/wal" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/storage/wal/version.go b/server/storage/wal/version.go index a2d2f15d8e34..d2310726fbaf 100644 --- a/server/storage/wal/version.go +++ b/server/storage/wal/version.go @@ -26,7 +26,7 @@ import ( "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) // ReadWALVersion reads remaining entries from opened WAL and returns struct diff --git a/server/storage/wal/version_test.go b/server/storage/wal/version_test.go index 76e93a37715e..904d8a48dc89 100644 --- a/server/storage/wal/version_test.go +++ b/server/storage/wal/version_test.go @@ -25,7 +25,7 @@ import ( "go.etcd.io/etcd/api/v3/membershippb" "go.etcd.io/etcd/api/v3/version" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" "google.golang.org/protobuf/reflect/protoreflect" ) diff --git a/server/storage/wal/wal.go b/server/storage/wal/wal.go index ef7538554720..3d99d243cf22 100644 --- a/server/storage/wal/wal.go +++ b/server/storage/wal/wal.go @@ -28,9 +28,9 @@ import ( 
"go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/server/storage/wal/wal_bench_test.go b/server/storage/wal/wal_bench_test.go index 14acf73b7922..0efba051f338 100644 --- a/server/storage/wal/wal_bench_test.go +++ b/server/storage/wal/wal_bench_test.go @@ -17,7 +17,7 @@ package wal import ( "testing" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/storage/wal/wal_test.go b/server/storage/wal/wal_test.go index 24a365d1f0bf..c74f82a856a4 100644 --- a/server/storage/wal/wal_test.go +++ b/server/storage/wal/wal_test.go @@ -32,8 +32,8 @@ import ( "github.com/stretchr/testify/assert" "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap/zaptest" ) diff --git a/server/storage/wal/walpb/record.pb.go b/server/storage/wal/walpb/record.pb.go index 654d82636c09..d0eba734d424 100644 --- a/server/storage/wal/walpb/record.pb.go +++ b/server/storage/wal/walpb/record.pb.go @@ -11,7 +11,7 @@ import ( _ "github.com/gogo/protobuf/gogoproto" proto "github.com/golang/protobuf/proto" - raftpb "go.etcd.io/etcd/raft/v3/raftpb" + raftpb "go.etcd.io/raft/v3/raftpb" ) // Reference imports to suppress errors if they are not otherwise used. 
@@ -119,22 +119,22 @@ func init() { func init() { proto.RegisterFile("record.proto", fileDescriptor_bf94fd919e302a1d) } var fileDescriptor_bf94fd919e302a1d = []byte{ - // 234 bytes of a gzipped FileDescriptorProto + // 233 bytes of a gzipped FileDescriptorProto 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x3c, 0x8e, 0x41, 0x4e, 0xc3, 0x30, - 0x10, 0x45, 0x63, 0xe2, 0x22, 0x18, 0xca, 0x02, 0xab, 0xaa, 0xa2, 0x2c, 0x4c, 0xd4, 0x55, 0x56, + 0x10, 0x45, 0x63, 0xe2, 0x22, 0x18, 0xca, 0xa2, 0x56, 0x85, 0xa2, 0x2c, 0x4c, 0xd4, 0x55, 0x56, 0x29, 0xe2, 0x08, 0x65, 0xcf, 0x22, 0x3d, 0x00, 0x72, 0x1d, 0xa7, 0x20, 0xd1, 0x8c, 0x35, 0xb5, - 0x04, 0xdc, 0x84, 0x23, 0x65, 0xc9, 0x09, 0x10, 0x84, 0x8b, 0xa0, 0x8c, 0x03, 0x1b, 0xfb, 0xeb, - 0x7d, 0xf9, 0x7d, 0xc3, 0x9c, 0x9c, 0x45, 0x6a, 0x2a, 0x4f, 0x18, 0x50, 0xcd, 0x5e, 0xcc, 0xb3, - 0xdf, 0xe5, 0x8b, 0x3d, 0xee, 0x91, 0xc9, 0x7a, 0x4c, 0xb1, 0xcc, 0x97, 0x64, 0xda, 0xb0, 0x1e, - 0x0f, 0xbf, 0xe3, 0x2b, 0xf2, 0xd5, 0x3d, 0x9c, 0xd6, 0x2c, 0x51, 0x19, 0xc8, 0xf0, 0xe6, 0x5d, - 0x26, 0x0a, 0x51, 0xa6, 0x1b, 0xd9, 0x7f, 0x5e, 0x27, 0x35, 0x13, 0xb5, 0x84, 0xd4, 0x92, 0xcd, - 0x4e, 0x0a, 0x51, 0x5e, 0x4e, 0xc5, 0x08, 0x94, 0x02, 0xd9, 0x98, 0x60, 0xb2, 0xb4, 0x10, 0xe5, - 0xbc, 0xe6, 0xbc, 0x22, 0x38, 0xdb, 0x76, 0xc6, 0x1f, 0x1f, 0x31, 0xa8, 0x1c, 0x66, 0x4f, 0x5d, - 0xe3, 0x5e, 0x59, 0x29, 0xa7, 0x97, 0x11, 0xf1, 0x9a, 0xa3, 0x03, 0x4b, 0xe5, 0xff, 0x9a, 0xa3, - 0x83, 0xba, 0x01, 0xb0, 0xd8, 0xb5, 0x0f, 0xc7, 0x60, 0x82, 0x63, 0xf7, 0xc5, 0xed, 0x55, 0x15, - 0x7f, 0x5e, 0xdd, 0x61, 0xd7, 0x6e, 0xc7, 0xa2, 0x3e, 0xb7, 0x7f, 0x71, 0xb3, 0xe8, 0xbf, 0x75, - 0xd2, 0x0f, 0x5a, 0x7c, 0x0c, 0x5a, 0x7c, 0x0d, 0x5a, 0xbc, 0xff, 0xe8, 0xe4, 0x37, 0x00, 0x00, - 0xff, 0xff, 0xc3, 0x36, 0x0c, 0xad, 0x1d, 0x01, 0x00, 0x00, + 0x04, 0xdc, 0x84, 0x23, 0x65, 0xc9, 0x09, 0x10, 0x84, 0x8b, 0xa0, 0x8c, 0x03, 0xab, 0xf9, 0x7a, + 0x5f, 0xff, 0xff, 0x81, 0x39, 0x39, 0x8b, 0xd4, 0x54, 0x9e, 0x30, 0xa0, 0x9a, 0xbd, 0x98, 0x67, + 0xbf, 0xcb, 
0x97, 0x7b, 0xdc, 0x23, 0x93, 0xf5, 0xa8, 0xa2, 0x99, 0x2f, 0xc8, 0xb4, 0xc1, 0xef, + 0xd6, 0xe3, 0x89, 0x68, 0x75, 0x0f, 0xa7, 0x35, 0xe7, 0x55, 0x06, 0x32, 0xbc, 0x79, 0x97, 0x89, + 0x42, 0x94, 0xe9, 0x46, 0xf6, 0x9f, 0xd7, 0x49, 0xcd, 0x44, 0x5d, 0x41, 0x6a, 0xc9, 0x66, 0x27, + 0x85, 0x28, 0x2f, 0x27, 0x63, 0x04, 0x4a, 0x81, 0x6c, 0x4c, 0x30, 0x59, 0x5a, 0x88, 0x72, 0x5e, + 0xb3, 0x5e, 0x11, 0x9c, 0x6d, 0x3b, 0xe3, 0x8f, 0x8f, 0x18, 0x54, 0x0e, 0xb3, 0xa7, 0xae, 0x71, + 0xaf, 0x5c, 0x29, 0xa7, 0x64, 0x44, 0xbc, 0xe6, 0xe8, 0xc0, 0xa5, 0xf2, 0x7f, 0xcd, 0xd1, 0x41, + 0xdd, 0x00, 0x58, 0xec, 0xda, 0x87, 0x63, 0x30, 0xc1, 0x71, 0xf7, 0xc5, 0xed, 0xa2, 0x8a, 0x9f, + 0x57, 0x77, 0xd8, 0xb5, 0xdb, 0xd1, 0xa8, 0xcf, 0xed, 0x9f, 0xdc, 0x2c, 0xfb, 0x6f, 0x9d, 0xf4, + 0x83, 0x16, 0x1f, 0x83, 0x16, 0x5f, 0x83, 0x16, 0xef, 0x3f, 0x3a, 0xf9, 0x0d, 0x00, 0x00, 0xff, + 0xff, 0x60, 0x0f, 0x3c, 0x36, 0x18, 0x01, 0x00, 0x00, } func (m *Record) Marshal() (dAtA []byte, err error) { diff --git a/server/storage/wal/walpb/record.proto b/server/storage/wal/walpb/record.proto index 536fa6c19c12..aed4351d3150 100644 --- a/server/storage/wal/walpb/record.proto +++ b/server/storage/wal/walpb/record.proto @@ -2,7 +2,7 @@ syntax = "proto2"; package walpb; import "gogoproto/gogo.proto"; -import "raft/raftpb/raft.proto"; +import "raftpb/raft.proto"; option (gogoproto.marshaler_all) = true; option (gogoproto.sizer_all) = true; diff --git a/server/storage/wal/walpb/record_test.go b/server/storage/wal/walpb/record_test.go index 3655d1421498..2e6349d4477a 100644 --- a/server/storage/wal/walpb/record_test.go +++ b/server/storage/wal/walpb/record_test.go @@ -18,7 +18,7 @@ import ( "testing" "github.com/golang/protobuf/descriptor" - "go.etcd.io/etcd/raft/v3/raftpb" + "go.etcd.io/raft/v3/raftpb" ) func TestSnapshotMetadataCompatibility(t *testing.T) { diff --git a/server/verify/verify.go b/server/verify/verify.go index d3e8b367f26a..2accdd21768b 100644 --- a/server/verify/verify.go +++ 
b/server/verify/verify.go @@ -18,12 +18,12 @@ import ( "fmt" "go.etcd.io/etcd/client/pkg/v3/verify" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/etcd/server/v3/storage/datadir" "go.etcd.io/etcd/server/v3/storage/schema" wal2 "go.etcd.io/etcd/server/v3/storage/wal" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3/raftpb" "go.uber.org/zap" ) diff --git a/tests/e2e/etcd_mix_versions_test.go b/tests/e2e/etcd_mix_versions_test.go index c1240102a441..ae11db6b1c21 100644 --- a/tests/e2e/etcd_mix_versions_test.go +++ b/tests/e2e/etcd_mix_versions_test.go @@ -31,9 +31,9 @@ import ( // TODO(ahrtr): add network partition scenario to trigger snapshots. func TestMixVersionsSendSnapshot(t *testing.T) { cases := []struct { - name string - clusterVersion e2e.ClusterVersion - newInstaceVersion e2e.ClusterVersion + name string + clusterVersion e2e.ClusterVersion + newInstanceVersion e2e.ClusterVersion }{ // etcd doesn't support adding a new member of old version into // a cluster with higher version. 
For example, etcd cluster @@ -46,15 +46,15 @@ func TestMixVersionsSendSnapshot(t *testing.T) { newInstaceVersion: e2e.LastVersion, },*/ { - name: "etcd instance with current version receives snapshot from the leader with last version", - clusterVersion: e2e.LastVersion, - newInstaceVersion: e2e.CurrentVersion, + name: "etcd instance with current version receives snapshot from the leader with last version", + clusterVersion: e2e.LastVersion, + newInstanceVersion: e2e.CurrentVersion, }, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - mixVersionsSnapshotTest(t, tc.clusterVersion, tc.newInstaceVersion) + mixVersionsSnapshotTest(t, tc.clusterVersion, tc.newInstanceVersion) }) } } diff --git a/tests/framework/e2e/cluster_proxy.go b/tests/framework/e2e/cluster_proxy.go index 98e563524456..4e387410b5d2 100644 --- a/tests/framework/e2e/cluster_proxy.go +++ b/tests/framework/e2e/cluster_proxy.go @@ -116,8 +116,12 @@ func (p *proxyEtcdProcess) Kill() error { return p.etcdProc.Kill() } -func (p *proxyEtcdProcess) Wait() error { - return p.etcdProc.Wait() +func (p *proxyEtcdProcess) IsRunning() bool { + return p.etcdProc.IsRunning() +} + +func (p *proxyEtcdProcess) Wait(ctx context.Context) error { + return p.etcdProc.Wait(ctx) } type proxyProc struct { diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go index 070a77c4d552..d5238b5a26c4 100644 --- a/tests/framework/e2e/etcd_process.go +++ b/tests/framework/e2e/etcd_process.go @@ -42,7 +42,8 @@ type EtcdProcess interface { EndpointsMetrics() []string Client(opts ...config.ClientOption) *EtcdctlV3 - Wait() error + IsRunning() bool + Wait(ctx context.Context) error Start(ctx context.Context) error Restart(ctx context.Context) error Stop() error @@ -201,11 +202,35 @@ func (ep *EtcdServerProcess) Kill() error { return ep.proc.Signal(syscall.SIGKILL) } -func (ep *EtcdServerProcess) Wait() error { - ep.proc.Wait() +func (ep *EtcdServerProcess) Wait(ctx context.Context) error { + ch 
:= make(chan struct{}) + go func() { + defer close(ch) + if ep.proc != nil { + ep.proc.Wait() + ep.cfg.lg.Info("server exited", zap.String("name", ep.cfg.Name)) + } + }() + select { + case <-ch: + ep.proc = nil + return nil + case <-ctx.Done(): + return ctx.Err() + } +} + +func (ep *EtcdServerProcess) IsRunning() bool { + if ep.proc == nil { + return false + } + _, err := ep.proc.ExitCode() + if err == expect.ErrProcessRunning { + return true + } ep.cfg.lg.Info("server exited", zap.String("name", ep.cfg.Name)) ep.proc = nil - return nil + return false } func AssertProcessLogs(t *testing.T, ep EtcdProcess, expectLog string) { diff --git a/tests/framework/integration/cluster.go b/tests/framework/integration/cluster.go index 22eed4da7356..35beb84abace 100644 --- a/tests/framework/integration/cluster.go +++ b/tests/framework/integration/cluster.go @@ -41,7 +41,6 @@ import ( "go.etcd.io/etcd/client/pkg/v3/types" clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/pkg/v3/grpc_testing" - "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/config" "go.etcd.io/etcd/server/v3/embed" "go.etcd.io/etcd/server/v3/etcdserver" @@ -57,6 +56,7 @@ import ( "go.etcd.io/etcd/server/v3/verify" framecfg "go.etcd.io/etcd/tests/v3/framework/config" "go.etcd.io/etcd/tests/v3/framework/testutils" + "go.etcd.io/raft/v3" "go.uber.org/zap/zapcore" "go.uber.org/zap/zaptest" diff --git a/tests/functional/tester/stresser_key.go b/tests/functional/tester/stresser_key.go index a18afc1df628..227b871c3547 100644 --- a/tests/functional/tester/stresser_key.go +++ b/tests/functional/tester/stresser_key.go @@ -25,9 +25,9 @@ import ( "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/server/v3/etcdserver/errors" "go.etcd.io/etcd/tests/v3/functional/rpcpb" + "go.etcd.io/raft/v3" "go.uber.org/zap" "golang.org/x/time/rate" diff --git a/tests/go.mod b/tests/go.mod index 30f570a39ce7..690b1664463a 100644 --- a/tests/go.mod +++ 
b/tests/go.mod @@ -10,7 +10,6 @@ replace ( go.etcd.io/etcd/etcdctl/v3 => ../etcdctl go.etcd.io/etcd/etcdutl/v3 => ../etcdutl go.etcd.io/etcd/pkg/v3 => ../pkg - go.etcd.io/etcd/raft/v3 => ../raft go.etcd.io/etcd/server/v3 => ../server ) @@ -36,8 +35,8 @@ require ( go.etcd.io/etcd/etcdctl/v3 v3.6.0-alpha.0 go.etcd.io/etcd/etcdutl/v3 v3.6.0-alpha.0 go.etcd.io/etcd/pkg/v3 v3.6.0-alpha.0 - go.etcd.io/etcd/raft/v3 v3.6.0-alpha.0 go.etcd.io/etcd/server/v3 v3.6.0-alpha.0 + go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0 go.opentelemetry.io/otel v1.7.0 go.opentelemetry.io/otel/sdk v1.7.0 diff --git a/tests/go.sum b/tests/go.sum index 55c1e16f9ae7..b154abf001ed 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -316,6 +316,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a h1:Znv2XJyAf/fsJsFNt9toO8uyXwwHQ44wxqsvdSxipj4= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a/go.mod h1:eMshmuwXLWZrjHXN8ZgYrOMQRSbHqi5M84DEZWhG+o4= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= diff --git a/tests/linearizability/client.go b/tests/linearizability/client.go index 5addf729769b..4a4e0c675fd7 100644 --- a/tests/linearizability/client.go +++ b/tests/linearizability/client.go @@ -18,19 +18,16 @@ import ( "context" "time" - "github.com/anishathalye/porcupine" clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/zap" ) type recordingClient struct { - client clientv3.Client - id int - - 
operations []porcupine.Operation + client clientv3.Client + history *appendableHistory } -func NewClient(endpoints []string, id int) (*recordingClient, error) { +func NewClient(endpoints []string, ids idProvider) (*recordingClient, error) { cc, err := clientv3.New(clientv3.Config{ Endpoints: endpoints, Logger: zap.NewNop(), @@ -41,9 +38,8 @@ func NewClient(endpoints []string, id int) (*recordingClient, error) { return nil, err } return &recordingClient{ - client: *cc, - id: id, - operations: []porcupine.Operation{}, + client: *cc, + history: newAppendableHistory(ids), }, nil } @@ -58,17 +54,7 @@ func (c *recordingClient) Get(ctx context.Context, key string) error { if err != nil { return err } - var readData string - if len(resp.Kvs) == 1 { - readData = string(resp.Kvs[0].Value) - } - c.operations = append(c.operations, porcupine.Operation{ - ClientId: c.id, - Input: etcdRequest{op: Get, key: key}, - Call: callTime.UnixNano(), - Output: etcdResponse{getData: readData, revision: resp.Header.Revision}, - Return: returnTime.UnixNano(), - }) + c.history.AppendGet(key, callTime, returnTime, resp) return nil } @@ -76,16 +62,14 @@ func (c *recordingClient) Put(ctx context.Context, key, value string) error { callTime := time.Now() resp, err := c.client.Put(ctx, key, value) returnTime := time.Now() - var revision int64 - if resp != nil && resp.Header != nil { - revision = resp.Header.Revision - } - c.operations = append(c.operations, porcupine.Operation{ - ClientId: c.id, - Input: etcdRequest{op: Put, key: key, putData: value}, - Call: callTime.UnixNano(), - Output: etcdResponse{err: err, revision: revision}, - Return: returnTime.UnixNano(), - }) + c.history.AppendPut(key, value, callTime, returnTime, resp, err) + return err +} + +func (c *recordingClient) Delete(ctx context.Context, key string) error { + callTime := time.Now() + resp, err := c.client.Delete(ctx, key) + returnTime := time.Now() + c.history.AppendDelete(key, callTime, returnTime, resp, err) return nil } diff 
--git a/tests/linearizability/failpoints.go b/tests/linearizability/failpoints.go index 247951c19ef1..81e391852be5 100644 --- a/tests/linearizability/failpoints.go +++ b/tests/linearizability/failpoints.go @@ -31,27 +31,31 @@ import ( "go.etcd.io/etcd/tests/v3/framework/e2e" ) +const ( + triggerTimeout = time.Second +) + var ( KillFailpoint Failpoint = killFailpoint{} - DefragBeforeCopyPanic Failpoint = goFailpoint{"backend/defragBeforeCopy", "panic", triggerDefrag, AnyMember} - DefragBeforeRenamePanic Failpoint = goFailpoint{"backend/defragBeforeRename", "panic", triggerDefrag, AnyMember} - BeforeCommitPanic Failpoint = goFailpoint{"backend/beforeCommit", "panic", nil, AnyMember} - AfterCommitPanic Failpoint = goFailpoint{"backend/afterCommit", "panic", nil, AnyMember} - RaftBeforeSavePanic Failpoint = goFailpoint{"etcdserver/raftBeforeSave", "panic", nil, AnyMember} - RaftAfterSavePanic Failpoint = goFailpoint{"etcdserver/raftAfterSave", "panic", nil, AnyMember} - BackendBeforePreCommitHookPanic Failpoint = goFailpoint{"backend/commitBeforePreCommitHook", "panic", nil, AnyMember} - BackendAfterPreCommitHookPanic Failpoint = goFailpoint{"backend/commitAfterPreCommitHook", "panic", nil, AnyMember} - BackendBeforeStartDBTxnPanic Failpoint = goFailpoint{"backend/beforeStartDBTxn", "panic", nil, AnyMember} - BackendAfterStartDBTxnPanic Failpoint = goFailpoint{"backend/afterStartDBTxn", "panic", nil, AnyMember} - BackendBeforeWritebackBufPanic Failpoint = goFailpoint{"backend/beforeWritebackBuf", "panic", nil, AnyMember} - BackendAfterWritebackBufPanic Failpoint = goFailpoint{"backend/afterWritebackBuf", "panic", nil, AnyMember} - CompactBeforeCommitScheduledCompactPanic Failpoint = goFailpoint{"mvcc/compactBeforeCommitScheduledCompact", "panic", triggerCompact, AnyMember} - CompactAfterCommitScheduledCompactPanic Failpoint = goFailpoint{"mvcc/compactAfterCommitScheduledCompact", "panic", triggerCompact, AnyMember} - CompactBeforeSetFinishedCompactPanic Failpoint = 
goFailpoint{"mvcc/compactBeforeSetFinishedCompact", "panic", triggerCompact, AnyMember} - CompactAfterSetFinishedCompactPanic Failpoint = goFailpoint{"mvcc/compactAfterSetFinishedCompact", "panic", triggerCompact, AnyMember} - CompactBeforeCommitBatchPanic Failpoint = goFailpoint{"mvcc/compactBeforeCommitBatch", "panic", triggerCompact, AnyMember} - CompactAfterCommitBatchPanic Failpoint = goFailpoint{"mvcc/compactAfterCommitBatch", "panic", triggerCompact, AnyMember} - RaftBeforeLeaderSendPanic Failpoint = goFailpoint{"etcdserver/raftBeforeLeaderSend", "panic", nil, Leader} + DefragBeforeCopyPanic Failpoint = goPanicFailpoint{"backend/defragBeforeCopy", triggerDefrag, AnyMember} + DefragBeforeRenamePanic Failpoint = goPanicFailpoint{"backend/defragBeforeRename", triggerDefrag, AnyMember} + BeforeCommitPanic Failpoint = goPanicFailpoint{"backend/beforeCommit", nil, AnyMember} + AfterCommitPanic Failpoint = goPanicFailpoint{"backend/afterCommit", nil, AnyMember} + RaftBeforeSavePanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeSave", nil, AnyMember} + RaftAfterSavePanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterSave", nil, AnyMember} + BackendBeforePreCommitHookPanic Failpoint = goPanicFailpoint{"backend/commitBeforePreCommitHook", nil, AnyMember} + BackendAfterPreCommitHookPanic Failpoint = goPanicFailpoint{"backend/commitAfterPreCommitHook", nil, AnyMember} + BackendBeforeStartDBTxnPanic Failpoint = goPanicFailpoint{"backend/beforeStartDBTxn", nil, AnyMember} + BackendAfterStartDBTxnPanic Failpoint = goPanicFailpoint{"backend/afterStartDBTxn", nil, AnyMember} + BackendBeforeWritebackBufPanic Failpoint = goPanicFailpoint{"backend/beforeWritebackBuf", nil, AnyMember} + BackendAfterWritebackBufPanic Failpoint = goPanicFailpoint{"backend/afterWritebackBuf", nil, AnyMember} + CompactBeforeCommitScheduledCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeCommitScheduledCompact", triggerCompact, AnyMember} + 
CompactAfterCommitScheduledCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterCommitScheduledCompact", triggerCompact, AnyMember} + CompactBeforeSetFinishedCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeSetFinishedCompact", triggerCompact, AnyMember} + CompactAfterSetFinishedCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterSetFinishedCompact", triggerCompact, AnyMember} + CompactBeforeCommitBatchPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeCommitBatch", triggerCompact, AnyMember} + CompactAfterCommitBatchPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterCommitBatch", triggerCompact, AnyMember} + RaftBeforeLeaderSendPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeLeaderSend", nil, Leader} RandomFailpoint Failpoint = randomFailpoint{[]Failpoint{ KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic, DefragBeforeCopyPanic, DefragBeforeRenamePanic, @@ -64,12 +68,12 @@ var ( RaftBeforeLeaderSendPanic, }} // TODO: Figure out how to reliably trigger below failpoints and add them to RandomFailpoint - raftBeforeApplySnapPanic Failpoint = goFailpoint{"etcdserver/raftBeforeApplySnap", "panic", nil, AnyMember} - raftAfterApplySnapPanic Failpoint = goFailpoint{"etcdserver/raftAfterApplySnap", "panic", nil, AnyMember} - raftAfterWALReleasePanic Failpoint = goFailpoint{"etcdserver/raftAfterWALRelease", "panic", nil, AnyMember} - raftBeforeFollowerSendPanic Failpoint = goFailpoint{"etcdserver/raftBeforeFollowerSend", "panic", nil, AnyMember} - raftBeforeSaveSnapPanic Failpoint = goFailpoint{"etcdserver/raftBeforeSaveSnap", "panic", nil, AnyMember} - raftAfterSaveSnapPanic Failpoint = goFailpoint{"etcdserver/raftAfterSaveSnap", "panic", nil, AnyMember} + raftBeforeApplySnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeApplySnap", nil, AnyMember} + raftAfterApplySnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterApplySnap", nil, AnyMember} + raftAfterWALReleasePanic 
Failpoint = goPanicFailpoint{"etcdserver/raftAfterWALRelease", nil, AnyMember} + raftBeforeFollowerSendPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeFollowerSend", nil, AnyMember} + raftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeSaveSnap", nil, AnyMember} + raftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterSaveSnap", nil, AnyMember} ) type Failpoint interface { @@ -81,15 +85,21 @@ type killFailpoint struct{} func (f killFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error { member := clus.Procs[rand.Int()%len(clus.Procs)] - err := member.Kill() - if err != nil { - return err - } - err = member.Wait() - if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { - return err + + killCtx, cancel := context.WithTimeout(ctx, triggerTimeout) + defer cancel() + for member.IsRunning() { + err := member.Kill() + if err != nil { + t.Logf("sending kill signal failed: %v", err) + } + err = member.Wait(killCtx) + if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { + return fmt.Errorf("failed to kill the process within %s, err: %w", triggerTimeout, err) + } } - err = member.Start(ctx) + + err := member.Start(ctx) if err != nil { return err } @@ -100,9 +110,8 @@ func (f killFailpoint) Name() string { return "Kill" } -type goFailpoint struct { +type goPanicFailpoint struct { failpoint string - payload string trigger func(ctx context.Context, member e2e.EtcdProcess) error target failpointTarget } @@ -114,45 +123,55 @@ const ( Leader failpointTarget = "Leader" ) -func (f goFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error { - var member e2e.EtcdProcess - switch f.target { - case AnyMember: - member = clus.Procs[rand.Int()%len(clus.Procs)] - case Leader: - member = clus.Procs[clus.WaitLeader(t)] - default: - panic("unknown target") - } +func (f goPanicFailpoint) Trigger(t *testing.T, ctx context.Context, clus 
*e2e.EtcdProcessCluster) error { + member := f.pickMember(t, clus) address := fmt.Sprintf("127.0.0.1:%d", member.Config().GoFailPort) - err := setupGoFailpoint(address, f.failpoint, f.payload) - if err != nil { - return fmt.Errorf("gofailpoint setup failed: %w", err) - } - if f.trigger != nil { - err = f.trigger(ctx, member) + + triggerCtx, cancel := context.WithTimeout(ctx, triggerTimeout) + defer cancel() + + for member.IsRunning() { + err := setupGoFailpoint(triggerCtx, address, f.failpoint, "panic") if err != nil { - return fmt.Errorf("triggering gofailpoint failed: %w", err) + t.Logf("gofailpoint setup failed: %v", err) + } + if f.trigger != nil { + err = f.trigger(triggerCtx, member) + if err != nil { + t.Logf("triggering gofailpoint failed: %v", err) + } + } + err = member.Wait(triggerCtx) + if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { + return fmt.Errorf("failed to trigger a process panic within %s, err: %w", triggerTimeout, err) } } - err = member.Wait() - if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { - return err - } - err = member.Start(ctx) + + err := member.Start(ctx) if err != nil { return err } return nil } -func setupGoFailpoint(host, failpoint, payload string) error { +func (f goPanicFailpoint) pickMember(t *testing.T, clus *e2e.EtcdProcessCluster) e2e.EtcdProcess { + switch f.target { + case AnyMember: + return clus.Procs[rand.Int()%len(clus.Procs)] + case Leader: + return clus.Procs[clus.WaitLeader(t)] + default: + panic("unknown target") + } +} + +func setupGoFailpoint(ctx context.Context, host, failpoint, payload string) error { failpointUrl := url.URL{ Scheme: "http", Host: host, Path: failpoint, } - r, err := http.NewRequest("PUT", failpointUrl.String(), bytes.NewBuffer([]byte(payload))) + r, err := http.NewRequestWithContext(ctx, "PUT", failpointUrl.String(), bytes.NewBuffer([]byte(payload))) if err != nil { return err } @@ -167,7 +186,7 @@ func setupGoFailpoint(host, failpoint, 
payload string) error { return nil } -func (f goFailpoint) Name() string { +func (f goPanicFailpoint) Name() string { return f.failpoint } diff --git a/tests/linearizability/history.go b/tests/linearizability/history.go new file mode 100644 index 000000000000..388bccd00666 --- /dev/null +++ b/tests/linearizability/history.go @@ -0,0 +1,148 @@ +// Copyright 2022 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linearizability + +import ( + "time" + + "github.com/anishathalye/porcupine" + clientv3 "go.etcd.io/etcd/client/v3" +) + +type appendableHistory struct { + // id of the next write operation. If needed a new id might be requested from idProvider. 
+ id int + idProvider idProvider + + history +} + +func newAppendableHistory(ids idProvider) *appendableHistory { + return &appendableHistory{ + id: ids.ClientId(), + idProvider: ids, + history: history{ + successful: []porcupine.Operation{}, + failed: []porcupine.Operation{}, + }, + } +} + +func (h *appendableHistory) AppendGet(key string, start, end time.Time, resp *clientv3.GetResponse) { + var readData string + if len(resp.Kvs) == 1 { + readData = string(resp.Kvs[0].Value) + } + h.successful = append(h.successful, porcupine.Operation{ + ClientId: h.id, + Input: EtcdRequest{Op: Get, Key: key}, + Call: start.UnixNano(), + Output: EtcdResponse{GetData: readData, Revision: resp.Header.Revision}, + Return: end.UnixNano(), + }) +} + +func (h *appendableHistory) AppendPut(key, value string, start, end time.Time, resp *clientv3.PutResponse, err error) { + request := EtcdRequest{Op: Put, Key: key, PutData: value} + if err != nil { + h.appendFailed(request, start, err) + return + } + var revision int64 + if resp != nil && resp.Header != nil { + revision = resp.Header.Revision + } + h.successful = append(h.successful, porcupine.Operation{ + ClientId: h.id, + Input: EtcdRequest{Op: Put, Key: key, PutData: value}, + Call: start.UnixNano(), + Output: EtcdResponse{Err: err, Revision: revision}, + Return: end.UnixNano(), + }) +} + +func (h *appendableHistory) AppendDelete(key string, start, end time.Time, resp *clientv3.DeleteResponse, err error) { + request := EtcdRequest{Op: Delete, Key: key} + if err != nil { + h.appendFailed(request, start, err) + return + } + var revision int64 + var deleted int64 + if resp != nil && resp.Header != nil { + revision = resp.Header.Revision + deleted = resp.Deleted + } + h.successful = append(h.successful, porcupine.Operation{ + ClientId: h.id, + Input: request, + Call: start.UnixNano(), + Output: EtcdResponse{Revision: revision, Deleted: deleted, Err: err}, + Return: end.UnixNano(), + }) +} + +func (h *appendableHistory) 
appendFailed(request EtcdRequest, start time.Time, err error) { + h.failed = append(h.failed, porcupine.Operation{ + ClientId: h.id, + Input: request, + Call: start.UnixNano(), + Output: EtcdResponse{Err: err}, + Return: 0, // For failed writes we don't know when request has really finished. + }) + // Operations of a single client need to be sequential. + // As we don't know return time of failed operations, all new writes need to be done with new client id. + h.id = h.idProvider.ClientId() +} + +type history struct { + successful []porcupine.Operation + // failed requests are kept separate as we don't know return time of failed operations. + // Based on https://github.com/anishathalye/porcupine/issues/10 + failed []porcupine.Operation +} + +func (h history) Merge(h2 history) history { + result := history{ + successful: make([]porcupine.Operation, 0, len(h.successful)+len(h2.successful)), + failed: make([]porcupine.Operation, 0, len(h.failed)+len(h2.failed)), + } + result.successful = append(result.successful, h.successful...) + result.successful = append(result.successful, h2.successful...) + result.failed = append(result.failed, h.failed...) + result.failed = append(result.failed, h2.failed...) + return result +} + +func (h history) Operations() []porcupine.Operation { + operations := make([]porcupine.Operation, 0, len(h.successful)+len(h.failed)) + var maxTime int64 + for _, op := range h.successful { + operations = append(operations, op) + if op.Return > maxTime { + maxTime = op.Return + } + } + // Failed requests don't have a known return time. + // We simulate Infinity by using return time of the latest successful request. 
+ for _, op := range h.failed { + if op.Call > maxTime { + continue + } + op.Return = maxTime + 1 + operations = append(operations, op) + } + return operations +} diff --git a/raft/rafttest/interaction_env_handler_log_level.go b/tests/linearizability/id.go similarity index 53% rename from raft/rafttest/interaction_env_handler_log_level.go rename to tests/linearizability/id.go index 2194c9ee1a18..4e8fa3817652 100644 --- a/raft/rafttest/interaction_env_handler_log_level.go +++ b/tests/linearizability/id.go @@ -1,4 +1,4 @@ -// Copyright 2019 The etcd Authors +// Copyright 2022 The etcd Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,26 +12,29 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rafttest +package linearizability -import ( - "fmt" - "strings" - "testing" +import "sync/atomic" - "github.com/cockroachdb/datadriven" -) +type idProvider interface { + ClientId() int + RequestId() int +} + +func newIdProvider() idProvider { + return &atomicProvider{} +} + +type atomicProvider struct { + clientId atomic.Int64 + requestId atomic.Int64 +} -func (env *InteractionEnv) handleLogLevel(t *testing.T, d datadriven.TestData) error { - return env.LogLevel(d.CmdArgs[0].Key) +func (id *atomicProvider) ClientId() int { + // Subtract one as ClientId should start from zero. 
+ return int(id.clientId.Add(1) - 1) } -func (env *InteractionEnv) LogLevel(name string) error { - for i, s := range lvlNames { - if strings.EqualFold(s, name) { - env.Output.Lvl = i - return nil - } - } - return fmt.Errorf("log levels must be either of %v", lvlNames) +func (id *atomicProvider) RequestId() int { + return int(id.requestId.Add(1)) } diff --git a/tests/linearizability/linearizability_test.go b/tests/linearizability/linearizability_test.go index 9749c5b8a78e..78780e700c8f 100644 --- a/tests/linearizability/linearizability_test.go +++ b/tests/linearizability/linearizability_test.go @@ -34,8 +34,6 @@ const ( minimalQPS = 100.0 // maximalQPS limits number of requests send to etcd to avoid linearizability analysis taking too long. maximalQPS = 200.0 - // failpointTriggersCount - failpointTriggersCount = 60 // waitBetweenFailpointTriggers waitBetweenFailpointTriggers = time.Second ) @@ -77,7 +75,8 @@ func TestLinearizability(t *testing.T) { t.Run(tc.name, func(t *testing.T) { failpoint := FailpointConfig{ failpoint: tc.failpoint, - count: failpointTriggersCount, + count: 1, + retries: 3, waitBetweenTriggers: waitBetweenFailpointTriggers, } traffic := trafficConfig{ @@ -117,8 +116,8 @@ func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessC var err error successes := 0 failures := 0 - time.Sleep(config.waitBetweenTriggers) - for successes < config.count && failures < config.count { + for successes < config.count && failures < config.retries { + time.Sleep(config.waitBetweenTriggers) err = config.failpoint.Trigger(t, ctx, clus) if err != nil { t.Logf("Failed to trigger failpoint %q, err: %v\n", config.failpoint.Name(), err) @@ -126,24 +125,27 @@ func triggerFailpoints(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessC continue } successes++ - time.Sleep(config.waitBetweenTriggers) } - if successes < config.count || failures >= config.count { + if successes < config.count || failures >= config.retries { return fmt.Errorf("failed 
to trigger failpoints enough times, err: %v", err) } + time.Sleep(config.waitBetweenTriggers) return nil } type FailpointConfig struct { failpoint Failpoint count int + retries int waitBetweenTriggers time.Duration } -func simulateTraffic(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config trafficConfig) (operations []porcupine.Operation) { +func simulateTraffic(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, config trafficConfig) []porcupine.Operation { mux := sync.Mutex{} endpoints := clus.EndpointsV3() + ids := newIdProvider() + h := history{} limiter := rate.NewLimiter(rate.Limit(config.maximalQPS), 200) startTime := time.Now() @@ -151,7 +153,7 @@ func simulateTraffic(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessClu for i := 0; i < config.clientCount; i++ { wg.Add(1) endpoints := []string{endpoints[i%len(endpoints)]} - c, err := NewClient(endpoints, i) + c, err := NewClient(endpoints, ids) if err != nil { t.Fatal(err) } @@ -159,14 +161,15 @@ func simulateTraffic(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessClu defer wg.Done() defer c.Close() - config.traffic.Run(ctx, c, limiter) + config.traffic.Run(ctx, c, limiter, ids) mux.Lock() - operations = append(operations, c.operations...) 
+ h = h.Merge(c.history.history) mux.Unlock() }(c) } wg.Wait() endTime := time.Now() + operations := h.Operations() t.Logf("Recorded %d operations", len(operations)) qps := float64(len(operations)) / float64(endTime.Sub(startTime)) * float64(time.Second) diff --git a/tests/linearizability/model.go b/tests/linearizability/model.go index a126f7cf2a8c..ec83d59f1e1d 100644 --- a/tests/linearizability/model.go +++ b/tests/linearizability/model.go @@ -24,27 +24,29 @@ import ( type Operation string const ( - Get Operation = "get" - Put Operation = "put" + Get Operation = "get" + Put Operation = "put" + Delete Operation = "delete" ) -type etcdRequest struct { - op Operation - key string - putData string +type EtcdRequest struct { + Op Operation + Key string + PutData string } -type etcdResponse struct { - getData string - revision int64 - err error +type EtcdResponse struct { + GetData string + Revision int64 + Deleted int64 + Err error } type EtcdState struct { Key string Value string LastRevision int64 - FailedWrites map[string]struct{} + FailedWrite *EtcdRequest } var etcdModel = porcupine.Model{ @@ -55,7 +57,7 @@ var etcdModel = porcupine.Model{ if err != nil { panic(err) } - ok, state := step(state, in.(etcdRequest), out.(etcdResponse)) + ok, state := step(state, in.(EtcdRequest), out.(EtcdResponse)) data, err := json.Marshal(state) if err != nil { panic(err) @@ -63,20 +65,26 @@ var etcdModel = porcupine.Model{ return ok, string(data) }, DescribeOperation: func(in, out interface{}) string { - request := in.(etcdRequest) - response := out.(etcdResponse) - switch request.op { + request := in.(EtcdRequest) + response := out.(EtcdResponse) + switch request.Op { case Get: - if response.err != nil { - return fmt.Sprintf("get(%q) -> %q", request.key, response.err) + if response.Err != nil { + return fmt.Sprintf("get(%q) -> %q", request.Key, response.Err) } else { - return fmt.Sprintf("get(%q) -> %q, rev: %d", request.key, response.getData, response.revision) + return 
fmt.Sprintf("get(%q) -> %q, rev: %d", request.Key, response.GetData, response.Revision) } case Put: - if response.err != nil { - return fmt.Sprintf("put(%q, %q) -> %s", request.key, request.putData, response.err) + if response.Err != nil { + return fmt.Sprintf("put(%q, %q) -> %s", request.Key, request.PutData, response.Err) } else { - return fmt.Sprintf("put(%q, %q) -> ok, rev: %d", request.key, request.putData, response.revision) + return fmt.Sprintf("put(%q, %q) -> ok, rev: %d", request.Key, request.PutData, response.Revision) + } + case Delete: + if response.Err != nil { + return fmt.Sprintf("delete(%q) -> %s", request.Key, response.Err) + } else { + return fmt.Sprintf("delete(%q) -> ok, rev: %d deleted:%d", request.Key, response.Revision, response.Deleted) } default: return "" @@ -84,40 +92,45 @@ var etcdModel = porcupine.Model{ }, } -func step(state EtcdState, request etcdRequest, response etcdResponse) (bool, EtcdState) { - if request.key == "" { +func step(state EtcdState, request EtcdRequest, response EtcdResponse) (bool, EtcdState) { + if request.Key == "" { panic("invalid request") } if state.Key == "" { return true, initState(request, response) } - if state.Key != request.key { + if state.Key != request.Key { panic("Multiple keys not supported") } - switch request.op { + switch request.Op { case Get: return stepGet(state, request, response) case Put: return stepPut(state, request, response) + case Delete: + return stepDelete(state, request, response) default: panic("Unknown operation") } } -func initState(request etcdRequest, response etcdResponse) EtcdState { +func initState(request EtcdRequest, response EtcdResponse) EtcdState { state := EtcdState{ - Key: request.key, - LastRevision: response.revision, - FailedWrites: map[string]struct{}{}, + Key: request.Key, + LastRevision: response.Revision, } - switch request.op { + switch request.Op { case Get: - state.Value = response.getData + state.Value = response.GetData case Put: - if response.err == nil { - 
state.Value = request.putData + if response.Err == nil { + state.Value = request.PutData } else { - state.FailedWrites[request.putData] = struct{}{} + state.FailedWrite = &request + } + case Delete: + if response.Err != nil { + state.FailedWrite = &request } default: panic("Unknown operation") @@ -125,29 +138,76 @@ func initState(request etcdRequest, response etcdResponse) EtcdState { return state } -func stepGet(state EtcdState, request etcdRequest, response etcdResponse) (bool, EtcdState) { - if state.Value == response.getData && state.LastRevision <= response.revision { +func stepGet(state EtcdState, request EtcdRequest, response EtcdResponse) (bool, EtcdState) { + if state.Value == response.GetData && state.LastRevision == response.Revision { + state.FailedWrite = nil return true, state } - _, ok := state.FailedWrites[response.getData] - if ok && state.LastRevision < response.revision { - state.Value = response.getData - state.LastRevision = response.revision - delete(state.FailedWrites, response.getData) - return true, state + if state.FailedWrite != nil && state.LastRevision < response.Revision { + var ok bool + switch state.FailedWrite.Op { + case Get: + panic("Expected write") + case Put: + ok = response.GetData == state.FailedWrite.PutData + case Delete: + ok = response.GetData == "" + default: + panic("Unknown operation") + } + if ok { + state.Value = response.GetData + state.LastRevision = response.Revision + state.FailedWrite = nil + return true, state + } } return false, state } -func stepPut(state EtcdState, request etcdRequest, response etcdResponse) (bool, EtcdState) { - if response.err != nil { - state.FailedWrites[request.putData] = struct{}{} +func stepPut(state EtcdState, request EtcdRequest, response EtcdResponse) (bool, EtcdState) { + if response.Err != nil { + state.FailedWrite = &request + return true, state + } + if response.Revision <= state.LastRevision { + return false, state + } + if response.Revision != state.LastRevision+1 && 
state.FailedWrite == nil { + return false, state + } + state.Value = request.PutData + state.LastRevision = response.Revision + state.FailedWrite = nil + return true, state +} + +func stepDelete(state EtcdState, request EtcdRequest, response EtcdResponse) (bool, EtcdState) { + if response.Err != nil { + state.FailedWrite = &request return true, state } - if state.LastRevision >= response.revision { + // revision should never decrease + if response.Revision < state.LastRevision { return false, state } - state.Value = request.putData - state.LastRevision = response.revision + deleteSucceeded := response.Deleted != 0 + keySet := state.Value != "" + + // non-existent key cannot be deleted. + if deleteSucceeded != keySet && state.FailedWrite == nil { + return false, state + } + //if key was deleted, response revision should increase + if deleteSucceeded && (response.Revision != state.LastRevision+1 || !keySet) && (state.FailedWrite == nil || response.Revision < state.LastRevision+2) { + return false, state + } + //if key was not deleted, response revision should not change + if !deleteSucceeded && state.LastRevision != response.Revision && state.FailedWrite == nil { + return false, state + } + + state.Value = "" + state.LastRevision = response.Revision return true, state } diff --git a/tests/linearizability/model_test.go b/tests/linearizability/model_test.go index 61453310a983..65aca5630502 100644 --- a/tests/linearizability/model_test.go +++ b/tests/linearizability/model_test.go @@ -27,81 +27,195 @@ func TestModel(t *testing.T) { { name: "First Get can start from non-empty value and non-zero revision", operations: []testOperation{ - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{getData: "2", revision: 42}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "2", Revision: 42}}, }, }, { name: "First Put can start from non-zero revision", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key", putData: "2"}, resp: 
etcdResponse{revision: 42}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "2"}, resp: EtcdResponse{Revision: 42}}, }, }, { - name: "Get response data should match PUT", + name: "First delete can start from non-zero revision", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key", putData: "1"}, resp: etcdResponse{revision: 1}}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{getData: "2", revision: 1}, failure: true}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{getData: "1", revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Revision: 42}}, }, }, { - name: "Get response revision should be equal or greater then put", + name: "Get response data should match put", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key"}, resp: etcdResponse{revision: 2}}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{revision: 1}, failure: true}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{revision: 2}}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{revision: 4}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "2", Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "2", Revision: 2}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "1", Revision: 1}}, }, }, { - name: "Put bumps revision", + name: "Get revision should be equal to put", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key", putData: "1"}, resp: etcdResponse{revision: 1}}, - {req: etcdRequest{op: Put, key: "key", putData: "2"}, resp: etcdResponse{revision: 1}, failure: true}, - {req: etcdRequest{op: Put, key: "key", putData: "2"}, resp: etcdResponse{revision: 2}}, + {req: EtcdRequest{Op: Put, Key: "key"}, resp: EtcdResponse{Revision: 2}}, + {req: EtcdRequest{Op: Get, Key: 
"key"}, resp: EtcdResponse{Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 3}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 2}}, + }, + }, + { + name: "Put must increase revision by 1", + operations: []testOperation{ + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 3}, failure: true}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "2"}, resp: EtcdResponse{Revision: 2}}, }, }, { name: "Put can fail and be lost", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key", putData: "1"}, resp: etcdResponse{revision: 1}}, - {req: etcdRequest{op: Put, key: "key", putData: "2"}, resp: etcdResponse{err: errors.New("failed")}}, - {req: etcdRequest{op: Put, key: "key", putData: "3"}, resp: etcdResponse{revision: 2}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "2"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "1", Revision: 1}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "2", Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "1", Revision: 2}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "2", Revision: 2}, failure: true}, + }, + }, + { + name: "Put can fail but be persisted and increase revision before put", + operations: []testOperation{ + // One failed request, one persisted. 
+ {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "2"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "3"}, resp: EtcdResponse{Revision: 3}}, + // Two failed request, two persisted. + {req: EtcdRequest{Op: Put, Key: "key", PutData: "4"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "5"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "6"}, resp: EtcdResponse{Revision: 6}}, + }, + }, + { + name: "Put can fail but be persisted and increase revision before get", + operations: []testOperation{ + // One failed request, one persisted. + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "2"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "3", Revision: 2}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "2", Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "2", Revision: 2}}, + // Two failed request, two persisted. + {req: EtcdRequest{Op: Put, Key: "key", PutData: "3"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "4"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "3", Revision: 3}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "3", Revision: 4}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "4", Revision: 4}}, + }, + }, + { + name: "Put can fail but be persisted and increase revision before delete", + operations: []testOperation{ + // One failed request, one persisted. 
+ {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "2"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 2}, failure: true}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 3}}, + // Two failed request, two persisted. + {req: EtcdRequest{Op: Put, Key: "key", PutData: "4"}, resp: EtcdResponse{Revision: 4}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "5"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "6"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 7}}, + // Two failed request, one persisted. + {req: EtcdRequest{Op: Put, Key: "key", PutData: "8"}, resp: EtcdResponse{Revision: 8}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "9"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "10"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 10}}, + }, + }, + { + name: "Delete only increases revision on success", + operations: []testOperation{ + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 2}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 0, Revision: 3}, failure: true}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 0, Revision: 2}}, + }, + }, + { + name: "Delete clears value", + operations: 
[]testOperation{ + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "1", Revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 2}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "1", Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "1", Revision: 2}, failure: true}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 2}}, + }, + }, + { + name: "Delete can fail and be lost before get", + operations: []testOperation{ + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{GetData: "1", Revision: 1}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 2}, failure: true}, + }, + }, + { + name: "Delete can fail and be lost before delete", + operations: []testOperation{ + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 1}, failure: true}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Deleted: 1, Revision: 2}}, }, }, { - name: "Put can fail but bump revision", + name: "Delete can fail and be lost before put", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key", putData: "1"}, resp: etcdResponse{revision: 1}}, - {req: etcdRequest{op: Put, key: "key", putData: "2"}, resp: etcdResponse{err: errors.New("failed")}}, - {req: etcdRequest{op: Put, key: "key", putData: "3"}, resp: etcdResponse{revision: 3}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: 
errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "2"}, resp: EtcdResponse{Revision: 2}}, }, }, { - name: "Put can fail but be persisted and bump revision", + name: "Delete can fail but be persisted before get", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key", putData: "1"}, resp: etcdResponse{revision: 1}}, - {req: etcdRequest{op: Put, key: "key", putData: "2"}, resp: etcdResponse{err: errors.New("failed")}}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{getData: "2", revision: 1}, failure: true}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{getData: "2", revision: 2}}, + // One failed request, one persisted. + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 2}}, + // Two failed request, one persisted. + {req: EtcdRequest{Op: Put, Key: "key", PutData: "3"}, resp: EtcdResponse{Revision: 3}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Get, Key: "key"}, resp: EtcdResponse{Revision: 4}}, }, }, { - name: "Put can fail but be persisted later", + name: "Delete can fail but be persisted before put", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key", putData: "1"}, resp: etcdResponse{err: errors.New("failed")}}, - {req: etcdRequest{op: Put, key: "key", putData: "2"}, resp: etcdResponse{revision: 2}}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{getData: "2", revision: 2}}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{getData: "1", revision: 3}}, + // One failed request, one persisted. 
+ {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "3"}, resp: EtcdResponse{Revision: 3}}, + // Two failed request, one persisted. + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "5"}, resp: EtcdResponse{Revision: 5}}, }, }, { - name: "Put can fail but bump revision later", + name: "Delete can fail but be persisted before delete", operations: []testOperation{ - {req: etcdRequest{op: Put, key: "key", putData: "1"}, resp: etcdResponse{err: errors.New("failed")}}, - {req: etcdRequest{op: Put, key: "key", putData: "2"}, resp: etcdResponse{revision: 2}}, - {req: etcdRequest{op: Get, key: "key"}, resp: etcdResponse{getData: "2", revision: 2}}, - {req: etcdRequest{op: Put, key: "key", putData: "3"}, resp: etcdResponse{revision: 4}}, + // One failed request, one persisted. + {req: EtcdRequest{Op: Put, Key: "key", PutData: "1"}, resp: EtcdResponse{Revision: 1}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Revision: 2}}, + {req: EtcdRequest{Op: Put, Key: "key", PutData: "3"}, resp: EtcdResponse{Revision: 3}}, + // Two failed request, one persisted. 
+ {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Err: errors.New("failed")}}, + {req: EtcdRequest{Op: Delete, Key: "key"}, resp: EtcdResponse{Revision: 4}}, }, }, } @@ -121,7 +235,7 @@ func TestModel(t *testing.T) { } type testOperation struct { - req etcdRequest - resp etcdResponse + req EtcdRequest + resp EtcdResponse failure bool } diff --git a/tests/linearizability/traffic.go b/tests/linearizability/traffic.go index 83c0e101ca56..413f3b8e2e53 100644 --- a/tests/linearizability/traffic.go +++ b/tests/linearizability/traffic.go @@ -24,11 +24,11 @@ import ( ) var ( - DefaultTraffic Traffic = readWriteSingleKey{key: "key", writes: []opChance{{operation: Put, chance: 100}}} + DefaultTraffic Traffic = readWriteSingleKey{key: "key", writes: []opChance{{operation: Put, chance: 90}, {operation: Delete, chance: 10}}} ) type Traffic interface { - Run(ctx context.Context, c *recordingClient, limiter *rate.Limiter) + Run(ctx context.Context, c *recordingClient, limiter *rate.Limiter, ids idProvider) } type readWriteSingleKey struct { @@ -41,12 +41,9 @@ type opChance struct { chance int } -func (t readWriteSingleKey) Run(ctx context.Context, c *recordingClient, limiter *rate.Limiter) { - maxOperationsPerClient := 1000000 - minId := maxOperationsPerClient * c.id - maxId := maxOperationsPerClient * (c.id + 1) +func (t readWriteSingleKey) Run(ctx context.Context, c *recordingClient, limiter *rate.Limiter, ids idProvider) { - for writeId := minId; writeId < maxId; { + for { select { case <-ctx.Done(): return @@ -58,10 +55,8 @@ func (t readWriteSingleKey) Run(ctx context.Context, c *recordingClient, limiter continue } // Provide each write with unique id to make it easier to validate operation history. 
- t.Write(ctx, c, limiter, writeId) - writeId++ + t.Write(ctx, c, limiter, ids.RequestId()) } - return } func (t readWriteSingleKey) Read(ctx context.Context, c *recordingClient, limiter *rate.Limiter) error { @@ -81,6 +76,8 @@ func (t readWriteSingleKey) Write(ctx context.Context, c *recordingClient, limit switch t.pickWriteOperation() { case Put: err = c.Put(putCtx, t.key, fmt.Sprintf("%d", id)) + case Delete: + err = c.Delete(putCtx, t.key) default: panic("invalid operation") } diff --git a/tools/etcd-dump-logs/etcd-dump-log_test.go b/tools/etcd-dump-logs/etcd-dump-log_test.go index 5aa15ee9266f..bc57887a2866 100644 --- a/tools/etcd-dump-logs/etcd-dump-log_test.go +++ b/tools/etcd-dump-logs/etcd-dump-log_test.go @@ -29,8 +29,8 @@ import ( "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/storage/wal" + "go.etcd.io/raft/v3/raftpb" ) func TestEtcdDumpLogEntryType(t *testing.T) { diff --git a/tools/etcd-dump-logs/main.go b/tools/etcd-dump-logs/main.go index 3c64d338ebb9..bdd7c36ce255 100644 --- a/tools/etcd-dump-logs/main.go +++ b/tools/etcd-dump-logs/main.go @@ -34,10 +34,10 @@ import ( "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/client/pkg/v3/types" "go.etcd.io/etcd/pkg/v3/pbutil" - "go.etcd.io/etcd/raft/v3/raftpb" "go.etcd.io/etcd/server/v3/etcdserver/api/snap" "go.etcd.io/etcd/server/v3/storage/wal" "go.etcd.io/etcd/server/v3/storage/wal/walpb" + "go.etcd.io/raft/v3/raftpb" ) const ( diff --git a/tools/mod/go.mod b/tools/mod/go.mod index 14593baa04bc..cf2ecb037e6c 100644 --- a/tools/mod/go.mod +++ b/tools/mod/go.mod @@ -17,6 +17,7 @@ require ( github.com/mikefarah/yq/v4 v4.24.2 go.etcd.io/gofail v0.0.0-20221125214112-fc21f61ba88a go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116 + go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a gotest.tools/gotestsum v1.7.0 gotest.tools/v3 v3.1.0 honnef.co/go/tools v0.3.0 @@ -49,7 
+50,7 @@ require ( github.com/go-stack/stack v1.8.0 // indirect github.com/goccy/go-yaml v1.9.5 // indirect github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect - github.com/golang/protobuf v1.3.3 // indirect + github.com/golang/protobuf v1.5.2 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/jinzhu/copier v0.3.5 // indirect @@ -82,6 +83,7 @@ require ( golang.org/x/tools v0.1.12 // indirect golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884 // indirect + google.golang.org/protobuf v1.27.1 // indirect gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/tools/mod/go.sum b/tools/mod/go.sum index 954fa24acc90..f9052aac9553 100644 --- a/tools/mod/go.sum +++ b/tools/mod/go.sum @@ -19,12 +19,16 @@ github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef/go.mod h1:W github.com/bmatcuk/doublestar/v4 v4.0.2 h1:X0krlUVAVmtr2cRoTqR8aDMrDqnB36ht8wpWTiQ3jsA= github.com/bmatcuk/doublestar/v4 v4.0.2/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054 h1:uH66TXeswKn5PW5zdZ39xEwfS9an067BirqA+P4QaLI= github.com/chavacava/garif v0.0.0-20220316182200-5cad0b5181d4 h1:tFXjAxje9thrTF4h57Ckik+scJjTWdwAtZqZPtOT48M= github.com/chavacava/garif v0.0.0-20220316182200-5cad0b5181d4/go.mod h1:W8EnPSQ8Nv4fUjc/v1/8tHFqhuOJXnRub0dTfuAQktU= github.com/chzchzchz/goword v0.0.0-20170907005317-a9744cb52b03 h1:0wUHjDfbCAROEAZ96zAJGwcNMkPIheFaIjtQyv3QqfM= github.com/chzchzchz/goword v0.0.0-20170907005317-a9744cb52b03/go.mod h1:uFE9hX+zXEwvyUThZ4gDb9vkAwc5DoHUnRSEpH0VrOs= github.com/client9/misspell v0.3.4/go.mod 
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5 h1:xD/lrqdvwsc+O2bjSSi3YqY73Ke3LAiSCx49aCesA0E= +github.com/cockroachdb/errors v1.2.4 h1:Lap807SXTH5tri2TivECb/4abUkMZC9zRoLarvcKDqs= +github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f h1:o/kfcElHqOiXqcou5a3rIlMc7oJbMQkeLk0VQJ7zgqY= github.com/coreos/license-bill-of-materials v0.0.0-20190913234955-13baff47494e h1:vHRufSa2k8tfkcDdia1vJFa+oiBvvPxW94mg76PPAoA= github.com/coreos/license-bill-of-materials v0.0.0-20190913234955-13baff47494e/go.mod h1:4xMOusJ7xxc84WclVxKT8+lNfGYDwojOUC2OQNCwcj4= github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= @@ -47,6 +51,7 @@ github.com/fatih/structtag v1.2.0 h1:/OdNE99OxoI/PqaW/SuSK9uxxT3f/tcSZgon/ssNSx4 github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/getsentry/raven-go v0.2.0 h1:no+xWJRb5ZI7eE8TWgIq1jLulQiIoLG0IfYxv5JYMGs= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-openapi/analysis v0.21.2 h1:hXFrOYFHUAMQdu6zwAiKKJHJQ8kqZs1ux/ru1P1wLJU= @@ -107,16 +112,18 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfU github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3 h1:gyjaxf+svBWX08ZjK86iN9geUJF0H6gp2IRKX6Nf6/I= 
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/addlicense v1.0.0 h1:cqvo5suPWlsk6r6o42Fs2K66xYCl2tnhVPUYoP3EnO4= github.com/google/addlicense v1.0.0/go.mod h1:Sm/DHu7Jk+T5miFHHehdIjbi4M5+dJDRS3Cq0rncIxA= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -242,6 +249,8 @@ go.etcd.io/gofail v0.0.0-20221125214112-fc21f61ba88a h1:+VXd5SHsxTf5hPVx4YsE358C go.etcd.io/gofail v0.0.0-20221125214112-fc21f61ba88a/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M= go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116 h1:QQiUXlqz+d96jyNG71NE+IGTgOK6Xlhdx+PzvfbLHlQ= go.etcd.io/protodoc v0.0.0-20180829002748-484ab544e116/go.mod h1:F9kog+iVAuvPJucb1dkYcDcbV0g4uyGEHllTP5NrXiw= +go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a h1:Znv2XJyAf/fsJsFNt9toO8uyXwwHQ44wxqsvdSxipj4= +go.etcd.io/raft/v3 
v3.0.0-20221201111702-eaa6808e1f7a/go.mod h1:eMshmuwXLWZrjHXN8ZgYrOMQRSbHqi5M84DEZWhG+o4= go.mongodb.org/mongo-driver v1.7.3 h1:G4l/eYY9VrQAK/AUgkV0koQKzQnyddnWxrd/Etf0jIs= go.mongodb.org/mongo-driver v1.7.3/go.mod h1:NqaYOwnXWr5Pm7AOpO5QFxKJ503nbMse/R79oO62zWg= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -364,6 +373,10 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= diff --git a/tools/mod/tools.go b/tools/mod/tools.go index c2d1ce5819ec..61f54b50cc79 100644 --- a/tools/mod/tools.go +++ b/tools/mod/tools.go @@ -36,6 +36,7 @@ import ( _ "github.com/mikefarah/yq/v4" _ "go.etcd.io/gofail" _ "go.etcd.io/protodoc" + _ "go.etcd.io/raft/v3" _ "gotest.tools/gotestsum" _ "gotest.tools/v3" _ "honnef.co/go/tools/cmd/staticcheck"