diff --git a/auth/auth.go b/auth/auth.go index 2572a2ddfa..118d9fbc1f 100644 --- a/auth/auth.go +++ b/auth/auth.go @@ -32,11 +32,12 @@ type Authenticator struct { } type AuthenticatorOptions struct { - ClientPartitionWindow time.Duration - ChannelsWarningThreshold *uint32 - SessionCookieName string - BcryptCost int - LogCtx context.Context + ClientPartitionWindow time.Duration + ChannelsWarningThreshold *uint32 + ServerlessChannelThreshold uint32 + SessionCookieName string + BcryptCost int + LogCtx context.Context // Collections defines the set of collections used by the authenticator when rebuilding channels. // Channels are only recomputed for collections included in this set. @@ -196,6 +197,17 @@ func (auth *Authenticator) getPrincipal(docID string, factory func() Principal) } changed = true } + // If the channel threshold has been set we need to check the inherited channels across all scopes and collections against the limit + if auth.ServerlessChannelThreshold != 0 { + channelsLength, err := auth.getInheritedChannelsLength(user) + if err != nil { + return nil, nil, false, err + } + err = auth.checkChannelLimits(channelsLength, user) + if err != nil { + return nil, nil, false, err + } + } } if changed { @@ -223,6 +235,73 @@ func (auth *Authenticator) getPrincipal(docID string, factory func() Principal) return princ, nil } +// inheritedCollectionChannels returns channels for a given scope + collection +func (auth *Authenticator) inheritedCollectionChannels(user User, scope, collection string) (ch.TimedSet, error) { + roles, err := auth.getUserRoles(user) + if err != nil { + return nil, err + } + + channels := user.CollectionChannels(scope, collection) + for _, role := range roles { + roleSince := user.RoleNames()[role.Name()] + channels.AddAtSequence(role.CollectionChannels(scope, collection), roleSince.Sequence) + } + return channels, nil +} + +// getInheritedChannelsLength returns number of channels a user has access to across all collections +func (auth *Authenticator) getInheritedChannelsLength(user User) (int, error) { + var cumulativeChannels int + for scope, collections := range auth.Collections { + for collection := range collections { + channels, err := auth.inheritedCollectionChannels(user, scope, collection) + if err != nil { + return 0, err + } + cumulativeChannels += len(channels) + } + } + return cumulativeChannels, nil +} + +// checkChannelLimits logs a warning when the warning threshold is met and will return an error when the channel limit is met +func (auth *Authenticator) checkChannelLimits(channels int, user User) error { + // Error if ServerlessChannelThreshold is set and is >= than the threshold + if uint32(channels) >= auth.ServerlessChannelThreshold { + base.ErrorfCtx(auth.LogCtx, "User ID: %v channel count: %d exceeds %d for channels per user threshold. Auth will be rejected until rectified", + base.UD(user.Name()), channels, auth.ServerlessChannelThreshold) + return base.ErrMaximumChannelsForUserExceeded + } + + // This function is likely to be called once per session when a channel limit is applied, the sync once + // applied here ensures we don't fill logs with warnings about being over warning threshold. 
We may want + // to revisit this implementation around the warning threshold in future + user.GetWarnChanSync().Do(func() { + if channelsPerUserThreshold := auth.ChannelsWarningThreshold; channelsPerUserThreshold != nil { + if uint32(channels) >= *channelsPerUserThreshold { + base.WarnfCtx(auth.LogCtx, "User ID: %v channel count: %d exceeds %d for channels per user warning threshold", + base.UD(user.Name()), channels, *channelsPerUserThreshold) + } + } + }) + return nil +} + +// getUserRoles gets all roles a user has been granted +func (auth *Authenticator) getUserRoles(user User) ([]Role, error) { + roles := make([]Role, 0, len(user.RoleNames())) + for name := range user.RoleNames() { + role, err := auth.GetRole(name) + if err != nil { + return nil, err + } else if role != nil { + roles = append(roles, role) + } + } + return roles, nil +} + // Rebuild channels computes the full set of channels for all collections defined for the authenticator. // For each collection in Authenticator.collections: // - if there is no CollectionAccess on the principal for the collection, rebuilds channels for that collection @@ -230,6 +309,7 @@ func (auth *Authenticator) getPrincipal(docID string, factory func() Principal) func (auth *Authenticator) rebuildChannels(princ Principal) (changed bool, err error) { changed = false + for scope, collections := range auth.Collections { for collection, _ := range collections { // If collection channels are nil, they have been invalidated and must be rebuilt @@ -242,6 +322,7 @@ func (auth *Authenticator) rebuildChannels(princ Principal) (changed bool, err e } } } + return changed, nil } diff --git a/auth/auth_test.go b/auth/auth_test.go index d6d7079372..f1378b763d 100644 --- a/auth/auth_test.go +++ b/auth/auth_test.go @@ -2752,6 +2752,117 @@ func TestObtainChannelsForDeletedRole(t *testing.T) { } } +func TestServerlessChannelLimitsRoles(t *testing.T) { + testCases := []struct { + Name string + Collection bool + }{ + { + Name: "Single role", + }, + { + Name: "Muliple roles", + }, + } + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + testBucket := base.GetTestBucket(t) + defer testBucket.Close() + dataStore := testBucket.GetSingleDataStore() + var role2 Role + + opts := DefaultAuthenticatorOptions() + opts.ServerlessChannelThreshold = 5 + opts.Collections = map[string]map[string]struct{}{ + "scope1": {"collection1": struct{}{}, "collection2": struct{}{}}, + } + auth := NewAuthenticator(dataStore, nil, opts) + user1, err := auth.NewUser("user1", "pass", ch.BaseSetOf(t, "ABC")) + require.NoError(t, err) + err = auth.Save(user1) + require.NoError(t, err) + _, err = auth.AuthenticateUser("user1", "pass") + require.NoError(t, err) + + role1, err := auth.NewRole("role1", nil) + require.NoError(t, err) + if testCase.Name == "Single role" { + user1.SetExplicitRoles(ch.TimedSet{"role1": ch.NewVbSimpleSequence(1)}, 1) + require.NoError(t, auth.Save(user1)) + _, err = auth.AuthenticateUser("user1", "pass") + require.NoError(t, err) + + role1.SetCollectionExplicitChannels("scope1", "collection1", ch.AtSequence(ch.BaseSetOf(t, "ABC", "DEF", "GHI", "JKL"), 1), 1) + require.NoError(t, auth.Save(role1)) + } else { + role2, err = auth.NewRole("role2", nil) + require.NoError(t, err) + user1.SetExplicitRoles(ch.TimedSet{"role1": ch.NewVbSimpleSequence(1), "role2": ch.NewVbSimpleSequence(1)}, 1) + require.NoError(t, auth.Save(user1)) + role1.SetCollectionExplicitChannels("scope1", "collection1", ch.AtSequence(ch.BaseSetOf(t, "ABC", "DEF", "GHI", "JKL"), 
1), 1) + role2.SetCollectionExplicitChannels("scope1", "collection2", ch.AtSequence(ch.BaseSetOf(t, "MNO", "PQR"), 1), 1) + require.NoError(t, auth.Save(role1)) + require.NoError(t, auth.Save(role2)) + } + _, err = auth.AuthenticateUser("user1", "pass") + require.Error(t, err) + }) + } +} + +func TestServerlessChannelLimits(t *testing.T) { + + testCases := []struct { + Name string + Collection bool + }{ + { + Name: "Collection not enabled", + Collection: false, + }, + { + Name: "Collection is enabled", + Collection: true, + }, + } + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + testBucket := base.GetTestBucket(t) + defer testBucket.Close() + dataStore := testBucket.GetSingleDataStore() + + opts := DefaultAuthenticatorOptions() + opts.ServerlessChannelThreshold = 5 + if testCase.Collection { + opts.Collections = map[string]map[string]struct{}{ + "scope1": {"collection1": struct{}{}, "collection2": struct{}{}}, + } + } + auth := NewAuthenticator(dataStore, nil, opts) + user1, err := auth.NewUser("user1", "pass", ch.BaseSetOf(t, "ABC")) + require.NoError(t, err) + err = auth.Save(user1) + require.NoError(t, err) + _, err = auth.AuthenticateUser("user1", "pass") + require.NoError(t, err) + + if !testCase.Collection { + user1.SetCollectionExplicitChannels("_default", "_default", ch.AtSequence(ch.BaseSetOf(t, "ABC", "DEF", "GHI", "JKL", "MNO", "PQR"), 1), 1) + err = auth.Save(user1) + require.NoError(t, err) + } else { + user1.SetCollectionExplicitChannels("scope1", "collection1", ch.AtSequence(ch.BaseSetOf(t, "ABC", "DEF", "GHI", "JKL"), 1), 1) + user1.SetCollectionExplicitChannels("scope1", "collection2", ch.AtSequence(ch.BaseSetOf(t, "MNO", "PQR"), 1), 1) + err = auth.Save(user1) + require.NoError(t, err) + } + _, err = auth.AuthenticateUser("user1", "pass") + require.Error(t, err) + assert.Contains(t, err.Error(), base.ErrMaximumChannelsForUserExceeded.Error()) + }) + } +} + func TestInvalidateRoles(t *testing.T) { testBucket := base.GetTestBucket(t) defer testBucket.Close() diff --git a/auth/principal.go b/auth/principal.go index 0f9d83ef7d..39c439c749 100644 --- a/auth/principal.go +++ b/auth/principal.go @@ -9,6 +9,7 @@ package auth import ( + "sync" "time" "github.com/couchbase/sync_gateway/base" @@ -125,6 +126,8 @@ type User interface { InitializeRoles() + GetWarnChanSync() *sync.Once + revokedChannels(since uint64, lowSeq uint64, triggeredBy uint64) RevokedChannels // Obtains the period over which the user had access to the given channel. Either directly or via a role. 
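
For context on how the new `ServerlessChannelThreshold` option is intended to be wired up, here is a minimal sketch (not part of this change) that mirrors the test setup above. The wrapper function, the 400-channel warning value, and the scope/collection names are illustrative assumptions; `DefaultAuthenticatorOptions`, `NewAuthenticator`, `ServerlessChannelThreshold`, `ChannelsWarningThreshold`, `Collections`, and `base.ServerlessChannelLimit` are the identifiers used or introduced in this diff.

```go
package example

import (
	sgbucket "github.com/couchbase/sg-bucket"

	"github.com/couchbase/sync_gateway/auth"
	"github.com/couchbase/sync_gateway/base"
)

// newServerlessAuthenticator is a hypothetical helper showing how a caller might
// enable the hard serverless channel limit alongside the existing soft warning
// threshold. Auth is rejected once a user's inherited channel count across the
// configured collections reaches ServerlessChannelThreshold; the warning
// threshold only logs, once per user, via the sync.Once exposed by GetWarnChanSync.
func newServerlessAuthenticator(dataStore sgbucket.DataStore) *auth.Authenticator {
	opts := auth.DefaultAuthenticatorOptions()
	opts.ServerlessChannelThreshold = base.ServerlessChannelLimit // hard limit (500)
	warnAt := uint32(400)                                         // illustrative soft limit
	opts.ChannelsWarningThreshold = &warnAt
	// Only collections listed here are counted when computing inherited channels.
	opts.Collections = map[string]map[string]struct{}{
		"scope1": {"collection1": {}, "collection2": {}},
	}
	return auth.NewAuthenticator(dataStore, nil, opts)
}
```
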
diff --git a/auth/user.go b/auth/user.go index 34582e76d3..54a2ca9090 100644 --- a/auth/user.go +++ b/auth/user.go @@ -183,6 +183,10 @@ func (user *userImpl) SetEmail(email string) error { return nil } +func (user *userImpl) GetWarnChanSync() *sync.Once { + return &user.warnChanThresholdOnce +} + func (user *userImpl) RoleNames() ch.TimedSet { if user.RoleInvalSeq != 0 { return nil diff --git a/base/bootstrap.go b/base/bootstrap.go index ed7126e868..33ea8e3749 100644 --- a/base/bootstrap.go +++ b/base/bootstrap.go @@ -53,11 +53,11 @@ type CouchbaseCluster struct { clusterOptions gocb.ClusterOptions forcePerBucketAuth bool // Forces perBucketAuth authenticators to be used to connect to the bucket perBucketAuth map[string]*gocb.Authenticator - bucketConnectionMode BucketConnectionMode // Whether to cache cluster connections - cachedClusterConnection *gocb.Cluster // Cached cluster connection, should only be used by GetConfigBuckets - cachedBucketConnections map[string]*cachedBucket // Per-bucket cached connections - cachedConnectionLock sync.Mutex // mutex for access to cachedBucketConnections - configPersistence ConfigPersistence // ConfigPersistence mode + bucketConnectionMode BucketConnectionMode // Whether to cache cluster connections + cachedClusterConnection *gocb.Cluster // Cached cluster connection, should only be used by GetConfigBuckets + cachedBucketConnections cachedBucketConnections // Per-bucket cached connections + cachedConnectionLock sync.Mutex // mutex for access to cachedBucketConnections + configPersistence ConfigPersistence // ConfigPersistence mode } type BucketConnectionMode int @@ -70,13 +70,73 @@ const ( ) type cachedBucket struct { - bucket *gocb.Bucket - teardownFn func() + bucket *gocb.Bucket // underlying bucket + bucketCloseFn func() // teardown function which will close the gocb connection + refcount int // count of how many functions are using this cachedBucket + shouldClose bool // mark this cachedBucket as needing to be closed with ref } -// noopTeardown is returned by getBucket when using a cached bucket - these buckets are torn down -// when CouchbaseCluster.Close is called. -func noopTeardown() {} +// cahedBucketConnections is a lockable map cached buckets containing refcounts +type cachedBucketConnections struct { + buckets map[string]*cachedBucket + lock sync.Mutex +} + +// removeOutdatedBuckets marks any active buckets for closure and removes the cached connections. +func (c *cachedBucketConnections) removeOutdatedBuckets(activeBuckets Set) { + c.lock.Lock() + defer c.lock.Unlock() + for bucketName, bucket := range c.buckets { + _, exists := activeBuckets[bucketName] + if exists { + continue + } + bucket.shouldClose = true + c._teardown(bucketName) + } +} + +// closeAll removes all cached bucekts +func (c *cachedBucketConnections) closeAll() { + c.lock.Lock() + defer c.lock.Unlock() + for _, bucket := range c.buckets { + bucket.shouldClose = true + bucket.bucketCloseFn() + } +} + +// teardown closes the cached bucket connection while locked, suitable for CouchbaseCluster.getBucket() teardowns +func (c *cachedBucketConnections) teardown(bucketName string) { + c.lock.Lock() + defer c.lock.Unlock() + c.buckets[bucketName].refcount-- + c._teardown(bucketName) +} + +// _teardown closes expects the lock to be acquired before calling this function and the reference count to be up to date. 
+func (c *cachedBucketConnections) _teardown(bucketName string) { + if !c.buckets[bucketName].shouldClose || c.buckets[bucketName].refcount > 0 { + return + } + c.buckets[bucketName].bucketCloseFn() + delete(c.buckets, bucketName) +} + +// get returns a cachedBucket for a given bucketName, or nil if it doesn't exist +func (c *cachedBucketConnections) _get(bucketName string) *cachedBucket { + bucket, ok := c.buckets[bucketName] + if !ok { + return nil + } + c.buckets[bucketName].refcount++ + return bucket +} + +// set adds a cachedBucket for a given bucketName, or nil if it doesn't exist +func (c *cachedBucketConnections) _set(bucketName string, bucket *cachedBucket) { + c.buckets[bucketName] = bucket +} var _ BootstrapConnection = &CouchbaseCluster{} @@ -128,7 +188,7 @@ func NewCouchbaseCluster(server, username, password, } if bucketMode == CachedClusterConnections { - cbCluster.cachedBucketConnections = make(map[string]*cachedBucket) + cbCluster.cachedBucketConnections = cachedBucketConnections{buckets: make(map[string]*cachedBucket)} } cbCluster.configPersistence = &DocumentBootstrapPersistence{} @@ -243,6 +303,8 @@ func (cc *CouchbaseCluster) GetConfigBuckets() ([]string, error) { bucketList = append(bucketList, bucketName) } + cc.cachedBucketConnections.removeOutdatedBuckets(SetOf(bucketList...)) + return bucketList, nil } @@ -399,13 +461,11 @@ func (cc *CouchbaseCluster) KeyExists(location, docID string) (exists bool, err // Close calls teardown for any cached buckets and removes from cachedBucketConnections func (cc *CouchbaseCluster) Close() { + cc.cachedBucketConnections.closeAll() + cc.cachedConnectionLock.Lock() defer cc.cachedConnectionLock.Unlock() - for bucketName, cachedBucket := range cc.cachedBucketConnections { - cachedBucket.teardownFn() - delete(cc.cachedBucketConnections, bucketName) - } if cc.cachedClusterConnection != nil { _ = cc.cachedClusterConnection.Close(nil) cc.cachedClusterConnection = nil @@ -418,36 +478,28 @@ func (cc *CouchbaseCluster) getBucket(bucketName string) (b *gocb.Bucket, teardo return cc.connectToBucket(bucketName) } - cc.cachedConnectionLock.Lock() - defer cc.cachedConnectionLock.Unlock() - - cacheBucket, ok := cc.cachedBucketConnections[bucketName] - if ok { - return cacheBucket.bucket, noopTeardown, nil + teardownFn = func() { + cc.cachedBucketConnections.teardown(bucketName) + } + cc.cachedBucketConnections.lock.Lock() + defer cc.cachedBucketConnections.lock.Unlock() + bucket := cc.cachedBucketConnections._get(bucketName) + if bucket != nil { + return bucket.bucket, teardownFn, nil } // cached bucket not found, connect and add - newBucket, newTeardownFn, err := cc.connectToBucket(bucketName) + newBucket, bucketCloseFn, err := cc.connectToBucket(bucketName) if err != nil { return nil, nil, err } - cc.cachedBucketConnections[bucketName] = &cachedBucket{ - bucket: newBucket, - teardownFn: newTeardownFn, - } - return newBucket, noopTeardown, nil -} - -// For unrecoverable errors when using cached buckets, remove the bucket from the cache to trigger a new connection on next usage -func (cc *CouchbaseCluster) onCachedBucketError(bucketName string) { + cc.cachedBucketConnections._set(bucketName, &cachedBucket{ + bucket: newBucket, + bucketCloseFn: bucketCloseFn, + refcount: 1, + }) - cc.cachedConnectionLock.Lock() - defer cc.cachedConnectionLock.Unlock() - cacheBucket, ok := cc.cachedBucketConnections[bucketName] - if ok { - cacheBucket.teardownFn() - delete(cc.cachedBucketConnections, bucketName) - } + return newBucket, teardownFn, nil } // 
connectToBucket establishes a new connection to a bucket, and returns the bucket after waiting for it to be ready. diff --git a/base/bootstrap_test.go b/base/bootstrap_test.go index 4c29b1ddb3..368b224501 100644 --- a/base/bootstrap_test.go +++ b/base/bootstrap_test.go @@ -9,6 +9,8 @@ package base import ( + "strings" + "sync" "testing" "github.com/imdario/mergo" @@ -33,3 +35,91 @@ func TestMergeStructPointer(t *testing.T) { assert.Equal(t, "changed", source.Ptr.S) assert.Equal(t, IntPtr(5), source.Ptr.I) } + +func TestBootstrapRefCounting(t *testing.T) { + if UnitTestUrlIsWalrus() { + t.Skip("Test requires making a connection to CBS") + } + // Integration tests are configured to run in these parameters, they are used in main_test_bucket_pool.go + // Future enhancement would be to allow all integration tests to run with TLS + x509CertPath := "" + x509KeyPath := "" + caCertPath := "" + forcePerBucketAuth := false + tlsSkipVerify := BoolPtr(false) + var perBucketCredentialsConfig map[string]*CredentialsConfig + + cluster, err := NewCouchbaseCluster(UnitTestUrl(), TestClusterUsername(), TestClusterPassword(), x509CertPath, x509KeyPath, caCertPath, forcePerBucketAuth, perBucketCredentialsConfig, tlsSkipVerify, BoolPtr(TestUseXattrs()), CachedClusterConnections) + require.NoError(t, err) + defer cluster.Close() + require.NotNil(t, cluster) + + clusterConnection, err := cluster.getClusterConnection() + require.NoError(t, err) + require.NotNil(t, clusterConnection) + + buckets, err := cluster.GetConfigBuckets() + require.NoError(t, err) + var testBuckets []string + for _, bucket := range buckets { + if strings.HasPrefix(bucket, tbpBucketNamePrefix) { + testBuckets = append(testBuckets, bucket) + } + + } + require.Len(t, testBuckets, tbpNumBuckets()) + // GetConfigBuckets doesn't cache connections, it uses cluster connection to determine number of buckets + require.Len(t, cluster.cachedBucketConnections.buckets, 0) + + primeBucketConnectionCache := func(bucketNames []string) { + // Bucket CRUD ops do cache connections + for _, bucketName := range bucketNames { + exists, err := cluster.KeyExists(bucketName, "keyThatDoesNotExist") + require.NoError(t, err) + require.False(t, exists) + } + } + + primeBucketConnectionCache(buckets) + require.Len(t, cluster.cachedBucketConnections.buckets, len(buckets)) + + // call removeOutdatedBuckets as no-op + cluster.cachedBucketConnections.removeOutdatedBuckets(SetOf(buckets...)) + require.Len(t, cluster.cachedBucketConnections.buckets, len(buckets)) + + // call removeOutdatedBuckets to remove all cached buckets, call multiple times to make sure idempotent + for i := 0; i < 3; i++ { + cluster.cachedBucketConnections.removeOutdatedBuckets(Set{}) + require.Len(t, cluster.cachedBucketConnections.buckets, 0) + } + + primeBucketConnectionCache(buckets) + require.Len(t, cluster.cachedBucketConnections.buckets, len(buckets)) + + // make sure that you can still use an active connection while the bucket has been removed + wg := sync.WaitGroup{} + wg.Add(1) + makeConnection := make(chan struct{}) + go func() { + defer wg.Done() + b, teardown, err := cluster.getBucket(buckets[0]) + defer teardown() + require.NoError(t, err) + require.NotNil(t, b) + <-makeConnection + // make sure that we can still use bucket after it is no longer cached + exists, err := cluster.configPersistence.keyExists(b.DefaultCollection(), "keyThatDoesNotExist") + require.NoError(t, err) + require.False(t, exists) + }() + + cluster.cachedBucketConnections.removeOutdatedBuckets(Set{}) + require.Len(t, 
cluster.cachedBucketConnections.buckets, 0) + makeConnection <- struct{}{} + + wg.Wait() + + // make sure you can "remove" a non existent bucket in the case that bucket removal is called multiple times + cluster.cachedBucketConnections.removeOutdatedBuckets(SetOf("not-a-bucket")) + +} diff --git a/base/bucket_gocb_test.go b/base/bucket_gocb_test.go index 9d56239766..8490ec0204 100644 --- a/base/bucket_gocb_test.go +++ b/base/bucket_gocb_test.go @@ -362,22 +362,35 @@ func TestIncrCounter(t *testing.T) { } }() - // New Counter - incr 1, default 1 - value, err := dataStore.Incr(key, 1, 1, 0) - assert.NoError(t, err, "Error incrementing non-existent counter") - - // key did not exist - so expect the "initial" value of 1 - assert.Equal(t, uint64(1), value) + // New Counter - incr 0, default 0 - expect zero-value counter doc to be created + value, err := dataStore.Incr(key, 0, 0, 0) + require.NoError(t, err, "Error incrementing non-existent counter") + require.Equal(t, uint64(0), value) // Retrieve existing counter value using GetCounter retrieval, err := GetCounter(dataStore, key) - assert.NoError(t, err, "Error retrieving value for existing counter") - assert.Equal(t, uint64(1), retrieval) + require.NoError(t, err, "Error retrieving value for existing counter") + require.Equal(t, uint64(0), retrieval) + + // remove zero value so we're able to test default below + require.NoError(t, dataStore.Delete(key)) + + // New Counter - incr 1, default 5 + value, err = dataStore.Incr(key, 1, 5, 0) + require.NoError(t, err, "Error incrementing non-existent counter") + + // key did not exist - so expect the "initial" value of 5 + require.Equal(t, uint64(5), value) + + // Retrieve existing counter value using GetCounter + retrieval, err = GetCounter(dataStore, key) + require.NoError(t, err, "Error retrieving value for existing counter") + require.Equal(t, uint64(5), retrieval) // Increment existing counter - retrieval, err = dataStore.Incr(key, 1, 1, 0) - assert.NoError(t, err, "Error incrementing value for existing counter") - assert.Equal(t, uint64(2), retrieval) + retrieval, err = dataStore.Incr(key, 1, 5, 0) + require.NoError(t, err, "Error incrementing value for existing counter") + require.Equal(t, uint64(6), retrieval) } func TestGetAndTouchRaw(t *testing.T) { diff --git a/base/collection_gocb.go b/base/collection_gocb.go index 0a14c809fe..5be7e21d0a 100644 --- a/base/collection_gocb.go +++ b/base/collection_gocb.go @@ -356,9 +356,6 @@ func (c *Collection) Update(k string, exp uint32, callback sgbucket.UpdateFunc) func (c *Collection) Incr(k string, amt, def uint64, exp uint32) (uint64, error) { c.Bucket.waitForAvailKvOp() defer c.Bucket.releaseKvOp() - if amt == 0 { - return 0, errors.New("amt passed to Incr must be non-zero") - } incrOptions := gocb.IncrementOptions{ Initial: int64(def), Delta: amt, diff --git a/base/constants.go b/base/constants.go index e5169a0aa8..ca804d9533 100644 --- a/base/constants.go +++ b/base/constants.go @@ -151,6 +151,12 @@ const ( // DefaultJavascriptTimeoutSecs is number of seconds before Javascript functions (i.e. the sync function or import filter) timeout // If set to zero, timeout is disabled. 
DefaultJavascriptTimeoutSecs = uint32(0) + + // ServerlessChannelLimit is hard limit on channels allowed per user when running in serverless mode + ServerlessChannelLimit = 500 + + // FromConnStrWarningThreshold determines the amount of time it should take before we warn about parsing a connstr (mostly for DNS resolution) + FromConnStrWarningThreshold = 10 * time.Second ) const ( diff --git a/base/dcp_client.go b/base/dcp_client.go index 46547ec52b..d57cf465c7 100644 --- a/base/dcp_client.go +++ b/base/dcp_client.go @@ -26,7 +26,7 @@ import ( const openStreamTimeout = 30 * time.Second const openRetryCount = uint32(10) -const defaultNumWorkers = 8 +const DefaultNumWorkers = 8 // DCP buffer size if we are running in serverless const DefaultDCPBufferServerless = 1 * 1024 * 1024 @@ -37,7 +37,7 @@ const infiniteOpenStreamRetries = uint32(math.MaxUint32) type endStreamCallbackFunc func(e endStreamEvent) -var errVbUUIDMismatch = errors.New("VbUUID mismatch when failOnRollback set") +var ErrVbUUIDMismatch = errors.New("VbUUID mismatch when failOnRollback set") type DCPClient struct { ID string // unique ID for DCPClient - used for DCP stream name, must be unique @@ -81,7 +81,7 @@ type DCPClientOptions struct { func NewDCPClient(ID string, callback sgbucket.FeedEventCallbackFunc, options DCPClientOptions, bucket *GocbV2Bucket) (*DCPClient, error) { - numWorkers := defaultNumWorkers + numWorkers := DefaultNumWorkers if options.NumWorkers > 0 { numWorkers = options.NumWorkers } @@ -320,10 +320,17 @@ func (dc *DCPClient) initAgent(spec BucketSpec) error { } agentConfig := gocbcore.DCPAgentConfig{} + DebugfCtx(context.TODO(), KeyAll, "Parsing cluster connection string %q", UD(connStr)) + beforeFromConnStr := time.Now() connStrError := agentConfig.FromConnStr(connStr) if connStrError != nil { return fmt.Errorf("Unable to start DCP Client - error building conn str: %v", connStrError) } + if d := time.Since(beforeFromConnStr); d > FromConnStrWarningThreshold { + WarnfCtx(context.TODO(), "Parsed cluster connection string %q in: %v", UD(connStr), d) + } else { + DebugfCtx(context.TODO(), KeyAll, "Parsed cluster connection string %q in: %v", UD(connStr), d) + } auth, authErr := spec.GocbcoreAuthProvider() if authErr != nil { @@ -440,7 +447,7 @@ func (dc *DCPClient) openStream(vbID uint16, maxRetries uint32) error { case errors.As(openStreamErr, &rollbackErr): if dc.failOnRollback { InfofCtx(logCtx, KeyDCP, "Open stream for vbID %d failed due to rollback or range error, closing client based on failOnRollback=true", vbID) - return fmt.Errorf("%s, failOnRollback requested", openStreamErr) + return fmt.Errorf("%w, failOnRollback requested", openStreamErr) } InfofCtx(logCtx, KeyDCP, "Open stream for vbID %d failed due to rollback or range error, will roll back metadata and retry: %v", vbID, openStreamErr) @@ -449,7 +456,7 @@ func (dc *DCPClient) openStream(vbID uint16, maxRetries uint32) error { err := fmt.Errorf("Invalid metadata out of range for vbID %d, err: %v metadata %+v, shutting down agent", vbID, openStreamErr, dc.metadata.GetMeta(vbID)) WarnfCtx(logCtx, "%s", err) return err - case errors.Is(openStreamErr, errVbUUIDMismatch): + case errors.Is(openStreamErr, ErrVbUUIDMismatch): WarnfCtx(logCtx, "Closing Stream for vbID: %d, %s", vbID, openStreamErr) return openStreamErr case errors.Is(openStreamErr, gocbcore.ErrShutdown): @@ -549,7 +556,7 @@ func (dc *DCPClient) verifyFailoverLog(vbID uint16, f []gocbcore.FailoverEntry) currentVbUUID := getLatestVbUUID(f) // if previousVbUUID hasn't been set yet (is 
zero), don't treat as rollback. if previousMeta.VbUUID != currentVbUUID { - return errVbUUIDMismatch + return ErrVbUUIDMismatch } } return nil @@ -650,3 +657,7 @@ func getLatestVbUUID(failoverLog []gocbcore.FailoverEntry) (vbUUID gocbcore.VbUU entry := failoverLog[len(failoverLog)-1] return entry.VbUUID } + +func (dc *DCPClient) GetMetadataKeyPrefix() string { + return dc.metadata.GetKeyPrefix() +} diff --git a/base/dcp_client_metadata.go b/base/dcp_client_metadata.go index 4137421ba1..8a5542bac6 100644 --- a/base/dcp_client_metadata.go +++ b/base/dcp_client_metadata.go @@ -62,6 +62,9 @@ type DCPMetadataStore interface { // Purge removes all metadata associated with the metadata store from the bucket. It does not remove the // in-memory metadata. Purge(numWorkers int) + + // GetKeyPrefix will retrieve the key prefix used for metadata persistence + GetKeyPrefix() string } type dcpMetadataBase struct { @@ -155,6 +158,10 @@ func (md *DCPMetadataMem) Purge(numWorkers int) { return } +func (md *DCPMetadataMem) GetKeyPrefix() string { + return "" +} + // Reset sets metadata sequences to zero, but maintains vbucket UUID and failover entries. Used for scenarios // that want to restart a feed from zero, but detect failover func (md *DCPMetadata) Reset() { @@ -261,6 +268,10 @@ func (m *DCPMetadataCS) Purge(numWorkers int) { } } +func (m *DCPMetadataCS) GetKeyPrefix() string { + return m.keyPrefix +} + func (m *DCPMetadataCS) getMetadataKey(workerID int) string { return fmt.Sprintf("%s%d", m.keyPrefix, workerID) } diff --git a/base/dcp_client_test.go b/base/dcp_client_test.go index 87994ad828..15647522e8 100644 --- a/base/dcp_client_test.go +++ b/base/dcp_client_test.go @@ -341,7 +341,7 @@ func TestContinuousDCPRollback(t *testing.T) { counterCallback := func(event sgbucket.FeedEvent) bool { if bytes.HasPrefix(event.Key, []byte(t.Name())) { atomic.AddUint64(&mutationCount, 1) - if atomic.LoadUint64(&mutationCount) == uint64(1000) { + if atomic.LoadUint64(&mutationCount) == uint64(10000) { c <- true } } @@ -374,14 +374,11 @@ func TestContinuousDCPRollback(t *testing.T) { dcpClient, err := NewDCPClient(feedID, counterCallback, dcpClientOpts, gocbv2Bucket) require.NoError(t, err) - // function to force the rollback of some vBuckets - dcpClient.forceRollbackvBucket(vbUUID) - _, startErr := dcpClient.Start() require.NoError(t, startErr) // Add documents - const numDocs = 1000 + const numDocs = 10000 updatedBody := map[string]interface{}{"foo": "bar"} for i := 0; i < numDocs; i++ { key := fmt.Sprintf("%s_%d", t.Name(), i) @@ -393,17 +390,38 @@ func TestContinuousDCPRollback(t *testing.T) { select { case <-c: mutationCount := atomic.LoadUint64(&mutationCount) - require.Equal(t, uint64(1000), mutationCount) + require.Equal(t, uint64(10000), mutationCount) case <-timeout: t.Fatalf("timeout on client reached") } + // new dcp client to simulate a rollback + dcpClientOpts = DCPClientOptions{ + InitialMetadata: dcpClient.GetMetadata(), + FailOnRollback: false, + OneShot: false, + CollectionIDs: collectionIDs, + CheckpointPrefix: DefaultMetadataKeys.DCPCheckpointPrefix(t.Name()), + MetadataStoreType: DCPMetadataStoreInMemory, + } + require.NoError(t, dcpClient.Close()) + + dcpClient1, err := NewDCPClient(feedID, counterCallback, dcpClientOpts, gocbv2Bucket) + require.NoError(t, err) + // function to force the rollback of some vBuckets + dcpClient1.forceRollbackvBucket(vbUUID) + + _, startErr = dcpClient1.Start() + require.NoError(t, startErr) + // Assert that the number of vBuckets active are the same as the total 
number of vBuckets on the client. // In continuous rollback the streams should not close after they're finished. - numVBuckets := len(dcpClient.activeVbuckets) - require.Equal(t, dcpClient.numVbuckets, uint16(numVBuckets)) + numVBuckets := len(dcpClient1.activeVbuckets) + require.Equal(t, dcpClient1.numVbuckets, uint16(numVBuckets)) - require.NoError(t, dcpClient.Close()) + defer func() { + assert.NoError(t, dcpClient1.Close()) + }() } @@ -412,13 +430,12 @@ func TestContinuousDCPRollback(t *testing.T) { func (dc *DCPClient) forceRollbackvBucket(uuid gocbcore.VbUUID) { metadata := make([]DCPMetadata, dc.numVbuckets) for i := uint16(0); i < dc.numVbuckets; i++ { + // rollback roughly half the vBuckets if i%2 == 0 { metadata[i] = dc.metadata.GetMeta(i) metadata[i].VbUUID = uuid - } else { - metadata[i] = dc.metadata.GetMeta(i) + dc.metadata.SetMeta(i, metadata[i]) } - dc.metadata.SetMeta(i, metadata[i]) } } diff --git a/base/dcp_sharded.go b/base/dcp_sharded.go index 7d5272a924..83b4d3c117 100644 --- a/base/dcp_sharded.go +++ b/base/dcp_sharded.go @@ -16,7 +16,6 @@ import ( "crypto/tls" "fmt" "sort" - "strconv" "strings" "sync" @@ -52,6 +51,11 @@ type CbgtContext struct { Cfg cbgt.Cfg // Cfg manages storage of the current pindex set and node assignment heartbeater Heartbeater // Heartbeater used for failed node detection heartbeatListener *importHeartbeatListener // Listener subscribed to failed node alerts from heartbeater + eventHandlers *sgMgrEventHandlers // Event handler callbacks + ctx context.Context // Log context + dbName string // Database name + sourceName string // cbgt source name. Store on CbgtContext for access during teardown + sourceUUID string // cbgt source UUID. Store on CbgtContext for access during teardown } // StartShardedDCPFeed initializes and starts a CBGT Manager targeting the provided bucket. @@ -116,11 +120,6 @@ func GenerateLegacyIndexName(dbName string) string { func createCBGTIndex(ctx context.Context, c *CbgtContext, dbName string, configGroupID string, bucket Bucket, spec BucketSpec, scope string, collections []string, numPartitions uint16) error { sourceType := SOURCE_DCP_SG - bucketUUID, err := bucket.UUID() - if err != nil { - return err - } - sourceParams, err := cbgtFeedParams(spec, scope, collections, dbName) if err != nil { return err @@ -191,8 +190,8 @@ func createCBGTIndex(ctx context.Context, c *CbgtContext, dbName string, configG indexType := CBGTIndexTypeSyncGatewayImport + configGroupID err = c.Manager.CreateIndex( sourceType, // sourceType - bucket.GetName(), // sourceName - bucketUUID, // sourceUUID + c.sourceName, // bucket name + c.sourceUUID, // bucket UUID sourceParams, // sourceParams indexType, // indexType indexName, // indexName @@ -316,14 +315,16 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG // avoids file system usage, in conjunction with managerLoadDataDir=false in options. dataDir := "" - eventHandlers := &sgMgrEventHandlers{ctx: ctx} + eventHandlersCtx, eventHandlersCancel := context.WithCancel(ctx) + eventHandlers := &sgMgrEventHandlers{ctx: eventHandlersCtx, ctxCancel: eventHandlersCancel} // Specify one feed per pindex options := make(map[string]string) options[cbgt.FeedAllotmentOption] = cbgt.FeedAllotmentOnePerPIndex options["managerLoadDataDir"] = "false" - // Ensure we always use TLS if configured - cbgt defaults to non-TLS on initial connection - options["feedInitialBootstrapNonTLS"] = strconv.FormatBool(!spec.IsTLS()) + // TLS is controlled by the connection string. 
+ // cbgt uses this parameter to run in mixed mode - non-TLS for CCCP but TLS for memcached. Sync Gateway does not need to set this parameter. + options["feedInitialBootstrapNonTLS"] = "false" // Disable collections if unsupported if !bucket.IsSupported(sgbucket.BucketStoreFeatureCollections) { @@ -345,10 +346,21 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG serverURL, eventHandlers, options) + eventHandlers.manager = mgr + + bucketUUID, err := bucket.UUID() + if err != nil { + return nil, fmt.Errorf("failed to fetch UUID of bucket %v: %w", MD(bucket.GetName()).Redact(), err) + } cbgtContext := &CbgtContext{ - Manager: mgr, - Cfg: cfgSG, + Manager: mgr, + Cfg: cfgSG, + eventHandlers: eventHandlers, + ctx: ctx, + dbName: dbName, + sourceName: bucket.GetName(), + sourceUUID: bucketUUID, } if spec.Auth != nil || (spec.Certpath != "" && spec.Keypath != "") { @@ -357,10 +369,6 @@ func initCBGTManager(ctx context.Context, bucket Bucket, spec BucketSpec, cfgSG } if spec.IsTLS() { - bucketUUID, err := bucket.UUID() - if err != nil { - return nil, fmt.Errorf("failed to fetch UUID of bucket %v: %w", MD(bucket.GetName()).Redact(), err) - } if spec.TLSSkipVerify { setCbgtRootCertsForBucket(bucketUUID, nil) } else { @@ -439,13 +447,32 @@ func getMinNodeVersion(cfg cbgt.Cfg) (*ComparableVersion, error) { return minVersion, nil } -// StopHeartbeatListener unregisters the listener from the heartbeater, and stops it. -func (c *CbgtContext) StopHeartbeatListener() { +// Stop unregisters the listener from the heartbeater, and stops it and associated handlers. +func (c *CbgtContext) Stop() { + if c.eventHandlers != nil { + c.eventHandlers.ctxCancel() + } if c.heartbeatListener != nil { c.heartbeater.UnregisterListener(c.heartbeatListener.Name()) c.heartbeatListener.Stop() } + + // Close open PIndexes before stopping the manager. + _, pindexes := c.Manager.CurrentMaps() + for _, pIndex := range pindexes { + err := c.Manager.ClosePIndex(pIndex) + if err != nil { + DebugfCtx(c.ctx, KeyImport, "Error closing pindex: %v", err) + } + } + // ClosePIndex calls are synchronous, so can stop manager once they've completed + c.Manager.Stop() + // CloseStatsClients closes the memcached connection cbgt uses for stats calls (highseqno, etc). sourceName and + // sourceUUID are bucketName/bucket UUID in our usage. cbgt has a single global stats connection per bucket, + // but does a refcount check before closing, so handles the case of multiple SG databases targeting the same bucket. + cbgt.CloseStatsClients(c.sourceName, c.sourceUUID) + c.RemoveFeedCredentials(c.dbName) } func (c *CbgtContext) RemoveFeedCredentials(dbName string) { @@ -696,7 +723,9 @@ func GetDefaultImportPartitions(serverless bool) uint16 { } type sgMgrEventHandlers struct { - ctx context.Context + ctx context.Context + ctxCancel context.CancelFunc + manager *cbgt.Manager } func (meh *sgMgrEventHandlers) OnRefreshManagerOptions(options map[string]string) { @@ -711,40 +740,38 @@ func (meh *sgMgrEventHandlers) OnUnregisterPIndex(pindex *cbgt.PIndex) { // No-op for SG } -// OnFeedError is required to trigger reconnection to a feed on an closed connection (EOF). -// Handling below based on cbft implementation - checks whether the underlying source (bucket) -// still exists with VerifySourceNotExists, and if it exists, calls NotifyMgrOnClose. -// This will trigger cbgt closing and then attempting to reconnect to the feed. 
-func (meh *sgMgrEventHandlers) OnFeedError(srcType string, r cbgt.Feed, err error) { - - DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, srcType: %s, feed name: %s, err: %v", - srcType, r.Name(), err) +// OnFeedError is required to trigger reconnection to a feed on a closed connection (EOF). +// NotifyMgrOnClose will trigger cbgt closing and then attempt to reconnect to the feed, if the manager hasn't +// been stopped. +func (meh *sgMgrEventHandlers) OnFeedError(srcType string, r cbgt.Feed, feedErr error) { + // cbgt always passes srcType = SOURCE_GOCBCORE, but we have a wrapped type associated with our indexes - use that instead + // for our logging + srcType = SOURCE_DCP_SG + var bucketName, bucketUUID string dcpFeed, ok := r.(cbgt.FeedEx) - if !ok { - return - } - - gone, indexUUID, er := dcpFeed.VerifySourceNotExists() - DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, VerifySourceNotExists,"+ - " srcType: %s, gone: %t, indexUUID: %s, err: %v", - srcType, gone, indexUUID, er) - if !gone { - // If we get an EOF error from the feeds and the bucket is still alive, - // then there could at the least two potential error scenarios. - // - // 1. Faulty kv node is failed over. - // 2. Ephemeral network connection issues with the host. - // - // In either case, the current feed instance turns dangling. - // Hence we can close the feeds so that they get refreshed to fix - // the connectivity problems either during the next rebalance - // (new kv node after failover-recovery rebalance) or - // on the next janitor work cycle(ephemeral network issue to the same node). - if strings.Contains(err.Error(), "EOF") { - InfofCtx(meh.ctx, KeyDCP, "Handling EOF on cbgt feed - notifying manager to trigger reconnection to feed. indexUUID: %v, err: %v", indexUUID, err) - dcpFeed.NotifyMgrOnClose() + if ok { + bucketName, bucketUUID = dcpFeed.GetBucketDetails() + } + DebugfCtx(meh.ctx, KeyDCP, "cbgt Mgr OnFeedError, srcType: %s, feed name: %s, bucket name: %s, err: %v", + srcType, r.Name(), MD(bucketName), feedErr) + + // If we get an EOF error from the feeds and the import listener hasn't been closed, + // then there could at the least two potential error scenarios. + // + // 1. Faulty kv node is failed over. + // 2. Ephemeral network connection issues with the host. + // + // In either case, the current feed instance turns dangling. + // Hence we can close the feeds so that they get refreshed to fix + // the connectivity problems either during the next rebalance + // (new kv node after failover-recovery rebalance) or + // on the next janitor work cycle(ephemeral network issue to the same node). 
+ if strings.Contains(feedErr.Error(), "EOF") { + // If this wasn't an intentional close, log about the EOF + if meh.ctx.Err() != context.Canceled { + InfofCtx(meh.ctx, KeyDCP, "Handling EOF on cbgt feed - notifying manager to trigger reconnection to feed for bucketName:%v, bucketUUID:%v, err: %v", MD(bucketName), bucketUUID, feedErr) } + dcpFeed.NotifyMgrOnClose() } - } diff --git a/base/error.go b/base/error.go index 45bb127da5..aaa7d0cf82 100644 --- a/base/error.go +++ b/base/error.go @@ -67,6 +67,12 @@ var ( // ErrConfigRegistryReloadRequired is returned when a db config fetch requires a registry reload based on version mismatch (config is newer) ErrConfigRegistryReloadRequired = &sgError{"Config registry reload required"} + + // ErrMaximumChannelsForUserExceeded is returned when running in serverless mode and the user has more than 500 channels granted to them + ErrMaximumChannelsForUserExceeded = &sgError{fmt.Sprintf("User has exceeded maximum of %d channels", ServerlessChannelLimit)} + + // ErrReplicationLimitExceeded is returned when then replication connection threshold is exceeded + ErrReplicationLimitExceeded = &sgError{"Replication limit exceeded. Try agin later."} ) func (e *sgError) Error() string { @@ -115,6 +121,10 @@ func ErrorAsHTTPStatus(err error) (int, string) { return http.StatusRequestEntityTooLarge, "Document too large!" case ErrViewTimeoutError: return http.StatusServiceUnavailable, unwrappedErr.Error() + case ErrMaximumChannelsForUserExceeded: + return http.StatusInternalServerError, "Maximum number of channels exceeded for this user" + case ErrReplicationLimitExceeded: + return http.StatusServiceUnavailable, unwrappedErr.Error() } // gocb V2 errors diff --git a/base/gocb_dcp_feed.go b/base/gocb_dcp_feed.go index 12edfbe35e..811861c87e 100644 --- a/base/gocb_dcp_feed.go +++ b/base/gocb_dcp_feed.go @@ -50,10 +50,6 @@ func getHighSeqMetadata(cbstore CouchbaseBucketStore) ([]DCPMetadata, error) { // StartGocbDCPFeed starts a DCP Feed. 
func StartGocbDCPFeed(bucket *GocbV2Bucket, bucketName string, args sgbucket.FeedArguments, callback sgbucket.FeedEventCallbackFunc, dbStats *expvar.Map, metadataStoreType DCPMetadataStoreType, groupID string) error { - metadata, err := getHighSeqMetadata(bucket) - if err != nil { - return err - } feedName, err := GenerateDcpStreamName(args.ID) if err != nil { return err @@ -84,19 +80,27 @@ func StartGocbDCPFeed(bucket *GocbV2Bucket, bucketName string, args sgbucket.Fee } } } + options := DCPClientOptions{ + MetadataStoreType: metadataStoreType, + GroupID: groupID, + DbStats: dbStats, + CollectionIDs: collectionIDs, + AgentPriority: gocbcore.DcpAgentPriorityMed, + CheckpointPrefix: args.CheckpointPrefix, + } + + if args.Backfill == sgbucket.FeedNoBackfill { + metadata, err := getHighSeqMetadata(bucket) + if err != nil { + return err + } + options.InitialMetadata = metadata + } dcpClient, err := NewDCPClient( feedName, callback, - DCPClientOptions{ - MetadataStoreType: metadataStoreType, - GroupID: groupID, - InitialMetadata: metadata, - DbStats: dbStats, - CollectionIDs: collectionIDs, - AgentPriority: gocbcore.DcpAgentPriorityMed, - CheckpointPrefix: args.CheckpointPrefix, - }, + options, bucket) if err != nil { return err diff --git a/base/leaky_bucket.go b/base/leaky_bucket.go index 0fc2c0aec9..cf9e257e86 100644 --- a/base/leaky_bucket.go +++ b/base/leaky_bucket.go @@ -11,6 +11,7 @@ package base import ( "expvar" + "fmt" "math" "time" @@ -27,6 +28,7 @@ type LeakyBucket struct { } var _ sgbucket.BucketStore = &LeakyBucket{} +var _ sgbucket.DynamicDataStoreBucket = &LeakyBucket{} func NewLeakyBucket(bucket Bucket, config LeakyBucketConfig) *LeakyBucket { return &LeakyBucket{ @@ -94,6 +96,22 @@ func (b *LeakyBucket) GetUnderlyingBucket() Bucket { return b.bucket } +func (b *LeakyBucket) CreateDataStore(name sgbucket.DataStoreName) error { + dynamicDataStore, ok := b.GetUnderlyingBucket().(sgbucket.DynamicDataStoreBucket) + if !ok { + return fmt.Errorf("Bucket %T doesn't support dynamic collection creation", b.GetUnderlyingBucket()) + } + return dynamicDataStore.CreateDataStore(name) +} + +func (b *LeakyBucket) DropDataStore(name sgbucket.DataStoreName) error { + dynamicDataStore, ok := b.GetUnderlyingBucket().(sgbucket.DynamicDataStoreBucket) + if !ok { + return fmt.Errorf("Bucket %T doesn't support dynamic collection creation", b.GetUnderlyingBucket()) + } + return dynamicDataStore.DropDataStore(name) +} + // The config object that controls the LeakyBucket behavior type LeakyBucketConfig struct { // Incr() fails N times before finally succeeding diff --git a/base/redactable_error.go b/base/redactable_error.go index 9d59071e39..db979f01da 100644 --- a/base/redactable_error.go +++ b/base/redactable_error.go @@ -12,14 +12,18 @@ package base import "fmt" -// A redactable error can be used as a drop-in replacement for a base error (as would have been created via -// fmt.Errorf), which has the ability to redact any sensitive user data by calling redact() on all if it's -// stored args. +// RedactableError is an error that can be used as a drop-in replacement for an error, +// which has the ability to redact any sensitive data by calling redact() on all of its args. type RedactableError struct { fmt string args []interface{} } +var ( + _ error = &RedactableError{} + _ Redactor = &RedactableError{} +) + // Create a new redactable error. Same signature as fmt.Errorf() for easy drop-in replacement. 
func RedactErrorf(fmt string, args ...interface{}) *RedactableError { return &RedactableError{ @@ -28,12 +32,17 @@ func RedactErrorf(fmt string, args ...interface{}) *RedactableError { } } -// Satisfy error interface +// Error satisfies the error interface func (re *RedactableError) Error() string { + return re.String() +} + +// String returns a non-redacted version of the error - satisfies the Redactor interface. +func (re *RedactableError) String() string { return fmt.Sprintf(re.fmt, re.args...) } -// Satisfy redact interface +// Redact returns a redacted version of the error - satisfies the Redactor interface. func (re *RedactableError) Redact() string { redactedArgs := redact(re.args) return fmt.Sprintf(re.fmt, redactedArgs...) diff --git a/base/stats.go b/base/stats.go index fd5560a78b..224aafbcf0 100644 --- a/base/stats.go +++ b/base/stats.go @@ -376,7 +376,8 @@ type CBLReplicationPullStats struct { // The total amount of time processing rev messages (revisions) during pull revision. RevProcessingTime *SgwIntStat `json:"rev_processing_time"` // The total number of rev messages processed during replication. - RevSendCount *SgwIntStat `json:"rev_send_count"` + RevSendCount *SgwIntStat `json:"rev_send_count"` + RevErrorCount *SgwIntStat `json:"rev_error_count"` // The total amount of time between Sync Gateway receiving a request for a revision and that revision being sent. // // In a pull replication, Sync Gateway sends a /_changes request to the client and the client responds with the list of revisions it wants to receive. @@ -392,6 +393,8 @@ type CBLReplicationPushStats struct { AttachmentPushCount *SgwIntStat `json:"attachment_push_count"` // The total number of documents pushed. DocPushCount *SgwIntStat `json:"doc_push_count"` + // The total number of documents that failed to push. + DocPushErrorCount *SgwIntStat `json:"doc_push_error_count"` // The total number of changes and-or proposeChanges messages processed since node start-up. ProposeChangeCount *SgwIntStat `json:"propose_change_count"` // The total time spent processing changes and/or proposeChanges messages. @@ -430,6 +433,8 @@ type CollectionStats struct { } type DatabaseStats struct { + ReplicationBytesReceived *SgwIntStat `json:"replication_bytes_received"` + ReplicationBytesSent *SgwIntStat `json:"replication_bytes_sent"` // The compaction_attachment_start_time. CompactionAttachmentStartTime *SgwIntStat `json:"compaction_attachment_start_time"` // The compaction_tombstone_start_time. @@ -493,6 +498,8 @@ type DatabaseStats struct { SyncFunctionTime *SgwIntStat `json:"sync_function_time"` // The total number of times that a sync function encountered an exception (across all collections). SyncFunctionExceptionCount *SgwIntStat `json:"sync_function_exception_count"` + // The total number of times a replication connection is rejected due ot it being over the threshold + NumReplicationsRejectedLimit *SgwIntStat `json:"num_replications_rejected_limit"` // These can be cleaned up in future versions of SGW, implemented as maps to reduce amount of potential risk // prior to Hydrogen release. 
These are not exported as part of prometheus and only exposed through expvars @@ -1198,6 +1205,10 @@ func (d *DbStats) initCBLReplicationPullStats() error { if err != nil { return err } + resUtil.RevErrorCount, err = NewIntStat(SubsystemReplicationPull, "rev_error_count", labelKeys, labelVals, prometheus.CounterValue, 0) + if err != nil { + return err + } resUtil.RevSendLatency, err = NewIntStat(SubsystemReplicationPull, "rev_send_latency", labelKeys, labelVals, prometheus.CounterValue, 0) if err != nil { return err @@ -1223,6 +1234,7 @@ func (d *DbStats) unregisterCBLReplicationPullStats() { prometheus.Unregister(d.CBLReplicationPullStats.RequestChangesTime) prometheus.Unregister(d.CBLReplicationPullStats.RevProcessingTime) prometheus.Unregister(d.CBLReplicationPullStats.RevSendCount) + prometheus.Unregister(d.CBLReplicationPullStats.RevErrorCount) prometheus.Unregister(d.CBLReplicationPullStats.RevSendLatency) } @@ -1248,6 +1260,10 @@ func (d *DbStats) initCBLReplicationPushStats() error { if err != nil { return err } + resUtil.DocPushErrorCount, err = NewIntStat(SubsystemReplicationPush, "doc_push_error_count", labelKeys, labelVals, prometheus.GaugeValue, 0) + if err != nil { + return err + } resUtil.ProposeChangeCount, err = NewIntStat(SubsystemReplicationPush, "propose_change_count", labelKeys, labelVals, prometheus.CounterValue, 0) if err != nil { return err @@ -1269,6 +1285,7 @@ func (d *DbStats) unregisterCBLReplicationPushStats() { prometheus.Unregister(d.CBLReplicationPushStats.AttachmentPushBytes) prometheus.Unregister(d.CBLReplicationPushStats.AttachmentPushCount) prometheus.Unregister(d.CBLReplicationPushStats.DocPushCount) + prometheus.Unregister(d.CBLReplicationPushStats.DocPushErrorCount) prometheus.Unregister(d.CBLReplicationPushStats.ProposeChangeCount) prometheus.Unregister(d.CBLReplicationPushStats.ProposeChangeTime) prometheus.Unregister(d.CBLReplicationPushStats.WriteProcessingTime) @@ -1284,6 +1301,14 @@ func (d *DbStats) initDatabaseStats() error { labelKeys := []string{DatabaseLabelKey} labelVals := []string{d.dbName} + resUtil.ReplicationBytesReceived, err = NewIntStat(SubsystemDatabaseKey, "replication_bytes_received", labelKeys, labelVals, prometheus.CounterValue, 0) + if err != nil { + return err + } + resUtil.ReplicationBytesSent, err = NewIntStat(SubsystemDatabaseKey, "replication_bytes_sent", labelKeys, labelVals, prometheus.CounterValue, 0) + if err != nil { + return err + } resUtil.CompactionAttachmentStartTime, err = NewIntStat(SubsystemDatabaseKey, "compaction_attachment_start_time", labelKeys, labelVals, prometheus.GaugeValue, 0) if err != nil { return err @@ -1412,6 +1437,10 @@ func (d *DbStats) initDatabaseStats() error { if err != nil { return err } + resUtil.NumReplicationsRejectedLimit, err = NewIntStat(SubsystemDatabaseKey, "num_replications_rejected_limit", labelKeys, labelVals, prometheus.CounterValue, 0) + if err != nil { + return err + } resUtil.ImportFeedMapStats = &ExpVarMapWrapper{new(expvar.Map).Init()} resUtil.CacheFeedMapStats = &ExpVarMapWrapper{new(expvar.Map).Init()} @@ -1421,6 +1450,8 @@ func (d *DbStats) initDatabaseStats() error { } func (d *DbStats) unregisterDatabaseStats() { + prometheus.Unregister(d.DatabaseStats.ReplicationBytesReceived) + prometheus.Unregister(d.DatabaseStats.ReplicationBytesSent) prometheus.Unregister(d.DatabaseStats.CompactionAttachmentStartTime) prometheus.Unregister(d.DatabaseStats.CompactionTombstoneStartTime) prometheus.Unregister(d.DatabaseStats.ConflictWriteCount) @@ -1453,6 +1484,7 @@ func (d 
*DbStats) unregisterDatabaseStats() { prometheus.Unregister(d.DatabaseStats.SyncFunctionCount) prometheus.Unregister(d.DatabaseStats.SyncFunctionTime) prometheus.Unregister(d.DatabaseStats.SyncFunctionExceptionCount) + prometheus.Unregister(d.DatabaseStats.NumReplicationsRejectedLimit) } func (d *DbStats) CollectionStat(scopeName, collectionName string) (*CollectionStats, error) { diff --git a/base/util.go b/base/util.go index 91484eff4d..39a49d981d 100644 --- a/base/util.go +++ b/base/util.go @@ -1652,7 +1652,7 @@ func GetHttpClientForWebSocket(insecureSkipVerify bool) *http.Client { // (There's really no reason for a caller to take note of the return value.) func turnOffNoDelay(ctx context.Context, conn net.Conn) bool { if tcpConn, ok := conn.(*net.TCPConn); !ok { - WarnfCtx(ctx, "Couldn't turn off NODELAY for %v: it's not a TCPConn", conn) + WarnfCtx(ctx, "Couldn't turn off NODELAY for %v: %T is not type *net.TCPConn", conn, conn) } else if err := tcpConn.SetNoDelay(false); err != nil { WarnfCtx(ctx, "Couldn't turn off NODELAY for %v: %v", conn, err) } else { diff --git a/base/util_testing.go b/base/util_testing.go index 806da93c07..44501772d7 100644 --- a/base/util_testing.go +++ b/base/util_testing.go @@ -174,6 +174,22 @@ func (b *TestBucket) GetMetadataStore() sgbucket.DataStore { return b.Bucket.DefaultDataStore() } +func (b *TestBucket) CreateDataStore(name sgbucket.DataStoreName) error { + dynamicDataStore, ok := b.Bucket.(sgbucket.DynamicDataStoreBucket) + if !ok { + return fmt.Errorf("Bucket %T doesn't support dynamic collection creation", b.Bucket) + } + return dynamicDataStore.CreateDataStore(name) +} + +func (b *TestBucket) DropDataStore(name sgbucket.DataStoreName) error { + dynamicDataStore, ok := b.GetUnderlyingBucket().(sgbucket.DynamicDataStoreBucket) + if !ok { + return fmt.Errorf("Bucket %T doesn't support dynamic collection creation", b.GetUnderlyingBucket()) + } + return dynamicDataStore.DropDataStore(name) +} + // GetDefaultDataStore returns the default DataStore. This is likely never actually wanted over GetSingleDataStore, so is left commented until absolutely required. // func (b *TestBucket) GetDefaultDataStore() sgbucket.DataStore { // b.t.Logf("Using default collection - Are you sure you want this instead of GetSingleDataStore() ?") diff --git a/db/active_replicator.go b/db/active_replicator.go index 39e7e86cb2..17147f0207 100644 --- a/db/active_replicator.go +++ b/db/active_replicator.go @@ -214,7 +214,7 @@ func connect(arc *activeReplicatorCommon, idSuffix string) (blipSender *blip.Sen blipContext.WebsocketPingInterval = arc.config.WebsocketPingInterval blipContext.OnExitCallback = func() { // fall into a reconnect loop only if the connection is unexpectedly closed. 
- if arc.ctx.Err() == nil && arc.config.TotalReconnectTimeout != 0 { + if arc.ctx.Err() == nil { go arc.reconnectLoop() } } diff --git a/db/active_replicator_common.go b/db/active_replicator_common.go index 884e7a39ce..2ff157c07f 100644 --- a/db/active_replicator_common.go +++ b/db/active_replicator_common.go @@ -153,7 +153,9 @@ func (a *activeReplicatorCommon) reconnectLoop() { // if a reconnect timeout is set, we'll wrap the existing so both can stop the retry loop var deadlineCancel context.CancelFunc - ctx, deadlineCancel = context.WithDeadline(ctx, time.Now().Add(a.config.TotalReconnectTimeout)) + if a.config.TotalReconnectTimeout != 0 { + ctx, deadlineCancel = context.WithDeadline(ctx, time.Now().Add(a.config.TotalReconnectTimeout)) + } sleeperFunc := base.SleeperFuncCtx( base.CreateIndefiniteMaxDoublingSleeperFunc( diff --git a/db/active_replicator_config.go b/db/active_replicator_config.go index 31789dde1f..2cf61b50d6 100644 --- a/db/active_replicator_config.go +++ b/db/active_replicator_config.go @@ -80,7 +80,7 @@ type ActiveReplicatorConfig struct { InitialReconnectInterval time.Duration // MaxReconnectInterval is the maximum amount of time to wait between exponential backoff reconnect attempts. MaxReconnectInterval time.Duration - // TotalReconnectTimeout, if non-zero, is the amount of time to wait before giving up trying to reconnect. Zero disables reconnect entirely. + // TotalReconnectTimeout, if non-zero, is the amount of time to wait before giving up trying to reconnect. Zero value will retry indefinitely. TotalReconnectTimeout time.Duration // CollectionsEnabled can be set to replicate one or more named collections, rather than just the default collection. diff --git a/db/active_replicator_pull.go b/db/active_replicator_pull.go index 9d355ab773..8e4ab5ec3e 100644 --- a/db/active_replicator_pull.go +++ b/db/active_replicator_pull.go @@ -59,7 +59,7 @@ func (apr *ActivePullReplicator) Start(ctx context.Context) error { base.WarnfCtx(apr.ctx, "Couldn't connect: %v", err) if errors.Is(err, fatalReplicatorConnectError) { base.WarnfCtx(apr.ctx, "Stopping replication connection attempt") - } else if apr.config.TotalReconnectTimeout != 0 { + } else { base.InfofCtx(apr.ctx, base.KeyReplicate, "Attempting to reconnect in background: %v", err) apr.reconnectActive.Set(true) go apr.reconnectLoop() diff --git a/db/active_replicator_push.go b/db/active_replicator_push.go index 58e50bc774..98b6e5eb04 100644 --- a/db/active_replicator_push.go +++ b/db/active_replicator_push.go @@ -63,7 +63,7 @@ func (apr *ActivePushReplicator) Start(ctx context.Context) error { base.WarnfCtx(apr.ctx, "Couldn't connect: %s", err) if errors.Is(err, fatalReplicatorConnectError) { base.WarnfCtx(apr.ctx, "Stopping replication connection attempt") - } else if apr.config.TotalReconnectTimeout != 0 { + } else { base.InfofCtx(apr.ctx, base.KeyReplicate, "Attempting to reconnect in background: %v", err) apr.reconnectActive.Set(true) go apr.reconnectLoop() diff --git a/db/attachment_compaction.go b/db/attachment_compaction.go index d0e565d0ec..cd5fb914bf 100644 --- a/db/attachment_compaction.go +++ b/db/attachment_compaction.go @@ -29,7 +29,7 @@ const ( CleanupPhase = "cleanup" ) -func attachmentCompactMarkPhase(ctx context.Context, dataStore base.DataStore, collectionID uint32, db *Database, compactionID string, terminator *base.SafeTerminator, markedAttachmentCount *base.AtomicInt) (count int64, vbUUIDs []uint64, err error) { +func attachmentCompactMarkPhase(ctx context.Context, dataStore base.DataStore, 
collectionID uint32, db *Database, compactionID string, terminator *base.SafeTerminator, markedAttachmentCount *base.AtomicInt) (count int64, vbUUIDs []uint64, checkpointPrefix string, err error) { base.InfofCtx(ctx, base.KeyAll, "Starting first phase of attachment compaction (mark phase) with compactionID: %q", compactionID) compactionLoggingID := "Compaction Mark: " + compactionID @@ -131,32 +131,30 @@ func attachmentCompactMarkPhase(ctx context.Context, dataStore base.DataStore, c clientOptions, err := getCompactionDCPClientOptions(collectionID, db.Options.GroupID, db.MetadataKeys.DCPCheckpointPrefix(db.Options.GroupID)) if err != nil { - return 0, nil, err + return 0, nil, "", err } base.InfofCtx(ctx, base.KeyAll, "[%s] Starting DCP feed for mark phase of attachment compaction", compactionLoggingID) - dcpFeedKey := generateCompactionDCPStreamName(compactionID, MarkPhase) - if err != nil { - return 0, nil, err - } + dcpFeedKey := GenerateCompactionDCPStreamName(compactionID, MarkPhase) bucket, err := base.AsGocbV2Bucket(db.Bucket) if err != nil { - return 0, nil, err + return 0, nil, "", err } dcpClient, err := base.NewDCPClient(dcpFeedKey, callback, *clientOptions, bucket) if err != nil { base.WarnfCtx(ctx, "[%s] Failed to create attachment compaction DCP client! %v", compactionLoggingID, err) - return 0, nil, err + return 0, nil, "", err } + metadataKeyPrefix := dcpClient.GetMetadataKeyPrefix() doneChan, err := dcpClient.Start() if err != nil { base.WarnfCtx(ctx, "[%s] Failed to start attachment compaction DCP feed! %v", compactionLoggingID, err) _ = dcpClient.Close() - return 0, nil, err + return 0, nil, metadataKeyPrefix, err } base.DebugfCtx(ctx, base.KeyAll, "[%s] DCP feed started.", compactionLoggingID) @@ -165,27 +163,27 @@ func attachmentCompactMarkPhase(ctx context.Context, dataStore base.DataStore, c base.InfofCtx(ctx, base.KeyAll, "[%s] Mark phase of attachment compaction completed. Marked %d attachments", compactionLoggingID, markedAttachmentCount.Value()) err = dcpClient.Close() if markProcessFailureErr != nil { - return markedAttachmentCount.Value(), nil, markProcessFailureErr + return markedAttachmentCount.Value(), nil, metadataKeyPrefix, markProcessFailureErr } case <-terminator.Done(): base.DebugfCtx(ctx, base.KeyAll, "[%s] Terminator closed. Stopping mark phase.", compactionLoggingID) err = dcpClient.Close() if markProcessFailureErr != nil { - return markedAttachmentCount.Value(), nil, markProcessFailureErr + return markedAttachmentCount.Value(), nil, metadataKeyPrefix, markProcessFailureErr } if err != nil { - return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), err + return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), metadataKeyPrefix, err } err = <-doneChan if err != nil { - return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), err + return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), metadataKeyPrefix, err } base.InfofCtx(ctx, base.KeyAll, "[%s] Mark phase of attachment compaction was terminated. 
Marked %d attachments", compactionLoggingID, markedAttachmentCount.Value()) } - return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), err + return markedAttachmentCount.Value(), base.GetVBUUIDs(dcpClient.GetMetadata()), metadataKeyPrefix, err } // AttachmentsMetaMap struct is a very minimal struct to unmarshal into when getting attachments from bodies @@ -363,7 +361,7 @@ func attachmentCompactSweepPhase(ctx context.Context, dataStore base.DataStore, } clientOptions.InitialMetadata = base.BuildDCPMetadataSliceFromVBUUIDs(vbUUIDs) - dcpFeedKey := generateCompactionDCPStreamName(compactionID, SweepPhase) + dcpFeedKey := GenerateCompactionDCPStreamName(compactionID, SweepPhase) bucket, err := base.AsGocbV2Bucket(db.Bucket) if err != nil { @@ -408,7 +406,7 @@ func attachmentCompactSweepPhase(ctx context.Context, dataStore base.DataStore, return purgedAttachmentCount.Value(), err } -func attachmentCompactCleanupPhase(ctx context.Context, dataStore base.DataStore, collectionID uint32, db *Database, compactionID string, vbUUIDs []uint64, terminator *base.SafeTerminator) error { +func attachmentCompactCleanupPhase(ctx context.Context, dataStore base.DataStore, collectionID uint32, db *Database, compactionID string, vbUUIDs []uint64, terminator *base.SafeTerminator) (string, error) { base.InfofCtx(ctx, base.KeyAll, "Starting third phase of attachment compaction (cleanup phase) with compactionID: %q", compactionID) compactionLoggingID := "Compaction Cleanup: " + compactionID @@ -495,31 +493,32 @@ func attachmentCompactCleanupPhase(ctx context.Context, dataStore base.DataStore clientOptions, err := getCompactionDCPClientOptions(collectionID, db.Options.GroupID, db.MetadataKeys.DCPCheckpointPrefix(db.Options.GroupID)) if err != nil { - return err + return "", err } clientOptions.InitialMetadata = base.BuildDCPMetadataSliceFromVBUUIDs(vbUUIDs) base.InfofCtx(ctx, base.KeyAll, "[%s] Starting DCP feed for cleanup phase of attachment compaction", compactionLoggingID) - dcpFeedKey := generateCompactionDCPStreamName(compactionID, CleanupPhase) + dcpFeedKey := GenerateCompactionDCPStreamName(compactionID, CleanupPhase) bucket, err := base.AsGocbV2Bucket(db.Bucket) if err != nil { - return err + return "", err } dcpClient, err := base.NewDCPClient(dcpFeedKey, callback, *clientOptions, bucket) if err != nil { base.WarnfCtx(ctx, "[%s] Failed to create attachment compaction DCP client! %v", compactionLoggingID, err) - return err + return "", err } + metadataKeyPrefix := dcpClient.GetMetadataKeyPrefix() doneChan, err := dcpClient.Start() if err != nil { base.WarnfCtx(ctx, "[%s] Failed to start attachment compaction DCP feed! %v", compactionLoggingID, err) // simplify close in CBG-2234 _ = dcpClient.Close() - return err + return metadataKeyPrefix, err } select { @@ -532,18 +531,18 @@ func attachmentCompactCleanupPhase(ctx context.Context, dataStore base.DataStore err = dcpClient.Close() if err != nil { base.WarnfCtx(ctx, "[%s] Failed to close attachment compaction DCP client! 
%v", compactionLoggingID, err) - return err + return metadataKeyPrefix, err } err = <-doneChan if err != nil { - return err + return metadataKeyPrefix, err } base.InfofCtx(ctx, base.KeyAll, "[%s] Cleanup phase of attachment compaction was terminated", compactionLoggingID) } - return err + return metadataKeyPrefix, err } // getCompactionIDSubDocPath is just a tiny helper func that just concatenates the subdoc path we're using to store @@ -566,7 +565,7 @@ func getCompactionDCPClientOptions(collectionID uint32, groupID string, prefix s } -func generateCompactionDCPStreamName(compactionID, compactionAction string) string { +func GenerateCompactionDCPStreamName(compactionID, compactionAction string) string { return fmt.Sprintf( "sg-%v:att_compaction:%v_%v", base.ProductAPIVersion, diff --git a/db/attachment_compaction_test.go b/db/attachment_compaction_test.go index 17a00bc56a..be96483a22 100644 --- a/db/attachment_compaction_test.go +++ b/db/attachment_compaction_test.go @@ -18,6 +18,7 @@ import ( "testing" "time" + "github.com/couchbase/gocbcore/v10" "github.com/couchbase/sync_gateway/base" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -60,7 +61,7 @@ func TestAttachmentMark(t *testing.T) { attKeys = append(attKeys, createDocWithInBodyAttachment(t, ctx, "inBodyDoc", []byte(`{}`), "attForInBodyRef", []byte(`{"val": "inBodyAtt"}`), databaseCollection)) terminator := base.NewSafeTerminator() - attachmentsMarked, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, t.Name(), terminator, &base.AtomicInt{}) + attachmentsMarked, _, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, t.Name(), terminator, &base.AtomicInt{}) assert.NoError(t, err) assert.Equal(t, int64(13), attachmentsMarked) @@ -196,7 +197,7 @@ func TestAttachmentCleanup(t *testing.T) { } terminator := base.NewSafeTerminator() - err := attachmentCompactCleanupPhase(ctx, dataStore, collectionID, testDb, t.Name(), nil, terminator) + _, err := attachmentCompactCleanupPhase(ctx, dataStore, collectionID, testDb, t.Name(), nil, terminator) assert.NoError(t, err) for _, docID := range singleMarkedAttIDs { @@ -234,6 +235,90 @@ func TestAttachmentCleanup(t *testing.T) { } +func TestAttachmentCleanupRollback(t *testing.T) { + if base.UnitTestUrlIsWalrus() { + t.Skip("This test only works against Couchbase Server since it requires DCP") + } + base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll) + dbcOptions := DatabaseContextOptions{ + Scopes: GetScopesOptionsDefaultCollectionOnly(t), + } + testDb, ctx := SetupTestDBWithOptions(t, dbcOptions) + defer testDb.Close(ctx) + + var garbageVBUUID gocbcore.VbUUID = 1234 + collection := GetSingleDatabaseCollection(t, testDb.DatabaseContext) + dataStore := collection.dataStore + collectionID := collection.GetCollectionID() + + makeMarkedDoc := func(docid string, compactID string) { + err := dataStore.SetRaw(docid, 0, nil, []byte("{}")) + assert.NoError(t, err) + _, err = dataStore.SetXattr(docid, getCompactionIDSubDocPath(compactID), []byte(strconv.Itoa(int(time.Now().Unix())))) + assert.NoError(t, err) + } + + // create some marked attachments + singleMarkedAttIDs := make([]string, 0, 100) + for i := 0; i < 100; i++ { + docID := fmt.Sprintf("%s%s%d", base.AttPrefix, "marked", i) + makeMarkedDoc(docID, t.Name()) + singleMarkedAttIDs = append(singleMarkedAttIDs, docID) + } + + // assert there are marked attachments to clean up + for _, docID := range singleMarkedAttIDs { + var xattr map[string]interface{} + _, err := 
dataStore.GetXattr(docID, base.AttachmentCompactionXattrName, &xattr) + assert.NoError(t, err) + } + + bucket, err := base.AsGocbV2Bucket(testDb.Bucket) + require.NoError(t, err) + dcpFeedKey := GenerateCompactionDCPStreamName(t.Name(), CleanupPhase) + clientOptions, err := getCompactionDCPClientOptions(collectionID, testDb.Options.GroupID, testDb.MetadataKeys.DCPCheckpointPrefix(testDb.Options.GroupID)) + require.NoError(t, err) + dcpClient, err := base.NewDCPClient(dcpFeedKey, nil, *clientOptions, bucket) + require.NoError(t, err) + + // alter dcp metadata to feed into the compaction manager + vbUUID := base.GetVBUUIDs(dcpClient.GetMetadata()) + vbUUID[0] = uint64(garbageVBUUID) + + metadataKeys := base.NewMetadataKeys(testDb.Options.MetadataID) + testDb.AttachmentCompactionManager = NewAttachmentCompactionManager(dataStore, metadataKeys) + manager := AttachmentCompactionManager{CompactID: t.Name(), Phase: CleanupPhase, VBUUIDs: vbUUID} + testDb.AttachmentCompactionManager.Process = &manager + + terminator := base.NewSafeTerminator() + err = testDb.AttachmentCompactionManager.Process.Run(ctx, map[string]interface{}{"database": testDb}, testDb.AttachmentCompactionManager.UpdateStatusClusterAware, terminator) + require.NoError(t, err) + + err = WaitForConditionWithOptions(func() bool { + var status AttachmentManagerResponse + rawStatus, err := testDb.AttachmentCompactionManager.GetStatus() + assert.NoError(t, err) + err = base.JSONUnmarshal(rawStatus, &status) + require.NoError(t, err) + + if status.State == BackgroundProcessStateCompleted { + return true + } + + return false + }, 100, 1000) + require.NoError(t, err) + + // assert that the marked attachments have been "cleaned up" + for _, docID := range singleMarkedAttIDs { + var xattr map[string]interface{} + _, err := dataStore.GetXattr(docID, base.AttachmentCompactionXattrName, &xattr) + assert.Error(t, err) + assert.True(t, errors.Is(err, base.ErrXattrNotFound)) + } + +} + func TestAttachmentMarkAndSweepAndCleanup(t *testing.T) { base.SetUpTestLogging(t, base.LevelDebug, base.KeyAll) if base.UnitTestUrlIsWalrus() { @@ -271,7 +356,7 @@ func TestAttachmentMarkAndSweepAndCleanup(t *testing.T) { } terminator := base.NewSafeTerminator() - attachmentsMarked, vbUUIDS, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, t.Name(), terminator, &base.AtomicInt{}) + attachmentsMarked, vbUUIDS, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, t.Name(), terminator, &base.AtomicInt{}) assert.NoError(t, err) assert.Equal(t, int64(10), attachmentsMarked) @@ -293,7 +378,7 @@ func TestAttachmentMarkAndSweepAndCleanup(t *testing.T) { } } - err = attachmentCompactCleanupPhase(ctx, dataStore, collectionID, testDb, t.Name(), vbUUIDS, terminator) + _, err = attachmentCompactCleanupPhase(ctx, dataStore, collectionID, testDb, t.Name(), vbUUIDS, terminator) assert.NoError(t, err) for _, attDocKey := range attKeys { @@ -620,7 +705,7 @@ func TestAttachmentDifferentVBUUIDsBetweenPhases(t *testing.T) { // Run mark phase as usual terminator := base.NewSafeTerminator() - _, vbUUIDs, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDB, t.Name(), terminator, &base.AtomicInt{}) + _, vbUUIDs, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDB, t.Name(), terminator, &base.AtomicInt{}) assert.NoError(t, err) // Manually modify a vbUUID and ensure the Sweep phase errors @@ -891,7 +976,7 @@ func TestAttachmentCompactIncorrectStat(t *testing.T) { stat := &base.AtomicInt{} count := int64(0) 
go func() { - attachmentCount, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, "mark", terminator, stat) + attachmentCount, _, _, err := attachmentCompactMarkPhase(ctx, dataStore, collectionID, testDb, "mark", terminator, stat) atomic.StoreInt64(&count, attachmentCount) require.NoError(t, err) }() diff --git a/db/background_mgr_attachment_compaction.go b/db/background_mgr_attachment_compaction.go index d085eac459..de28b8a0e8 100644 --- a/db/background_mgr_attachment_compaction.go +++ b/db/background_mgr_attachment_compaction.go @@ -10,9 +10,11 @@ package db import ( "context" + "errors" "sync" "time" + "github.com/couchbase/gocbcore/v10" "github.com/couchbase/sync_gateway/base" "github.com/google/uuid" ) @@ -100,6 +102,23 @@ func (a *AttachmentCompactionManager) Init(ctx context.Context, options map[stri return newRunInit() } +func (a *AttachmentCompactionManager) PurgeDCPMetadata(ctx context.Context, datastore base.DataStore, database *Database, metadataKeyPrefix string) error { + + bucket, err := base.AsGocbV2Bucket(database.Bucket) + if err != nil { + return err + } + numVbuckets, err := bucket.GetMaxVbno() + if err != nil { + return err + } + + metadata := base.NewDCPMetadataCS(datastore, numVbuckets, base.DefaultNumWorkers, metadataKeyPrefix) + base.InfofCtx(ctx, base.KeyDCP, "purging persisted dcp metadata for attachment compaction run %s", a.CompactID) + metadata.Purge(base.DefaultNumWorkers) + return nil +} + func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[string]interface{}, persistClusterStatusCallback updateStatusCallbackFunc, terminator *base.SafeTerminator) error { database := options["database"].(*Database) @@ -110,6 +129,7 @@ func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[strin // but we'll consider that a follow-up enhancement to point this compaction operation at arbitrary collections. dataStore := database.Bucket.DefaultDataStore() collectionID := base.DefaultCollectionID + var metadataKeyPrefix string persistClusterStatus := func() { err := persistClusterStatusCallback() @@ -120,15 +140,29 @@ func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[strin defer persistClusterStatus() + var rollbackErr gocbcore.DCPRollbackError + // Need to check the current phase in the event we are resuming - No need to run mark again if we got as far as // cleanup last time... 
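For readers unfamiliar with the retry helper used in the mark and cleanup cases below: base.RetryLoop drives a worker that reports whether a failure is retryable, and the sleeper bounds the backoff between attempts. A minimal sketch of that contract, with runPhase and isRollbackError as hypothetical stand-ins for the real phase call and rollback check:

// Sketch only: illustrates the worker/sleeper contract used below for rollback handling.
func retryPhaseSketch(runPhase func() error, isRollbackError func(error) bool) error {
	worker := func() (shouldRetry bool, err error, value interface{}) {
		err = runPhase()
		if err != nil && isRollbackError(err) {
			return true, err, nil // retry only when the DCP stream indicated a rollback
		}
		return false, err, nil // success, or a non-recoverable error
	}
	err, _ := base.RetryLoop("attachmentCompactPhase", worker, base.CreateMaxDoublingSleeperFunc(25, 100, 10000))
	return err
}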
var err error switch a.Phase { case "mark", "": a.SetPhase("mark") - persistClusterStatus() - _, a.VBUUIDs, err = attachmentCompactMarkPhase(ctx, dataStore, collectionID, database, a.CompactID, terminator, &a.MarkedAttachments) + worker := func() (shouldRetry bool, err error, value interface{}) { + persistClusterStatus() + _, a.VBUUIDs, metadataKeyPrefix, err = attachmentCompactMarkPhase(ctx, dataStore, collectionID, database, a.CompactID, terminator, &a.MarkedAttachments) + if err != nil { + shouldRetry, err = a.handleAttachmentCompactionRollbackError(ctx, options, dataStore, database, err, MarkPhase, metadataKeyPrefix) + } + return shouldRetry, err, nil + } + // retry loop for handling a rollback during mark phase of compaction process + err, _ = base.RetryLoop("attachmentCompactMarkPhase", worker, base.CreateMaxDoublingSleeperFunc(25, 100, 10000)) if err != nil || terminator.IsClosed() { + if errors.As(err, &rollbackErr) || errors.Is(err, base.ErrVbUUIDMismatch) { + // log warning to show we hit max number of retries + base.WarnfCtx(ctx, "maximum retry attempts reached on mark phase: %v", err) + } return err } fallthrough @@ -142,9 +176,21 @@ func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[strin fallthrough case "cleanup": a.SetPhase("cleanup") - persistClusterStatus() - err := attachmentCompactCleanupPhase(ctx, dataStore, collectionID, database, a.CompactID, a.VBUUIDs, terminator) + worker := func() (shouldRetry bool, err error, value interface{}) { + persistClusterStatus() + metadataKeyPrefix, err = attachmentCompactCleanupPhase(ctx, dataStore, collectionID, database, a.CompactID, a.VBUUIDs, terminator) + if err != nil { + shouldRetry, err = a.handleAttachmentCompactionRollbackError(ctx, options, dataStore, database, err, CleanupPhase, metadataKeyPrefix) + } + return shouldRetry, err, nil + } + // retry loop for handling a rollback during mark phase of compaction process + err, _ = base.RetryLoop("attachmentCompactCleanupPhase", worker, base.CreateMaxDoublingSleeperFunc(25, 100, 10000)) if err != nil || terminator.IsClosed() { + if errors.As(err, &rollbackErr) || errors.Is(err, base.ErrVbUUIDMismatch) { + // log warning to show we hit max number of retries + base.WarnfCtx(ctx, "maximum retry attempts reached on cleanup phase: %v", err) + } return err } } @@ -153,6 +199,36 @@ func (a *AttachmentCompactionManager) Run(ctx context.Context, options map[strin return nil } +func (a *AttachmentCompactionManager) handleAttachmentCompactionRollbackError(ctx context.Context, options map[string]interface{}, dataStore base.DataStore, database *Database, err error, phase, keyPrefix string) (bool, error) { + var rollbackErr gocbcore.DCPRollbackError + if errors.As(err, &rollbackErr) || errors.Is(err, base.ErrVbUUIDMismatch) { + base.InfofCtx(ctx, base.KeyDCP, "rollback indicated on %s phase of attachment compaction, resetting the task", phase) + // to rollback any phase for attachment compaction we need to purge all persisted dcp metadata + err = a.PurgeDCPMetadata(ctx, dataStore, database, keyPrefix) + if err != nil { + base.WarnfCtx(ctx, "error occurred during purging of dcp metadata: %w", err) + return false, err + } + if phase == MarkPhase { + // initialise new compaction run as we want to start the phase mark again in event of rollback + err = a.Init(ctx, options, nil) + if err != nil { + base.WarnfCtx(ctx, "error on initialization of new run after rollback has been indicated, %w", err) + return false, err + } + } else { + // we only handle rollback for mark and 
cleanup so if we call here it will be for cleanup phase + // we need to clear the vbUUID's on the manager for cleanup phase otherwise we will end up in loop of constant rollback + // as these are used for the initial metadata on the client + a.VBUUIDs = nil + } + // we should try again if it is rollback error + return true, nil + } + // if error isn't rollback then assume it's not recoverable + return false, err +} + func (a *AttachmentCompactionManager) SetPhase(phase string) { a.lock.Lock() defer a.lock.Unlock() diff --git a/db/blip.go b/db/blip.go index 39a927e387..198f022729 100644 --- a/db/blip.go +++ b/db/blip.go @@ -57,6 +57,8 @@ func NewSGBlipContextWithProtocols(ctx context.Context, id string, protocol ...s bc, err = blip.NewContextCustomID(id, protocol...) } + bc.MaxDispatchedBytes = BlipMaxIncomingBytesBeingDispatched + bc.LogMessages = base.LogDebugEnabled(base.KeyWebSocket) bc.LogFrames = base.LogDebugEnabled(base.KeyWebSocketFrame) bc.Logger = defaultBlipLogger(ctx) diff --git a/db/blip_handler.go b/db/blip_handler.go index 109bbeb871..aed8bc2c94 100644 --- a/db/blip_handler.go +++ b/db/blip_handler.go @@ -51,8 +51,25 @@ var kConnectedClientHandlersByProfile = map[string]blipHandlerFunc{ MessageGraphQL: userBlipHandler((*blipHandler).handleGraphQL), } +// max number of concurrent handlers for each message type. Default is 0 meaning 'unlimited'. +var handlerConcurrencyByProfile = map[string]int{ + MessageChanges: 1, + MessageProposeChanges: 1, + MessageRev: 16, + MessageGetAttachment: 10, + MessageGetRev: 10, + MessagePutRev: 10, +} + +// Handlers that run immediately, i.e. before the message has completely arrived. +// This guarantees they are called in message order. +var handlerImmediacyByProfile = map[string]bool{ + MessageChanges: true, + MessageProposeChanges: true, +} + // maxInFlightChangesBatches is the maximum number of in-flight changes batches a client is allowed to send without being throttled. -const maxInFlightChangesBatches = 2 +const maxInFlightChangesBatches = 4 type blipHandler struct { *BlipSyncContext @@ -295,6 +312,19 @@ func (bh *blipHandler) handleSubChanges(rq *blip.Message) error { continuous := subChangesParams.continuous() + requestPlusSeq := uint64(0) + // If non-continuous, check whether requestPlus handling is set for request or via database config + if continuous == false { + useRequestPlus := subChangesParams.requestPlus(bh.db.Options.ChangesRequestPlus) + if useRequestPlus { + seq, requestPlusErr := bh.db.GetRequestPlusSequence() + if requestPlusErr != nil { + return base.HTTPErrorf(http.StatusServiceUnavailable, "Unable to retrieve current sequence for requestPlus=true: %v", requestPlusErr) + } + requestPlusSeq = seq + } + } + // Start asynchronous changes goroutine go func() { // Pull replication stats by type @@ -325,6 +355,7 @@ func (bh *blipHandler) handleSubChanges(rq *blip.Message) error { clientType: clientType, ignoreNoConflicts: clientType == clientTypeSGR2, // force this side to accept a "changes" message, even in no conflicts mode for SGR2. 
changesCtx: collectionCtx.changesCtx, + requestPlusSeq: requestPlusSeq, }) base.DebugfCtx(bh.loggingCtx, base.KeySyncMsg, "#%d: Type:%s --> Time:%v", bh.serialNumber, rq.Profile(), time.Since(startTime)) }() @@ -358,6 +389,7 @@ type sendChangesOptions struct { revocations bool ignoreNoConflicts bool changesCtx context.Context + requestPlusSeq uint64 } type changesDeletedFlag uint @@ -385,14 +417,15 @@ func (bh *blipHandler) sendChanges(sender *blip.Sender, opts *sendChangesOptions base.InfofCtx(bh.loggingCtx, base.KeySync, "Sending changes since %v", opts.since) options := ChangesOptions{ - Since: opts.since, - Conflicts: false, // CBL 2.0/BLIP don't support branched rev trees (LiteCore #437) - Continuous: opts.continuous, - ActiveOnly: opts.activeOnly, - Revocations: opts.revocations, - LoggingCtx: bh.loggingCtx, - clientType: opts.clientType, - ChangesCtx: opts.changesCtx, + Since: opts.since, + Conflicts: false, // CBL 2.0/BLIP don't support branched rev trees (LiteCore #437) + Continuous: opts.continuous, + ActiveOnly: opts.activeOnly, + Revocations: opts.revocations, + LoggingCtx: bh.loggingCtx, + clientType: opts.clientType, + ChangesCtx: opts.changesCtx, + RequestPlusSeq: opts.requestPlusSeq, } channelSet := opts.channels @@ -519,6 +552,7 @@ func (bh *blipHandler) buildChangesRow(change *ChangeEntry, revID string) []inte func (bh *blipHandler) sendBatchOfChanges(sender *blip.Sender, changeArray [][]interface{}, ignoreNoConflicts bool) error { outrq := blip.NewRequest() + outrq.SetUrgent(true) outrq.SetProfile("changes") if ignoreNoConflicts { outrq.Properties[ChangesMessageIgnoreNoConflicts] = trueProperty @@ -532,42 +566,43 @@ func (bh *blipHandler) sendBatchOfChanges(sender *blip.Sender, changeArray [][]i } if len(changeArray) > 0 { + // Wait before sending, if client has not caught up: + bh.inFlightChangesThrottle <- struct{}{} + // Check for user updates before creating the db copy for handleChangesResponse if err := bh.refreshUser(); err != nil { return err } - handleChangesResponseDbCollection, err := bh.copyDatabaseCollectionWithUser(bh.collectionIdx) - if err != nil { - return err - } - sendTime := time.Now() - if !bh.sendBLIPMessage(sender, outrq) { - return ErrClosedBLIPSender - } - - bh.inFlightChangesThrottle <- struct{}{} atomic.AddInt64(&bh.changesPendingResponseCount, 1) - bh.replicationStats.SendChangesCount.Add(int64(len(changeArray))) - // Spawn a goroutine to await the client's response: - go func(bh *blipHandler, sender *blip.Sender, response *blip.Message, changeArray [][]interface{}, sendTime time.Time, dbCollection *DatabaseCollectionWithUser) { - if err := bh.handleChangesResponse(sender, response, changeArray, sendTime, dbCollection, bh.collectionIdx); err != nil { - base.WarnfCtx(bh.loggingCtx, "Error from bh.handleChangesResponse: %v", err) - if bh.fatalErrorCallback != nil { - bh.fatalErrorCallback(err) + + // await the client's response: + outrq.OnResponse(func(response *blip.Message) { + bh.threadPool.Go(func() { + if err := bh.handleChangesResponse(sender, response, changeArray, sendTime, bh.collectionIdx); err != nil { + base.WarnfCtx(bh.loggingCtx, "Error from bh.handleChangesResponse: %v", err) + if bh.fatalErrorCallback != nil { + bh.fatalErrorCallback(err) + } } - } + base.InfofCtx(bh.loggingCtx, base.KeySync, "...sent requested revs, from %s", changeArray[0][0].(SequenceID).String()) - // Sent all of the revs for this changes batch, allow another changes batch to be sent. 
- select { - case <-bh.inFlightChangesThrottle: - case <-bh.terminator: - } + // Sent all of the revs for this changes batch, allow another changes batch to be sent. + select { + case <-bh.inFlightChangesThrottle: + case <-bh.terminator: + } + + atomic.AddInt64(&bh.changesPendingResponseCount, -1) + }) + }) + + if !bh.sendBLIPMessage(sender, outrq) { + return ErrClosedBLIPSender + } - atomic.AddInt64(&bh.changesPendingResponseCount, -1) - }(bh, sender, outrq.Response(), changeArray, sendTime, handleChangesResponseDbCollection) } else { outrq.SetNoReply(true) if !bh.sendBLIPMessage(sender, outrq) { @@ -745,6 +780,7 @@ func (bh *blipHandler) handleProposeChanges(rq *blip.Message) error { output := bytes.NewBuffer(make([]byte, 0, 5*len(changeList))) output.Write([]byte("[")) nWritten := 0 + nRequested := 0 // proposeChanges stats startTime := time.Now() @@ -764,7 +800,10 @@ func (bh *blipHandler) handleProposeChanges(rq *blip.Message) error { if status == ProposedRev_OK_IsNew { // Remember that the doc doesn't exist locally, in order to optimize the upcoming Put: bh.collectionCtx.notePendingInsertion(docID) - } else if status != ProposedRev_OK { + nRequested++ + } else if status == ProposedRev_OK { + nRequested++ + } else { // Reject the proposed change. // Skip writing trailing zeroes; but if we write a number afterwards we have to catch up if nWritten > 0 { @@ -789,6 +828,12 @@ func (bh *blipHandler) handleProposeChanges(rq *blip.Message) error { } } output.Write([]byte("]")) + + if nRequested > 0 { + // Notify the sequenceAllocator it's going to be asked for nRequested sequences soon: + bh.db.sequences.reserveRequest(uint64(nRequested)) + } + response := rq.Response() if bh.sgCanUseDeltas { base.DebugfCtx(bh.loggingCtx, base.KeyAll, "Setting deltas=true property on proposeChanges response") @@ -801,51 +846,54 @@ func (bh *blipHandler) handleProposeChanges(rq *blip.Message) error { // ////// DOCUMENTS: -func (bsc *BlipSyncContext) sendRevAsDelta(sender *blip.Sender, docID, revID string, deltaSrcRevID string, seq SequenceID, knownRevs map[string]bool, maxHistory int, handleChangesResponseCollection *DatabaseCollectionWithUser, collectionIdx *int) error { +// Returns false, nil if there is no error but the rev can't be sent as a delta. +func (bsc *BlipSyncContext) sendRevAsDelta(collection *DatabaseCollectionWithUser, r *revToSend, knownRevs map[string]bool, deltaSrcRevID string) (bool, error) { bsc.replicationStats.SendRevDeltaRequestedCount.Add(1) - revDelta, redactedRev, err := handleChangesResponseCollection.GetDelta(bsc.loggingCtx, docID, deltaSrcRevID, revID) + revDelta, redactedRev, err := collection.GetDelta(bsc.loggingCtx, r.docID, deltaSrcRevID, r.revID) if err == ErrForbidden { // nolint: gocritic // can't convert if/else if to switch since base.IsFleeceDeltaError is not switchable - return err + return false, err } else if base.IsFleeceDeltaError(err) { // Something went wrong in the diffing library. We want to know about this! - base.WarnfCtx(bsc.loggingCtx, "Falling back to full body replication. Error generating delta from %s to %s for key %s - err: %v", deltaSrcRevID, revID, base.UD(docID), err) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.WarnfCtx(bsc.loggingCtx, "Falling back to full body replication. 
Error generating delta from %s to %s for key %s - err: %v", deltaSrcRevID, r.revID, base.UD(r.docID), err) + return false, nil } else if err == base.ErrDeltaSourceIsTombstone { - base.TracefCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Delta source %s is tombstone. Unable to generate delta to %s for key %s", deltaSrcRevID, revID, base.UD(docID)) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.TracefCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Delta source %s is tombstone. Unable to generate delta to %s for key %s", deltaSrcRevID, r.revID, base.UD(r.docID)) + return false, nil } else if err != nil { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Couldn't get delta from %s to %s for key %s - err: %v", deltaSrcRevID, revID, base.UD(docID), err) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Couldn't get delta from %s to %s for key %s - err: %v", deltaSrcRevID, r.revID, base.UD(r.docID), err) + return false, nil } if redactedRev != nil { - history := toHistory(redactedRev.History, knownRevs, maxHistory) - properties := blipRevMessageProperties(history, redactedRev.Deleted, seq) - return bsc.sendRevisionWithProperties(sender, docID, revID, collectionIdx, redactedRev.BodyBytes, nil, properties, seq, nil) + history := toHistory(redactedRev.History, knownRevs, r.maxHistory) + properties := blipRevMessageProperties(history, redactedRev.Deleted, r.seq) + return true, bsc.sendRevisionWithProperties(r, redactedRev.BodyBytes, nil, properties, nil) } if revDelta == nil { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Couldn't get delta from %s to %s for key %s", deltaSrcRevID, revID, base.UD(docID)) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Falling back to full body replication. Couldn't get delta from %s to %s for key %s", deltaSrcRevID, r.revID, base.UD(r.docID)) + return false, nil } resendFullRevisionFunc := func() error { - base.InfofCtx(bsc.loggingCtx, base.KeySync, "Resending revision as full body. Peer couldn't process delta %s from %s to %s for key %s", base.UD(revDelta.DeltaBytes), deltaSrcRevID, revID, base.UD(docID)) - return bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseCollection, collectionIdx) + base.InfofCtx(bsc.loggingCtx, base.KeySync, "Resending revision as full body. 
Peer couldn't process delta %s from %s to %s for key %s", base.UD(revDelta.DeltaBytes), deltaSrcRevID, r.revID, base.UD(r.docID)) + r.useDelta = false + bsc.revSender.addRevs([]*revToSend{r}) + return nil } - base.TracefCtx(bsc.loggingCtx, base.KeySync, "docID: %s - delta: %v", base.UD(docID), base.UD(string(revDelta.DeltaBytes))) - if err := bsc.sendDelta(sender, docID, collectionIdx, deltaSrcRevID, revDelta, seq, resendFullRevisionFunc); err != nil { - return err + base.TracefCtx(bsc.loggingCtx, base.KeySync, "r.docID: %s - delta: %v", base.UD(r.docID), base.UD(string(revDelta.DeltaBytes))) + if err := bsc.sendDelta(r, deltaSrcRevID, revDelta, resendFullRevisionFunc); err != nil { + return false, err } // We'll consider this one doc read for collection stats purposes, since GetDelta doesn't go through the normal getRev codepath. - handleChangesResponseCollection.collectionStats.NumDocReads.Add(1) - handleChangesResponseCollection.collectionStats.DocReadsBytes.Add(int64(len(revDelta.DeltaBytes))) + collection.collectionStats.NumDocReads.Add(1) + collection.collectionStats.DocReadsBytes.Add(int64(len(revDelta.DeltaBytes))) bsc.replicationStats.SendRevDeltaSentCount.Add(1) - return nil + return true, nil } func (bh *blipHandler) handleNoRev(rq *blip.Message) error { @@ -1204,7 +1252,9 @@ func (bh *blipHandler) handleProveAttachment(rq *blip.Message) error { return base.HTTPErrorf(http.StatusBadRequest, "no digest sent with proveAttachment") } - attData, err := bh.collection.GetAttachment(base.AttPrefix + digest) + allowedAttachment := bh.allowedAttachment(digest) + attachmentKey := MakeAttachmentKey(allowedAttachment.version, allowedAttachment.docID, digest) + attData, err := bh.collection.GetAttachment(attachmentKey) if err != nil { if bh.clientType == BLIPClientTypeSGR2 { return ErrAttachmentNotFound @@ -1294,7 +1344,7 @@ func (bh *blipHandler) sendGetAttachment(sender *blip.Sender, docID string, name return nil, ErrClosedBLIPSender } - resp := outrq.Response() + resp := outrq.Response() // TODO: Don't block the handler! respBody, err := resp.Body() if err != nil { @@ -1340,7 +1390,7 @@ func (bh *blipHandler) sendProveAttachment(sender *blip.Sender, docID, name, dig return ErrClosedBLIPSender } - resp := outrq.Response() + resp := outrq.Response() // TODO: Don't block the handler! body, err := resp.Body() if err != nil { diff --git a/db/blip_rev_sender.go b/db/blip_rev_sender.go new file mode 100644 index 0000000000..3edb493087 --- /dev/null +++ b/db/blip_rev_sender.go @@ -0,0 +1,170 @@ +/* +Copyright 2023-Present Couchbase, Inc. + +Use of this software is governed by the Business Source License included in +the file licenses/BSL-Couchbase.txt. As of the Change Date specified in that +file, in accordance with the Business Source License, use of this software will +be governed by the Apache License, Version 2.0, included in the file +licenses/APL2.txt. +*/ + +package db + +import ( + "sync" + "sync/atomic" + "time" + + "github.com/couchbase/go-blip" + "github.com/couchbase/sync_gateway/base" +) + +// A queue that takes `revToSend` structs that define a revision to send to the peer, +// and sends those to BlipSyncContext's `sendRevision` and `sendRevAsDelta` methods at a rate +// that ensures only a limited number of outgoing in-memory BLIP "rev" messages are present at once. 
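To summarise the intended call pattern for the new type declared below (not new behaviour, just how the handlers are expected to drive it): revisions are queued with addRevs, and the outgoing message's OnSent callback reports completion via completedRev, which frees capacity for the next queued rev. A rough sketch, with the variable names standing in for the values available inside the BLIP handlers:

// Sketch only: the queue/complete lifecycle of blipRevSender; values are illustrative.
func queueRevSketch(bsc *BlipSyncContext, sender *blip.Sender, outrq *blip.Message) {
	rev := &revToSend{docID: "doc1", revID: "1-abc", sender: sender, timestamp: time.Now()}
	bsc.revSender.addRevs([]*revToSend{rev})                 // queued; dispatched while under maxActiveCount/maxActiveBytes
	outrq.OnSent(func() { bsc.revSender.completedRev(rev) }) // releases the slot once the frame has been written
}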
+type blipRevSender struct { + bsc *BlipSyncContext // The main sync object [const] + maxActiveCount int // Max number of messages I can be sending at once [const] + maxActiveBytes int64 // Max total size of messages I'm sending [const] + mutex sync.Mutex // Synchronizes access to queue,activeCount + queue []*revToSend // Ordered queue of revisions to be sent [synced] + activeCount int // Number of revs being fetched, processed, sent [synced] + sendingCountA int64 // Number of BLIP messages being sent to the socket [atomic] + sendingBytesA int64 // Total size of BLIP messages I'm sending [atomic] +} + +// Captures the information about a "rev" message to send. Queued by blipRevSender. +type revToSend struct { + seq SequenceID // Sequence + docID string // Document ID to send + revID string // Revision ID to send + knownRevs []any // RevIDs the client already has + maxHistory int // Max length of rev history to send + useDelta bool // If true, send as delta if possible + collectionIdx *int // Identifies which collection + sender *blip.Sender // BLIP sender + timestamp time.Time // When the 'changes' response was received + messageLen int // Length of BLIP message; must be filled in when message sent +} + +// Creates a new blipRevSender. +// - `maxActiveCount` is the maximum number of revisions that can be actively processed: +// fetched from the database, converted to 'rev' messages, and being written to the socket. +// - `maxActiveBytes` is the (approximate) maximum total size in bytes of those messages, +// or 0 for no size limit. +func newBlipRevSender(blipSyncContext *BlipSyncContext, maxActiveCount int, maxActiveBytes int64) *blipRevSender { + return &blipRevSender{ + bsc: blipSyncContext, + maxActiveCount: maxActiveCount, + maxActiveBytes: maxActiveBytes, + } +} + +// Queues revisions to send. +func (s *blipRevSender) addRevs(revs []*revToSend) { + s.mutex.Lock() + defer s.mutex.Unlock() + + s.queue = append(s.queue, revs...) + s._sendMore() +} + +// To be called by the BlipSyncContext when it's finished writing a 'rev' message to the socket. +func (s *blipRevSender) completedRev(rev *revToSend) { + messageLen := rev.messageLen + s.bsc.threadPool.Go(func() { + s.mutex.Lock() + defer s.mutex.Unlock() + + s.activeCount-- + atomic.AddInt64(&s.sendingCountA, int64(-1)) + atomic.AddInt64(&s.sendingBytesA, int64(-messageLen)) + s._sendMore() + }) +} + +func (s *blipRevSender) _sendMore() { + // Mutex must be locked when calling this! + + // Get the current total size, and estimate the size of a message: + curSendingCount := atomic.LoadInt64(&s.sendingCountA) + estSendingBytes := atomic.LoadInt64(&s.sendingBytesA) + var estMessageSize int64 = 4096 + if curSendingCount > 0 { + estMessageSize = estSendingBytes / curSendingCount + } + + n := 0 + for s.activeCount < s.maxActiveCount && len(s.queue) > 0 { + if s.maxActiveBytes > 0 && estSendingBytes+estMessageSize > s.maxActiveBytes { + // Stop if the byte count is too high + break + } + // Send the next revision (asynchronously): + next := s.queue[0] + s.queue = s.queue[1:] + s.activeCount++ + s.bsc.threadPool.Go(func() { s._sendNow(next) }) + estSendingBytes += estMessageSize + n++ + } + // if len(s.queue) > 0 { + // base.WarnfCtx(s.bsc.loggingCtx, "_sendMore: stopping after %d, at %d bytes (est), %d messages ... avg msg size is %d", n, estSendingBytes, s.activeCount, estMessageSize) + // } +} + +func (s *blipRevSender) _sendNow(rev *revToSend) { + // Sends a 'rev' message, or if that fails, sends a 'norev'; then updates stats. 
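A concrete illustration of the byte throttle in _sendMore above (numbers are made up): if 10 rev messages totalling 200,000 bytes are currently being written, the next message is estimated at 20,000 bytes, so with a 100,000-byte limit the loop stops queueing until completedRev shrinks the in-flight byte count; when nothing is in flight the estimate falls back to 4096 bytes.

// Illustrative arithmetic only, mirroring the estimate in _sendMore.
func sendMoreEstimateSketch() bool {
	estSendingBytes, curSendingCount := int64(200000), int64(10)
	estMessageSize := estSendingBytes / curSendingCount // 20000
	return estSendingBytes+estMessageSize > 100*1000    // true: stop and wait for completedRev
}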
+ if err := s._trySendNow(rev); err != nil { + if base.IsDocNotFoundError(err) { + // If rev isn't available, send a 'norev'. This is important for client bookkeeping. + err = s.bsc.sendNoRev(rev, err) + } + if err != nil { + base.ErrorfCtx(s.bsc.loggingCtx, "Error sending 'rev' over BLIP: %s", err) + if cb := s.bsc.fatalErrorCallback; cb != nil { + cb(err) + } + } + } + + atomic.AddInt64(&s.sendingCountA, int64(1)) + atomic.AddInt64(&s.sendingBytesA, int64(rev.messageLen)) + + latency := time.Since(rev.timestamp).Nanoseconds() + s.bsc.replicationStats.HandleChangesSendRevCount.Add(1) + s.bsc.replicationStats.HandleChangesSendRevLatency.Add(latency) +} + +func (s *blipRevSender) _trySendNow(rev *revToSend) error { + // Sends a 'rev' message or returns an error. (Subroutine of _sendNow.) + + // Convert knownRevs to a set of strings: + knownRevs := make(map[string]bool, len(rev.knownRevs)) + var deltaSrcRevID *string + for _, knownRev := range rev.knownRevs { + if revID, ok := knownRev.(string); ok { + knownRevs[revID] = true + if deltaSrcRevID == nil { + // The first element of the knownRevs array is the one to use as deltaSrc + deltaSrcRevID = &revID + } + } else { + base.ErrorfCtx(s.bsc.loggingCtx, "Invalid knownRevs in response to 'changes' message") + } + } + rev.knownRevs = nil // (no longer needed) + + collection, err := s.bsc.copyDatabaseCollectionWithUser(rev.collectionIdx) + if err != nil { + return err + } + if rev.useDelta && deltaSrcRevID != nil { + sent, err := s.bsc.sendRevAsDelta(collection, rev, knownRevs, *deltaSrcRevID) + if sent || err != nil { + return err + } + // if rev can't be sent as a delta, send it as a full revision... + } + return s.bsc.sendRevision(collection, rev, knownRevs) +} diff --git a/db/blip_sync_context.go b/db/blip_sync_context.go index 85c7e285a0..d09173b5e1 100644 --- a/db/blip_sync_context.go +++ b/db/blip_sync_context.go @@ -19,6 +19,7 @@ import ( "runtime/debug" "strconv" "sync" + "sync/atomic" "time" "github.com/couchbase/go-blip" @@ -26,9 +27,23 @@ import ( ) const ( - // Blip default vals + // Number of revisions to include in a 'changes' message BlipDefaultBatchSize = uint64(200) BlipMinimumBatchSize = uint64(10) // Not in the replication spec - is this required? + + // Number of goroutines handling incoming BLIP requests (and other tasks) + BlipThreadPoolSize = 5 + + // Maximum total size of incoming BLIP requests that are currently being dispatched and handled. + // Above this amount, the BLIP engine stops reading from the WebSocket, applying back-pressure + // to the client and keeping memory usage down. 
+ BlipMaxIncomingBytesBeingDispatched = 100000 // bytes + + // Max number of outgoing revisions in memory being sent + BlipMaxRevsSending = 50 + + // Max total size (bytes) of outgoing revisions in memory being sent + BlipMaxRevsLengthSending = 100 * 1000 ) var ErrClosedBLIPSender = errors.New("use of closed BLIP sender") @@ -44,10 +59,13 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con replicationStats: replicationStats, inFlightChangesThrottle: make(chan struct{}, maxInFlightChangesBatches), collections: &blipCollections{}, + threadPool: blip.ThreadPool{Concurrency: BlipThreadPoolSize}, } if bsc.replicationStats == nil { bsc.replicationStats = NewBlipSyncStats() } + bsc.stats.lastReportTime.Store(time.Now().UnixMilli()) + bsc.revSender = newBlipRevSender(bsc, BlipMaxRevsSending, BlipMaxRevsLengthSending) if u := db.User(); u != nil { bsc.userName = u.Name() @@ -58,22 +76,25 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con } // Register default handlers - bc.DefaultHandler = bsc.NotFoundHandler bc.FatalErrorHandler = func(err error) { base.InfofCtx(ctx, base.KeyHTTP, "%s: --> BLIP+WebSocket connection error: %v", contextID, err) } + dispatcher := &blip.ByProfileDispatcher{} + dispatcher.SetDefaultHandler(bsc.NotFoundHandler) + // Register 2.x replicator handlers for profile, handlerFn := range handlersByProfile { - bsc.register(profile, handlerFn) + bsc.register(dispatcher, profile, handlerFn) } - if db.Options.UnsupportedOptions.ConnectedClient { // Register Connected Client handlers for profile, handlerFn := range kConnectedClientHandlersByProfile { - bsc.register(profile, handlerFn) + bsc.register(dispatcher, profile, handlerFn) } } + bsc.blipContext.RequestHandler = dispatcher.Dispatch + bsc.threadPool.Start() return bsc } @@ -82,6 +103,7 @@ func NewBlipSyncContext(ctx context.Context, bc *blip.Context, db *Database, con // This connection remains open until the client closes it, and can receive any number of requests. type BlipSyncContext struct { blipContext *blip.Context + threadPool blip.ThreadPool blipContextDb *Database // 'master' database instance for the replication, used as source when creating handler-specific databases loggingCtx context.Context // logging context for connection dbUserLock sync.RWMutex // Must be held when refreshing the db user @@ -115,6 +137,16 @@ type BlipSyncContext struct { readOnly bool collections *blipCollections // all collections handled by blipSyncContext, implicit or via GetCollections + revSender *blipRevSender // schedules sending 'rev' messages + stats blipSyncStats // internal structure to store stats +} + +// blipSyncStats has support structures to support reporting stats at regular interval +type blipSyncStats struct { + bytesSent atomic.Uint64 // Total bytes sent to client + bytesReceived atomic.Uint64 // Total bytes received from client + lastReportTime atomic.Int64 // last time reported by time.Time // Last time blip stats were reported + lock sync.Mutex } // AllowedAttachment contains the metadata for handling allowed attachments @@ -131,10 +163,11 @@ func (bsc *BlipSyncContext) SetClientType(clientType BLIPSyncContextClientType) // Registers a BLIP handler including the outer-level work of logging & error handling. // Includes the outer handler as a nested function. 
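Pulling together the dispatch pieces used by register() below: handlers are wrapped to run on the shared thread pool, optionally throttled per profile, and registered on a ByProfileDispatcher that becomes the context's RequestHandler. A condensed sketch using only the go-blip pieces referenced in this change (rawHandler and the concurrency value are placeholders):

// Sketch of the dispatch chain assembled in NewBlipSyncContext/register; not a literal excerpt.
func buildDispatcherSketch(bsc *BlipSyncContext, bc *blip.Context, rawHandler func(*blip.Message, func())) {
	dispatcher := &blip.ByProfileDispatcher{}
	dispatcher.SetDefaultHandler(bsc.NotFoundHandler)

	handler := bsc.threadPool.WrapAsyncHandler(rawHandler)               // run handlers on the shared pool
	throttle := blip.ThrottlingDispatcher{Handler: handler, MaxConcurrency: 16}
	dispatcher.SetHandler(MessageRev, throttle.Dispatch)                 // e.g. at most 16 concurrent "rev" handlers

	bc.RequestHandler = dispatcher.Dispatch
}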
-func (bsc *BlipSyncContext) register(profile string, handlerFn func(*blipHandler, *blip.Message) error) { +func (bsc *BlipSyncContext) register(dispatcher *blip.ByProfileDispatcher, profile string, handlerFn func(*blipHandler, *blip.Message) error) { // Wrap the handler function with a function that adds handling needed by all handlers - handlerFnWrapper := func(rq *blip.Message) { + handler := func(rq *blip.Message, onComplete func()) { + defer onComplete() // Recover to log panic from handlers and repanic for go-blip response handling defer func() { @@ -195,10 +228,23 @@ func (bsc *BlipSyncContext) register(profile string, handlerFn func(*blipHandler respBody, _ := resp.Body() base.TracefCtx(bsc.loggingCtx, base.KeySyncMsg, "Recv Rsp %s: Body: '%s' Properties: %v", resp, base.UD(respBody), base.UD(resp.Properties)) } + + bsc.reportStats(false) } - bsc.blipContext.HandlerForProfile[profile] = handlerFnWrapper + // Handlers run on the thread pool + handler = bsc.threadPool.WrapAsyncHandler(handler) + if concurrency := handlerConcurrencyByProfile[profile]; concurrency > 0 { + // Limit number of concurrently running handlers for some profiles: + throttle := blip.ThrottlingDispatcher{ + Handler: handler, + MaxConcurrency: concurrency, + } + handler = throttle.Dispatch + } + + dispatcher.SetHandler(profile, handler) } func (bsc *BlipSyncContext) Close() { @@ -214,15 +260,17 @@ func (bsc *BlipSyncContext) Close() { collection.changesCtxCancel() } + bsc.reportStats(true) + bsc.threadPool.Stop() close(bsc.terminator) }) } // NotFoundHandler is used for unknown requests -func (bsc *BlipSyncContext) NotFoundHandler(rq *blip.Message) { +func (bsc *BlipSyncContext) NotFoundHandler(rq *blip.Message, onComplete func()) { base.InfofCtx(bsc.loggingCtx, base.KeySync, "%s Type:%q", rq, rq.Profile()) base.InfofCtx(bsc.loggingCtx, base.KeySync, "%s --> 404 Unknown profile", rq) - blip.Unhandled(rq) + blip.UnhandledAsync(rq, onComplete) } func (bsc *BlipSyncContext) copyContextDatabase() *Database { @@ -249,7 +297,7 @@ func (bsc *BlipSyncContext) _copyContextDatabase() *Database { } // Handles the response to a pushed "changes" message, i.e. the list of revisions the client wants -func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response *blip.Message, changeArray [][]interface{}, requestSent time.Time, handleChangesResponseDbCollection *DatabaseCollectionWithUser, collectionIdx *int) error { +func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response *blip.Message, changeArray [][]interface{}, requestSent time.Time, collectionIdx *int) error { defer func() { if panicked := recover(); panicked != nil { bsc.replicationStats.NumHandlersPanicked.Add(1) @@ -293,13 +341,9 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response base.TracefCtx(bsc.loggingCtx, base.KeySync, "Client didn't specify 'deltas' property in 'changes' response. useDeltas: %v", bsc.useDeltas) } - // Maps docID --> a map containing true for revIDs known to the client - knownRevsByDoc := make(map[string]map[string]bool, len(answer)) - // `answer` is an array where each item is either an array of known rev IDs, or a non-array // placeholder (probably 0). The item numbers match those of changeArray. 
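To make the `answer` shape above concrete (values are illustrative): for a three-item changes batch, a response body of [["2-abc","1-abc"], 0, []] means the client wants items 0 and 2 (with "2-abc" usable as the delta source for item 0) and already has item 1. The parsing below boils down to a type check per item:

// Illustrative only: how each answer item is interpreted relative to changeArray.
func answerShapeSketch(answer []interface{}) (wanted, alreadyKnown int) {
	for _, item := range answer {
		if knownRevs, ok := item.([]interface{}); ok {
			_ = knownRevs // client wants this change; first element, if present, is the deltaSrc candidate
			wanted++
		} else {
			alreadyKnown++ // non-array placeholder (e.g. 0): client already has this rev
		}
	}
	return wanted, alreadyKnown
}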
- var revSendTimeLatency int64 - var revSendCount int64 + revsToSend := make([]*revToSend, 0, len(answer)) sentSeqs := make([]SequenceID, 0) alreadyKnownSeqs := make([]SequenceID, 0) @@ -312,49 +356,23 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response seq := changeArray[i][0].(SequenceID) docID := changeArray[i][1].(string) revID := changeArray[i][2].(string) - if knownRevsArray, ok := knownRevsArrayInterface.([]interface{}); ok { - deltaSrcRevID := "" - knownRevs := knownRevsByDoc[docID] - if knownRevs == nil { - knownRevs = make(map[string]bool, len(knownRevsArray)) - knownRevsByDoc[docID] = knownRevs - } - - // The first element of the knownRevsArray returned from CBL is the parent revision to use as deltaSrc - if bsc.useDeltas && len(knownRevsArray) > 0 { - if revID, ok := knownRevsArray[0].(string); ok { - deltaSrcRevID = revID - } - } - - for _, rev := range knownRevsArray { - if revID, ok := rev.(string); ok { - knownRevs[revID] = true - } else { - base.ErrorfCtx(bsc.loggingCtx, "Invalid response to 'changes' message") - return nil - } - } - - var err error - if deltaSrcRevID != "" { - err = bsc.sendRevAsDelta(sender, docID, revID, deltaSrcRevID, seq, knownRevs, maxHistory, handleChangesResponseDbCollection, collectionIdx) - } else { - err = bsc.sendRevision(sender, docID, revID, seq, knownRevs, maxHistory, handleChangesResponseDbCollection, collectionIdx) - } - if err != nil { - return err - } - - revSendTimeLatency += time.Since(changesResponseReceived).Nanoseconds() - revSendCount++ - + revsToSend = append(revsToSend, &revToSend{ + seq: seq, + docID: docID, + revID: revID, + knownRevs: knownRevsArray, + maxHistory: maxHistory, + useDelta: bsc.useDeltas, + collectionIdx: collectionIdx, + sender: sender, + timestamp: changesResponseReceived, + }) if collectionCtx.sgr2PushAddExpectedSeqsCallback != nil { sentSeqs = append(sentSeqs, seq) } } else { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s / %s (seq:%v)", base.UD(docID), revID, seq) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s / %s (rev.seq:%v)", base.UD(docID), revID, seq) if collectionCtx.sgr2PushAlreadyKnownSeqsCallback != nil { alreadyKnownSeqs = append(alreadyKnownSeqs, seq) } @@ -365,27 +383,29 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response collectionCtx.sgr2PushAlreadyKnownSeqsCallback(alreadyKnownSeqs...) } - if revSendCount > 0 { + if len(revsToSend) > 0 { + bsc.revSender.addRevs(revsToSend) if collectionCtx.sgr2PushAddExpectedSeqsCallback != nil { collectionCtx.sgr2PushAddExpectedSeqsCallback(sentSeqs...) } - - bsc.replicationStats.HandleChangesSendRevCount.Add(revSendCount) - bsc.replicationStats.HandleChangesSendRevLatency.Add(revSendTimeLatency) bsc.replicationStats.HandleChangesSendRevTime.Add(time.Since(changesResponseReceived).Nanoseconds()) } return nil } -// Pushes a revision body to the client -func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docID string, revID string, collectionIdx *int, - bodyBytes []byte, attMeta []AttachmentStorageMeta, properties blip.Properties, seq SequenceID, resendFullRevisionFunc func() error) error { +// Pushes a revision body to the client. Returns length of body in bytes. 
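One detail of the refactor below that is easy to miss: sendRevisionWithProperties records the body length on the revToSend, _sendNow adds that length to the sender's in-flight byte counter, and completedRev subtracts the same value once the frame has been written, so the counter nets out per message. A simplified view of that flow (not a literal excerpt; s and r stand for the blipRevSender and revToSend involved):

// Simplified byte-accounting flow across sendRevisionWithProperties / _sendNow / completedRev.
func byteAccountingSketch(s *blipRevSender, r *revToSend, bodyBytes []byte) {
	r.messageLen = len(bodyBytes)                           // sendRevisionWithProperties: record body length
	atomic.AddInt64(&s.sendingBytesA, int64(r.messageLen))  // _sendNow: count it as in flight
	atomic.AddInt64(&s.sendingBytesA, int64(-r.messageLen)) // completedRev: release it once the frame is written
}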
+func (bsc *BlipSyncContext) sendRevisionWithProperties(r *revToSend, + bodyBytes []byte, attMeta []AttachmentStorageMeta, properties blip.Properties, resendFullRevisionFunc func() error) error { + + docID := r.docID + revID := r.revID + seq := r.seq outrq := NewRevMessage() outrq.SetID(docID) outrq.SetRev(revID) - outrq.SetCollection(collectionIdx) + outrq.SetCollection(r.collectionIdx) if bsc.sendRevNoConflicts { outrq.SetNoConflicts(true) } @@ -394,7 +414,7 @@ func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docI outrq.SetProperties(properties) outrq.SetJSONBodyAsBytes(bodyBytes) - + r.messageLen = len(bodyBytes) // Update read stats if messageBody, err := outrq.Body(); err == nil { bsc.replicationStats.SendRevBytes.Add(int64(len(messageBody))) @@ -402,7 +422,7 @@ func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docI base.TracefCtx(bsc.loggingCtx, base.KeySync, "Sending revision %s/%s, body:%s, properties: %v, attDigests: %v", base.UD(docID), revID, base.UD(string(bodyBytes)), base.UD(properties), attMeta) - collectionCtx, err := bsc.collections.get(collectionIdx) + collectionCtx, err := bsc.collections.get(r.collectionIdx) if err != nil { return err } @@ -418,68 +438,70 @@ func (bsc *BlipSyncContext) sendRevisionWithProperties(sender *blip.Sender, docI outrq.SetNoReply(true) } + outrq.OnSent(func() { bsc.revSender.completedRev(r) }) + // send the rev - if !bsc.sendBLIPMessage(sender, outrq.Message) { + if !bsc.sendBLIPMessage(r.sender, outrq.Message) { bsc.removeAllowedAttachments(docID, attMeta, activeSubprotocol) return ErrClosedBLIPSender } if awaitResponse { - go func(activeSubprotocol string) { - defer func() { - if panicked := recover(); panicked != nil { - bsc.replicationStats.NumHandlersPanicked.Add(1) - base.WarnfCtx(bsc.loggingCtx, "PANIC handling 'sendRevision' response: %v\n%s", panicked, debug.Stack()) - bsc.Close() + outrq.OnResponse(func(resp *blip.Message) { + bsc.threadPool.Go(func() { + defer func() { + if panicked := recover(); panicked != nil { + bsc.replicationStats.NumHandlersPanicked.Add(1) + base.WarnfCtx(bsc.loggingCtx, "PANIC handling 'sendRevision' response: %v\n%s", panicked, debug.Stack()) + bsc.Close() + } + }() + + respBody, err := resp.Body() + if err != nil { + base.WarnfCtx(bsc.loggingCtx, "couldn't get response body for rev: %v", err) } - }() - resp := outrq.Response() // blocks till reply is received + base.TracefCtx(bsc.loggingCtx, base.KeySync, "Received response for sendRevisionWithProperties rev message %s/%s", base.UD(docID), revID) - respBody, err := resp.Body() - if err != nil { - base.WarnfCtx(bsc.loggingCtx, "couldn't get response body for rev: %v", err) - } + if resp.Type() == blip.ErrorType { + bsc.replicationStats.SendRevErrorTotal.Add(1) + base.InfofCtx(bsc.loggingCtx, base.KeySync, "error %s in response to rev: %s", resp.Properties["Error-Code"], respBody) - base.TracefCtx(bsc.loggingCtx, base.KeySync, "Received response for sendRevisionWithProperties rev message %s/%s", base.UD(docID), revID) - - if resp.Type() == blip.ErrorType { - bsc.replicationStats.SendRevErrorTotal.Add(1) - base.InfofCtx(bsc.loggingCtx, base.KeySync, "error %s in response to rev: %s", resp.Properties["Error-Code"], respBody) - - if errorDomainIsHTTP(resp) { - switch resp.Properties["Error-Code"] { - case "409": - bsc.replicationStats.SendRevErrorConflictCount.Add(1) - case "403": - bsc.replicationStats.SendRevErrorRejectedCount.Add(1) - case "422", "404": - // unprocessable entity, CBL has not been able to use 
the delta we sent, so we should re-send the revision in full - if resendFullRevisionFunc != nil { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "sending full body replication for doc %s/%s due to unprocessable entity", base.UD(docID), revID) - if err := resendFullRevisionFunc(); err != nil { - base.WarnfCtx(bsc.loggingCtx, "unable to resend revision: %v", err) - } - } - case "500": - // runtime exceptions return 500 status codes, but we have no other way to determine if this 500 error was caused by the sync-function than matching on the error message. - if bytes.Contains(respBody, []byte("JS sync function")) { + if errorDomainIsHTTP(resp) { + switch resp.Properties["Error-Code"] { + case "409": + bsc.replicationStats.SendRevErrorConflictCount.Add(1) + case "403": bsc.replicationStats.SendRevErrorRejectedCount.Add(1) - } else { - bsc.replicationStats.SendRevErrorOtherCount.Add(1) + case "422", "404": + // unprocessable entity, CBL has not been able to use the delta we sent, so we should re-send the revision in full + if resendFullRevisionFunc != nil { + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "sending full body replication for doc %s/%s due to unprocessable entity", base.UD(docID), revID) + if err := resendFullRevisionFunc(); err != nil { + base.WarnfCtx(bsc.loggingCtx, "unable to resend revision: %v", err) + } + } + case "500": + // runtime exceptions return 500 status codes, but we have no other way to determine if this 500 error was caused by the sync-function than matching on the error message. + if bytes.Contains(respBody, []byte("JS sync function")) { + bsc.replicationStats.SendRevErrorRejectedCount.Add(1) + } else { + bsc.replicationStats.SendRevErrorOtherCount.Add(1) + } } } + } else { + bsc.replicationStats.SendRevCount.Add(1) } - } else { - bsc.replicationStats.SendRevCount.Add(1) - } - bsc.removeAllowedAttachments(docID, attMeta, activeSubprotocol) + bsc.removeAllowedAttachments(docID, attMeta, activeSubprotocol) - if collectionCtx.sgr2PushProcessedSeqCallback != nil { - collectionCtx.sgr2PushProcessedSeqCallback(seq) - } - }(activeSubprotocol) + if collectionCtx.sgr2PushProcessedSeqCallback != nil { + collectionCtx.sgr2PushProcessedSeqCallback(seq) + } + }) + }) } return nil @@ -528,14 +550,14 @@ func (bsc *BlipSyncContext) setUseDeltas(clientCanUseDeltas bool) { } } -func (bsc *BlipSyncContext) sendDelta(sender *blip.Sender, docID string, collectionIdx *int, deltaSrcRevID string, revDelta *RevisionDelta, seq SequenceID, resendFullRevisionFunc func() error) error { +func (bsc *BlipSyncContext) sendDelta(r *revToSend, deltaSrcRevID string, revDelta *RevisionDelta, resendFullRevisionFunc func() error) error { - properties := blipRevMessageProperties(revDelta.RevisionHistory, revDelta.ToDeleted, seq) + properties := blipRevMessageProperties(revDelta.RevisionHistory, revDelta.ToDeleted, r.seq) properties[RevMessageDeltaSrc] = deltaSrcRevID - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending rev %q %s as delta. DeltaSrc:%s", base.UD(docID), revDelta.ToRevID, deltaSrcRevID) - return bsc.sendRevisionWithProperties(sender, docID, revDelta.ToRevID, collectionIdx, revDelta.DeltaBytes, revDelta.AttachmentStorageMeta, - properties, seq, resendFullRevisionFunc) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending rev %q %s as delta. 
DeltaSrc:%s", base.UD(r.docID), revDelta.ToRevID, deltaSrcRevID) + return bsc.sendRevisionWithProperties(r, revDelta.DeltaBytes, revDelta.AttachmentStorageMeta, + properties, resendFullRevisionFunc) } // sendBLIPMessage is a simple wrapper around all sent BLIP messages @@ -548,17 +570,17 @@ func (bsc *BlipSyncContext) sendBLIPMessage(sender *blip.Sender, msg *blip.Messa return ok } -func (bsc *BlipSyncContext) sendNoRev(sender *blip.Sender, docID, revID string, collectionIdx *int, seq SequenceID, err error) error { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending norev %q %s due to unavailable revision: %v", base.UD(docID), revID, err) +func (bsc *BlipSyncContext) sendNoRev(r *revToSend, err error) error { + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending norev %q %s due to unavailable revision: %v", base.UD(r.docID), r.revID, err) noRevRq := NewNoRevMessage() - noRevRq.SetId(docID) - noRevRq.SetRev(revID) - noRevRq.SetCollection(collectionIdx) + noRevRq.SetId(r.docID) + noRevRq.SetRev(r.revID) + noRevRq.SetCollection(r.collectionIdx) if bsc.blipContext.ActiveSubprotocol() == BlipCBMobileReplicationV2 && bsc.clientType == BLIPClientTypeSGR2 { - noRevRq.SetSeq(seq) + noRevRq.SetSeq(r.seq) } else { - noRevRq.SetSequence(seq) + noRevRq.SetSequence(r.seq) } status, reason := base.ErrorAsHTTPStatus(err) @@ -568,32 +590,34 @@ func (bsc *BlipSyncContext) sendNoRev(sender *blip.Sender, docID, revID string, noRevRq.SetReason(reason) noRevRq.SetNoReply(true) - if !bsc.sendBLIPMessage(sender, noRevRq.Message) { + noRevRq.OnSent(func() { bsc.revSender.completedRev(r) }) + if !bsc.sendBLIPMessage(r.sender, noRevRq.Message) { return ErrClosedBLIPSender } - collectionCtx, err := bsc.collections.get(collectionIdx) + collectionCtx, err := bsc.collections.get(r.collectionIdx) if err != nil { return err } if collectionCtx.sgr2PushProcessedSeqCallback != nil { - collectionCtx.sgr2PushProcessedSeqCallback(seq) + collectionCtx.sgr2PushProcessedSeqCallback(r.seq) } return nil } // Pushes a revision body to the client -func (bsc *BlipSyncContext) sendRevision(sender *blip.Sender, docID, revID string, seq SequenceID, knownRevs map[string]bool, maxHistory int, handleChangesResponseCollection *DatabaseCollectionWithUser, collectionIdx *int) error { - rev, err := handleChangesResponseCollection.GetRev(bsc.loggingCtx, docID, revID, true, nil) - if base.IsDocNotFoundError(err) { - return bsc.sendNoRev(sender, docID, revID, collectionIdx, seq, err) - } else if err != nil { - return fmt.Errorf("failed to GetRev for doc %s with rev %s: %w", base.UD(docID).Redact(), base.MD(revID).Redact(), err) +func (bsc *BlipSyncContext) sendRevision(collection *DatabaseCollectionWithUser, r *revToSend, knownRevs map[string]bool) error { + rev, err := collection.GetRev(bsc.loggingCtx, r.docID, r.revID, true, nil) + if err != nil { + if !base.IsDocNotFoundError(err) { + err = fmt.Errorf("failed to GetRev for doc %s with rev %s: %w", base.UD(r.docID).Redact(), base.MD(r.revID).Redact(), err) + } + return err } - base.TracefCtx(bsc.loggingCtx, base.KeySync, "sendRevision, rev attachments for %s/%s are %v", base.UD(docID), revID, base.UD(rev.Attachments)) + base.TracefCtx(bsc.loggingCtx, base.KeySync, "sendRevision, rev attachments for %s/%s are %v", base.UD(r.docID), r.revID, base.UD(rev.Attachments)) attachmentStorageMeta := ToAttachmentStorageMeta(rev.Attachments) var bodyBytes []byte if base.IsEnterpriseEdition() { @@ -610,7 +634,7 @@ func (bsc *BlipSyncContext) sendRevision(sender *blip.Sender, docID, revID strin } else { 
body, err := rev.Body() if err != nil { - return bsc.sendNoRev(sender, docID, revID, collectionIdx, seq, err) + return err } // Still need to stamp _attachments into BLIP messages @@ -621,16 +645,16 @@ func (bsc *BlipSyncContext) sendRevision(sender *blip.Sender, docID, revID strin bodyBytes, err = base.JSONMarshalCanonical(body) if err != nil { - return bsc.sendNoRev(sender, docID, revID, collectionIdx, seq, err) + return err } } - history := toHistory(rev.History, knownRevs, maxHistory) - properties := blipRevMessageProperties(history, rev.Deleted, seq) + history := toHistory(rev.History, knownRevs, r.maxHistory) + properties := blipRevMessageProperties(history, rev.Deleted, r.seq) if base.LogDebugEnabled(base.KeySync) { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending rev %q %s based on %d known, digests: %v", base.UD(docID), revID, len(knownRevs), digests(attachmentStorageMeta)) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Sending rev %q %s based on %d known, digests: %v", base.UD(r.docID), r.revID, len(r.knownRevs), digests(attachmentStorageMeta)) } - return bsc.sendRevisionWithProperties(sender, docID, revID, collectionIdx, bodyBytes, attachmentStorageMeta, properties, seq, nil) + return bsc.sendRevisionWithProperties(r, bodyBytes, attachmentStorageMeta, properties, nil) } // digests returns a slice of digest extracted from the given attachment meta. @@ -655,3 +679,41 @@ func toHistory(revisions Revisions, knownRevs map[string]bool, maxHistory int) [ } return history } + +// timeElapsedForStatsReporting will return true if enough time has passed since the previous report. +func (bsc *BlipSyncContext) timeElapsedForStatsReporting(currentTime int64) bool { + return (currentTime - bsc.stats.lastReportTime.Load()) >= bsc.blipContextDb.Options.BlipStatsReportingInterval +} + +// reportStats will update the stats on a database immediately if updateImmediately is true, otherwise update on BlipStatsReportinInterval +func (bsc *BlipSyncContext) reportStats(updateImmediately bool) { + if bsc.blipContextDb == nil || bsc.blipContext == nil { + return + } + dbStats := bsc.blipContextDb.DbStats.Database() + if dbStats == nil { + return + } + currentTime := time.Now().UnixMilli() + if !updateImmediately && !bsc.timeElapsedForStatsReporting(currentTime) { + return + } + + bsc.stats.lock.Lock() + defer bsc.stats.lock.Unlock() + + // check a second time after acquiring the lock to see stats reporting was slow enough that a waiting mutex doesn't need to run + if !updateImmediately && !bsc.timeElapsedForStatsReporting(time.Now().UnixMilli()) { + return + } + + totalBytesSent := bsc.blipContext.GetBytesSent() + newBytesSent := totalBytesSent - bsc.stats.bytesSent.Swap(totalBytesSent) + dbStats.ReplicationBytesSent.Add(int64(newBytesSent)) + + totalBytesReceived := bsc.blipContext.GetBytesReceived() + newBytesReceived := totalBytesReceived - bsc.stats.bytesReceived.Swap(totalBytesReceived) + dbStats.ReplicationBytesReceived.Add(int64(newBytesReceived)) + bsc.stats.lastReportTime.Store(currentTime) + +} diff --git a/db/blip_sync_messages.go b/db/blip_sync_messages.go index 381591be0b..1e2a161e73 100644 --- a/db/blip_sync_messages.go +++ b/db/blip_sync_messages.go @@ -68,6 +68,8 @@ const ( SubChangesContinuous = "continuous" SubChangesBatch = "batch" SubChangesRevocations = "revocations" + SubChangesRequestPlus = "requestPlus" + SubChangesFuture = "future" // rev message properties RevMessageID = "id" @@ -163,7 +165,7 @@ func NewSubChangesParams(logCtx context.Context, rq *blip.Message, zeroSeq 
Seque // Determine incoming since and docIDs once, since there is some overhead associated with their calculation sinceSequenceId := zeroSeq var err error - if rq.Properties["future"] == trueProperty { + if rq.Properties[SubChangesFuture] == trueProperty { sinceSequenceId, err = latestSeq() } else if sinceStr, found := rq.Properties[SubChangesSince]; found { if sinceSequenceId, err = sequenceIDParser(sinceStr); err != nil { @@ -234,6 +236,14 @@ func (s *SubChangesParams) activeOnly() bool { return (s.rq.Properties[SubChangesActiveOnly] == trueProperty) } +func (s *SubChangesParams) requestPlus(defaultValue bool) (value bool) { + propertyValue, isDefined := s.rq.Properties[SubChangesRequestPlus] + if !isDefined { + return defaultValue + } + return propertyValue == trueProperty +} + func (s *SubChangesParams) filter() string { return s.rq.Properties[SubChangesFilter] } diff --git a/db/blip_sync_stats.go b/db/blip_sync_stats.go index 25fedee4ff..6286eae453 100644 --- a/db/blip_sync_stats.go +++ b/db/blip_sync_stats.go @@ -129,11 +129,13 @@ func BlipSyncStatsForCBL(dbStats *base.DbStats) *BlipSyncStats { blipStats.SendRevBytes = dbStats.Database().DocReadsBytesBlip blipStats.SendRevCount = dbStats.Database().NumDocReadsBlip + blipStats.SendRevErrorTotal = dbStats.CBLReplicationPull().RevErrorCount blipStats.HandleRevBytes = dbStats.Database().DocWritesBytesBlip blipStats.HandleRevProcessingTime = dbStats.CBLReplicationPush().WriteProcessingTime blipStats.HandleRevCount = dbStats.CBLReplicationPush().DocPushCount + blipStats.HandleRevErrorCount = dbStats.CBLReplicationPush().DocPushErrorCount blipStats.HandleGetAttachment = dbStats.CBLReplicationPull().AttachmentPullCount blipStats.HandleGetAttachmentBytes = dbStats.CBLReplicationPull().AttachmentPullBytes diff --git a/db/changes.go b/db/changes.go index d430d51cda..53b1df1cc3 100644 --- a/db/changes.go +++ b/db/changes.go @@ -26,19 +26,20 @@ import ( // Options for changes-feeds. ChangesOptions must not contain any mutable pointer references, as // changes processing currently assumes a deep copy when doing chanOpts := changesOptions. type ChangesOptions struct { - Since SequenceID // sequence # to start _after_ - Limit int // Max number of changes to return, if nonzero - Conflicts bool // Show all conflicting revision IDs, not just winning one? - IncludeDocs bool // Include doc body of each change? - Wait bool // Wait for results, instead of immediately returning empty result? - Continuous bool // Run continuously until terminated? - HeartbeatMs uint64 // How often to send a heartbeat to the client - TimeoutMs uint64 // After this amount of time, close the longpoll connection - ActiveOnly bool // If true, only return information on non-deleted, non-removed revisions - Revocations bool // Specifies whether revocation messages should be sent on the changes feed - clientType clientType // Can be used to determine if the replication is being started from a CBL 2.x or SGR2 client - LoggingCtx context.Context // Used for adding context to logs - ChangesCtx context.Context // Used for cancelling checking the changes feed should stop + Since SequenceID // sequence # to start _after_ + Limit int // Max number of changes to return, if nonzero + Conflicts bool // Show all conflicting revision IDs, not just winning one? + IncludeDocs bool // Include doc body of each change? + Wait bool // Wait for results, instead of immediately returning empty result? + Continuous bool // Run continuously until terminated? 
+ RequestPlusSeq uint64 // Do not stop changes before cached sequence catches up with requestPlusSeq + HeartbeatMs uint64 // How often to send a heartbeat to the client + TimeoutMs uint64 // After this amount of time, close the longpoll connection + ActiveOnly bool // If true, only return information on non-deleted, non-removed revisions + Revocations bool // Specifies whether revocation messages should be sent on the changes feed + clientType clientType // Can be used to determine if the replication is being started from a CBL 2.x or SGR2 client + LoggingCtx context.Context // Used for adding context to logs + ChangesCtx context.Context // Used for cancelling checking the changes feed should stop } // A changes entry; Database.GetChanges returns an array of these. @@ -629,8 +630,9 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex var changeWaiter *ChangeWaiter var lowSequence uint64 - var currentCachedSequence uint64 + var currentCachedSequence uint64 // The highest contiguous sequence buffered over the caching feed var lateSequenceFeeds map[channels.ID]*lateSequenceFeed + var useLateSequenceFeeds bool // LateSequence feeds are only used for continuous, or one-shot where options.RequestPlusSeq > currentCachedSequence var userCounter uint64 // Wait counter used to identify changes to the user document var changedChannels channels.ChangedKeys // Tracks channels added/removed to the user during changes processing. var userChanged bool // Whether the user document has changed in a given iteration loop @@ -638,9 +640,9 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex // Retrieve the current max cached sequence - ensures there isn't a race between the subsequent channel cache queries currentCachedSequence = col.changeCache().getChannelCache().GetHighCacheSequence() - if options.Wait { - options.Wait = false + // If changes feed requires more than one ChangesLoop iteration, initialize changeWaiter + if options.Wait || options.RequestPlusSeq > currentCachedSequence { changeWaiter = col.startChangeWaiter() // Waiter is updated with the actual channel set (post-user reload) at the start of the outer changes loop userCounter = changeWaiter.CurrentUserCount() // Reload user to pick up user changes that happened between auth and the change waiter @@ -676,7 +678,8 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex // For a continuous feed, initialise the lateSequenceFeeds that track late-arriving sequences // to the channel caches. - if options.Continuous { + if options.Continuous || options.RequestPlusSeq > currentCachedSequence { + useLateSequenceFeeds = true lateSequenceFeeds = make(map[channels.ID]*lateSequenceFeed) defer col.closeLateFeeds(lateSequenceFeeds) } @@ -741,7 +744,7 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex // Handles previously skipped sequences prior to options.Since that // have arrived in the channel cache since this changes request started. Only needed for // continuous feeds - one-off changes requests only require the standard channel cache. 
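
// Illustrative sketch (not part of this diff): how a one-shot request_plus changes feed
// could pin ChangesOptions.RequestPlusSeq to the current cluster-wide sequence, so the
// changes loop keeps running until the channel cache catches up. The sequenceSource
// interface, resolveRequestPlus helper and fakeSource type are hypothetical stand-ins;
// in Sync Gateway the value would come from DatabaseContext.GetRequestPlusSequence and
// the REST handler wiring.
package main

import "fmt"

type changesOptions struct {
	Continuous     bool
	RequestPlusSeq uint64
}

// sequenceSource is a hypothetical abstraction over DatabaseContext.GetRequestPlusSequence.
type sequenceSource interface {
	GetRequestPlusSequence() (uint64, error)
}

// resolveRequestPlus pins a one-shot feed to the current sequence when request_plus is
// set; continuous feeds never need it because they don't terminate on catch-up.
func resolveRequestPlus(src sequenceSource, opts *changesOptions, requestPlus bool) error {
	if opts.Continuous || !requestPlus {
		return nil
	}
	seq, err := src.GetRequestPlusSequence()
	if err != nil {
		return err
	}
	opts.RequestPlusSeq = seq
	return nil
}

type fakeSource uint64

func (f fakeSource) GetRequestPlusSequence() (uint64, error) { return uint64(f), nil }

func main() {
	opts := changesOptions{}
	if err := resolveRequestPlus(fakeSource(42), &opts, true); err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("one-shot feed will not stop before cached sequence reaches", opts.RequestPlusSeq)
}
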
- if options.Continuous { + if useLateSequenceFeeds { lateSequenceFeedHandler := lateSequenceFeeds[chanID] if lateSequenceFeedHandler != nil { latefeed, err := col.getLateFeed(lateSequenceFeedHandler, singleChannelCache) @@ -957,14 +960,19 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex } } } - if !options.Continuous && (sentSomething || changeWaiter == nil) { - break + + // Check whether non-continuous changes feeds that aren't waiting to reach requestPlus sequence can exit + if !options.Continuous && currentCachedSequence >= options.RequestPlusSeq { + // If non-longpoll, or longpoll has sent something, can exit + if !options.Wait || sentSomething { + break + } } // For longpoll requests that didn't send any results, reset low sequence to the original since value, // as the system low sequence may change before the longpoll request wakes up, and longpoll feeds don't // use lateSequenceFeeds. - if !options.Continuous { + if !useLateSequenceFeeds { options.Since.LowSeq = requestLowSeq } @@ -981,6 +989,7 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex waitForChanges: for { + col.dbStats().CBLReplicationPull().NumPullReplTotalCaughtUp.Add(1) // If we're in a deferred Backfill, the user may not get notification when the cache catches up to the backfill (e.g. when the granting doc isn't // visible to the user), and so ChangeWaiter.Wait() would block until the next user-visible doc arrives. Use a hardcoded wait instead // Similar handling for when we see sequences later than the stable sequence. @@ -992,7 +1001,6 @@ func (col *DatabaseCollectionWithUser) SimpleMultiChangesFeed(ctx context.Contex break waitForChanges } - col.dbStats().CBLReplicationPull().NumPullReplTotalCaughtUp.Add(1) col.dbStats().CBLReplicationPull().NumPullReplCaughtUp.Add(1) waitResponse := changeWaiter.Wait() col.dbStats().CBLReplicationPull().NumPullReplCaughtUp.Add(-1) @@ -1310,7 +1318,7 @@ func createChangesEntry(ctx context.Context, docid string, db *DatabaseCollectio func (options ChangesOptions) String() string { return fmt.Sprintf( - `{Since: %s, Limit: %d, Conflicts: %t, IncludeDocs: %t, Wait: %t, Continuous: %t, HeartbeatMs: %d, TimeoutMs: %d, ActiveOnly: %t}`, + `{Since: %s, Limit: %d, Conflicts: %t, IncludeDocs: %t, Wait: %t, Continuous: %t, HeartbeatMs: %d, TimeoutMs: %d, ActiveOnly: %t, RequestPlusSeq: %d}`, options.Since, options.Limit, options.Conflicts, @@ -1320,6 +1328,7 @@ func (options ChangesOptions) String() string { options.HeartbeatMs, options.TimeoutMs, options.ActiveOnly, + options.RequestPlusSeq, ) } diff --git a/db/database.go b/db/database.go index e1026af8e2..bf8495de39 100644 --- a/db/database.go +++ b/db/database.go @@ -174,6 +174,8 @@ type DatabaseContextOptions struct { skipRegisterImportPIndex bool // if set, skips the global gocb PIndex registration MetadataStore base.DataStore // If set, use this location/connection for SG metadata storage - if not set, metadata is stored using the same location/connection as the bucket used for data storage. 
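
// Illustrative sketch (not part of this diff): the feed-exit rule added to
// SimpleMultiChangesFeed above, expressed as a standalone predicate so the interaction
// between continuous, longpoll (Wait) and RequestPlusSeq is easier to see. The function
// name and signature are hypothetical; the real code keeps this check inline.
package main

import "fmt"

// canExitChangesLoop reports whether a changes feed may terminate: continuous feeds never
// exit here, a feed waiting on a request_plus sequence must first see the cache catch up,
// and a longpoll (wait) feed must have sent at least one change before exiting.
func canExitChangesLoop(continuous, wait, sentSomething bool, cachedSeq, requestPlusSeq uint64) bool {
	if continuous {
		return false
	}
	if cachedSeq < requestPlusSeq {
		return false
	}
	return !wait || sentSomething
}

func main() {
	// One-shot request_plus feed: cache still behind the requested sequence, keep looping.
	fmt.Println(canExitChangesLoop(false, false, false, 10, 15)) // false
	// Cache caught up, non-longpoll: exit even if nothing was sent.
	fmt.Println(canExitChangesLoop(false, false, false, 15, 15)) // true
	// Longpoll that hasn't sent anything yet keeps waiting.
	fmt.Println(canExitChangesLoop(false, true, false, 15, 15)) // false
}
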
MetadataID string // MetadataID used for metadata storage + BlipStatsReportingInterval int64 // interval to report blip stats in milliseconds + ChangesRequestPlus bool // Sets the default value for request_plus, for non-continuous changes feeds } type ScopesOptions map[string]ScopeOptions @@ -308,17 +310,15 @@ func connectToBucketErrorHandling(ctx context.Context, spec base.BucketSpec, got return false, nil } -type OpenBucketFn func(ctx context.Context, spec base.BucketSpec) (base.Bucket, error) +type OpenBucketFn func(context.Context, base.BucketSpec, bool) (base.Bucket, error) -// connectToBucketFailFast opens a Couchbase connect and return a specific bucket without retrying on failure. -func connectToBucketFailFast(ctx context.Context, spec base.BucketSpec) (bucket base.Bucket, err error) { - bucket, err = base.GetBucket(spec) - _, err = connectToBucketErrorHandling(ctx, spec, err) - return bucket, err -} - -// connectToBucket opens a Couchbase connection and return a specific bucket. -func connectToBucket(ctx context.Context, spec base.BucketSpec) (base.Bucket, error) { +// ConnectToBucket opens a Couchbase connection and return a specific bucket. If failFast is set, fail immediately if the bucket doesn't exist, otherwise retry waiting for bucket to exist. +func ConnectToBucket(ctx context.Context, spec base.BucketSpec, failFast bool) (base.Bucket, error) { + if failFast { + bucket, err := base.GetBucket(spec) + _, err = connectToBucketErrorHandling(ctx, spec, err) + return bucket, err + } // start a retry loop to connect to the bucket backing off double the delay each time worker := func() (bool, error, interface{}) { @@ -340,14 +340,6 @@ func connectToBucket(ctx context.Context, spec base.BucketSpec) (base.Bucket, er return ibucket.(base.Bucket), nil } -// GetConnectToBucketFn returns a different OpenBucketFn to connect to the bucket depending on the value of failFast -func GetConnectToBucketFn(failFast bool) OpenBucketFn { - if failFast { - return connectToBucketFailFast - } - return connectToBucket -} - // Returns Couchbase Server Cluster UUID on a timeout. If running against walrus, do return an empty string. 
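
// Caller-side sketch of the ConnectToBucket consolidation shown above: fail-fast versus
// retry is now selected by a boolean argument instead of choosing a function through
// GetConnectToBucketFn. A minimal example assuming this repository's base and db
// packages; the package name and openBucketExample function are hypothetical.
package example

import (
	"context"

	"github.com/couchbase/sync_gateway/base"
	"github.com/couchbase/sync_gateway/db"
)

func openBucketExample(ctx context.Context, spec base.BucketSpec) (base.Bucket, error) {
	// Previously: db.GetConnectToBucketFn(false)(ctx, spec)
	// Now: failFast=false retries with backoff while waiting for the bucket to exist.
	return db.ConnectToBucket(ctx, spec, false)
}
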
func getServerUUID(ctx context.Context, bucket base.Bucket) (string, error) { gocbV2Bucket, err := base.AsGocbV2Bucket(bucket) @@ -1065,16 +1057,21 @@ func (context *DatabaseContext) Authenticator(ctx context.Context) *auth.Authent if context.Options.UnsupportedOptions != nil && context.Options.UnsupportedOptions.WarningThresholds != nil { channelsWarningThreshold = context.Options.UnsupportedOptions.WarningThresholds.ChannelsPerUser } + var channelServerlessThreshold uint32 + if context.IsServerless() { + channelServerlessThreshold = base.ServerlessChannelLimit + } // Authenticators are lightweight & stateless, so it's OK to return a new one every time authenticator := auth.NewAuthenticator(context.MetadataStore, context, auth.AuthenticatorOptions{ - ClientPartitionWindow: context.Options.ClientPartitionWindow, - ChannelsWarningThreshold: channelsWarningThreshold, - SessionCookieName: sessionCookieName, - BcryptCost: context.Options.BcryptCost, - LogCtx: ctx, - Collections: context.CollectionNames, - MetaKeys: context.MetadataKeys, + ClientPartitionWindow: context.Options.ClientPartitionWindow, + ChannelsWarningThreshold: channelsWarningThreshold, + ServerlessChannelThreshold: channelServerlessThreshold, + SessionCookieName: sessionCookieName, + BcryptCost: context.Options.BcryptCost, + LogCtx: ctx, + Collections: context.CollectionNames, + MetaKeys: context.MetadataKeys, }) return authenticator @@ -2319,3 +2316,10 @@ func (dbc *DatabaseContext) AuthenticatorOptions() auth.AuthenticatorOptions { defaultOptions.MetaKeys = dbc.MetadataKeys return defaultOptions } + +// GetRequestPlusSequence fetches the current value of the sequence counter for the database. +// Uses getSequence (instead of lastSequence) as it's intended to be up to date with allocations +// across all nodes, while lastSequence is just the latest allocation from this node +func (dbc *DatabaseContext) GetRequestPlusSequence() (uint64, error) { + return dbc.sequences.getSequence() +} diff --git a/db/database_test.go b/db/database_test.go index 275f336760..51600e642a 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -49,10 +49,12 @@ func setupTestDBForBucket(t testing.TB, bucket *base.TestBucket) (*Database, con return SetupTestDBForDataStoreWithOptions(t, bucket, dbcOptions) } -func setupTestDBWithOptionsAndImport(t testing.TB, dbcOptions DatabaseContextOptions) (*Database, context.Context) { +func setupTestDBWithOptionsAndImport(t testing.TB, tBucket *base.TestBucket, dbcOptions DatabaseContextOptions) (*Database, context.Context) { ctx := base.TestCtx(t) AddOptionsFromEnvironmentVariables(&dbcOptions) - tBucket := base.GetTestBucket(t) + if tBucket == nil { + tBucket = base.GetTestBucket(t) + } if dbcOptions.Scopes == nil { dbcOptions.Scopes = GetScopesOptions(t, tBucket, 1) } @@ -1849,9 +1851,10 @@ func BenchmarkDatabase(b *testing.B) { for i := 0; i < b.N; i++ { ctx := base.TestCtx(b) - bucket, _ := connectToBucket(ctx, base.BucketSpec{ + bucket, _ := ConnectToBucket(ctx, base.BucketSpec{ Server: base.UnitTestUrl(), - BucketName: fmt.Sprintf("b-%d", i)}) + BucketName: fmt.Sprintf("b-%d", i)}, + true) dbCtx, _ := NewDatabaseContext(ctx, "db", bucket, false, DatabaseContextOptions{}) db, _ := CreateDatabase(dbCtx) collection := GetSingleDatabaseCollectionWithUser(b, db) @@ -1867,9 +1870,10 @@ func BenchmarkPut(b *testing.B) { base.DisableTestLogging(b) ctx := base.TestCtx(b) - bucket, _ := connectToBucket(ctx, base.BucketSpec{ + bucket, _ := ConnectToBucket(ctx, base.BucketSpec{ Server: base.UnitTestUrl(), - 
BucketName: "Bucket"}) + BucketName: "Bucket"}, + true) context, _ := NewDatabaseContext(ctx, "db", bucket, false, DatabaseContextOptions{}) db, _ := CreateDatabase(context) collection := GetSingleDatabaseCollectionWithUser(b, db) @@ -2456,7 +2460,7 @@ func TestDeleteWithNoTombstoneCreationSupport(t *testing.T) { t.Skip("Xattrs required") } - db, ctx := setupTestDBWithOptionsAndImport(t, DatabaseContextOptions{}) + db, ctx := setupTestDBWithOptionsAndImport(t, nil, DatabaseContextOptions{}) defer db.Close(ctx) collection := GetSingleDatabaseCollectionWithUser(t, db) @@ -2971,7 +2975,7 @@ func TestImportCompactPanic(t *testing.T) { } // Set the compaction and purge interval unrealistically low to reproduce faster - db, ctx := setupTestDBWithOptionsAndImport(t, DatabaseContextOptions{ + db, ctx := setupTestDBWithOptionsAndImport(t, nil, DatabaseContextOptions{ CompactInterval: 1, }) defer db.Close(ctx) diff --git a/db/import_listener.go b/db/import_listener.go index 72ef8a8734..7cb9142bcc 100644 --- a/db/import_listener.go +++ b/db/import_listener.go @@ -214,19 +214,7 @@ func (il *importListener) ImportFeedEvent(event sgbucket.FeedEvent) { func (il *importListener) Stop() { if il != nil { if il.cbgtContext != nil { - il.cbgtContext.StopHeartbeatListener() - - // Close open PIndexes before stopping the manager. - _, pindexes := il.cbgtContext.Manager.CurrentMaps() - for _, pIndex := range pindexes { - err := il.cbgtContext.Manager.ClosePIndex(pIndex) - if err != nil { - base.DebugfCtx(il.loggingCtx, base.KeyImport, "Error closing pindex: %v", err) - } - } - // ClosePIndex calls are synchronous, so can stop manager once they've completed - il.cbgtContext.Manager.Stop() - il.cbgtContext.RemoveFeedCredentials(il.dbName) + il.cbgtContext.Stop() // Remove entry from global listener directory base.RemoveDestFactory(il.importDestKey) diff --git a/db/import_test.go b/db/import_test.go index d41412d169..838e273def 100644 --- a/db/import_test.go +++ b/db/import_test.go @@ -525,3 +525,37 @@ func TestImportStampClusterUUID(t *testing.T) { require.NoError(t, err) require.Equal(t, 32, len(xattr["cluster_uuid"])) } + +// TestImporNonZeroStart makes sure docs written before sync gateway start get imported +func TestImportNonZeroStart(t *testing.T) { + if base.UnitTestUrlIsWalrus() { + t.Skip("test requires import feed, which requies DCP") + } + + bucket := base.GetTestBucket(t) + + doc1 := "doc1" + revID1 := "1-2a9efe8178aa817f4414ae976aa032d9" + + _, err := bucket.GetSingleDataStore().Add(doc1, 0, rawDocNoMeta()) + require.NoError(t, err) + + db, ctx := setupTestDBWithOptionsAndImport(t, bucket, DatabaseContextOptions{}) + defer db.Close(ctx) + + collection := GetSingleDatabaseCollectionWithUser(t, db) + _, ok := base.WaitForStat(func() int64 { + return collection.collectionStats.ImportCount.Value() + }, 1) + require.True(t, ok) + + _, ok = base.WaitForStat(func() int64 { + return db.DbStats.Database().DCPReceivedCount.Value() + }, 1) + require.True(t, ok) + + doc, err := collection.GetDocument(base.TestCtx(t), doc1, DocUnmarshalAll) + require.NoError(t, err) + require.Equal(t, revID1, doc.SyncData.CurrentRev) + +} diff --git a/db/revision_cache_interface.go b/db/revision_cache_interface.go index c3fc20aca4..2f82796b72 100644 --- a/db/revision_cache_interface.go +++ b/db/revision_cache_interface.go @@ -162,6 +162,9 @@ func (rev *DocumentRevision) Mutable1xBody(db *DatabaseCollectionWithUser, reque if err != nil { return nil, err } + if b == nil { + return nil, base.RedactErrorf("null doc body for docID: %s 
revID: %s", base.UD(rev.DocID), base.UD(rev.RevID)) + } b[BodyId] = rev.DocID b[BodyRev] = rev.RevID diff --git a/db/sequence_allocator.go b/db/sequence_allocator.go index 1c2f1664f9..db53c0f809 100644 --- a/db/sequence_allocator.go +++ b/db/sequence_allocator.go @@ -53,6 +53,7 @@ type sequenceAllocator struct { lastSequenceReserveTime time.Time // Time of most recent sequence reserve releaseSequenceWait time.Duration // Supports test customization metaKeys *base.MetadataKeys // Key generator for sequence and unused sequence documents + preRequestCount uint64 // Number of sequences pre-requested } func newSequenceAllocator(datastore base.DataStore, dbStatsMap *base.DatabaseStats, metaKeys *base.MetadataKeys) (*sequenceAllocator, error) { @@ -135,6 +136,9 @@ func (s *sequenceAllocator) releaseUnusedSequences() { s.sequenceBatchSize = s.sequenceBatchSize - unusedAmount } + // Ignore any pre-requests if we're releasing sequences + s.preRequestCount = 0 + s.last = s.max s.mutex.Unlock() } @@ -185,9 +189,16 @@ func (s *sequenceAllocator) nextSequence() (sequence uint64, err error) { return sequence, nil } +func (s *sequenceAllocator) reserveRequest(count uint64) { + base.InfofCtx(context.TODO(), base.KeyCRUD, "request for %d sequences on next refill", count) + s.mutex.Lock() + s.preRequestCount += count + s.mutex.Unlock() +} + // Reserve a new sequence range. Called by nextSequence when the previously allocated sequences have all been used. func (s *sequenceAllocator) _reserveSequenceRange() error { - + var count uint64 // If the time elapsed since the last reserveSequenceRange invocation reserve is shorter than our target frequency, // this indicates we're making an incr call more frequently than we want to. Triggers an increase in batch size to // reduce incr frequency. @@ -198,20 +209,36 @@ func (s *sequenceAllocator) _reserveSequenceRange() error { } base.DebugfCtx(context.TODO(), base.KeyCRUD, "Increased sequence batch to %d", s.sequenceBatchSize) } + count = s.sequenceBatchSize + + // If a caller has indicated it will need sequences soon, increase the count: + if s.preRequestCount > count { + count = s.preRequestCount + if count > maxBatchSize { + count = maxBatchSize + } + base.InfofCtx(context.TODO(), base.KeyCRUD, "Reserving %d sequences by special request", count) + } + s.preRequestCount = 0 + + return s._reserveSequenceCount(count) +} - max, err := s.incrementSequence(s.sequenceBatchSize) +// Reserves a specified number of sequences. +func (s *sequenceAllocator) _reserveSequenceCount(count uint64) error { + max, err := s.incrementSequence(count) if err != nil { - base.WarnfCtx(context.TODO(), "Error from incrementSequence in _reserveSequences(%d): %v", s.sequenceBatchSize, err) + base.WarnfCtx(context.TODO(), "Error from incrementSequence in _reserveSequenceCount(%d): %v", count, err) return err } // Update max and last used sequences. 
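
// Illustrative sketch (not part of this diff) of the batch-sizing rule used by
// _reserveSequenceRange above: a refill reserves the usual batch size unless callers have
// pre-requested more via reserveRequest, in which case the larger count wins but is still
// capped at maxBatchSize. The cap value below is assumed for the example only; the real
// constant lives in sequence_allocator.go.
package main

import "fmt"

const maxBatchSize = 50 // assumed cap for this example

func nextReserveCount(batchSize, preRequested uint64) uint64 {
	count := batchSize
	if preRequested > count {
		count = preRequested
		if count > maxBatchSize {
			count = maxBatchSize
		}
	}
	return count
}

func main() {
	fmt.Println(nextReserveCount(10, 0))   // 10: no pre-request, use the normal batch size
	fmt.Println(nextReserveCount(10, 40))  // 40: pre-request larger than the batch, under the cap
	fmt.Println(nextReserveCount(10, 500)) // 50: pre-request capped at maxBatchSize
}
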
Last is updated here to account for sequences allocated/used by other // Sync Gateway nodes s.max = max - s.last = max - s.sequenceBatchSize + s.last = max - count s.lastSequenceReserveTime = time.Now() - s.dbStats.SequenceReservedCount.Add(int64(s.sequenceBatchSize)) + s.dbStats.SequenceReservedCount.Add(int64(count)) return nil } diff --git a/db/sg_replicate_cfg.go b/db/sg_replicate_cfg.go index 37164152fb..952628ecd6 100644 --- a/db/sg_replicate_cfg.go +++ b/db/sg_replicate_cfg.go @@ -1204,6 +1204,12 @@ func (m *sgReplicateManager) GetNumberActiveReplicators() int { return len(m.activeReplicators) } +func (m *sgReplicateManager) GetActiveReplicator(name string) *ActiveReplicator { + m.activeReplicatorsLock.Lock() + defer m.activeReplicatorsLock.Unlock() + return m.activeReplicators[name] +} + // RebalanceReplications distributes the set of defined replications across the set of available nodes func (c *SGRCluster) RebalanceReplications() { diff --git a/db/util_testing.go b/db/util_testing.go index 67433123c8..7ed3e4b79b 100644 --- a/db/util_testing.go +++ b/db/util_testing.go @@ -17,6 +17,7 @@ import ( "testing" "time" + "github.com/couchbase/go-blip" sgbucket "github.com/couchbase/sg-bucket" "github.com/couchbase/sync_gateway/auth" "github.com/couchbase/sync_gateway/base" @@ -95,6 +96,17 @@ func (db *DatabaseContext) WaitForCaughtUp(targetCount int64) error { return errors.New("WaitForCaughtUp didn't catch up") } +func (db *DatabaseContext) WaitForTotalCaughtUp(targetCount int64) error { + for i := 0; i < 100; i++ { + caughtUpCount := db.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + if caughtUpCount >= targetCount { + return nil + } + time.Sleep(100 * time.Millisecond) + } + return errors.New("WaitForCaughtUp didn't catch up") +} + type StatWaiter struct { initCount int64 // Document cached count when NewStatWaiter is called targetCount int64 // Target count used when Wait is called @@ -598,3 +610,21 @@ func GetSingleDatabaseCollection(tb testing.TB, database *DatabaseContext) *Data tb.Fatalf("Could not find a collection") return nil } + +// AllocateTestSequence allocates a sequence via the sequenceAllocator. For use by non-db tests +func AllocateTestSequence(database *DatabaseContext) (uint64, error) { + return database.sequences.incrementSequence(1) +} + +// ReleaseTestSequence releases a sequence via the sequenceAllocator. For use by non-db tests +func ReleaseTestSequence(database *DatabaseContext, sequence uint64) error { + return database.sequences.releaseSequence(sequence) +} + +func (a *ActiveReplicator) GetActiveReplicatorConfig() *ActiveReplicatorConfig { + return a.config +} + +func (apr *ActivePullReplicator) GetBlipSender() *blip.Sender { + return apr.blipSender +} diff --git a/docs/api/components/schemas.yaml b/docs/api/components/schemas.yaml index 40a4ac5cd4..400a6a31f0 100644 --- a/docs/api/components/schemas.yaml +++ b/docs/api/components/schemas.yaml @@ -242,9 +242,11 @@ User: description: A set of access grants by scope and collection. type: object additionalProperties: + x-additionalPropertiesName: scopename description: An object keyed by scope, containing a set of collections. type: object additionalProperties: + x-additionalPropertiesName: collectionname $ref: '#/CollectionAccessConfig' email: description: The email address of the user. @@ -345,9 +347,11 @@ Role: description: A set of access grants by scope and collection. 
type: object additionalProperties: + x-additionalPropertiesName: scopename description: An object keyed by scope, containing a set of collections. type: object additionalProperties: + x-additionalPropertiesName: collectionname $ref: '#/CollectionAccessConfig' title: Role User-session-information: @@ -465,6 +469,7 @@ Document: _attachments: type: object additionalProperties: + x-additionalPropertiesName: attachmentname description: The name of the attachment. type: object properties: @@ -529,15 +534,15 @@ Design-doc: type: string views: type: object - properties: - additionalProperties: - description: The name of the view. - type: object - properties: - map: - type: string - reduce: - type: string + additionalProperties: + x-additionalPropertiesName: viewname + description: The name of the view. + type: object + properties: + map: + type: string + reduce: + type: string options: type: object properties: @@ -1078,19 +1083,24 @@ Replication-status: - replication_id title: Replication-status Scopes: - description: A map of all the collections with their corresponding configs for this scope + description: Scope-specific configuration. type: object - additionalProperties: - $ref: '#/CollectionConfig' + properties: + collections: + description: An object keyed by collection name containing config for the specific collection. + type: object + additionalProperties: + x-additionalPropertiesName: collectionname + $ref: '#/CollectionConfig' title: Scopes CollectionConfig: - description: The configuration for the individual collection + description: Collection-specific configuration. type: object properties: sync: description: The Javascript function that newly created documents in this collection are ran through. type: string - example: 'function(doc){channel(doc.channels);}' + example: 'function(doc){channel("collection name");}' import_filter: description: |- This is the function that all imported documents in this collection are ran through in order to filter out what to import and what not to import. This allows you to control what is made available to Couchbase Mobile clients. If it is not set, then no documents are filtered when imported. @@ -1156,23 +1166,27 @@ Database: type: integer default: 1000 scopes: - description: Scope and collection specific config. + description: An object keyed by scope name containing config for the specific collection. type: object - properties: - additionalProperties: - $ref: '#/Scopes' + additionalProperties: + x-additionalPropertiesName: scopename + $ref: '#/Scopes' name: description: The name of the database. type: string sync: - description: The Javascript function that newly created documents are ran through for the _default scope and collection. + description: The Javascript function that newly created documents are ran through for the default scope and collection. + + If `scopes` parameter is set, this is ignored. type: string default: 'function(doc){channel(doc.channels);}' users: additionalProperties: + x-additionalPropertiesName: username $ref: '#/User' roles: additionalProperties: + x-additionalPropertiesName: rolename $ref: '#/Role' revs_limit: description: |- @@ -1203,9 +1217,11 @@ Database: default: 16 import_filter: description: |- - This is the function that all imported documents in the _default scope and collection are ran through in order to filter out what to import and what not to import. This allows you to control what is made available to Couchbase Mobile clients. If it is not set, then no documents are filtered when imported. 
+ This is the function that all imported documents in the default scope and collection are ran through in order to filter out what to import and what not to import. This allows you to control what is made available to Couchbase Mobile clients. If it is not set, then no documents are filtered when imported. `import_docs` must be true to make this field applicable. + + If `scopes` parameter is set, this is ignored. type: string example: 'function(doc) { if (doc.type != ''mobile'') { return false; } return true; }' import_backup_old_rev: @@ -1231,8 +1247,8 @@ Database: type: string default: DCP enum: - - TAP - DCP + - TAP deprecated: true allow_empty_password: description: This controls whether users that are created can have an empty password or not. @@ -1443,6 +1459,7 @@ Database: description: Configuration for Local JWT authentication. type: object additionalProperties: + x-additionalPropertiesName: providername description: The providers name. type: object required: ['issuer', 'client_id', 'algorithms', 'keys'] @@ -1529,6 +1546,7 @@ Database: description: List of OpenID Connect issuers. type: object additionalProperties: + x-additionalPropertiesName: providername description: The providers name. type: object properties: @@ -1784,9 +1802,8 @@ Event-config: options: description: The options for the event. type: object - properties: - additionalProperties: - description: The option key and value. + additionalProperties: + description: The option key and value. title: Event-config Resync-status: description: The status of a resync operation @@ -2112,6 +2129,9 @@ Startup-config: type: integer maximum: 9 minimum: 0 + max_concurrent_replications: + description: Maximum number of concurrent replication connections allowed. If set to 0 this limit will be ignored. + type: integer readOnly: true unsupported: description: Settings that are not officially supported. It is highly recommended these are **not** used. @@ -2143,12 +2163,14 @@ Startup-config: description: 'A map of database name to credentials, that can be used instead of the bootstrap ones.' type: object additionalProperties: + x-additionalPropertiesName: databasename $ref: '#/CredentialsConfig' readOnly: true bucket_credentials: description: 'A map of bucket names to credentials, that can be used instead of the bootstrap ones.' type: object additionalProperties: + x-additionalPropertiesName: bucketname $ref: '#/CredentialsConfig' readOnly: true max_file_descriptors: @@ -2162,6 +2184,46 @@ Startup-config: type: integer readOnly: true title: Startup-config +Runtime-config: + type: object + properties: + logging: + description: The configuration settings for modifying Sync Gateway logging. + type: object + properties: + log_file_path: + description: Absolute or relative path on the filesystem to the log file directory. A relative path is from the directory that contains the Sync Gateway executable file. + type: string + readOnly: true + redaction_level: + description: Redaction level to apply to log output. + type: string + default: partial + enum: + - none + - partial + - full + - unset + readOnly: true + console: + $ref: '#/Console-logging-config' + error: + $ref: '#/File-logging-config' + warn: + $ref: '#/File-logging-config' + info: + $ref: '#/File-logging-config' + debug: + $ref: '#/File-logging-config' + trace: + $ref: '#/File-logging-config' + stats: + $ref: '#/File-logging-config' + max_concurrent_replications: + description: Maximum number of concurrent replication connections allowed. If set to 0 this limit will be ignored. 
+ type: integer + default: 0 + title: Runtime-config File-logging-config: type: object properties: diff --git a/docs/api/paths/admin/_config.yaml b/docs/api/paths/admin/_config.yaml index 28dd1a84aa..0bc789a2a8 100644 --- a/docs/api/paths/admin/_config.yaml +++ b/docs/api/paths/admin/_config.yaml @@ -49,7 +49,7 @@ put: content: application/json: schema: - $ref: ../../components/schemas.yaml#/Startup-config + $ref: ../../components/schemas.yaml#/Runtime-config responses: '200': description: Successfully set runtime options diff --git a/docs/api/paths/admin/_post_upgrade.yaml b/docs/api/paths/admin/_post_upgrade.yaml index cb803eec47..9f4c23b3ea 100644 --- a/docs/api/paths/admin/_post_upgrade.yaml +++ b/docs/api/paths/admin/_post_upgrade.yaml @@ -32,6 +32,7 @@ post: description: A map of databases. type: object additionalProperties: + x-additionalPropertiesName: db description: The name of the database that was targetted. type: object properties: diff --git a/docs/api/paths/admin/keyspace-_changes.yaml b/docs/api/paths/admin/keyspace-_changes.yaml index 2ddb95633a..1c3819be88 100644 --- a/docs/api/paths/admin/keyspace-_changes.yaml +++ b/docs/api/paths/admin/keyspace-_changes.yaml @@ -96,6 +96,13 @@ get: - longpoll - continuous - websocket + + - name: request_plus + in: query + description: When true, ensures all valid documents written prior to the request being issued are included in the response. This is only applicable for non-continuous feeds. + schema: + type: boolean + default: 'false' responses: '200': $ref: ../../components/responses.yaml#/changes-feed @@ -156,6 +163,9 @@ post: feed: description: 'The type of changes feed to use. ' type: string + request_plus: + description: 'When true, ensures all valid documents written prior to the request being issued are included in the response. This is only applicable for non-continuous feeds.' 
+ type: string responses: '200': $ref: ../../components/responses.yaml#/changes-feed diff --git a/examples/database_config/collections-with-custom-scope.json b/examples/database_config/collections-with-custom-scope.json new file mode 100644 index 0000000000..5d7a6c4bb6 --- /dev/null +++ b/examples/database_config/collections-with-custom-scope.json @@ -0,0 +1,38 @@ +{ + "name": "db", + "bucket": "bucket", + "scopes" : { + "scope1": { + "collections": { + "collection1" : { + "sync": ` + function(doc, oldDoc, meta) { + if (doc.type != "default") { + throw({forbidden : "Rejected document"}) + } + channel("legacy") + } + `, + "import_filter": ` + function(doc) { + return doc.type == "mobile" + } + ` + }, + "collection2" : { + "sync": ` + function(doc, oldDoc, meta) { + channel("collection1") + } + `, + "import_filter": ` + function(doc) { + return doc.type == "mobile" + } + ` + } + } + } + }, + "num_index_replicas": 0 +} diff --git a/examples/database_config/collections-with-default-collection.json b/examples/database_config/collections-with-default-collection.json new file mode 100644 index 0000000000..907b1f659f --- /dev/null +++ b/examples/database_config/collections-with-default-collection.json @@ -0,0 +1,38 @@ +{ + "name": "db", + "bucket": "bucket", + "scopes" : { + "_default": { + "collections": { + "_default" : { + "sync": ` + function(doc, oldDoc, meta) { + if (doc.collection != "default") { + throw({forbidden : "Rejected document"}) + } + channel("legacy") + } + `, + "import_filter": ` + function(doc) { + return doc.type == "mobile" + } + ` + }, + "collection1" : { + "sync": ` + function(doc, oldDoc, meta) { + channel("collection1") + } + `, + "import_filter": ` + function(doc) { + return doc.type == "mobile" + } + ` + } + } + } + }, + "num_index_replicas": 0 +} diff --git a/go.mod b/go.mod index 4aac998a8c..6769b6acee 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,13 @@ go 1.19 require ( github.com/bhoriuchi/graphql-go-tools v1.0.0 github.com/coreos/go-oidc v2.2.1+incompatible - github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 + github.com/couchbase/cbgt v1.3.4 github.com/couchbase/clog v0.1.0 - github.com/couchbase/go-blip v0.0.0-20221021161139-215cbac22bd7 + github.com/couchbase/go-blip v0.0.0-20230606182423-3f1855f3bfe9 github.com/couchbase/go-couchbase v0.1.1 github.com/couchbase/gocb/v2 v2.6.2 - github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911 - github.com/couchbase/gomemcached v0.1.4 + github.com/couchbase/gocbcore/v10 v10.2.4-0.20230511103754-8dd1a95f5f33 + github.com/couchbase/gomemcached v0.2.1 github.com/couchbase/goutils v0.1.2 github.com/couchbase/sg-bucket v0.0.0-20230113211151-ac6a75f57046 github.com/couchbaselabs/go-fleecedelta v0.0.0-20200408160354-2ed3f45fde8f @@ -31,9 +31,9 @@ require ( github.com/samuel/go-metrics v0.0.0-20150819231912-7ccf3e0e1fb1 github.com/shirou/gopsutil v3.21.11+incompatible github.com/stretchr/testify v1.8.2 - golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa + golang.org/x/crypto v0.7.0 golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e - golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1 + golang.org/x/net v0.8.0 golang.org/x/oauth2 v0.0.0-20220718184931-c8730f7fcb92 gopkg.in/couchbaselabs/gocbconnstr.v1 v1.0.4 gopkg.in/square/go-jose.v2 v2.6.0 @@ -46,7 +46,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/couchbase/blance v0.1.3 // indirect - github.com/couchbase/cbauth v0.1.9 // indirect + github.com/couchbase/cbauth v0.1.10 // 
indirect github.com/couchbase/tools-common v0.0.0-20220810163003-4c3c185822d4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-ole/go-ole v1.2.6 // indirect @@ -68,8 +68,8 @@ require ( github.com/stretchr/objx v0.5.0 // indirect github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 // indirect - golang.org/x/text v0.4.0 // indirect + golang.org/x/sys v0.6.0 // indirect + golang.org/x/text v0.8.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.28.0 // indirect gopkg.in/couchbase/gocb.v1 v1.6.7 // indirect diff --git a/go.sum b/go.sum index e41f8d6a7c..55b99b61c6 100644 --- a/go.sum +++ b/go.sum @@ -63,23 +63,23 @@ github.com/coreos/go-oidc v2.2.1+incompatible h1:mh48q/BqXqgjVHpy2ZY7WnWAbenxRjs github.com/coreos/go-oidc v2.2.1+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= github.com/couchbase/blance v0.1.3 h1:CJCirD3+N02Z0w/ybZTqqSJa9XMbsCZO9jHxCEAPQqE= github.com/couchbase/blance v0.1.3/go.mod h1:2Sa/nsJSieN/r3T9LsrUYWeQ015qDsuHybhz4F4JcHU= -github.com/couchbase/cbauth v0.1.9 h1:eco/KAIyEIeLNcEXFG6BTsCLPTVAI28lu9FH91ehvM4= -github.com/couchbase/cbauth v0.1.9/go.mod h1:overPK2NvYkkZBWX0eqmuHB82nhhs8rt7A4wi5u7c2g= -github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46 h1:tRxeXfSHBzAq6mtc9NCXLy+BfI3SFMdQZFH3rYarw5M= -github.com/couchbase/cbgt v1.3.2-0.20230405201040-c0ede9925e46/go.mod h1:tJF3TUUO3ZDBU15auN1gNsIVY3Oo+jj46zIXH4RBxk4= +github.com/couchbase/cbauth v0.1.10 h1:ixJGG9mAgL1HnWKt2eKmJ8gJa0rkaUURtDPDak7Dcts= +github.com/couchbase/cbauth v0.1.10/go.mod h1:YHG+5rUI6GoLLlIViT9IfD0nwqEebBcl66TQ4pDFDw4= +github.com/couchbase/cbgt v1.3.4 h1:T5Wi4TOxRRYSYyhgl9R5mwxpQ8wo7pwTkzbomj9dBVM= +github.com/couchbase/cbgt v1.3.4/go.mod h1:PIJ8U/25mrhsn4Dc3eK9iLXZ98jX1i2YuGH2Od+JVv4= github.com/couchbase/clog v0.1.0 h1:4Kh/YHkhRjMCbdQuvRVsm39XZh4FtL1d8fAwJsHrEPY= github.com/couchbase/clog v0.1.0/go.mod h1:7tzUpEOsE+fgU81yfcjy5N1H6XtbVC8SgOz/3mCjmd4= -github.com/couchbase/go-blip v0.0.0-20221021161139-215cbac22bd7 h1:/GTlMVovmGKrFAl5e7u9CXuhjTlR5a4911Ujou18Q4Q= -github.com/couchbase/go-blip v0.0.0-20221021161139-215cbac22bd7/go.mod h1:nSpldGTqAhTOaDDL0Li2dSE0smqbISKagT7fIqYIRec= +github.com/couchbase/go-blip v0.0.0-20230606182423-3f1855f3bfe9 h1:aBMVnwXfrz9jBAjHMrU4B4JgDI22usPTEioaGA037jU= +github.com/couchbase/go-blip v0.0.0-20230606182423-3f1855f3bfe9/go.mod h1:WstEZkP0F1n3ev8e2mzoUqe4pKYHSJzA1uv73ke/GNQ= github.com/couchbase/go-couchbase v0.1.1 h1:ClFXELcKj/ojyoTYbsY34QUrrYCBi/1G749sXSCkdhk= github.com/couchbase/go-couchbase v0.1.1/go.mod h1:+/bddYDxXsf9qt0xpDUtRR47A2GjaXmGGAqQ/k3GJ8A= github.com/couchbase/gocb/v2 v2.6.2 h1:sZg0+3GiYW7OT53ENEGnkkQMXhVuJ1qOJplvZDlM5Xk= github.com/couchbase/gocb/v2 v2.6.2/go.mod h1:baRw5pIpzHil7q39M2zm+bon+ZgjgNAilkKI813zPiE= github.com/couchbase/gocbcore/v10 v10.2.2/go.mod h1:lYQIIk+tzoMcwtwU5GzPbDdqEkwkH3isI2rkSpfL0oM= -github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911 h1:w9pxVkd0o0Fzk6QgtZGaPj1g9Bt7sNO1eLm2rIteQTA= -github.com/couchbase/gocbcore/v10 v10.2.3-0.20230412164057-d9c465de8911/go.mod h1:lYQIIk+tzoMcwtwU5GzPbDdqEkwkH3isI2rkSpfL0oM= -github.com/couchbase/gomemcached v0.1.4 h1:5n5wmr4dBu+X7XteP8QHP5S9inK9MBjNpN9b7WSQfuA= -github.com/couchbase/gomemcached v0.1.4/go.mod h1:mxliKQxOv84gQ0bJWbI+w9Wxdpt9HjDvgW9MjCym5Vo= +github.com/couchbase/gocbcore/v10 v10.2.4-0.20230511103754-8dd1a95f5f33 
h1:l6O5889o5cJBwWt1gws5q5dzi/slDba/KvGNiWY9ieQ= +github.com/couchbase/gocbcore/v10 v10.2.4-0.20230511103754-8dd1a95f5f33/go.mod h1:lYQIIk+tzoMcwtwU5GzPbDdqEkwkH3isI2rkSpfL0oM= +github.com/couchbase/gomemcached v0.2.1 h1:lDONROGbklo8pOt4Sr4eV436PVEaKDr3o9gUlhv9I2U= +github.com/couchbase/gomemcached v0.2.1/go.mod h1:mxliKQxOv84gQ0bJWbI+w9Wxdpt9HjDvgW9MjCym5Vo= github.com/couchbase/goutils v0.1.2 h1:gWr8B6XNWPIhfalHNog3qQKfGiYyh4K4VhO3P2o9BCs= github.com/couchbase/goutils v0.1.2/go.mod h1:h89Ek/tiOxxqjz30nPPlwZdQbdB8BwgnuBxeoUe/ViE= github.com/couchbase/sg-bucket v0.0.0-20230113211151-ac6a75f57046 h1:fyGdhMTONSnC9Sqhc0f9KXhT5Pmpst7obe3Tg92xQsk= @@ -333,8 +333,8 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa h1:zuSxTR4o9y82ebqCUJYNGJbGPo6sKVl54f/TVDObg1c= -golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.7.0 h1:AvwMYaRytfdeVt3u6mLaxYtErKYjxA2OXjJ1HHq6t3A= +golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -397,8 +397,8 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1 h1:TWZxd/th7FbRSMret2MVQdlI8uT49QEtwZdvJrxjEHU= -golang.org/x/net v0.0.0-20220919232410-f2f64ebce3c1/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= +golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= +golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -459,8 +459,8 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 h1:WIoqL4EROvwiPdUtaip4VcDdpZ4kha7wBWZrbVKCIZg= -golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0 
h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -470,8 +470,8 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= -golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= +golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/jenkins-integration-build.sh b/jenkins-integration-build.sh index 509d0f52ee..6504d673dd 100755 --- a/jenkins-integration-build.sh +++ b/jenkins-integration-build.sh @@ -149,7 +149,7 @@ if [ "${PIPESTATUS[0]}" -ne "0" ]; then # If test exit code is not 0 (failed) fi # Collect CBS logs if server error occurred -if [ "${SG_CBCOLLECT_ALWAYS:-}" == "true" ] || grep -a -q "server logs for details\|Timed out after 1m0s waiting for a bucket to become available\|unambiguous timeout" "${INT_LOG_FILE_NAME}.out.raw"; then +if [ "${SG_CBCOLLECT_ALWAYS:-}" == "true" ] || grep -a -q "server logs for details\|Timed out after 1m0s waiting for a bucket to become available" "${INT_LOG_FILE_NAME}.out.raw"; then docker exec -t couchbase /opt/couchbase/bin/cbcollect_info /workspace/cbcollect.zip fi diff --git a/manifest/3.0.xml b/manifest/3.0.xml index 2a16128f8a..9a96645ace 100644 --- a/manifest/3.0.xml +++ b/manifest/3.0.xml @@ -25,14 +25,14 @@ licenses/APL2.txt. - + - + diff --git a/manifest/3.0/3.0.6.xml b/manifest/3.0/3.0.6.xml index 218d1abaea..8f38b22ca0 100644 --- a/manifest/3.0/3.0.6.xml +++ b/manifest/3.0/3.0.6.xml @@ -25,7 +25,7 @@ licenses/APL2.txt. 
- + diff --git a/manifest/3.0/3.0.7.xml b/manifest/3.0/3.0.7.xml new file mode 100644 index 0000000000..137a2c494a --- /dev/null +++ b/manifest/3.0/3.0.7.xml @@ -0,0 +1,162 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/manifest/product-config.json b/manifest/product-config.json index fd5d46acb4..438af693be 100644 --- a/manifest/product-config.json +++ b/manifest/product-config.json @@ -400,10 +400,20 @@ "trigger_blackduck": true, "start_build": 7 }, - "manifest/3.0.xml": { + "manifest/3.0/3.0.7.xml": { + "do-build": false, "release": "3.0.7", "release_name": "Couchbase Sync Gateway 3.0.7", "production": true, + "interval": 1440, + "go_version": "1.16.15", + "trigger_blackduck": true, + "start_build": 7 + }, + "manifest/3.0.xml": { + "release": "3.0.8", + "release_name": "Couchbase Sync Gateway 3.0.8", + "production": true, "interval": 120, "go_version": "1.16.15", "trigger_blackduck": true, diff --git a/rest/admin_api.go b/rest/admin_api.go index ee67dff011..324129eb63 100644 --- a/rest/admin_api.go +++ b/rest/admin_api.go @@ -411,6 +411,7 @@ func (h *handler) handlePutConfig() error { Trace FileLoggerPutConfig `json:"trace,omitempty"` Stats FileLoggerPutConfig `json:"stats,omitempty"` } `json:"logging"` + ReplicationLimit *int `json:"max_concurrent_replications,omitempty"` } var config ServerPutConfig @@ -462,6 +463,16 @@ func (h *handler) handlePutConfig() error { base.EnableStatsLogger(*config.Logging.Stats.Enabled) } + if config.ReplicationLimit != nil { + if *config.ReplicationLimit < 0 { + return base.HTTPErrorf(http.StatusBadRequest, "replication limit cannot be less than 0") + } + h.server.Config.Replicator.MaxConcurrentReplications = *config.ReplicationLimit + h.server.ActiveReplicationsCounter.lock.Lock() + h.server.ActiveReplicationsCounter.activeReplicatorLimit = *config.ReplicationLimit + h.server.ActiveReplicationsCounter.lock.Unlock() + } + return base.HTTPErrorf(http.StatusOK, "Updated") } diff --git a/rest/adminapitest/admin_api_test.go b/rest/adminapitest/admin_api_test.go index 8bf10b64ed..51b18b9743 100644 --- a/rest/adminapitest/admin_api_test.go +++ b/rest/adminapitest/admin_api_test.go @@ -241,6 +241,62 @@ func TestLoggingKeys(t *testing.T) { assert.Equal(t, map[string]interface{}{}, noLogKeys) } +func TestServerlessChangesEndpointLimit(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyReplicate, base.KeyHTTP, base.KeyHTTPResp, base.KeySync, base.KeySyncMsg, base.KeyChanges) + rt := rest.NewRestTester(t, &rest.RestTesterConfig{ + SyncFn: `function(doc) {channel(doc.channel);}`, + }) + defer rt.Close() + + resp := rt.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 2}`) + rest.RequireStatus(t, resp, http.StatusOK) + resp = rt.SendAdminRequest("PUT", "/db/_user/alice", rest.GetUserPayload(t, "alice", "letmein", "", rt.GetSingleTestDatabaseCollection(), []string{"ABC"}, nil)) + rest.RequireStatus(t, resp, 201) + + // Put several documents in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs1", `{"value":1, "channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs2", `{"value":2, "channel":["PBS"]}`) + rest.RequireStatus(t, response, 
201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs3", `{"value":3, "channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + + changesJSON := `{"style":"all_docs", + "heartbeat":300000, + "feed":"longpoll", + "limit":50, + "since":"1", + "filter":"` + base.ByChannelFilter + `", + "channels":"ABC,PBS"}` + var wg sync.WaitGroup + wg.Add(2) + + // send some changes requests in go routines to run concurrently along with test + go func() { + defer wg.Done() + resp1 := rt.SendUserRequest(http.MethodPost, "/{{.keyspace}}/_changes", changesJSON, "alice") + rest.RequireStatus(t, resp1, http.StatusOK) + }() + + go func() { + defer wg.Done() + resp2 := rt.SendUserRequest(http.MethodPost, "/{{.keyspace}}/_changes", changesJSON, "alice") + rest.RequireStatus(t, resp2, http.StatusOK) + }() + + // assert count for replicators is correct according to changes request made above + rt.WaitForActiveReplicatorCount(2) + + // assert this request is rejected due to this request taking us over the limit + resp = rt.SendAdminRequest(http.MethodGet, "/{{.keyspace}}/_changes?feed=longpoll&since=999999&timeout=100000", "") + rest.RequireStatus(t, resp, http.StatusServiceUnavailable) + // put doc to end changes feeds + resp = rt.SendAdminRequest("PUT", "/{{.keyspace}}/abc1", `{"value":3, "channel":["ABC"]}`) + rest.RequireStatus(t, resp, 201) + wg.Wait() +} + func TestLoggingLevels(t *testing.T) { if base.GlobalTestLoggingSet.IsTrue() { t.Skip("Test does not work when a global test log level is set") @@ -4311,3 +4367,30 @@ func TestPerDBCredsOverride(t *testing.T) { assert.Equal(t, "invalidUsername", configs["db"].BucketConfig.Username) assert.Equal(t, "invalidPassword", configs["db"].BucketConfig.Password) } + +// Can be used to reproduce connections left open after database close. Manually deleting the bucket used by the test +// once the test reaches the sleep loop will log connection errors for unclosed connections. 
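
For reference alongside TestServerlessChangesEndpointLimit above: the new max_concurrent_replications setting is applied at runtime through the admin /_config endpoint handled by handlePutConfig. A minimal client-side sketch of that call, assuming the admin API is reachable on the conventional localhost:4985 (host and port are not established by this diff):

package main

import (
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	// PUT the new runtime replication limit to the admin config endpoint.
	req, err := http.NewRequest(http.MethodPut, "http://localhost:4985/_config",
		strings.NewReader(`{"max_concurrent_replications": 2}`))
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// handlePutConfig reports success as an HTTP 200 ("Updated") response.
	fmt.Println("status:", resp.StatusCode)
}
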
+func TestDeleteDatabaseCBGTTeardown(t *testing.T) { + t.Skip("Dev-time test used to repro agent connections being left open after database close") + if base.UnitTestUrlIsWalrus() { + t.Skip("This test only works against Couchbase Server") + } + base.SetUpTestLogging(t, base.LevelTrace, base.KeyHTTP, base.KeyImport) + + rtConfig := rest.RestTesterConfig{DatabaseConfig: &rest.DatabaseConfig{DbConfig: rest.DbConfig{AutoImport: true}}} + rt := rest.NewRestTester(t, &rtConfig) + defer rt.Close() + // Initialize database + _ = rt.GetDatabase() + + for i := 0; i < 1; i++ { + time.Sleep(1 * time.Second) // some time for polling + } + + resp := rt.SendAdminRequest(http.MethodDelete, "/db/", "") + rest.RequireStatus(t, resp, http.StatusOK) + + for i := 0; i < 1000; i++ { + time.Sleep(1 * time.Second) // some time for polling + } +} diff --git a/rest/api.go b/rest/api.go index 21884eab03..54001e6392 100644 --- a/rest/api.go +++ b/rest/api.go @@ -218,7 +218,7 @@ func (h *handler) handleFlush() error { } // Manually re-open a temporary bucket connection just for flushing purposes - tempBucketForFlush, err := db.GetConnectToBucketFn(false)(h.ctx(), spec) + tempBucketForFlush, err := db.ConnectToBucket(h.ctx(), spec, false) if err != nil { return err } @@ -330,7 +330,7 @@ func (h *handler) handlePostResync() error { } if dbState != db.DBOffline { - return base.HTTPErrorf(http.StatusServiceUnavailable, "Database must be _offline before calling _resync") + return base.HTTPErrorf(http.StatusServiceUnavailable, "Database must be _offline before calling _resync, current state: %s", db.RunStateString[dbState]) } } diff --git a/rest/api_test.go b/rest/api_test.go index 6458df6b27..04a3c78dcc 100644 --- a/rest/api_test.go +++ b/rest/api_test.go @@ -260,8 +260,12 @@ func TestCORSOrigin(t *testing.T) { response := rt.SendRequestWithHeaders(method, "/{{.keyspace}}/", "", reqHeaders) assert.Equal(t, tc.headerOutput, response.Header().Get("Access-Control-Allow-Origin")) if method == http.MethodGet { - RequireStatus(t, response, http.StatusBadRequest) - require.Contains(t, response.Body.String(), invalidDatabaseName) + if base.TestsUseNamedCollections() { + RequireStatus(t, response, http.StatusBadRequest) + require.Contains(t, response.Body.String(), invalidDatabaseName) + } else { // CBG-2978, should not be different from GSI/collections + RequireStatus(t, response, http.StatusUnauthorized) + } } else { RequireStatus(t, response, http.StatusNoContent) @@ -2678,6 +2682,19 @@ func TestDocChannelSetPruning(t *testing.T) { assert.Equal(t, uint64(12), syncData.ChannelSetHistory[0].End) } +func TestNullDocHandlingForMutable1xBody(t *testing.T) { + rt := NewRestTester(t, nil) + defer rt.Close() + collection := rt.GetSingleTestDatabaseCollectionWithUser() + + documentRev := db.DocumentRevision{DocID: "doc1", BodyBytes: []byte("null")} + + body, err := documentRev.Mutable1xBody(collection, nil, nil, false) + require.Error(t, err) + require.Nil(t, body) + assert.Contains(t, err.Error(), "null doc body for doc") +} + func TestTombstoneCompactionAPI(t *testing.T) { rt := NewRestTester(t, nil) rt.GetDatabase().PurgeInterval = 0 diff --git a/rest/attachment_test.go b/rest/attachment_test.go index d182d90772..1874307f2c 100644 --- a/rest/attachment_test.go +++ b/rest/attachment_test.go @@ -2840,17 +2840,17 @@ func TestProveAttachmentNotFound(t *testing.T) { attachmentData := []byte("attachmentA") attachmentDataEncoded := base64.StdEncoding.EncodeToString(attachmentData) - 
bt.blipContext.HandlerForProfile[db.MessageProveAttachment] = func(msg *blip.Message) { + bt.dispatcher.SetHandler(db.MessageProveAttachment, blip.AsAsyncHandler(func(msg *blip.Message) { status, errMsg := base.ErrorAsHTTPStatus(db.ErrAttachmentNotFound) msg.Response().SetError("HTTP", status, errMsg) - } + })) // Handler for when full attachment is requested - bt.blipContext.HandlerForProfile[db.MessageGetAttachment] = func(msg *blip.Message) { + bt.dispatcher.SetHandler(db.MessageGetAttachment, blip.AsAsyncHandler(func(msg *blip.Message) { resp := msg.Response() resp.SetBody(attachmentData) resp.SetCompressed(msg.Properties[db.BlipCompress] == "true") - } + })) // Initial set up sent, _, _, err := bt.SendRev("doc1", "1-abc", []byte(`{"key": "val", "_attachments": {"attachment": {"data": "`+attachmentDataEncoded+`"}}}`), blip.Properties{}) diff --git a/rest/attachmentcompactiontest/attachment_compaction_api_test.go b/rest/attachmentcompactiontest/attachment_compaction_api_test.go index 8cc82ba89e..39efe0c24e 100644 --- a/rest/attachmentcompactiontest/attachment_compaction_api_test.go +++ b/rest/attachmentcompactiontest/attachment_compaction_api_test.go @@ -15,6 +15,7 @@ import ( "testing" "time" + "github.com/couchbase/gocbcore/v10" "github.com/couchbase/sync_gateway/base" "github.com/couchbase/sync_gateway/db" "github.com/couchbase/sync_gateway/rest" @@ -415,3 +416,65 @@ func TestAttachmentCompactionAbort(t *testing.T) { status := rt.WaitForAttachmentCompactionStatus(t, db.BackgroundProcessStateStopped) assert.Equal(t, int64(0), status.PurgedAttachments) } + +func TestAttachmentCompactionMarkPhaseRollback(t *testing.T) { + if base.UnitTestUrlIsWalrus() { + t.Skip("This test only works against Couchbase Server") + } + var garbageVBUUID gocbcore.VbUUID = 1234 + base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll) + + rt := rest.NewRestTesterDefaultCollection(t, nil) + defer rt.Close() + dataStore := rt.GetSingleDataStore() + + // Create some 'unmarked' attachments + makeUnmarkedDoc := func(docid string) { + err := dataStore.SetRaw(docid, 0, nil, []byte("{}")) + require.NoError(t, err) + } + + for i := 0; i < 1000; i++ { + docID := fmt.Sprintf("%s%s%d", base.AttPrefix, "unmarked", i) + makeUnmarkedDoc(docID) + } + + // kick off compaction and wait for "mark" phase to begin + resp := rt.SendAdminRequest("POST", "/{{.db}}/_compact?type=attachment", "") + rest.RequireStatus(t, resp, http.StatusOK) + _ = rt.WaitForAttachmentCompactionStatus(t, db.BackgroundProcessStateRunning) + + // immediately stop the compaction process (we just need the status data to be persisted to the bucket) + resp = rt.SendAdminRequest("POST", "/{{.db}}/_compact?type=attachment&action=stop", "") + rest.RequireStatus(t, resp, http.StatusOK) + stat := rt.WaitForAttachmentCompactionStatus(t, db.BackgroundProcessStateStopped) + require.Equal(t, db.MarkPhase, stat.Phase) + + // alter persisted dcp metadata from the first run to force a rollback + name := db.GenerateCompactionDCPStreamName(stat.CompactID, "mark") + checkpointPrefix := fmt.Sprintf("%s:%v", "_sync:dcp_ck:", name) + + meta := base.NewDCPMetadataCS(dataStore, 1024, 8, checkpointPrefix) + vbMeta := meta.GetMeta(0) + vbMeta.VbUUID = garbageVBUUID + meta.SetMeta(0, vbMeta) + meta.Persist(0, []uint16{0}) + + // kick off a new run attempting to start it again (should force into rollback handling) + resp = rt.SendAdminRequest("POST", "/{{.db}}/_compact?type=attachment&action=start", "") + rest.RequireStatus(t, resp, http.StatusOK) + _ = 
rt.WaitForAttachmentCompactionStatus(t, db.BackgroundProcessStateCompleted) + + // Validate results of recovered attachment compaction process + resp = rt.SendAdminRequest("GET", "/{{.db}}/_compact?type=attachment", "") + rest.RequireStatus(t, resp, http.StatusOK) + + // validate that the compaction process actually recovered from rollback by checking stats + var response db.AttachmentManagerResponse + err := base.JSONUnmarshal(resp.BodyBytes(), &response) + require.NoError(t, err) + require.Equal(t, db.BackgroundProcessStateCompleted, response.State) + require.Equal(t, int64(0), response.MarkedAttachments) + require.Equal(t, int64(1000), response.PurgedAttachments) + +} diff --git a/rest/blip_api_attachment_test.go b/rest/blip_api_attachment_test.go index a322369263..69cd0ca280 100644 --- a/rest/blip_api_attachment_test.go +++ b/rest/blip_api_attachment_test.go @@ -178,6 +178,121 @@ func TestBlipPushPullV2AttachmentV3Client(t *testing.T) { assert.Equal(t, int64(1), rt.GetDatabase().DbStats.CBLReplicationPush().AttachmentPushCount.Value()) assert.Equal(t, int64(11), rt.GetDatabase().DbStats.CBLReplicationPush().AttachmentPushBytes.Value()) } + +// TestBlipProveAttachmentV2 ensures that CBL's proveAttachment for deduplication is working correctly even for v2 attachments which aren't de-duped on the server side. +func TestBlipProveAttachmentV2(t *testing.T) { + base.SetUpTestLogging(t, base.LevelTrace, base.KeyAll) + rtConfig := RestTesterConfig{ + GuestEnabled: true, + } + rt := NewRestTester(t, &rtConfig) + defer rt.Close() + + btc, err := NewBlipTesterClientOptsWithRT(t, rt, &BlipTesterClientOpts{ + SupportedBLIPProtocols: []string{db.BlipCBMobileReplicationV2}, + }) + require.NoError(t, err) + defer btc.Close() + + err = btc.StartPull() + assert.NoError(t, err) + + const ( + doc1ID = "doc1" + doc2ID = "doc2" + ) + + const ( + attachmentName = "hello.txt" + attachmentData = "hello world" + ) + + var ( + attachmentDataB64 = base64.StdEncoding.EncodeToString([]byte(attachmentData)) + attachmentDigest = "sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=" + ) + + // Create two docs with the same attachment data on SG - v2 attachments intentionally result in two copies, + // CBL will still de-dupe attachments based on digest, so will still try proveAttachmnet for the 2nd. 
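
A side note on the hard-coded attachmentDigest used in this test: Sync Gateway attachment digests appear to be "sha1-" plus the base64-encoded SHA-1 of the raw attachment bytes (the convention db.Sha1DigestKey follows; treat the exact helper behaviour as an assumption). A standalone check that reproduces the digest for "hello world":

package main

import (
	"crypto/sha1"
	"encoding/base64"
	"fmt"
)

func main() {
	// SHA-1 the raw attachment bytes, then base64 the digest and prefix "sha1-".
	sum := sha1.Sum([]byte("hello world"))
	fmt.Println("sha1-" + base64.StdEncoding.EncodeToString(sum[:]))
	// Prints: sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=
}
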
+ doc1Body := fmt.Sprintf(`{"greetings":[{"hi": "alice"}],"_attachments":{"%s":{"data":"%s"}}}`, attachmentName, attachmentDataB64) + response := rt.SendAdminRequest(http.MethodPut, "/{{.keyspace}}/"+doc1ID, doc1Body) + RequireStatus(t, response, http.StatusCreated) + doc1RevID := RespRevID(t, response) + + data, ok := btc.WaitForRev(doc1ID, doc1RevID) + require.True(t, ok) + bodyTextExpected := fmt.Sprintf(`{"greetings":[{"hi":"alice"}],"_attachments":{"%s":{"revpos":1,"length":%d,"stub":true,"digest":"%s"}}}`, attachmentName, len(attachmentData), attachmentDigest) + require.JSONEq(t, bodyTextExpected, string(data)) + + // create doc2 now that we know the client has the attachment + doc2Body := fmt.Sprintf(`{"greetings":[{"howdy": "bob"}],"_attachments":{"%s":{"data":"%s"}}}`, attachmentName, attachmentDataB64) + response = rt.SendAdminRequest(http.MethodPut, "/{{.keyspace}}/"+doc2ID, doc2Body) + RequireStatus(t, response, http.StatusCreated) + doc2RevID := RespRevID(t, response) + + data, ok = btc.WaitForRev(doc2ID, doc2RevID) + require.True(t, ok) + bodyTextExpected = fmt.Sprintf(`{"greetings":[{"howdy":"bob"}],"_attachments":{"%s":{"revpos":1,"length":%d,"stub":true,"digest":"%s"}}}`, attachmentName, len(attachmentData), attachmentDigest) + require.JSONEq(t, bodyTextExpected, string(data)) + + assert.Equal(t, int64(2), rt.GetDatabase().DbStats.CBLReplicationPull().RevSendCount.Value()) + assert.Equal(t, int64(0), rt.GetDatabase().DbStats.CBLReplicationPull().RevErrorCount.Value()) + assert.Equal(t, int64(1), rt.GetDatabase().DbStats.CBLReplicationPull().AttachmentPullCount.Value()) + assert.Equal(t, int64(len(attachmentData)), rt.GetDatabase().DbStats.CBLReplicationPull().AttachmentPullBytes.Value()) +} + +// TestBlipProveAttachmentV2Push ensures that CBL's attachment deduplication is ignored for push replications - resulting in new server-side digests and duplicated attachment data (v2 attachment format). +func TestBlipProveAttachmentV2Push(t *testing.T) { + base.SetUpTestLogging(t, base.LevelTrace, base.KeyAll) + rtConfig := RestTesterConfig{ + GuestEnabled: true, + } + rt := NewRestTester(t, &rtConfig) + defer rt.Close() + + btc, err := NewBlipTesterClientOptsWithRT(t, rt, &BlipTesterClientOpts{ + SupportedBLIPProtocols: []string{db.BlipCBMobileReplicationV2}, + }) + require.NoError(t, err) + defer btc.Close() + + const ( + doc1ID = "doc1" + doc2ID = "doc2" + ) + + const ( + attachmentName = "hello.txt" + attachmentData = "hello world" + ) + + var ( + attachmentDataB64 = base64.StdEncoding.EncodeToString([]byte(attachmentData)) + // attachmentDigest = "sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=" + ) + + // Create two docs with the same attachment data on the client - v2 attachments intentionally result in two copies stored on the server, despite the client being able to share the data for both. + doc1Body := fmt.Sprintf(`{"greetings":[{"hi": "alice"}],"_attachments":{"%s":{"data":"%s"}}}`, attachmentName, attachmentDataB64) + doc1revID, err := btc.PushRev(doc1ID, "", []byte(doc1Body)) + require.NoError(t, err) + + err = rt.WaitForRev(doc1ID, doc1revID) + require.NoError(t, err) + + // create doc2 now that we know the server has the attachment - SG should still request the attachment data from the client. 
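
The inline-attachment bodies in these two tests are assembled with fmt.Sprintf; for readers less familiar with the 1.x body shape, the same structure can be produced with encoding/json (an illustrative helper only, not part of the test harness):

package main

import (
	"encoding/base64"
	"encoding/json"
	"fmt"
)

// inlineAttachmentBody builds a doc body with a single inline (base64 "data") attachment.
func inlineAttachmentBody(greetings []map[string]string, attName string, attData []byte) ([]byte, error) {
	body := map[string]interface{}{
		"greetings": greetings,
		"_attachments": map[string]interface{}{
			attName: map[string]interface{}{
				"data": base64.StdEncoding.EncodeToString(attData),
			},
		},
	}
	return json.Marshal(body)
}

func main() {
	b, err := inlineAttachmentBody([]map[string]string{{"hi": "alice"}}, "hello.txt", []byte("hello world"))
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
}
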
+ doc2Body := fmt.Sprintf(`{"greetings":[{"howdy": "bob"}],"_attachments":{"%s":{"data":"%s"}}}`, attachmentName, attachmentDataB64) + doc2RevID, err := btc.PushRev(doc2ID, "", []byte(doc2Body)) + require.NoError(t, err) + + err = rt.WaitForRev(doc2ID, doc2RevID) + require.NoError(t, err) + + assert.Equal(t, int64(2), rt.GetDatabase().DbStats.CBLReplicationPush().DocPushCount.Value()) + assert.Equal(t, int64(0), rt.GetDatabase().DbStats.CBLReplicationPush().DocPushErrorCount.Value()) + assert.Equal(t, int64(2), rt.GetDatabase().DbStats.CBLReplicationPush().AttachmentPushCount.Value()) + assert.Equal(t, int64(2*len(attachmentData)), rt.GetDatabase().DbStats.CBLReplicationPush().AttachmentPushBytes.Value()) +} + func TestBlipPushPullNewAttachmentCommonAncestor(t *testing.T) { base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll) rtConfig := RestTesterConfig{ diff --git a/rest/blip_api_crud_test.go b/rest/blip_api_crud_test.go index 356b51074a..2fc8d2ba60 100644 --- a/rest/blip_api_crud_test.go +++ b/rest/blip_api_crud_test.go @@ -104,7 +104,7 @@ func TestBlipPushRevisionInspectChanges(t *testing.T) { receivedChangesRequestWg := sync.WaitGroup{} // When this test sends subChanges, Sync Gateway will send a changes request that must be handled - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { log.Printf("got changes message: %+v", request) body, err := request.Body() @@ -136,7 +136,7 @@ func TestBlipPushRevisionInspectChanges(t *testing.T) { receivedChangesRequestWg.Done() - } + }) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := bt.newRequest() @@ -176,11 +176,11 @@ func TestContinuousChangesSubscription(t *testing.T) { var numbatchesReceived int32 nonIntegerSequenceReceived := false changeCount := 0 - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { body, err := request.Body() require.NoError(t, err) - log.Printf("got change with body %s, count %d", body, changeCount) + log.Printf("got change msg #%d with body %s, count %d", request.SerialNumber(), body, changeCount) if string(body) != "null" { atomic.AddInt32(&numbatchesReceived, 1) @@ -199,7 +199,7 @@ func TestContinuousChangesSubscription(t *testing.T) { // Make sure sequence numbers are monotonically increasing receivedSeq, ok := change[0].(float64) if ok { - assert.True(t, receivedSeq > lastReceivedSeq) + assert.Greater(t, receivedSeq, lastReceivedSeq, "Sequences out of order") lastReceivedSeq = receivedSeq } else { nonIntegerSequenceReceived = true @@ -228,7 +228,7 @@ func TestContinuousChangesSubscription(t *testing.T) { response.SetBody(emptyResponseValBytes) } - } + }) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := bt.newRequest() @@ -295,8 +295,8 @@ func TestBlipOneShotChangesSubscription(t *testing.T) { lastReceivedSeq := float64(0) var numbatchesReceived int32 nonIntegerSequenceReceived := false - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + changesHandler := func(request *blip.Message) { body, err := request.Body() require.NoError(t, err) @@ -318,7 +318,7 @@ func TestBlipOneShotChangesSubscription(t *testing.T) { // Make sure sequence numbers are monotonically increasing receivedSeq, ok := change[0].(float64) if ok { - assert.True(t, receivedSeq > 
lastReceivedSeq) + assert.Greater(t, receivedSeq, lastReceivedSeq) lastReceivedSeq = receivedSeq } else { nonIntegerSequenceReceived = true @@ -347,8 +347,13 @@ func TestBlipOneShotChangesSubscription(t *testing.T) { assert.NoError(t, err, "Error marshalling response") response.SetBody(emptyResponseValBytes) } + } + dispatcher := blip.ThrottlingDispatcher{ + MaxConcurrency: 1, + Handler: blip.AsAsyncHandler(changesHandler), } + bt.blipContext.RequestHandler = dispatcher.Dispatch // Increment waitgroup to account for the expected 'caught up' nil changes entry. receivedChangesWg.Add(1) @@ -456,14 +461,18 @@ func TestBlipSubChangesDocIDFilter(t *testing.T) { } // When this test sends subChanges, Sync Gateway will send a changes request that must be handled + var changesMutex sync.Mutex lastReceivedSeq := float64(0) var numbatchesReceived int32 nonIntegerSequenceReceived := false - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { + changesMutex.Lock() + defer changesMutex.Unlock() body, err := request.Body() require.NoError(t, err) + log.Printf("Received changes: %s", body) //TEMP if string(body) != "null" { @@ -483,7 +492,7 @@ func TestBlipSubChangesDocIDFilter(t *testing.T) { // Make sure sequence numbers are monotonically increasing receivedSeq, ok := change[0].(float64) if ok { - assert.True(t, receivedSeq > lastReceivedSeq) + assert.Greater(t, receivedSeq, lastReceivedSeq) lastReceivedSeq = receivedSeq } else { nonIntegerSequenceReceived = true @@ -523,7 +532,7 @@ func TestBlipSubChangesDocIDFilter(t *testing.T) { response.SetBody(emptyResponseValBytes) } - } + }) // Increment waitgroup to account for the expected 'caught up' nil changes entry. receivedChangesWg.Add(1) @@ -885,7 +894,7 @@ function(doc, oldDoc) { // Write a doc that grants access to itself for the active replication's user func TestContinuousChangesDynamicGrant(t *testing.T) { - base.SetUpTestLogging(t, base.LevelInfo, base.KeyHTTP, base.KeySync, base.KeySyncMsg, base.KeyChanges, base.KeyCache) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyHTTP, base.KeySync, base.KeySyncMsg, base.KeyWebSocket, base.KeyChanges, base.KeyCache) // Initialize restTester here, so that we can use custom sync function, and later modify user syncFunction := ` function(doc, oldDoc) { @@ -916,7 +925,7 @@ function(doc, oldDoc) { var numbatchesReceived int32 nonIntegerSequenceReceived := false changeCount := 0 - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { body, err := request.Body() require.NoError(t, err) @@ -928,7 +937,7 @@ function(doc, oldDoc) { // Expected changes body: [[1,"foo","1-abc"]] changeListReceived := [][]interface{}{} err = base.JSONUnmarshal(body, &changeListReceived) - assert.NoError(t, err, "Error unmarshalling changes received") + assert.NoError(t, err, "Error unmarshalling changes received: %s", body) for _, change := range changeListReceived { @@ -962,10 +971,10 @@ function(doc, oldDoc) { response.SetBody(responseValBytes) } - } + }) // -------- Rev handler callback -------- - bt.blipContext.HandlerForProfile["rev"] = func(request *blip.Message) { + bt.dispatcher.SetHandler("rev", blip.AsAsyncHandler(func(request *blip.Message) { defer revsFinishedWg.Done() body, err := request.Body() require.NoError(t, err) @@ -979,7 +988,7 @@ function(doc, oldDoc) { _, isRemoved := doc[db.BodyRemoved] assert.False(t, isRemoved) - } + })) // Send subChanges to 
subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := bt.newRequest() @@ -1043,7 +1052,7 @@ function(doc, oldDoc) { var numbatchesReceived int32 nonIntegerSequenceReceived := false changeCount := 0 - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { body, err := request.Body() require.NoError(t, err) @@ -1092,10 +1101,10 @@ function(doc, oldDoc) { response.SetBody(responseValBytes) } - } + }) // -------- Rev handler callback -------- - bt.blipContext.HandlerForProfile["rev"] = func(request *blip.Message) { + bt.dispatcher.SetHandler("rev", blip.AsAsyncHandler(func(request *blip.Message) { defer revsFinishedWg.Done() body, err := request.Body() require.NoError(t, err) @@ -1108,7 +1117,7 @@ function(doc, oldDoc) { _, isRemoved := doc[db.BodyRemoved] require.False(t, isRemoved, fmt.Sprintf("Document %v shouldn't be removed", request.Properties[db.RevMessageID])) - } + })) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := bt.newRequest() @@ -1790,6 +1799,8 @@ func TestMissingNoRev(t *testing.T) { defer rt.Close() ctx := rt.Context() + base.SetUpTestLogging(t, base.LevelInfo, base.KeyCRUD, base.KeySync, base.KeySyncMsg, base.KeyWebSocket) + bt, err := NewBlipTesterFromSpecWithRT(t, nil, rt) require.NoError(t, err, "Unexpected error creating BlipTester") defer bt.Close() @@ -2022,7 +2033,7 @@ func TestRemovedMessageWithAlternateAccess(t *testing.T) { messages := btc.pullReplication.GetMessages() var highestMsgSeq uint32 - var highestSeqMsg blip.Message + var highestSeqMsg *blip.Message // Grab most recent changes message for _, message := range messages { messageBody, err := message.Body() @@ -2124,7 +2135,7 @@ func TestRemovedMessageWithAlternateAccessAndChannelFilteredReplication(t *testi messages := btc.pullReplication.GetMessages() var highestMsgSeq uint32 - var highestSeqMsg blip.Message + var highestSeqMsg *blip.Message // Grab most recent changes message for _, message := range messages { messageBody, err := message.Body() @@ -2163,7 +2174,7 @@ func TestMultipleOutstandingChangesSubscriptions(t *testing.T) { bt := NewBlipTesterDefaultCollection(t) defer bt.Close() - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { if !request.NoReply() { // Send an empty response to avoid the Sync: Invalid response to 'changes' message response := request.Response() @@ -2172,7 +2183,7 @@ func TestMultipleOutstandingChangesSubscriptions(t *testing.T) { assert.NoError(t, err, "Error marshalling response") response.SetBody(emptyResponseValBytes) } - } + }) pullStats := bt.restTester.GetDatabase().DbStats.CBLReplicationPull() require.EqualValues(t, 0, pullStats.NumPullReplTotalContinuous.Value()) @@ -2561,10 +2572,10 @@ func TestSendRevisionNoRevHandling(t *testing.T) { // Change noRev handler so it's known when a noRev is received recievedNoRevs := make(chan *blip.Message) - btc.pullReplication.bt.blipContext.HandlerForProfile[db.MessageNoRev] = func(msg *blip.Message) { + btc.pullReplication.bt.dispatcher.SetHandler(db.MessageNoRev, blip.AsAsyncHandler(func(msg *blip.Message) { fmt.Println("Received noRev", msg.Properties) recievedNoRevs <- msg - } + })) resp := rt.SendAdminRequest(http.MethodPut, "/{{.keyspace}}/"+docName, `{"foo":"bar"}`) RequireStatus(t, resp, http.StatusCreated) @@ -2652,3 
+2663,122 @@ func TestUnsubChanges(t *testing.T) { _, found = btc.WaitForRev("doc2", resp.Rev) assert.True(t, found) } + +// TestRequestPlusPull tests that a one-shot pull replication waits for pending changes when request plus is set on the replication. +func TestRequestPlusPull(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelInfo, base.KeyDCP, base.KeyChanges, base.KeyHTTP) + defer db.SuspendSequenceBatching()() // Required for slow sequence simulation + + rtConfig := RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + } + rt := NewRestTester(t, &rtConfig) + defer rt.Close() + database := rt.GetDatabase() + + // Initialize blip tester client (will create user) + client, err := NewBlipTesterClientOptsWithRT(t, rt, &BlipTesterClientOpts{ + Username: "bernard", + }) + require.NoError(t, err) + defer client.Close() + + // Put a doc in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + RequireStatus(t, response, 201) + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + + // Write a document granting user 'bernard' access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + RequireStatus(t, response, 201) + + caughtUpStart := database.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + + // Start a regular one-shot pull + err = client.StartOneshotPullRequestPlus() + assert.NoError(t, err) + + // Wait for the one-shot changes feed to go into wait mode before releasing the slow sequence + require.NoError(t, database.WaitForTotalCaughtUp(caughtUpStart+1)) + + // Release the slow sequence + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + + // The one-shot pull should unblock and replicate the document in the granted channel + data, ok := client.WaitForDoc("pbs-1") + assert.True(t, ok) + assert.Equal(t, `{"channel":["PBS"]}`, string(data)) + +} + +// TestRequestPlusPull tests that a one-shot pull replication waits for pending changes when request plus is set on the db config. 
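
TestRequestPlusPullDbConfig below depends on how a per-request request_plus value interacts with the changes_request_plus database default. The rule, as added later in this diff to changes_api.go, distilled into a standalone sketch (the helper name and the plain feed strings are illustrative):

package main

import "fmt"

// useRequestPlus returns whether a changes request should wait for a request-plus sequence:
// a per-request value overrides the database default, and continuous feeds never use it.
func useRequestPlus(feed string, dbDefault bool, perRequest *bool) bool {
	use := dbDefault
	if perRequest != nil {
		use = *perRequest
	}
	return use && feed != "continuous"
}

func main() {
	t, f := true, false
	fmt.Println(useRequestPlus("normal", true, nil))      // true  - db config default applies
	fmt.Println(useRequestPlus("normal", true, &f))       // false - request overrides config
	fmt.Println(useRequestPlus("continuous", false, &t))  // false - continuous feeds ignore it
}
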
+func TestRequestPlusPullDbConfig(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelInfo, base.KeyDCP, base.KeyChanges, base.KeyHTTP) + defer db.SuspendSequenceBatching()() // Required for slow sequence simulation + + rtConfig := RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + DatabaseConfig: &DatabaseConfig{ + DbConfig: DbConfig{ + ChangesRequestPlus: base.BoolPtr(true), + }, + }, + } + rt := NewRestTester(t, &rtConfig) + defer rt.Close() + database := rt.GetDatabase() + + // Initialize blip tester client (will create user) + client, err := NewBlipTesterClientOptsWithRT(t, rt, &BlipTesterClientOpts{ + Username: "bernard", + }) + require.NoError(t, err) + defer client.Close() + + // Put a doc in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + RequireStatus(t, response, 201) + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + + // Write a document granting user 'bernard' access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + RequireStatus(t, response, 201) + + caughtUpStart := database.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + + // Start a regular one-shot pull + err = client.StartOneshotPull() + assert.NoError(t, err) + + // Wait for the one-shot changes feed to go into wait mode before releasing the slow sequence + require.NoError(t, database.WaitForTotalCaughtUp(caughtUpStart+1)) + + // Release the slow sequence + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + + // The one-shot pull should unblock and replicate the document in the granted channel + data, ok := client.WaitForDoc("pbs-1") + assert.True(t, ok) + assert.Equal(t, `{"channel":["PBS"]}`, string(data)) + +} diff --git a/rest/blip_client_test.go b/rest/blip_client_test.go index 13558aee7e..448a188994 100644 --- a/rest/blip_client_test.go +++ b/rest/blip_client_test.go @@ -97,7 +97,7 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { btr.replicationStats = db.NewBlipSyncStats() } - btr.bt.blipContext.HandlerForProfile[db.MessageProveAttachment] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageProveAttachment, blip.AsAsyncHandler(func(msg *blip.Message) { btr.storeMessage(msg) nonce, err := msg.Body() @@ -126,9 +126,9 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { resp := msg.Response() resp.SetBody([]byte(proof)) btr.replicationStats.ProveAttachment.Add(1) - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageChanges] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageChanges, blip.AsAsyncHandler(func(msg *blip.Message) { btr.storeMessage(msg) btcr := btc.getCollectionClientFromMessage(msg) @@ -217,13 +217,13 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { } response.SetBody(b) - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageProposeChanges] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageProposeChanges, blip.AsAsyncHandler(func(msg *blip.Message) { btc.pullReplication.storeMessage(msg) - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageRev] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageRev, 
blip.AsAsyncHandler(func(msg *blip.Message) { btc.pullReplication.storeMessage(msg) btcr := btc.getCollectionClientFromMessage(msg) @@ -310,6 +310,7 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { } var missingDigests []string + var knownDigests []string btcr.attachmentsLock.RLock() for _, attachment := range attsMap { attMap, ok := attachment.(map[string]interface{}) @@ -320,10 +321,63 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { if _, found := btcr.attachments[digest]; !found { missingDigests = append(missingDigests, digest) + } else { + if btr.bt.blipContext.ActiveSubprotocol() == db.BlipCBMobileReplicationV2 { + // only v2 clients care about proveAttachments + knownDigests = append(knownDigests, digest) + } } } btcr.attachmentsLock.RUnlock() + for _, digest := range knownDigests { + attData, err := btcr.getAttachment(digest) + if err != nil { + panic(err) + } + nonce, proof, err := db.GenerateProofOfAttachment(attData) + if err != nil { + panic(err) + } + + // if we already have this attachment, _we_ should ask the peer whether _they_ have the attachment + outrq := blip.NewRequest() + outrq.SetProfile(db.MessageProveAttachment) + outrq.Properties[db.ProveAttachmentDigest] = digest + outrq.SetBody(nonce) + + err = btcr.sendPullMsg(outrq) + if err != nil { + panic(err) + } + + resp := outrq.Response() + btc.pullReplication.storeMessage(resp) + respBody, err := resp.Body() + if err != nil { + panic(err) + } + + if resp.Type() == blip.ErrorType { + // forward error from proveAttachment response into rev response + if !msg.NoReply() { + response := msg.Response() + errorCode, _ := strconv.Atoi(resp.Properties["Error-Code"]) + response.SetError(resp.Properties["Error-Code"], errorCode, string(respBody)) + } + return + } + + if string(respBody) != proof { + // forward error from proveAttachment response into rev response + if !msg.NoReply() { + response := msg.Response() + response.SetError(resp.Properties["Error-Code"], http.StatusForbidden, fmt.Sprintf("Incorrect proof for attachment %s", digest)) + } + return + } + } + for _, digest := range missingDigests { outrq := blip.NewRequest() outrq.SetProfile(db.MessageGetAttachment) @@ -385,9 +439,9 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { response := msg.Response() response.SetBody([]byte(`[]`)) } - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageGetAttachment] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageGetAttachment, blip.AsAsyncHandler(func(msg *blip.Message) { btr.storeMessage(msg) digest, ok := msg.Properties[db.GetAttachmentDigest] @@ -405,17 +459,17 @@ func (btr *BlipTesterReplicator) initHandlers(btc *BlipTesterClient) { response := msg.Response() response.SetBody(attachment) btr.replicationStats.GetAttachment.Add(1) - } + })) - btr.bt.blipContext.HandlerForProfile[db.MessageNoRev] = func(msg *blip.Message) { + btr.bt.dispatcher.SetHandler(db.MessageNoRev, blip.AsAsyncHandler(func(msg *blip.Message) { // TODO: Support norev messages btr.storeMessage(msg) - } + })) - btr.bt.blipContext.DefaultHandler = func(msg *blip.Message) { + btr.bt.dispatcher.SetDefaultHandler(blip.AsAsyncHandler(func(msg *blip.Message) { btr.storeMessage(msg) base.PanicfCtx(context.TODO(), "Unknown profile: %s caught by client DefaultHandler - msg: %#v", msg.Profile(), msg) - } + })) } // saveAttachment takes a content-type, and base64 encoded data and stores the attachment on the client @@ -430,10 +484,11 @@ func (btc 
*BlipTesterCollectionClient) saveAttachment(_, base64data string) (dat digest = db.Sha1DigestKey(data) if _, found := btc.attachments[digest]; found { - return 0, "", fmt.Errorf("attachment with digest already exists") + base.InfofCtx(context.TODO(), base.KeySync, "attachment with digest %s already exists", digest) + } else { + btc.attachments[digest] = data } - btc.attachments[digest] = data return len(data), digest, nil } @@ -632,19 +687,25 @@ func (btc *BlipTesterClient) Collection(collectionName string) *BlipTesterCollec // StartPull will begin a continuous pull replication since 0 between the client and server func (btcc *BlipTesterCollectionClient) StartPull() (err error) { - return btcc.StartPullSince("true", "0", "false", "") + return btcc.StartPullSince("true", "0", "false", "", "") } func (btcc *BlipTesterCollectionClient) StartOneshotPull() (err error) { - return btcc.StartPullSince("false", "0", "false", "") + return btcc.StartPullSince("false", "0", "false", "", "") } func (btcc *BlipTesterCollectionClient) StartOneshotPullFiltered(channels string) (err error) { - return btcc.StartPullSince("false", "0", "false", channels) + return btcc.StartPullSince("false", "0", "false", channels, "") +} + +func (btcc *BlipTesterCollectionClient) StartOneshotPullRequestPlus() (err error) { + return btcc.StartPullSince("false", "0", "false", "", "true") } +//////// HELPER FUNCTIONS: + // StartPullSince will begin a pull replication between the client and server with the given params. -func (btc *BlipTesterCollectionClient) StartPullSince(continuous, since, activeOnly string, channels string) (err error) { +func (btc *BlipTesterCollectionClient) StartPullSince(continuous, since, activeOnly, channels, requestPlus string) (err error) { subChangesRequest := blip.NewRequest() subChangesRequest.SetProfile(db.MessageSubChanges) subChangesRequest.Properties[db.SubChangesContinuous] = continuous @@ -654,6 +715,9 @@ func (btc *BlipTesterCollectionClient) StartPullSince(continuous, since, activeO subChangesRequest.Properties[db.SubChangesFilter] = base.ByChannelFilter subChangesRequest.Properties[db.SubChangesChannels] = channels } + if requestPlus != "" { + subChangesRequest.Properties[db.SubChangesRequestPlus] = requestPlus + } subChangesRequest.SetNoReply(true) if btc.parent.BlipTesterClientOpts.SendRevocations { @@ -923,6 +987,9 @@ func (btc *BlipTesterCollectionClient) GetRev(docID, revID string) (data []byte, // WaitForRev blocks until the given doc ID and rev ID have been stored by the client, and returns the data when found. func (btc *BlipTesterCollectionClient) WaitForRev(docID, revID string) (data []byte, found bool) { + if data, found := btc.GetRev(docID, revID); found { + return data, found + } ticker := time.NewTicker(50 * time.Millisecond) timeout := time.After(10 * time.Second) for { @@ -938,6 +1005,41 @@ func (btc *BlipTesterCollectionClient) WaitForRev(docID, revID string) (data []b } } +// GetDoc returns a rev stored in the Client under the given docID. (if multiple revs are present, rev body returned is non-deterministic) +func (btc *BlipTesterCollectionClient) GetDoc(docID string) (data []byte, found bool) { + btc.docsLock.RLock() + defer btc.docsLock.RUnlock() + + if rev, ok := btc.docs[docID]; ok { + for _, data := range rev { + return data.body, true + } + } + + return nil, false +} + +// WaitForDoc blocks until the given doc ID has been stored by the client, and returns the data when found. 
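
WaitForRev above, and GetDoc/WaitForDoc below, share a poll-with-timeout shape: check once up front, then retry on a 50ms ticker until a 10s deadline. That pattern, extracted as a standalone sketch that is not used by the harness:

package main

import (
	"errors"
	"fmt"
	"time"
)

// pollUntilFound retries check on a fixed interval until it reports found or the timeout elapses.
func pollUntilFound(check func() (data []byte, found bool), interval, timeout time.Duration) ([]byte, error) {
	if data, found := check(); found {
		return data, nil
	}
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	deadline := time.After(timeout)
	for {
		select {
		case <-deadline:
			return nil, errors.New("timed out waiting for document")
		case <-ticker.C:
			if data, found := check(); found {
				return data, nil
			}
		}
	}
}

func main() {
	start := time.Now()
	// Simulated condition: the "document" becomes available after 200ms.
	data, err := pollUntilFound(func() ([]byte, bool) {
		return []byte(`{"channel":["PBS"]}`), time.Since(start) > 200*time.Millisecond
	}, 50*time.Millisecond, 10*time.Second)
	fmt.Println(string(data), err)
}
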
+func (btc *BlipTesterCollectionClient) WaitForDoc(docID string) (data []byte, found bool) { + + if data, found := btc.GetDoc(docID); found { + return data, found + } + ticker := time.NewTicker(50 * time.Millisecond) + timeout := time.After(10 * time.Second) + for { + select { + case <-timeout: + btc.parent.rt.TB.Fatalf("BlipTesterClient timed out waiting for doc ID: %v", docID) + return nil, false + case <-ticker.C: + if data, found := btc.GetDoc(docID); found { + return data, found + } + } + } +} + // GetMessage returns the message stored in the Client under the given serial number func (btr *BlipTesterReplicator) GetMessage(serialNumber blip.MessageNumber) (msg *blip.Message, found bool) { btr.messagesLock.RLock() @@ -951,15 +1053,13 @@ func (btr *BlipTesterReplicator) GetMessage(serialNumber blip.MessageNumber) (ms } // GetMessages returns a copy of all messages stored in the Client keyed by serial number -func (btr *BlipTesterReplicator) GetMessages() map[blip.MessageNumber]blip.Message { +func (btr *BlipTesterReplicator) GetMessages() map[blip.MessageNumber]*blip.Message { btr.messagesLock.RLock() defer btr.messagesLock.RUnlock() - messages := make(map[blip.MessageNumber]blip.Message, len(btr.messages)) + messages := make(map[blip.MessageNumber]*blip.Message, len(btr.messages)) for k, v := range btr.messages { - // Read the body before copying, since it might be read asynchronously - _, _ = v.Body() - messages[k] = *v + messages[k] = v.Clone() } return messages @@ -1026,6 +1126,10 @@ func (btc *BlipTesterClient) WaitForRev(docID string, revID string) ([]byte, boo return btc.SingleCollection().WaitForRev(docID, revID) } +func (btc *BlipTesterClient) WaitForDoc(docID string) ([]byte, bool) { + return btc.SingleCollection().WaitForDoc(docID) +} + func (btc *BlipTesterClient) WaitForBlipRevMessage(docID string, revID string) (*blip.Message, bool) { return btc.SingleCollection().WaitForBlipRevMessage(docID, revID) } @@ -1038,16 +1142,20 @@ func (btc *BlipTesterClient) StartOneshotPullFiltered(channels string) error { return btc.SingleCollection().StartOneshotPullFiltered(channels) } +func (btc *BlipTesterClient) StartOneshotPullRequestPlus() error { + return btc.SingleCollection().StartOneshotPullRequestPlus() +} + func (btc *BlipTesterClient) PushRev(docID string, revID string, body []byte) (string, error) { return btc.SingleCollection().PushRev(docID, revID, body) } func (btc *BlipTesterClient) StartPullSince(continuous, since, activeOnly string) error { - return btc.SingleCollection().StartPullSince(continuous, since, activeOnly, "") + return btc.SingleCollection().StartPullSince(continuous, since, activeOnly, "", "") } func (btc *BlipTesterClient) StartFilteredPullSince(continuous, since, activeOnly string, channels string) error { - return btc.SingleCollection().StartPullSince(continuous, since, activeOnly, channels) + return btc.SingleCollection().StartPullSince(continuous, since, activeOnly, channels, "") } func (btc *BlipTesterClient) GetRev(docID, revID string) ([]byte, bool) { diff --git a/rest/blip_stats_test.go b/rest/blip_stats_test.go new file mode 100644 index 0000000000..de640ff761 --- /dev/null +++ b/rest/blip_stats_test.go @@ -0,0 +1,96 @@ +// Copyright 2023-Present Couchbase, Inc. +// +// Use of this software is governed by the Business Source License included +// in the file licenses/BSL-Couchbase.txt. 
As of the Change Date specified +// in that file, in accordance with the Business Source License, use of this +// software will be governed by the Apache License, Version 2.0, included in +// the file licenses/APL2.txt. + +package rest + +import ( + "testing" + + "github.com/couchbase/sync_gateway/base" + "github.com/stretchr/testify/require" +) + +func sendGetCheckpointRequest(bt *BlipTester) { + t := bt.restTester.TB + rq := bt.newRequest() + rq.SetProfile("getCheckpoint") + require.True(t, bt.sender.Send(rq)) + errorCode, exists := rq.Response().Properties["Error-Code"] + require.True(t, exists) + require.Equal(t, "404", errorCode) +} + +// waitForStatGreaterThan will retry for up to 20 seconds until the result of getStatFunc is equal to the expected value. +func waitForStatGreaterThan(t *testing.T, getStatFunc func() int64, expected int64) { + workerFunc := func() (shouldRetry bool, err error, val interface{}) { + val = getStatFunc() + stat, ok := val.(int64) + require.True(t, ok) + return stat <= expected, nil, val + } + // wait for up to 20 seconds for the stat to meet the expected value + err, val := base.RetryLoop("waitForStatGreaterThan retry loop", workerFunc, base.CreateSleeperFunc(200, 100)) + require.NoError(t, err) + valInt64, ok := val.(int64) + require.True(t, ok) + require.Greater(t, valInt64, expected) +} + +func TestBlipStatsBasic(t *testing.T) { + bt, err := NewBlipTester(t) + require.NoError(t, err) + defer bt.Close() + + // make sure requests have not incremented stats. + /// Note: there is a blip call in NewBlipTester to initialize collections + dbStats := bt.restTester.GetDatabase().DbStats.Database() + require.Equal(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Equal(t, int64(0), dbStats.ReplicationBytesSent.Value()) + + // send a request, close BlipSyncContext and make sure stats are incremented + sendGetCheckpointRequest(bt) + + // requests shouldn't be implemented as part of handler + require.Equal(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Equal(t, int64(0), dbStats.ReplicationBytesSent.Value()) + + bt.sender.Close() + + waitForStatGreaterThan(t, dbStats.ReplicationBytesReceived.Value, 1) + waitForStatGreaterThan(t, dbStats.ReplicationBytesSent.Value, 1) + +} + +func TestBlipStatsFastReport(t *testing.T) { + bt, err := NewBlipTester(t) + require.NoError(t, err) + defer bt.Close() + sendRequest := func() { + rq := bt.newRequest() + rq.SetProfile("getCheckpoint") + require.True(t, bt.sender.Send(rq)) + errorCode, exists := rq.Response().Properties["Error-Code"] + require.True(t, exists) + require.Equal(t, "404", errorCode) + } + + dbStats := bt.restTester.GetDatabase().DbStats.Database() + require.Equal(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Equal(t, int64(0), dbStats.ReplicationBytesSent.Value()) + + sendRequest() + + require.Equal(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Equal(t, int64(0), dbStats.ReplicationBytesSent.Value()) + + // set reporting interval to update stats immediately + bt.restTester.GetDatabase().Options.BlipStatsReportingInterval = 0 + sendRequest() + require.Less(t, int64(0), dbStats.ReplicationBytesReceived.Value()) + require.Less(t, int64(0), dbStats.ReplicationBytesSent.Value()) +} diff --git a/rest/blip_sync.go b/rest/blip_sync.go index 01242a4923..931c96fe8e 100644 --- a/rest/blip_sync.go +++ b/rest/blip_sync.go @@ -11,6 +11,7 @@ licenses/APL2.txt. 
package rest import ( + "context" "fmt" "net/http" @@ -22,6 +23,16 @@ import ( // HTTP handler for incoming BLIP sync WebSocket request (/db/_blipsync) func (h *handler) handleBLIPSync() error { + needRelease, err := h.server.incrementConcurrentReplications(h.rqCtx) + if err != nil { + h.db.DbStats.Database().NumReplicationsRejectedLimit.Add(1) + return err + } + // if we haven't incremented the active replicator due to MaxConcurrentReplications being 0, we don't need to decrement it + if needRelease { + defer h.server.decrementConcurrentReplications(h.rqCtx) + } + // Exit early when the connection can't be switched to websocket protocol. if _, ok := h.response.(http.Hijacker); !ok { base.DebugfCtx(h.ctx(), base.KeyHTTP, "Non-upgradable request received for BLIP+WebSocket protocol") @@ -71,3 +82,37 @@ func (h *handler) handleBLIPSync() error { return nil } + +// incrementConcurrentReplications increments the number of active replications (if there is capacity to do so) +// and rejects calls if no capacity is available +func (sc *ServerContext) incrementConcurrentReplications(ctx context.Context) (bool, error) { + // lock replications config limit + the active replications counter + sc.ActiveReplicationsCounter.lock.Lock() + defer sc.ActiveReplicationsCounter.lock.Unlock() + // if max concurrent replications is 0 then we don't need to keep track of concurrent replications + if sc.ActiveReplicationsCounter.activeReplicatorLimit == 0 { + return false, nil + } + + capacity := sc.ActiveReplicationsCounter.activeReplicatorLimit + count := sc.ActiveReplicationsCounter.activeReplicatorCount + + if count >= capacity { + base.InfofCtx(ctx, base.KeyHTTP, "Replication limit exceeded (active: %d limit: %d)", count, capacity) + return false, base.ErrReplicationLimitExceeded + } + sc.ActiveReplicationsCounter.activeReplicatorCount++ + base.TracefCtx(ctx, base.KeyHTTP, "Acquired replication slot (active: %d/%d)", sc.ActiveReplicationsCounter.activeReplicatorCount, capacity) + + return true, nil +} + +// decrementConcurrentReplications decrements the number of active replications on the server context +func (sc *ServerContext) decrementConcurrentReplications(ctx context.Context) { + // lock replications config limit + the active replications counter + sc.ActiveReplicationsCounter.lock.Lock() + defer sc.ActiveReplicationsCounter.lock.Unlock() + connections := sc.ActiveReplicationsCounter.activeReplicatorLimit + sc.ActiveReplicationsCounter.activeReplicatorCount-- + base.TracefCtx(ctx, base.KeyHTTP, "Released replication slot (active: %d/%d)", sc.activeReplicatorCount, connections) +} diff --git a/rest/changes_api.go b/rest/changes_api.go index 00c06170c0..b1f39173f3 100644 --- a/rest/changes_api.go +++ b/rest/changes_api.go @@ -37,6 +37,12 @@ const kDefaultTimeoutMS = 5 * 60 * 1000 // Maximum value of _changes?timeout property const kMaxTimeoutMS = 15 * 60 * 1000 +// Values for feed parameter on changes request +const feedTypeContinuous = "continuous" +const feedTypeLongpoll = "longpoll" +const feedTypeNormal = "normal" +const feedTypeWebsocket = "websocket" + func (h *handler) handleRevsDiff() error { var input map[string][]string err := h.readJSONInto(&input) @@ -180,6 +186,16 @@ func (h *handler) handleChanges() error { options.ActiveOnly = h.getBoolQuery("active_only") options.IncludeDocs = h.getBoolQuery("include_docs") options.Revocations = h.getBoolQuery("revocations") + + useRequestPlus, _ := h.getOptBoolQuery("request_plus", h.db.Options.ChangesRequestPlus) + if useRequestPlus && feed != 
feedTypeContinuous { + var seqErr error + options.RequestPlusSeq, seqErr = h.db.GetRequestPlusSequence() + if seqErr != nil { + return base.HTTPErrorf(http.StatusServiceUnavailable, "Unable to retrieve requestPlus sequence") + } + + } filter = h.getQuery("filter") channelsParam := h.getQuery("channels") if channelsParam != "" { @@ -248,6 +264,15 @@ func (h *handler) handleChanges() error { feed = "normal" } + needRelease, concurrentReplicationsErr := h.server.incrementConcurrentReplications(h.rqCtx) + if concurrentReplicationsErr != nil { + return concurrentReplicationsErr + } + // if we haven't incremented the active replicator due to MaxConcurrentReplications being 0, we don't need to decrement it + if needRelease { + defer h.server.decrementConcurrentReplications(h.rqCtx) + } + // Get the channels as parameters to an imaginary "bychannel" filter. // The default is all channels the user can access. userChannels := base.SetOf(ch.AllChannelWildcard) @@ -303,18 +328,18 @@ func (h *handler) handleChanges() error { var err error switch feed { - case "normal": + case feedTypeNormal: if filter == "_doc_ids" { err, forceClose = h.sendSimpleChanges(userChannels, options, docIdsArray) } else { err, forceClose = h.sendSimpleChanges(userChannels, options, nil) } - case "longpoll": + case feedTypeLongpoll: options.Wait = true err, forceClose = h.sendSimpleChanges(userChannels, options, nil) - case "continuous": + case feedTypeContinuous: err, forceClose = h.sendContinuousChangesByHTTP(userChannels, options) - case "websocket": + case feedTypeWebsocket: err, forceClose = h.sendContinuousChangesByWebSocket(userChannels, options) default: err = base.HTTPErrorf(http.StatusBadRequest, "Unknown feed type") @@ -445,7 +470,7 @@ func (h *handler) generateContinuousChanges(inChannels base.Set, options db.Chan options.Continuous = true err, forceClose := db.GenerateChanges(h.ctx(), h.rq.Context(), h.collection, inChannels, options, nil, send) if sendErr, ok := err.(*db.ChangesSendErr); ok { - h.logStatus(http.StatusOK, fmt.Sprintf("0Write error: %v", sendErr)) + h.logStatus(http.StatusOK, fmt.Sprintf("Write error: %v", sendErr)) return nil, forceClose // error is probably because the client closed the connection } else { h.logStatus(http.StatusOK, "OK (continuous feed closed)") @@ -571,7 +596,8 @@ func (h *handler) readChangesOptionsFromJSON(jsonData []byte) (feed string, opti HeartbeatMs *uint64 `json:"heartbeat"` TimeoutMs *uint64 `json:"timeout"` AcceptEncoding string `json:"accept_encoding"` - ActiveOnly bool `json:"active_only"` // Return active revisions only + ActiveOnly bool `json:"active_only"` // Return active revisions only + RequestPlus *bool `json:"request_plus"` // Wait for sequence buffering to catch up to database seq value at time request was issued } // Initialize since clock and hasher ahead of unmarshalling sequence @@ -615,6 +641,20 @@ func (h *handler) readChangesOptionsFromJSON(jsonData []byte) (feed string, opti compress = (input.AcceptEncoding == "gzip") + if h.db != nil && feed != feedTypeContinuous { + useRequestPlus := h.db.Options.ChangesRequestPlus + if input.RequestPlus != nil { + useRequestPlus = *input.RequestPlus + } + if useRequestPlus { + var seqErr error + options.RequestPlusSeq, seqErr = h.db.GetRequestPlusSequence() + if seqErr != nil { + err = base.HTTPErrorf(http.StatusServiceUnavailable, "Unable to retrieve requestPlus sequence: %v", seqErr) + return + } + } + } return } diff --git a/rest/changestest/changes_api_test.go b/rest/changestest/changes_api_test.go index 
0c6472e7af..f9f63c83ca 100644 --- a/rest/changestest/changes_api_test.go +++ b/rest/changestest/changes_api_test.go @@ -3935,6 +3935,292 @@ func TestTombstoneCompaction(t *testing.T) { TestCompact(db.QueryTombstoneBatch + 20) } +// TestOneShotGrantTiming simulates a one-shot changes feed returning before a previously issued grant has been +// buffered over DCP. +func TestOneShotGrantTiming(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelDebug, base.KeyChanges, base.KeyHTTP) + + defer db.SuspendSequenceBatching()() + + rt := rest.NewRestTester(t, + &rest.RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + }) + defer rt.Close() + + // Create user with access to no channels + ctx := rt.Context() + database := rt.GetDatabase() + a := database.Authenticator(ctx) + bernard, err := a.NewUser("bernard", "letmein", nil) + assert.NoError(t, err) + assert.NoError(t, a.Save(bernard)) + + // Put several documents in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-2", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-3", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-4", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + + var changes struct { + Results []db.ChangeEntry + Last_Seq interface{} + } + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + log.Printf("Allocated slowSequence: %v", slowSequence) + + // Write a document granting user access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + rest.RequireStatus(t, response, 201) + + // Issue normal one-shot changes request. Expect no results as granting document hasn't been buffered (blocked by + // slowSequence) + changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 0) + + // Release the slow sequence and wait for it to be processed over DCP + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + require.NoError(t, rt.WaitForPendingChanges()) + + // Issue normal one-shot changes request. Expect results as granting document buffering is unblocked + changesResponse = rt.SendUserRequest("GET", "/{{.keyspace}}/_changes", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + +} + +// TestOneShotGrantRequestPlus simulates a one-shot changes feed being made before a previously issued grant has been +// buffered over DCP. When requestPlus is set, changes feed should block until grant is processed. 
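
The tests below drive request_plus in two ways: as a query parameter on a GET one-shot changes request, and as a body property on a POST. Expressed as plain HTTP calls (the host, port and use of basic auth are assumptions for illustration; the credentials mirror the test user bernard/letmein):

package main

import (
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	// 1. request_plus as a query parameter on a GET one-shot changes request.
	getReq, err := http.NewRequest(http.MethodGet, "http://localhost:4984/db/_changes?request_plus=true", nil)
	if err != nil {
		log.Fatal(err)
	}
	getReq.SetBasicAuth("bernard", "letmein")

	// 2. request_plus as a body property on a POST changes request.
	postReq, err := http.NewRequest(http.MethodPost, "http://localhost:4984/db/_changes",
		strings.NewReader(`{"request_plus": true}`))
	if err != nil {
		log.Fatal(err)
	}
	postReq.SetBasicAuth("bernard", "letmein")
	postReq.Header.Set("Content-Type", "application/json")

	for _, req := range []*http.Request{getReq, postReq} {
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(req.Method, "status:", resp.StatusCode)
		resp.Body.Close()
	}
}
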
+func TestOneShotGrantRequestPlus(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelDebug, base.KeyChanges, base.KeyHTTP) + + defer db.SuspendSequenceBatching()() // Required for slow sequence simulation + + rt := rest.NewRestTester(t, + &rest.RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + }) + defer rt.Close() + + // Create user with access to no channels + ctx := rt.Context() + database := rt.GetDatabase() + a := database.Authenticator(ctx) + bernard, err := a.NewUser("bernard", "letmein", nil) + assert.NoError(t, err) + assert.NoError(t, a.Save(bernard)) + + // Put several documents in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-2", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-3", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-4", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + + // Write a document granting user access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + rest.RequireStatus(t, response, 201) + + caughtUpStart := database.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + + var oneShotComplete sync.WaitGroup + // Issue a GET requestPlus one-shot changes request in a separate goroutine. + oneShotComplete.Add(1) + go func() { + defer oneShotComplete.Done() + var changes rest.ChangesResults + changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes?request_plus=true", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err := base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + }() + + // Issue a POST requestPlus one-shot changes request in a separate goroutine. + oneShotComplete.Add(1) + go func() { + defer oneShotComplete.Done() + var changes rest.ChangesResults + changesResponse := rt.SendUserRequest("POST", "/{{.keyspace}}/_changes", `{"request_plus":true}`, "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + }() + + // Wait for the one-shot changes feed to go into wait mode before releasing the slow sequence + require.NoError(t, database.WaitForTotalCaughtUp(caughtUpStart+2)) + + // Release the slow sequence and wait for it to be processed over DCP + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + require.NoError(t, rt.WaitForPendingChanges()) + + oneShotComplete.Wait() +} + +// TestOneShotGrantRequestPlusDbConfig simulates a one-shot changes feed being made before a previously issued grant has been +// buffered over DCP. 
When requestPlus is set via config, changes feed should block until grant is processed. +func TestOneShotGrantRequestPlusDbConfig(t *testing.T) { + + base.SetUpTestLogging(t, base.LevelDebug, base.KeyChanges, base.KeyHTTP) + + defer db.SuspendSequenceBatching()() + + rt := rest.NewRestTester(t, + &rest.RestTesterConfig{ + SyncFn: `function(doc) { + channel(doc.channel); + if (doc.accessUser != "") { + access(doc.accessUser, doc.accessChannel) + } + }`, + DatabaseConfig: &rest.DatabaseConfig{ + DbConfig: rest.DbConfig{ + ChangesRequestPlus: base.BoolPtr(true), + }, + }, + }) + defer rt.Close() + + // Create user with access to no channels + ctx := rt.Context() + database := rt.GetDatabase() + a := database.Authenticator(ctx) + bernard, err := a.NewUser("bernard", "letmein", nil) + assert.NoError(t, err) + assert.NoError(t, a.Save(bernard)) + + // Put several documents in channel PBS + response := rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-1", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-2", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-3", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/pbs-4", `{"channel":["PBS"]}`) + rest.RequireStatus(t, response, 201) + + // Allocate a sequence but do not write a doc for it - will block DCP buffering until sequence is skipped + slowSequence, seqErr := db.AllocateTestSequence(database) + require.NoError(t, seqErr) + log.Printf("Allocated slowSequence: %v", slowSequence) + + // Write a document granting user access to PBS + response = rt.SendAdminRequest("PUT", "/{{.keyspace}}/grantDoc", `{"accessUser":"bernard", "accessChannel":"PBS"}`) + rest.RequireStatus(t, response, 201) + + // Issue one-shot GET changes request explicitly setting request_plus=false (should override config value). + // Expect no results as granting document hasn't been buffered (blocked by slowSequence) + changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes?request_plus=false", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + var changes rest.ChangesResults + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 0) + + // Issue one-shot POST changes request explicitly setting request_plus=false (should override config value). + // Expect no results as granting document hasn't been buffered (blocked by slowSequence) + changesResponse = rt.SendUserRequest("POST", "/{{.keyspace}}/_changes", `{"request_plus":false}`, "bernard") + rest.RequireStatus(t, changesResponse, 200) + err = base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 0) + + caughtUpStart := database.DbStats.CBLReplicationPull().NumPullReplTotalCaughtUp.Value() + + var oneShotComplete sync.WaitGroup + // Issue a GET one-shot changes request in a separate goroutine. 
Should run as request plus based on config + oneShotComplete.Add(1) + go func() { + defer oneShotComplete.Done() + var changes rest.ChangesResults + changesResponse := rt.SendUserRequest("GET", "/{{.keyspace}}/_changes", "", "bernard") + rest.RequireStatus(t, changesResponse, 200) + err := base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + }() + + // Issue a POST one-shot changes request in a separate goroutine. Should run as request plus based on config + oneShotComplete.Add(1) + go func() { + defer oneShotComplete.Done() + var changes rest.ChangesResults + changesResponse := rt.SendUserRequest("POST", "/{{.keyspace}}/_changes", `{}`, "bernard") + rest.RequireStatus(t, changesResponse, 200) + err := base.JSONUnmarshal(changesResponse.Body.Bytes(), &changes) + assert.NoError(t, err, "Error unmarshalling changes response") + for _, entry := range changes.Results { + log.Printf("Entry:%+v", entry) + } + require.Len(t, changes.Results, 4) + }() + + // Wait for the one-shot changes feed to go into wait mode before releasing the slow sequence + require.NoError(t, database.WaitForTotalCaughtUp(caughtUpStart+2)) + + // Release the slow sequence and wait for it to be processed over DCP + releaseErr := db.ReleaseTestSequence(database, slowSequence) + require.NoError(t, releaseErr) + require.NoError(t, rt.WaitForPendingChanges()) + + oneShotComplete.Wait() +} + func waitForCompactStopped(dbc *db.DatabaseContext) error { for i := 0; i < 100; i++ { compactRunning := dbc.CacheCompactActive() diff --git a/rest/config.go b/rest/config.go index 0d6e17feed..2c06550b5d 100644 --- a/rest/config.go +++ b/rest/config.go @@ -165,6 +165,7 @@ type DbConfig struct { GraphQL *functions.GraphQLConfig `json:"graphql,omitempty"` // GraphQL configuration & resolver fns UserFunctions *functions.FunctionsConfig `json:"functions,omitempty"` // Named JS fns for clients to call Suspendable *bool `json:"suspendable,omitempty"` // Allow the database to be suspended + ChangesRequestPlus *bool `json:"changes_request_plus,omitempty"` // If set, is used as the default value of request_plus for non-continuous replications CORS *auth.CORSConfig `json:"cors,omitempty"` } @@ -1266,7 +1267,8 @@ func SetupServerContext(ctx context.Context, config *StartupConfig, persistentCo sc := NewServerContext(ctx, config, persistentConfig) if !base.ServerIsWalrus(config.Bootstrap.Server) { - if err := sc.initializeCouchbaseServerConnections(ctx); err != nil { + failFast := false + if err := sc.initializeCouchbaseServerConnections(ctx, failFast); err != nil { return nil, err } } @@ -1372,7 +1374,7 @@ func (sc *ServerContext) migrateV30Configs(ctx context.Context) error { if getErr == base.ErrNotFound { continue } else if getErr != nil { - return fmt.Errorf("Error retrieving 3.0 config for bucket: %s, groupID: %s: %w", bucketName, groupID, err) + return fmt.Errorf("Error retrieving 3.0 config for bucket: %s, groupID: %s: %w", bucketName, groupID, getErr) } base.InfofCtx(ctx, base.KeyConfig, "Found legacy persisted config for database %s - migrating to db registry.", base.MD(dbConfig.Name)) @@ -1480,6 +1482,9 @@ func (sc *ServerContext) bucketNameFromDbName(dbName string) (bucketName string, return dbc.Bucket.GetName(), true } + if sc.BootstrapContext.Connection == nil { + return "", false + } // To search for database with the specified name, need to iterate 
over all buckets: // - look for dbName-scoped config file // - fetch default config file (backward compatibility, check internal DB name) @@ -1610,7 +1615,7 @@ func (sc *ServerContext) FetchConfigs(ctx context.Context, isInitialStartup bool // _applyConfigs takes a map of dbName->DatabaseConfig and loads them into the ServerContext where necessary. func (sc *ServerContext) _applyConfigs(ctx context.Context, dbNameConfigs map[string]DatabaseConfig, isInitialStartup bool) (count int) { for dbName, cnf := range dbNameConfigs { - applied, err := sc._applyConfig(base.NewNonCancelCtx(), cnf, false, isInitialStartup) + applied, err := sc._applyConfig(base.NewNonCancelCtx(), cnf, true, isInitialStartup) if err != nil { base.ErrorfCtx(ctx, "Couldn't apply config for database %q: %v", base.MD(dbName), err) continue diff --git a/rest/config_database.go b/rest/config_database.go index 335192a0e0..8d10f75ac2 100644 --- a/rest/config_database.go +++ b/rest/config_database.go @@ -90,6 +90,10 @@ func MergeDatabaseConfigWithDefaults(sc *StartupConfig, dbConfig *DbConfig) (*Db // to provide defaults to include_runtime config endpoints. // Note that this does not include unsupported options func DefaultDbConfig(sc *StartupConfig) *DbConfig { + var partitions *uint16 + if base.IsEnterpriseEdition() { + partitions = base.Uint16Ptr(base.GetDefaultImportPartitions(sc.IsServerless())) + } dbConfig := DbConfig{ BucketConfig: BucketConfig{}, Name: "", @@ -98,7 +102,7 @@ func DefaultDbConfig(sc *StartupConfig) *DbConfig { Roles: nil, RevsLimit: nil, // Set this below struct AutoImport: base.BoolPtr(base.DefaultAutoImport), - ImportPartitions: base.Uint16Ptr(base.GetDefaultImportPartitions(sc.IsServerless())), + ImportPartitions: partitions, ImportFilter: nil, ImportBackupOldRev: base.BoolPtr(false), EventHandlers: nil, diff --git a/rest/config_flags.go b/rest/config_flags.go index 8c69c71521..c53b32c635 100644 --- a/rest/config_flags.go +++ b/rest/config_flags.go @@ -122,8 +122,9 @@ func registerConfigFlags(config *StartupConfig, fs *flag.FlagSet) map[string]con "auth.bcrypt_cost": {&config.Auth.BcryptCost, fs.Int("auth.bcrypt_cost", 0, "Cost to use for bcrypt password hashes")}, - "replicator.max_heartbeat": {&config.Replicator.MaxHeartbeat, fs.String("replicator.max_heartbeat", "", "Max heartbeat value for _changes request")}, - "replicator.blip_compression": {&config.Replicator.BLIPCompression, fs.Int("replicator.blip_compression", 0, "BLIP data compression level (0-9)")}, + "replicator.max_heartbeat": {&config.Replicator.MaxHeartbeat, fs.String("replicator.max_heartbeat", "", "Max heartbeat value for _changes request")}, + "replicator.blip_compression": {&config.Replicator.BLIPCompression, fs.Int("replicator.blip_compression", 0, "BLIP data compression level (0-9)")}, + "replicator.max_concurrent_replications": {&config.Replicator.MaxConcurrentReplications, fs.Int("replicator.max_concurrent_replications", 0, "Maximum number of replication connections to the node")}, "unsupported.stats_log_frequency": {&config.Unsupported.StatsLogFrequency, fs.String("unsupported.stats_log_frequency", "", "How often should stats be written to stats logs")}, "unsupported.use_stdlib_json": {&config.Unsupported.UseStdlibJSON, fs.Bool("unsupported.use_stdlib_json", false, "Bypass the jsoniter package and use Go's stdlib instead")}, diff --git a/rest/config_startup.go b/rest/config_startup.go index fc32247b89..5da211f21d 100644 --- a/rest/config_startup.go +++ b/rest/config_startup.go @@ -136,8 +136,9 @@ type AuthConfig struct { } 
type ReplicatorConfig struct { - MaxHeartbeat *base.ConfigDuration `json:"max_heartbeat,omitempty" help:"Max heartbeat value for _changes request"` - BLIPCompression *int `json:"blip_compression,omitempty" help:"BLIP data compression level (0-9)"` + MaxHeartbeat *base.ConfigDuration `json:"max_heartbeat,omitempty" help:"Max heartbeat value for _changes request"` + BLIPCompression *int `json:"blip_compression,omitempty" help:"BLIP data compression level (0-9)"` + MaxConcurrentReplications int `json:"max_concurrent_replications,omitempty" help:"Maximum number of replication connections to the node"` } type UnsupportedConfig struct { diff --git a/rest/cors_test.go b/rest/cors_test.go index 5c46d660eb..506669e9d1 100644 --- a/rest/cors_test.go +++ b/rest/cors_test.go @@ -44,8 +44,12 @@ func TestCORSDynamicSet(t *testing.T) { response := rt.SendRequestWithHeaders(method, "/{{.keyspace}}/", "", reqHeaders) require.Equal(t, "http://example.com", response.Header().Get("Access-Control-Allow-Origin")) if method == http.MethodGet { - RequireStatus(t, response, http.StatusBadRequest) - require.Contains(t, response.Body.String(), invalidDatabaseName) + if base.TestsUseNamedCollections() { + RequireStatus(t, response, http.StatusBadRequest) + require.Contains(t, response.Body.String(), invalidDatabaseName) + } else { // CBG-2978, should not be different from GSI/collections + RequireStatus(t, response, http.StatusUnauthorized) + } } else { RequireStatus(t, response, http.StatusNoContent) } @@ -92,8 +96,12 @@ func TestCORSDynamicSet(t *testing.T) { response := rt.SendRequestWithHeaders(method, "/{{.keyspace}}/", "", reqHeaders) if method == http.MethodGet { require.Equal(t, "http://example.com", response.Header().Get("Access-Control-Allow-Origin")) - RequireStatus(t, response, http.StatusBadRequest) - require.Contains(t, response.Body.String(), invalidDatabaseName) + if base.TestsUseNamedCollections() { + RequireStatus(t, response, http.StatusBadRequest) + require.Contains(t, response.Body.String(), invalidDatabaseName) + } else { // CBG-2978, should not be different from GSI/collections + RequireStatus(t, response, http.StatusUnauthorized) + } } else { // information leak: the options request knows about the database and knows it doesn't match require.Equal(t, "", response.Header().Get("Access-Control-Allow-Origin")) diff --git a/rest/handler.go b/rest/handler.go index 3f30b4bd85..7e8edc0017 100644 --- a/rest/handler.go +++ b/rest/handler.go @@ -1,4 +1,4 @@ -// Copyright 2012-Present Couchbase, Inc. +/// Copyright 2012-Present Couchbase, Inc. // // Use of this software is governed by the Business Source License included // in the file licenses/BSL-Couchbase.txt. 
As of the Change Date specified @@ -100,7 +100,8 @@ type handler struct { serialNumber uint64 formattedSerialNumber string loggedDuration bool - runOffline bool + runOffline bool // allows running on an offline database + allowNilDBContext bool // allow acceess to a database based only on name, looking up in metadata registry queryValues url.Values // Copy of results of rq.URL.Query() permissionsResults map[string]bool authScopeFunc authScopeFunc @@ -123,8 +124,7 @@ type handlerMethod func(*handler) error // Creates an http.Handler that will run a handler with the given method func makeHandler(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod) http.Handler { return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) { - runOffline := false - h := newHandler(server, privs, r, rq, runOffline) + h := newHandler(server, privs, r, rq, handlerOptions{}) err := h.invoke(method, accessPermissions, responsePermissions) h.writeError(err) h.logDuration(true) @@ -134,8 +134,24 @@ func makeHandler(server *ServerContext, privs handlerPrivs, accessPermissions [] // Creates an http.Handler that will run a handler with the given method even if the target DB is offline func makeOfflineHandler(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod) http.Handler { return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) { - runOffline := true - h := newHandler(server, privs, r, rq, runOffline) + options := handlerOptions{ + runOffline: true, + } + h := newHandler(server, privs, r, rq, options) + err := h.invoke(method, accessPermissions, responsePermissions) + h.writeError(err) + h.logDuration(true) + }) +} + +// makeMetadataDBOfflineHandler creates an http.Handler that will run a handler with the given method even if the target DB is not able to be instantiated +func makeMetadataDBOfflineHandler(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod) http.Handler { + return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) { + options := handlerOptions{ + runOffline: true, + allowNilDBContext: true, + } + h := newHandler(server, privs, r, rq, options) err := h.invoke(method, accessPermissions, responsePermissions) h.writeError(err) h.logDuration(true) @@ -146,8 +162,7 @@ func makeOfflineHandler(server *ServerContext, privs handlerPrivs, accessPermiss // given the endpoint payload returns an auth scope. 
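The handler changes above replace the bare runOffline boolean passed to newHandler with a handlerOptions struct, so new behaviours such as allowNilDBContext can be added without touching every call site. A simplified standalone sketch of that pattern follows (the real newHandler also takes the ServerContext, privileges, response writer and request; the names below are toy stand-ins except for the option fields themselves).

package main

import "fmt"

// handlerOptions mirrors the fields added in this change; everything else here is a toy stand-in.
type handlerOptions struct {
	runOffline        bool // allow the handler to run against an offline database
	allowNilDBContext bool // allow a db-scoped handler when only the db config, not the db context, is available
}

// describeHandler stands in for newHandler: adding a new option does not change its signature.
func describeHandler(name string, options handlerOptions) string {
	return fmt.Sprintf("%s: runOffline=%t allowNilDBContext=%t", name, options.runOffline, options.allowNilDBContext)
}

func main() {
	fmt.Println(describeHandler("makeHandler", handlerOptions{}))
	fmt.Println(describeHandler("makeOfflineHandler", handlerOptions{runOffline: true}))
	fmt.Println(describeHandler("makeMetadataDBOfflineHandler", handlerOptions{runOffline: true, allowNilDBContext: true}))
}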
func makeHandlerSpecificAuthScope(server *ServerContext, privs handlerPrivs, accessPermissions []Permission, responsePermissions []Permission, method handlerMethod, dbAuthStringFunc func([]byte) (string, error)) http.Handler { return http.HandlerFunc(func(r http.ResponseWriter, rq *http.Request) { - runOffline := false - h := newHandler(server, privs, r, rq, runOffline) + h := newHandler(server, privs, r, rq, handlerOptions{}) h.authScopeFunc = dbAuthStringFunc err := h.invoke(method, accessPermissions, responsePermissions) h.writeError(err) @@ -155,16 +170,22 @@ func makeHandlerSpecificAuthScope(server *ServerContext, privs handlerPrivs, acc }) } -func newHandler(server *ServerContext, privs handlerPrivs, r http.ResponseWriter, rq *http.Request, runOffline bool) *handler { +type handlerOptions struct { + runOffline bool // if true, allow handler to run when a database is offline + allowNilDBContext bool // if true, allow a db-scoped handler to be invoked with a nil dbContext in cases where the database config exists but has an error preventing dbContext initialization" +} + +func newHandler(server *ServerContext, privs handlerPrivs, r http.ResponseWriter, rq *http.Request, options handlerOptions) *handler { h := &handler{ - server: server, - privs: privs, - rq: rq, - response: r, - status: http.StatusOK, - serialNumber: atomic.AddUint64(&lastSerialNum, 1), - startTime: time.Now(), - runOffline: runOffline, + server: server, + privs: privs, + rq: rq, + response: r, + status: http.StatusOK, + serialNumber: atomic.AddUint64(&lastSerialNum, 1), + startTime: time.Now(), + runOffline: options.runOffline, + allowNilDBContext: options.allowNilDBContext, } // initialize h.rqCtx @@ -300,13 +321,14 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission var dbContext *db.DatabaseContext + var bucketName string + // look up the database context: if keyspaceDb != "" { h.addDatabaseLogContext(keyspaceDb) var err error if dbContext, err = h.server.GetActiveDatabase(keyspaceDb); err != nil { if err == base.ErrNotFound { - if shouldCheckAdminAuth { // Check if authenticated before attempting to get inactive database authorized, err := h.checkAdminAuthenticationOnly() @@ -317,10 +339,11 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission return ErrInvalidLogin } } - dbContext, err = h.server.GetInactiveDatabase(h.ctx(), keyspaceDb) + var dbConfigFound bool + dbContext, dbConfigFound, err = h.server.GetInactiveDatabase(h.ctx(), keyspaceDb) if err != nil { if httpError, ok := err.(*base.HTTPError); ok && httpError.Status == http.StatusNotFound { - if shouldCheckAdminAuth { + if shouldCheckAdminAuth && (!h.allowNilDBContext || !dbConfigFound) { return base.HTTPErrorf(http.StatusForbidden, "") } else if h.privs == regularPrivs || h.privs == publicPrivs { if !h.providedAuthCredentials() { @@ -330,8 +353,11 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission return ErrInvalidLogin } } - base.InfofCtx(h.ctx(), base.KeyHTTP, "Error trying to get db %s: %v", base.MD(keyspaceDb), err) - return err + if !h.allowNilDBContext || !dbConfigFound { + base.InfofCtx(h.ctx(), base.KeyHTTP, "Error trying to get db %s: %v", base.MD(keyspaceDb), err) + return err + } + bucketName, _ = h.server.bucketNameFromDbName(keyspaceDb) } } else { return err @@ -398,7 +424,6 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission } } } - if shouldCheckAdminAuth { // If server is walrus but auth is enabled we should just 
kick the user out as invalid as we have nothing to // validate credentials against @@ -425,13 +450,12 @@ func (h *handler) validateAndWriteHeaders(method handlerMethod, accessPermission authScope = dbContext.Bucket.GetName() } else { managementEndpoints, httpClient, err = h.server.ObtainManagementEndpointsAndHTTPClient() - authScope = "" + authScope = bucketName } if err != nil { base.WarnfCtx(h.ctx(), "An error occurred whilst obtaining management endpoints: %v", err) return base.HTTPErrorf(http.StatusInternalServerError, "") } - if h.authScopeFunc != nil { body, err := h.readBody() if err != nil { diff --git a/rest/replicatortest/replicator_test.go b/rest/replicatortest/replicator_test.go index 6b05397154..4b54fe3032 100644 --- a/rest/replicatortest/replicator_test.go +++ b/rest/replicatortest/replicator_test.go @@ -545,6 +545,143 @@ func TestPullReplicationAPI(t *testing.T) { assert.Equal(t, "rt2", doc2Body["source"]) } +func TestStopServerlessConnectionLimitingDuringReplications(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyReplicate, base.KeyHTTP, base.KeyHTTPResp, base.KeySync, base.KeySyncMsg) + + rt1, rt2, remoteURLString, teardown := rest.SetupSGRPeers(t) + defer teardown() + + resp := rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 2}`) + rest.RequireStatus(t, resp, http.StatusOK) + + for i := 0; i < 10; i++ { + _ = rt2.PutDoc(fmt.Sprint(i), `{"source":"rt2","channels":["alice"]}`) + } + + // create two replications to take us to the limit + replicationID := t.Name() + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + replicationID = t.Name() + "1" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + rt1.WaitForActiveReplicatorInitialization(2) + + // try create a new replication to take it beyond the threshold set by runtime config call + // assert it enter error state + replicationID = t.Name() + "2" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting) + + // change limit to 0 (turning limiting off) and assert that the replications currently running continue as normal and reject any new ones being added + resp = rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 0}`) + rest.RequireStatus(t, resp, http.StatusOK) + + // assert the replications aren't killed as result of change in limit + rt2.WaitForActiveReplicatorCount(2) + // assert we still can create a new replication given that originally the limit was 2 replications + replicationID = t.Name() + "3" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + +} + +func TestServerlessConnectionLimitingOneshotFeed(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyReplicate, base.KeyHTTP, base.KeyHTTPResp, base.KeySync, base.KeySyncMsg) + + rt1, rt2, remoteURLString, teardown := rest.SetupSGRPeers(t) + defer teardown() + + // update runtime config to limit to 2 concurrent 
replication connections + resp := rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 2}`) + rest.RequireStatus(t, resp, http.StatusOK) + + for i := 0; i < 200; i++ { + _ = rt2.PutDoc(fmt.Sprint(i), `{"source":"rt2","channels":["alice"]}`) + } + + replicationID := t.Name() + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, false, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + replicationID = t.Name() + "1" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, false, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + + rt1.WaitForActiveReplicatorInitialization(2) + // assert the active replicator count has increased by 2 + rt2.WaitForActiveReplicatorCount(2) + replicationID = t.Name() + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateStopped) + replicationID = t.Name() + "1" + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateStopped) + + // assert that the count for active replicators has decreased by 2 as both replications have finished + rt2.WaitForActiveReplicatorCount(0) + + // assert we can create a new replication as count has decreased below threshold + replicationID = t.Name() + "2" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, false, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + +} + +func TestServerlessConnectionLimitingContinuous(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyReplicate, base.KeyHTTP, base.KeyHTTPResp, base.KeySync, base.KeySyncMsg) + + rt1, rt2, remoteURLString, teardown := rest.SetupSGRPeers(t) + defer teardown() + + // update runtime config to limit to 2 concurrent replication connections + resp := rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 2}`) + rest.RequireStatus(t, resp, http.StatusOK) + + for i := 0; i < 200; i++ { + _ = rt2.PutDoc(fmt.Sprint(i), `{"source":"rt2","channels":["alice"]}`) + } + + // create two replications to take us to the limit + replicationID := t.Name() + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + replicationID = t.Name() + "1" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateRunning) + rt1.WaitForActiveReplicatorInitialization(2) + + // try create a new replication to take it beyond the threshold set by runtime config call + // assert it enter error state + replicationID = t.Name() + "2" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting) + + // change limit to 1 and assert that the replications currently running continue as normal and reject any new ones being added + resp = rt2.SendAdminRequest(http.MethodPut, "/_config", `{"max_concurrent_replications" : 1}`) + rest.RequireStatus(t, resp, http.StatusOK) + + // assert the replications aren't killed as result of change in limit + rt2.WaitForActiveReplicatorCount(2) + // assert we still can't create a new 
replication + replicationID = t.Name() + "3" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting) + + // stop one of the replicators currently running + resp = rt1.SendAdminRequest(http.MethodPut, "/{{.db}}/_replicationStatus/"+t.Name()+"1?action=stop", "") + rest.RequireStatus(t, resp, http.StatusOK) + rt1.WaitForReplicationStatus(t.Name()+"1", db.ReplicationStateStopped) + // assert the count has been decremented + rt2.WaitForActiveReplicatorCount(1) + + // assert we still can't create new replication (new limit is 1) + replicationID = t.Name() + "4" + rt1.CreateReplication(replicationID, remoteURLString, db.ActiveReplicatorTypePull, nil, true, db.ConflictResolverDefault) + rt1.WaitForReplicationStatus(replicationID, db.ReplicationStateReconnecting) + +} + // TestPullReplicationAPI // - Starts 2 RestTesters, one active, and one passive. // - Creates a continuous pull replication on rt1 via the REST API @@ -640,7 +777,8 @@ func TestReplicationStatusActions(t *testing.T) { } -func TestStatusAfterReplicationRebalanceFail(t *testing.T) { +// TestReplicationRebalanceToZeroNodes checks that the replication goes into an unassigned state when there are no nodes available to run replications. +func TestReplicationRebalanceToZeroNodes(t *testing.T) { base.SetUpTestLogging(t, base.LevelDebug, base.KeyAll) activeRT, remoteRT, _, teardown := rest.SetupSGRPeers(t) defer teardown() @@ -1759,8 +1897,8 @@ func TestDBReplicationStatsTeardown(t *testing.T) { // If CE, recreate the replication if !base.IsEnterpriseEdition() { - rt.CreateReplication("repl1", db2Url.String(), db.ActiveReplicatorTypePush, nil, true, db.ConflictResolverDefault) - rt.WaitForReplicationStatus("repl1", db.ReplicationStateRunning) + rt.CreateReplicationForDB("{{.db1}}", "repl1", db2Url.String(), db.ActiveReplicatorTypePush, nil, true, db.ConflictResolverDefault) + rt.WaitForReplicationStatusForDB("{{.db1}}", "repl1", db.ReplicationStateRunning) } // Wait for second document to replicate to confirm replication restart @@ -2193,6 +2331,139 @@ func TestActiveReplicatorPullSkippedSequence(t *testing.T) { assert.Equal(t, int64(0), dbstats.ProcessedSequenceLenPostCleanup.Value()) } +// TestReplicatorReconnectBehaviour tests the interactive values that configure replicator reconnection behaviour +func TestReplicatorReconnectBehaviour(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + + testCases := []struct { + name string + maxBackoff int + specified bool + reconnectTimeout time.Duration + maxReconnectInterval time.Duration + }{ + { + name: "maxbackoff 0", + specified: true, + maxBackoff: 0, + reconnectTimeout: 10 * time.Minute, + maxReconnectInterval: 5 * time.Minute, + }, + { + name: "max backoff not specified", + specified: false, + reconnectTimeout: 0 * time.Minute, + maxReconnectInterval: 5 * time.Minute, + }, + { + name: "maxbackoff 1", + specified: true, + maxBackoff: 1, + reconnectTimeout: 0 * time.Minute, + maxReconnectInterval: 1 * time.Minute, + }, + } + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + activeRT, _, remoteURL, teardown := rest.SetupSGRPeers(t) + defer teardown() + var resp *rest.TestResponse + + if test.specified { + resp = activeRT.SendAdminRequest(http.MethodPut, "/{{.db}}/_replication/replication1", fmt.Sprintf(`{ + "replication_id": "replication1", "remote": "%s", "direction": "pull", + "collections_enabled": %t, "continuous": true, 
"max_backoff_time": %d}`, remoteURL, base.TestsUseNamedCollections(), test.maxBackoff)) + rest.RequireStatus(t, resp, http.StatusCreated) + } else { + resp = activeRT.SendAdminRequest(http.MethodPut, "/{{.db}}/_replication/replication1", fmt.Sprintf(`{ + "replication_id": "replication1", "remote": "%s", "direction": "pull", + "collections_enabled": %t, "continuous": true}`, remoteURL, base.TestsUseNamedCollections())) + rest.RequireStatus(t, resp, http.StatusCreated) + } + activeRT.WaitForReplicationStatus("replication1", db.ReplicationStateRunning) + activeRT.WaitForActiveReplicatorInitialization(1) + + activeReplicator := activeRT.GetDatabase().SGReplicateMgr.GetActiveReplicator("replication1") + config := activeReplicator.GetActiveReplicatorConfig() + + assert.Equal(t, test.reconnectTimeout, config.TotalReconnectTimeout) + assert.Equal(t, test.maxReconnectInterval, config.MaxReconnectInterval) + }) + } + +} + +// TestReconnectReplicator: +// - Starts 2 RestTesters, one active, and one remote. +// - creates a pull replication from remote to active rest tester +// - kills the blip sender to simulate a disconnect that was not initiated by the user +// - asserts the replicator enters a reconnecting state and eventually enters a running state again +// - puts some docs on the remote rest tester and assert the replicator pulls these docs to prove reconnect was successful +func TestReconnectReplicator(t *testing.T) { + base.RequireNumTestBuckets(t, 2) + base.SetUpTestLogging(t, base.LevelInfo, base.KeyAll) + + testCases := []struct { + name string + maxBackoff int + specified bool + }{ + { + name: "maxbackoff 0", + specified: true, + maxBackoff: 0, + }, + { + name: "max backoff not specified", + specified: false, + }, + { + name: "maxbackoff 1", + specified: true, + maxBackoff: 1, + }, + } + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + activeRT, remoteRT, remoteURL, teardown := rest.SetupSGRPeers(t) + defer teardown() + var resp *rest.TestResponse + const replicationName = "replication1" + + if test.specified { + resp = activeRT.SendAdminRequest(http.MethodPut, "/{{.db}}/_replication/replication1", fmt.Sprintf(`{ + "replication_id": "%s", "remote": "%s", "direction": "pull", + "collections_enabled": %t, "continuous": true, "max_backoff_time": %d}`, replicationName, remoteURL, base.TestsUseNamedCollections(), test.maxBackoff)) + rest.RequireStatus(t, resp, http.StatusCreated) + } else { + resp = activeRT.SendAdminRequest(http.MethodPut, "/{{.db}}/_replication/replication1", fmt.Sprintf(`{ + "replication_id": "%s", "remote": "%s", "direction": "pull", + "collections_enabled": %t, "continuous": true}`, replicationName, remoteURL, base.TestsUseNamedCollections())) + rest.RequireStatus(t, resp, http.StatusCreated) + } + activeRT.WaitForReplicationStatus("replication1", db.ReplicationStateRunning) + + activeRT.WaitForActiveReplicatorInitialization(1) + ar := activeRT.GetDatabase().SGReplicateMgr.GetActiveReplicator("replication1") + // race between stopping the blip sender here and the initialization of it on the replicator so need this assertion in here to avoid panic + activeRT.WaitForPullBlipSenderInitialisation(replicationName) + ar.Pull.GetBlipSender().Stop() + + activeRT.WaitForReplicationStatus(replicationName, db.ReplicationStateReconnecting) + + activeRT.WaitForReplicationStatus(replicationName, db.ReplicationStateRunning) + + for i := 0; i < 10; i++ { + response := remoteRT.SendAdminRequest(http.MethodPut, "/{{.keyspace}}/"+fmt.Sprint(i), `{"source": 
"remote"}`) + rest.RequireStatus(t, response, http.StatusCreated) + } + _, err := activeRT.WaitForChanges(10, "/{{.keyspace}}/_changes", "", true) + require.NoError(t, err) + }) + } + +} + // TestActiveReplicatorPullAttachments: // - Starts 2 RestTesters, one active, and one passive. // - Creates a document with an attachment on rt2 which can be pulled by the replicator running in rt1. diff --git a/rest/rest_tester_cluster_test.go b/rest/rest_tester_cluster_test.go index fa57e00e57..5addaf4991 100644 --- a/rest/rest_tester_cluster_test.go +++ b/rest/rest_tester_cluster_test.go @@ -100,9 +100,9 @@ func NewRestTesterCluster(t *testing.T, config *RestTesterClusterConfig) *RestTe // Set group ID for each RestTester from cluster if config.rtConfig == nil { - config.rtConfig = &RestTesterConfig{groupID: config.groupID} + config.rtConfig = &RestTesterConfig{GroupID: config.groupID} } else { - config.rtConfig.groupID = config.groupID + config.rtConfig.GroupID = config.groupID } // only persistent mode is supported for a RestTesterCluster config.rtConfig.PersistentConfig = true diff --git a/rest/revocation_test.go b/rest/revocation_test.go index bf0e50e2b5..32ce7959b3 100644 --- a/rest/revocation_test.go +++ b/rest/revocation_test.go @@ -159,22 +159,22 @@ func InitScenario(t *testing.T, rtConfig *RestTesterConfig) (ChannelRevocationTe defaultSyncFn := ` function (doc, oldDoc){ - if (doc._id === 'userRoles'){ + if (doc._id === 'userRoles'){ for (var key in doc.roles){ role(key, doc.roles[key]); } } - if (doc._id === 'roleChannels'){ + if (doc._id === 'roleChannels'){ for (var key in doc.channels){ access(key, doc.channels[key]); } } - if (doc._id === 'userChannels'){ + if (doc._id === 'userChannels'){ for (var key in doc.channels){ access(key, doc.channels[key]); } } - if (doc._id.indexOf("doc") >= 0){ + if (doc._id.indexOf("doc") >= 0){ channel(doc.channels); } }` @@ -1442,7 +1442,7 @@ func TestRevocationWithUserXattrs(t *testing.T) { access(key, meta.xattrs.channelInfo.userChannels[key]); } } - if (doc._id.indexOf("doc") >= 0){ + if (doc._id.indexOf("doc") >= 0){ channel(doc.channels); } }`, @@ -2328,7 +2328,7 @@ func TestRevocationNoRev(t *testing.T) { messages := btc.pullReplication.GetMessages() var highestMsgSeq uint32 - var highestSeqMsg blip.Message + var highestSeqMsg *blip.Message // Grab most recent changes message for _, message := range messages { messageBody, err := message.Body() diff --git a/rest/routing.go b/rest/routing.go index 79a511b219..895c534643 100644 --- a/rest/routing.go +++ b/rest/routing.go @@ -327,7 +327,7 @@ func CreateAdminRouter(sc *ServerContext) *mux.Router { r.Handle("/{newdb:"+dbRegex+"}/", makeHandlerSpecificAuthScope(sc, adminPrivs, []Permission{PermCreateDb}, nil, (*handler).handleCreateDB, getAuthScopeHandleCreateDB)).Methods("PUT") r.Handle("/{db:"+dbRegex+"}/", - makeOfflineHandler(sc, adminPrivs, []Permission{PermDeleteDb}, nil, (*handler).handleDeleteDB)).Methods("DELETE") + makeMetadataDBOfflineHandler(sc, adminPrivs, []Permission{PermDeleteDb}, nil, (*handler).handleDeleteDB)).Methods("DELETE") r.Handle("/_all_dbs", makeHandler(sc, adminPrivs, []Permission{PermDevOps}, nil, (*handler).handleAllDbs)).Methods("GET", "HEAD") @@ -364,7 +364,7 @@ func wrapRouter(sc *ServerContext, privs handlerPrivs, router *mux.Router) http. 
router.ServeHTTP(response, rq) } else { // Log the request - h := newHandler(sc, privs, response, rq, false) + h := newHandler(sc, privs, response, rq, handlerOptions{}) h.logRequestLine() // Inject CORS if enabled and requested and not admin port diff --git a/rest/server_context.go b/rest/server_context.go index 483d7d5953..3a88cc5f47 100644 --- a/rest/server_context.go +++ b/rest/server_context.go @@ -41,6 +41,9 @@ const kStatsReportInterval = time.Hour const kDefaultSlowQueryWarningThreshold = 500 // ms const KDefaultNumShards = 16 +// defaultBlipStatsReportingInterval is the default interval when to report blip stats, at the end of a message handler. +const defaultBlipStatsReportingInterval = 30 * time.Second + var errCollectionsUnsupported = base.HTTPErrorf(http.StatusBadRequest, "Named collections specified in database config, but not supported by connected Couchbase Server.") var ErrSuspendingDisallowed = errors.New("database does not allow suspending") @@ -70,6 +73,13 @@ type ServerContext struct { LogContextID string // ID to differentiate log messages from different server context fetchConfigsLastUpdate time.Time // The last time fetchConfigsWithTTL() updated dbConfigs allowScopesInPersistentConfig bool // Test only backdoor to allow scopes in persistent config, not supported for multiple databases with different collections targeting the same bucket + ActiveReplicationsCounter +} + +type ActiveReplicationsCounter struct { + activeReplicatorCount int // The count of concurrent active replicators + activeReplicatorLimit int // The limit on number of active replicators allowed + lock sync.RWMutex // Lock for managing access to shared memory location } // defaultConfigRetryTimeout is the total retry time when waiting for in-flight config updates. Set as a multiple of kv op timeout, @@ -83,10 +93,19 @@ type bootstrapContext struct { doneChan chan struct{} // doneChan is closed when the bootstrap polling goroutine finishes. 
} +type getOrAddDatabaseConfigOptions struct { + failFast bool // if set, a failure to connect to a bucket or collection will immediately fail + useExisting bool // if true, return an existing DatabaseContext vs return an error + connectToBucketFn db.OpenBucketFn // supply a custom function for buckets, used for testing only +} + func (sc *ServerContext) CreateLocalDatabase(ctx context.Context, dbs DbConfigMap) error { for _, dbConfig := range dbs { dbc := dbConfig.ToDatabaseConfig() - _, err := sc._getOrAddDatabaseFromConfig(ctx, *dbc, false, db.GetConnectToBucketFn(false)) + _, err := sc._getOrAddDatabaseFromConfig(ctx, *dbc, getOrAddDatabaseConfigOptions{ + useExisting: false, + failFast: false, + }) if err != nil { return err } @@ -133,6 +152,9 @@ func NewServerContext(ctx context.Context, config *StartupConfig, persistentConf sc.Config.API.MetricsInterfaceAuthentication = base.BoolPtr(false) } } + if config.Replicator.MaxConcurrentReplications != 0 { + sc.ActiveReplicationsCounter.activeReplicatorLimit = config.Replicator.MaxConcurrentReplications + } sc.startStatsLogger(ctx) @@ -214,7 +236,8 @@ func (sc *ServerContext) Close(ctx context.Context) { func (sc *ServerContext) GetDatabase(ctx context.Context, name string) (*db.DatabaseContext, error) { dbc, err := sc.GetActiveDatabase(name) if err == base.ErrNotFound { - return sc.GetInactiveDatabase(ctx, name) + dbc, _, err := sc.GetInactiveDatabase(ctx, name) + return dbc, err } return dbc, err } @@ -235,35 +258,35 @@ func (sc *ServerContext) GetActiveDatabase(name string) (*db.DatabaseContext, er // GetInactiveDatabase attempts to load the database and return it's DatabaseContext. It will first attempt to unsuspend the // database, and if that fails, try to load the database from the buckets. -// This should be used if GetActiveDatabase fails. +// This should be used if GetActiveDatabase fails. Returns the database context, a boolean indicating whether the config exists, and an error.
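With the extra return value, callers of GetInactiveDatabase can distinguish "no config anywhere" from "a config exists but the DatabaseContext could not be built". A hypothetical caller sketch (lookupDatabase is illustrative only; context, fmt and the package's existing types are assumed to be available):

// lookupDatabase is a hypothetical caller inside package rest.
func lookupDatabase(sc *ServerContext, ctx context.Context, name string) (*db.DatabaseContext, error) {
	dbc, dbConfigFound, err := sc.GetInactiveDatabase(ctx, name)
	if err != nil {
		if dbConfigFound {
			// A config exists but the DatabaseContext could not be built; handlers created with
			// allowNilDBContext continue in this situation using only the database/bucket name.
			return nil, fmt.Errorf("config found for db %q but database could not be loaded: %w", name, err)
		}
		// No config anywhere: an unknown database.
		return nil, err
	}
	return dbc, nil
}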
+func (sc *ServerContext) GetInactiveDatabase(ctx context.Context, name string) (*db.DatabaseContext, bool, error) { dbc, err := sc.unsuspendDatabase(ctx, name) if err != nil && err != base.ErrNotFound && err != ErrSuspendingDisallowed { - return nil, err + return nil, false, err } else if err == nil { - return dbc, nil + return dbc, true, nil } + var dbConfigFound bool // database not loaded, fallback to fetching it from cluster if sc.BootstrapContext.Connection != nil { - var found bool if sc.Config.IsServerless() { - found, _ = sc.fetchAndLoadDatabaseSince(ctx, name, sc.Config.Unsupported.Serverless.MinConfigFetchInterval) + dbConfigFound, _ = sc.fetchAndLoadDatabaseSince(ctx, name, sc.Config.Unsupported.Serverless.MinConfigFetchInterval) } else { - found, _ = sc.fetchAndLoadDatabase(base.NewNonCancelCtx(), name) + dbConfigFound, _ = sc.fetchAndLoadDatabase(base.NewNonCancelCtx(), name) } - if found { + if dbConfigFound { sc.lock.RLock() defer sc.lock.RUnlock() dbc := sc.databases_[name] if dbc != nil { - return dbc, nil + return dbc, dbConfigFound, nil } } } - return nil, base.HTTPErrorf(http.StatusNotFound, "no such database %q", name) + return nil, dbConfigFound, base.HTTPErrorf(http.StatusNotFound, "no such database %q", name) } func (sc *ServerContext) GetDbConfig(name string) *DbConfig { @@ -343,7 +366,9 @@ func (sc *ServerContext) PostUpgrade(ctx context.Context, preview bool) (postUpg func (sc *ServerContext) _reloadDatabase(ctx context.Context, reloadDbName string, failFast bool) (*db.DatabaseContext, error) { sc._unloadDatabase(ctx, reloadDbName) config := sc.dbConfigs[reloadDbName] - return sc._getOrAddDatabaseFromConfig(ctx, config.DatabaseConfig, true, db.GetConnectToBucketFn(failFast)) + return sc._getOrAddDatabaseFromConfig(ctx, config.DatabaseConfig, getOrAddDatabaseConfigOptions{ + useExisting: true, + failFast: failFast}) } // Removes and re-adds a database to the ServerContext. @@ -364,18 +389,21 @@ func (sc *ServerContext) ReloadDatabaseWithConfig(nonContextStruct base.NonCance func (sc *ServerContext) _reloadDatabaseWithConfig(ctx context.Context, config DatabaseConfig, failFast bool) error { sc._removeDatabase(ctx, config.Name) - _, err := sc._getOrAddDatabaseFromConfig(ctx, config, false, db.GetConnectToBucketFn(failFast)) + _, err := sc._getOrAddDatabaseFromConfig(ctx, config, getOrAddDatabaseConfigOptions{ + useExisting: false, + failFast: failFast, + }) return err } // Adds a database to the ServerContext. Attempts a read after it gets the write // lock to see if it's already been added by another process. If so, returns either the // existing DatabaseContext or an error based on the useExisting flag. 
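The options struct consolidates what used to be separate useExisting and open-bucket parameters. As a sketch of the combinations this patch uses with getOrAddDatabaseFromConfig (the wrapper function below is hypothetical, but the fields and the connectToBucketFn signature follow the code in this change):

// exampleOptionCombinations is hypothetical; sc, ctx, config and bucket are assumed to be in scope of package rest.
func exampleOptionCombinations(sc *ServerContext, ctx context.Context, config DatabaseConfig, bucket base.Bucket) {
	// Normal startup: retry bucket/collection connections rather than failing immediately.
	_, _ = sc.getOrAddDatabaseFromConfig(ctx, config, getOrAddDatabaseConfigOptions{useExisting: false, failFast: false})

	// Fail-fast variant, as used by AddDatabaseFromConfigFailFast.
	_, _ = sc.getOrAddDatabaseFromConfig(ctx, config, getOrAddDatabaseConfigOptions{useExisting: false, failFast: true})

	// Test-only seam: bypass db.ConnectToBucket and hand back a pre-built bucket.
	_, _ = sc.getOrAddDatabaseFromConfig(ctx, config, getOrAddDatabaseConfigOptions{
		useExisting: false,
		connectToBucketFn: func(_ context.Context, _ base.BucketSpec, _ bool) (base.Bucket, error) {
			return bucket, nil
		},
	})
}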
-func (sc *ServerContext) getOrAddDatabaseFromConfig(ctx context.Context, config DatabaseConfig, useExisting bool, openBucketFn db.OpenBucketFn) (*db.DatabaseContext, error) { +func (sc *ServerContext) getOrAddDatabaseFromConfig(ctx context.Context, config DatabaseConfig, options getOrAddDatabaseConfigOptions) (*db.DatabaseContext, error) { // Obtain write lock during add database, to avoid race condition when creating based on ConfigServer sc.lock.Lock() defer sc.lock.Unlock() - return sc._getOrAddDatabaseFromConfig(ctx, config, useExisting, openBucketFn) + return sc._getOrAddDatabaseFromConfig(ctx, config, options) } func GetBucketSpec(ctx context.Context, config *DatabaseConfig, serverConfig *StartupConfig) (spec base.BucketSpec, err error) { @@ -419,8 +447,7 @@ func GetBucketSpec(ctx context.Context, config *DatabaseConfig, serverConfig *St // lock to see if it's already been added by another process. If so, returns either the // existing DatabaseContext or an error based on the useExisting flag. // Pass in a bucketFromBucketSpecFn to replace the default ConnectToBucket function. This will cause the failFast argument to be ignored -func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config DatabaseConfig, useExisting bool, openBucketFn db.OpenBucketFn) (*db.DatabaseContext, error) { - +func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config DatabaseConfig, options getOrAddDatabaseConfigOptions) (*db.DatabaseContext, error) { // Generate bucket spec and validate whether db already exists spec, err := GetBucketSpec(ctx, &config, sc.Config) if err != nil { @@ -454,7 +481,7 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config } if sc.databases_[dbName] != nil { - if useExisting { + if options.useExisting { return sc.databases_[dbName], nil } else { return nil, base.HTTPErrorf(http.StatusPreconditionFailed, // what CouchDB returns @@ -469,11 +496,18 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config // Connect to bucket base.InfofCtx(ctx, base.KeyAll, "Opening db /%s as bucket %q, pool %q, server <%s>", base.MD(dbName), base.MD(spec.BucketName), base.SD(base.DefaultPool), base.SD(spec.Server)) - bucket, err := openBucketFn(ctx, spec) + + // the connectToBucketFn is used for testing seam + var bucket base.Bucket + if options.connectToBucketFn != nil { + // the connectToBucketFn is used for testing seam + bucket, err = options.connectToBucketFn(ctx, spec, options.failFast) + } else { + bucket, err = db.ConnectToBucket(ctx, spec, options.failFast) + } if err != nil { return nil, err } - // If using a walrus bucket, force use of views useViews := base.BoolDefault(config.UseViews, false) if !useViews && spec.IsWalrusBucket() { @@ -544,10 +578,21 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config for scopeName, scopeConfig := range config.Scopes { for collectionName, _ := range scopeConfig.Collections { var dataStore sgbucket.DataStore - err := base.WaitForNoError(func() error { + + var err error + if options.failFast { dataStore, err = bucket.NamedDataStore(base.ScopeAndCollectionName{Scope: scopeName, Collection: collectionName}) - return err - }) + } else { + waitForCollection := func() (bool, error, interface{}) { + dataStore, err = bucket.NamedDataStore(base.ScopeAndCollectionName{Scope: scopeName, Collection: collectionName}) + return err != nil, err, nil + } + + err, _ = base.RetryLoop( + fmt.Sprintf("waiting for %s.%s.%s to exist", 
base.MD(bucket.GetName()), base.MD(scopeName), base.MD(collectionName)), + waitForCollection, + base.CreateMaxDoublingSleeperFunc(30, 10, 1000)) + } if err != nil { return nil, fmt.Errorf("error attempting to create/update database: %w", err) } @@ -704,6 +749,7 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(ctx context.Context, config contextOptions.MetadataID = config.MetadataID } + contextOptions.BlipStatsReportingInterval = defaultBlipStatsReportingInterval.Milliseconds() // Create the DB Context dbcontext, err := db.NewDatabaseContext(ctx, dbName, bucket, autoImport, contextOptions) if err != nil { @@ -1036,6 +1082,7 @@ func dbcOptionsFromConfig(ctx context.Context, sc *ServerContext, config *DbConf GroupID: groupID, JavascriptTimeout: javascriptTimeout, Serverless: sc.Config.IsServerless(), + ChangesRequestPlus: base.BoolDefault(config.ChangesRequestPlus, false), // UserQueries: config.UserQueries, // behind feature flag (see below) // UserFunctions: config.UserFunctions, // behind feature flag (see below) // GraphQL: config.GraphQL, // behind feature flag (see below) @@ -1203,13 +1250,15 @@ func (sc *ServerContext) initEventHandlers(ctx context.Context, dbcontext *db.Da // Adds a database to the ServerContext given its configuration. If an existing config is found // for the name, returns an error. func (sc *ServerContext) AddDatabaseFromConfig(ctx context.Context, config DatabaseConfig) (*db.DatabaseContext, error) { - return sc.getOrAddDatabaseFromConfig(ctx, config, false, db.GetConnectToBucketFn(false)) + failFast := false + return sc.getOrAddDatabaseFromConfig(ctx, config, getOrAddDatabaseConfigOptions{useExisting: false, failFast: failFast}) } // AddDatabaseFromConfigFailFast adds a database to the ServerContext given its configuration and fails fast. // If an existing config is found for the name, returns an error. 
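Assuming the arguments to base.CreateMaxDoublingSleeperFunc(30, 10, 1000) in the RetryLoop above are (max attempts, initial sleep in ms, max sleep in ms), the collection wait backs off roughly as in this standalone illustration (plain Go, independent of the SG codebase):

package main

import (
	"fmt"
	"time"
)

func main() {
	sleep := 10 * time.Millisecond
	const maxSleep = 1000 * time.Millisecond
	for attempt := 1; attempt <= 30; attempt++ {
		fmt.Printf("attempt %2d: back off %v before retrying NamedDataStore\n", attempt, sleep)
		sleep *= 2 // double the sleep each attempt, capped below
		if sleep > maxSleep {
			sleep = maxSleep
		}
	}
}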
func (sc *ServerContext) AddDatabaseFromConfigFailFast(nonContextStruct base.NonCancellableContext, config DatabaseConfig) (*db.DatabaseContext, error) { - return sc.getOrAddDatabaseFromConfig(nonContextStruct.Ctx, config, false, db.GetConnectToBucketFn(true)) + failFast := true + return sc.getOrAddDatabaseFromConfig(nonContextStruct.Ctx, config, getOrAddDatabaseConfigOptions{useExisting: false, failFast: failFast}) } func (sc *ServerContext) processEventHandlersForEvent(ctx context.Context, events []*EventConfig, eventType db.EventType, dbcontext *db.DatabaseContext) error { @@ -1335,7 +1384,10 @@ func (sc *ServerContext) _unsuspendDatabase(ctx context.Context, dbName string) return nil, fmt.Errorf("unsuspending db %q failed due to an error while trying to retrieve latest config from bucket %q: %w", base.MD(dbName).Redact(), base.MD(bucket).Redact(), err) } dbConfig.cfgCas = cas - dbCtx, err = sc._getOrAddDatabaseFromConfig(ctx, dbConfig.DatabaseConfig, false, db.GetConnectToBucketFn(false)) + failFast := false + dbCtx, err = sc._getOrAddDatabaseFromConfig(ctx, dbConfig.DatabaseConfig, getOrAddDatabaseConfigOptions{ + useExisting: false, + failFast: failFast}) if err != nil { return nil, err } @@ -1508,10 +1560,17 @@ func initClusterAgent(ctx context.Context, clusterAddress, clusterUser, clusterP }, } + base.DebugfCtx(ctx, base.KeyAll, "Parsing cluster connection string %q", base.UD(clusterAddress)) + beforeFromConnStr := time.Now() err = config.FromConnStr(clusterAddress) if err != nil { return nil, err } + if d := time.Since(beforeFromConnStr); d > base.FromConnStrWarningThreshold { + base.WarnfCtx(ctx, "Parsed cluster connection string %q in: %v", base.UD(clusterAddress), d) + } else { + base.DebugfCtx(ctx, base.KeyAll, "Parsed cluster connection string %q in: %v", base.UD(clusterAddress), d) + } agent, err := gocbcore.CreateAgent(&config) if err != nil { @@ -1822,10 +1881,10 @@ func (sc *ServerContext) Database(ctx context.Context, name string) *db.Database return db } -func (sc *ServerContext) initializeCouchbaseServerConnections(ctx context.Context) error { - base.InfofCtx(ctx, base.KeyAll, "initializing server connections") +func (sc *ServerContext) initializeCouchbaseServerConnections(ctx context.Context, failFast bool) error { + base.InfofCtx(ctx, base.KeyAll, "Initializing server connections") defer func() { - base.InfofCtx(ctx, base.KeyAll, "finished initializing server connections") + base.InfofCtx(ctx, base.KeyAll, "Finished initializing server connections") }() goCBAgent, err := sc.initializeGoCBAgent(ctx) if err != nil { diff --git a/rest/server_context_test.go b/rest/server_context_test.go index 03a1224b6e..210c4e9129 100644 --- a/rest/server_context_test.go +++ b/rest/server_context_test.go @@ -22,7 +22,6 @@ import ( "time" "github.com/couchbase/sync_gateway/auth" - "github.com/couchbase/sync_gateway/db" "github.com/couchbase/gocbcore/v10/connstr" sgbucket "github.com/couchbase/sg-bucket" @@ -168,7 +167,7 @@ func TestGetOrAddDatabaseFromConfig(t *testing.T) { // Get or add database name from config without valid database name; throws 400 Illegal database name error dbConfig := DbConfig{OldRevExpirySeconds: &oldRevExpirySeconds, LocalDocExpirySecs: &localDocExpirySecs} - dbContext, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(false)) + dbContext, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, getOrAddDatabaseConfigOptions{useExisting: false, failFast: 
false}) assert.Nil(t, dbContext, "Can't create database context without a valid database name") assert.Error(t, err, "It should throw 400 Illegal database name") assert.Contains(t, err.Error(), strconv.Itoa(http.StatusBadRequest)) @@ -187,7 +186,10 @@ func TestGetOrAddDatabaseFromConfig(t *testing.T) { BucketConfig: BucketConfig{Server: &server, Bucket: &bucketName}, } - dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(false)) + dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: false, + }) assert.Nil(t, dbContext, "Can't create database context from config with unrecognized value for import_docs") assert.Error(t, err, "It should throw Unrecognized value for import_docs") @@ -214,14 +216,22 @@ func TestGetOrAddDatabaseFromConfig(t *testing.T) { AutoImport: false, } - dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(false)) + dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: false, + }) assert.Nil(t, dbContext, "Can't create database context with duplicate database name") assert.Error(t, err, "It should throw 412 Duplicate database names") assert.Contains(t, err.Error(), strconv.Itoa(http.StatusPreconditionFailed)) // Get or add database from config with duplicate database name and useExisting as true // Existing database context should be returned - dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, true, db.GetConnectToBucketFn(false)) + dbContext, err = serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, + getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: true, + }) + assert.NoError(t, err, "No error while trying to get the existing database name") assert.Equal(t, server, dbContext.BucketSpec.Server) assert.Equal(t, bucketName, dbContext.BucketSpec.BucketName) @@ -615,7 +625,12 @@ func TestServerContextSetupCollectionsSupport(t *testing.T) { }, }, } - _, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(true)) + _, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, + getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: false, + }) + require.ErrorIs(t, err, errCollectionsUnsupported) } @@ -790,7 +805,11 @@ func TestDisableScopesInLegacyConfig(t *testing.T) { } dbConfig.Scopes = GetCollectionsConfigWithSyncFn(t, bucket, nil, 1) } - dbContext, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, false, db.GetConnectToBucketFn(false)) + dbContext, err := serverContext._getOrAddDatabaseFromConfig(ctx, DatabaseConfig{DbConfig: dbConfig}, + getOrAddDatabaseConfigOptions{ + failFast: false, + useExisting: false, + }) if persistentConfig || scopes == false { require.NoError(t, err) require.NotNil(t, dbContext) diff --git a/rest/serverless_test.go b/rest/serverless_test.go index cfd0c49376..23ab48c109 100644 --- a/rest/serverless_test.go +++ b/rest/serverless_test.go @@ -51,7 +51,7 @@ func TestServerlessPollBuckets(t *testing.T) { assert.Empty(t, configs) // Create a database - rt2 := NewRestTester(t, &RestTesterConfig{CustomTestBucket: tb1.NoCloseClone(), 
PersistentConfig: true, groupID: &sc.Config.Bootstrap.ConfigGroupID}) + rt2 := NewRestTester(t, &RestTesterConfig{CustomTestBucket: tb1.NoCloseClone(), PersistentConfig: true, GroupID: &sc.Config.Bootstrap.ConfigGroupID}) defer rt2.Close() // Create a new db on the RT to confirm fetch won't retrieve it (due to bucket not being in BucketCredentials) resp := rt2.SendAdminRequest(http.MethodPut, "/db/", fmt.Sprintf(`{ diff --git a/rest/upgradetest/remove_collection_test.go b/rest/upgradetest/remove_collection_test.go new file mode 100644 index 0000000000..ca090396ae --- /dev/null +++ b/rest/upgradetest/remove_collection_test.go @@ -0,0 +1,112 @@ +// Copyright 2023-Present Couchbase, Inc. +// +// Use of this software is governed by the Business Source License included +// in the file licenses/BSL-Couchbase.txt. As of the Change Date specified +// in that file, in accordance with the Business Source License, use of this +// software will be governed by the Apache License, Version 2.0, included in +// the file licenses/APL2.txt. + +package upgradetest + +import ( + "fmt" + "net/http" + "testing" + + "github.com/couchbase/sync_gateway/base" + "github.com/couchbase/sync_gateway/rest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestRemoveCollection tests when a collection has been removed from CBS, and the server is restarted. We should be able to modify or delete the database. +func TestRemoveCollection(t *testing.T) { + if base.UnitTestUrlIsWalrus() { + t.Skip("test relies on bootstrap connection and needs CBS") + } + base.TestRequiresCollections(t) + base.RequireNumTestBuckets(t, 2) + numCollections := 2 + bucket := base.GetPersistentTestBucket(t) + defer bucket.Close() + base.RequireNumTestDataStores(t, numCollections) + rtConfig := &rest.RestTesterConfig{ + CustomTestBucket: bucket.NoCloseClone(), + PersistentConfig: true, + GroupID: base.StringPtr(t.Name()), + AdminInterfaceAuthentication: true, + } + rt := rest.NewRestTesterMultipleCollections(t, rtConfig, 2) + + dbConfig := rt.NewDbConfig() + dbConfig.Scopes = rest.GetCollectionsConfig(t, rt.TestBucket, numCollections) + + dbName := "removecollectiondb" + + dbcJSON, err := base.JSONMarshal(dbConfig) + require.NoError(t, err) + resp := rt.SendAdminRequestWithAuth(http.MethodPut, "/"+dbName+"/", string(dbcJSON), base.TestClusterUsername(), base.TestClusterPassword()) + rest.RequireStatus(t, resp, http.StatusCreated) + + dataStores := rt.TestBucket.GetNonDefaultDatastoreNames() + deletedDataStore := dataStores[1] + + defer func() { + assert.NoError(t, bucket.CreateDataStore(deletedDataStore)) + + }() + // drop a data store + require.NoError(t, rt.TestBucket.DropDataStore(deletedDataStore)) + require.Len(t, rt.TestBucket.GetNonDefaultDatastoreNames(), len(dataStores)-1) + + rt.Close() + rtConfig = &rest.RestTesterConfig{ + CustomTestBucket: bucket.NoCloseClone(), + PersistentConfig: true, + GroupID: base.StringPtr(t.Name()), + AdminInterfaceAuthentication: true, + } + + rt = rest.NewRestTesterMultipleCollections(t, rtConfig, 2) + defer rt.Close() + + bucket2Role := rest.RouteRole{ + RoleName: rest.MobileSyncGatewayRole.RoleName, + DatabaseScoped: true, + } + if base.TestsUseServerCE() { + bucket2Role = rest.RouteRole{ + RoleName: rest.BucketFullAccessRole.RoleName, + DatabaseScoped: true, + } + } + + eps, httpClient, err := rt.ServerContext().ObtainManagementEndpointsAndHTTPClient() + require.NoError(t, err) + + altBucket := base.GetTestBucket(t) + defer altBucket.Close() + const password = 
"password2" + rest.MakeUser(t, httpClient, eps[0], bucket2Role.RoleName, password, []string{fmt.Sprintf("%s[%s]", bucket2Role.RoleName, altBucket.GetName())}) + defer rest.DeleteUser(t, httpClient, eps[0], bucket2Role.RoleName) + + delete(dbConfig.Scopes[deletedDataStore.ScopeName()].Collections, deletedDataStore.CollectionName()) + + dbcJSON, err = base.JSONMarshal(dbConfig) + require.NoError(t, err) + + resp = rt.SendAdminRequestWithAuth(http.MethodPost, "/"+dbName+"/", string(dbcJSON), base.TestClusterUsername(), base.TestClusterPassword()) + rest.RequireStatus(t, resp, http.StatusForbidden) + + // wrong RBAC user + resp = rt.SendAdminRequestWithAuth(http.MethodDelete, "/"+dbName+"/", "", bucket2Role.RoleName, password) + rest.RequireStatus(t, resp, http.StatusForbidden) + + // bad credentials + resp = rt.SendAdminRequestWithAuth(http.MethodDelete, "/"+dbName+"/", "", "baduser", "badpassword") + rest.RequireStatus(t, resp, http.StatusUnauthorized) + + resp = rt.SendAdminRequestWithAuth(http.MethodDelete, "/"+dbName+"/", "", base.TestClusterUsername(), base.TestClusterPassword()) + rest.RequireStatus(t, resp, http.StatusOK) + +} diff --git a/rest/utilities_testing.go b/rest/utilities_testing.go index 1960d983e1..2ebba55f87 100644 --- a/rest/utilities_testing.go +++ b/rest/utilities_testing.go @@ -66,7 +66,7 @@ type RestTesterConfig struct { enableAdminAuthPermissionsCheck bool useTLSServer bool // If true, TLS will be required for communications with CBS. Default: false PersistentConfig bool - groupID *string + GroupID *string serverless bool // Runs SG in serverless mode. Must be used in conjunction with persistent config collectionConfig collectionConfiguration numCollections int @@ -227,8 +227,8 @@ func (rt *RestTester) Bucket() base.Bucket { } } - if rt.RestTesterConfig.groupID != nil { - sc.Bootstrap.ConfigGroupID = *rt.RestTesterConfig.groupID + if rt.RestTesterConfig.GroupID != nil { + sc.Bootstrap.ConfigGroupID = *rt.RestTesterConfig.GroupID } else if rt.RestTesterConfig.PersistentConfig { // If running in persistent config mode, the database has to be manually created. If the db name is the same as a // past tests db name, a db already exists error could happen if the past tests bucket is still flushing. Prevent this @@ -270,7 +270,7 @@ func (rt *RestTester) Bucket() base.Bucket { rt.TestBucket.BucketSpec.TLSSkipVerify = base.TestTLSSkipVerify() - if err := rt.RestTesterServerContext.initializeCouchbaseServerConnections(ctx); err != nil { + if err := rt.RestTesterServerContext.initializeCouchbaseServerConnections(ctx, true); err != nil { panic("Couldn't initialize Couchbase Server connection: " + err.Error()) } } @@ -1181,9 +1181,14 @@ func (s *SlowResponseRecorder) Write(buf []byte) (int, error) { // AddDatabaseFromConfigWithBucket adds a database to the ServerContext and sets a specific bucket on the database context. // If an existing config is found for the name, returns an error. 
func (sc *ServerContext) AddDatabaseFromConfigWithBucket(ctx context.Context, tb testing.TB, config DatabaseConfig, bucket base.Bucket) (*db.DatabaseContext, error) { - return sc.getOrAddDatabaseFromConfig(ctx, config, false, func(ctx context.Context, spec base.BucketSpec) (base.Bucket, error) { - return bucket, nil - }) + options := getOrAddDatabaseConfigOptions{ + useExisting: false, + failFast: false, + connectToBucketFn: func(_ context.Context, spec base.BucketSpec, _ bool) (base.Bucket, error) { + return bucket, nil + }, + } + return sc.getOrAddDatabaseFromConfig(ctx, config, options) } // The parameters used to create a BlipTester @@ -1243,6 +1248,8 @@ type BlipTester struct { // with this websocket connection blipContext *blip.Context + dispatcher blip.ByProfileDispatcher + // The blip sender that can be used for sending messages over the websocket connection sender *blip.Sender @@ -1252,7 +1259,7 @@ type BlipTester struct { } // Close the bliptester -func (bt BlipTester) Close() { +func (bt *BlipTester) Close() { bt.sender.Close() if !bt.avoidRestTesterClose { bt.restTester.Close() @@ -1260,7 +1267,7 @@ func (bt BlipTester) Close() { } // Returns database context for blipTester (assumes underlying rest tester is based on a single db - returns first it finds) -func (bt BlipTester) DatabaseContext() *db.DatabaseContext { +func (bt *BlipTester) DatabaseContext() *db.DatabaseContext { dbs := bt.restTester.ServerContext().AllDatabases() for _, database := range dbs { return database @@ -1391,6 +1398,7 @@ func createBlipTesterWithSpec(tb testing.TB, spec BlipTesterSpec, rt *RestTester } // Ensure that errors get correctly surfaced in tests + bt.blipContext.RequestHandler = bt.dispatcher.Dispatch bt.blipContext.FatalErrorHandler = func(err error) { tb.Fatalf("BLIP fatal error: %v", err) } @@ -1558,6 +1566,17 @@ func (bt *BlipTester) SendRev(docId, docRev string, body []byte, properties blip } +// Registers a function as a handler for "changes" messages. Ensures that the messages are +// handled one at a time to avoid race conditions: some test handlers assume that each call sees +// sequences higher than the last, and some need the "null" (caught-up) changes message to arrive last.
+func (bt *BlipTester) RegisterChangesHandler(changesHandler blip.SynchronousHandler) { + throttle := blip.ThrottlingDispatcher{ + MaxConcurrency: 1, + Handler: blip.AsAsyncHandler(changesHandler), + } + bt.dispatcher.SetHandler("changes", throttle.Dispatch) +} + // GetUserPayload will take username, password, email, channels and roles you want to assign a user and create the appropriate payload for the _user endpoint func GetUserPayload(t testing.TB, username, password, email string, collection *db.DatabaseCollection, chans, roles []string) string { config := auth.PrincipalConfig{} @@ -1674,16 +1693,16 @@ func (bt *BlipTester) GetDocAtRev(requestedDocID, requestedDocRev string) (resul defer func() { // Clean up all profile handlers that are registered as part of this test - delete(bt.blipContext.HandlerForProfile, "changes") - delete(bt.blipContext.HandlerForProfile, "rev") + bt.dispatcher.SetHandler("changes", nil) + bt.dispatcher.SetHandler("rev", nil) }() // -------- Changes handler callback -------- - bt.blipContext.HandlerForProfile["changes"] = getChangesHandler(&changesFinishedWg, &revsFinishedWg) + bt.RegisterChangesHandler(getChangesHandler(&changesFinishedWg, &revsFinishedWg)) // -------- Rev handler callback -------- - bt.blipContext.HandlerForProfile["rev"] = func(request *blip.Message) { - + bt.dispatcher.SetHandler("rev", func(request *blip.Message, onComplete func()) { + defer onComplete() defer revsFinishedWg.Done() body, err := request.Body() if err != nil { @@ -1703,8 +1722,7 @@ func (bt *BlipTester) GetDocAtRev(requestedDocID, requestedDocRev string) (resul if docId == requestedDocID && docRev == requestedDocRev { resultDoc = doc } - - } + }) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back changesFinishedWg.Add(1) @@ -1741,7 +1759,7 @@ func (bt *BlipTester) SendRevWithAttachment(input SendRevWithAttachmentInput) (s defer func() { // Clean up all profile handlers that are registered as part of this test - delete(bt.blipContext.HandlerForProfile, "getAttachment") + bt.dispatcher.SetHandler("getAttachment", nil) }() // Create a doc with an attachment @@ -1772,14 +1790,14 @@ func (bt *BlipTester) SendRevWithAttachment(input SendRevWithAttachmentInput) (s getAttachmentWg := sync.WaitGroup{} - bt.blipContext.HandlerForProfile["getAttachment"] = func(request *blip.Message) { + bt.dispatcher.SetHandler("getAttachment", blip.AsAsyncHandler(func(request *blip.Message) { defer getAttachmentWg.Done() if request.Properties["digest"] != myAttachment.Digest { panic(fmt.Sprintf("Unexpected digest. Got: %v, expected: %v", request.Properties["digest"], myAttachment.Digest)) } response := request.Response() response.SetBody([]byte(input.attachmentBody)) - } + })) // Push a rev with an attachment. 
getAttachmentWg.Add(1) @@ -1827,7 +1845,7 @@ func (bt *BlipTester) GetChanges() (changes [][]interface{}) { defer func() { // Clean up all profile handlers that are registered as part of this test - delete(bt.blipContext.HandlerForProfile, "changes") // a handler for this profile is registered in SubscribeToChanges + bt.dispatcher.SetHandler("changes", nil) // a handler for this profile is registered in SubscribeToChanges }() collectedChanges := [][]interface{}{} @@ -1897,7 +1915,6 @@ func (bt *BlipTester) WaitForNumDocsViaChanges(numDocsExpected int) (docs map[st // It is basically a pull replication without the checkpointing // Warning: this can only be called from a single goroutine, given the fact it registers profile handlers. func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { - docs = map[string]RestDocument{} // Mutex to avoid write contention on docs while PullDocs is running (as rev messages may be processed concurrently) @@ -1907,17 +1924,16 @@ func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { defer func() { // Clean up all profile handlers that are registered as part of this test - delete(bt.blipContext.HandlerForProfile, "changes") - delete(bt.blipContext.HandlerForProfile, "rev") + bt.dispatcher.SetHandler("changes", nil) + bt.dispatcher.SetHandler("rev", nil) }() // -------- Changes handler callback -------- // When this test sends subChanges, Sync Gateway will send a changes request that must be handled - bt.blipContext.HandlerForProfile["changes"] = getChangesHandler(&changesFinishedWg, &revsFinishedWg) + bt.RegisterChangesHandler(getChangesHandler(&changesFinishedWg, &revsFinishedWg)) // -------- Rev handler callback -------- - bt.blipContext.HandlerForProfile["rev"] = func(request *blip.Message) { - + bt.dispatcher.SetHandler("rev", blip.AsAsyncHandler(func(request *blip.Message) { defer revsFinishedWg.Done() body, err := request.Body() if err != nil { @@ -1971,15 +1987,15 @@ func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { response.SetBody([]byte{}) // Empty response to indicate success } - } + })) // -------- Norev handler callback -------- - bt.blipContext.HandlerForProfile["norev"] = func(request *blip.Message) { + bt.dispatcher.SetHandler("norev", blip.AsAsyncHandler(func(request *blip.Message) { // If a norev is received, then don't bother waiting for one of the expected revisions, since it will never come. // The norev could be added to the returned docs map, but so far there is no need for that. The ability // to assert on the number of actually received revisions (which norevs won't affect) meets current test requirements. 
defer revsFinishedWg.Done() - } + })) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back changesFinishedWg.Add(1) @@ -2004,7 +2020,7 @@ func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { func (bt *BlipTester) SubscribeToChanges(continuous bool, changes chan<- *blip.Message) { // When this test sends subChanges, Sync Gateway will send a changes request that must be handled - bt.blipContext.HandlerForProfile["changes"] = func(request *blip.Message) { + bt.RegisterChangesHandler(func(request *blip.Message) { changes <- request @@ -2019,7 +2035,7 @@ func (bt *BlipTester) SubscribeToChanges(continuous bool, changes chan<- *blip.M response.SetBody(emptyResponseValBytes) } - } + }) // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back subChangesRequest := blip.NewRequest() @@ -2457,6 +2473,7 @@ func (rt *RestTester) GetChangesOneShot(t testing.TB, keyspace string, since int } func (rt *RestTester) NewDbConfig() DbConfig { + // Calling rt.Bucket() below ensures the test bucket has been initialized before the config is built config := DbConfig{ BucketConfig: BucketConfig{ Bucket: base.StringPtr(rt.Bucket().GetName()), diff --git a/rest/utilities_testing_resttester.go b/rest/utilities_testing_resttester.go index b5790205a5..08b52c4fc3 100644 --- a/rest/utilities_testing_resttester.go +++ b/rest/utilities_testing_resttester.go @@ -134,6 +134,14 @@ func (rt *RestTester) WaitForActiveReplicatorInitialization(count int) { require.NoError(rt.TB, rt.WaitForCondition(successFunc), "mismatch on number of active replicators") } +func (rt *RestTester) WaitForPullBlipSenderInitialisation(name string) { + successFunc := func() bool { + bs := rt.GetDatabase().SGReplicateMgr.GetActiveReplicator(name).Pull.GetBlipSender() + return bs != nil + } + require.NoError(rt.TB, rt.WaitForCondition(successFunc), "blip sender on active replicator not initialized") +} + // createReplication creates a replication via the REST API with the specified ID, remoteURL, direction and channel filter func (rt *RestTester) CreateReplication(replicationID string, remoteURLString string, direction db.ActiveReplicatorDirection, channels []string, continuous bool, conflictResolver db.ConflictResolverType) { rt.CreateReplicationForDB("{{.db}}", replicationID, remoteURLString, direction, channels, continuous, conflictResolver) @@ -167,6 +175,21 @@ func (rt *RestTester) WaitForAssignedReplications(count int) { require.NoError(rt.TB, rt.WaitForCondition(successFunc)) } +func (rt *RestTester) GetActiveReplicatorCount() int { + rt.ServerContext().ActiveReplicationsCounter.lock.Lock() + defer rt.ServerContext().ActiveReplicationsCounter.lock.Unlock() + return rt.ServerContext().ActiveReplicationsCounter.activeReplicatorCount +} + +func (rt *RestTester) WaitForActiveReplicatorCount(expCount int) { + var count int + successFunc := func() bool { + count = rt.GetActiveReplicatorCount() + return count == expCount + } + require.NoError(rt.TB, rt.WaitForCondition(successFunc), "Mismatch in active replicator count, expected count %d actual %d", expCount, count) +} + func (rt *RestTester) WaitForReplicationStatusForDB(dbName string, replicationID string, targetStatus string) { var status db.ReplicationStatus successFunc := func() bool {