From a2a389ab8da0e6726692b5cc22c8e2b151cc49b5 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Fri, 16 Aug 2024 15:10:55 -0400 Subject: [PATCH 01/14] RSDK-7403: Improve remote camera clients. --- components/camera/client.go | 510 +++++++++++++-------- components/camera/client_test.go | 509 ++++++++++++++++++++ components/camera/fake/camera.go | 2 + components/camera/rtppassthrough/buffer.go | 2 + go.mod | 2 +- go.sum | 4 +- gostream/stream.go | 8 + gostream/webrtc_track.go | 91 +++- grpc/conn.go | 42 ++ grpc/shared_conn.go | 48 +- grpc/tracker.go | 9 + grpc/tracker_test.go | 20 + robot/client/client.go | 24 +- robot/impl/resource_manager.go | 48 +- robot/web/stream/camera/camera.go | 24 + robot/web/stream/server.go | 258 ++++++++--- robot/web/stream/state/state.go | 477 ++++++++----------- robot/web/stream/state/state_test.go | 486 ++++++++++---------- robot/web/stream/stream.go | 66 ++- robot/web/web_c.go | 6 +- testutils/robottestutils/robot_utils.go | 2 +- 21 files changed, 1755 insertions(+), 883 deletions(-) create mode 100644 grpc/tracker.go create mode 100644 grpc/tracker_test.go create mode 100644 robot/web/stream/camera/camera.go diff --git a/components/camera/client.go b/components/camera/client.go index 69294a5b2a6..cb7fdc89222 100644 --- a/components/camera/client.go +++ b/components/camera/client.go @@ -3,14 +3,17 @@ package camera import ( "bytes" "context" + "errors" "fmt" "image" "io" - "slices" + "os" + "runtime/debug" "sync" + "sync/atomic" + "time" "github.com/pion/rtp" - "github.com/pkg/errors" "github.com/viamrobotics/webrtc/v3" "go.opencensus.io/trace" pb "go.viam.com/api/component/camera/v1" @@ -18,13 +21,14 @@ import ( goutils "go.viam.com/utils" goprotoutils "go.viam.com/utils/protoutils" "go.viam.com/utils/rpc" + "golang.org/x/exp/slices" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/structpb" "go.viam.com/rdk/components/camera/rtppassthrough" "go.viam.com/rdk/data" "go.viam.com/rdk/gostream" - rdkgrpc "go.viam.com/rdk/grpc" + "go.viam.com/rdk/grpc" "go.viam.com/rdk/logging" "go.viam.com/rdk/pointcloud" "go.viam.com/rdk/protoutils" @@ -34,6 +38,16 @@ import ( "go.viam.com/rdk/utils" ) +var ( + red = "\033[31m" + reset = "\033[0m" +) + +// this helps make the test case much easier to read. +func (c *client) redLog(msg string) { + c.logger.Warn(red + msg + reset) +} + var ( // ErrNoPeerConnection indicates there was no peer connection. ErrNoPeerConnection = errors.New("No PeerConnection") @@ -41,23 +55,19 @@ var ( ErrNoSharedPeerConnection = errors.New("No Shared PeerConnection") // ErrUnknownSubscriptionID indicates that a SubscriptionID is unknown. ErrUnknownSubscriptionID = errors.New("SubscriptionID Unknown") + readRTPTimeout = time.Millisecond * 200 ) -type ( - singlePacketCallback func(*rtp.Packet) - bufAndCB struct { - cb singlePacketCallback - buf *rtppassthrough.Buffer - } - bufAndCBByID map[rtppassthrough.SubscriptionID]bufAndCB -) +type bufAndCB struct { + cb func(*rtp.Packet) + buf *rtppassthrough.Buffer +} // client implements CameraServiceClient. 
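+//
+// Concurrency model (as reflected by the fields below): `healthyClientChMu` guards
+// `healthyClientCh`, which Close closes and nils out to signal "unhealthy" to background
+// goroutines, while `rtpPassthroughMu` guards the rtp passthrough state (`runningStreams`,
+// `subGenerationId`, `associatedSubs` and `trackClosed`).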
type client struct { resource.Named resource.TriviallyReconfigurable - ctx context.Context - cancelFn context.CancelFunc + remoteName string name string conn rpc.ClientConn client pb.CameraServiceClient @@ -65,12 +75,14 @@ type client struct { logger logging.Logger activeBackgroundWorkers sync.WaitGroup - mu sync.Mutex - healthyClientCh chan struct{} - bufAndCBByID bufAndCBByID - currentSubParentID rtppassthrough.SubscriptionID - subParentToChildren map[rtppassthrough.SubscriptionID][]rtppassthrough.SubscriptionID - trackClosed <-chan struct{} + healthyClientChMu sync.Mutex + healthyClientCh chan struct{} + + rtpPassthroughMu sync.Mutex + runningStreams map[rtppassthrough.SubscriptionID]bufAndCB + subGenerationId int + associatedSubs map[int][]rtppassthrough.SubscriptionID + trackClosed <-chan struct{} } // NewClientFromConn constructs a new Client from connection passed in. @@ -85,19 +97,18 @@ func NewClientFromConn( streamClient := streampb.NewStreamServiceClient(conn) trackClosed := make(chan struct{}) close(trackClosed) - closeCtx, cancelFn := context.WithCancel(context.Background()) + logger.SetLevel(logging.DEBUG) return &client{ - ctx: closeCtx, - cancelFn: cancelFn, - Named: name.PrependRemote(remoteName).AsNamed(), - name: name.ShortName(), - conn: conn, - streamClient: streamClient, - client: c, - bufAndCBByID: map[rtppassthrough.SubscriptionID]bufAndCB{}, - trackClosed: trackClosed, - subParentToChildren: map[rtppassthrough.SubscriptionID][]rtppassthrough.SubscriptionID{}, - logger: logger, + remoteName: remoteName, + Named: name.PrependRemote(remoteName).AsNamed(), + name: name.ShortName(), + conn: conn, + streamClient: streamClient, + client: c, + runningStreams: map[rtppassthrough.SubscriptionID]bufAndCB{}, + trackClosed: trackClosed, + associatedSubs: map[int][]rtppassthrough.SubscriptionID{}, + logger: logger, }, nil } @@ -181,12 +192,7 @@ func (c *client) Stream( // New streams concurrent with closing cannot start until this drain completes. There // will never be stream goroutines from the old "generation" running concurrently // with those from the new "generation". 
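+	//
+	// The channel bookkeeping described above is factored into the `maybeResetHealthyClientCh`
+	// helper below so that `Stream` and `SubscribeRTP` share a single implementation of it.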
-	c.mu.Lock()
-	if c.healthyClientCh == nil {
-		c.healthyClientCh = make(chan struct{})
-	}
-	healthyClientCh := c.healthyClientCh
-	c.mu.Unlock()
+	healthyClientCh := c.maybeResetHealthyClientCh()
 
 	ctxWithMIME := gostream.WithMIMETypeHint(context.Background(), gostream.MIMETypeHint(ctx, ""))
 	streamCtx, stream, frameCh := gostream.NewMediaStreamForChannel[image.Image](ctxWithMIME)
@@ -240,7 +246,7 @@ func (c *client) Images(ctx context.Context) ([]NamedImage, resource.ResponseMet
 		Name: c.name,
 	})
 	if err != nil {
-		return nil, resource.ResponseMetadata{}, errors.Wrap(err, "camera client: could not gets images from the camera")
+		return nil, resource.ResponseMetadata{}, fmt.Errorf("camera client: could not get images from the camera: %w", err)
 	}
 
 	images := make([]NamedImage, 0, len(resp.Images))
@@ -364,35 +370,44 @@ func (c *client) Close(ctx context.Context) error {
 	_, span := trace.StartSpan(ctx, "camera::client::Close")
 	defer span.End()
 
-	c.cancelFn()
-	c.mu.Lock()
+	c.logger.Warn("Close START")
+	defer c.logger.Warn("Close END")
+
+	debug.PrintStack()
+	c.healthyClientChMu.Lock()
 	if c.healthyClientCh != nil {
 		close(c.healthyClientCh)
 	}
 	c.healthyClientCh = nil
+	c.healthyClientChMu.Unlock()
 
+	// unsubscribe from all video streams that have been established with modular cameras
+	c.unsubscribeAll()
 
 	// NOTE: (Nick S) we are intentionally releasing the lock before we wait for
 	// background goroutines to terminate as some of them need to be able
 	// to take the lock to terminate
-	c.mu.Unlock()
 	c.activeBackgroundWorkers.Wait()
 	return nil
 }
 
-// SubscribeRTP begins a subscription to receive RTP packets.
-// When the Subscription terminates the context in the returned Subscription
-// is cancelled.
-// It maintains the invariant that there is at most a single track between
-// the client and WebRTC peer.
-// It is strongly recommended to set a timeout on ctx as the underlying
-// WebRTC peer connection's Track can be removed before sending an RTP packet which
-// results in SubscribeRTP blocking until the provided context is cancelled or the client
-// is closed.
-// This rare condition may happen in cases like reconfiguring concurrently with
-// a SubscribeRTP call.
+func (c *client) maybeResetHealthyClientCh() chan struct{} {
+	c.healthyClientChMu.Lock()
+	defer c.healthyClientChMu.Unlock()
+	if c.healthyClientCh == nil {
+		c.healthyClientCh = make(chan struct{})
+	}
+	return c.healthyClientCh
+}
+
+// SubscribeRTP begins a subscription to receive RTP packets. SubscribeRTP waits for the VideoTrack
+// to be available before returning. The subscription/video is valid until the
+// `Subscription.Terminated` context is `Done`.
+//
+// SubscribeRTP maintains the invariant that there is at most a single track between the client and
+// WebRTC peer. Concurrent callers will block and wait for the "winner" to complete.
 func (c *client) SubscribeRTP(
 	ctx context.Context,
 	bufferSize int,
@@ -400,67 +415,120 @@ func (c *client) SubscribeRTP(
 ) (rtppassthrough.Subscription, error) {
 	ctx, span := trace.StartSpan(ctx, "camera::client::SubscribeRTP")
 	defer span.End()
-	c.mu.Lock()
-	defer c.mu.Unlock()
 
-	sub, buf, err := rtppassthrough.NewSubscription(bufferSize)
+	c.redLog(fmt.Sprintf("SubscribeRTP START %s client: %p, pc: %p", c.Name().String(), c, c.conn.PeerConn()))
+	defer c.redLog(fmt.Sprintf("SubscribeRTP END %s client: %p, pc: %p", c.Name().String(), c, c.conn.PeerConn()))
+	// RSDK-6340: The resource manager closes remote resources when the underlying
+	// connection goes bad.
However, when the connection is re-established, the client + // objects these resources represent are not re-initialized/marked "healthy". + // `healthyClientCh` helps track these transitions between healthy and unhealthy + // states. + // + // When `client.SubscribeRTP()` is called we will either use the existing + // `healthyClientCh` or create a new one. + // + // The goroutine a `Tracker.AddOnTrackSub()` method spins off will listen to its version of the + // `healthyClientCh` to be notified when the connection has died so it can gracefully + // terminate. + // + // When a connection becomes unhealthy, the resource manager will call `Close` on the + // camera client object. Closing the client will: + // 1. close its `client.healthyClientCh` channel + // 2. wait for existing "SubscribeRTP" goroutines to drain + // 3. nil out the `client.healthyClientCh` member variable + // + // New streams concurrent with closing cannot start until this drain completes. There + // will never be stream goroutines from the old "generation" running concurrently + // with those from the new "generation". + healthyClientCh := c.maybeResetHealthyClientCh() + + c.rtpPassthroughMu.Lock() + defer c.rtpPassthroughMu.Unlock() + + // Create a Subscription object and allocate a ring buffer of RTP packets. + sub, rtpPacketBuffer, err := rtppassthrough.NewSubscription(bufferSize) if err != nil { return sub, err } g := utils.NewGuard(func() { - buf.Close() + c.logger.Info("Error subscribing to RTP. Closing passthrough buffer.") + rtpPacketBuffer.Close() }) defer g.OnFail() + // RTP Passthrough only works over PeerConnection video tracks. if c.conn.PeerConn() == nil { return rtppassthrough.NilSubscription, ErrNoPeerConnection } - // check if we have established a connection that can be shared by multiple clients asking for cameras streams from viam server. - sc, ok := c.conn.(*rdkgrpc.SharedConn) + // Calling `AddStream(streamId)` on the remote viam-server/module will eventually result in + // invoking a local `PeerConnection.OnTrack(streamId, rtpReceiver)` callback. In this callback + // we set up the goroutine to read video data from the remote side and copy those packets + // upstream. + // + // webrtc.PeerConnection objects can only install a single `OnTrack` callback. Because there + // could be multiple video stream sources on the other side of a PeerConnection, we must play a + // game where we only create one `OnTrack` callback, but that callback object does a map lookup + // for which video stream. The `grpc.Tracker` API is for manipulating that map. + tracker, ok := c.conn.(grpc.Tracker) if !ok { + c.logger.Errorw("Client conn is not a `Tracker`", "connType", fmt.Sprintf("%T", c.conn)) return rtppassthrough.NilSubscription, ErrNoSharedPeerConnection } - c.logger.CDebugw(ctx, "SubscribeRTP", "subID", sub.ID.String(), "name", c.Name(), "bufAndCBByID", c.bufAndCBByID.String()) + c.logger.CDebugw(ctx, "SubscribeRTP", "subID", sub.ID.String(), "name", c.Name(), "bufAndCBByID", c.bufAndCBToString()) defer func() { c.logger.CDebugw(ctx, "SubscribeRTP after", "subID", sub.ID.String(), - "name", c.Name(), "bufAndCBByID", c.bufAndCBByID.String()) + "name", c.Name(), "bufAndCBByID", c.bufAndCBToString()) }() - // B/c there is only ever either 0 or 1 peer connections between a module & a viam-server - // once AddStream is called on the module for a given camera model instance & succeeds, we shouldn't - // call it again until the previous track is terminated (by calling RemoveStream) for a few reasons: - // 1. 
doing so would result in 2 webrtc tracks for the same camera sending the exact same RTP packets which would
-	//    needlessly waste resources
-	// 2. b/c the signature of RemoveStream just takes the camera name, if there are 2 streams for the same camera
-	//    & the module receives a call to RemoveStream, there is no way for the module to know which camera stream
-	//    should be removed
-	if len(c.bufAndCBByID) == 0 {
-		// Wait for previous track to terminate or for the client to close
+
+	// If we do not currently have a video stream open for this camera, we create one. Otherwise
+	// we'll piggy-back on the existing stream. We piggy-back for two reasons:
+	//
+	// 1. Not doing so would result in two webrtc tracks for the same camera sending the exact same RTP
+	//    packets. This would needlessly waste resources.
+	//
+	// 2. The signature of `RemoveStream` just takes the camera name. If we had two streams open for
+	//    the same camera when the remote receives `RemoveStream`, it wouldn't know which to stop
+	//    sending data for.
+	if len(c.runningStreams) == 0 {
+		// A previous subscriber/track may have exited, but its resources have not necessarily been
+		// cleaned up. We must wait for that to complete. We additionally select on other error
+		// conditions.
 		select {
-		case <-ctx.Done():
-			err := errors.Wrap(ctx.Err(), "Track not closed within SubscribeRTP provided context")
-			c.logger.Error(err)
-			return rtppassthrough.NilSubscription, err
-		case <-c.ctx.Done():
-			err := errors.Wrap(c.ctx.Err(), "Track not closed before client closed")
-			c.logger.Error(err)
-			return rtppassthrough.NilSubscription, err
 		case <-c.trackClosed:
+		case <-ctx.Done():
+			return rtppassthrough.NilSubscription, fmt.Errorf("Track not closed within SubscribeRTP provided context: %w", ctx.Err())
+		case <-healthyClientCh:
+			return rtppassthrough.NilSubscription, errors.New("camera client is closed")
 		}
+
+		// Create channels for two important events: when a video track is established and when a
+		// video track exits. I.e.: when the `OnTrack` callback is invoked and when the `RTPReceiver`
+		// is no longer being read from.
 		trackReceived, trackClosed := make(chan struct{}), make(chan struct{})
-		// add the camera model's addOnTrackSubFunc to the shared peer connection's
-		// slice of OnTrack callbacks. This is what allows
-		// all the bufAndCBByID's callback functions to be called with the
-		// RTP packets from the module's peer connection's track
-		sc.AddOnTrackSub(c.Name(), c.addOnTrackSubFunc(trackReceived, trackClosed, sub.ID))
-		// remove the OnTrackSub once we either fail or succeed
-		defer sc.RemoveOnTrackSub(c.Name())
-		if _, err := c.streamClient.AddStream(ctx, &streampb.AddStreamRequest{Name: c.Name().String()}); err != nil {
-			c.logger.CDebugw(ctx, "SubscribeRTP AddStream hit error", "subID", sub.ID.String(), "name", c.Name(), "err", err)
+
+		// We're creating a new video track. Bump the generation ID and associate all new
+		// subscriptions to this generation.
+		c.subGenerationId += 1
+		c.associatedSubs[c.subGenerationId] = []rtppassthrough.SubscriptionID{}
+
+		// Add the camera's addOnTrackFunc to the Tracker's map of OnTrack callbacks.
+		tracker.AddOnTrackSub(c.trackName(), c.addOnTrackFunc(healthyClientCh, trackReceived, trackClosed, c.subGenerationId))
+		// Remove the OnTrackSub once we either fail or succeed.
+		defer tracker.RemoveOnTrackSub(c.trackName())
+
+		c.redLog(fmt.Sprintf("SubscribeRTP AddStream CALL %s client: %p, pc: %p", c.trackName(), c, c.conn.PeerConn()))
+		// Call `AddStream` on the remote. In the successful case, this will result in a
+		// PeerConnection renegotiation to add this camera's video track and have the `OnTrack`
+		// callback invoked.
+		if _, err := c.streamClient.AddStream(ctx, &streampb.AddStreamRequest{Name: c.trackName()}); err != nil {
+			c.logger.CDebugw(ctx, "SubscribeRTP AddStream hit error", "subID", sub.ID.String(), "trackName", c.trackName(), "err", err)
 			return rtppassthrough.NilSubscription, err
 		}
-		// NOTE: (Nick S) This is a workaround to a Pion bug / missing feature.
+		c.redLog(fmt.Sprintf("SubscribeRTP AddStream RETURN %s client: %p, pc: %p", c.trackName(), c, c.conn.PeerConn()))
+		// TODO: Test this by adding a sleep before packets are sent. Does anything call `RemoveStream`?
+		//
 		// If the WebRTC peer on the other side of the PeerConnection calls pc.AddTrack followed by pc.RemoveTrack
 		// before the module writes RTP packets
 		// to the track, the client's PeerConnection.OnTrack callback is never called.
@@ -469,202 +537,246 @@ func (c *client) SubscribeRTP(
 		// To prevent that failure mode, we exit with an error if a track is not received within
 		// the SubscribeRTP context.
+		c.redLog(fmt.Sprintf("SubscribeRTP waiting for track %s client: %p, pc: %p", c.trackName(), c, c.conn.PeerConn()))
 		select {
 		case <-ctx.Done():
-			err := errors.Wrap(ctx.Err(), "Track not received within SubscribeRTP provided context")
-			c.logger.Error(err.Error())
-			return rtppassthrough.NilSubscription, err
-		case <-c.ctx.Done():
-			err := errors.Wrap(c.ctx.Err(), "Track not received before client closed")
-			c.logger.Error(err)
-			return rtppassthrough.NilSubscription, err
+			return rtppassthrough.NilSubscription, fmt.Errorf("Track not received within SubscribeRTP provided context: %w", ctx.Err())
+		case <-healthyClientCh:
+			return rtppassthrough.NilSubscription, errors.New("Track not received before client closed")
 		case <-trackReceived:
-			c.logger.Debug("received track")
+			c.redLog(fmt.Sprintf("received track %s client: %p, pc: %p", c.trackName(), c, c.conn.PeerConn()))
 		}
-		// set up channel so we can detect when the track has closed (in response to an event / error internal to the
-		// peer or due to calling RemoveStream)
+
+		// Set up channel to detect when the track has closed. This can happen in response to an
+		// event / error internal to the peer or due to calling `RemoveStream`.
 		c.trackClosed = trackClosed
-		// the sub is the new parent of all subsequent subs until the number if subsriptions falls back to 0
-		c.currentSubParentID = sub.ID
-		c.subParentToChildren[c.currentSubParentID] = []rtppassthrough.SubscriptionID{}
-		c.logger.CDebugw(ctx, "SubscribeRTP called AddStream and succeeded", "subID", sub.ID.String(),
-			"name", c.Name())
-	}
-	c.subParentToChildren[c.currentSubParentID] = append(c.subParentToChildren[c.currentSubParentID], sub.ID)
-	// add the subscription to bufAndCBByID so the goroutine spawned by
-	// addOnTrackSubFunc can forward the packets it receives from the modular camera
-	// over WebRTC to the SubscribeRTP caller via the packetsCB callback
-	c.bufAndCBByID[sub.ID] = bufAndCB{
+		c.logger.CDebugw(ctx, "SubscribeRTP called AddStream and succeeded", "subID", sub.ID.String())
+	}
+
+	// Associate this subscription with the current generation.
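+	// A "generation" is one lifetime of a webrtc video track: every subscription created while
+	// that track is up shares its generation ID. When the track dies (e.g. `ReadRTP` returns an
+	// error), `unsubscribeChildrenSubs` uses `associatedSubs[generationId]` to close exactly the
+	// subscriptions that were fed by that track.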
+ c.associatedSubs[c.subGenerationId] = append(c.associatedSubs[c.subGenerationId], sub.ID) + + // Add the subscription to runningStreams. The goroutine spawned by `addOnTrackFunc` will use + // this to know where to forward incoming RTP packets. + c.runningStreams[sub.ID] = bufAndCB{ cb: func(p *rtp.Packet) { packetsCB([]*rtp.Packet{p}) }, - buf: buf, + buf: rtpPacketBuffer, } - buf.Start() + + // Start the goroutine that copies RTP packets + rtpPacketBuffer.Start() g.Success() c.logger.CDebugw(ctx, "SubscribeRTP succeeded", "subID", sub.ID.String(), - "name", c.Name(), "bufAndCBByID", c.bufAndCBByID.String()) + "name", c.Name(), "bufAndCBByID", c.bufAndCBToString()) return sub, nil } -func (c *client) addOnTrackSubFunc( - trackReceived, trackClosed chan struct{}, - parentID rtppassthrough.SubscriptionID, -) rdkgrpc.OnTrackCB { +func (c *client) addOnTrackFunc( + healthyClientCh, trackReceived, trackClosed chan struct{}, + generationId int, +) grpc.OnTrackCB { + + // This is invoked when `PeerConnection.OnTrack` is called for this camera's stream id. return func(tr *webrtc.TrackRemote, r *webrtc.RTPReceiver) { + // Our `OnTrack` was called. Inform `SubscribeRTP` that getting video data was successful. close(trackReceived) c.activeBackgroundWorkers.Add(1) goutils.ManagedGo(func() { + var count atomic.Uint64 + defer close(trackClosed) for { - if c.ctx.Err() != nil { - c.logger.Debugw("SubscribeRTP: camera client", "name ", c.Name(), "parentID", parentID.String(), - "OnTrack callback terminating as the client is closing") - close(trackClosed) + select { + case <-healthyClientCh: + c.logger.Debugw("OnTrack callback terminating as the client is closing", "parentID", generationId) + return + default: + } + + // We set a read deadline such that we don't block in I/O. This allows us to respond + // to events such as shutdown. Normally this would be done with a context on the + // `ReadRTP` method. + deadline := time.Now().Add(readRTPTimeout) + if err := tr.SetReadDeadline(deadline); err != nil { + c.logger.Errorw("SetReadDeadline error", "generationId", generationId, "err", err) return } pkt, _, err := tr.ReadRTP() + if os.IsTimeout(err) { + c.logger.Debugw("ReadRTP read timeout", "generationId", generationId, + "err", err, "timeout", readRTPTimeout.String()) + continue + } + if err != nil { - close(trackClosed) + if errors.Is(err, io.EOF) { + c.logger.Infow("ReadRTP received EOF", "generationId", generationId, "err", err) + } else { + c.logger.Warnw("ReadRTP error", "generationId", generationId, "err", err) + } // NOTE: (Nick S) We need to remember which subscriptions are consuming packets // from to which tr *webrtc.TrackRemote so that we can terminate the child subscriptions // when their track terminate. - c.unsubscribeChildrenSubs(parentID) - if errors.Is(err, io.EOF) { - c.logger.Debugw("SubscribeRTP: camera client", "name ", c.Name(), "parentID", parentID.String(), - "OnTrack callback terminating ReadRTP loop due to ", err.Error()) - return - } - c.logger.Errorw("SubscribeRTP: camera client", "name ", c.Name(), "parentID", parentID.String(), - "OnTrack callback hit unexpected error from ReadRTP err:", err.Error()) + c.unsubscribeChildrenSubs(generationId) return } - c.mu.Lock() - for _, tmp := range c.bufAndCBByID { + c.rtpPassthroughMu.Lock() + for _, tmp := range c.runningStreams { + if count.Add(1)%10000 == 0 { + c.logger.Infow("ReadRTP called. 
Sampling 1/10000", "count", count.Load(), "packetTs", pkt.Header.Timestamp) + } + // This is needed to prevent the problem described here: // https://go.dev/blog/loopvar-preview bufAndCB := tmp err := bufAndCB.buf.Publish(func() { bufAndCB.cb(pkt) }) if err != nil { - c.logger.Debugw("SubscribeRTP: camera client", - "name", c.Name(), - "parentID", parentID.String(), - "dropped an RTP packet dropped due to", - "err", err.Error()) + c.logger.Debugw("rtp passthrough packet dropped", + "generationId", generationId, "err", err) } } - c.mu.Unlock() + c.rtpPassthroughMu.Unlock() } }, c.activeBackgroundWorkers.Done) } } // Unsubscribe terminates a subscription to receive RTP packets. -// It is strongly recommended to set a timeout on ctx as the underlying -// WebRTC peer connection's Track can be removed before sending an RTP packet which -// results in Unsubscribe blocking until the provided -// context is cancelled or the client is closed. -// This rare condition may happen in cases like reconfiguring concurrently with -// an Unsubscribe call. +// +// It is strongly recommended to set a timeout on ctx as the underlying WebRTC peer connection's +// Track can be removed before sending an RTP packet which results in Unsubscribe blocking until the +// provided context is cancelled or the client is closed. This rare condition may happen in cases +// like reconfiguring concurrently with an Unsubscribe call. func (c *client) Unsubscribe(ctx context.Context, id rtppassthrough.SubscriptionID) error { ctx, span := trace.StartSpan(ctx, "camera::client::Unsubscribe") defer span.End() - c.mu.Lock() if c.conn.PeerConn() == nil { - c.mu.Unlock() return ErrNoPeerConnection } - _, ok := c.conn.(*rdkgrpc.SharedConn) - if !ok { - c.mu.Unlock() - return ErrNoSharedPeerConnection - } - c.logger.CDebugw(ctx, "Unsubscribe called with", "name", c.Name(), "subID", id.String()) + c.healthyClientChMu.Lock() + healthyClientCh := c.healthyClientCh + c.healthyClientChMu.Unlock() - bufAndCB, ok := c.bufAndCBByID[id] + c.logger.CInfow(ctx, "Unsubscribe called", "subID", id.String()) + c.rtpPassthroughMu.Lock() + bufAndCB, ok := c.runningStreams[id] if !ok { - c.logger.CWarnw(ctx, "Unsubscribe called with unknown subID ", "name", c.Name(), "subID", id.String()) - c.mu.Unlock() + c.logger.CWarnw(ctx, "Unsubscribe called with unknown subID", "subID", id.String()) + c.rtpPassthroughMu.Unlock() return ErrUnknownSubscriptionID } - if len(c.bufAndCBByID) == 1 { - c.logger.CDebugw(ctx, "Unsubscribe calling RemoveStream", "name", c.Name(), "subID", id.String()) - if _, err := c.streamClient.RemoveStream(ctx, &streampb.RemoveStreamRequest{Name: c.Name().String()}); err != nil { - c.logger.CWarnw(ctx, "Unsubscribe RemoveStream returned err", "name", c.Name(), "subID", id.String(), "err", err) - c.mu.Unlock() - return err - } - - delete(c.bufAndCBByID, id) + if len(c.runningStreams) > 1 { + // There are still existing output streams for this camera. We close the resources + // associated with the input `SubscriptionID` and return. + delete(c.runningStreams, id) + c.rtpPassthroughMu.Unlock() bufAndCB.buf.Close() + return nil + } - // unlock so that the OnTrack callback can get the lock if it needs to before the ReadRTP method returns an error - // which will close `c.trackClosed`. - c.mu.Unlock() + // There is only one stream left. In addition to closing the input subscription's resources, we + // also close the stream from the underlying remote. + // + // Calling `RemoveStream` releases resources on the remote. 
The upper layers are expected
+	// to retry `Unsubscribe` until it succeeds.
+	c.logger.CInfow(ctx, "Last subscriber. Calling RemoveStream",
+		"trackName", c.trackName(), "subID", id.String())
+	request := &streampb.RemoveStreamRequest{Name: c.trackName()}
+	// We assume the server responds with a success if the requested `Name` is unknown/already
+	// removed.
+	if _, err := c.streamClient.RemoveStream(ctx, request); err != nil {
+		c.logger.CWarnw(ctx, "Unsubscribe RemoveStream returned err", "trackName",
+			c.trackName(), "subID", id.String(), "err", err)
+		c.rtpPassthroughMu.Unlock()
+		return err
+	}
+
+	// We can only remove `runningStreams` when `RemoveStream` succeeds. If there was an error, the
+	// upper layers are responsible for retrying `Unsubscribe`. At this point we will not return an
+	// error to clearly inform the caller that `Unsubscribe` does not need to be retried.
+	delete(c.runningStreams, id)
+	bufAndCB.buf.Close()
 
-		select {
-		case <-ctx.Done():
-			err := errors.Wrap(ctx.Err(), "track not closed within Unsubscribe provided context. Subscription may be left in broken state")
-			c.logger.Error(err)
-			return err
-		case <-c.ctx.Done():
-			err := errors.Wrap(c.ctx.Err(), "track not closed before client closed")
-			c.logger.Error(err)
-			return err
-		case <-c.trackClosed:
-		}
+	// The goroutine we're stopping uses the `rtpPassthroughMu` to access the map of
+	// `runningStreams`. We must unlock while we wait for it to exit.
+	c.rtpPassthroughMu.Unlock()
+
+	select {
+	case <-c.trackClosed:
+		return nil
+	case <-ctx.Done():
+		c.logger.CWarnw(ctx, "RemoveStream successfully called, but errored waiting for stream to send an EOF", "subID", id.String(), "err", ctx.Err())
+		return nil
+	case <-healthyClientCh:
+		c.logger.CWarnw(ctx, "RemoveStream successfully called, but camera closed waiting for stream to send an EOF", "subID", id.String())
 		return nil
 	}
-	c.mu.Unlock()
+}
 
-	delete(c.bufAndCBByID, id)
-	bufAndCB.buf.Close()
+func (c *client) trackName() string {
+	// if c.conn is a *grpc.SharedConn then the client
+	// is talking to a module and we need to send the fully qualified name
+	if _, ok := c.conn.(*grpc.SharedConn); ok {
+		return c.Name().String()
+	}
 
-	return nil
+	// otherwise we know we are talking to either a main or remote robot part
+	if c.remoteName != "" {
+		// if c.remoteName != "" it indicates that we are talking to a remote part & we need to pop the remote name
+		// as the remote doesn't know its own name from the perspective of the main part
+		return c.Name().PopRemote().SDPTrackName()
+	}
+	// in this case we are talking to a main part & the remote name (if it exists) needs to be preserved
+	return c.Name().SDPTrackName()
 }
 
 func (c *client) unsubscribeAll() {
+	c.rtpPassthroughMu.Lock()
+	defer c.rtpPassthroughMu.Unlock()
-	if len(c.bufAndCBByID) > 0 {
-		for id, bufAndCB := range c.bufAndCBByID {
-			c.logger.Debugw("unsubscribeAll terminating sub", "name", c.Name(), "subID", id.String())
-			delete(c.bufAndCBByID, id)
+	if len(c.runningStreams) > 0 {
+		// shutdown & delete all *rtppassthrough.Buffer and callbacks
+		for subId, bufAndCB := range c.runningStreams {
+			c.logger.Debugw("unsubscribeAll terminating sub", "subID", subId.String())
+			delete(c.runningStreams, subId)
 			bufAndCB.buf.Close()
 		}
 	}
 }
 
-func (c *client) unsubscribeChildrenSubs(parentID rtppassthrough.SubscriptionID) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	c.logger.Debugw("client unsubscribeChildrenSubs called", "name", c.Name(), "parentID", parentID.String())
-	defer c.logger.Debugw("client unsubscribeChildrenSubs done",
"name", c.Name(), "parentID", parentID.String()) - for _, childID := range c.subParentToChildren[parentID] { - bufAndCB, ok := c.bufAndCBByID[childID] +func (c *client) unsubscribeChildrenSubs(generationId int) { + c.rtpPassthroughMu.Lock() + defer c.rtpPassthroughMu.Unlock() + c.logger.Debugw("client unsubscribeChildrenSubs called", "name", c.Name(), "generationId", generationId, "numSubs", len(c.associatedSubs)) + defer c.logger.Debugw("client unsubscribeChildrenSubs done", "name", c.Name(), "generationId", generationId) + for _, subId := range c.associatedSubs[generationId] { + bufAndCB, ok := c.runningStreams[subId] if !ok { continue } - c.logger.Debugw("stopping child subscription", "name", c.Name(), "parentID", parentID.String(), "childID", childID.String()) - delete(c.bufAndCBByID, childID) + c.logger.Debugw("stopping subscription", "name", c.Name(), "generationId", generationId, "subId", subId.String()) + delete(c.runningStreams, subId) bufAndCB.buf.Close() } - delete(c.subParentToChildren, parentID) + delete(c.associatedSubs, generationId) } -func (s bufAndCBByID) String() string { - if len(s) == 0 { +func (c *client) bufAndCBToString() string { + if len(c.runningStreams) == 0 { return "len: 0" } strIds := []string{} strIdsToCB := map[string]bufAndCB{} - for id, cb := range s { + for id, cb := range c.runningStreams { strID := id.String() strIds = append(strIds, strID) strIdsToCB[strID] = cb } slices.Sort(strIds) - ret := fmt.Sprintf("len: %d, ", len(s)) + ret := fmt.Sprintf("len: %d, ", len(c.runningStreams)) for _, strID := range strIds { ret += fmt.Sprintf("%s: %v, ", strID, strIdsToCB[strID]) } diff --git a/components/camera/client_test.go b/components/camera/client_test.go index e11aea814f7..18756e5785a 100644 --- a/components/camera/client_test.go +++ b/components/camera/client_test.go @@ -3,6 +3,7 @@ package camera_test import ( "bytes" "context" + "fmt" "image" "image/color" "image/png" @@ -18,17 +19,26 @@ import ( "google.golang.org/grpc/metadata" "go.viam.com/rdk/components/camera" + "go.viam.com/rdk/components/camera/fake" "go.viam.com/rdk/components/camera/rtppassthrough" + "go.viam.com/rdk/config" "go.viam.com/rdk/data" "go.viam.com/rdk/gostream" + "go.viam.com/rdk/gostream/codec/opus" + "go.viam.com/rdk/gostream/codec/x264" viamgrpc "go.viam.com/rdk/grpc" "go.viam.com/rdk/logging" "go.viam.com/rdk/pointcloud" "go.viam.com/rdk/resource" "go.viam.com/rdk/rimage" "go.viam.com/rdk/rimage/transform" + "go.viam.com/rdk/robot" + robotimpl "go.viam.com/rdk/robot/impl" + "go.viam.com/rdk/robot/web" + weboptions "go.viam.com/rdk/robot/web/options" "go.viam.com/rdk/testutils" "go.viam.com/rdk/testutils/inject" + "go.viam.com/rdk/testutils/robottestutils" rutils "go.viam.com/rdk/utils" "go.viam.com/rdk/utils/contextutils" ) @@ -677,3 +687,502 @@ func TestRTPPassthroughWithoutWebRTC(t *testing.T) { test.That(t, conn.Close(), test.ShouldBeNil) }) } + +func setupRealRobot( + t *testing.T, + robotConfig *config.Config, + logger logging.Logger, +) (context.Context, robot.LocalRobot, string, web.Service) { + t.Helper() + + ctx := context.Background() + robot, err := robotimpl.RobotFromConfig(ctx, robotConfig, logger) + test.That(t, err, test.ShouldBeNil) + + // We initialize with a stream config such that the stream server is capable of creating video stream and + // audio stream data. 
+ webSvc := web.New(robot, logger, web.WithStreamConfig(gostream.StreamConfig{ + AudioEncoderFactory: opus.NewEncoderFactory(), + VideoEncoderFactory: x264.NewEncoderFactory(), + })) + options, _, addr := robottestutils.CreateBaseOptionsAndListener(t) + err = webSvc.Start(ctx, options) + test.That(t, err, test.ShouldBeNil) + + return ctx, robot, addr, webSvc +} + +func setupRealRobotWithOptions( + t *testing.T, + robotConfig *config.Config, + logger logging.Logger, + options weboptions.Options, +) (context.Context, robot.LocalRobot, web.Service) { + t.Helper() + + ctx := context.Background() + robot, err := robotimpl.RobotFromConfig(ctx, robotConfig, logger) + test.That(t, err, test.ShouldBeNil) + + // We initialize with a stream config such that the stream server is capable of creating video stream and + // audio stream data. + webSvc := web.New(robot, logger, web.WithStreamConfig(gostream.StreamConfig{ + AudioEncoderFactory: opus.NewEncoderFactory(), + VideoEncoderFactory: x264.NewEncoderFactory(), + })) + err = webSvc.Start(ctx, options) + test.That(t, err, test.ShouldBeNil) + + return ctx, robot, webSvc +} + +var ( + Green = "\033[32m" + Red = "\033[31m" + Magenta = "\033[35m" + Cyan = "\033[36m" + Yellow = "\033[33m" + Reset = "\033[0m" +) + +// this helps make the test case much easier to read. +func greenLog(t *testing.T, msg string) { + t.Log(Green + msg + Reset) +} + +// this helps make the test case much easier to read. +func redLog(t *testing.T, msg string) { + t.Log(Red + msg + Reset) +} + +// Skipped due to +// https://viam.atlassian.net/browse/RSDK-7637 +func TestMultiplexOverRemoteConnection(t *testing.T) { + t.Skip() + logger := logging.NewTestLogger(t).Sublogger(t.Name()) + + remoteCfg := &config.Config{Components: []resource.Config{ + { + Name: "rtpPassthroughCamera", + API: resource.NewAPI("rdk", "component", "camera"), + Model: resource.DefaultModelFamily.WithModel("fake"), + ConvertedAttributes: &fake.Config{ + RTPPassthrough: true, + }, + }, + }} + + // Create a robot with a single fake camera. 
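+	// (The fake camera is configured with `RTPPassthrough: true` above; per the fake camera
+	// hunk later in this patch, `SubscribeRTP` returns `ErrRTPPassthroughNotEnabled` without it.)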
+ remoteCtx, remoteRobot, addr, remoteWebSvc := setupRealRobot(t, remoteCfg, logger.Sublogger("remote")) + defer remoteRobot.Close(remoteCtx) + defer remoteWebSvc.Close(remoteCtx) + + mainCfg := &config.Config{Remotes: []config.Remote{ + { + Name: "remote", + Address: addr, + Insecure: true, + }, + }} + mainCtx, mainRobot, _, mainWebSvc := setupRealRobot(t, mainCfg, logger.Sublogger("main")) + greenLog(t, "robot setup") + defer mainRobot.Close(mainCtx) + defer mainWebSvc.Close(mainCtx) + + cameraClient, err := camera.FromRobot(mainRobot, "remote:rtpPassthroughCamera") + test.That(t, err, test.ShouldBeNil) + + image, _, err := cameraClient.Images(mainCtx) + test.That(t, err, test.ShouldBeNil) + test.That(t, image, test.ShouldNotBeNil) + greenLog(t, "got images") + + recvPktsCtx, recvPktsFn := context.WithCancel(context.Background()) + sub, err := cameraClient.(rtppassthrough.Source).SubscribeRTP(mainCtx, 2, func(pkts []*rtp.Packet) { + recvPktsFn() + }) + test.That(t, err, test.ShouldBeNil) + <-recvPktsCtx.Done() + greenLog(t, "got packets") + + err = cameraClient.(rtppassthrough.Source).Unsubscribe(mainCtx, sub.ID) + test.That(t, err, test.ShouldBeNil) + greenLog(t, "unsubscribe") +} + +// Skipped due to +// https://viam.atlassian.net/browse/RSDK-7637 +func TestMultiplexOverMultiHopRemoteConnection(t *testing.T) { + logger := logging.NewTestLogger(t).Sublogger(t.Name()) + + remoteCfg2 := &config.Config{ + Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Components: []resource.Config{ + { + Name: "rtpPassthroughCamera", + API: resource.NewAPI("rdk", "component", "camera"), + Model: resource.DefaultModelFamily.WithModel("fake"), + ConvertedAttributes: &fake.Config{ + RTPPassthrough: true, + }, + }, + }, + } + + // Create a robot with a single fake camera. 
+ remote2Ctx, remoteRobot2, addr2, remoteWebSvc2 := setupRealRobot(t, remoteCfg2, logger.Sublogger("remote-2")) + defer remoteRobot2.Close(remote2Ctx) + defer remoteWebSvc2.Close(remote2Ctx) + + remoteCfg1 := &config.Config{ + Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Remotes: []config.Remote{ + { + Name: "remote-2", + Address: addr2, + Insecure: true, + }, + }, + } + + remote1Ctx, remoteRobot1, addr1, remoteWebSvc1 := setupRealRobot(t, remoteCfg1, logger.Sublogger("remote-1")) + defer remoteRobot1.Close(remote1Ctx) + defer remoteWebSvc1.Close(remote1Ctx) + + mainCfg := &config.Config{ + Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Remotes: []config.Remote{ + { + Name: "remote-1", + Address: addr1, + Insecure: true, + }, + }, + } + + mainCtx, mainRobot, _, mainWebSvc := setupRealRobot(t, mainCfg, logger.Sublogger("main")) + defer mainRobot.Close(mainCtx) + defer mainWebSvc.Close(mainCtx) + + cameraClient, err := camera.FromRobot(mainRobot, "remote-1:remote-2:rtpPassthroughCamera") + test.That(t, err, test.ShouldBeNil) + + image, _, err := cameraClient.Images(mainCtx) + test.That(t, err, test.ShouldBeNil) + test.That(t, image, test.ShouldNotBeNil) + greenLog(t, "got images") + + time.Sleep(time.Second) + + recvPktsCtx, recvPktsFn := context.WithCancel(context.Background()) + sub, err := cameraClient.(rtppassthrough.Source).SubscribeRTP(mainCtx, 512, func(pkts []*rtp.Packet) { + recvPktsFn() + }) + test.That(t, err, test.ShouldBeNil) + <-recvPktsCtx.Done() + greenLog(t, "got packets") + + test.That(t, cameraClient.(rtppassthrough.Source).Unsubscribe(mainCtx, sub.ID), test.ShouldBeNil) +} + +// NOTE: These tests fail when this condition occurs: +// logger.go:130: 2024-06-17T16:56:14.097-0400 DEBUG TestGrandRemoteRebooting.remote-1.rdk:remote:/remote-2.webrtc rpc/wrtc_client_channel.go:299 no stream for id; discarding {"ch": 0, "id": 11} +// https://github.com/viamrobotics/goutils/blob/main/rpc/wrtc_client_channel.go#L299 + +// go test -race -v -run=TestWhyMustTimeoutOnReadRTP -timeout 10s +// TestWhyMustTimeoutOnReadRTP shows that if we don't timeout on ReadRTP (and also don't call RemoveStream) on close +// calling Close() on main's camera client blocks forever if there is a live SubscribeRTP subscription with a remote +// due to the fact that the TrackRemote.ReadRTP method blocking forever. +func TestWhyMustTimeoutOnReadRTP(t *testing.T) { + logger := logging.NewTestLogger(t).Sublogger(t.Name()) + + remoteCfg2 := &config.Config{ + Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Components: []resource.Config{ + { + Name: "rtpPassthroughCamera", + API: resource.NewAPI("rdk", "component", "camera"), + Model: resource.DefaultModelFamily.WithModel("fake"), + ConvertedAttributes: &fake.Config{ + RTPPassthrough: true, + }, + }, + }, + } + // Create a robot with a single fake camera. 
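+	// (The fix under test relies on the camera client's OnTrack read loop setting a read
+	// deadline via `tr.SetReadDeadline(time.Now().Add(readRTPTimeout))` and treating
+	// `os.IsTimeout(err)` as a retry, as added in components/camera/client.go above; without
+	// that, `ReadRTP` can block forever and Close would hang.)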
+ remote2Ctx, remoteRobot2, addr2, remoteWebSvc2 := setupRealRobot(t, remoteCfg2, logger.Sublogger("remote-2")) + defer remoteRobot2.Close(remote2Ctx) + defer remoteWebSvc2.Close(remote2Ctx) + + remoteCfg1 := &config.Config{ + Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Remotes: []config.Remote{ + { + Name: "remote-2", + Address: addr2, + Insecure: true, + }, + }, + } + + remote1Ctx, remoteRobot1, addr1, remoteWebSvc1 := setupRealRobot(t, remoteCfg1, logger.Sublogger("remote-1")) + defer remoteRobot1.Close(remote1Ctx) + defer remoteWebSvc1.Close(remote1Ctx) + + mainCfg := &config.Config{ + Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Remotes: []config.Remote{ + { + Name: "remote-1", + Address: addr1, + Insecure: true, + }, + }, + } + + mainCtx, mainRobot, _, mainWebSvc := setupRealRobot(t, mainCfg, logger.Sublogger("main")) + greenLog(t, "robot setup") + defer mainRobot.Close(mainCtx) + defer mainWebSvc.Close(mainCtx) + + cameraClient, err := camera.FromRobot(mainRobot, "remote-1:remote-2:rtpPassthroughCamera") + test.That(t, err, test.ShouldBeNil) + + image, _, err := cameraClient.Images(mainCtx) + test.That(t, err, test.ShouldBeNil) + test.That(t, image, test.ShouldNotBeNil) + greenLog(t, "got images") + + greenLog(t, fmt.Sprintf("calling SubscribeRTP on %T, %p", cameraClient, cameraClient)) + time.Sleep(time.Second) + + pktsChan := make(chan []*rtp.Packet) + recvPktsCtx, recvPktsFn := context.WithCancel(context.Background()) + sub, err := cameraClient.(rtppassthrough.Source).SubscribeRTP(mainCtx, 512, func(pkts []*rtp.Packet) { + // first packet + recvPktsFn() + // at some point packets are no longer published + select { + case pktsChan <- pkts: + default: + } + }) + test.That(t, err, test.ShouldBeNil) + <-recvPktsCtx.Done() + greenLog(t, "got packets") + + greenLog(t, "calling close") + test.That(t, remoteRobot2.Close(context.Background()), test.ShouldBeNil) + test.That(t, remoteWebSvc2.Close(context.Background()), test.ShouldBeNil) + greenLog(t, "close called") + + greenLog(t, "waiting for SubscribeRTP to stop receiving packets") + + timeoutCtx, timeoutFn := context.WithTimeout(context.Background(), time.Second) + defer timeoutFn() + + var ( + pktTimeoutCtx context.Context + pktTimeoutFn context.CancelFunc + ) + + // Once we have not received packets for half a second we can assume that no more packets will be published + // by the first instance of remote2 +Loop: + for { + if pktTimeoutFn != nil { + pktTimeoutFn() + } + pktTimeout := time.Millisecond * 500 + pktTimeoutCtx, pktTimeoutFn = context.WithTimeout(context.Background(), pktTimeout) + select { + case <-pktsChan: + continue + case <-pktTimeoutCtx.Done(): + greenLog(t, fmt.Sprintf("it has been at least %s since SubscribeRTP has received a packet", pktTimeout)) + pktTimeoutFn() + // https://go.dev/ref/spec#Break_statements + // The 'Loop' label is needed so that we break out of the loop + // rather than out of the select statement + break Loop + case <-timeoutCtx.Done(): + t.Log("timed out waiting for SubscribeRTP packets to drain") + t.FailNow() + } + } + + greenLog(t, "sleeping") + time.Sleep(time.Second) + + // sub should still be alive + test.That(t, sub.Terminated.Err(), test.ShouldBeNil) +} + +// This tests the following scenario: +// 1. main-part (main) -> remote-part-1 (r1) -> remote-part-2 (r2) where r2 has a camera +// 2. 
the client in the main part makes an AddStream(r1:r2:rtpPassthroughCamera) request, starting a webrtc video track to be streamed from r2 -> r1 -> main -> client +// 3. r2 reboots +// 4. expect that r1 & main stop getting packets +// 5. when the new instance of r2 comes back online main gets new rtp packets from it's track with r1 +func TestGrandRemoteRebooting(t *testing.T) { + defer redLog(t, "TEST DONE") + logger := logging.NewTestLogger(t).Sublogger(t.Name()) + + remoteCfg2 := &config.Config{ + // Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Components: []resource.Config{ + { + Name: "rtpPassthroughCamera", + API: resource.NewAPI("rdk", "component", "camera"), + Model: resource.DefaultModelFamily.WithModel("fake"), + ConvertedAttributes: &fake.Config{ + RTPPassthrough: true, + }, + }, + }, + } + + // Create a robot with a single fake camera. + options2, _, addr2 := robottestutils.CreateBaseOptionsAndListener(t) + remote2Ctx, remoteRobot2, remoteWebSvc2 := setupRealRobotWithOptions(t, remoteCfg2, logger.Sublogger("remote-2"), options2) + + remoteCfg1 := &config.Config{ + // Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Remotes: []config.Remote{ + { + Name: "remote-2", + Address: addr2, + Insecure: true, + }, + }, + } + + remote1Ctx, remoteRobot1, addr1, remoteWebSvc1 := setupRealRobot(t, remoteCfg1, logger.Sublogger("remote-1")) + defer remoteRobot1.Close(remote1Ctx) + defer remoteWebSvc1.Close(remote1Ctx) + + mainCfg := &config.Config{ + // Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, + Remotes: []config.Remote{ + { + Name: "remote-1", + Address: addr1, + Insecure: true, + }, + }, + } + + mainCtx, mainRobot, _, mainWebSvc := setupRealRobot(t, mainCfg, logger.Sublogger("main")) + greenLog(t, "robot setup") + defer mainRobot.Close(mainCtx) + defer mainWebSvc.Close(mainCtx) + + mainCameraClient, err := camera.FromRobot(mainRobot, "remote-1:remote-2:rtpPassthroughCamera") + test.That(t, err, test.ShouldBeNil) + + image, _, err := mainCameraClient.Images(mainCtx) + test.That(t, err, test.ShouldBeNil) + test.That(t, image, test.ShouldNotBeNil) + greenLog(t, "got images") + + greenLog(t, fmt.Sprintf("calling SubscribeRTP on %T, %p", mainCameraClient, mainCameraClient)) + time.Sleep(time.Second) + + pktsChan := make(chan []*rtp.Packet) + recvPktsCtx, recvPktsFn := context.WithCancel(context.Background()) + sub, err := mainCameraClient.(rtppassthrough.Source).SubscribeRTP(mainCtx, 512, func(pkts []*rtp.Packet) { + // first packet + recvPktsFn() + // at some point packets are no longer published + select { + case pktsChan <- pkts: + default: + } + }) + test.That(t, err, test.ShouldBeNil) + <-recvPktsCtx.Done() + greenLog(t, "got packets") + + greenLog(t, "calling close") + test.That(t, remoteRobot2.Close(remote2Ctx), test.ShouldBeNil) + test.That(t, remoteWebSvc2.Close(remote2Ctx), test.ShouldBeNil) + greenLog(t, "close called") + + greenLog(t, "waiting for SubscribeRTP to stop receiving packets") + + timeoutCtx, timeoutFn := context.WithTimeout(context.Background(), time.Second) + defer timeoutFn() + + var ( + pktTimeoutCtx context.Context + pktTimeoutFn context.CancelFunc + ) + + // Once we have not received packets for half a second we can assume that no more packets will be published + // by the first instance of 
remote2
+Loop:
+	for {
+		if pktTimeoutFn != nil {
+			pktTimeoutFn()
+		}
+		pktTimeout := time.Millisecond * 500
+		pktTimeoutCtx, pktTimeoutFn = context.WithTimeout(context.Background(), pktTimeout)
+		select {
+		case <-pktsChan:
+			continue
+		case <-pktTimeoutCtx.Done():
+			greenLog(t, fmt.Sprintf("it has been at least %s since SubscribeRTP has received a packet", pktTimeout))
+			pktTimeoutFn()
+			// https://go.dev/ref/spec#Break_statements
+			// The 'Loop' label is needed so that we break out of the loop
+			// rather than out of the select statement
+			break Loop
+		case <-timeoutCtx.Done():
+			t.Log("timed out waiting for SubscribeRTP packets to drain")
+			t.FailNow()
+		}
+	}
+
+	// sub should still be alive
+	test.That(t, sub.Terminated.Err(), test.ShouldBeNil)
+
+	// I'm trying to get remote-2 to come back online at the same address in the hope that remote-1 will
+	// treat it the same as it would if a real robot crashed & came back online without changing its name.
+	// The expectation is that SubscribeRTP should start receiving packets from remote-1 when remote-1 starts
+	// receiving packets from the new remote-2.
+	// It is not working: remote-1 never detects the new remote-2 & as a result main calls Close() on its client
+	// with remote-1, which can be detected
+	// by the fact that sub.Terminated.Done() is always the path this test goes down
+
+	greenLog(t, fmt.Sprintf("old robot address %s", addr2))
+	tcpAddr, ok := options2.Network.Listener.Addr().(*net.TCPAddr)
+	test.That(t, ok, test.ShouldBeTrue)
+	newListener, err := net.ListenTCP("tcp", &net.TCPAddr{Port: tcpAddr.Port})
+	test.That(t, err, test.ShouldBeNil)
+	options2.Network.Listener = newListener
+
+	greenLog(t, fmt.Sprintf("setting up new robot at address %s", newListener.Addr().String()))
+
+	remote2CtxSecond, remoteRobot2Second, remoteWebSvc2Second := setupRealRobotWithOptions(
+		t,
+		remoteCfg2,
+		logger.Sublogger("remote-2SecondInstance"),
+		options2,
+	)
+	defer remoteRobot2Second.Close(remote2CtxSecond)
+	defer remoteWebSvc2Second.Close(remote2CtxSecond)
+	sndPktTimeoutCtx, sndPktTimeoutFn := context.WithTimeout(context.Background(), time.Second*20)
+	defer sndPktTimeoutFn()
+	select {
+	case <-sub.Terminated.Done():
+		// Right now we always go down this path b/c main's sub is terminated when its camera client closes
+		redLog(t, "main's sub terminated due to close")
+		t.FailNow()
+	case <-pktsChan:
+		// Right now we never go down this path as the test is not able to get remote1 to reconnect to the new remote 2
+		greenLog(t, "SubscribeRTP got packets")
+	case <-sndPktTimeoutCtx.Done():
+		redLog(t, "timed out waiting for SubscribeRTP to receive packets")
+		t.FailNow()
+	}
+}
diff --git a/components/camera/fake/camera.go b/components/camera/fake/camera.go
index 2eef799b4c6..73153455b02 100644
--- a/components/camera/fake/camera.go
+++ b/components/camera/fake/camera.go
@@ -294,6 +294,8 @@ func (c *Camera) SubscribeRTP(
 	bufferSize int,
 	packetsCB rtppassthrough.PacketCallback,
 ) (rtppassthrough.Subscription, error) {
+	logging.Global().Warnf("SubscribeRTP FAKE START %s", c.Name().String())
+	defer logging.Global().Warnf("SubscribeRTP FAKE END %s", c.Name().String())
 	if !c.RTPPassthrough {
 		return rtppassthrough.NilSubscription, ErrRTPPassthroughNotEnabled
 	}
diff --git a/components/camera/rtppassthrough/buffer.go b/components/camera/rtppassthrough/buffer.go
index 3627a3dcdb1..4da4bb426b0 100644
--- a/components/camera/rtppassthrough/buffer.go
+++ b/components/camera/rtppassthrough/buffer.go
@@ -93,6 +93,8 @@ func (w *Buffer) Close() {
 // where it will be run in the
future.
 // If the buffer is full, it returns an error and does
 // not add the callback to the buffer.
+//
+// Dan: Public rtp.Packets and not callbacks? Until we've proved a need for more generality?
 func (w *Buffer) Publish(cb func()) error {
 	rawErr := w.err.Load()
diff --git a/go.mod b/go.mod
index 56358cbe73b..0b3c27df582 100644
--- a/go.mod
+++ b/go.mod
@@ -299,7 +299,7 @@ require (
 	github.com/pion/stun v0.6.1 // indirect
 	github.com/pion/transport/v2 v2.2.10 // indirect
 	github.com/pion/turn/v2 v2.1.6 // indirect
-	github.com/pion/webrtc/v3 v3.2.36 // indirect
+	github.com/pion/webrtc/v3 v3.2.42 // indirect
 	github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
 	github.com/pkg/profile v1.6.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
diff --git a/go.sum b/go.sum
index 29b15901e0c..0208b5911d9 100644
--- a/go.sum
+++ b/go.sum
@@ -1145,8 +1145,8 @@ github.com/pion/transport/v3 v3.0.7/go.mod h1:YleKiTZ4vqNxVwh77Z0zytYi7rXHl7j6uP
 github.com/pion/turn/v2 v2.1.3/go.mod h1:huEpByKKHix2/b9kmTAM3YoX6MKP+/D//0ClgUYR2fY=
 github.com/pion/turn/v2 v2.1.6 h1:Xr2niVsiPTB0FPtt+yAWKFUkU1eotQbGgpTIld4x1Gc=
 github.com/pion/turn/v2 v2.1.6/go.mod h1:huEpByKKHix2/b9kmTAM3YoX6MKP+/D//0ClgUYR2fY=
-github.com/pion/webrtc/v3 v3.2.36 h1:RM/miAv0M4TrhhS7h2mcZXt44K68WmpVDkUOgz2l2l8=
-github.com/pion/webrtc/v3 v3.2.36/go.mod h1:wWQz1PuKNSNK4VrJJNpPN3vZmKEi4zA6i2ynaQOlxIU=
+github.com/pion/webrtc/v3 v3.2.42 h1:WN/ZuMjtpQOoGRCZUg/zFG+JHEvYLVyDKOxU6H1qWlE=
+github.com/pion/webrtc/v3 v3.2.42/go.mod h1:M1RAe3TNTD1tzyvqHrbVODfwdPGSXOUo/OgpoGGJqFY=
 github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU=
 github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzLZPlr7++PzdhaXEj94dEiJgZDTsxEqUI=
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
diff --git a/gostream/stream.go b/gostream/stream.go
index 687d72c0920..996612b5ee0 100644
--- a/gostream/stream.go
+++ b/gostream/stream.go
@@ -31,6 +31,7 @@ type Stream interface {
 	// Start starts processing frames.
 	Start()
 	WriteRTP(pkt *rtp.Packet) error
+	// VideoStreamSourceChanged()
 
 	// Ready signals that there is at least one client connected and that
 	// streams are ready for input.
The returned context should be used for
@@ -161,6 +162,13 @@ func (bs *basicStream) Start() {
 	utils.ManagedGo(bs.processOutputAudioChunks, bs.activeBackgroundWorkers.Done)
 }
 
+func (bs *basicStream) VideoStreamSourceChanged() {
+	bs.videoTrackLocal.rtpTrack.StreamSourceChanged()
+}
+
+// NOTE: (Nick S) This only writes video RTP packets.
+// If we also need to support writing audio RTP packets, we should split
+// this method into WriteVideoRTP and WriteAudioRTP.
 func (bs *basicStream) WriteRTP(pkt *rtp.Packet) error {
 	return bs.videoTrackLocal.rtpTrack.WriteRTP(pkt)
 }
diff --git a/gostream/webrtc_track.go b/gostream/webrtc_track.go
index f27ebebb1c7..367cc95d503 100644
--- a/gostream/webrtc_track.go
+++ b/gostream/webrtc_track.go
@@ -4,12 +4,14 @@ import (
 	"math"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	"github.com/pion/rtp"
 	"github.com/pion/rtp/codecs"
 	"github.com/viamrobotics/webrtc/v3"
 	"go.uber.org/multierr"
+	"go.viam.com/rdk/logging"
 )
 
 // Adapted from https://github.com/pion/webrtc/blob/master/track_local_static.go
@@ -34,15 +36,21 @@ type trackLocalStaticRTP struct {
 	bindings          []trackBinding
 	codec             webrtc.RTPCodecCapability
 	id, rid, streamID string
+
+	// state for rewriting sequence numbers when the stream source changes (added for testing)
+	sequenceNumberOffset  func(uint16) uint16
+	streamSourceChanged   atomic.Bool
+	highestSequenceNumber uint16
 }
 
 // newtrackLocalStaticRTP returns a trackLocalStaticRTP.
 func newtrackLocalStaticRTP(c webrtc.RTPCodecCapability, id, streamID string) *trackLocalStaticRTP {
 	return &trackLocalStaticRTP{
-		codec:    c,
-		bindings: []trackBinding{},
-		id:       id,
-		streamID: streamID,
+		sequenceNumberOffset: func(u uint16) uint16 { return u },
+		codec:                c,
+		bindings:             []trackBinding{},
+		id:                   id,
+		streamID:             streamID,
 	}
 }
 
@@ -112,13 +120,86 @@ func (s *trackLocalStaticRTP) Codec() webrtc.RTPCodecCapability {
 	return s.codec
 }
 
+func sequenceNumberOffset(lastMaxPacketSequenceNumber uint16, firstNewPacketSequenceNumber uint16) func(uint16) uint16 {
+	if lastMaxPacketSequenceNumber > firstNewPacketSequenceNumber {
+		logging.Global().Warnf("new sequenceNumberOffset which is not identity lastMaxPacketSequenceNumber(%d), firstNewPacketSequenceNumber(%d)", lastMaxPacketSequenceNumber, firstNewPacketSequenceNumber)
+		// continue with prev
+		return func(sequenceNumber uint16) uint16 {
+			return lastMaxPacketSequenceNumber + 1 + sequenceNumber - firstNewPacketSequenceNumber
+		}
+	}
+	return func(u uint16) uint16 { return u }
+}
+
+func (s *trackLocalStaticRTP) StreamSourceChanged() {
+	s.streamSourceChanged.Store(true)
+}
+
+func wrapped(currentSequenceNumber uint16, highestSequenceNumber uint16) bool {
+	// if the current sequence number is smaller than the highestSequenceNumber by more
+	// than half the uint16 range, assume that the sequence number has wrapped
+	if currentSequenceNumber > highestSequenceNumber {
+		return false
+	}
+	return highestSequenceNumber-currentSequenceNumber > math.MaxUint16/2
+}
+
+func (s *trackLocalStaticRTP) WriteRTP(p *rtp.Packet) error {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	writeErrs := []error{}
+	outboundPacket := *p
+
+	for _, b := range s.bindings {
+		outboundPacket.Header.SSRC = uint32(b.ssrc)
+		outboundPacket.Header.PayloadType = uint8(b.payloadType)
+		if _, err := b.writeStream.WriteRTP(&outboundPacket.Header, outboundPacket.Payload); err != nil {
+			writeErrs = append(writeErrs, err)
+		}
+	}
+
+	return multierr.Combine(writeErrs...)
+}
+
 // WriteRTP writes a RTP Packet to the trackLocalStaticRTP
 // If one PeerConnection fails the packets will still be sent to
 // all PeerConnections.
The error message will contain the ID of the failed
 // PeerConnections so you can remove them.
-func (s *trackLocalStaticRTP) WriteRTP(p *rtp.Packet) error {
+func (s *trackLocalStaticRTP) WriteRTPModified(p *rtp.Packet) error {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
+	originalSequenceNumber := p.Header.SequenceNumber
+
+	// create a new offset generator if the stream source has changed
+	if s.streamSourceChanged.CompareAndSwap(true, false) {
+		// TODO: Rollover
+		s.sequenceNumberOffset = sequenceNumberOffset(s.highestSequenceNumber, p.Header.SequenceNumber)
+	}
+
+	// Update the header's sequence number using the current offset.
+	p.Header.SequenceNumber = s.sequenceNumberOffset(p.Header.SequenceNumber)
+
+	// set the highestSequenceNumber to the current packet's sequence number
+	// if the packet's sequence number is greater than the current highest sequence number
+	// or if the sequence number wrapped
+	setHighest := false
+	if p.Header.SequenceNumber > s.highestSequenceNumber {
+		setHighest = true
+	}
+
+	if wrapped(p.Header.SequenceNumber, s.highestSequenceNumber) {
+		logging.Global().Warnf("updating highestSequenceNumber to %d due to wrapping, prev highestSequenceNumber: %d, originalSN: %d", p.Header.SequenceNumber, s.highestSequenceNumber, originalSequenceNumber)
+		setHighest = true
+	}
+
+	if setHighest {
+		s.highestSequenceNumber = p.Header.SequenceNumber
+	} else {
+		logging.Global().Warnf("publishing out of order message p.Header.SequenceNumber(%d), originalSN: %d, highestSequenceNumber: %d", p.Header.SequenceNumber, originalSequenceNumber, s.highestSequenceNumber)
+	}
+	logging.Global().Debugf("SN: %d", p.Header.SequenceNumber)
 
 	writeErrs := []error{}
 	outboundPacket := *p
diff --git a/grpc/conn.go b/grpc/conn.go
index f94cb330a51..4bc5f79497b 100644
--- a/grpc/conn.go
+++ b/grpc/conn.go
@@ -6,7 +6,9 @@ import (
 	"sync"
 
 	"github.com/viamrobotics/webrtc/v3"
+	"go.viam.com/rdk/logging"
 	"go.viam.com/utils/rpc"
+	"golang.org/x/exp/maps"
 	googlegrpc "google.golang.org/grpc"
 )
 
@@ -14,6 +16,9 @@ import (
 type ReconfigurableClientConn struct {
 	connMu sync.RWMutex
 	conn   rpc.ClientConn
+
+	onTrackCBByTrackNameMu sync.Mutex
+	onTrackCBByTrackName   map[string]OnTrackCB
 }
 
 // Return this constant such that backoff error logging can compare consecutive errors and reliably
@@ -57,8 +62,31 @@ func (c *ReconfigurableClientConn) NewStream(
 
 // ReplaceConn replaces the underlying client connection with the connection passed in. This does not close the
 // old connection, the caller is expected to close it if needed.
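+// ReplaceConn also installs an OnTrack handler on the new connection's PeerConnection (when one
+// exists) that dispatches to the `onTrackCBByTrackName` map; OnTrack callbacks do not carry over
+// from the old connection, so they must be re-registered on every replacement.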
 // ReplaceConn replaces the underlying client connection with the connection passed in. This does not close the
 // old connection, the caller is expected to close it if needed.
 func (c *ReconfigurableClientConn) ReplaceConn(conn rpc.ClientConn) {
+	logging.Global().Info("ReplaceConn START")
+	defer logging.Global().Info("ReplaceConn END")
 	c.connMu.Lock()
 	c.conn = conn
+	// It is safe to access this without a mutex as it is only ever nil at the beginning of the
+	// ReconfigurableClientConn's lifetime.
+	if c.onTrackCBByTrackName == nil {
+		c.onTrackCBByTrackName = make(map[string]OnTrackCB)
+	}
+
+	if pc := conn.PeerConn(); pc != nil {
+		pc.OnTrack(func(trackRemote *webrtc.TrackRemote, rtpReceiver *webrtc.RTPReceiver) {
+			logging.Global().Warnf("OnTrack START %s pc: %p", trackRemote.StreamID(), pc)
+			defer logging.Global().Warnf("OnTrack END %s pc: %p", trackRemote.StreamID(), pc)
+			c.onTrackCBByTrackNameMu.Lock()
+			onTrackCB, ok := c.onTrackCBByTrackName[trackRemote.StreamID()]
+			c.onTrackCBByTrackNameMu.Unlock()
+			if !ok {
+				msg := "Callback not found for StreamID (trackName): %s, keys(onTrackCBByTrackName): %#v"
+				logging.Global().Errorf(msg, trackRemote.StreamID(), maps.Keys(c.onTrackCBByTrackName))
+				return
+			}
+			onTrackCB(trackRemote, rtpReceiver)
+		})
+	}
 	c.connMu.Unlock()
 }
 
@@ -84,3 +112,17 @@ func (c *ReconfigurableClientConn) Close() error {
 	c.conn = nil
 	return conn.Close()
 }
+
+// AddOnTrackSub adds an OnTrack subscription for the track.
+func (c *ReconfigurableClientConn) AddOnTrackSub(trackName string, onTrackCB OnTrackCB) {
+	c.onTrackCBByTrackNameMu.Lock()
+	defer c.onTrackCBByTrackNameMu.Unlock()
+	c.onTrackCBByTrackName[trackName] = onTrackCB
+}
+
+// RemoveOnTrackSub removes an OnTrack subscription for the track.
+func (c *ReconfigurableClientConn) RemoveOnTrackSub(trackName string) {
+	c.onTrackCBByTrackNameMu.Lock()
+	defer c.onTrackCBByTrackNameMu.Unlock()
+	delete(c.onTrackCBByTrackName, trackName)
+}
diff --git a/grpc/shared_conn.go b/grpc/shared_conn.go
index 88ad2f82ff6..1216651c439 100644
--- a/grpc/shared_conn.go
+++ b/grpc/shared_conn.go
@@ -3,7 +3,6 @@ package grpc
 import (
 	"context"
 	"errors"
-	"fmt"
 	"net"
 	"sync"
 	"time"
@@ -15,10 +14,10 @@ import (
 	"go.uber.org/zap"
 	"go.viam.com/utils"
 	"go.viam.com/utils/rpc"
+	"golang.org/x/exp/maps"
 	googlegrpc "google.golang.org/grpc"
 
 	"go.viam.com/rdk/logging"
-	"go.viam.com/rdk/resource"
 	rutils "go.viam.com/rdk/utils"
 )
 
@@ -80,8 +79,8 @@ type SharedConn struct {
 	// set to nil before this channel is closed.
 	peerConnFailed chan struct{}
 
-	resOnTrackMu  sync.Mutex
-	resOnTrackCBs map[resource.Name]OnTrackCB
+	onTrackCBByTrackNameMu sync.Mutex
+	onTrackCBByTrackName   map[string]OnTrackCB
 
 	logger logging.Logger
 }
@@ -106,18 +105,18 @@ func (sc *SharedConn) NewStream(
 	return sc.grpcConn.NewStream(ctx, desc, method, opts...)
 }
 
-// AddOnTrackSub adds an OnTrack subscription for the resource.
-func (sc *SharedConn) AddOnTrackSub(name resource.Name, onTrackCB OnTrackCB) {
-	sc.resOnTrackMu.Lock()
-	defer sc.resOnTrackMu.Unlock()
-	sc.resOnTrackCBs[name] = onTrackCB
+// AddOnTrackSub adds an OnTrack subscription for the track.
+func (sc *SharedConn) AddOnTrackSub(trackName string, onTrackCB OnTrackCB) {
+	sc.onTrackCBByTrackNameMu.Lock()
+	defer sc.onTrackCBByTrackNameMu.Unlock()
+	sc.onTrackCBByTrackName[trackName] = onTrackCB
 }
 
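From a consumer's perspective, the Add/Remove pair is used symmetrically around a track's lifetime. A hypothetical helper built on the Tracker interface added in grpc/tracker.go further below (a sketch, not code from the patch):

package example

import "go.viam.com/rdk/grpc"

// watchTrack registers cb before the track is negotiated so the OnTrack
// dispatch can find it, and returns a cleanup for symmetric removal.
func watchTrack(conn grpc.Tracker, trackName string, cb grpc.OnTrackCB) (cleanup func()) {
	conn.AddOnTrackSub(trackName, cb)
	return func() { conn.RemoveOnTrackSub(trackName) }
}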
-// RemoveOnTrackSub removes an OnTrack subscription for the resource.
-func (sc *SharedConn) RemoveOnTrackSub(name resource.Name) {
-	sc.resOnTrackMu.Lock()
-	defer sc.resOnTrackMu.Unlock()
-	delete(sc.resOnTrackCBs, name)
+// RemoveOnTrackSub removes an OnTrack subscription for the track.
+func (sc *SharedConn) RemoveOnTrackSub(trackName string) {
+	sc.onTrackCBByTrackNameMu.Lock()
+	defer sc.onTrackCBByTrackNameMu.Unlock()
+	delete(sc.onTrackCBByTrackName, trackName)
 }
 
 // GrpcConn returns a gRPC capable client connection.
@@ -159,9 +158,11 @@ func (sc *SharedConn) ResetConn(conn rpc.ClientConn, moduleLogger logging.Logger
 		sc.logger = moduleLogger.Sublogger("networking.conn")
 	}
 
-	if sc.resOnTrackCBs == nil {
+	// It is safe to access this without a mutex as it is only ever nil at the beginning of the
+	// SharedConn's lifetime.
+	if sc.onTrackCBByTrackName == nil {
 		// Same initilization argument as above with the logger.
-		sc.resOnTrackCBs = make(map[resource.Name]OnTrackCB)
+		sc.onTrackCBByTrackName = make(map[string]OnTrackCB)
 	}
 
 	sc.peerConnMu.Lock()
@@ -199,16 +200,12 @@ func (sc *SharedConn) ResetConn(conn rpc.ClientConn, moduleLogger logging.Logger
 	}
 
 	sc.peerConn.OnTrack(func(trackRemote *webrtc.TrackRemote, rtpReceiver *webrtc.RTPReceiver) {
-		name, err := resource.NewFromString(trackRemote.StreamID())
-		if err != nil {
-			sc.logger.Errorw("StreamID did not parse as a ResourceName", "sharedConn", fmt.Sprintf("%p", sc), "streamID", trackRemote.StreamID())
-			return
-		}
-		sc.resOnTrackMu.Lock()
-		onTrackCB, ok := sc.resOnTrackCBs[name]
-		sc.resOnTrackMu.Unlock()
+		sc.onTrackCBByTrackNameMu.Lock()
+		onTrackCB, ok := sc.onTrackCBByTrackName[trackRemote.StreamID()]
+		sc.onTrackCBByTrackNameMu.Unlock()
 		if !ok {
-			sc.logger.Errorw("Callback not found for StreamID", "sharedConn", fmt.Sprintf("%p", sc), "streamID", trackRemote.StreamID())
+			msg := "Callback not found for StreamID: %s, keys(onTrackCBByTrackName): %#v"
+			sc.logger.Errorf(msg, trackRemote.StreamID(), maps.Keys(sc.onTrackCBByTrackName))
 			return
 		}
 		onTrackCB(trackRemote, rtpReceiver)
@@ -339,6 +336,7 @@ func NewLocalPeerConnection(logger logging.Logger) (*webrtc.PeerConnection, erro
 		return false
 	})
 
+	settingEngine.DisableSRTPReplayProtection(true)
 	options := []func(a *webrtc.API){webrtc.WithMediaEngine(&m), webrtc.WithInterceptorRegistry(&i)}
 
 	if utils.Debug {
diff --git a/grpc/tracker.go b/grpc/tracker.go
new file mode 100644
index 00000000000..6a2e1943d26
--- /dev/null
+++ b/grpc/tracker.go
@@ -0,0 +1,9 @@
+package grpc
+
+// Tracker allows registering callback functions, keyed by track name, for a WebRTC peer
+// connection's OnTrack callback to dispatch to.
+// Both grpc.SharedConn and grpc.ReconfigurableClientConn implement Tracker.
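That relationship can also be pinned at compile time with blank-identifier assertions, an alternative to the reflection-based test in grpc/tracker_test.go below (a sketch that could sit next to the interface):

var (
	_ Tracker = (*SharedConn)(nil)
	_ Tracker = (*ReconfigurableClientConn)(nil)
)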
+type Tracker interface { + AddOnTrackSub(trackName string, onTrackCB OnTrackCB) + RemoveOnTrackSub(trackName string) +} diff --git a/grpc/tracker_test.go b/grpc/tracker_test.go new file mode 100644 index 00000000000..0c677ad3086 --- /dev/null +++ b/grpc/tracker_test.go @@ -0,0 +1,20 @@ +package grpc + +import ( + "reflect" + "testing" + + "go.viam.com/test" +) + +func TestTrackerImplementations(t *testing.T) { + tracker := reflect.TypeOf((*Tracker)(nil)).Elem() + + t.Run("*ReconfigurableClientConn should implement Tracker", func(t *testing.T) { + test.That(t, reflect.TypeOf(&ReconfigurableClientConn{}).Implements(tracker), test.ShouldBeTrue) + }) + + t.Run("*SharedConn should implement Tracker", func(t *testing.T) { + test.That(t, reflect.TypeOf(&SharedConn{}).Implements(tracker), test.ShouldBeTrue) + }) +} diff --git a/robot/client/client.go b/robot/client/client.go index 3bc3d75a6bb..f7665db7873 100644 --- a/robot/client/client.go +++ b/robot/client/client.go @@ -3,6 +3,7 @@ package client import ( "context" + "errors" "flag" "fmt" "io" @@ -15,7 +16,6 @@ import ( grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" "github.com/jhump/protoreflect/desc" "github.com/jhump/protoreflect/grpcreflect" - "github.com/pkg/errors" "go.uber.org/multierr" "go.uber.org/zap" "go.uber.org/zap/zapcore" @@ -132,7 +132,7 @@ func isClosedPipeError(err error) bool { } func (rc *RobotClient) notConnectedToRemoteError() error { - return errors.Errorf("not connected to remote robot at %s", rc.address) + return fmt.Errorf("not connected to remote robot at %s", rc.address) } func (rc *RobotClient) handleUnaryDisconnect( @@ -370,11 +370,15 @@ func (rc *RobotClient) connectWithLock(ctx context.Context) error { rc.conn.ReplaceConn(conn) rc.client = client rc.refClient = refClient + rc.logger.Warn("connected") rc.connected.Store(true) if len(rc.resourceClients) != 0 { + rc.logger.Warn("updateResources START") if err := rc.updateResources(ctx); err != nil { + rc.logger.Errorf("updateResources END, err: %s", err.Error()) return err } + rc.logger.Warnw("updateResources END", "numResources", len(rc.resourceNames)) } if rc.changeChan != nil { @@ -393,6 +397,7 @@ func (rc *RobotClient) updateResourceClients(ctx context.Context) error { for resourceName, client := range rc.resourceClients { // check if no longer an active resource if !activeResources[resourceName] { + rc.logger.Infow("Removing resource from remote client", "resourceName", resourceName) if err := client.Close(ctx); err != nil { rc.Logger().CError(ctx, err) continue @@ -463,6 +468,7 @@ func (rc *RobotClient) checkConnection(ctx context.Context, checkEvery, reconnec "reconnect_interval", reconnectEvery.Seconds(), ) rc.mu.Lock() + rc.logger.Warn("NOT connected") rc.connected.Store(false) if rc.changeChan != nil { rc.changeChan <- true @@ -574,7 +580,8 @@ func (rc *RobotClient) createClient(name resource.Name) (resource.Resource, erro if !ok || apiInfo.RPCClient == nil { return grpc.NewForeignResource(name, &rc.conn), nil } - return apiInfo.RPCClient(rc.backgroundCtx, &rc.conn, rc.remoteName, name, rc.Logger()) + logger := rc.Logger().Sublogger(resource.RemoveRemoteName(name).ShortName()) + return apiInfo.RPCClient(rc.backgroundCtx, &rc.conn, rc.remoteName, name, logger) } func (rc *RobotClient) resources(ctx context.Context) ([]resource.Name, []resource.RPCAPI, error) { @@ -628,7 +635,7 @@ func (rc *RobotClient) resources(ctx context.Context) ([]resource.Name, []resour } svcDesc, ok := symDesc.(*desc.ServiceDescriptor) if !ok { - return nil, nil, 
errors.Errorf("expected descriptor to be service descriptor but got %T", symDesc)
+			return nil, nil, fmt.Errorf("expected descriptor to be service descriptor but got %T", symDesc)
 		}
 		resTypes = append(resTypes, resource.RPCAPI{
 			API: rprotoutils.ResourceNameFromProto(resAPI.Subtype).API,
@@ -659,7 +666,9 @@ func (rc *RobotClient) updateResources(ctx context.Context) error {
 
 	// call metadata service.
 	names, rpcAPIs, err := rc.resources(ctx)
+	// rc.logger.Infow("robotClient.updateResources", "numNames", len(names), "err", err, "numRc.ResourceNames", len(rc.resourceNames))
 	if err != nil && status.Code(err) != codes.Unimplemented {
+		rc.logger.Infow("robotClient.updateResources -- returning error", "numRc.ResourceNames", len(rc.resourceNames))
 		return err
 	}
 
@@ -721,7 +730,8 @@ func (rc *RobotClient) PackageManager() packages.Manager {
 	return nil
 }
 
-// ResourceNames returns a list of all known resource names connected to this machine.
+// ResourceNames returns a list of all known resource names on the connected remote. Returns nil if
+// the connection is not healthy, and the empty slice if it is healthy but the response was empty.
 //
 //	resource_names := machine.ResourceNames()
 func (rc *RobotClient) ResourceNames() []resource.Name {
@@ -733,6 +743,10 @@ func (rc *RobotClient) ResourceNames() []resource.Name {
 	defer rc.mu.RUnlock()
 	names := make([]resource.Name, 0, len(rc.resourceNames))
 	names = append(names, rc.resourceNames...)
+
+	if len(names) == 0 {
+		rc.Logger().Errorw("ClientResourceNames returning 0 resources", "checkConnected", rc.checkConnected())
+	}
 	return names
 }
diff --git a/robot/impl/resource_manager.go b/robot/impl/resource_manager.go
index d63201bbcf9..8f00da6dfe1 100644
--- a/robot/impl/resource_manager.go
+++ b/robot/impl/resource_manager.go
@@ -72,6 +72,7 @@ func newResourceManager(
 	logger logging.Logger,
 ) *resourceManager {
 	resLogger := logger.Sublogger("resource_manager")
+	resLogger.SetLevel(logging.DEBUG)
 	return &resourceManager{
 		resources:      resource.NewGraph(),
 		processManager: newProcessManager(opts, logger),
@@ -182,9 +183,14 @@ func (manager *resourceManager) updateRemoteResourceNames(
 	rr internalRemoteRobot,
 	recreateAllClients bool,
 ) bool {
-	manager.logger.CDebugw(ctx, "updating remote resource names", "remote", remoteName)
+	manager.logger.CDebugw(ctx, "updating remote resource names", "remote", remoteName, "recreateAllClients", recreateAllClients)
 	activeResourceNames := map[resource.Name]bool{}
 	newResources := rr.ResourceNames()
+	if newResources == nil {
+		// Assuming this means a connection error.
+ return false // anythingChanged=false + } + oldResources := manager.remoteResourceNames(remoteName) for _, res := range oldResources { activeResourceNames[res] = false @@ -194,26 +200,12 @@ func (manager *resourceManager) updateRemoteResourceNames( for _, resName := range newResources { remoteResName := resName - res, err := rr.ResourceByName(remoteResName) // this returns a remote known OR foreign resource client - if err != nil { - if errors.Is(err, client.ErrMissingClientRegistration) { - manager.logger.CDebugw(ctx, "couldn't obtain remote resource interface", - "name", remoteResName, - "reason", err) - } else { - manager.logger.CErrorw(ctx, "couldn't obtain remote resource interface", - "name", remoteResName, - "reason", err) - } - continue - } - resName = resName.PrependRemote(remoteName.Name) - gNode, ok := manager.resources.Node(resName) + gNode, nodeAlreadyExists := manager.resources.Node(resName) if _, alreadyCurrent := activeResourceNames[resName]; alreadyCurrent { activeResourceNames[resName] = true - if ok && !gNode.IsUninitialized() { + if nodeAlreadyExists && !gNode.IsUninitialized() { // resources that enter this block represent those with names that already exist in the resource graph. // it is possible that we are switching to a new remote with a identical resource name(s), so we may // need to create these resource clients. @@ -239,7 +231,21 @@ func (manager *resourceManager) updateRemoteResourceNames( } } - if ok { + res, err := rr.ResourceByName(remoteResName) // this returns a remote known OR foreign resource client + if err != nil { + if errors.Is(err, client.ErrMissingClientRegistration) { + manager.logger.CDebugw(ctx, "couldn't obtain remote resource interface", + "name", remoteResName, + "reason", err) + } else { + manager.logger.CErrorw(ctx, "couldn't obtain remote resource interface", + "name", remoteResName, + "reason", err) + } + continue + } + + if nodeAlreadyExists { gNode.SwapResource(res, unknownModel) } else { gNode = resource.NewConfiguredGraphNode(resource.Config{}, res, unknownModel) @@ -628,8 +634,7 @@ func (manager *resourceManager) completeConfig( if gNode.IsUninitialized() { verb = "configuring" gNode.InitializeLogger( - manager.logger, resName.String(), conf.LogConfiguration.Level, - ) + manager.logger, resName.String(), conf.LogConfiguration.Level) } else { verb = "reconfiguring" } @@ -759,8 +764,7 @@ func (manager *resourceManager) completeConfigForRemotes(ctx context.Context, lr } if gNode.IsUninitialized() { gNode.InitializeLogger( - manager.logger, fromRemoteNameToRemoteNodeName(remConf.Name).String(), manager.logger.GetLevel(), - ) + manager.logger, fromRemoteNameToRemoteNodeName(remConf.Name).String(), logging.INFO) } // this is done in config validation but partial start rules require us to check again if _, err := remConf.Validate(""); err != nil { diff --git a/robot/web/stream/camera/camera.go b/robot/web/stream/camera/camera.go new file mode 100644 index 00000000000..baf07baf374 --- /dev/null +++ b/robot/web/stream/camera/camera.go @@ -0,0 +1,24 @@ +// Package camera provides functions for looking up a camera from a robot using a stream +package camera + +import ( + "go.viam.com/rdk/components/camera" + "go.viam.com/rdk/gostream" + "go.viam.com/rdk/resource" + "go.viam.com/rdk/robot" +) + +// Dan: After `web/stream/state` is moved into `web/stream` -- this can be moved into +// `web/stream`. 
And renamed to `CameraFromStreamName`
+//
+// Camera returns the camera from the robot (derived from the stream) or
+// an error if the robot has no such camera.
+func Camera(robot robot.Robot, stream gostream.Stream) (camera.Camera, error) {
+	// Stream names are slightly modified versions of the resource short name
+	shortName := resource.SDPTrackNameToShortName(stream.Name())
+	cam, err := camera.FromRobot(robot, shortName)
+	if err != nil {
+		return nil, err
+	}
+	return cam, nil
+}
diff --git a/robot/web/stream/server.go b/robot/web/stream/server.go
index 3a69d2c0177..c181df278bf 100644
--- a/robot/web/stream/server.go
+++ b/robot/web/stream/server.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"sync"
+	"time"
 
 	"github.com/pkg/errors"
 	"github.com/viamrobotics/webrtc/v3"
@@ -13,13 +14,18 @@ import (
 	"go.viam.com/utils"
 	"go.viam.com/utils/rpc"
 
+	"go.viam.com/rdk/components/camera"
 	"go.viam.com/rdk/gostream"
 	"go.viam.com/rdk/logging"
+	"go.viam.com/rdk/resource"
 	"go.viam.com/rdk/robot"
+	streamCamera "go.viam.com/rdk/robot/web/stream/camera"
 	"go.viam.com/rdk/robot/web/stream/state"
 	rutils "go.viam.com/rdk/utils"
 )
 
+var monitorCameraInterval = time.Second
+
 type peerState struct {
 	streamState *state.StreamState
 	senders     []*webrtc.RTPSender
@@ -28,8 +34,10 @@ type peerState struct {
 // Server implements the gRPC audio/video streaming service.
 type Server struct {
 	streampb.UnimplementedStreamServiceServer
-	logger logging.Logger
-	r      robot.Robot
+	logger    logging.Logger
+	robot     robot.Robot
+	closedCtx context.Context
+	closedFn  context.CancelFunc
 
 	mu                      sync.RWMutex
 	streamNames             []string
@@ -43,23 +51,29 @@ type Server struct {
 // stream.
 func NewServer(
 	streams []gostream.Stream,
-	r robot.Robot,
+	robot robot.Robot,
 	logger logging.Logger,
 ) (*Server, error) {
-	ss := &Server{
-		r:                 r,
+	closedCtx, closedFn := context.WithCancel(context.Background())
+	server := &Server{
+		closedCtx:         closedCtx,
+		closedFn:          closedFn,
+		robot:             robot,
 		logger:            logger,
 		nameToStreamState: map[string]*state.StreamState{},
 		activePeerStreams: map[*webrtc.PeerConnection]map[string]*peerState{},
 		isAlive:           true,
 	}
+	server.logger.SetLevel(logging.DEBUG)
 
 	for _, stream := range streams {
-		if err := ss.add(stream); err != nil {
+		if err := server.add(stream); err != nil {
 			return nil, err
 		}
 	}
 
-	return ss, nil
+	server.startMonitorCameraAvailable()
+
+	return server, nil
 }
 
 // StreamAlreadyRegisteredError indicates that a stream has a name that is already registered on
@@ -73,20 +87,20 @@ func (e *StreamAlreadyRegisteredError) Error() string {
 }
 
 // NewStream informs the stream server of new streams that are capable of being streamed.
-func (ss *Server) NewStream(config gostream.StreamConfig) (gostream.Stream, error) {
-	ss.mu.Lock()
-	defer ss.mu.Unlock()
+func (server *Server) NewStream(config gostream.StreamConfig) (gostream.Stream, error) {
+	server.mu.Lock()
+	defer server.mu.Unlock()
 
-	if _, ok := ss.nameToStreamState[config.Name]; ok {
+	if _, ok := server.nameToStreamState[config.Name]; ok {
 		return nil, &StreamAlreadyRegisteredError{config.Name}
 	}
 
-	stream, err := gostream.NewStream(config, ss.logger)
+	stream, err := gostream.NewStream(config, server.logger)
 	if err != nil {
 		return nil, err
 	}
 
-	if err = ss.add(stream); err != nil {
+	if err = server.add(stream); err != nil {
 		return nil, err
 	}
 
@@ -94,49 +108,63 @@ func (ss *Server) NewStream(config gostream.StreamConfig) (gostream.Stream, erro
 }
 
 // ListStreams implements part of the StreamServiceServer.
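The Camera helper above is what lets the server code treat "does this stream still have a backing camera" as a single call; AddStream and the camera monitor below both use it as an existence check. A sketch of that usage (the function name here is hypothetical):

package example

import (
	"go.viam.com/rdk/gostream"
	"go.viam.com/rdk/robot"
	streamCamera "go.viam.com/rdk/robot/web/stream/camera"
)

// streamHasLiveCamera maps the stream's SDP track name back to a resource
// short name and resolves it against the robot's resource graph.
func streamHasLiveCamera(r robot.Robot, stream gostream.Stream) bool {
	_, err := streamCamera.Camera(r, stream)
	return err == nil
}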
-func (ss *Server) ListStreams(ctx context.Context, req *streampb.ListStreamsRequest) (*streampb.ListStreamsResponse, error) { +func (server *Server) ListStreams(ctx context.Context, req *streampb.ListStreamsRequest) (*streampb.ListStreamsResponse, error) { _, span := trace.StartSpan(ctx, "stream::server::ListStreams") defer span.End() - ss.mu.RLock() - defer ss.mu.RUnlock() + server.mu.RLock() + defer server.mu.RUnlock() - names := make([]string, 0, len(ss.streamNames)) - for _, name := range ss.streamNames { - streamState := ss.nameToStreamState[name] + names := make([]string, 0, len(server.streamNames)) + for _, name := range server.streamNames { + streamState := server.nameToStreamState[name] names = append(names, streamState.Stream.Name()) } return &streampb.ListStreamsResponse{Names: names}, nil } // AddStream implements part of the StreamServiceServer. -func (ss *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequest) (*streampb.AddStreamResponse, error) { +func (server *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequest) (*streampb.AddStreamResponse, error) { ctx, span := trace.StartSpan(ctx, "stream::server::AddStream") defer span.End() - // Get the peer connection + // Get the peer connection to the caller. pc, ok := rpc.ContextPeerConnection(ctx) + server.logger.Infow("Adding video stream", "name", req.Name, "peerConn", pc) + defer server.logger.Warnf("AddStream END %s", req.Name) + if !ok { return nil, errors.New("can only add a stream over a WebRTC based connection") } - ss.mu.Lock() - defer ss.mu.Unlock() + server.mu.Lock() + defer server.mu.Unlock() - streamStateToAdd, ok := ss.nameToStreamState[req.Name] + streamStateToAdd, ok := server.nameToStreamState[req.Name] // return error if there is no stream for that camera if !ok { - err := fmt.Errorf("no stream for %q", req.Name) - ss.logger.Error(err.Error()) + var availableStreams string + for n := range server.nameToStreamState { + if availableStreams != "" { + availableStreams += ", " + } + availableStreams += fmt.Sprintf("%q", n) + } + err := fmt.Errorf("no stream for %q, available streams: %s", req.Name, availableStreams) + server.logger.Error(err.Error()) + return nil, err + } + // return error if camera is not in resource graph + if _, err := streamCamera.Camera(server.robot, streamStateToAdd.Stream); err != nil { return nil, err } // return error if the caller's peer connection is already being sent video data - if _, ok := ss.activePeerStreams[pc][req.Name]; ok { + if _, ok := server.activePeerStreams[pc][req.Name]; ok { err := errors.New("stream already active") - ss.logger.Error(err.Error()) + server.logger.Error(err.Error()) return nil, err } - nameToPeerState, ok := ss.activePeerStreams[pc] + nameToPeerState, ok := server.activePeerStreams[pc] // if there is no active video data being sent, set up a callback to remove the peer connection from // the active streams & stop the stream from doing h264 encode if this is the last peer connection // subcribed to the camera's video feed @@ -145,16 +173,16 @@ func (ss *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequest) if !ok { nameToPeerState = map[string]*peerState{} pc.OnConnectionStateChange(func(peerConnectionState webrtc.PeerConnectionState) { - ss.logger.Debugf("%s pc.OnConnectionStateChange state: %s", req.Name, peerConnectionState) + server.logger.Infof("%s pc.OnConnectionStateChange state: %s", req.Name, peerConnectionState) switch peerConnectionState { case webrtc.PeerConnectionStateDisconnected, 
webrtc.PeerConnectionStateFailed, webrtc.PeerConnectionStateClosed: - ss.mu.Lock() - defer ss.mu.Unlock() + server.mu.Lock() + defer server.mu.Unlock() - if ss.isAlive { + if server.isAlive { // Dan: This conditional closing on `isAlive` is a hack to avoid a data // race. Shutting down a robot causes the PeerConnection to be closed // concurrently with this `stream.Server`. Thus, `stream.Server.Close` waiting @@ -166,21 +194,19 @@ func (ss *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequest) // debt rather than architect how shutdown should holistically work. Revert this // change and run `TestRobotPeerConnect` (double check the test name at PR time) // to reproduce the race. - ss.activeBackgroundWorkers.Add(1) + server.activeBackgroundWorkers.Add(1) utils.PanicCapturingGo(func() { - defer ss.activeBackgroundWorkers.Done() - ss.mu.Lock() - defer ss.mu.Unlock() - defer delete(ss.activePeerStreams, pc) + defer server.activeBackgroundWorkers.Done() + server.mu.Lock() + defer server.mu.Unlock() + defer delete(server.activePeerStreams, pc) var errs error - for _, ps := range ss.activePeerStreams[pc] { - ctx, cancel := context.WithTimeout(context.Background(), state.UnsubscribeTimeout) - errs = multierr.Combine(errs, ps.streamState.Decrement(ctx)) - cancel() + for _, ps := range server.activePeerStreams[pc] { + errs = multierr.Combine(errs, ps.streamState.Decrement()) } // We don't want to log this if the streamState was closed (as it only happens if viam-server is terminating) if errs != nil && !errors.Is(errs, state.ErrClosed) { - ss.logger.Errorw("error(s) stopping the streamState", "errs", errs) + server.logger.Errorw("error(s) stopping the streamState", "errs", errs) } }) } @@ -192,7 +218,7 @@ func (ss *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequest) return } }) - ss.activePeerStreams[pc] = nameToPeerState + server.activePeerStreams[pc] = nameToPeerState } ps, ok := nameToPeerState[req.Name] @@ -204,12 +230,14 @@ func (ss *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequest) guard := rutils.NewGuard(func() { for _, sender := range ps.senders { + logging.Global().Infof("calling RemoveTrack on %s pc: %p", sender.Track().StreamID(), pc) utils.UncheckedError(pc.RemoveTrack(sender)) } }) defer guard.OnFail() addTrack := func(track webrtc.TrackLocal) error { + // logging.Global().Infof("calling AddTrack on %s pc: %p", track.StreamID(), pc) sender, err := pc.AddTrack(track) if err != nil { return err @@ -221,19 +249,19 @@ func (ss *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequest) // if the stream supports video, add the video track if trackLocal, haveTrackLocal := streamStateToAdd.Stream.VideoTrackLocal(); haveTrackLocal { if err := addTrack(trackLocal); err != nil { - ss.logger.Error(err.Error()) + server.logger.Error(err.Error()) return nil, err } } // if the stream supports audio, add the audio track if trackLocal, haveTrackLocal := streamStateToAdd.Stream.AudioTrackLocal(); haveTrackLocal { if err := addTrack(trackLocal); err != nil { - ss.logger.Error(err.Error()) + server.logger.Error(err.Error()) return nil, err } } - if err := streamStateToAdd.Increment(ctx); err != nil { - ss.logger.Error(err.Error()) + if err := streamStateToAdd.Increment(); err != nil { + server.logger.Error(err.Error()) return nil, err } @@ -242,70 +270,152 @@ func (ss *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequest) } // RemoveStream implements part of the StreamServiceServer. 
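The OnConnectionStateChange handler above is the only cleanup path for abruptly departing peers: every stream the peer subscribed to must be decremented or the camera keeps encoding for nobody. A distilled sketch of that path (types trimmed to the essentials; releaseAll stands in for the goroutine spawned above):

package example

import "github.com/viamrobotics/webrtc/v3"

// refCounted is the slice of the stream state the cleanup path needs.
type refCounted interface{ Decrement() error }

// isTerminal mirrors the switch above: after these states the peer will never
// consume media again, so its subscriptions must be released.
func isTerminal(s webrtc.PeerConnectionState) bool {
	switch s {
	case webrtc.PeerConnectionStateDisconnected,
		webrtc.PeerConnectionStateFailed,
		webrtc.PeerConnectionStateClosed:
		return true
	default:
		return false
	}
}

// releaseAll decrements every stream a departed peer was subscribed to; the
// last viewer leaving is what stops the encode or passthrough work.
func releaseAll(streams map[string]refCounted) {
	for _, s := range streams {
		_ = s.Decrement() // errors are logged by the caller, not fatal
	}
}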
-func (ss *Server) RemoveStream(ctx context.Context, req *streampb.RemoveStreamRequest) (*streampb.RemoveStreamResponse, error) {
+func (server *Server) RemoveStream(ctx context.Context, req *streampb.RemoveStreamRequest) (*streampb.RemoveStreamResponse, error) {
 	ctx, span := trace.StartSpan(ctx, "stream::server::RemoveStream")
 	defer span.End()
 
 	pc, ok := rpc.ContextPeerConnection(ctx)
+	server.logger.Infow("Removing video stream", "name", req.Name, "peerConn", pc)
+	defer server.logger.Warnf("RemoveStream END %s", req.Name)
 	if !ok {
 		return nil, errors.New("can only remove a stream over a WebRTC based connection")
 	}
 
-	ss.mu.Lock()
-	defer ss.mu.Unlock()
+	server.mu.Lock()
+	defer server.mu.Unlock()
 
-	streamToRemove, ok := ss.nameToStreamState[req.Name]
+	streamToRemove, ok := server.nameToStreamState[req.Name]
+	// Callers of RemoveStream will continue calling RemoveStream until it succeeds. Retrying on the
+	// following "stream not found" errors does not help achieve that goal. Thus we return a success
+	// response.
 	if !ok {
-		return nil, fmt.Errorf("no stream for %q", req.Name)
+		return &streampb.RemoveStreamResponse{}, nil
 	}
 
-	if _, ok := ss.activePeerStreams[pc][req.Name]; !ok {
-		return nil, errors.New("stream already inactive")
+	if _, err := streamCamera.Camera(server.robot, streamToRemove.Stream); err != nil {
+		return &streampb.RemoveStreamResponse{}, nil
+	}
+
+	if _, ok := server.activePeerStreams[pc][req.Name]; !ok {
+		return &streampb.RemoveStreamResponse{}, nil
 	}
 
 	var errs error
-	for _, sender := range ss.activePeerStreams[pc][req.Name].senders {
+	for _, sender := range server.activePeerStreams[pc][req.Name].senders {
+		// logging.Global().Infof("calling RemoveTrack on %s pc: %p", sender.Track().StreamID(), pc)
 		errs = multierr.Combine(errs, pc.RemoveTrack(sender))
 	}
 	if errs != nil {
-		ss.logger.Error(errs.Error())
+		server.logger.Error(errs.Error())
 		return nil, errs
 	}
 
-	if err := streamToRemove.Decrement(ctx); err != nil {
-		ss.logger.Error(err.Error())
+	if err := streamToRemove.Decrement(); err != nil {
+		server.logger.Error(err.Error())
 		return nil, err
 	}
 
-	delete(ss.activePeerStreams[pc], req.Name)
+	delete(server.activePeerStreams[pc], req.Name)
 	return &streampb.RemoveStreamResponse{}, nil
 }
 
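Because RemoveStream now answers success for already-gone streams, a caller can retry the RPC until the transport delivers it, with no not-found special case. A sketch of such a caller, assuming the streampb client package path used elsewhere in the codebase:

package example

import (
	"context"
	"time"

	streampb "go.viam.com/api/stream/v1"
)

// removeStreamWithRetry loops only on transport errors; "no such stream" is
// treated as success by the server above.
func removeStreamWithRetry(ctx context.Context, client streampb.StreamServiceClient, name string) error {
	for {
		if _, err := client.RemoveStream(ctx, &streampb.RemoveStreamRequest{Name: name}); err == nil {
			return nil
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(time.Second):
		}
	}
}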
 // Close closes the Server and waits for spun off goroutines to complete.
-func (ss *Server) Close() error {
-	ss.mu.Lock()
-	ss.isAlive = false
+func (server *Server) Close() error {
+	server.closedFn()
+	server.mu.Lock()
+	server.isAlive = false
 
 	var errs error
-	for _, name := range ss.streamNames {
-		errs = multierr.Combine(errs, ss.nameToStreamState[name].Close())
+	for _, name := range server.streamNames {
+		errs = multierr.Combine(errs, server.nameToStreamState[name].Close())
 	}
 	if errs != nil {
-		ss.logger.Errorf("Stream Server Close > StreamState.Close() errs: %s", errs)
+		server.logger.Errorf("Stream Server Close > StreamState.Close() errs: %s", errs)
 	}
-	ss.mu.Unlock()
-	ss.activeBackgroundWorkers.Wait()
+	server.mu.Unlock()
+	server.activeBackgroundWorkers.Wait()
 	return errs
 }
 
-func (ss *Server) add(stream gostream.Stream) error {
+func (server *Server) add(stream gostream.Stream) error {
 	streamName := stream.Name()
-	if _, ok := ss.nameToStreamState[streamName]; ok {
+	if _, ok := server.nameToStreamState[streamName]; ok {
 		return &StreamAlreadyRegisteredError{streamName}
 	}
 
-	newStreamState := state.New(stream, ss.r, ss.logger)
-	newStreamState.Init()
-	ss.nameToStreamState[streamName] = newStreamState
-	ss.streamNames = append(ss.streamNames, streamName)
+	logger := server.logger.Sublogger(streamName)
+	logger.SetLevel(logging.DEBUG)
+	newStreamState := state.New(stream, server.robot, logger)
+	server.nameToStreamState[streamName] = newStreamState
+	server.streamNames = append(server.streamNames, streamName)
 
 	return nil
 }
+
+// startMonitorCameraAvailable monitors whether or not each streaming camera still exists.
+// If a camera no longer exists, it:
+// 1. calls RemoveTrack on all peer connection senders streaming tracks from it, and
+// 2. decrements the number of active peers on the stream state (which should result in the
+//    stream state having no subscribers and calling gostream.Stop() or rtppassthrough.Unsubscribe).
+func (server *Server) startMonitorCameraAvailable() {
+	server.activeBackgroundWorkers.Add(1)
+	utils.ManagedGo(func() {
+		logger := server.logger.Sublogger("monitor")
+		logger.Info("Start")
+		defer logger.Info("End")
+		for utils.SelectContextOrWait(server.closedCtx, monitorCameraInterval) {
+			server.removeMissingStreams(logger)
+		}
+	}, server.activeBackgroundWorkers.Done)
+}
+
+func (server *Server) removeMissingStreams(logger logging.Logger) {
+	server.mu.Lock()
+	defer server.mu.Unlock()
+	for key, streamState := range server.nameToStreamState {
+		// Stream names are slightly modified versions of the resource short name
+		camName := streamState.Stream.Name()
+		shortName := resource.SDPTrackNameToShortName(camName)
+
+		_, err := camera.FromRobot(server.robot, shortName)
+		if !resource.IsNotFoundError(err) {
+			// Cameras can go through transient states during reconfigure that don't necessarily
+			// imply the camera is missing. E.g: *resource.notAvailableError. To double-check we
+			// have the right set of exceptions here, we log the error and ignore.
+			if err != nil {
+				server.logger.Warnw("Error getting camera from robot",
+					"camera", camName, "err", err, "errType", fmt.Sprintf("%T", err))
+			}
+			continue
+		}
+
+		// Best effort close any active peer streams. We remove from the known streams first
+		// such that we only try closing/unsubscribing once.
+		server.logger.Infow("Camera doesn't exist. Closing its streams",
			"camera", camName, "err", err, "Type", fmt.Sprintf("%T", err))
+		delete(server.nameToStreamState, key)
+
+		for pc, peerStateByCamName := range server.activePeerStreams {
+			peerState, ok := peerStateByCamName[camName]
+			if !ok {
+				// There are no known peers for this camera. Do nothing.
+				server.logger.Infow("no entry in peer map", "camera", camName)
+				continue
+			}
+
+			server.logger.Infow("unsubscribing", "camera", camName, "numSenders", len(peerState.senders))
+			var errs error
+			for _, sender := range peerState.senders {
+				errs = multierr.Combine(errs, pc.RemoveTrack(sender))
+			}
+
+			if errs != nil {
+				server.logger.Warn(errs.Error())
+			}
+
+			if err := streamState.Decrement(); err != nil {
+				server.logger.Warn(err.Error())
+			}
+			delete(server.activePeerStreams[pc], camName)
+		}
+	}
+}
diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go
index 2f68699ff55..91759cb700e 100644
--- a/robot/web/stream/state/state.go
+++ b/robot/web/stream/state/state.go
@@ -4,13 +4,14 @@ package state
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"sync"
 	"sync/atomic"
 	"time"
 
 	"github.com/pion/rtp"
-	"github.com/pkg/errors"
+	perrors "github.com/pkg/errors"
 	"go.uber.org/multierr"
 	"go.viam.com/utils"
 
@@ -19,37 +20,32 @@ import (
 	"go.viam.com/rdk/gostream"
 	"go.viam.com/rdk/logging"
 	"go.viam.com/rdk/robot"
+	streamCamera "go.viam.com/rdk/robot/web/stream/camera"
 )
 
 var (
-	subscribeRTPTimeout = time.Second * 5
-	// UnsubscribeTimeout is the timeout used when unsubscribing from an rtppassthrough subscription.
-	UnsubscribeTimeout = time.Second * 5
 	// ErrRTPPassthroughNotSupported indicates that rtp_passthrough is not supported by the stream's camera.
 	ErrRTPPassthroughNotSupported = errors.New("RTP Passthrough Not Supported")
 	// ErrClosed indicates that the StreamState is already closed.
 	ErrClosed = errors.New("StreamState already closed")
-	// ErrUninitialized indicates that Init() has not been called on StreamState prior to Increment or Decrement being called.
-	ErrUninitialized = errors.New("uniniialized")
 )
 
 // StreamState controls the source of the RTP packets being written to the stream's subscribers
 // and ensures there is only one active at a time while there are subsribers.
 type StreamState struct {
 	// Stream is the StreamState's stream
-	Stream      gostream.Stream
-	robot       robot.Robot
-	closedCtx   context.Context
-	closedFn    context.CancelFunc
-	wg          sync.WaitGroup
-	logger      logging.Logger
-	initialized atomic.Bool
-
-	msgChan     chan msg
-	restartChan chan struct{}
-
-	activePeers  int
-	streamSource streamSource
+	Stream    gostream.Stream
+	robot     robot.Robot
+	closedCtx context.Context
+	closedFn  context.CancelFunc
+	wg        sync.WaitGroup
+	logger    logging.Logger
+
+	msgChan  chan msg
+	tickChan chan struct{}
+
+	activeClients int
+	streamSource  streamSource
 	// streamSourceSub is only non nil if streamSource == streamSourcePassthrough
 	streamSourceSub rtppassthrough.Subscription
 }
@@ -63,61 +59,47 @@ func New(
 	logger logging.Logger,
 ) *StreamState {
 	ctx, cancel := context.WithCancel(context.Background())
-	return &StreamState{
-		Stream:      stream,
-		closedCtx:   ctx,
-		closedFn:    cancel,
-		robot:       r,
-		msgChan:     make(chan msg),
-		restartChan: make(chan struct{}),
-		logger:      logger,
-	}
-}
-
-// Init initializes the StreamState
-// Init must be called before any other methods.
-func (ss *StreamState) Init() {
-	ss.wg.Add(1)
-	utils.ManagedGo(ss.initEventHandler, ss.wg.Done)
-	ss.wg.Add(1)
-	utils.ManagedGo(ss.initStreamSourceMonitor, ss.wg.Done)
-	ss.initialized.Store(true)
+	ret := &StreamState{
+		Stream:    stream,
+		closedCtx: ctx,
+		closedFn:  cancel,
+		robot:     r,
+		msgChan:   make(chan msg),
+		tickChan:  make(chan struct{}),
+		logger:    logger,
+	}
+
+	ret.wg.Add(1)
+	// The event handler for a stream input manages the following events:
+	// - There's a new subscriber (bump ref counter)
+	// - A subscriber has left (dec ref counter)
+	// - A tick asking it to re-evaluate the stream source (e.g. a camera's
+	//   rtp_passthrough subscription terminated)
+	utils.ManagedGo(ret.sourceEventHandler, ret.wg.Done)
+	return ret
 }
 
 // Increment increments the peer connections subscribed to the stream.
-func (ss *StreamState) Increment(ctx context.Context) error {
-	if !ss.initialized.Load() {
-		return ErrUninitialized
-	}
-	if err := ss.closedCtx.Err(); err != nil {
+func (state *StreamState) Increment() error {
+	if err := state.closedCtx.Err(); err != nil {
 		return multierr.Combine(ErrClosed, err)
 	}
-	return ss.send(ctx, msgTypeIncrement)
+	return state.send(msgTypeIncrement)
 }
 
 // Decrement decrements the peer connections subscribed to the stream.
-func (ss *StreamState) Decrement(ctx context.Context) error {
-	if !ss.initialized.Load() {
-		return ErrUninitialized
-	}
-	if err := ss.closedCtx.Err(); err != nil {
+func (state *StreamState) Decrement() error {
+	if err := state.closedCtx.Err(); err != nil {
 		return multierr.Combine(ErrClosed, err)
 	}
-	return ss.send(ctx, msgTypeDecrement)
-}
-
-// Restart restarts the stream source after it has terminated.
-func (ss *StreamState) Restart(ctx context.Context) {
-	if err := ss.closedCtx.Err(); err != nil {
-		return
-	}
-	utils.UncheckedError(ss.send(ctx, msgTypeRestart))
+	return state.send(msgTypeDecrement)
 }
 
 // Close closes the StreamState.
-func (ss *StreamState) Close() error {
-	ss.closedFn()
-	ss.wg.Wait()
+func (state *StreamState) Close() error {
+	state.logger.Info("Closing streamState")
+	defer state.logger.Info("Wait completed")
+	state.closedFn()
+	state.wg.Wait()
 	return nil
 }
@@ -152,7 +134,7 @@ const (
 	msgTypeUnknown msgType = iota
 	msgTypeIncrement
 	msgTypeDecrement
-	msgTypeRestart
+	msgTypeTick
 )
 
 func (mt msgType) String() string {
@@ -161,8 +143,8 @@
 		return "Increment"
 	case msgTypeDecrement:
 		return "Decrement"
-	case msgTypeRestart:
-		return "Restart"
+	case msgTypeTick:
+		return "Tick"
 	case msgTypeUnknown:
 		fallthrough
 	default:
@@ -171,296 +153,219 @@
 }
 
 type msg struct {
-	msgType  msgType
-	ctx      context.Context
-	respChan chan error
+	msgType msgType
 }
 
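Everything the handler below does runs on one goroutine: activeClients is mutated only there, which is why it needs no mutex, and a once-per-second tick re-evaluates the source even when no subscriber events arrive. A distilled sketch of the pattern (illustrative names, not the patch's types):

package example

import "time"

type event int

const (
	inc event = iota
	dec
)

// run owns the subscriber count exclusively; callers communicate only through
// the events channel, and reconcile converges the stream source on demand.
func run(events <-chan event, done <-chan struct{}, reconcile func(subscribers int)) {
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	subscribers := 0
	for {
		select {
		case <-done:
			return
		case e := <-events:
			if e == inc {
				subscribers++
			} else {
				subscribers--
			}
			reconcile(subscribers) // react immediately on subscribe/unsubscribe edges
		case <-ticker.C:
			reconcile(subscribers) // and converge periodically regardless
		}
	}
}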
-// events (Inc Dec Restart).
-func (ss *StreamState) initEventHandler() {
-	ss.logger.Debug("StreamState initEventHandler booted")
-	defer ss.logger.Debug("StreamState initEventHandler terminated")
+// events (Inc Dec Tick).
+func (state *StreamState) sourceEventHandler() {
+	state.logger.Debug("sourceEventHandler booted")
 	defer func() {
-		ctx, cancel := context.WithTimeout(context.Background(), UnsubscribeTimeout)
-		defer cancel()
-		utils.UncheckedError(ss.stopBasedOnSub(ctx))
+		state.logger.Debug("sourceEventHandler terminating")
+		state.stopInputStream()
 	}()
+	ticker := time.NewTicker(time.Second)
+	defer ticker.Stop()
 	for {
-		if ss.closedCtx.Err() != nil {
-			return
-		}
-
+		var msg msg
 		select {
-		case <-ss.closedCtx.Done():
+		case <-state.closedCtx.Done():
 			return
-		case msg := <-ss.msgChan:
-			ss.handleMsg(msg)
+		case msg = <-state.msgChan:
+			break
+		case <-state.tickChan:
+			// monitorSubscription nudges us here when a passthrough subscription terminates.
+			state.tick()
+			continue
+		case <-ticker.C:
+			state.tick()
+			continue
 		}
-	}
-}
 
-func (ss *StreamState) initStreamSourceMonitor() {
-	for {
-		select {
-		case <-ss.closedCtx.Done():
-			return
-		case <-ss.restartChan:
-			ctx, cancel := context.WithTimeout(ss.closedCtx, subscribeRTPTimeout)
-			ss.Restart(ctx)
-			cancel()
+		switch msg.msgType {
+		case msgTypeTick:
+			state.tick()
+		case msgTypeIncrement:
+			state.activeClients++
+			state.logger.Debugw("activeClients incremented", "activeClientCnt", state.activeClients)
+			if state.activeClients == 1 {
+				state.tick()
+			}
+		case msgTypeDecrement:
+			state.activeClients--
+			state.logger.Debugw("activeClients decremented", "activeClientCnt", state.activeClients)
+			if state.activeClients == 0 {
+				state.tick()
+			}
+		case msgTypeUnknown:
+			fallthrough
+		default:
+			state.logger.Errorw("Invalid StreamState msg type received", "type", msg.msgType)
 		}
 	}
 }
 
-func (ss *StreamState) monitorSubscription(sub rtppassthrough.Subscription) {
-	if ss.streamSource == streamSourceGoStream {
-		ss.logger.Debugf("monitorSubscription stopping gostream %s", ss.Stream.Name())
-		// if we were streaming using gostream, stop streaming using gostream as we are now using passthrough
-		ss.Stream.Stop()
-	}
-	ss.streamSourceSub = sub
-	ss.streamSource = streamSourcePassthrough
-	monitorSubFunc := func() {
-		// if the stream state is shutting down, terminate
-		if ss.closedCtx.Err() != nil {
-			return
-		}
-
+func (state *StreamState) monitorSubscription(terminatedCtx context.Context) {
+	select {
+	case <-state.closedCtx.Done():
+		return
+	case <-terminatedCtx.Done():
 		select {
-		case <-ss.closedCtx.Done():
-			return
-		case <-sub.Terminated.Done():
-			select {
-			case ss.restartChan <- struct{}{}:
-			case <-ss.closedCtx.Done():
-			}
-			return
+		case state.tickChan <- struct{}{}:
+			state.logger.Info("monitorSubscription sent to tickChan")
+		default:
 		}
+		return
 	}
-
-	ss.wg.Add(1)
-	utils.ManagedGo(monitorSubFunc, ss.wg.Done)
 }
 
-// caller must be holding ss.mu.
-func (ss *StreamState) stopBasedOnSub(ctx context.Context) error { - switch ss.streamSource { +func (state *StreamState) stopInputStream() { + switch state.streamSource { case streamSourceGoStream: - ss.logger.Debugf("%s stopBasedOnSub stopping GoStream", ss.Stream.Name()) - ss.Stream.Stop() - ss.streamSource = streamSourceUnknown - return nil + state.logger.Debug("stopping gostream stream") + defer state.logger.Debug("gostream stopped") + state.Stream.Stop() + state.streamSource = streamSourceUnknown + return case streamSourcePassthrough: - ss.logger.Debugf("%s stopBasedOnSub stopping passthrough", ss.Stream.Name()) - err := ss.unsubscribeH264Passthrough(ctx, ss.streamSourceSub.ID) - if err != nil { - return err + state.logger.Debug("stopping h264 passthrough stream") + defer state.logger.Debug("h264 passthrough stream stopped") + err := state.unsubscribeH264Passthrough(state.closedCtx, state.streamSourceSub.ID) + if err != nil && err != camera.ErrUnknownSubscriptionID { + state.logger.Warnw("Error calling unsubscribe", "err", err) + return } - ss.streamSourceSub = rtppassthrough.NilSubscription - ss.streamSource = streamSourceUnknown - return nil - + state.streamSourceSub = rtppassthrough.NilSubscription + state.streamSource = streamSourceUnknown case streamSourceUnknown: - fallthrough default: - return nil } } -func (ss *StreamState) send(ctx context.Context, msgType msgType) error { - if err := ctx.Err(); err != nil { - return err - } - if err := ss.closedCtx.Err(); err != nil { - return err - } - msg := msg{ - ctx: ctx, - msgType: msgType, - respChan: make(chan error), - } +func (state *StreamState) send(msgType msgType) error { select { - case ss.msgChan <- msg: - select { - case err := <-msg.respChan: - return err - case <-ctx.Done(): - return ctx.Err() - case <-ss.closedCtx.Done(): - return ss.closedCtx.Err() - } - case <-ctx.Done(): - return ctx.Err() - case <-ss.closedCtx.Done(): - return ss.closedCtx.Err() - } -} - -func (ss *StreamState) handleMsg(msg msg) { - switch msg.msgType { - case msgTypeIncrement: - err := ss.inc(msg.ctx) - select { - case msg.respChan <- err: - case <-ss.closedCtx.Done(): - return - } - case msgTypeRestart: - ss.restart(msg.ctx) - select { - case msg.respChan <- nil: - case <-ss.closedCtx.Done(): - return - } - case msgTypeDecrement: - err := ss.dec(msg.ctx) - msg.respChan <- err - case msgTypeUnknown: - fallthrough - default: - ss.logger.Error("Invalid StreamState msg type received: %s", msg.msgType) + case state.msgChan <- msg{msgType: msgType}: + state.logger.Infow("pushed msg", "type", msgType) + return nil + case <-state.closedCtx.Done(): + return state.closedCtx.Err() } } -func (ss *StreamState) inc(ctx context.Context) error { - ss.logger.Debugf("increment %s START activePeers: %d", ss.Stream.Name(), ss.activePeers) - defer func() { ss.logger.Debugf("increment %s END activePeers: %d", ss.Stream.Name(), ss.activePeers) }() - if ss.activePeers == 0 { - if ss.streamSource != streamSourceUnknown { - return fmt.Errorf("unexpected stream %s source %s", ss.Stream.Name(), ss.streamSource) - } +func (state *StreamState) tick() { + switch { + case state.activeClients < 0: + state.logger.Fatal("activeClients is less than 0") + case state.activeClients == 0: + // stop stream if there are no active clients + // noop if there is no stream source + state.stopInputStream() + case state.streamSource == streamSourceUnknown: // && state.activeClients > 0 // this is the first subscription, attempt passthrough - ss.logger.CDebugw(ctx, "attempting to subscribe to 
rtp_passthrough", "name", ss.Stream.Name())
-		err := ss.streamH264Passthrough(ctx)
-		if err != nil {
-			ss.logger.CDebugw(ctx, "rtp_passthrough not possible, falling back to GoStream", "err", err.Error(), "name", ss.Stream.Name())
-			// if passthrough failed, fall back to gostream based approach
-			ss.Stream.Start()
-			ss.streamSource = streamSourceGoStream
-		}
-		ss.activePeers++
-		return nil
-	}
+		state.logger.Info("attempting to subscribe to rtp_passthrough")
 
-	switch ss.streamSource {
-	case streamSourcePassthrough:
-		ss.logger.CDebugw(ctx, "continuing using rtp_passthrough", "name", ss.Stream.Name())
-		// noop as we are already subscribed
-	case streamSourceGoStream:
-		ss.logger.CDebugw(ctx, "currently using gostream, trying upgrade to rtp_passthrough", "name", ss.Stream.Name())
-		// attempt to cut over to passthrough
-		err := ss.streamH264Passthrough(ctx)
+		// state.Stream.VideoStreamSourceChanged()
+		err := state.streamH264Passthrough()
 		if err != nil {
-			ss.logger.CDebugw(ctx, "rtp_passthrough not possible, continuing with gostream", "err", err.Error(), "name", ss.Stream.Name())
+			state.logger.Warnw("tick: rtp_passthrough not possible, falling back to GoStream", "err", err)
+			// if passthrough failed, fall back to gostream based approach
+			state.Stream.Start()
+			state.streamSource = streamSourceGoStream
 		}
-	case streamSourceUnknown:
-		fallthrough
-	default:
-		err := fmt.Errorf("%s streamSource in unexpected state %s", ss.Stream.Name(), ss.streamSource)
-		ss.logger.Error(err.Error())
-		return err
-	}
-	ss.activePeers++
-	return nil
-}
-
-func (ss *StreamState) dec(ctx context.Context) error {
-	ss.logger.Debugf("decrement START %s activePeers: %d", ss.Stream.Name(), ss.activePeers)
-	defer func() { ss.logger.Debugf("decrement END %s activePeers: %d", ss.Stream.Name(), ss.activePeers) }()
+	case state.streamSource == streamSourcePassthrough && state.streamSourceSub.Terminated.Err() != nil:
+		// restart the stream if we were using passthrough but the sub is terminated
+		state.logger.Info("previous subscription terminated, attempting to subscribe to rtp_passthrough")
 
-	var err error
-	defer func() {
+		// state.Stream.VideoStreamSourceChanged()
+		err := state.streamH264Passthrough()
 		if err != nil {
-			ss.logger.Errorf("decrement %s hit error: %s", ss.Stream.Name(), err.Error())
-			return
-		}
-		ss.activePeers--
-		if ss.activePeers <= 0 {
-			ss.activePeers = 0
+			state.logger.Warnw("rtp_passthrough not possible, falling back to GoStream", "err", err)
+			// if passthrough failed, fall back to gostream based approach
+			state.Stream.Start()
+			state.streamSource = streamSourceGoStream
 		}
-	}()
 
-	if ss.activePeers == 1 {
-		ss.logger.Debugf("decrement %s calling stopBasedOnSub", ss.Stream.Name())
-		err = ss.stopBasedOnSub(ctx)
+	case state.streamSource == streamSourcePassthrough:
+		// no op if we are using passthrough & are healthy
+		state.logger.Debug("still healthy and using h264 passthrough")
+	case state.streamSource == streamSourceGoStream:
+		// Try to upgrade to passthrough if we are using gostream. We leave these logs as debugs as
+		// we expect some components to not implement rtp passthrough.
+ state.logger.Debugw("currently using gostream, trying upgrade to rtp_passthrough") + // attempt to cut over to passthrough + err := state.streamH264Passthrough() if err != nil { - ss.logger.Error(err.Error()) - return err + state.logger.Debugw("rtp_passthrough upgrade failed, continuing with gostream", "err", err) } } - return nil -} - -func (ss *StreamState) restart(ctx context.Context) { - ss.logger.Debugf("restart %s START activePeers: %d", ss.Stream.Name(), ss.activePeers) - defer func() { ss.logger.Debugf("restart %s END activePeers: %d", ss.Stream.Name(), ss.activePeers) }() - - if ss.activePeers == 0 { - // nothing to do if we don't have any active peers - return - } - - if ss.streamSource == streamSourceGoStream { - // nothing to do if stream source is gostream - return - } - - if ss.streamSourceSub != rtppassthrough.NilSubscription && ss.streamSourceSub.Terminated.Err() == nil { - // if the stream is still healthy, do nothing - return - } - - err := ss.streamH264Passthrough(ctx) - if err != nil { - ss.logger.CDebugw(ctx, "rtp_passthrough not possible, falling back to GoStream", "err", err.Error(), "name", ss.Stream.Name()) - // if passthrough failed, fall back to gostream based approach - ss.Stream.Start() - ss.streamSource = streamSourceGoStream - - return - } - // passthrough succeeded, listen for when subscription end and call start again if so } -func (ss *StreamState) streamH264Passthrough(ctx context.Context) error { - cam, err := camera.FromRobot(ss.robot, ss.Stream.Name()) +func (state *StreamState) streamH264Passthrough() error { + cam, err := streamCamera.Camera(state.robot, state.Stream) if err != nil { return err } + // Get the camera and see if it implements the rtp passthrough API of SubscribeRTP + Unsubscribe rtpPassthroughSource, ok := cam.(rtppassthrough.Source) if !ok { - err := fmt.Errorf("expected %s to implement rtppassthrough.Source", ss.Stream.Name()) - return errors.Wrap(ErrRTPPassthroughNotSupported, err.Error()) + err := fmt.Errorf("expected %s to implement rtppassthrough.Source", state.Stream.Name()) + return perrors.Wrap(ErrRTPPassthroughNotSupported, err.Error()) } + var count atomic.Uint64 + + // We might be already sending video via gostream. In this case we: + // - First try and create an RTP passthrough subscription + // - If not successful, continue with gostream. + // - Otherwise if successful, stop gostream. + // - Once we're sure gostream is stopped, we close the `releasePackets` channel + // + // This ensures we only start sending passthrough packets after gostream has stopped sending + // video packets. + releasePackets := make(chan struct{}) + cb := func(pkts []*rtp.Packet) { + <-releasePackets for _, pkt := range pkts { - if err := ss.Stream.WriteRTP(pkt); err != nil { - ss.logger.Debugw("stream.WriteRTP", "name", ss.Stream.Name(), "err", err.Error()) + // Also, look at unsubscribe error logs. Definitely a bug. Probably benign. + if count.Add(1)%10000 == 0 { + state.logger.Infow("WriteRTP called. 
Sampling 1/10000", "stream", state.Stream.Name(), "count", count.Load(), "seqNumber", pkt.Header.SequenceNumber, "ts", pkt.Header.Timestamp)
+			}
+			if err := state.Stream.WriteRTP(pkt); err != nil {
+				state.logger.Debugw("stream.WriteRTP", "name", state.Stream.Name(), "err", err.Error())
 			}
 		}
 	}
 
-	sub, err := rtpPassthroughSource.SubscribeRTP(ctx, rtpBufferSize, cb)
+	sub, err := rtpPassthroughSource.SubscribeRTP(state.closedCtx, rtpBufferSize, cb)
 	if err != nil {
-		return errors.Wrap(ErrRTPPassthroughNotSupported, err.Error())
+		return perrors.Wrap(ErrRTPPassthroughNotSupported, err.Error())
 	}
-	ss.logger.CWarnw(ctx, "Stream using experimental H264 passthrough", "name", ss.Stream.Name())
-	ss.monitorSubscription(sub)
+	state.logger.Warnw("Stream using experimental H264 passthrough", "name", state.Stream.Name())
+
+	if state.streamSource == streamSourceGoStream {
+		state.logger.Debugf("monitorSubscription stopping gostream %s", state.Stream.Name())
+		// We've succeeded creating a passthrough stream. If we were streaming using gostream, stop it.
+		state.Stream.Stop()
+	}
+	close(releasePackets)
+	state.streamSourceSub = sub
+	state.streamSource = streamSourcePassthrough
+
+	state.wg.Add(1)
+	utils.ManagedGo(func() {
+		state.monitorSubscription(sub.Terminated)
+	}, state.wg.Done)
 
 	return nil
 }
 
-func (ss *StreamState) unsubscribeH264Passthrough(ctx context.Context, id rtppassthrough.SubscriptionID) error {
-	cam, err := camera.FromRobot(ss.robot, ss.Stream.Name())
+func (state *StreamState) unsubscribeH264Passthrough(ctx context.Context, id rtppassthrough.SubscriptionID) error {
+	cam, err := streamCamera.Camera(state.robot, state.Stream)
 	if err != nil {
 		return err
 	}
 
 	rtpPassthroughSource, ok := cam.(rtppassthrough.Source)
 	if !ok {
-		err := fmt.Errorf("expected %s to implement rtppassthrough.Source", ss.Stream.Name())
-		return errors.Wrap(ErrRTPPassthroughNotSupported, err.Error())
+		err := fmt.Errorf("expected %s (%T) to implement rtppassthrough.Source", state.Stream.Name(), cam)
+		return perrors.Wrap(ErrRTPPassthroughNotSupported, err.Error())
 	}
 
 	if err := rtpPassthroughSource.Unsubscribe(ctx, id); err != nil {
diff --git a/robot/web/stream/state/state_test.go b/robot/web/stream/state/state_test.go
index ed4c5b337b5..35103505838 100644
--- a/robot/web/stream/state/state_test.go
+++ b/robot/web/stream/state/state_test.go
@@ -16,6 +16,7 @@ import (
 	"github.com/viamrobotics/webrtc/v3"
 	"go.viam.com/test"
 	"go.viam.com/utils"
+	"go.viam.com/utils/testutils"
 
 	"go.viam.com/rdk/components/camera"
 	"go.viam.com/rdk/components/camera/rtppassthrough"
@@ -123,58 +124,8 @@ func mockRobot(s rtppassthrough.Source) robot.Robot {
 
 func TestStreamState(t *testing.T) {
 	ctx := context.Background()
 	logger := logging.NewTestLogger(t)
-
-	t.Run("Stream returns the provided stream", func(t *testing.T) {
-		mockRTPPassthroughSource := &mockRTPPassthroughSource{}
-		robot := mockRobot(mockRTPPassthroughSource)
-		streamMock := &mockStream{name: camName, t: t}
-		s := state.New(streamMock, robot, logger)
-		test.That(t, s.Stream, test.ShouldEqual, streamMock)
-	})
-
-	t.Run("close succeeds if no methods have been called", func(t *testing.T) {
-		mockRTPPassthroughSource := &mockRTPPassthroughSource{}
-		robot := mockRobot(mockRTPPassthroughSource)
-		streamMock := &mockStream{name: camName, t: t}
-		s := state.New(streamMock, robot, logger)
-		test.That(t, s.Close(), test.ShouldBeNil)
-	})
-
-	t.Run("Increment() returns an error if Init() is not called first", func(t *testing.T) {
-		mockRTPPassthroughSource := &mockRTPPassthroughSource{}
-		robot := mockRobot(mockRTPPassthroughSource)
-		streamMock := &mockStream{name: camName, t: t}
-		s := state.New(streamMock, robot, logger)
-		test.That(t, s.Increment(ctx), test.ShouldBeError, state.ErrUninitialized)
-	})
-
-	t.Run("Decrement() returns an error if Init() is not called first", func(t *testing.T) {
-		mockRTPPassthroughSource := &mockRTPPassthroughSource{}
-		robot := mockRobot(mockRTPPassthroughSource)
-		streamMock := &mockStream{name: camName, t: t}
-		s := state.New(streamMock, robot, logger)
-		test.That(t, s.Decrement(ctx), test.ShouldBeError, state.ErrUninitialized)
-	})
-
-	t.Run("Increment() returns an error if called after Close()", func(t *testing.T) {
-		mockRTPPassthroughSource := &mockRTPPassthroughSource{}
-		robot := mockRobot(mockRTPPassthroughSource)
-		streamMock := &mockStream{name: camName, t: t}
-		s := state.New(streamMock, robot, logger)
-		s.Init()
-		s.Close()
-		test.That(t, s.Increment(ctx), test.ShouldWrap, state.ErrClosed)
-	})
-
-	t.Run("Decrement() returns an error if called after Close()", func(t *testing.T) {
-		mockRTPPassthroughSource := &mockRTPPassthroughSource{}
-		robot := mockRobot(mockRTPPassthroughSource)
-		streamMock := &mockStream{name: camName, t: t}
-		s := state.New(streamMock, robot, logger)
-		s.Init()
-		s.Close()
-		test.That(t, s.Decrement(ctx), test.ShouldWrap, state.ErrClosed)
-	})
+	// we have to use sleep here as we are asserting the state doesn't change after a given period of time
+	sleepDuration := time.Millisecond * 200
 
 	t.Run("when rtppassthrough.Souce is provided but SubscribeRTP always returns an error", func(t *testing.T) {
 		var startCount atomic.Int64
@@ -225,85 +176,90 @@ func TestStreamState(t *testing.T) {
 		test.That(t, startCount.Load(), test.ShouldEqual, 0)
 		test.That(t, stopCount.Load(), test.ShouldEqual, 0)
 
-		t.Log("the first Increment() calls SubscribeRTP and then calls Start() when an error is reurned")
-		test.That(t, s.Increment(ctx), test.ShouldBeNil)
-		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 1)
-		test.That(t, startCount.Load(), test.ShouldEqual, 1)
-		test.That(t, stopCount.Load(), test.ShouldEqual, 0)
-
-		t.Log("subsequent Increment() all calls call SubscribeRTP trying to determine " +
-			"if they can upgrade but don't call any other gostream methods as SubscribeRTP returns an error")
-		test.That(t, s.Increment(ctx), test.ShouldBeNil)
-		test.That(t, s.Increment(ctx), test.ShouldBeNil)
-		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3)
-		test.That(t, startCount.Load(), test.ShouldEqual, 1)
-		test.That(t, stopCount.Load(), test.ShouldEqual, 0)
+		t.Log("the first Increment() eventually calls SubscribeRTP and then calls Start() when an error is returned")
+		test.That(t, s.Increment(), test.ShouldBeNil)
+		var prevSubscribeRTPCount int64
+		testutils.WaitForAssertion(t, func(tb testing.TB) {
+			test.That(tb, subscribeRTPCount.Load(), test.ShouldBeGreaterThan, 0)
+			prevSubscribeRTPCount = subscribeRTPCount.Load()
+			test.That(tb, startCount.Load(), test.ShouldEqual, 1)
+			test.That(tb, stopCount.Load(), test.ShouldEqual, 0)
+		})
 
 		t.Log("as long as the number of Decrement() calls is less than the number of Increment() calls, no gostream methods are called")
-		test.That(t, s.Decrement(ctx), test.ShouldBeNil)
-		test.That(t, s.Decrement(ctx), test.ShouldBeNil)
-		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3)
+		test.That(t, s.Increment(), test.ShouldBeNil)
+		test.That(t, s.Increment(), test.ShouldBeNil)
+		test.That(t,
s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldBeGreaterThan, prevSubscribeRTPCount) + }) + prevSubscribeRTPCount = subscribeRTPCount.Load() test.That(t, startCount.Load(), test.ShouldEqual, 1) test.That(t, stopCount.Load(), test.ShouldEqual, 0) t.Log("when the number of Decrement() calls is equal to the number of Increment() calls stop is called") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldBeGreaterThan, prevSubscribeRTPCount) + }) + prevSubscribeRTPCount = subscribeRTPCount.Load() test.That(t, startCount.Load(), test.ShouldEqual, 1) test.That(t, stopCount.Load(), test.ShouldEqual, 1) - t.Log("then when the number of Increment() calls exceeds Decrement(), both SubscribeRTP & Start are called again") - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 4) - test.That(t, startCount.Load(), test.ShouldEqual, 2) - test.That(t, stopCount.Load(), test.ShouldEqual, 1) + t.Log("then when the number of Increment() calls exceeds Decrement(), both SubscribeRTP & Start are eventually called again") + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldBeGreaterThan, prevSubscribeRTPCount) + test.That(tb, startCount.Load(), test.ShouldEqual, 2) + test.That(tb, stopCount.Load(), test.ShouldEqual, 1) + }) + prevSubscribeRTPCount = subscribeRTPCount.Load() t.Log("calling Decrement() more times than Increment() has a floor of zero") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 4) - test.That(t, startCount.Load(), test.ShouldEqual, 2) - test.That(t, stopCount.Load(), test.ShouldEqual, 2) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldBeGreaterThan, prevSubscribeRTPCount) + test.That(tb, startCount.Load(), test.ShouldEqual, 2) + test.That(tb, stopCount.Load(), test.ShouldEqual, 2) + }) + prevSubscribeRTPCount = subscribeRTPCount.Load() // multiple Decrement() calls when the count is already at zero doesn't call any methods - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 4) test.That(t, startCount.Load(), test.ShouldEqual, 2) test.That(t, stopCount.Load(), test.ShouldEqual, 2) // once the count is at zero , calling Increment() again calls SubscribeRTP and when it returns an error Start - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 5) - test.That(t, startCount.Load(), test.ShouldEqual, 3) - test.That(t, stopCount.Load(), test.ShouldEqual, 2) + test.That(t, s.Increment(), 
test.ShouldBeNil)
+		testutils.WaitForAssertion(t, func(tb testing.TB) {
+			test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 5)
+			test.That(tb, startCount.Load(), test.ShouldEqual, 3)
+			test.That(tb, stopCount.Load(), test.ShouldEqual, 2)
+		})
 
 		// set count back to zero
-		test.That(t, s.Decrement(ctx), test.ShouldBeNil)
-		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 5)
-		test.That(t, startCount.Load(), test.ShouldEqual, 3)
-		test.That(t, stopCount.Load(), test.ShouldEqual, 3)
-
-		t.Log("calling Increment() with a cancelled context returns an error & does not call any gostream or rtppassthrough.Source methods")
-		canceledCtx, cancelFn := context.WithCancel(context.Background())
-		cancelFn()
-		test.That(t, s.Increment(canceledCtx), test.ShouldBeError, context.Canceled)
-		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 5)
-		test.That(t, startCount.Load(), test.ShouldEqual, 3)
-		test.That(t, stopCount.Load(), test.ShouldEqual, 3)
-
-		// make it so that non cancelled Decrement() would call stop to confirm that does not happen when context is cancelled
-		test.That(t, s.Increment(ctx), test.ShouldBeNil)
-		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 6)
-		test.That(t, startCount.Load(), test.ShouldEqual, 4)
-		test.That(t, stopCount.Load(), test.ShouldEqual, 3)
-
-		t.Log("calling Decrement() with a cancelled context returns an error & does not call any gostream methods")
-		test.That(t, s.Decrement(canceledCtx), test.ShouldBeError, context.Canceled)
-		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 6)
-		test.That(t, startCount.Load(), test.ShouldEqual, 4)
-		test.That(t, stopCount.Load(), test.ShouldEqual, 3)
+		test.That(t, s.Decrement(), test.ShouldBeNil)
+		testutils.WaitForAssertion(t, func(tb testing.TB) {
+			test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 5)
+			test.That(tb, startCount.Load(), test.ShouldEqual, 3)
+			test.That(tb, stopCount.Load(), test.ShouldEqual, 3)
+		})
+
+		// make it so that non cancelled Increment() would call stop to confirm that does not happen when context is cancelled
+		test.That(t, s.Increment(), test.ShouldBeNil)
+		testutils.WaitForAssertion(t, func(tb testing.TB) {
+			test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 6)
+			test.That(tb, startCount.Load(), test.ShouldEqual, 4)
+			test.That(tb, stopCount.Load(), test.ShouldEqual, 3)
+		})
 	})
 
 	t.Run("when rtppassthrough.Souce is provided and SubscribeRTP doesn't return an error", func(t *testing.T) {
@@ -386,75 +342,80 @@ func TestStreamState(t *testing.T) {
 		test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0)
 		test.That(t, writeRTPCalledCtx.Err(), test.ShouldBeNil)
 
-		t.Log("the first Increment() call calls SubscribeRTP()")
-		test.That(t, s.Increment(ctx), test.ShouldBeNil)
-		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 1)
-		test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0)
+		t.Log("the first Increment() eventually calls SubscribeRTP()")
+		test.That(t, s.Increment(), test.ShouldBeNil)
+		testutils.WaitForAssertion(t, func(tb testing.TB) {
+			test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 1)
+			test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 0)
+		})
 
 		// WriteRTP is called
 		<-writeRTPCalledCtx.Done()
 
 		t.Log("subsequent Increment() calls don't call any other rtppassthrough.Source methods")
-		test.That(t, s.Increment(ctx), test.ShouldBeNil)
-		test.That(t, s.Increment(ctx), test.ShouldBeNil)
+		test.That(t, s.Increment(), test.ShouldBeNil)
+		test.That(t, s.Increment(), test.ShouldBeNil)
+		time.Sleep(sleepDuration)
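Because Increment() and Decrement() now only enqueue work for StreamState's background goroutine, the rewritten assertions poll with testutils.WaitForAssertion (from go.viam.com/utils/testutils, imported at the top of this file) and reserve bare sleeps for asserting that a counter does not change. A minimal self-contained sketch of that polling pattern; the counter and goroutine here are hypothetical stand-ins, not rdk code:

package state_test

import (
	"sync/atomic"
	"testing"

	"go.viam.com/test"
	"go.viam.com/utils/testutils"
)

func TestEventuallyObservesBackgroundWork(t *testing.T) {
	var count atomic.Int64
	// Stand-in for the stream state's tick loop applying a queued Increment.
	go func() { count.Add(1) }()

	// WaitForAssertion retries the closure until it passes or times out,
	// so the test tolerates scheduling delay instead of racing it.
	testutils.WaitForAssertion(t, func(tb testing.TB) {
		test.That(tb, count.Load(), test.ShouldEqual, 1)
	})
}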
test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 1) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) t.Log("as long as the number of Decrement() calls is less than the number " + "of Increment() calls, no rtppassthrough.Source methods are called") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 1) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) t.Log("when the number of Decrement() calls is equal to the number of Increment() calls stop is called") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 1) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 1) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 1) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 1) + }) t.Log("then when the number of Increment() calls exceeds Decrement(), SubscribeRTP is called again") - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 2) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 1) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 2) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 1) + }) t.Log("calling Decrement() more times than Increment() has a floor of zero") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 2) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 2) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 2) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 2) + }) // multiple Decrement() calls when the count is already at zero doesn't call any methods - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 2) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 2) // once the count is at zero , calling Increment() again calls start - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 2) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 3) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 2) + }) // set count back to zero - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 3) - - t.Log("calling Increment() with a cancelled context returns an error & does not call any rtppassthrough.Source methods") - 
canceledCtx, cancelFn := context.WithCancel(context.Background()) - cancelFn() - test.That(t, s.Increment(canceledCtx), test.ShouldBeError, context.Canceled) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 3) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 3) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 3) + }) // make it so that non cancelled Decrement() would call stop to confirm that does not happen when context is cancelled - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 4) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 3) - - t.Log("calling Decrement() with a cancelled context returns an error & does not call any rtppassthrough.Source methods") - test.That(t, s.Decrement(canceledCtx), test.ShouldBeError, context.Canceled) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 4) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 3) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 4) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 3) + }) t.Log("when the subscription terminates while there are still subscribers, SubscribeRTP is called again") var cancelledSubs int @@ -567,15 +528,18 @@ func TestStreamState(t *testing.T) { test.That(t, stopCount.Load(), test.ShouldEqual, 0) t.Log("the first Increment() call calls SubscribeRTP() which returns a success") - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 1) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) - test.That(t, startCount.Load(), test.ShouldEqual, 0) - test.That(t, stopCount.Load(), test.ShouldEqual, 0) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 1) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 0) + test.That(tb, startCount.Load(), test.ShouldEqual, 0) + test.That(tb, stopCount.Load(), test.ShouldEqual, 0) + }) t.Log("subsequent Increment() calls don't call any other rtppassthrough.Source or gostream methods") - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, s.Increment(ctx), test.ShouldBeNil) + test.That(t, s.Increment(), test.ShouldBeNil) + test.That(t, s.Increment(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 1) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) test.That(t, startCount.Load(), test.ShouldEqual, 0) @@ -610,8 +574,9 @@ func TestStreamState(t *testing.T) { t.Log("when the number of Decrement() calls is less than the number of " + "Increment() calls no rtppassthrough.Source or gostream methods are called") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 2) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) test.That(t, startCount.Load(), test.ShouldEqual, 1) @@ -619,7 +584,8 @@ func TestStreamState(t *testing.T) { t.Log("when the number of Decrement() calls is equal to the number of " + 
"Increment() calls stop is called (as gostream is the data source)") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 2) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) test.That(t, startCount.Load(), test.ShouldEqual, 1) @@ -627,64 +593,77 @@ func TestStreamState(t *testing.T) { t.Log("then when the number of Increment() calls exceeds Decrement(), " + "SubscribeRTP is called again followed by Start if it returns an error") - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) - test.That(t, startCount.Load(), test.ShouldEqual, 2) - test.That(t, stopCount.Load(), test.ShouldEqual, 1) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 3) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 0) + test.That(tb, startCount.Load(), test.ShouldEqual, 2) + test.That(tb, stopCount.Load(), test.ShouldEqual, 1) + }) t.Log("calling Decrement() more times than Increment() has a floor of zero and calls Stop()") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) - test.That(t, startCount.Load(), test.ShouldEqual, 2) - test.That(t, stopCount.Load(), test.ShouldEqual, 2) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 3) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 0) + test.That(tb, startCount.Load(), test.ShouldEqual, 2) + test.That(tb, stopCount.Load(), test.ShouldEqual, 2) + }) // multiple Decrement() calls when the count is already at zero doesn't call any methods - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 3) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) test.That(t, startCount.Load(), test.ShouldEqual, 2) test.That(t, stopCount.Load(), test.ShouldEqual, 2) // once the count is at zero , calling Increment() again calls SubscribeRTP followed by Start - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 4) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) - test.That(t, startCount.Load(), test.ShouldEqual, 3) - test.That(t, stopCount.Load(), test.ShouldEqual, 2) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 4) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 0) + test.That(tb, startCount.Load(), test.ShouldEqual, 3) + test.That(tb, stopCount.Load(), test.ShouldEqual, 2) + }) t.Log("if while gostream is being used Increment is called and SubscribeRTP succeeds, Stop is called") subscribeRTPReturnError.Store(false) - test.That(t, s.Increment(ctx), 
test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 5) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) - test.That(t, startCount.Load(), test.ShouldEqual, 3) - test.That(t, stopCount.Load(), test.ShouldEqual, 3) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 5) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 0) + test.That(tb, startCount.Load(), test.ShouldEqual, 3) + test.That(tb, stopCount.Load(), test.ShouldEqual, 3) + }) // calling Decrement() fewer times than Increment() doesn't call any methods - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 5) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) test.That(t, startCount.Load(), test.ShouldEqual, 3) test.That(t, stopCount.Load(), test.ShouldEqual, 3) t.Log("calling Decrement() more times than Increment() has a floor of zero and calls Unsubscribe()") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 5) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 1) - test.That(t, startCount.Load(), test.ShouldEqual, 3) - test.That(t, stopCount.Load(), test.ShouldEqual, 3) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 5) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 1) + test.That(tb, startCount.Load(), test.ShouldEqual, 3) + test.That(tb, stopCount.Load(), test.ShouldEqual, 3) + }) // multiple Decrement() calls when the count is already at zero doesn't call any methods - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 5) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 1) test.That(t, startCount.Load(), test.ShouldEqual, 3) @@ -692,11 +671,13 @@ func TestStreamState(t *testing.T) { t.Log("if while rtp_passthrough is being used the the subscription " + "terminates & afterwards rtp_passthrough is no longer supported, Start is called") - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 6) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 1) - test.That(t, startCount.Load(), test.ShouldEqual, 3) - test.That(t, stopCount.Load(), test.ShouldEqual, 3) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 6) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 1) + test.That(tb, startCount.Load(), test.ShouldEqual, 3) + test.That(tb, stopCount.Load(), test.ShouldEqual, 3) + }) subscribeRTPReturnError.Store(true) @@ -723,11 +704,13 @@ func TestStreamState(t *testing.T) { test.That(t, stopCount.Load(), test.ShouldEqual, 3) // Decrement() calls Stop() as 
gostream is the data source - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 7) - test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 1) - test.That(t, startCount.Load(), test.ShouldEqual, 4) - test.That(t, stopCount.Load(), test.ShouldEqual, 4) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, subscribeRTPCount.Load(), test.ShouldEqual, 7) + test.That(tb, unsubscribeCount.Load(), test.ShouldEqual, 1) + test.That(tb, startCount.Load(), test.ShouldEqual, 4) + test.That(tb, stopCount.Load(), test.ShouldEqual, 4) + }) }) t.Run("when the camera does not implement rtppassthrough.Souce", func(t *testing.T) { @@ -754,70 +737,75 @@ func TestStreamState(t *testing.T) { s.Init() t.Log("the first Increment() -> Start()") - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, startCount.Load(), test.ShouldEqual, 1) - test.That(t, stopCount.Load(), test.ShouldEqual, 0) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, startCount.Load(), test.ShouldEqual, 1) + test.That(tb, stopCount.Load(), test.ShouldEqual, 0) + }) t.Log("subsequent Increment() all calls don't call any other gostream methods") - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, s.Increment(ctx), test.ShouldBeNil) + test.That(t, s.Increment(), test.ShouldBeNil) + test.That(t, s.Increment(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, startCount.Load(), test.ShouldEqual, 1) test.That(t, stopCount.Load(), test.ShouldEqual, 0) t.Log("as long as the number of Decrement() calls is less than the number of Increment() calls, no gostream methods are called") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, startCount.Load(), test.ShouldEqual, 1) test.That(t, stopCount.Load(), test.ShouldEqual, 0) t.Log("when the number of Decrement() calls is equal to the number of Increment() calls stop is called") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, startCount.Load(), test.ShouldEqual, 1) - test.That(t, stopCount.Load(), test.ShouldEqual, 1) - test.That(t, s.Increment(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, startCount.Load(), test.ShouldEqual, 1) + test.That(tb, stopCount.Load(), test.ShouldEqual, 1) + }) t.Log("then when the number of Increment() calls exceeds Decrement(), Start is called again") - test.That(t, startCount.Load(), test.ShouldEqual, 2) - test.That(t, stopCount.Load(), test.ShouldEqual, 1) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, startCount.Load(), test.ShouldEqual, 2) + test.That(tb, stopCount.Load(), test.ShouldEqual, 1) + }) t.Log("calling Decrement() more times than Increment() has a floor of zero") - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, startCount.Load(), test.ShouldEqual, 2) - test.That(t, stopCount.Load(), test.ShouldEqual, 2) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, startCount.Load(), test.ShouldEqual, 2) + test.That(tb, stopCount.Load(), test.ShouldEqual, 2) + }) // multiple Decrement() calls when the 
count is already at zero doesn't call any methods - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, s.Decrement(ctx), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + test.That(t, s.Decrement(), test.ShouldBeNil) + time.Sleep(sleepDuration) test.That(t, startCount.Load(), test.ShouldEqual, 2) test.That(t, stopCount.Load(), test.ShouldEqual, 2) // once the count is at zero , calling Increment() again calls start - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, startCount.Load(), test.ShouldEqual, 3) - test.That(t, stopCount.Load(), test.ShouldEqual, 2) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, startCount.Load(), test.ShouldEqual, 3) + test.That(tb, stopCount.Load(), test.ShouldEqual, 2) + }) // set count back to zero - test.That(t, s.Decrement(ctx), test.ShouldBeNil) - test.That(t, startCount.Load(), test.ShouldEqual, 3) - test.That(t, stopCount.Load(), test.ShouldEqual, 3) - - t.Log("calling Increment() with a cancelled context returns an error & does not call any gostream methods") - canceledCtx, cancelFn := context.WithCancel(context.Background()) - cancelFn() - test.That(t, s.Increment(canceledCtx), test.ShouldBeError, context.Canceled) - test.That(t, startCount.Load(), test.ShouldEqual, 3) - test.That(t, stopCount.Load(), test.ShouldEqual, 3) + test.That(t, s.Decrement(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, startCount.Load(), test.ShouldEqual, 3) + test.That(tb, stopCount.Load(), test.ShouldEqual, 3) + }) // make it so that non cancelled Decrement() would call stop to confirm that does not happen when context is cancelled - test.That(t, s.Increment(ctx), test.ShouldBeNil) - test.That(t, startCount.Load(), test.ShouldEqual, 4) - test.That(t, stopCount.Load(), test.ShouldEqual, 3) - - t.Log("calling Decrement() with a cancelled context returns an error & does not call any gostream methods") - test.That(t, s.Decrement(canceledCtx), test.ShouldBeError, context.Canceled) - test.That(t, startCount.Load(), test.ShouldEqual, 4) - test.That(t, stopCount.Load(), test.ShouldEqual, 3) + test.That(t, s.Increment(), test.ShouldBeNil) + testutils.WaitForAssertion(t, func(tb testing.TB) { + test.That(tb, startCount.Load(), test.ShouldEqual, 4) + test.That(tb, stopCount.Load(), test.ShouldEqual, 3) + }) }) } diff --git a/robot/web/stream/stream.go b/robot/web/stream/stream.go index da0f08ac469..d47b4464975 100644 --- a/robot/web/stream/stream.go +++ b/robot/web/stream/stream.go @@ -4,9 +4,11 @@ package webstream import ( + "bytes" "context" "errors" - "math" + "runtime" + "strconv" "time" "go.viam.com/utils" @@ -23,7 +25,7 @@ func StreamVideoSource( backoffOpts *BackoffTuningOptions, logger logging.Logger, ) error { - return gostream.StreamVideoSourceWithErrorHandler(ctx, source, stream, backoffOpts.getErrorThrottledHandler(logger), logger) + return gostream.StreamVideoSourceWithErrorHandler(ctx, source, stream, backoffOpts.getErrorThrottledHandler(logger, stream.Name()), logger) } // StreamAudioSource starts a stream from an audio source with a throttled error handler. 
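The hunks below also swap the handler's backoff math: instead of deriving a delay from math.Pow and capping it with MaxSleep, the error count now indexes into a fixed ladder of sleeps that tops out at five seconds. A standalone sketch of that lookup and how repeated errors walk it (illustrative names only, not the actual rdk API):

package main

import (
	"fmt"
	"time"
)

// Mirrors the fixed ladder introduced below: the first error sleeps 500ms,
// later errors walk up the table, and everything past the end pins at 5s.
var backoffSleeps = []time.Duration{500 * time.Millisecond, time.Second, 2 * time.Second, 5 * time.Second}

func sleepFor(errorCount int) time.Duration {
	if errorCount < 1 {
		return 0
	}
	if errorCount > len(backoffSleeps) {
		return backoffSleeps[len(backoffSleeps)-1]
	}
	return backoffSleeps[errorCount-1]
}

func main() {
	for count := 1; count <= 6; count++ {
		fmt.Printf("errorCount=%d -> sleep %v\n", count, sleepFor(count))
	}
}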
@@ -34,7 +36,7 @@ func StreamAudioSource( backoffOpts *BackoffTuningOptions, logger logging.Logger, ) error { - return gostream.StreamAudioSourceWithErrorHandler(ctx, source, stream, backoffOpts.getErrorThrottledHandler(logger), logger) + return gostream.StreamAudioSourceWithErrorHandler(ctx, source, stream, backoffOpts.getErrorThrottledHandler(logger, "audio"), logger) } // BackoffTuningOptions represents a set of parameters for determining exponential @@ -55,38 +57,80 @@ type BackoffTuningOptions struct { Cooldown time.Duration } +var backoffSleeps []time.Duration = []time.Duration{500 * time.Millisecond, time.Second, 2 * time.Second, 5 * time.Second} + // GetSleepTimeFromErrorCount returns a sleep time from an error count. func (opts *BackoffTuningOptions) GetSleepTimeFromErrorCount(errorCount int) time.Duration { if errorCount < 1 || opts == nil { return 0 } - multiplier := math.Pow(2, float64(errorCount-1)) - uncappedSleep := opts.BaseSleep * time.Duration(multiplier) - sleep := math.Min(float64(uncappedSleep), float64(opts.MaxSleep)) - return time.Duration(sleep) + + errorCount -= 1 + if errorCount >= len(backoffSleeps) { + return backoffSleeps[len(backoffSleeps)-1] + } + + return backoffSleeps[errorCount] +} + +// This is terrible, slow, and should never be used. +func goid() int { + var ( + goroutinePrefix = []byte("goroutine ") + ) + + buf := make([]byte, 32) + n := runtime.Stack(buf, false) + buf = buf[:n] + // goroutine 1 [running]: ... + + buf, ok := bytes.CutPrefix(buf, goroutinePrefix) + if !ok { + return -1 + } + + i := bytes.IndexByte(buf, ' ') + if i < 0 { + return -1 + } + + if ret, err := strconv.Atoi(string(buf[:i])); err == nil { + return ret + } else { + return -1 + } } -func (opts *BackoffTuningOptions) getErrorThrottledHandler(logger logging.Logger) func(context.Context, error) { +func (opts *BackoffTuningOptions) getErrorThrottledHandler(logger logging.Logger, streamName string) func(context.Context, error) { var prevErr error var errorCount int lastErrTime := time.Now() return func(ctx context.Context, err error) { now := time.Now() + // logger.Infow("Backoff debug", "this", fmt.Sprintf("%p", opts), "err", err, "lastErr", prevErr, "Is?", errors.Is(prevErr, err), "count", errorCount, "cooldown", opts.Cooldown, "since", now.Sub(lastErrTime), "type", fmt.Sprintf("%T", err), "prevType", fmt.Sprintf("%T", prevErr), "ctxDone?", ctx.Err(), "goroutine", goid()) + // if ctx.Err() != nil { + // debug.PrintStack() + // } + if now.Sub(lastErrTime) > opts.Cooldown { errorCount = 0 } lastErrTime = now - if errors.Is(prevErr, err) { + switch { + case errors.Is(prevErr, err): errorCount++ - } else { + case prevErr != nil && prevErr.Error() == err.Error(): + errorCount++ + default: prevErr = err errorCount = 1 } sleep := opts.GetSleepTimeFromErrorCount(errorCount) - logger.Errorw("error getting media", "error", err, "count", errorCount, "sleep", sleep) + logger.Errorw("error getting media", "streamName", streamName, "error", err, "count", errorCount, "sleep", sleep) + utils.SelectContextOrWait(ctx, sleep) } } diff --git a/robot/web/web_c.go b/robot/web/web_c.go index 6c393629e8f..b103d9a0b25 100644 --- a/robot/web/web_c.go +++ b/robot/web/web_c.go @@ -160,7 +160,7 @@ func (svc *webService) makeStreamServer(ctx context.Context) (*StreamServer, err if len(svc.videoSources) != 0 || len(svc.audioSources) != 0 { svc.logger.Debug("not starting streams due to no stream config being set") } - noopServer, err := webstream.NewServer(streams, svc.r, logging.GetOrNewLogger("rdk.networking")) 
+ noopServer, err := webstream.NewServer(streams, svc.r, svc.logger.Sublogger("stream")) return &StreamServer{noopServer, false}, err } @@ -215,7 +215,7 @@ func (svc *webService) makeStreamServer(ctx context.Context) (*StreamServer, err streamTypes = append(streamTypes, false) } - streamServer, err := webstream.NewServer(streams, svc.r, logging.GetOrNewLogger("rdk.networking")) + streamServer, err := webstream.NewServer(streams, svc.r, svc.logger.Sublogger("stream")) if err != nil { return nil, err } @@ -238,7 +238,7 @@ func (svc *webService) startStream(streamFunc func(opts *webstream.BackoffTuning defer svc.webWorkers.Done() close(waitCh) opts := &webstream.BackoffTuningOptions{ - BaseSleep: 50 * time.Microsecond, + BaseSleep: time.Second, MaxSleep: 2 * time.Second, Cooldown: 5 * time.Second, } diff --git a/testutils/robottestutils/robot_utils.go b/testutils/robottestutils/robot_utils.go index a68f9e40586..40597f2638d 100644 --- a/testutils/robottestutils/robot_utils.go +++ b/testutils/robottestutils/robot_utils.go @@ -119,7 +119,7 @@ func ServerAsSeparateProcess(t *testing.T, cfgFileName string, logger logging.Lo // // WaitForServing will return true if the server has started successfully in the allotted time, and // false otherwise. -//nolint +// nolint func WaitForServing(observer *observer.ObservedLogs, port int) bool { // Message:"\n\\_ 2024-02-07T20:47:03.576Z\tINFO\trobot_server\tweb/web.go:598\tserving\t{\"url\":\"http://127.0.0.1:20000\"}" successRegex := regexp.MustCompile(fmt.Sprintf("\tserving\t.*:%d\"", port)) From 25b3720f1c404f33f02ffef9887de39ab6a563f8 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Fri, 16 Aug 2024 15:28:55 -0400 Subject: [PATCH 02/14] clean up camera/client --- components/camera/client.go | 38 +++++++++---------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/components/camera/client.go b/components/camera/client.go index cb7fdc89222..d85428c9843 100644 --- a/components/camera/client.go +++ b/components/camera/client.go @@ -8,7 +8,6 @@ import ( "image" "io" "os" - "runtime/debug" "sync" "sync/atomic" "time" @@ -38,16 +37,6 @@ import ( "go.viam.com/rdk/utils" ) -var ( - red = "\033[31m" - reset = "\033[0m" -) - -// this helps make the test case much easier to read. -func (c *client) redLog(msg string) { - c.logger.Warn(red + msg + reset) -} - var ( // ErrNoPeerConnection indicates there was no peer connection. 
ErrNoPeerConnection = errors.New("No PeerConnection") @@ -97,7 +86,6 @@ func NewClientFromConn( streamClient := streampb.NewStreamServiceClient(conn) trackClosed := make(chan struct{}) close(trackClosed) - logger.SetLevel(logging.DEBUG) return &client{ remoteName: remoteName, Named: name.PrependRemote(remoteName).AsNamed(), @@ -370,11 +358,6 @@ func (c *client) Close(ctx context.Context) error { _, span := trace.StartSpan(ctx, "camera::client::Close") defer span.End() - c.logger.Warn("Close START") - defer c.logger.Warn("Close END") - - debug.PrintStack() - c.healthyClientChMu.Lock() if c.healthyClientCh != nil { close(c.healthyClientCh) @@ -383,7 +366,6 @@ func (c *client) Close(ctx context.Context) error { c.healthyClientChMu.Unlock() // unsubscribe from all video streams that have been established with modular cameras - c.unsubscribeAll() // NOTE: (Nick S) we are intentionally releasing the lock before we wait for @@ -415,8 +397,6 @@ func (c *client) SubscribeRTP( ) (rtppassthrough.Subscription, error) { ctx, span := trace.StartSpan(ctx, "camera::client::SubscribeRTP") defer span.End() - c.redLog(fmt.Sprintf("SubscribeRTP START %s client: %p, pc: %p", c.Name().String(), c, c.conn.PeerConn())) - defer c.redLog(fmt.Sprintf("SubscribeRTP END %s client: %p, pc: %p", c.Name().String(), c, c.conn.PeerConn())) // RSDK-6340: The resource manager closes remote resources when the underlying // connection goes bad. However, when the connection is re-established, the client // objects these resources represent are not re-initialized/marked "healthy". @@ -450,7 +430,7 @@ func (c *client) SubscribeRTP( return sub, err } g := utils.NewGuard(func() { - c.logger.Info("Error subscribing to RTP. Closing passthrough buffer.") + c.logger.CInfo(ctx, "Error subscribing to RTP. Closing passthrough buffer.") rtpPacketBuffer.Close() }) defer g.OnFail() @@ -471,7 +451,7 @@ func (c *client) SubscribeRTP( // for which video stream. The `grpc.Tracker` API is for manipulating that map. tracker, ok := c.conn.(grpc.Tracker) if !ok { - c.logger.Errorw("Client conn is not a `Tracker`", "connType", fmt.Sprintf("%T", c.conn)) + c.logger.CErrorw(ctx, "Client conn is not a `Tracker`", "connType", fmt.Sprintf("%T", c.conn)) return rtppassthrough.NilSubscription, ErrNoSharedPeerConnection } @@ -491,6 +471,8 @@ func (c *client) SubscribeRTP( // the same camera when the remote receives `RemoveStream`, it wouldn't know which to stop // sending data for. if len(c.runningStreams) == 0 { + c.logger.CInfow(ctx, "SubscribeRTP is creating a video track", + "client", fmt.Sprintf("%p", c), "peerConnection", fmt.Sprintf("%p", c.conn.PeerConn())) // A previous subscriber/track may have exited, but its resources have not necessarily been // cleaned up. We must wait for that to complete. We additionally select on other error // conditions. @@ -517,7 +499,6 @@ func (c *client) SubscribeRTP( // Remove the OnTrackSub once we either fail or succeed. defer tracker.RemoveOnTrackSub(c.trackName()) - c.redLog(fmt.Sprintf("SubscribeRTP AddStream CALL %s client: %p, pc: %p", c.trackName(), c, c.conn.PeerConn())) // Call `AddStream` on the remote. In the successful case, this will result in a // PeerConnection renegotiation to add this camera's video track and have the `OnTrack` // callback invoked. 
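As the comments above describe, every subscriber to the same camera piggy-backs on one shared video track: only the first SubscribeRTP call issues AddStream (and waits for the OnTrack callback), later callers just register their packet callbacks, and the last Unsubscribe is the one that sends RemoveStream. A minimal sketch of that reference-counted fan-out, with hypothetical names (the real client keys this state by track name and guards it with rtpPassthroughMu):

package main

import "sync"

type packet struct{ payload []byte }

// fanOut is a stand-in for the client's runningStreams bookkeeping: one
// shared webrtc track feeding any number of subscriber callbacks.
type fanOut struct {
	mu   sync.Mutex
	subs map[int]func(*packet)
	next int
}

// subscribe registers cb and reports whether cb is the first subscriber;
// the caller would send AddStream to the remote only in that case.
func (f *fanOut) subscribe(cb func(*packet)) (id int, first bool) {
	f.mu.Lock()
	defer f.mu.Unlock()
	if f.subs == nil {
		f.subs = map[int]func(*packet){}
	}
	first = len(f.subs) == 0
	id = f.next
	f.next++
	f.subs[id] = cb
	return id, first
}

// unsubscribe drops one subscriber and reports whether it was the last;
// the caller would send RemoveStream only when it was.
func (f *fanOut) unsubscribe(id int) (last bool) {
	f.mu.Lock()
	defer f.mu.Unlock()
	delete(f.subs, id)
	return len(f.subs) == 0
}

// deliver fans one packet from the shared track out to every subscriber.
func (f *fanOut) deliver(p *packet) {
	f.mu.Lock()
	defer f.mu.Unlock()
	for _, cb := range f.subs {
		cb(p)
	}
}

func main() {
	var f fanOut
	id, first := f.subscribe(func(*packet) {})
	_ = first // true here: this caller would issue AddStream
	f.deliver(&packet{payload: []byte{0x80}})
	if last := f.unsubscribe(id); last {
		_ = last // last subscriber gone: this caller would issue RemoveStream
	}
}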
@@ -525,7 +506,6 @@ func (c *client) SubscribeRTP( c.logger.CDebugw(ctx, "SubscribeRTP AddStream hit error", "subID", sub.ID.String(), "trackName", c.trackName(), "err", err) return rtppassthrough.NilSubscription, err } - c.redLog(fmt.Sprintf("SubscribeRTP AddStream RETURN %s client: %p, pc: %p", c.trackName(), c, c.conn.PeerConn())) // TODO: Test this by adding a sleep before packets are sent. Does anything call `RemoveStream`? // @@ -537,14 +517,14 @@ func (c *client) SubscribeRTP( // To prevent that failure mode, we exit with an error if a track is not received within // the SubscribeRTP context. - c.redLog(fmt.Sprintf("SubscribeRTP waiting for track %s client: %p, pc: %p", c.trackName(), c, c.conn.PeerConn())) + c.logger.CDebugw(ctx, "SubscribeRTP waiting for track", "client", fmt.Sprintf("%p", c), "pc", fmt.Sprintf("%p", c.conn.PeerConn())) select { case <-ctx.Done(): return rtppassthrough.NilSubscription, fmt.Errorf("Track not received within SubscribeRTP provided context %w", ctx.Err()) case <-healthyClientCh: return rtppassthrough.NilSubscription, errors.New("Track not received before client closed") case <-trackReceived: - c.redLog(fmt.Sprintf("received track %s client: %p, pc: %p", c.trackName(), c, c.conn.PeerConn())) + c.logger.CDebugw(ctx, "SubscribeRTP received track data", "client", fmt.Sprintf("%p", c), "pc", fmt.Sprintf("%p", c.conn.PeerConn())) } // Set up channel to detect when the track has closed. This can happen in response to an @@ -750,14 +730,14 @@ func (c *client) unsubscribeAll() { func (c *client) unsubscribeChildrenSubs(generationId int) { c.rtpPassthroughMu.Lock() defer c.rtpPassthroughMu.Unlock() - c.logger.Debugw("client unsubscribeChildrenSubs called", "name", c.Name(), "generationId", generationId, "numSubs", len(c.associatedSubs)) - defer c.logger.Debugw("client unsubscribeChildrenSubs done", "name", c.Name(), "generationId", generationId) + c.logger.Debugw("client unsubscribeChildrenSubs called", "generationId", generationId, "numSubs", len(c.associatedSubs)) + defer c.logger.Debugw("client unsubscribeChildrenSubs done", "generationId", generationId) for _, subId := range c.associatedSubs[generationId] { bufAndCB, ok := c.runningStreams[subId] if !ok { continue } - c.logger.Debugw("stopping subscription", "name", c.Name(), "generationId", generationId, "subId", subId.String()) + c.logger.Debugw("stopping subscription", "generationId", generationId, "subId", subId.String()) delete(c.runningStreams, subId) bufAndCB.buf.Close() } From 0da90a162dab79db06bd4a16b1ee089ba8348599 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:06:12 -0400 Subject: [PATCH 03/14] Clean up output --- components/camera/client_test.go | 115 ++++++++++++++++--------------- robot/client/client.go | 4 -- robot/web/stream/state/state.go | 14 ++-- 3 files changed, 63 insertions(+), 70 deletions(-) diff --git a/components/camera/client_test.go b/components/camera/client_test.go index 18756e5785a..ea7703f370e 100644 --- a/components/camera/client_test.go +++ b/components/camera/client_test.go @@ -3,7 +3,6 @@ package camera_test import ( "bytes" "context" - "fmt" "image" "image/color" "image/png" @@ -745,20 +744,7 @@ var ( Reset = "\033[0m" ) -// this helps make the test case much easier to read. -func greenLog(t *testing.T, msg string) { - t.Log(Green + msg + Reset) -} - -// this helps make the test case much easier to read. 
-func redLog(t *testing.T, msg string) { - t.Log(Red + msg + Reset) -} - -// Skipped due to -// https://viam.atlassian.net/browse/RSDK-7637 func TestMultiplexOverRemoteConnection(t *testing.T) { - t.Skip() logger := logging.NewTestLogger(t).Sublogger(t.Name()) remoteCfg := &config.Config{Components: []resource.Config{ @@ -785,7 +771,7 @@ func TestMultiplexOverRemoteConnection(t *testing.T) { }, }} mainCtx, mainRobot, _, mainWebSvc := setupRealRobot(t, mainCfg, logger.Sublogger("main")) - greenLog(t, "robot setup") + logger.Info("robot setup") defer mainRobot.Close(mainCtx) defer mainWebSvc.Close(mainCtx) @@ -795,7 +781,7 @@ func TestMultiplexOverRemoteConnection(t *testing.T) { image, _, err := cameraClient.Images(mainCtx) test.That(t, err, test.ShouldBeNil) test.That(t, image, test.ShouldNotBeNil) - greenLog(t, "got images") + logger.Info("got images") recvPktsCtx, recvPktsFn := context.WithCancel(context.Background()) sub, err := cameraClient.(rtppassthrough.Source).SubscribeRTP(mainCtx, 2, func(pkts []*rtp.Packet) { @@ -803,15 +789,13 @@ func TestMultiplexOverRemoteConnection(t *testing.T) { }) test.That(t, err, test.ShouldBeNil) <-recvPktsCtx.Done() - greenLog(t, "got packets") + logger.Info("got packets") err = cameraClient.(rtppassthrough.Source).Unsubscribe(mainCtx, sub.ID) test.That(t, err, test.ShouldBeNil) - greenLog(t, "unsubscribe") + logger.Info("unsubscribe") } -// Skipped due to -// https://viam.atlassian.net/browse/RSDK-7637 func TestMultiplexOverMultiHopRemoteConnection(t *testing.T) { logger := logging.NewTestLogger(t).Sublogger(t.Name()) @@ -870,7 +854,7 @@ func TestMultiplexOverMultiHopRemoteConnection(t *testing.T) { image, _, err := cameraClient.Images(mainCtx) test.That(t, err, test.ShouldBeNil) test.That(t, image, test.ShouldNotBeNil) - greenLog(t, "got images") + logger.Info("got images") time.Sleep(time.Second) @@ -880,7 +864,7 @@ func TestMultiplexOverMultiHopRemoteConnection(t *testing.T) { }) test.That(t, err, test.ShouldBeNil) <-recvPktsCtx.Done() - greenLog(t, "got packets") + logger.Info("got packets") test.That(t, cameraClient.(rtppassthrough.Source).Unsubscribe(mainCtx, sub.ID), test.ShouldBeNil) } @@ -941,7 +925,7 @@ func TestWhyMustTimeoutOnReadRTP(t *testing.T) { } mainCtx, mainRobot, _, mainWebSvc := setupRealRobot(t, mainCfg, logger.Sublogger("main")) - greenLog(t, "robot setup") + logger.Info("robot setup") defer mainRobot.Close(mainCtx) defer mainWebSvc.Close(mainCtx) @@ -951,9 +935,9 @@ func TestWhyMustTimeoutOnReadRTP(t *testing.T) { image, _, err := cameraClient.Images(mainCtx) test.That(t, err, test.ShouldBeNil) test.That(t, image, test.ShouldNotBeNil) - greenLog(t, "got images") + logger.Info("got images") - greenLog(t, fmt.Sprintf("calling SubscribeRTP on %T, %p", cameraClient, cameraClient)) + logger.Infof("calling SubscribeRTP on %T, %p", cameraClient, cameraClient) time.Sleep(time.Second) pktsChan := make(chan []*rtp.Packet) @@ -969,14 +953,14 @@ func TestWhyMustTimeoutOnReadRTP(t *testing.T) { }) test.That(t, err, test.ShouldBeNil) <-recvPktsCtx.Done() - greenLog(t, "got packets") + logger.Info("got packets") - greenLog(t, "calling close") + logger.Info("calling close") test.That(t, remoteRobot2.Close(context.Background()), test.ShouldBeNil) test.That(t, remoteWebSvc2.Close(context.Background()), test.ShouldBeNil) - greenLog(t, "close called") + logger.Info("close called") - greenLog(t, "waiting for SubscribeRTP to stop receiving packets") + logger.Info("waiting for SubscribeRTP to stop receiving packets") timeoutCtx, timeoutFn := 
context.WithTimeout(context.Background(), time.Second) defer timeoutFn() @@ -999,7 +983,7 @@ Loop: case <-pktsChan: continue case <-pktTimeoutCtx.Done(): - greenLog(t, fmt.Sprintf("it has been at least %s since SubscribeRTP has received a packet", pktTimeout)) + logger.Infof("it has been at least %s since SubscribeRTP has received a packet", pktTimeout) pktTimeoutFn() // https://go.dev/ref/spec#Break_statements // The 'Loop' label is needed so that we break out of the loop @@ -1011,13 +995,18 @@ Loop: } } - greenLog(t, "sleeping") + logger.Info("sleeping") time.Sleep(time.Second) // sub should still be alive test.That(t, sub.Terminated.Err(), test.ShouldBeNil) } +// Notes: +// - Ideally, we'd lower robot client reconnect timers down from 10 seconds. +// - We need to force robot client webrtc connections +// - WebRTC connections need to disable SRTP replay protection + // This tests the following scenario: // 1. main-part (main) -> remote-part-1 (r1) -> remote-part-2 (r2) where r2 has a camera // 2. the client in the main part makes an AddStream(r1:r2:rtpPassthroughCamera) request, starting a webrtc video track to be streamed from r2 -> r1 -> main -> client @@ -1025,7 +1014,6 @@ Loop: // 4. expect that r1 & main stop getting packets // 5. when the new instance of r2 comes back online main gets new rtp packets from it's track with r1 func TestGrandRemoteRebooting(t *testing.T) { - defer redLog(t, "TEST DONE") logger := logging.NewTestLogger(t).Sublogger(t.Name()) remoteCfg2 := &config.Config{ @@ -1073,7 +1061,7 @@ func TestGrandRemoteRebooting(t *testing.T) { } mainCtx, mainRobot, _, mainWebSvc := setupRealRobot(t, mainCfg, logger.Sublogger("main")) - greenLog(t, "robot setup") + logger.Info("robot setup") defer mainRobot.Close(mainCtx) defer mainWebSvc.Close(mainCtx) @@ -1083,32 +1071,37 @@ func TestGrandRemoteRebooting(t *testing.T) { image, _, err := mainCameraClient.Images(mainCtx) test.That(t, err, test.ShouldBeNil) test.That(t, image, test.ShouldNotBeNil) - greenLog(t, "got images") + logger.Info("got images") - greenLog(t, fmt.Sprintf("calling SubscribeRTP on %T, %p", mainCameraClient, mainCameraClient)) + logger.Infof("calling SubscribeRTP on %T, %p", mainCameraClient, mainCameraClient) time.Sleep(time.Second) - pktsChan := make(chan []*rtp.Packet) + pktsChan := make(chan []*rtp.Packet, 10) recvPktsCtx, recvPktsFn := context.WithCancel(context.Background()) + testDone := make(chan struct{}) + defer close(testDone) sub, err := mainCameraClient.(rtppassthrough.Source).SubscribeRTP(mainCtx, 512, func(pkts []*rtp.Packet) { // first packet recvPktsFn() // at some point packets are no longer published + lastPkt := pkts[len(pkts)-1] + logger.Info("Pushing packets: ", len(pkts), " TS:", lastPkt.Timestamp) select { + case <-testDone: case pktsChan <- pkts: - default: } + logger.Info("Pkt pushed. 
TS:", lastPkt.Timestamp)
 	})
 	test.That(t, err, test.ShouldBeNil)
 	<-recvPktsCtx.Done()
-	greenLog(t, "got packets")
+	logger.Info("got packets")
 
-	greenLog(t, "calling close")
+	logger.Info("calling close")
 	test.That(t, remoteRobot2.Close(remote2Ctx), test.ShouldBeNil)
 	test.That(t, remoteWebSvc2.Close(remote2Ctx), test.ShouldBeNil)
-	greenLog(t, "close called")
+	logger.Info("close called")
 
-	greenLog(t, "waiting for SubscribeRTP to stop receiving packets")
+	logger.Info("waiting for SubscribeRTP to stop receiving packets")
 	timeoutCtx, timeoutFn := context.WithTimeout(context.Background(), time.Second)
 	defer timeoutFn()
@@ -1128,10 +1121,12 @@ Loop:
 		pktTimeout := time.Millisecond * 500
 		pktTimeoutCtx, pktTimeoutFn = context.WithTimeout(context.Background(), pktTimeout)
 		select {
-		case <-pktsChan:
+		case pkts := <-pktsChan:
+			lastPkt := pkts[len(pkts)-1]
+			logger.Infof("First RTP packet received. TS: %v", lastPkt.Timestamp)
 			continue
 		case <-pktTimeoutCtx.Done():
-			greenLog(t, fmt.Sprintf("it has been at least %s since SubscribeRTP has received a packet", pktTimeout))
+			logger.Infof("it has been at least %s since SubscribeRTP has received a packet", pktTimeout)
 			pktTimeoutFn()
 			// https://go.dev/ref/spec#Break_statements
 			// The 'Loop' label is needed so that we break out of the loop
@@ -1154,14 +1149,14 @@ Loop:
 	// remote-1 which can be detectd
 	// by the fact that sub.Terminated.Done() is always the path this test goes down
 
-	greenLog(t, fmt.Sprintf("old robot address address %s", addr2))
+	logger.Infof("old robot address %s", addr2)
 	tcpAddr, ok := options2.Network.Listener.Addr().(*net.TCPAddr)
 	test.That(t, ok, test.ShouldBeTrue)
 	newListener, err := net.ListenTCP("tcp", &net.TCPAddr{Port: tcpAddr.Port})
 	test.That(t, err, test.ShouldBeNil)
 	options2.Network.Listener = newListener
-	greenLog(t, fmt.Sprintf("setting up new robot at address %s", newListener.Addr().String()))
+	logger.Infof("setting up new robot at address %s", newListener.Addr().String())
 
 	remote2CtxSecond, remoteRobot2Second, remoteWebSvc2Second := setupRealRobotWithOptions(
 		t,
@@ -1173,16 +1168,24 @@ Loop:
 	defer remoteWebSvc2Second.Close(remote2CtxSecond)
 	sndPktTimeoutCtx, sndPktTimeoutFn := context.WithTimeout(context.Background(), time.Second*20)
 	defer sndPktTimeoutFn()
-	select {
-	case <-sub.Terminated.Done():
-		// Right now we are going down this path b/c main's
-		redLog(t, "main's sub terminated due to close")
-		t.FailNow()
-	case <-pktsChan:
-		// Right now we never go down this path as the test is not able to get remote1 to reconnect to the new remote 2
-		greenLog(t, "SubscribeRTP got packets")
-	case <-sndPktTimeoutCtx.Done():
-		redLog(t, "timed out waiting for SubscribeRTP to receive packets")
-		t.FailNow()
+	testPassed := false
+	for !testPassed {
+		select {
+		case <-sub.Terminated.Done():
+			// Right now we are going down this path b/c main's
+			logger.Info("main's sub terminated due to close")
+			t.FailNow()
+		case pkts := <-pktsChan:
+			lastPkt := pkts[len(pkts)-1]
+			logger.Infof("Test finale RTP packet received. 
TS: %v", lastPkt.Timestamp)
+			// Right now we never go down this path as the test is not able to get remote1 to reconnect to the new remote 2
+			logger.Info("SubscribeRTP got packets")
+			testPassed = true
+		case <-sndPktTimeoutCtx.Done():
+			logger.Info("timed out waiting for SubscribeRTP to receive packets")
+			t.FailNow()
+		case <-time.After(time.Second):
+			logger.Info("still waiting for RTP packets")
+		}
 	}
 }
diff --git a/robot/client/client.go b/robot/client/client.go
index f7665db7873..618ba15e6e4 100644
--- a/robot/client/client.go
+++ b/robot/client/client.go
@@ -370,15 +370,11 @@ func (rc *RobotClient) connectWithLock(ctx context.Context) error {
 	rc.conn.ReplaceConn(conn)
 	rc.client = client
 	rc.refClient = refClient
-	rc.logger.Warn("connected")
 	rc.connected.Store(true)
 	if len(rc.resourceClients) != 0 {
-		rc.logger.Warn("updateResources START")
 		if err := rc.updateResources(ctx); err != nil {
-			rc.logger.Errorf("updateResources END, err: %s", err.Error())
 			return err
 		}
-		rc.logger.Warnw("updateResources END", "numResources", len(rc.resourceNames))
 	}
 
 	if rc.changeChan != nil {
diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go
index 91759cb700e..5fb907ff967 100644
--- a/robot/web/stream/state/state.go
+++ b/robot/web/stream/state/state.go
@@ -11,7 +11,6 @@ import (
 	"time"
 
 	"github.com/pion/rtp"
-	perrors "github.com/pkg/errors"
 	"go.uber.org/multierr"
 	"go.viam.com/utils"
@@ -24,8 +23,6 @@ import (
 )
 
 var (
-	// ErrRTPPassthroughNotSupported indicates that rtp_passthrough is not supported by the stream's camera.
-	ErrRTPPassthroughNotSupported = errors.New("RTP Passthrough Not Supported")
 	// ErrClosed indicates that the StreamState is already closed.
 	ErrClosed = errors.New("StreamState already closed")
 )
@@ -268,7 +265,7 @@ func (state *StreamState) tick() {
 			state.streamSource = streamSourceGoStream
 		}
 	case state.streamSource == streamSourcePassthrough && state.streamSourceSub.Terminated.Err() != nil:
-		// restart stream if there we were using passthrough but the sub is termianted
+		// restart stream if we were using passthrough but the sub is terminated
 		state.logger.Info("previous subscription terminated attempting to subscribe to rtp_passthrough")
 
 		// state.Stream.VideoStreamSourceChanged()
@@ -303,8 +300,7 @@ func (state *StreamState) streamH264Passthrough() error {
 	// Get the camera and see if it implements the rtp passthrough API of SubscribeRTP + Unsubscribe
 	rtpPassthroughSource, ok := cam.(rtppassthrough.Source)
 	if !ok {
-		err := fmt.Errorf("expected %s to implement rtppassthrough.Source", state.Stream.Name())
-		return perrors.Wrap(ErrRTPPassthroughNotSupported, err.Error())
+		return fmt.Errorf("Stream does not support RTP passthrough")
 	}
 
 	var count atomic.Uint64
@@ -334,7 +330,7 @@ func (state *StreamState) streamH264Passthrough() error {
 
 	sub, err := rtpPassthroughSource.SubscribeRTP(state.closedCtx, rtpBufferSize, cb)
 	if err != nil {
-		return perrors.Wrap(ErrRTPPassthroughNotSupported, err.Error())
+		return fmt.Errorf("SubscribeRTP failed: %w", err)
 	}
 	state.logger.Warnw("Stream using experimental H264 passthrough", "name", state.Stream.Name())
@@ -363,9 +359,7 @@ func (state *StreamState) unsubscribeH264Passthrough(ctx context.Context, id rtp
 
 	rtpPassthroughSource, ok := cam.(rtppassthrough.Source)
 	if !ok {
-		err := fmt.Errorf("Subscription resource does not implement rtpPassthroughSource",
-			"streamName", state.Stream.Name(), "camType", fmt.Sprintf("%T", rtpPassthroughSource))
-		return perrors.Wrap(ErrRTPPassthroughNotSupported, err.Error())
+		return 
fmt.Errorf("Subscription resource does not implement rtpPassthroughSource. CamType: %T", rtpPassthroughSource) } if err := rtpPassthroughSource.Unsubscribe(ctx, id); err != nil { From 6be640ff496afe1f20e48c9ffd71395acc32e789 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:09:54 -0400 Subject: [PATCH 04/14] Update goutils --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 0b3c27df582..fedd52b1d2b 100644 --- a/go.mod +++ b/go.mod @@ -87,7 +87,7 @@ require ( go.uber.org/zap v1.24.0 go.viam.com/api v0.1.330 go.viam.com/test v1.1.1-0.20220913152726-5da9916c08a2 - go.viam.com/utils v0.1.94 + go.viam.com/utils v0.1.95 goji.io v2.0.2+incompatible golang.org/x/image v0.15.0 golang.org/x/mobile v0.0.0-20240112133503-c713f31d574b diff --git a/go.sum b/go.sum index 0208b5911d9..9e962adb4b0 100644 --- a/go.sum +++ b/go.sum @@ -1545,6 +1545,8 @@ go.viam.com/test v1.1.1-0.20220913152726-5da9916c08a2 h1:oBiK580EnEIzgFLU4lHOXmG go.viam.com/test v1.1.1-0.20220913152726-5da9916c08a2/go.mod h1:XM0tej6riszsiNLT16uoyq1YjuYPWlRBweTPRDanIts= go.viam.com/utils v0.1.94 h1:fq/AosNoYd1MW4Avc7OS7gKJb03p5AGDHB+faiPswu4= go.viam.com/utils v0.1.94/go.mod h1:hQs6tzFBDRpvSM07K3qurAzASbtlgpHy3JTdRipeGsc= +go.viam.com/utils v0.1.95 h1:KLVO4cPw4u4HstinXEQxG5/zGcynh5ggd7xjnpt1sks= +go.viam.com/utils v0.1.95/go.mod h1:WwnkSrPuTZlMd1LRt+32u9ByJwbJSRZyg4vjDEgA1EU= go4.org/unsafe/assume-no-moving-gc v0.0.0-20230525183740-e7c30c78aeb2 h1:WJhcL4p+YeDxmZWg141nRm7XC8IDmhz7lk5GpadO1Sg= go4.org/unsafe/assume-no-moving-gc v0.0.0-20230525183740-e7c30c78aeb2/go.mod h1:FftLjUGFEDu5k8lt0ddY+HcrH/qU/0qk+H8j9/nTl3E= gocv.io/x/gocv v0.25.0/go.mod h1:Rar2PS6DV+T4FL+PM535EImD/h13hGVaHhnCu1xarBs= From 192e7c7f13b13499f3f6c8094c87d11104435a2c Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:26:04 -0400 Subject: [PATCH 05/14] Try to appease lint --- components/camera/client.go | 6 ++--- components/camera/client_test.go | 2 +- gostream/stream.go | 2 +- robot/web/stream/server.go | 4 +-- robot/web/stream/state/state.go | 2 +- robot/web/stream/state/state_test.go | 8 ------ robot/web/stream/stream.go | 38 +--------------------------- robot/web/web_c.go | 8 +----- 8 files changed, 9 insertions(+), 61 deletions(-) diff --git a/components/camera/client.go b/components/camera/client.go index d85428c9843..6533e05b245 100644 --- a/components/camera/client.go +++ b/components/camera/client.go @@ -463,10 +463,8 @@ func (c *client) SubscribeRTP( // If we do not currently have a video stream open for this camera, we create one. Otherwise // we'll piggy back on the existing stream. We piggy-back for two reasons: - // - // 1. Not doing so would result in two webrtc tracks for the same camera sending the exact same RTP - // packets. This would needlessly waste resources. - // + // 1. Not doing so would result in two webrtc tracks for the same camera sending the exact same + // RTP packets. This would needlessly waste resources. // 2. The signature of `RemoveStream` just takes the camera name. If we had two streams open for // the same camera when the remote receives `RemoveStream`, it wouldn't know which to stop // sending data for. diff --git a/components/camera/client_test.go b/components/camera/client_test.go index ea7703f370e..1e9c61ca1b5 100644 --- a/components/camera/client_test.go +++ b/components/camera/client_test.go @@ -1012,7 +1012,7 @@ Loop: // 2. 
the client in the main part makes an AddStream(r1:r2:rtpPassthroughCamera) request, starting a webrtc video track to be streamed from r2 -> r1 -> main -> client
 // 3. r2 reboots
 // 4. expect that r1 & main stop getting packets
-// 5. when the new instance of r2 comes back online main gets new rtp packets from it's track with r1
+// 5. when the new instance of r2 comes back online main gets new rtp packets from it's track with r1.
 func TestGrandRemoteRebooting(t *testing.T) {
 	logger := logging.NewTestLogger(t).Sublogger(t.Name())
 
diff --git a/gostream/stream.go b/gostream/stream.go
index 996612b5ee0..71d6d1100d2 100644
--- a/gostream/stream.go
+++ b/gostream/stream.go
@@ -168,7 +168,7 @@ func (bs *basicStream) VideoStreamSourceChanged() {
 
 // NOTE: (Nick S) This only writes video RTP packets
 // if we also need to support writing audio RTP packets, we should split
-// this method into WriteVideoRTP and WriteAudioRTP
+// this method into WriteVideoRTP and WriteAudioRTP.
 func (bs *basicStream) WriteRTP(pkt *rtp.Packet) error {
 	return bs.videoTrackLocal.rtpTrack.WriteRTP(pkt)
 }
diff --git a/robot/web/stream/server.go b/robot/web/stream/server.go
index c181df278bf..4f69f9b1237 100644
--- a/robot/web/stream/server.go
+++ b/robot/web/stream/server.go
@@ -363,12 +363,12 @@ func (server *Server) startMonitorCameraAvailable() {
 		logger.Info("Start")
 		defer server.logger.Info("End")
 		for utils.SelectContextOrWait(server.closedCtx, monitorCameraInterval) {
-			server.removeMissingStreams(logger)
+			server.removeMissingStreams()
 		}
 	}, server.activeBackgroundWorkers.Done)
 }
 
-func (server *Server) removeMissingStreams(logger logging.Logger) {
+func (server *Server) removeMissingStreams() {
 	server.mu.Lock()
 	defer server.mu.Unlock()
 	for key, streamState := range server.nameToStreamState {
diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go
index 5fb907ff967..a61d9ff1f41 100644
--- a/robot/web/stream/state/state.go
+++ b/robot/web/stream/state/state.go
@@ -223,7 +223,7 @@ func (state *StreamState) stopInputStream() {
 	state.logger.Debug("stopping h264 passthrough stream")
 	defer state.logger.Debug("h264 passthrough stream stopped")
 	err := state.unsubscribeH264Passthrough(state.closedCtx, state.streamSourceSub.ID)
-	if err != nil && err != camera.ErrUnknownSubscriptionID {
+	if err != nil && !errors.Is(err, camera.ErrUnknownSubscriptionID) {
 		state.logger.Warnw("Error calling unsubscribe", "err", err)
 		return
 	}
diff --git a/robot/web/stream/state/state_test.go b/robot/web/stream/state/state_test.go
index 35103505838..415504cbb8a 100644
--- a/robot/web/stream/state/state_test.go
+++ b/robot/web/stream/state/state_test.go
@@ -170,7 +170,6 @@ func TestStreamState(t *testing.T) {
 		robot := mockRobot(mockRTPPassthroughSource)
 		s := state.New(streamMock, robot, logger)
 		defer func() { utils.UncheckedError(s.Close()) }()
-		s.Init()
 
 		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 0)
 		test.That(t, startCount.Load(), test.ShouldEqual, 0)
@@ -336,8 +335,6 @@ func TestStreamState(t *testing.T) {
 		s := state.New(streamMock, robot, logger)
 		defer func() { utils.UncheckedError(s.Close()) }()
 
-		s.Init()
-
 		test.That(t, subscribeRTPCount.Load(), test.ShouldEqual, 0)
 		test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0)
 		test.That(t, writeRTPCalledCtx.Err(), test.ShouldBeNil)
@@ -519,9 +516,6 @@ func TestStreamState(t *testing.T) {
 		s := state.New(streamMock, robot, logger)
 		defer func() { utils.UncheckedError(s.Close()) }()
 
-		// start with RTPPassthrough being supported
-		s.Init()
-
 		test.That(t, 
subscribeRTPCount.Load(), test.ShouldEqual, 0) test.That(t, unsubscribeCount.Load(), test.ShouldEqual, 0) test.That(t, startCount.Load(), test.ShouldEqual, 0) @@ -734,8 +728,6 @@ func TestStreamState(t *testing.T) { robot := mockRobot(nil) s := state.New(streamMock, robot, logger) defer func() { utils.UncheckedError(s.Close()) }() - s.Init() - t.Log("the first Increment() -> Start()") test.That(t, s.Increment(), test.ShouldBeNil) testutils.WaitForAssertion(t, func(tb testing.TB) { diff --git a/robot/web/stream/stream.go b/robot/web/stream/stream.go index d47b4464975..7c357ae8302 100644 --- a/robot/web/stream/stream.go +++ b/robot/web/stream/stream.go @@ -4,11 +4,8 @@ package webstream import ( - "bytes" "context" "errors" - "runtime" - "strconv" "time" "go.viam.com/utils" @@ -57,7 +54,7 @@ type BackoffTuningOptions struct { Cooldown time.Duration } -var backoffSleeps []time.Duration = []time.Duration{500 * time.Millisecond, time.Second, 2 * time.Second, 5 * time.Second} +var backoffSleeps = []time.Duration{500 * time.Millisecond, time.Second, 2 * time.Second, 5 * time.Second} // GetSleepTimeFromErrorCount returns a sleep time from an error count. func (opts *BackoffTuningOptions) GetSleepTimeFromErrorCount(errorCount int) time.Duration { @@ -73,34 +70,6 @@ func (opts *BackoffTuningOptions) GetSleepTimeFromErrorCount(errorCount int) tim return backoffSleeps[errorCount] } -// This is terrible, slow, and should never be used. -func goid() int { - var ( - goroutinePrefix = []byte("goroutine ") - ) - - buf := make([]byte, 32) - n := runtime.Stack(buf, false) - buf = buf[:n] - // goroutine 1 [running]: ... - - buf, ok := bytes.CutPrefix(buf, goroutinePrefix) - if !ok { - return -1 - } - - i := bytes.IndexByte(buf, ' ') - if i < 0 { - return -1 - } - - if ret, err := strconv.Atoi(string(buf[:i])); err == nil { - return ret - } else { - return -1 - } -} - func (opts *BackoffTuningOptions) getErrorThrottledHandler(logger logging.Logger, streamName string) func(context.Context, error) { var prevErr error var errorCount int @@ -108,11 +77,6 @@ func (opts *BackoffTuningOptions) getErrorThrottledHandler(logger logging.Logger return func(ctx context.Context, err error) { now := time.Now() - // logger.Infow("Backoff debug", "this", fmt.Sprintf("%p", opts), "err", err, "lastErr", prevErr, "Is?", errors.Is(prevErr, err), "count", errorCount, "cooldown", opts.Cooldown, "since", now.Sub(lastErrTime), "type", fmt.Sprintf("%T", err), "prevType", fmt.Sprintf("%T", prevErr), "ctxDone?", ctx.Err(), "goroutine", goid()) - // if ctx.Err() != nil { - // debug.PrintStack() - // } - if now.Sub(lastErrTime) > opts.Cooldown { errorCount = 0 } diff --git a/robot/web/web_c.go b/robot/web/web_c.go index b103d9a0b25..26b25395ca5 100644 --- a/robot/web/web_c.go +++ b/robot/web/web_c.go @@ -10,7 +10,6 @@ import ( "runtime" "slices" "sync" - "time" "github.com/pkg/errors" streampb "go.viam.com/api/stream/v1" @@ -237,12 +236,7 @@ func (svc *webService) startStream(streamFunc func(opts *webstream.BackoffTuning utils.PanicCapturingGo(func() { defer svc.webWorkers.Done() close(waitCh) - opts := &webstream.BackoffTuningOptions{ - BaseSleep: time.Second, - MaxSleep: 2 * time.Second, - Cooldown: 5 * time.Second, - } - if err := streamFunc(opts); err != nil { + if err := streamFunc(&webstream.BackoffTuningOptions{}); err != nil { if utils.FilterOutError(err, context.Canceled) != nil { svc.logger.Errorw("error streaming", "error", err) } From 96981d9c729c7817700045ec07064fb5edc96fee Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: 
Sun, 18 Aug 2024 23:32:34 -0400 Subject: [PATCH 06/14] Cleanup --- robot/client/client.go | 2 -- robot/impl/resource_manager.go | 1 - robot/web/stream/server.go | 3 --- robot/web/stream/state/state.go | 3 --- 4 files changed, 9 deletions(-) diff --git a/robot/client/client.go b/robot/client/client.go index 618ba15e6e4..24a001844c7 100644 --- a/robot/client/client.go +++ b/robot/client/client.go @@ -464,7 +464,6 @@ func (rc *RobotClient) checkConnection(ctx context.Context, checkEvery, reconnec "reconnect_interval", reconnectEvery.Seconds(), ) rc.mu.Lock() - rc.logger.Warn("NOT connected") rc.connected.Store(false) if rc.changeChan != nil { rc.changeChan <- true @@ -662,7 +661,6 @@ func (rc *RobotClient) updateResources(ctx context.Context) error { // call metadata service. names, rpcAPIs, err := rc.resources(ctx) - // rc.logger.Infow("robotClient.updateResources", "numNames", len(names), "err", err, "numRc.ResourceNames", len(rc.resourceNames)) if err != nil && status.Code(err) != codes.Unimplemented { rc.logger.Infow("robotClient.updateResources -- returning error", "numRc.ResourceNames", len(rc.resourceNames)) return err diff --git a/robot/impl/resource_manager.go b/robot/impl/resource_manager.go index 8f00da6dfe1..aa61ea3b223 100644 --- a/robot/impl/resource_manager.go +++ b/robot/impl/resource_manager.go @@ -72,7 +72,6 @@ func newResourceManager( logger logging.Logger, ) *resourceManager { resLogger := logger.Sublogger("resource_manager") - resLogger.SetLevel(logging.DEBUG) return &resourceManager{ resources: resource.NewGraph(), processManager: newProcessManager(opts, logger), diff --git a/robot/web/stream/server.go b/robot/web/stream/server.go index 4f69f9b1237..2bfb0afad13 100644 --- a/robot/web/stream/server.go +++ b/robot/web/stream/server.go @@ -230,14 +230,12 @@ func (server *Server) AddStream(ctx context.Context, req *streampb.AddStreamRequ guard := rutils.NewGuard(func() { for _, sender := range ps.senders { - logging.Global().Infof("calling RemoveTrack on %s pc: %p", sender.Track().StreamID(), pc) utils.UncheckedError(pc.RemoveTrack(sender)) } }) defer guard.OnFail() addTrack := func(track webrtc.TrackLocal) error { - // logging.Global().Infof("calling AddTrack on %s pc: %p", track.StreamID(), pc) sender, err := pc.AddTrack(track) if err != nil { return err @@ -301,7 +299,6 @@ func (server *Server) RemoveStream(ctx context.Context, req *streampb.RemoveStre var errs error for _, sender := range server.activePeerStreams[pc][req.Name].senders { - // logging.Global().Infof("calling RemoveTrack on %s pc: %p", sender.Track().StreamID(), pc) errs = multierr.Combine(errs, pc.RemoveTrack(sender)) } if errs != nil { diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go index a61d9ff1f41..bf6e2213f07 100644 --- a/robot/web/stream/state/state.go +++ b/robot/web/stream/state/state.go @@ -255,8 +255,6 @@ func (state *StreamState) tick() { case state.streamSource == streamSourceUnknown: // && state.activeClients > 0 // this is the first subscription, attempt passthrough state.logger.Info("attempting to subscribe to rtp_passthrough") - - // state.Stream.VideoStreamSourceChanged() err := state.streamH264Passthrough() if err != nil { state.logger.Warnw("tick: rtp_passthrough not possible, falling back to GoStream", "err", err) @@ -268,7 +266,6 @@ func (state *StreamState) tick() { // restart stream if there we were using passthrough but the sub is terminated state.logger.Info("previous subscription terminated attempting to subscribe to rtp_passthrough") - // 
state.Stream.VideoStreamSourceChanged() err := state.streamH264Passthrough() if err != nil { state.logger.Warn("rtp_passthrough not possible, falling back to GoStream", "err", err) From 7e6e5fa5ff33fed22c53402d3c87d91f830aa165 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:36:03 -0400 Subject: [PATCH 07/14] Remove non-essentials --- components/camera/fake/camera.go | 2 - components/camera/rtppassthrough/buffer.go | 2 - gostream/stream.go | 5 -- gostream/webrtc_track.go | 91 ++-------------------- 4 files changed, 5 insertions(+), 95 deletions(-) diff --git a/components/camera/fake/camera.go b/components/camera/fake/camera.go index 73153455b02..2eef799b4c6 100644 --- a/components/camera/fake/camera.go +++ b/components/camera/fake/camera.go @@ -294,8 +294,6 @@ func (c *Camera) SubscribeRTP( bufferSize int, packetsCB rtppassthrough.PacketCallback, ) (rtppassthrough.Subscription, error) { - logging.Global().Warnf("SubscribeRTP FAKE START %s", c.Name().String()) - defer logging.Global().Warnf("SubscribeRTP FAKE END %s", c.Name().String()) if !c.RTPPassthrough { return rtppassthrough.NilSubscription, ErrRTPPassthroughNotEnabled } diff --git a/components/camera/rtppassthrough/buffer.go b/components/camera/rtppassthrough/buffer.go index 4da4bb426b0..3627a3dcdb1 100644 --- a/components/camera/rtppassthrough/buffer.go +++ b/components/camera/rtppassthrough/buffer.go @@ -93,8 +93,6 @@ func (w *Buffer) Close() { // where it will be run in the future. // If the buffer is full, it returnns an error and does // add the callback to the buffer. -// -// Dan: Public rtp.Packets and not callbacks? Until we've proved a need for more generality? func (w *Buffer) Publish(cb func()) error { rawErr := w.err.Load() diff --git a/gostream/stream.go b/gostream/stream.go index 71d6d1100d2..a270f8b2011 100644 --- a/gostream/stream.go +++ b/gostream/stream.go @@ -31,7 +31,6 @@ type Stream interface { // Start starts processing frames. Start() WriteRTP(pkt *rtp.Packet) error - // VideoStreamSourceChanged() // Ready signals that there is at least one client connected and that // streams are ready for input. The returned context should be used for @@ -162,10 +161,6 @@ func (bs *basicStream) Start() { utils.ManagedGo(bs.processOutputAudioChunks, bs.activeBackgroundWorkers.Done) } -func (bs *basicStream) VideoStreamSourceChanged() { - bs.videoTrackLocal.rtpTrack.StreamSourceChanged() -} - // NOTE: (Nick S) This only writes video RTP packets // if we also need to support writing audio RTP packets, we should split // this method into WriteVideoRTP and WriteAudioRTP. diff --git a/gostream/webrtc_track.go b/gostream/webrtc_track.go index 367cc95d503..f27ebebb1c7 100644 --- a/gostream/webrtc_track.go +++ b/gostream/webrtc_track.go @@ -4,14 +4,12 @@ import ( "math" "strings" "sync" - "sync/atomic" "time" "github.com/pion/rtp" "github.com/pion/rtp/codecs" "github.com/viamrobotics/webrtc/v3" "go.uber.org/multierr" - "go.viam.com/rdk/logging" ) // Adapted from https://github.com/pion/webrtc/blob/master/track_local_static.go @@ -36,21 +34,15 @@ type trackLocalStaticRTP struct { bindings []trackBinding codec webrtc.RTPCodecCapability id, rid, streamID string - - // testing - sequenceNumberOffset func(uint16) uint16 - streamSourceChanged atomic.Bool - highestSequenceNumber uint16 } // newtrackLocalStaticRTP returns a trackLocalStaticRTP. 
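
Before the constructor below, a note on the three fields deleted just above: they backed an experiment in keeping RTP sequence numbers continuous when a track's underlying source changed. The arithmetic of the removed sequenceNumberOffset helper, restated as a hypothetical standalone program (not part of this patch):

    package main

    import "fmt"

    // nextOffset maps sequence numbers from a restarted source onto a range
    // that continues from lastMax, relying on Go's uint16 wraparound.
    func nextOffset(lastMax, firstNew uint16) func(uint16) uint16 {
        if lastMax > firstNew {
            return func(sn uint16) uint16 { return lastMax + 1 + sn - firstNew }
        }
        return func(sn uint16) uint16 { return sn }
    }

    func main() {
        f := nextOffset(65000, 10) // the source restarted at a lower sequence number
        fmt.Println(f(10), f(11))  // 65001 65002
    }
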
func newtrackLocalStaticRTP(c webrtc.RTPCodecCapability, id, streamID string) *trackLocalStaticRTP { return &trackLocalStaticRTP{ - sequenceNumberOffset: func(u uint16) uint16 { return u }, - codec: c, - bindings: []trackBinding{}, - id: id, - streamID: streamID, + codec: c, + bindings: []trackBinding{}, + id: id, + streamID: streamID, } } @@ -120,86 +112,13 @@ func (s *trackLocalStaticRTP) Codec() webrtc.RTPCodecCapability { return s.codec } -func sequenceNumberOffset(lastMaxPacketSequenceNumber uint16, firstNewPacketSequenceNumber uint16) func(uint16) uint16 { - if lastMaxPacketSequenceNumber > firstNewPacketSequenceNumber { - logging.Global().Warnf("new sequenceNumberGenerator which is not identity lastMaxPacketSequenceNumber(%d), firstNewPacketSequenceNumber(%d)", lastMaxPacketSequenceNumber, firstNewPacketSequenceNumber) - // continue with prev - return func(sequenceNumber uint16) uint16 { - return lastMaxPacketSequenceNumber + 1 + sequenceNumber - firstNewPacketSequenceNumber - } - - } - return func(u uint16) uint16 { return u } -} - -func (s *trackLocalStaticRTP) StreamSourceChanged() { - s.streamSourceChanged.Store(true) -} - -func wrapped(currentSequenceNumber uint16, hightestSequenceNumber uint16) bool { - // if the current sequence number is smaller than the currentHighestSequenceNumber by more - // than half the u16, assume that the sequence number has wrapped - if currentSequenceNumber > hightestSequenceNumber { - return false - } - return hightestSequenceNumber-currentSequenceNumber > math.MaxUint16/2 -} - -func (s *trackLocalStaticRTP) WriteRTP(p *rtp.Packet) error { - s.mu.RLock() - defer s.mu.RUnlock() - - writeErrs := []error{} - outboundPacket := *p - - for _, b := range s.bindings { - outboundPacket.Header.SSRC = uint32(b.ssrc) - outboundPacket.Header.PayloadType = uint8(b.payloadType) - if _, err := b.writeStream.WriteRTP(&outboundPacket.Header, outboundPacket.Payload); err != nil { - writeErrs = append(writeErrs, err) - } - } - - return multierr.Combine(writeErrs...) -} - // WriteRTP writes a RTP Packet to the trackLocalStaticRTP // If one PeerConnection fails the packets will still be sent to // all PeerConnections. The error message will contain the ID of the failed // PeerConnections so you can remove them. 
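
One more aside before the next removal: the deleted wrapped() helper above decided when the 16-bit sequence counter had rolled over. Its half-range heuristic, restated on its own (function name mine, logic as deleted):

    import "math"

    // wrappedAround reports whether cur should be treated as newer than
    // highest even though it is numerically smaller: a drop of more than
    // half the uint16 range means the counter passed 65535 and wrapped.
    func wrappedAround(cur, highest uint16) bool {
        if cur > highest {
            return false
        }
        return highest-cur > math.MaxUint16/2
    }
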
-func (s *trackLocalStaticRTP) WriteRTPModified(p *rtp.Packet) error { +func (s *trackLocalStaticRTP) WriteRTP(p *rtp.Packet) error { s.mu.RLock() defer s.mu.RUnlock() - originalSequenceNumber := p.Header.SequenceNumber - - // create new generator if the stream has changed needed - if s.streamSourceChanged.CompareAndSwap(true, false) { - // TODO: Rollover - s.sequenceNumberOffset = sequenceNumberOffset(s.highestSequenceNumber, p.Header.SequenceNumber) - } - - // Update the header's sequence number to - p.Header.SequenceNumber = s.sequenceNumberOffset(p.Header.SequenceNumber) - - // set the currentHighestSequenceNumber to the current packet's sequence number - // if the packet's sequence number is greater than the current highest sequence number - // or if the sequence number wrapped - setHighest := false - if p.Header.SequenceNumber > s.highestSequenceNumber { - setHighest = true - } - - if wrapped(p.Header.SequenceNumber, s.highestSequenceNumber) { - logging.Global().Warnf("updating highestSequenceNumber to %d due to wrapping, prevhighestSequenceNumber: %d, originalSN: %d", p.Header.SequenceNumber, s.highestSequenceNumber, originalSequenceNumber) - setHighest = true - } - - if setHighest { - s.highestSequenceNumber = p.Header.SequenceNumber - } else { - logging.Global().Warnf("publishing out of order message p.Header.SequenceNumber(%d), originalSN: %d, highestSequenceNumber: %d", p.Header.SequenceNumber, originalSequenceNumber, s.highestSequenceNumber) - } - logging.Global().Debugf("SN: %d", p.Header.SequenceNumber) writeErrs := []error{} outboundPacket := *p From 677392d0adc4f84267cc555a175639e37783a169 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:45:07 -0400 Subject: [PATCH 08/14] Remove more debugging --- grpc/conn.go | 8 ++------ robot/web/stream/camera/camera.go | 3 --- robot/web/stream/server.go | 4 ---- robot/web/stream/state/state.go | 2 -- 4 files changed, 2 insertions(+), 15 deletions(-) diff --git a/grpc/conn.go b/grpc/conn.go index 4bc5f79497b..22e3deec021 100644 --- a/grpc/conn.go +++ b/grpc/conn.go @@ -62,8 +62,6 @@ func (c *ReconfigurableClientConn) NewStream( // ReplaceConn replaces the underlying client connection with the connection passed in. This does not close the // old connection, the caller is expected to close it if needed. 
func (c *ReconfigurableClientConn) ReplaceConn(conn rpc.ClientConn) { - logging.Global().Info("ReplaceConn START") - defer logging.Global().Info("ReplaceConn END") c.connMu.Lock() c.conn = conn // It is safe to access this without a mutex as it is only ever nil once at the beginning of the @@ -74,14 +72,12 @@ func (c *ReconfigurableClientConn) ReplaceConn(conn rpc.ClientConn) { if pc := conn.PeerConn(); pc != nil { pc.OnTrack(func(trackRemote *webrtc.TrackRemote, rtpReceiver *webrtc.RTPReceiver) { - logging.Global().Warnf("OnTrack START %s pc: %p", trackRemote.StreamID(), pc) - defer logging.Global().Warnf("OnTrack END %s pc: %p", trackRemote.StreamID(), pc) c.onTrackCBByTrackNameMu.Lock() onTrackCB, ok := c.onTrackCBByTrackName[trackRemote.StreamID()] c.onTrackCBByTrackNameMu.Unlock() if !ok { - msg := "Callback not found for StreamID (trackName): %s, keys(resOnTrackCBs): %#v" - logging.Global().Errorf(msg, trackRemote.StreamID(), maps.Keys(c.onTrackCBByTrackName)) + logging.Global().Errorf("Callback not found for StreamID (trackName): %s, keys(resOnTrackCBs): %#v", + trackRemote.StreamID(), maps.Keys(c.onTrackCBByTrackName)) return } onTrackCB(trackRemote, rtpReceiver) diff --git a/robot/web/stream/camera/camera.go b/robot/web/stream/camera/camera.go index baf07baf374..948bfc112dc 100644 --- a/robot/web/stream/camera/camera.go +++ b/robot/web/stream/camera/camera.go @@ -8,9 +8,6 @@ import ( "go.viam.com/rdk/robot" ) -// Dan: After `web/stream/state` is moved into `web/stream` -- this can be moved into -// `web/stream`. And renamed to `CameraFromStreamName` -// // Camera returns the camera from the robot (derived from the stream) or // an error if it has no camera. func Camera(robot robot.Robot, stream gostream.Stream) (camera.Camera, error) { diff --git a/robot/web/stream/server.go b/robot/web/stream/server.go index 2bfb0afad13..8b8aa5669fa 100644 --- a/robot/web/stream/server.go +++ b/robot/web/stream/server.go @@ -64,7 +64,6 @@ func NewServer( activePeerStreams: map[*webrtc.PeerConnection]map[string]*peerState{}, isAlive: true, } - server.logger.SetLevel(logging.DEBUG) for _, stream := range streams { if err := server.add(stream); err != nil { @@ -273,7 +272,6 @@ func (server *Server) RemoveStream(ctx context.Context, req *streampb.RemoveStre defer span.End() pc, ok := rpc.ContextPeerConnection(ctx) server.logger.Infow("Removing video stream", "name", req.Name, "peerConn", pc) - defer server.logger.Warnf("RemoveStream END %s", req.Name) if !ok { return nil, errors.New("can only remove a stream over a WebRTC based connection") } @@ -357,8 +355,6 @@ func (server *Server) startMonitorCameraAvailable() { server.activeBackgroundWorkers.Add(1) utils.ManagedGo(func() { logger := server.logger.Sublogger("monitor") - logger.Info("Start") - defer server.logger.Info("End") for utils.SelectContextOrWait(server.closedCtx, monitorCameraInterval) { server.removeMissingStreams() } diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go index bf6e2213f07..fb471df4315 100644 --- a/robot/web/stream/state/state.go +++ b/robot/web/stream/state/state.go @@ -94,7 +94,6 @@ func (state *StreamState) Decrement() error { // Close closes the StreamState. 
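
Close, defined just below, is a standard cancel-then-drain shutdown. Reduced to its core, with field names assumed from the surrounding file:

    // worker mirrors the closedFn/wg pair that StreamState carries.
    type worker struct {
        cancel context.CancelFunc
        wg     sync.WaitGroup
    }

    // Close signals every goroutine watching the context, then blocks until
    // the WaitGroup confirms they have all returned.
    func (w *worker) Close() error {
        w.cancel()
        w.wg.Wait()
        return nil
    }
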
func (state *StreamState) Close() error { state.logger.Info("Closing streamState") - defer state.logger.Info("Wait completed") state.closedFn() state.wg.Wait() return nil @@ -237,7 +236,6 @@ func (state *StreamState) stopInputStream() { func (state *StreamState) send(msgType msgType) error { select { case state.msgChan <- msg{msgType: msgType}: - state.logger.Infow("pushed msg", "type", msgType) return nil case <-state.closedCtx.Done(): return state.closedCtx.Err() From 5529876ba2ae26b0bc01543ce4194bee91718bbb Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:45:40 -0400 Subject: [PATCH 09/14] a single lint error --- robot/web/stream/state/state.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go index fb471df4315..d871fecc501 100644 --- a/robot/web/stream/state/state.go +++ b/robot/web/stream/state/state.go @@ -295,7 +295,7 @@ func (state *StreamState) streamH264Passthrough() error { // Get the camera and see if it implements the rtp passthrough API of SubscribeRTP + Unsubscribe rtpPassthroughSource, ok := cam.(rtppassthrough.Source) if !ok { - return fmt.Errorf("Stream does not support RTP passthrough") + return fmt.Error("Stream does not support RTP passthrough") } var count atomic.Uint64 From eeda863b546cdf7f17ebc7b59a6f30c405f548f2 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:45:54 -0400 Subject: [PATCH 10/14] a single lint error -- now compiles --- robot/web/stream/state/state.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go index d871fecc501..af630688496 100644 --- a/robot/web/stream/state/state.go +++ b/robot/web/stream/state/state.go @@ -295,7 +295,7 @@ func (state *StreamState) streamH264Passthrough() error { // Get the camera and see if it implements the rtp passthrough API of SubscribeRTP + Unsubscribe rtpPassthroughSource, ok := cam.(rtppassthrough.Source) if !ok { - return fmt.Error("Stream does not support RTP passthrough") + return error.New("Stream does not support RTP passthrough") } var count atomic.Uint64 From ad73e2d3a5fc1f64bc0d9f91ba53143d31b228fc Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:46:04 -0400 Subject: [PATCH 11/14] a single lint error -- now compiles again --- robot/web/stream/state/state.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go index af630688496..3029b6354a4 100644 --- a/robot/web/stream/state/state.go +++ b/robot/web/stream/state/state.go @@ -295,7 +295,7 @@ func (state *StreamState) streamH264Passthrough() error { // Get the camera and see if it implements the rtp passthrough API of SubscribeRTP + Unsubscribe rtpPassthroughSource, ok := cam.(rtppassthrough.Source) if !ok { - return error.New("Stream does not support RTP passthrough") + return errors.New("Stream does not support RTP passthrough") } var count atomic.Uint64 From 785a5c089802c9878172f720d74f913bda208b50 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Sun, 18 Aug 2024 23:46:28 -0400 Subject: [PATCH 12/14] remove unused variable --- robot/web/stream/server.go | 1 - 1 file changed, 1 deletion(-) diff --git a/robot/web/stream/server.go b/robot/web/stream/server.go index 8b8aa5669fa..a7a918796d6 100644 --- a/robot/web/stream/server.go +++ b/robot/web/stream/server.go @@ -354,7 +354,6 @@ func (server *Server) add(stream gostream.Stream) error { func (server *Server) 
startMonitorCameraAvailable() { server.activeBackgroundWorkers.Add(1) utils.ManagedGo(func() { - logger := server.logger.Sublogger("monitor") for utils.SelectContextOrWait(server.closedCtx, monitorCameraInterval) { server.removeMissingStreams() } From d3387739935b723d8ed71266e8126c161bdb4ec3 Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Mon, 19 Aug 2024 13:53:09 -0400 Subject: [PATCH 13/14] Fix logging. Use test framework to fail tests. --- components/camera/client_test.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/components/camera/client_test.go b/components/camera/client_test.go index 1e9c61ca1b5..bbe9360bd5f 100644 --- a/components/camera/client_test.go +++ b/components/camera/client_test.go @@ -1123,18 +1123,17 @@ Loop: select { case pkts := <-pktsChan: lastPkt := pkts[len(pkts)-1] - logger.Info("First RTP packet received. TS: %v", lastPkt.Timestamp) + logger.Infof("First RTP packet received. TS: %v", lastPkt.Timestamp) continue case <-pktTimeoutCtx.Done(): - logger.Info("it has been at least %s since SubscribeRTP has received a packet", pktTimeout) + logger.Infow("SubscribeRTP timed out waiting for a packet. The remote is offline.", "pktTimeout", pktTimeout) pktTimeoutFn() // https://go.dev/ref/spec#Break_statements // The 'Loop' label is needed so that we break out of the loop // rather than out of the select statement break Loop case <-timeoutCtx.Done(): - t.Log("timed out waiting for SubscribeRTP packets to drain") - t.FailNow() + test.That(t, true, test.ShouldEqual, "timed out waiting for SubscribeRTP packets to drain") } } @@ -1149,7 +1148,7 @@ Loop: // remote-1 which can be detectd // by the fact that sub.Terminated.Done() is always the path this test goes down - logger.Info("old robot address address %s", addr2) + logger.Infow("old robot address", "address", addr2) tcpAddr, ok := options2.Network.Listener.Addr().(*net.TCPAddr) test.That(t, ok, test.ShouldBeTrue) newListener, err := net.ListenTCP("tcp", &net.TCPAddr{Port: tcpAddr.Port}) @@ -1173,8 +1172,7 @@ Loop: select { case <-sub.Terminated.Done(): // Right now we are going down this path b/c main's - logger.Info("main's sub terminated due to close") - t.FailNow() + test.That(t, true, test.ShouldEqual, "main's sub terminated due to close") case pkts := <-pktsChan: lastPkt := pkts[len(pkts)-1] logger.Info("Test finale RTP packet received. 
TS: %v", lastPkt.Timestamp) @@ -1182,8 +1180,7 @@ Loop: logger.Info("SubscribeRTP got packets") testPassed = true case <-sndPktTimeoutCtx.Done(): - logger.Info("timed out waiting for SubscribeRTP to receive packets") - t.FailNow() + test.That(t, true, test.ShouldEqual, "timed out waiting for SubscribeRTP to receive packets") case <-time.After(time.Second): logger.Info("still waiting for RTP packets") } From 3024f21688ad0621346958363c67db1a0da6a53d Mon Sep 17 00:00:00 2001 From: Dan Gottlieb Date: Mon, 19 Aug 2024 14:31:54 -0400 Subject: [PATCH 14/14] appease the linter --- components/camera/client.go | 67 +++++++++++++------------ components/camera/client_test.go | 36 +++++++------ grpc/conn.go | 3 +- logging/proto_conversions.go | 2 +- robot/web/stream/server.go | 2 +- robot/web/stream/state/state.go | 14 +++--- robot/web/stream/stream.go | 2 +- testutils/robottestutils/robot_utils.go | 2 +- 8 files changed, 68 insertions(+), 60 deletions(-) diff --git a/components/camera/client.go b/components/camera/client.go index 6533e05b245..97bf5ea982e 100644 --- a/components/camera/client.go +++ b/components/camera/client.go @@ -39,11 +39,11 @@ import ( var ( // ErrNoPeerConnection indicates there was no peer connection. - ErrNoPeerConnection = errors.New("No PeerConnection") + ErrNoPeerConnection = errors.New("no PeerConnection") // ErrNoSharedPeerConnection indicates there was no shared peer connection. - ErrNoSharedPeerConnection = errors.New("No Shared PeerConnection") + ErrNoSharedPeerConnection = errors.New("no Shared PeerConnection") // ErrUnknownSubscriptionID indicates that a SubscriptionID is unknown. - ErrUnknownSubscriptionID = errors.New("SubscriptionID Unknown") + ErrUnknownSubscriptionID = errors.New("subscriptionID Unknown") readRTPTimeout = time.Millisecond * 200 ) @@ -69,7 +69,7 @@ type client struct { rtpPassthroughMu sync.Mutex runningStreams map[rtppassthrough.SubscriptionID]bufAndCB - subGenerationId int + subGenerationID int associatedSubs map[int][]rtppassthrough.SubscriptionID trackClosed <-chan struct{} } @@ -477,7 +477,7 @@ func (c *client) SubscribeRTP( select { case <-c.trackClosed: case <-ctx.Done(): - return rtppassthrough.NilSubscription, fmt.Errorf("Track not closed within SubscribeRTP provided context %w", ctx.Err()) + return rtppassthrough.NilSubscription, fmt.Errorf("track not closed within SubscribeRTP provided context %w", ctx.Err()) case <-healthyClientCh: return rtppassthrough.NilSubscription, errors.New("camera client is closed") } @@ -489,11 +489,11 @@ func (c *client) SubscribeRTP( // We're creating a new video track. Bump the generation ID and associate all new // subscriptions to this generation. - c.subGenerationId += 1 - c.associatedSubs[c.subGenerationId] = []rtppassthrough.SubscriptionID{} + c.subGenerationID++ + c.associatedSubs[c.subGenerationID] = []rtppassthrough.SubscriptionID{} // Add the camera's addOnTrackSubFunc to the SharedConn's map of OnTrack callbacks. - tracker.AddOnTrackSub(c.trackName(), c.addOnTrackFunc(healthyClientCh, trackReceived, trackClosed, c.subGenerationId)) + tracker.AddOnTrackSub(c.trackName(), c.addOnTrackFunc(healthyClientCh, trackReceived, trackClosed, c.subGenerationID)) // Remove the OnTrackSub once we either fail or succeed. 
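
The generation bookkeeping above (subGenerationID plus associatedSubs) is what lets a dying track tear down exactly the subscriptions that were riding on it. The same idea as a self-contained sketch, with hypothetical names:

    // subTracker files each subscription under the generation of the video
    // track it rides on, so one track failure can end all of its riders.
    type subTracker struct {
        gen  int
        subs map[int][]string // generation -> subscription IDs
    }

    func (t *subTracker) newGeneration() int {
        t.gen++
        t.subs[t.gen] = nil
        return t.gen
    }

    func (t *subTracker) add(gen int, id string) {
        t.subs[gen] = append(t.subs[gen], id)
    }

    // closeGeneration hands back every rider of a dead track's generation
    // and forgets the generation itself.
    func (t *subTracker) closeGeneration(gen int) []string {
        ids := t.subs[gen]
        delete(t.subs, gen)
        return ids
    }
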
defer tracker.RemoveOnTrackSub(c.trackName()) @@ -518,9 +518,9 @@ func (c *client) SubscribeRTP( c.logger.CDebugw(ctx, "SubscribeRTP waiting for track", "client", fmt.Sprintf("%p", c), "pc", fmt.Sprintf("%p", c.conn.PeerConn())) select { case <-ctx.Done(): - return rtppassthrough.NilSubscription, fmt.Errorf("Track not received within SubscribeRTP provided context %w", ctx.Err()) + return rtppassthrough.NilSubscription, fmt.Errorf("track not received within SubscribeRTP provided context %w", ctx.Err()) case <-healthyClientCh: - return rtppassthrough.NilSubscription, errors.New("Track not received before client closed") + return rtppassthrough.NilSubscription, errors.New("track not received before client closed") case <-trackReceived: c.logger.CDebugw(ctx, "SubscribeRTP received track data", "client", fmt.Sprintf("%p", c), "pc", fmt.Sprintf("%p", c.conn.PeerConn())) } @@ -532,7 +532,7 @@ func (c *client) SubscribeRTP( } // Associate this subscription with the current generation. - c.associatedSubs[c.subGenerationId] = append(c.associatedSubs[c.subGenerationId], sub.ID) + c.associatedSubs[c.subGenerationID] = append(c.associatedSubs[c.subGenerationID], sub.ID) // Add the subscription to runningStreams. The goroutine spawned by `addOnTrackFunc` will use // this to know where to forward incoming RTP packets. @@ -551,9 +551,8 @@ func (c *client) SubscribeRTP( func (c *client) addOnTrackFunc( healthyClientCh, trackReceived, trackClosed chan struct{}, - generationId int, + generationID int, ) grpc.OnTrackCB { - // This is invoked when `PeerConnection.OnTrack` is called for this camera's stream id. return func(tr *webrtc.TrackRemote, r *webrtc.RTPReceiver) { // Our `OnTrack` was called. Inform `SubscribeRTP` that getting video data was successful. @@ -565,7 +564,7 @@ func (c *client) addOnTrackFunc( for { select { case <-healthyClientCh: - c.logger.Debugw("OnTrack callback terminating as the client is closing", "parentID", generationId) + c.logger.Debugw("OnTrack callback terminating as the client is closing", "parentID", generationID) return default: } @@ -575,27 +574,27 @@ func (c *client) addOnTrackFunc( // `ReadRTP` method. deadline := time.Now().Add(readRTPTimeout) if err := tr.SetReadDeadline(deadline); err != nil { - c.logger.Errorw("SetReadDeadline error", "generationId", generationId, "err", err) + c.logger.Errorw("SetReadDeadline error", "generationId", generationID, "err", err) return } pkt, _, err := tr.ReadRTP() if os.IsTimeout(err) { - c.logger.Debugw("ReadRTP read timeout", "generationId", generationId, + c.logger.Debugw("ReadRTP read timeout", "generationId", generationID, "err", err, "timeout", readRTPTimeout.String()) continue } if err != nil { if errors.Is(err, io.EOF) { - c.logger.Infow("ReadRTP received EOF", "generationId", generationId, "err", err) + c.logger.Infow("ReadRTP received EOF", "generationId", generationID, "err", err) } else { - c.logger.Warnw("ReadRTP error", "generationId", generationId, "err", err) + c.logger.Warnw("ReadRTP error", "generationId", generationID, "err", err) } // NOTE: (Nick S) We need to remember which subscriptions are consuming packets // from to which tr *webrtc.TrackRemote so that we can terminate the child subscriptions // when their track terminate. 
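
The read loop above is why readRTPTimeout exists: bounding every ReadRTP call lets the goroutine re-check its shutdown signal even when a track goes silent. Its skeleton, with tr, stop, and forward as assumed names:

    func readLoop(tr *webrtc.TrackRemote, stop <-chan struct{}, forward func(*rtp.Packet)) {
        for {
            select {
            case <-stop:
                return // client is closing
            default:
            }
            // A deadline keeps a silent track from blocking shutdown.
            if err := tr.SetReadDeadline(time.Now().Add(200 * time.Millisecond)); err != nil {
                return
            }
            pkt, _, err := tr.ReadRTP()
            if os.IsTimeout(err) {
                continue // deadline expired; loop to re-check stop
            }
            if err != nil {
                return // io.EOF or another terminal error: the track is done
            }
            forward(pkt)
        }
    }
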
- c.unsubscribeChildrenSubs(generationId) + c.unsubscribeChildrenSubs(generationID) return } @@ -611,7 +610,7 @@ func (c *client) addOnTrackFunc( err := bufAndCB.buf.Publish(func() { bufAndCB.cb(pkt) }) if err != nil { c.logger.Debugw("rtp passthrough packet dropped", - "generationId", generationId, "err", err) + "generationId", generationID, "err", err) } } c.rtpPassthroughMu.Unlock() @@ -687,10 +686,12 @@ func (c *client) Unsubscribe(ctx context.Context, id rtppassthrough.Subscription case <-c.trackClosed: return nil case <-ctx.Done(): - c.logger.CWarnw(ctx, "RemoveStream successfully called, but errored waiting for stream to send an EOF", "subID", id.String(), "err", ctx.Err()) + c.logger.CWarnw(ctx, "RemoveStream successfully called, but errored waiting for stream to send an EOF", + "subID", id.String(), "err", ctx.Err()) return nil case <-healthyClientCh: - c.logger.CWarnw(ctx, "RemoveStream successfully called, but camera closed waiting for stream to send an EOF", "subID", id.String()) + c.logger.CWarnw(ctx, "RemoveStream successfully called, but camera closed waiting for stream to send an EOF", + "subID", id.String()) return nil } } @@ -717,29 +718,29 @@ func (c *client) unsubscribeAll() { defer c.rtpPassthroughMu.Unlock() if len(c.runningStreams) > 0 { // shutdown & delete all *rtppassthrough.Buffer and callbacks - for subId, bufAndCB := range c.runningStreams { - c.logger.Debugw("unsubscribeAll terminating sub", "subID", subId.String()) - delete(c.runningStreams, subId) + for subID, bufAndCB := range c.runningStreams { + c.logger.Debugw("unsubscribeAll terminating sub", "subID", subID.String()) + delete(c.runningStreams, subID) bufAndCB.buf.Close() } } } -func (c *client) unsubscribeChildrenSubs(generationId int) { +func (c *client) unsubscribeChildrenSubs(generationID int) { c.rtpPassthroughMu.Lock() defer c.rtpPassthroughMu.Unlock() - c.logger.Debugw("client unsubscribeChildrenSubs called", "generationId", generationId, "numSubs", len(c.associatedSubs)) - defer c.logger.Debugw("client unsubscribeChildrenSubs done", "generationId", generationId) - for _, subId := range c.associatedSubs[generationId] { - bufAndCB, ok := c.runningStreams[subId] + c.logger.Debugw("client unsubscribeChildrenSubs called", "generationId", generationID, "numSubs", len(c.associatedSubs)) + defer c.logger.Debugw("client unsubscribeChildrenSubs done", "generationId", generationID) + for _, subID := range c.associatedSubs[generationID] { + bufAndCB, ok := c.runningStreams[subID] if !ok { continue } - c.logger.Debugw("stopping subscription", "generationId", generationId, "subId", subId.String()) - delete(c.runningStreams, subId) + c.logger.Debugw("stopping subscription", "generationId", generationID, "subId", subID.String()) + delete(c.runningStreams, subID) bufAndCB.buf.Close() } - delete(c.associatedSubs, generationId) + delete(c.associatedSubs, generationID) } func (c *client) bufAndCBToString() string { diff --git a/components/camera/client_test.go b/components/camera/client_test.go index bbe9360bd5f..94fc4a17abc 100644 --- a/components/camera/client_test.go +++ b/components/camera/client_test.go @@ -869,10 +869,13 @@ func TestMultiplexOverMultiHopRemoteConnection(t *testing.T) { test.That(t, cameraClient.(rtppassthrough.Source).Unsubscribe(mainCtx, sub.ID), test.ShouldBeNil) } +//nolint // NOTE: These tests fail when this condition occurs: -// logger.go:130: 2024-06-17T16:56:14.097-0400 DEBUG TestGrandRemoteRebooting.remote-1.rdk:remote:/remote-2.webrtc rpc/wrtc_client_channel.go:299 no stream for id; 
discarding {"ch": 0, "id": 11} +// +// logger.go:130: 2024-06-17T16:56:14.097-0400 DEBUG TestGrandRemoteRebooting.remote-1.rdk:remote:/remote-2.webrtc rpc/wrtc_client_channel.go:299 no stream for id; discarding {"ch": 0, "id": 11} +// // https://github.com/viamrobotics/goutils/blob/main/rpc/wrtc_client_channel.go#L299 - +// // go test -race -v -run=TestWhyMustTimeoutOnReadRTP -timeout 10s // TestWhyMustTimeoutOnReadRTP shows that if we don't timeout on ReadRTP (and also don't call RemoveStream) on close // calling Close() on main's camera client blocks forever if there is a live SubscribeRTP subscription with a remote @@ -990,8 +993,9 @@ Loop: // rather than out of the select statement break Loop case <-timeoutCtx.Done(): - t.Log("timed out waiting for SubscribeRTP packets to drain") - t.FailNow() + // Failure case. The following assertion always fails. We use this to get a failure line + // number + error message. + test.That(t, true, test.ShouldEqual, "timed out waiting for SubscribeRTP packets to drain") } } @@ -1006,18 +1010,19 @@ Loop: // - Ideally, we'd lower robot client reconnect timers down from 10 seconds. // - We need to force robot client webrtc connections // - WebRTC connections need to disable SRTP replay protection - +// // This tests the following scenario: -// 1. main-part (main) -> remote-part-1 (r1) -> remote-part-2 (r2) where r2 has a camera -// 2. the client in the main part makes an AddStream(r1:r2:rtpPassthroughCamera) request, starting a webrtc video track to be streamed from r2 -> r1 -> main -> client -// 3. r2 reboots -// 4. expect that r1 & main stop getting packets -// 5. when the new instance of r2 comes back online main gets new rtp packets from it's track with r1. +// 1. main-part (main) -> remote-part-1 (r1) -> remote-part-2 (r2) where r2 has a camera +// 2. the client in the main part makes an AddStream(r1:r2:rtpPassthroughCamera) request, starting a +// webrtc video track to be streamed from r2 -> r1 -> main -> client +// 3. r2 reboots +// 4. expect that r1 & main stop getting packets +// 5. when the new instance of r2 comes back online main gets new rtp packets from it's track with +// r1. func TestGrandRemoteRebooting(t *testing.T) { logger := logging.NewTestLogger(t).Sublogger(t.Name()) remoteCfg2 := &config.Config{ - // Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, Components: []resource.Config{ { Name: "rtpPassthroughCamera", @@ -1035,7 +1040,6 @@ func TestGrandRemoteRebooting(t *testing.T) { remote2Ctx, remoteRobot2, remoteWebSvc2 := setupRealRobotWithOptions(t, remoteCfg2, logger.Sublogger("remote-2"), options2) remoteCfg1 := &config.Config{ - // Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, Remotes: []config.Remote{ { Name: "remote-2", @@ -1050,7 +1054,6 @@ func TestGrandRemoteRebooting(t *testing.T) { defer remoteWebSvc1.Close(remote1Ctx) mainCfg := &config.Config{ - // Network: config.NetworkConfig{NetworkConfigData: config.NetworkConfigData{Sessions: config.SessionsConfig{HeartbeatWindow: time.Hour}}}, Remotes: []config.Remote{ { Name: "remote-1", @@ -1133,6 +1136,8 @@ Loop: // rather than out of the select statement break Loop case <-timeoutCtx.Done(): + // Failure case. The following assertion always fails. We use this to get a failure line + // number + error message. 
test.That(t, true, test.ShouldEqual, "timed out waiting for SubscribeRTP packets to drain") } } @@ -1171,7 +1176,8 @@ Loop: for !testPassed { select { case <-sub.Terminated.Done(): - // Right now we are going down this path b/c main's + // Failure case. The following assertion always fails. We use this to get a failure line + // number + error message. test.That(t, true, test.ShouldEqual, "main's sub terminated due to close") case pkts := <-pktsChan: lastPkt := pkts[len(pkts)-1] @@ -1180,6 +1186,8 @@ Loop: logger.Info("SubscribeRTP got packets") testPassed = true case <-sndPktTimeoutCtx.Done(): + // Failure case. The following assertion always fails. We use this to get a failure line + // number + error message. test.That(t, true, test.ShouldEqual, "timed out waiting for SubscribeRTP to receive packets") case <-time.After(time.Second): logger.Info("still waiting for RTP packets") diff --git a/grpc/conn.go b/grpc/conn.go index 22e3deec021..c73221a9787 100644 --- a/grpc/conn.go +++ b/grpc/conn.go @@ -6,10 +6,11 @@ import ( "sync" "github.com/viamrobotics/webrtc/v3" - "go.viam.com/rdk/logging" "go.viam.com/utils/rpc" "golang.org/x/exp/maps" googlegrpc "google.golang.org/grpc" + + "go.viam.com/rdk/logging" ) // ReconfigurableClientConn allows for the underlying client connections to be swapped under the hood. diff --git a/logging/proto_conversions.go b/logging/proto_conversions.go index 8b4f7d5f0bf..c1e0f4995e1 100644 --- a/logging/proto_conversions.go +++ b/logging/proto_conversions.go @@ -45,7 +45,7 @@ func FieldKeyAndValueFromProto(field *structpb.Struct) (string, any, error) { // This code is modeled after zapcore.Field.AddTo: // https://github.com/uber-go/zap/blob/fcf8ee58669e358bbd6460bef5c2ee7a53c0803a/zapcore/field.go#L114 - //nolint:exhaustive + switch zf.Type { case zapcore.BoolType: fieldValue = zf.Integer == 1 diff --git a/robot/web/stream/server.go b/robot/web/stream/server.go index a7a918796d6..7bd4f1f0369 100644 --- a/robot/web/stream/server.go +++ b/robot/web/stream/server.go @@ -287,6 +287,7 @@ func (server *Server) RemoveStream(ctx context.Context, req *streampb.RemoveStre return &streampb.RemoveStreamResponse{}, nil } + //nolint:nilerr if _, err := streamCamera.Camera(server.robot, streamToRemove.Stream); err != nil { return &streampb.RemoveStreamResponse{}, nil } @@ -338,7 +339,6 @@ func (server *Server) add(stream gostream.Stream) error { } logger := server.logger.Sublogger(streamName) - logger.SetLevel(logging.DEBUG) newStreamState := state.New(stream, server.robot, logger) server.nameToStreamState[streamName] = newStreamState server.streamNames = append(server.streamNames, streamName) diff --git a/robot/web/stream/state/state.go b/robot/web/stream/state/state.go index 3029b6354a4..c0750a2eb8f 100644 --- a/robot/web/stream/state/state.go +++ b/robot/web/stream/state/state.go @@ -22,10 +22,8 @@ import ( streamCamera "go.viam.com/rdk/robot/web/stream/camera" ) -var ( - // ErrClosed indicates that the StreamState is already closed. - ErrClosed = errors.New("StreamState already closed") -) +// ErrClosed indicates that the StreamState is already closed. +var ErrClosed = errors.New("StreamState already closed") // StreamState controls the source of the RTP packets being written to the stream's subscribers // and ensures there is only one active at a time while there are subsribers. 
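
With ErrClosed exported as a sentinel in the hunk above, callers can branch on shutdown explicitly. A hypothetical caller, assuming Increment wraps ErrClosed into the error it returns after close:

    if err := streamState.Increment(); errors.Is(err, state.ErrClosed) {
        // The stream state was already shut down; drop the viewer rather
        // than retrying.
    }
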
@@ -167,7 +165,6 @@ func (state *StreamState) sourceEventHandler() { case <-state.closedCtx.Done(): return case msg = <-state.msgChan: - break case <-ticker.C: state.tick() continue @@ -295,7 +292,7 @@ func (state *StreamState) streamH264Passthrough() error { // Get the camera and see if it implements the rtp passthrough API of SubscribeRTP + Unsubscribe rtpPassthroughSource, ok := cam.(rtppassthrough.Source) if !ok { - return errors.New("Stream does not support RTP passthrough") + return errors.New("stream does not support RTP passthrough") } var count atomic.Uint64 @@ -315,7 +312,8 @@ func (state *StreamState) streamH264Passthrough() error { for _, pkt := range pkts { // Also, look at unsubscribe error logs. Definitely a bug. Probably benign. if count.Add(1)%10000 == 0 { - state.logger.Infow("WriteRTP called. Sampling 1/10000", "stream", state.Stream.Name(), "count", count.Load(), "seqNumber", pkt.Header.SequenceNumber, "ts", pkt.Header.Timestamp) + state.logger.Infow("WriteRTP called. Sampling 1/10000", + "count", count.Load(), "seqNumber", pkt.Header.SequenceNumber, "ts", pkt.Header.Timestamp) } if err := state.Stream.WriteRTP(pkt); err != nil { state.logger.Debugw("stream.WriteRTP", "name", state.Stream.Name(), "err", err.Error()) @@ -354,7 +352,7 @@ func (state *StreamState) unsubscribeH264Passthrough(ctx context.Context, id rtp rtpPassthroughSource, ok := cam.(rtppassthrough.Source) if !ok { - return fmt.Errorf("Subscription resource does not implement rtpPassthroughSource. CamType: %T", rtpPassthroughSource) + return fmt.Errorf("subscription resource does not implement rtpPassthroughSource. CamType: %T", rtpPassthroughSource) } if err := rtpPassthroughSource.Unsubscribe(ctx, id); err != nil { diff --git a/robot/web/stream/stream.go b/robot/web/stream/stream.go index 7c357ae8302..49ed7879ccd 100644 --- a/robot/web/stream/stream.go +++ b/robot/web/stream/stream.go @@ -62,7 +62,7 @@ func (opts *BackoffTuningOptions) GetSleepTimeFromErrorCount(errorCount int) tim return 0 } - errorCount -= 1 + errorCount-- if errorCount >= len(backoffSleeps) { return backoffSleeps[len(backoffSleeps)-1] } diff --git a/testutils/robottestutils/robot_utils.go b/testutils/robottestutils/robot_utils.go index 40597f2638d..a68f9e40586 100644 --- a/testutils/robottestutils/robot_utils.go +++ b/testutils/robottestutils/robot_utils.go @@ -119,7 +119,7 @@ func ServerAsSeparateProcess(t *testing.T, cfgFileName string, logger logging.Lo // // WaitForServing will return true if the server has started successfully in the allotted time, and // false otherwise. -// nolint +//nolint func WaitForServing(observer *observer.ObservedLogs, port int) bool { // Message:"\n\\_ 2024-02-07T20:47:03.576Z\tINFO\trobot_server\tweb/web.go:598\tserving\t{\"url\":\"http://127.0.0.1:20000\"}" successRegex := regexp.MustCompile(fmt.Sprintf("\tserving\t.*:%d\"", port))
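
Finally, a worked example of the backoff lookup as it stands after the errorCount-- change above: against this patch's table of 500ms, 1s, 2s, and 5s, the nth consecutive error selects entry n-1, clamped to the last entry. A hypothetical check:

    opts := &webstream.BackoffTuningOptions{}
    for n := 1; n <= 5; n++ {
        fmt.Println(n, opts.GetSleepTimeFromErrorCount(n))
    }
    // 1 500ms, 2 1s, 3 2s, 4 5s, 5 5s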