@@ -88,6 +88,7 @@ type localRobot struct {
88
88
// map keyed by Module.Name. This is necessary to get the package manager to use a new folder
89
89
// when a local tarball is updated.
90
90
localModuleVersions map [string ]semver.Version
91
+ startFtdcOnce sync.Once
91
92
ftdc * ftdc.FTDC
92
93
93
94
// whether the robot is actively reconfiguring
@@ -244,7 +245,13 @@ func (r *localRobot) Logger() logging.Logger {
244
245
245
246
// StartWeb starts the web server. It returns an error if the server is already up.
246
247
func (r * localRobot ) StartWeb (ctx context.Context , o weboptions.Options ) (err error ) {
247
- return r .webSvc .Start (ctx , o )
248
+ ret := r .webSvc .Start (ctx , o )
249
+ r .startFtdcOnce .Do (func () {
250
+ if r .ftdc != nil {
251
+ r .ftdc .Start ()
252
+ }
253
+ })
254
+ return ret
248
255
}
249
256
250
257
// StopWeb stops the web server, will be a noop if server is not up.
@@ -421,8 +428,23 @@ func newWithResources(
421
428
partID = cfg .Cloud .ID
422
429
}
423
430
// CloudID is also known as the robot part id.
431
+ //
432
+ // RSDK-9369: We create a new FTDC worker, but do not yet start it. This is because the
433
+ // `webSvc` gets registered with FTDC before we construct the underlying
434
+ // `webSvc.rpcServer`, which happens when calling `localRobot.StartWeb`. We've postponed
435
+ // starting FTDC to when that method is called (the first time).
436
+ //
437
+ // As per the FTDC.Statser interface documentation, the return value of `webSvc.Stats` must
438
+ // always have the same schema. Otherwise we risk the ftdc "schema" getting out of sync with
439
+ // the data being written. Having `webSvc.Stats` conform to the API requirements is
440
+ // challenging when we want to include stats from the `rpcServer`.
441
+ //
442
+ // RSDK-9369 can be reverted, so that the FTDC worker is started here again, when we either:
443
+ // - Relax the requirement that successive calls to `Stats` have the same schema or
444
+ // - Guarantee that the `rpcServer` is initialized (enough) when the web service is
445
+ // constructed to get a valid copy of its stats object (for the schema's sake). Even if
446
+ // the web service has not been "started".
424
447
ftdcWorker = ftdc .New (ftdc .DefaultDirectory (config .ViamDotDir , partID ), logger .Sublogger ("ftdc" ))
425
- ftdcWorker .Start ()
426
448
}
427
449
428
450
closeCtx , cancel := context .WithCancel (ctx )
0 commit comments