Skip to content

Commit 1972342

Browse files
Fix timer cancellation to be reliable in LayerImplSelect. (#22375)
* Duplicate src/system/tests/TestSystemScheduleLambda.cpp history in src/system/tests/TestSystemScheduleWork.cpp history. * Fix timer cancellation to be reliable in LayerImplSelect. To minimize risk, the changes here keep the "grab all the timers we should fire, then fire them" setup instead of switching to the "fire the timers one at a time" approach LayerImplFreeRTOS uses. The fix consists of the following parts: 1) Store the list of timers to fire in a member, so we can cancel things from that list as needed. 2) To avoid canceling things scheduled by ScheduleWork, remove the CancelTimer call in ScheduleWork. This does mean we now allow multiple timers for the same callback+appState in the timer list, if they were created by ScheduleWork, but that should be OK, since the only reason that pair needs to be unique is to allow cancellation and we never want to cancel the things ScheduleWork schedules. As a followup we should stop using the timer list for ScheduleWork altogether and use ScheduleLambda like LayerImplFreeRTOS does, but that involves fixing the unit tests that call ScheduleWork without actually running the platfor manager event loop and expect it to work somehow. TestRead was failing the sanity assert for not losing track of timers to fire, because it was spinning a nested event loop. The changes to that test stop it from doing that. Fixes project-chip/connectedhomeip#19387 Fixes project-chip/connectedhomeip#22160 * Add a unit test that timer cancelling works even for currently "in progress" timers * Address review comments. * Fix shadowing problem. * Turn off TestSystemScheduleWork on esp32 QEMU for now. * Turn off TestSystemScheduleWork on the fake platform too. The fake platform's event loop does not actually process the SystemLayer events. Co-authored-by: Andrei Litvin <[email protected]>
1 parent 9d1aabd commit 1972342

8 files changed

+385
-22
lines changed

src/controller/tests/data_model/TestRead.cpp

+33-15
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,11 @@ class TestReadInteraction : public app::ReadHandler::ApplicationCallback
298298
// succeed.
299299
static void MultipleReadHelper(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount);
300300

301+
// Helper for MultipleReadHelper that does not spin the event loop, so we
302+
// don't end up with nested event loops.
303+
static void MultipleReadHelperInternal(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount,
304+
uint32_t & aNumSuccessCalls, uint32_t & aNumFailureCalls);
305+
301306
// Establish the given number of subscriptions, then issue the given number
302307
// of reads in parallel and wait for them all to succeed.
303308
static void SubscribeThenReadHelper(nlTestSuite * apSuite, TestContext & aCtx, size_t aSubscribeCount, size_t aReadCount);
@@ -2484,6 +2489,9 @@ void TestReadInteraction::SubscribeThenReadHelper(nlTestSuite * apSuite, TestCon
24842489
uint32_t numSuccessCalls = 0;
24852490
uint32_t numSubscriptionEstablishedCalls = 0;
24862491

2492+
uint32_t numReadSuccessCalls = 0;
2493+
uint32_t numReadFailureCalls = 0;
2494+
24872495
responseDirective = kSendDataResponse;
24882496

24892497
// Passing of stack variables by reference is only safe because of synchronous completion of the interaction. Otherwise, it's
@@ -2501,12 +2509,12 @@ void TestReadInteraction::SubscribeThenReadHelper(nlTestSuite * apSuite, TestCon
25012509
NL_TEST_ASSERT(apSuite, false);
25022510
};
25032511

2504-
auto onSubscriptionEstablishedCb = [&numSubscriptionEstablishedCalls, &apSuite, &aCtx, aSubscribeCount,
2505-
aReadCount](const app::ReadClient & readClient) {
2512+
auto onSubscriptionEstablishedCb = [&numSubscriptionEstablishedCalls, &apSuite, &aCtx, aSubscribeCount, aReadCount,
2513+
&numReadSuccessCalls, &numReadFailureCalls](const app::ReadClient & readClient) {
25062514
numSubscriptionEstablishedCalls++;
25072515
if (numSubscriptionEstablishedCalls == aSubscribeCount)
25082516
{
2509-
MultipleReadHelper(apSuite, aCtx, aReadCount);
2517+
MultipleReadHelperInternal(apSuite, aCtx, aReadCount, numReadSuccessCalls, numReadFailureCalls);
25102518
}
25112519
};
25122520

@@ -2522,40 +2530,50 @@ void TestReadInteraction::SubscribeThenReadHelper(nlTestSuite * apSuite, TestCon
25222530

25232531
NL_TEST_ASSERT(apSuite, numSuccessCalls == aSubscribeCount);
25242532
NL_TEST_ASSERT(apSuite, numSubscriptionEstablishedCalls == aSubscribeCount);
2533+
NL_TEST_ASSERT(apSuite, numReadSuccessCalls == aReadCount);
2534+
NL_TEST_ASSERT(apSuite, numReadFailureCalls == 0);
25252535
}
25262536

2527-
void TestReadInteraction::MultipleReadHelper(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount)
2537+
// The guts of MultipleReadHelper which take references to the success/failure
2538+
// counts to modify and assume the consumer will be spinning the event loop.
2539+
void TestReadInteraction::MultipleReadHelperInternal(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount,
2540+
uint32_t & aNumSuccessCalls, uint32_t & aNumFailureCalls)
25282541
{
2529-
auto sessionHandle = aCtx.GetSessionBobToAlice();
2530-
uint32_t numSuccessCalls = 0;
2531-
uint32_t numFailureCalls = 0;
2542+
NL_TEST_ASSERT(apSuite, aNumSuccessCalls == 0);
2543+
NL_TEST_ASSERT(apSuite, aNumFailureCalls == 0);
2544+
2545+
auto sessionHandle = aCtx.GetSessionBobToAlice();
25322546

25332547
responseDirective = kSendDataResponse;
25342548

25352549
uint16_t firstExpectedResponse = totalReadCount + 1;
25362550

2537-
// Passing of stack variables by reference is only safe because of synchronous completion of the interaction. Otherwise, it's
2538-
// not safe to do so.
2539-
auto onFailureCb = [&apSuite, &numFailureCalls](const app::ConcreteDataAttributePath * attributePath, CHIP_ERROR aError) {
2540-
numFailureCalls++;
2551+
auto onFailureCb = [apSuite, &aNumFailureCalls](const app::ConcreteDataAttributePath * attributePath, CHIP_ERROR aError) {
2552+
aNumFailureCalls++;
25412553

25422554
NL_TEST_ASSERT(apSuite, attributePath == nullptr);
25432555
};
25442556

25452557
for (size_t i = 0; i < aReadCount; ++i)
25462558
{
2547-
// Passing of stack variables by reference is only safe because of synchronous completion of the interaction. Otherwise,
2548-
// it's not safe to do so.
2549-
auto onSuccessCb = [&numSuccessCalls, &apSuite, firstExpectedResponse,
2559+
auto onSuccessCb = [&aNumSuccessCalls, apSuite, firstExpectedResponse,
25502560
i](const app::ConcreteDataAttributePath & attributePath, const auto & dataResponse) {
25512561
NL_TEST_ASSERT(apSuite, dataResponse == firstExpectedResponse + i);
2552-
numSuccessCalls++;
2562+
aNumSuccessCalls++;
25532563
};
25542564

25552565
NL_TEST_ASSERT(apSuite,
25562566
Controller::ReadAttribute<TestCluster::Attributes::Int16u::TypeInfo>(
25572567
&aCtx.GetExchangeManager(), sessionHandle, kTestEndpointId, onSuccessCb, onFailureCb) == CHIP_NO_ERROR);
25582568
}
2569+
}
2570+
2571+
void TestReadInteraction::MultipleReadHelper(nlTestSuite * apSuite, TestContext & aCtx, size_t aReadCount)
2572+
{
2573+
uint32_t numSuccessCalls = 0;
2574+
uint32_t numFailureCalls = 0;
2575+
2576+
MultipleReadHelperInternal(apSuite, aCtx, aReadCount, numSuccessCalls, numFailureCalls);
25592577

25602578
aCtx.DrainAndServiceIO();
25612579

src/system/SystemLayerImplFreeRTOS.cpp

+23-1
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,32 @@ CHIP_ERROR LayerImplFreeRTOS::ScheduleWork(TimerCompleteCallback onComplete, voi
8888
{
8989
VerifyOrReturnError(mLayerState.IsInitialized(), CHIP_ERROR_INCORRECT_STATE);
9090

91+
// Ideally we would not use a timer here at all, but if we try to just
92+
// ScheduleLambda the lambda needs to capture the following:
93+
// 1) onComplete
94+
// 2) appState
95+
// 3) The `this` pointer, because onComplete needs to be passed a pointer to
96+
// the System::Layer.
97+
//
98+
// On a 64-bit system that's 24 bytes, but lambdas passed to ScheduleLambda
99+
// are capped at CHIP_CONFIG_LAMBDA_EVENT_SIZE which is 16 bytes.
100+
//
101+
// So for now use a timer as a poor-man's closure that captures `this` and
102+
// onComplete and appState in a single pointer, so we fit inside the size
103+
// limit.
104+
//
105+
// TODO: We could do something here where we compile-time condition on the
106+
// sizes of things and use a direct ScheduleLambda if it would fit and this
107+
// setup otherwise.
91108
TimerList::Node * timer = mTimerPool.Create(*this, SystemClock().GetMonotonicTimestamp(), onComplete, appState);
92109
VerifyOrReturnError(timer != nullptr, CHIP_ERROR_NO_MEMORY);
93110

94-
return ScheduleLambda([this, timer] { this->mTimerPool.Invoke(timer); });
111+
CHIP_ERROR err = ScheduleLambda([this, timer] { this->mTimerPool.Invoke(timer); });
112+
if (err != CHIP_NO_ERROR)
113+
{
114+
mTimerPool.Release(timer);
115+
}
116+
return err;
95117
}
96118

97119
/**

src/system/SystemLayerImplSelect.cpp

+36-4
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,14 @@ void LayerImplSelect::CancelTimer(TimerCompleteCallback onComplete, void * appSt
170170
VerifyOrReturn(mLayerState.IsInitialized());
171171

172172
TimerList::Node * timer = mTimerList.Remove(onComplete, appState);
173+
if (timer == nullptr)
174+
{
175+
// The timer was not in our "will fire in the future" list, but it might
176+
// be in the "we're about to fire these" chunk we already grabbed from
177+
// that list. Check for it there too, and if found there we still want
178+
// to cancel it.
179+
timer = mExpiredTimers.Remove(onComplete, appState);
180+
}
173181
VerifyOrReturn(timer != nullptr);
174182

175183
#if CHIP_SYSTEM_CONFIG_USE_DISPATCH
@@ -199,8 +207,31 @@ CHIP_ERROR LayerImplSelect::ScheduleWork(TimerCompleteCallback onComplete, void
199207
}
200208
#endif // CHIP_SYSTEM_CONFIG_USE_DISPATCH
201209

202-
CancelTimer(onComplete, appState);
203-
210+
// Ideally we would not use a timer here at all, but if we try to just
211+
// ScheduleLambda the lambda needs to capture the following:
212+
// 1) onComplete
213+
// 2) appState
214+
// 3) The `this` pointer, because onComplete needs to be passed a pointer to
215+
// the System::Layer.
216+
//
217+
// On a 64-bit system that's 24 bytes, but lambdas passed to ScheduleLambda
218+
// are capped at CHIP_CONFIG_LAMBDA_EVENT_SIZE which is 16 bytes.
219+
//
220+
// So for now use a timer as a poor-man's closure that captures `this` and
221+
// onComplete and appState in a single pointer, so we fit inside the size
222+
// limit.
223+
//
224+
// TODO: We could do something here where we compile-time condition on the
225+
// sizes of things and use a direct ScheduleLambda if it would fit and this
226+
// setup otherwise.
227+
//
228+
// TODO: But also, unit tests seem to do SystemLayer::ScheduleWork without
229+
// actually running a useful event loop (in the PlatformManager sense),
230+
// which breaks if we use ScheduleLambda here, since that does rely on the
231+
// PlatformManager event loop. So for now, keep scheduling an expires-ASAP
232+
// timer, but just make sure we don't cancel existing timers with the same
233+
// callback and appState, so ScheduleWork invocations don't stomp on each
234+
// other.
204235
TimerList::Node * timer = mTimerPool.Create(*this, SystemClock().GetMonotonicTimestamp(), onComplete, appState);
205236
VerifyOrReturnError(timer != nullptr, CHIP_ERROR_NO_MEMORY);
206237

@@ -469,9 +500,10 @@ void LayerImplSelect::HandleEvents()
469500

470501
// Obtain the list of currently expired timers. Any new timers added by timer callback are NOT handled on this pass,
471502
// since that could result in infinite handling of new timers blocking any other progress.
472-
TimerList expiredTimers = mTimerList.ExtractEarlier(Clock::Timeout(1) + SystemClock().GetMonotonicTimestamp());
503+
VerifyOrDieWithMsg(mExpiredTimers.Empty(), DeviceLayer, "Re-entry into HandleEvents from a timer callback?");
504+
mExpiredTimers = mTimerList.ExtractEarlier(Clock::Timeout(1) + SystemClock().GetMonotonicTimestamp());
473505
TimerList::Node * timer = nullptr;
474-
while ((timer = expiredTimers.PopEarliest()) != nullptr)
506+
while ((timer = mExpiredTimers.PopEarliest()) != nullptr)
475507
{
476508
mTimerPool.Invoke(timer);
477509
}

src/system/SystemLayerImplSelect.h

+3
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ class LayerImplSelect : public LayerSocketsLoop
101101

102102
TimerPool<TimerList::Node> mTimerPool;
103103
TimerList mTimerList;
104+
// List of expired timers being processed right now. Stored in a member so
105+
// we can cancel them.
106+
TimerList mExpiredTimers;
104107
timeval mNextTimeout;
105108

106109
// Members for select loop

src/system/tests/BUILD.gn

+4
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ chip_test_suite("tests") {
3131
"TestTimeSource.cpp",
3232
]
3333

34+
if (chip_device_platform != "esp32" && chip_device_platform != "fake") {
35+
test_sources += [ "TestSystemScheduleWork.cpp" ]
36+
}
37+
3438
cflags = [ "-Wconversion" ]
3539

3640
public_deps = [

src/system/tests/TestSystemClock.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ static const nlTest sTests[] =
118118
int TestSystemClock(void)
119119
{
120120
nlTestSuite theSuite = {
121-
"chip-timesource", &sTests[0], nullptr /* setup */, nullptr /* teardown */
121+
"chip-systemclock", &sTests[0], nullptr /* setup */, nullptr /* teardown */
122122
};
123123

124124
// Run test suit againt one context.
+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/*
2+
* Copyright (c) 2021 Project CHIP Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <system/SystemConfig.h>
18+
19+
#include <lib/support/CodeUtils.h>
20+
#include <lib/support/ErrorStr.h>
21+
#include <lib/support/UnitTestRegistration.h>
22+
#include <nlunit-test.h>
23+
#include <platform/CHIPDeviceLayer.h>
24+
25+
static void IncrementIntCounter(chip::System::Layer *, void * state)
26+
{
27+
++(*static_cast<int *>(state));
28+
}
29+
30+
static void StopEventLoop(chip::System::Layer *, void *)
31+
{
32+
chip::DeviceLayer::PlatformMgr().StopEventLoopTask();
33+
}
34+
35+
static void CheckScheduleWorkTwice(nlTestSuite * inSuite, void * aContext)
36+
{
37+
int * callCount = new int(0);
38+
NL_TEST_ASSERT(inSuite, chip::DeviceLayer::SystemLayer().ScheduleWork(IncrementIntCounter, callCount) == CHIP_NO_ERROR);
39+
NL_TEST_ASSERT(inSuite, chip::DeviceLayer::SystemLayer().ScheduleWork(IncrementIntCounter, callCount) == CHIP_NO_ERROR);
40+
NL_TEST_ASSERT(inSuite, chip::DeviceLayer::SystemLayer().ScheduleWork(StopEventLoop, nullptr) == CHIP_NO_ERROR);
41+
chip::DeviceLayer::PlatformMgr().RunEventLoop();
42+
NL_TEST_ASSERT(inSuite, *callCount == 2);
43+
delete callCount;
44+
}
45+
46+
// Test Suite
47+
48+
/**
49+
* Test Suite. It lists all the test functions.
50+
*/
51+
// clang-format off
52+
static const nlTest sTests[] =
53+
{
54+
NL_TEST_DEF("System::TestScheduleWorkTwice", CheckScheduleWorkTwice),
55+
NL_TEST_SENTINEL()
56+
};
57+
// clang-format on
58+
59+
static int TestSetup(void * aContext);
60+
static int TestTeardown(void * aContext);
61+
62+
// clang-format off
63+
static nlTestSuite kTheSuite =
64+
{
65+
"chip-system-schedule-work",
66+
&sTests[0],
67+
TestSetup,
68+
TestTeardown
69+
};
70+
// clang-format on
71+
72+
/**
73+
* Set up the test suite.
74+
*/
75+
static int TestSetup(void * aContext)
76+
{
77+
if (chip::Platform::MemoryInit() != CHIP_NO_ERROR)
78+
{
79+
return FAILURE;
80+
}
81+
82+
if (chip::DeviceLayer::PlatformMgr().InitChipStack() != CHIP_NO_ERROR)
83+
return FAILURE;
84+
85+
return (SUCCESS);
86+
}
87+
88+
/**
89+
* Tear down the test suite.
90+
* Free memory reserved at TestSetup.
91+
*/
92+
static int TestTeardown(void * aContext)
93+
{
94+
chip::DeviceLayer::PlatformMgr().Shutdown();
95+
chip::Platform::MemoryShutdown();
96+
return (SUCCESS);
97+
}
98+
99+
int TestSystemScheduleWork(void)
100+
{
101+
// Run test suit againt one lContext.
102+
nlTestRunner(&kTheSuite, nullptr);
103+
104+
return nlTestRunnerStats(&kTheSuite);
105+
}
106+
107+
CHIP_REGISTER_TEST_SUITE(TestSystemScheduleWork)

0 commit comments

Comments
 (0)