Skip to content

Commit 005ccd6

Browse files
committed
Merge branch 'ros2' into fix/small_updater_fixes
2 parents 311750e + dbaec04 commit 005ccd6

File tree

8 files changed

+199
-38
lines changed

8 files changed

+199
-38
lines changed

diagnostic_aggregator/src/aggregator.cpp

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -151,28 +151,11 @@ void Aggregator::diagCallback(const DiagnosticArray::SharedPtr diag_msg)
151151
checkTimestamp(diag_msg);
152152

153153
bool analyzed = false;
154+
bool immediate_report = false;
154155
{ // lock the whole loop to ensure nothing in the analyzer group changes during it.
155156
std::lock_guard<std::mutex> lock(mutex_);
156157
for (auto j = 0u; j < diag_msg->status.size(); ++j) {
157158
analyzed = false;
158-
159-
const bool top_level_state_transition_to_error =
160-
(last_top_level_state_ != DiagnosticStatus::ERROR) &&
161-
(diag_msg->status[j].level == DiagnosticStatus::ERROR);
162-
163-
if (critical_ && top_level_state_transition_to_error) {
164-
RCLCPP_DEBUG(
165-
logger_, "Received error message: %s, publishing error immediately",
166-
diag_msg->status[j].name.c_str());
167-
DiagnosticStatus diag_toplevel_state;
168-
diag_toplevel_state.name = "toplevel_state_critical";
169-
diag_toplevel_state.level = diag_msg->status[j].level;
170-
toplevel_state_pub_->publish(diag_toplevel_state);
171-
172-
// store the last published state
173-
last_top_level_state_ = diag_toplevel_state.level;
174-
}
175-
176159
auto item = std::make_shared<StatusItem>(&diag_msg->status[j]);
177160

178161
if (analyzer_group_->match(item->getName())) {
@@ -182,8 +165,17 @@ void Aggregator::diagCallback(const DiagnosticArray::SharedPtr diag_msg)
182165
if (!analyzed) {
183166
other_analyzer_->analyze(item);
184167
}
168+
169+
// In case there is a degraded state, publish immediately
170+
if (critical_ && item->getLevel() > last_top_level_state_) {
171+
immediate_report = true;
172+
}
185173
}
186174
}
175+
176+
if (immediate_report) {
177+
publishData();
178+
}
187179
}
188180

189181
Aggregator::~Aggregator()

diagnostic_aggregator/test/test_critical_pub.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -67,41 +67,56 @@ def publish_message(self, level):
6767
rclpy.spin_once(self.node)
6868
return self.node.get_clock().now()
6969

70-
def test_critical_publisher(self):
70+
def critical_publisher_test(
71+
self, initial_state=DiagnosticStatus.OK, new_state=DiagnosticStatus.ERROR
72+
):
7173
# Publish the ok message and wait till the toplevel state is received
72-
state = DiagnosticStatus.OK
73-
time_0 = self.publish_message(state)
74+
time_0 = self.publish_message(initial_state)
7475

75-
assert (self.received_state[0] == state), \
76+
assert (self.received_state[0] == initial_state), \
7677
('Received state is not the same as the sent state:'
77-
+ f"'{self.received_state[0]}' != '{state}'")
78+
+ f"'{self.received_state[0]}' != '{initial_state}'")
7879
self.received_state.clear()
7980

8081
# Publish the ok message and expect the toplevel state after 1 second period
81-
time_1 = self.publish_message(state)
82+
time_1 = self.publish_message(initial_state)
8283
assert (time_1 - time_0 > rclpy.duration.Duration(seconds=0.99)), \
8384
'OK message received too early'
84-
assert (self.received_state[0] == state), \
85+
assert (self.received_state[0] == initial_state), \
8586
('Received state is not the same as the sent state:'
86-
+ f"'{self.received_state[0]}' != '{state}'")
87+
+ f"'{self.received_state[0]}' != '{initial_state}'")
8788
self.received_state.clear()
8889

8990
# Publish the message and expect the critical error message immediately
90-
state = DiagnosticStatus.ERROR
91-
time_2 = self.publish_message(state)
91+
time_2 = self.publish_message(new_state)
9292

9393
assert (time_2 - time_1 < rclpy.duration.Duration(seconds=0.1)), \
9494
'Critical error message not received within 0.1 second'
95-
assert (self.received_state[0] == state), \
95+
assert (self.received_state[0] == new_state), \
9696
('Received state is not the same as the sent state:'
97-
+ f"'{self.received_state[0]}' != '{state}'")
97+
+ f"'{self.received_state[0]}' != '{new_state}'")
9898
self.received_state.clear()
9999

100100
# Next error message should be sent at standard 1 second rate
101-
time_3 = self.publish_message(state)
101+
time_3 = self.publish_message(new_state)
102102

103103
assert (time_3 - time_1 > rclpy.duration.Duration(seconds=0.99)), \
104104
'Periodic error message received too early'
105-
assert (self.received_state[0] == state), \
105+
assert (self.received_state[0] == new_state), \
106106
('Received state is not the same as the sent state:'
107-
+ f"'{self.received_state[0]}' != '{state}'")
107+
+ f"'{self.received_state[0]}' != '{new_state}'")
108+
109+
def test_critical_publisher_ok_error(self):
110+
self.critical_publisher_test(
111+
initial_state=DiagnosticStatus.OK, new_state=DiagnosticStatus.ERROR
112+
)
113+
114+
def test_critical_publisher_ok_warn(self):
115+
self.critical_publisher_test(
116+
initial_state=DiagnosticStatus.OK, new_state=DiagnosticStatus.WARN
117+
)
118+
119+
def test_critical_publisher_warn_error(self):
120+
self.critical_publisher_test(
121+
initial_state=DiagnosticStatus.WARN, new_state=DiagnosticStatus.ERROR
122+
)

diagnostic_common_diagnostics/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ ament_python_install_package(${PROJECT_NAME})
1010
install(PROGRAMS
1111
${PROJECT_NAME}/cpu_monitor.py
1212
${PROJECT_NAME}/ntp_monitor.py
13+
${PROJECT_NAME}/ram_monitor.py
1314
DESTINATION lib/${PROJECT_NAME}
1415
)
1516

diagnostic_common_diagnostics/README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,27 @@ Disable self test.
7070
**To be ported**
7171

7272
## ram_monitor.py
73-
**To be ported**
73+
The `ram_monitor` module allows users to monitor the RAM usage of their system in real-time.
74+
It publishes the usage percentage in a diagnostic message.
75+
76+
* Name of the node is "ram_monitor_" + hostname.
77+
* Uses the following args:
78+
* warning_percentage: If the RAM usage is > warning_percentage, a WARN status will be published.
79+
* window: the maximum length of the used collections.deque for queuing RAM readings.
80+
81+
### Published Topics
82+
#### /diagnostics
83+
diagnostic_msgs/DiagnosticArray
84+
The diagnostics information.
85+
86+
### Parameters
87+
#### warning_percentage
88+
(default: 90)
89+
warning percentage threshold.
90+
91+
#### window
92+
(default: 1)
93+
Length of RAM readings queue.
7494

7595
## sensors_monitor.py
7696
**To be ported**

diagnostic_common_diagnostics/diagnostic_common_diagnostics/ntp_monitor.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ def __init__(self, ntp_hostname, ntp_port, offset=500, self_offset=500,
5050
do_self_test=True):
5151
"""Initialize the NTPMonitor."""
5252
super().__init__(__class__.__name__)
53+
self.declare_parameter('frequency', 10.0)
54+
frequency = self.get_parameter('frequency').get_parameter_value().double_value
5355

5456
self.ntp_hostname = ntp_hostname
5557
self.ntp_port = ntp_port
@@ -85,8 +87,8 @@ def __init__(self, ntp_hostname, ntp_port, offset=500, self_offset=500,
8587

8688
# we need to periodically republish this
8789
self.current_msg = None
88-
self.pubtimer = self.create_timer(0.1, self.pubCB)
89-
self.checktimer = self.create_timer(0.1, self.checkCB)
90+
self.pubtimer = self.create_timer(1/frequency, self.pubCB)
91+
self.checktimer = self.create_timer(1/frequency, self.checkCB)
9092

9193
def pubCB(self):
9294
with self.mutex:
@@ -95,6 +97,7 @@ def pubCB(self):
9597

9698
def checkCB(self):
9799
new_msg = DIAG.DiagnosticArray()
100+
new_msg.header.stamp = self.get_clock().now().to_msg()
98101

99102
st = self.ntp_diag(self.stat)
100103
if st is not None:
@@ -159,7 +162,7 @@ def add_kv(stat_values, key, value):
159162

160163
def ntp_monitor_main(argv=sys.argv[1:]):
161164
# filter out ROS args
162-
argv = [a for a in argv if not a.startswith('__') and not a == '--ros-args' and not a == '-r']
165+
argv = argv[:argv.index('--ros-args')] if '--ros-args' in argv else argv
163166

164167
import argparse
165168
parser = argparse.ArgumentParser()
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Software License Agreement (BSD License)
4+
#
5+
# Copyright (c) 2017, TNO IVS, Helmond, Netherlands
6+
# All rights reserved.
7+
#
8+
# Redistribution and use in source and binary forms, with or without
9+
# modification, are permitted provided that the following conditions
10+
# are met:
11+
#
12+
# * Redistributions of source code must retain the above copyright
13+
# notice, this list of conditions and the following disclaimer.
14+
# * Redistributions in binary form must reproduce the above
15+
# copyright notice, this list of conditions and the following
16+
# disclaimer in the documentation and/or other materials provided
17+
# with the distribution.
18+
# * Neither the name of the TNO IVS nor the names of its
19+
# contributors may be used to endorse or promote products derived
20+
# from this software without specific prior written permission.
21+
#
22+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25+
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26+
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28+
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33+
# POSSIBILITY OF SUCH DAMAGE.
34+
35+
# \author Rein Appeldoorn
36+
37+
import collections
38+
import socket
39+
40+
from diagnostic_msgs.msg import DiagnosticStatus
41+
42+
from diagnostic_updater import DiagnosticTask, Updater
43+
44+
import psutil
45+
46+
import rclpy
47+
48+
49+
class RamTask(DiagnosticTask):
50+
51+
def __init__(self, warning_percentage, window):
52+
DiagnosticTask.__init__(self, 'RAM Information')
53+
self._warning_percentage = int(warning_percentage)
54+
self._readings = collections.deque(maxlen=window)
55+
56+
def run(self, stat):
57+
self._readings.append(psutil.virtual_memory().percent)
58+
ram_average = sum(self._readings) / len(self._readings)
59+
60+
stat.add('RAM Load Average', f'{ram_average:.2f}')
61+
62+
if ram_average > self._warning_percentage:
63+
stat.summary(
64+
DiagnosticStatus.WARN,
65+
f'RAM Average exceeds {self._warning_percentage:d} percent',
66+
)
67+
else:
68+
stat.summary(DiagnosticStatus.OK, f'RAM Average {ram_average:.2f} percent')
69+
70+
return stat
71+
72+
73+
def main():
74+
hostname = socket.gethostname()
75+
rclpy.init()
76+
node = rclpy.create_node(f'ram_monitor_{hostname.replace("-", "_")}')
77+
78+
updater = Updater(node)
79+
updater.setHardwareID(hostname)
80+
updater.add(
81+
RamTask(
82+
node.declare_parameter('warning_percentage', 90).value,
83+
node.declare_parameter('window', 1).value,
84+
)
85+
)
86+
87+
rclpy.spin(node)
88+
89+
90+
if __name__ == '__main__':
91+
try:
92+
main()
93+
except KeyboardInterrupt:
94+
pass

diagnostic_updater/include/diagnostic_updater/diagnostic_updater.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ class Updater : public DiagnosticTaskVector
511511
*/
512512
void update()
513513
{
514-
if (rclcpp::ok()) {
514+
if (rclcpp::ok(base_interface_->get_context())) {
515515
bool warn_nohwid = hwid_.empty();
516516

517517
std::vector<diagnostic_msgs::msg::DiagnosticStatus> status_vec;

diagnostic_updater/test/diagnostic_updater_test.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,42 @@ TEST(DiagnosticUpdater, testUpdaterAsNodeClassMember) {
134134
SUCCEED();
135135
}
136136

137+
class SaveIfCalled : public diagnostic_updater::DiagnosticTask
138+
{
139+
public:
140+
SaveIfCalled()
141+
: DiagnosticTask("SaveIfCalled") {}
142+
143+
void run(diagnostic_updater::DiagnosticStatusWrapper & s)
144+
{
145+
s.summary(0, "Have been called!");
146+
has_been_called_ = true;
147+
}
148+
149+
bool has_been_called() const
150+
{
151+
return has_been_called_;
152+
}
153+
154+
private:
155+
bool has_been_called_ = false;
156+
};
157+
158+
159+
TEST(DiagnosticUpdater, testCustomContext) {
160+
auto context = std::make_shared<rclcpp::Context>();
161+
context->init(0, nullptr, rclcpp::InitOptions());
162+
163+
auto node =
164+
std::make_shared<rclcpp::Node>("CustomContextNode", rclcpp::NodeOptions().context(context));
165+
diagnostic_updater::Updater updater(node);
166+
SaveIfCalled save_if_called;
167+
updater.add(save_if_called);
168+
updater.force_update();
169+
ASSERT_TRUE(save_if_called.has_been_called());
170+
context->shutdown("End test");
171+
}
172+
137173
TEST(DiagnosticUpdater, testDiagnosticStatusWrapperKeyValuePairs) {
138174
diagnostic_updater::DiagnosticStatusWrapper stat;
139175

0 commit comments

Comments
 (0)