Skip to content

Commit 5b2c6b3

Browse files
author
Paweł Szulik
committed
Add mon groups for resctrl.
"mon_groups" can be created to monitor subsets of tasks in the CTRL_MON group that is their ancestor. More info: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt Signed-off-by: Paweł Szulik <pawel.szulik@intel.com>
1 parent 9d4c02c commit 5b2c6b3

File tree

10 files changed

+474
-118
lines changed

10 files changed

+474
-118
lines changed

events.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
154154
}
155155

156156
if is := ls.IntelRdtStats; is != nil {
157-
if intelrdt.IsCatEnabled() {
157+
if intelrdt.IsCatEnabled() && is.L3CacheInfo != nil {
158158
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
159159
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
160160
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
161161
}
162-
if intelrdt.IsMbaEnabled() {
162+
if intelrdt.IsMbaEnabled() && is.MemBwInfo != nil{
163163
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
164164
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
165165
s.IntelRdt.MemBwSchema = is.MemBwSchema

libcontainer/SPEC.md

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -158,22 +158,31 @@ init process will block waiting for the parent to finish setup.
158158
### IntelRdt
159159

160160
Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
161-
Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
162-
two sub-features of RDT.
161+
Cache Allocation Technology (CAT), Cache Monitoring Technology (CMT),
162+
Memory Bandwidth Allocation (MBA) and Memory Bandwidth Monitoring (MBM) are
163+
four sub-features of RDT.
163164

164165
Cache Allocation Technology (CAT) provides a way for the software to restrict
165166
cache allocation to a defined 'subset' of L3 cache which may be overlapping
166167
with other 'subsets'. The different subsets are identified by class of
167168
service (CLOS) and each CLOS has a capacity bitmask (CBM).
168169

170+
Cache Monitoring Technology (CMT) supports monitoring of last-level cache (LLC) occupancy
171+
for each running thread simultaneously.
172+
169173
Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
170174
over memory bandwidth for the software. A user controls the resource by
171-
indicating the percentage of maximum memory bandwidth or memory bandwidth limit
172-
in MBps unit if MBA Software Controller is enabled.
175+
indicating the percentage of maximum memory bandwidth or memory bandwidth
176+
limit in MBps unit if MBA Software Controller is enabled.
177+
178+
Memory Bandwidth Monitoring (MBM) supports monitoring of total and local memory bandwidth
179+
for each running thread simultaneously.
173180

174-
It can be used to handle L3 cache and memory bandwidth resources allocation
175-
for containers if hardware and kernel support Intel RDT CAT and MBA features.
181+
More details about Intel RDT CAT and MBA can be found in section 17.18
182+
of Intel Software Developer Manual:
183+
https://software.intel.com/en-us/articles/intel-sdm
176184

185+
About Intel RDT kernel interface:
177186
In Linux 4.10 kernel or newer, the interface is defined and exposed via
178187
"resource control" filesystem, which is a "cgroup-like" interface.
179188

@@ -185,7 +194,6 @@ CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
185194
"resource control" filesystem.
186195

187196
Intel RDT "resource control" filesystem hierarchy:
188-
```
189197
mount -t resctrl resctrl /sys/fs/resctrl
190198
tree /sys/fs/resctrl
191199
/sys/fs/resctrl/
@@ -194,22 +202,40 @@ tree /sys/fs/resctrl
194202
| | |-- cbm_mask
195203
| | |-- min_cbm_bits
196204
| | |-- num_closids
205+
| |-- L3_MON
206+
| | |-- max_threshold_occupancy
207+
| | |-- mon_features
208+
| | |-- num_rmids
197209
| |-- MB
198210
| |-- bandwidth_gran
199211
| |-- delay_linear
200212
| |-- min_bandwidth
201213
| |-- num_closids
202-
|-- ...
214+
|-- mon_data
215+
|-- mon_L3_00
216+
|-- llc_occupancy
217+
|-- mbm_local_bytes
218+
|-- mbm_total_bytes
219+
|-- ...
220+
|-- mon_groups
221+
|-- <container_id>
222+
|-- ...
223+
|-- mon_data
224+
|-- mon_L3_00
225+
|-- llc_occupancy
226+
|-- mbm_local_bytes
227+
|-- mbm_total_bytes
228+
|-- ...
203229
|-- schemata
204230
|-- tasks
205231
|-- <container_id>
206232
|-- ...
207233
|-- schemata
208234
|-- tasks
209-
```
210235

211236
For runc, we can make use of `tasks` and `schemata` configuration for L3
212-
cache and memory bandwidth resources constraints.
237+
cache and memory bandwidth resource constraints, and the `mon_data` directory for
238+
CMT and MBM statistics.
213239

214240
The file `tasks` has a list of tasks that belong to this group (e.g.,
215241
"<container_id>" group). Tasks can be added to a group by writing the task ID
@@ -223,9 +249,7 @@ Each resource (L3 cache, memory bandwidth) has its own line and format.
223249
L3 cache schema:
224250
It has allocation bitmasks/values for L3 cache on each socket, which
225251
contains L3 cache id and capacity bitmask (CBM).
226-
```
227252
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
228-
```
229253
For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
230254
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
231255

@@ -239,9 +263,7 @@ set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
239263
Memory bandwidth schema:
240264
It has allocation values for memory bandwidth on each socket, which contains
241265
L3 cache id and memory bandwidth.
242-
```
243266
Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
244-
```
245267
For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
246268

247269
The minimum bandwidth percentage value for each CPU model is predefined and
@@ -251,7 +273,7 @@ that is allocated is also dependent on the CPU model and can be looked up at
251273
min_bw + N * bw_gran. Intermediate values are rounded to the next control
252274
step available on the hardware.
253275

254-
If MBA Software Controller is enabled through mount option "-o mba_MBps"
276+
If MBA Software Controller is enabled through mount option "-o mba_MBps":
255277
mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
256278
We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit
257279
instead of "percentages". The kernel underneath would use a software feedback
@@ -263,11 +285,12 @@ For example, on a two-socket machine, the schema line could be
263285
"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
264286
and 7000 MBps memory bandwidth limit on socket 1.
265287

266-
For more information about Intel RDT kernel interface:
288+
For more information about Intel RDT kernel interface:
267289
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
268290

269-
```
291+
270292
An example for runc:
293+
```
271294
Consider a two-socket machine with two L3 caches where the default CBM is
272295
0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
273296
with a memory bandwidth granularity of 10%.
@@ -278,10 +301,26 @@ maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
278301
279302
"linux": {
280303
"intelRdt": {
281-
"closID": "guaranteed_group",
282304
"l3CacheSchema": "L3:0=7f0;1=1f",
283305
"memBwSchema": "MB:0=20;1=70"
284-
}
306+
}
307+
}
308+
```
309+
Another example:
310+
```
311+
We only want to monitor memory bandwidth and LLC occupancy.
312+
"linux": {
313+
"intelRdt": {
314+
}
315+
}
316+
317+
or
318+
319+
"linux": {
320+
"intelRdt": {
321+
"l3CacheSchema": "",
322+
"memBwSchema": ""
323+
}
285324
}
286325
```
287326

libcontainer/configs/config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ type Config struct {
192192
NoNewKeyring bool `json:"no_new_keyring"`
193193

194194
// IntelRdt specifies settings for Intel RDT group that the container is placed into
195-
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
195+
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available.
196196
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
197197

198198
// RootlessEUID is set when the runc was launched with non-zero EUID.

libcontainer/configs/validate/validator.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,23 +182,31 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
182182

183183
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
184184
if config.IntelRdt != nil {
185+
/*
185186
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
186187
return errors.New("intelRdt is specified in config, but Intel RDT is not supported or enabled")
187188
}
188-
189+
*/
189190
if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
190191
return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
191192
}
192193
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
193194
return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
194195
}
195196

197+
if !intelrdt.IsMBMEnabled() && config.IntelRdt.L3CacheSchema == "" && config.IntelRdt.MemBwSchema == "" {
198+
return errors.New("intelRdt is pecified in config, but Intel RDT/MBM is not enabled")
199+
}
200+
201+
/*
196202
if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
197203
return errors.New("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
198204
}
199205
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
200206
return errors.New("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
201207
}
208+
209+
*/
202210
}
203211

204212
return nil

libcontainer/container_linux.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,10 +1852,6 @@ func (c *linuxContainer) currentState() (*State, error) {
18521852
startTime, _ = c.initProcess.startTime()
18531853
externalDescriptors = c.initProcess.externalDescriptors()
18541854
}
1855-
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
1856-
if err != nil {
1857-
intelRdtPath = ""
1858-
}
18591855
state := &State{
18601856
BaseState: BaseState{
18611857
ID: c.ID(),
@@ -1866,7 +1862,7 @@ func (c *linuxContainer) currentState() (*State, error) {
18661862
},
18671863
Rootless: c.config.RootlessEUID && c.config.RootlessCgroups,
18681864
CgroupPaths: c.cgroupManager.GetPaths(),
1869-
IntelRdtPath: intelRdtPath,
1865+
IntelRdtPath: c.intelRdtManager.GetPath(),
18701866
NamespacePaths: make(map[configs.NamespaceType]string),
18711867
ExternalDescriptors: externalDescriptors,
18721868
}

libcontainer/factory_linux.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -313,14 +313,13 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
313313
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
314314
root: containerRoot,
315315
created: state.Created,
316+
intelRdtManager: l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath),
316317
}
317318
c.state = &loadedState{c: c}
318319
if err := c.refreshState(); err != nil {
319320
return nil, err
320321
}
321-
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
322-
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
323-
}
322+
324323
return c, nil
325324
}
326325

0 commit comments

Comments
 (0)