Skip to content

Commit a49b6e8

Browse files
authored
Merge pull request #909 from nlgwcy/release-0.5
cherry-pick #610 to Release 0.5
2 parents 1f49f17 + ef9e1e5 commit a49b6e8

File tree

8 files changed

+499
-58
lines changed

8 files changed

+499
-58
lines changed

bpf/deserialization_to_bpf_map/deserialization_to_bpf_map.c

+388-44
Large diffs are not rendered by default.

bpf/deserialization_to_bpf_map/deserialization_to_bpf_map.h

+16-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,21 @@
44
#ifndef __DESERIALIZATION_TO_BPF_MAP_H__
55
#define __DESERIALIZATION_TO_BPF_MAP_H__
66

7+
#include <stdbool.h>
8+
79
/* must be kept equal to MAP_SIZE_OF_OUTTER_MAP */
8-
#define MAX_OUTTER_MAP_ENTRIES (8192)
10+
#define MAX_OUTTER_MAP_ENTRIES (1 << 20)
11+
#define OUTTER_MAP_USAGE_HIGH_PERCENT (0.7)
12+
#define OUTTER_MAP_USAGE_LOW_PERCENT (0.3)
13+
#define TASK_SIZE (512)
14+
15+
// 32,768
16+
#define OUTTER_MAP_SCALEUP_STEP (1 << 15)
17+
// 8,192
18+
#define OUTTER_MAP_SCALEIN_STEP (1 << 13)
19+
20+
#define ELASTIC_SLOTS_NUM \
21+
((OUTTER_MAP_SCALEUP_STEP > OUTTER_MAP_SCALEIN_STEP) ? OUTTER_MAP_SCALEUP_STEP : OUTTER_MAP_SCALEIN_STEP)
922

1023
struct element_list_node {
1124
void *elem;
@@ -20,6 +33,7 @@ void deserial_free_elem_list(struct element_list_node *head);
2033
int deserial_delete_elem(void *key, const void *msg_desciptor);
2134

2235
int deserial_init();
23-
void deserial_uninit();
36+
void deserial_uninit(bool persist);
37+
int inner_map_mng_persist();
2438

2539
#endif /* __DESERIALIZATION_TO_BPF_MAP_H__ */

bpf/include/bpf_common.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
/* Ip(0.0.0.2 | ::2) used for control command, e.g. KmeshControl */
1818
#define CONTROL_CMD_IP 2
1919

20-
#define MAP_SIZE_OF_OUTTER_MAP 8192
20+
#define MAP_SIZE_OF_OUTTER_MAP (1 << 20)
2121

2222
#define BPF_DATA_MAX_LEN \
2323
192 /* this value should be \

bpf/kmesh/ads/include/config.h

+8-9
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,15 @@
2929
#define MAP_SIZE_OF_PER_CLUSTER 32
3030
#define MAP_SIZE_OF_PER_ENDPOINT 64
3131

32-
#define MAP_SIZE_OF_MAX 8192
33-
#define MAP_SIZE_OF_OUTTER_MAP 8192
32+
#define MAP_SIZE_OF_MAX 8192
3433

35-
#define MAP_SIZE_OF_LISTENER BPF_MIN(MAP_SIZE_OF_MAX, MAP_SIZE_OF_PER_LISTENER)
36-
#define MAP_SIZE_OF_FILTER_CHAIN BPF_MIN(MAP_SIZE_OF_MAX, MAP_SIZE_OF_PER_FILTER_CHAIN *MAP_SIZE_OF_LISTENER)
37-
#define MAP_SIZE_OF_FILTER BPF_MIN(MAP_SIZE_OF_MAX, MAP_SIZE_OF_PER_FILTER *MAP_SIZE_OF_FILTER_CHAIN)
38-
#define MAP_SIZE_OF_VIRTUAL_HOST BPF_MIN(MAP_SIZE_OF_MAX, MAP_SIZE_OF_PER_VIRTUAL_HOST *MAP_SIZE_OF_FILTER)
39-
#define MAP_SIZE_OF_ROUTE BPF_MIN(MAP_SIZE_OF_MAX, MAP_SIZE_OF_PER_ROUTE *MAP_SIZE_OF_VIRTUAL_HOST)
40-
#define MAP_SIZE_OF_CLUSTER BPF_MIN(MAP_SIZE_OF_MAX, MAP_SIZE_OF_PER_CLUSTER *MAP_SIZE_OF_ROUTE)
41-
#define MAP_SIZE_OF_ENDPOINT BPF_MIN(MAP_SIZE_OF_MAX, MAP_SIZE_OF_PER_ENDPOINT *MAP_SIZE_OF_CLUSTER)
34+
#define MAP_SIZE_OF_LISTENER (1 << 10)
35+
#define MAP_SIZE_OF_FILTER_CHAIN (MAP_SIZE_OF_PER_FILTER_CHAIN * MAP_SIZE_OF_LISTENER)
36+
#define MAP_SIZE_OF_FILTER (MAP_SIZE_OF_PER_FILTER * MAP_SIZE_OF_FILTER_CHAIN)
37+
#define MAP_SIZE_OF_VIRTUAL_HOST (MAP_SIZE_OF_PER_VIRTUAL_HOST * MAP_SIZE_OF_FILTER)
38+
#define MAP_SIZE_OF_ROUTE (1 << 14)
39+
#define MAP_SIZE_OF_CLUSTER (1 << 14)
40+
#define MAP_SIZE_OF_ENDPOINT (1 << 17)
4241

4342
// rename map to avoid truncation when name length exceeds BPF_OBJ_NAME_LEN = 16
4443
#define map_of_listener kmesh_listener
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
---
2+
title: map-in-map management enhancement
3+
authors:
4+
- "@nlgwcy"
5+
reviewers:
6+
- "@hzxuzhonghu"
7+
- "@supercharge-xsy"
8+
- "@bitcoffeeiux"
9+
approvers:
10+
- "@robot"
11+
- TBD
12+
13+
creation-date: 2024-07-20
14+
15+
---
16+
17+
## map-in-map management enhancement
18+
19+
### Summary
20+
21+
In ADS mode, Kmesh supports elastic scaling of map-in-map records to meet the traffic management requirements of large-scale clusters.
22+
23+
### Motivation
24+
25+
As mentioned in [optimizing_bpf_map_update_in_xDS_mode](https://github.com/kmesh-net/kmesh/blob/main/docs/proposal/optimizing_bpf_map_update_in_xDS_mode-en.md), to solve the problem of slow map-in-map record updates, Kmesh creates all records at once during startup, trading space for time. This is not a problem in small-scale clusters. However, to support a large-scale cluster (for example, 5000 services and 100,000 pods), the map-in-map table must be defined with a very large size, and because maps of the `BPF_MAP_TYPE_ARRAY_OF_MAPS` type do not support `BPF_F_NO_PREALLOC`, this wastes a great deal of memory. Elastic scaling of map-in-map records must therefore be supported to meet the traffic management requirements of large-scale clusters.
26+
27+
#### Goals
28+
29+
- Support traffic management in large-scale clusters.
30+
- Consider the configuration restoration scenario.
31+
32+
### Proposal
33+
34+
Kmesh tracks map-in-map usage in user space. To support elastic scaling, the management structure is extended as follows:
35+
36+
```c
37+
struct inner_map_mng {
38+
int inner_fd;
39+
int outter_fd;
40+
struct bpf_map_info inner_info;
41+
struct bpf_map_info outter_info;
42+
struct inner_map_stat inner_maps[MAX_OUTTER_MAP_ENTRIES];
43+
int elastic_slots[OUTTER_MAP_ELASTIC_SIZE];
44+
int used_cnt; // real used count
45+
int alloced_cnt; // real alloced count
46+
int max_alloced_idx; // max alloced index, there may be holes.
47+
int init;
48+
sem_t fin_tasks;
49+
int elastic_task_exit; // elastic scaling thread exit flag
50+
};
51+
52+
struct inner_map_stat {
53+
int map_fd;
54+
unsigned int used : 1;
55+
unsigned int alloced : 1;
56+
unsigned int resv : 30;
57+
};
58+
```
59+
60+
Map-in-map scaling process:
61+
62+
![map-in-map-elastic-process](pics/map-in-map-elastic-process.svg)
63+
64+
The following is an example of map-in-map scale-in and scale-out:
65+
66+
![map-in-map-elastic](pics/map-in-map-elastic.svg)
67+
68+
69+

docs/proposal/pics/map-in-map-elastic-process.svg

+4
Loading

docs/proposal/pics/map-in-map-elastic.svg

+4
Loading

pkg/bpf/bpf.go

+9-2
Original file line numberDiff line numberDiff line change
@@ -173,21 +173,23 @@ func StopMda() error {
173173

174174
func (l *BpfLoader) Stop() {
175175
var err error
176-
if GetExitType() == Restart {
176+
if GetExitType() == Restart && l.config.WdsEnabled() {
177+
C.deserial_uninit(true)
177178
log.Infof("kmesh restart, not clean bpf map and prog")
178179
return
179180
}
180181

181182
closeMap(l.versionMap)
182183

183184
if l.config.AdsEnabled() {
184-
C.deserial_uninit()
185+
C.deserial_uninit(false)
185186
if err = l.obj.Detach(); err != nil {
186187
CleanupBpfMap()
187188
log.Errorf("failed detach when stop kmesh, err:%s", err)
188189
return
189190
}
190191
} else if l.config.WdsEnabled() {
192+
C.deserial_uninit(false)
191193
if err = l.workloadObj.Detach(); err != nil {
192194
CleanupBpfMap()
193195
log.Errorf("failed detach when stop kmesh, err:%s", err)
@@ -311,6 +313,11 @@ func recoverVersionMap(pinPath string) *ebpf.Map {
311313

312314
func closeMap(m *ebpf.Map) {
313315
var err error
316+
317+
if m == nil {
318+
return
319+
}
320+
314321
err = m.Unpin()
315322
if err != nil {
316323
log.Errorf("Failed to unpin kmesh_version: %v", err)

0 commit comments

Comments
 (0)