Skip to content

Commit eac3328

Browse files
committed
refactor: move some c code to go
Move all of the stage-1 C code and some of the stage-2 C code to Go. Because this code is not related to namespaces, it should be implemented in Go. Signed-off-by: lifubang <[email protected]>
1 parent 2cc36c2 commit eac3328

10 files changed

+356
-800
lines changed

libcontainer/configs/config.go

+5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ type IDMap struct {
2727
Size int64 `json:"size"`
2828
}
2929

30+
// ToString is to serize the IDMap to a string.
31+
func (i IDMap) ToString() string {
32+
return fmt.Sprintf("%d %d %d", i.ContainerID, i.HostID, i.Size)
33+
}
34+
3035
// Seccomp represents syscall restrictions
3136
// By default, only the native architecture of the kernel is allowed to be used
3237
// for syscalls. Additional architectures can be added by specifying them in

libcontainer/container_linux.go

+6-73
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/opencontainers/runtime-spec/specs-go"
1919
"github.com/sirupsen/logrus"
2020
"github.com/vishvananda/netlink/nl"
21-
"golang.org/x/sys/execabs"
2221
"golang.org/x/sys/unix"
2322

2423
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -580,6 +579,10 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
580579
cmd.Env = append(cmd.Env,
581580
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
582581
)
582+
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.stage1SockChild)
583+
cmd.Env = append(cmd.Env,
584+
"_LIBCONTAINER_STAGE1PIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
585+
)
583586
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
584587
cmd.Env = append(cmd.Env,
585588
"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
@@ -1046,17 +1049,6 @@ func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]str
10461049
return paths, nil
10471050
}
10481051

1049-
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
1050-
data := bytes.NewBuffer(nil)
1051-
for _, im := range idMap {
1052-
line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size)
1053-
if _, err := data.WriteString(line); err != nil {
1054-
return nil, err
1055-
}
1056-
}
1057-
return data.Bytes(), nil
1058-
}
1059-
10601052
// netlinkError is an error wrapper type for use by custom netlink message
10611053
// types. Panics with errors are wrapped in netlinkError so that the recover
10621054
// in bootstrapData can distinguish intentional panics.
@@ -1103,59 +1095,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
11031095
})
11041096
}
11051097

1106-
// write namespace paths only when we are not joining an existing user ns
1107-
_, joinExistingUser := nsMaps[configs.NEWUSER]
1108-
if !joinExistingUser {
1109-
// write uid mappings
1110-
if len(c.config.UIDMappings) > 0 {
1111-
if c.config.RootlessEUID {
1112-
// We resolve the paths for new{u,g}idmap from
1113-
// the context of runc to avoid doing a path
1114-
// lookup in the nsexec context.
1115-
if path, err := execabs.LookPath("newuidmap"); err == nil {
1116-
r.AddData(&Bytemsg{
1117-
Type: UidmapPathAttr,
1118-
Value: []byte(path),
1119-
})
1120-
}
1121-
}
1122-
b, err := encodeIDMapping(c.config.UIDMappings)
1123-
if err != nil {
1124-
return nil, err
1125-
}
1126-
r.AddData(&Bytemsg{
1127-
Type: UidmapAttr,
1128-
Value: b,
1129-
})
1130-
}
1131-
1132-
// write gid mappings
1133-
if len(c.config.GIDMappings) > 0 {
1134-
b, err := encodeIDMapping(c.config.GIDMappings)
1135-
if err != nil {
1136-
return nil, err
1137-
}
1138-
r.AddData(&Bytemsg{
1139-
Type: GidmapAttr,
1140-
Value: b,
1141-
})
1142-
if c.config.RootlessEUID {
1143-
if path, err := execabs.LookPath("newgidmap"); err == nil {
1144-
r.AddData(&Bytemsg{
1145-
Type: GidmapPathAttr,
1146-
Value: []byte(path),
1147-
})
1148-
}
1149-
}
1150-
if requiresRootOrMappingTool(c.config) {
1151-
r.AddData(&Boolmsg{
1152-
Type: SetgroupAttr,
1153-
Value: true,
1154-
})
1155-
}
1156-
}
1157-
}
1158-
11591098
if c.config.OomScoreAdj != nil {
11601099
// write oom_score_adj
11611100
r.AddData(&Bytemsg{
@@ -1164,12 +1103,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
11641103
})
11651104
}
11661105

1167-
// write rootless
1168-
r.AddData(&Boolmsg{
1169-
Type: RootlessEUIDAttr,
1170-
Value: c.config.RootlessEUID,
1171-
})
1172-
11731106
// write boottime and monotonic time ns offsets.
11741107
if c.config.TimeOffsets != nil {
11751108
var offsetSpec bytes.Buffer
@@ -1210,9 +1143,9 @@ func ignoreTerminateErrors(err error) error {
12101143
return err
12111144
}
12121145

1213-
func requiresRootOrMappingTool(c *configs.Config) bool {
1146+
func requiresRootOrMappingTool(gidMappings []configs.IDMap) bool {
12141147
gidMap := []configs.IDMap{
12151148
{ContainerID: 0, HostID: int64(os.Getegid()), Size: 1},
12161149
}
1217-
return !reflect.DeepEqual(c.GIDMappings, gidMap)
1150+
return !reflect.DeepEqual(gidMappings, gidMap)
12181151
}

libcontainer/container_setup.go

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package libcontainer
2+
3+
import (
4+
"encoding/binary"
5+
"fmt"
6+
"io"
7+
"os"
8+
9+
"github.com/opencontainers/runc/libcontainer/configs"
10+
"github.com/opencontainers/runc/libcontainer/system"
11+
"github.com/sirupsen/logrus"
12+
"github.com/vishvananda/netlink/nl"
13+
"golang.org/x/sys/execabs"
14+
"golang.org/x/sys/unix"
15+
)
16+
17+
// NsExecSyncMsg identifies a synchronisation message exchanged between the
// parent and the child while a container is being set up.
type NsExecSyncMsg uint32

// Sync message values, numbered consecutively starting at 0x40. Every "Pls"
// request is directly followed by its "Ack" counterpart.
//
// NOTE(review): presumably these must match the values used on the nsexec (C)
// side of the socket; confirm before renumbering.
const (
	syncUsermapPls     NsExecSyncMsg = 0x40
	syncUsermapAck     NsExecSyncMsg = 0x41
	syncRecvPidPls     NsExecSyncMsg = 0x42
	syncRecvPidAck     NsExecSyncMsg = 0x43
	syncTimeOffsetsPls NsExecSyncMsg = 0x44
	syncTimeOffsetsAck NsExecSyncMsg = 0x45
)
29+
30+
// NsExecSetup carries the state needed by the parent to answer the requests
// that nsexec sends while a container process is being set up.
type NsExecSetup struct {
31+
// process is the container process whose stage-1 socket, command and
// configuration are used by the setup helpers.
process *containerProcess
32+
}
33+
34+
// bufSize is the number of bytes in one sync message on the wire; messages
// are encoded and decoded as a single uint32.
const bufSize int = 4
35+
36+
// parseNsExecSync runs the given callback function on each message received
37+
// from the child. It will return once the child sends SYNC_RECVPID_PLS.
38+
func parseNsExecSync(r io.Reader, fn func(NsExecSyncMsg) error) error {
39+
logrus.Debugf("start to communicate with the nsexec\n")
40+
var msg NsExecSyncMsg
41+
var buf [bufSize]byte
42+
native := nl.NativeEndian()
43+
44+
for {
45+
if _, err := io.ReadAtLeast(r, buf[:], bufSize); err != nil {
46+
return err
47+
}
48+
msg = NsExecSyncMsg(native.Uint32(buf[:]))
49+
if err := fn(msg); err != nil {
50+
return err
51+
}
52+
if msg == syncRecvPidPls {
53+
break
54+
}
55+
}
56+
logrus.Debugf("finished communicating with the nsexec\n")
57+
return nil
58+
}
59+
60+
// ackSyncMsg is used to send a message to the child.
61+
func ackSyncMsg(f *os.File, msg NsExecSyncMsg) error {
62+
var buf [bufSize]byte
63+
native := nl.NativeEndian()
64+
native.PutUint32(buf[:], uint32(msg))
65+
if _, err := unix.Write(int(f.Fd()), buf[:]); err != nil {
66+
logrus.Debugf("failed to write message to nsexec: %v", err)
67+
return err
68+
}
69+
return nil
70+
}
71+
72+
// helpDoingNsExec is used to help the process to communicate with the nsexec.
73+
func (s *NsExecSetup) helpDoingNsExec() error {
74+
syncSock := s.process.comm.stage1SockParent
75+
err := parseNsExecSync(syncSock, func(msg NsExecSyncMsg) error {
76+
switch msg {
77+
case syncUsermapPls:
78+
logrus.Debugf("stage-1 requested userns mappings")
79+
if err := s.setupUsermap(); err != nil {
80+
return err
81+
}
82+
return ackSyncMsg(syncSock, syncUsermapAck)
83+
case syncRecvPidPls:
84+
logrus.Debugf("stage-1 reports pid")
85+
var pid uint32
86+
if err := binary.Read(syncSock, nl.NativeEndian(), &pid); err != nil {
87+
return err
88+
}
89+
s.process.childPid = int(pid)
90+
return ackSyncMsg(syncSock, syncRecvPidAck)
91+
case syncTimeOffsetsPls:
92+
logrus.Debugf("stage-1 request to configure timens offsets")
93+
if err := system.UpdateTimeNsOffsets(s.process.cmd.Process.Pid, s.process.container.config.TimeOffsets); err != nil {
94+
return err
95+
}
96+
return ackSyncMsg(syncSock, syncTimeOffsetsAck)
97+
default:
98+
}
99+
return fmt.Errorf("unexpected message %d", msg)
100+
})
101+
_ = syncSock.Close()
102+
return err
103+
}
104+
105+
// setupUsermap is used to set up the user mappings.
106+
func (s *NsExecSetup) setupUsermap() error {
107+
var uidMapPath, gidMapPath string
108+
109+
// Enable setgroups(2) if we've been asked to. But we also have to explicitly
110+
// disable setgroups(2) if we're creating a rootless container for single-entry
111+
// mapping. (this is required since Linux 3.19).
112+
// For rootless multi-entry mapping, we should use newuidmap/newgidmap
113+
// to do mapping user namespace.
114+
if s.process.config.RootlessEUID && !requiresRootOrMappingTool(s.process.config.Config.GIDMappings) {
115+
_ = system.UpdateSetgroups(s.process.cmd.Process.Pid, system.SetgroupsDeny)
116+
}
117+
118+
nsMaps := make(map[configs.NamespaceType]string)
119+
for _, ns := range s.process.container.config.Namespaces {
120+
if ns.Path != "" {
121+
nsMaps[ns.Type] = ns.Path
122+
}
123+
}
124+
_, joinExistingUser := nsMaps[configs.NEWUSER]
125+
if !joinExistingUser {
126+
// write uid mappings
127+
if len(s.process.container.config.UIDMappings) > 0 {
128+
if s.process.container.config.RootlessEUID {
129+
if path, err := execabs.LookPath("newuidmap"); err == nil {
130+
uidMapPath = path
131+
}
132+
}
133+
}
134+
135+
// write gid mappings
136+
if len(s.process.container.config.GIDMappings) > 0 {
137+
if s.process.container.config.RootlessEUID {
138+
if path, err := execabs.LookPath("newgidmap"); err == nil {
139+
gidMapPath = path
140+
}
141+
}
142+
}
143+
}
144+
145+
/* Set up mappings. */
146+
if err := system.UpdateUidmap(uidMapPath, s.process.cmd.Process.Pid, s.process.container.config.UIDMappings); err != nil {
147+
return err
148+
}
149+
return system.UpdateGidmap(gidMapPath, s.process.cmd.Process.Pid, s.process.container.config.GIDMappings)
150+
}

libcontainer/init_linux.go

+24-5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"strconv"
1414
"strings"
1515
"syscall"
16+
"unsafe"
1617

1718
"github.com/containerd/console"
1819
"github.com/moby/sys/user"
@@ -35,11 +36,6 @@ const (
3536
initStandard initType = "standard"
3637
)
3738

38-
type pid struct {
39-
Pid int `json:"stage2_pid"`
40-
PidFirstChild int `json:"stage1_pid"`
41-
}
42-
4339
// network is an internal struct used to setup container networks.
4440
type network struct {
4541
configs.Network
@@ -151,6 +147,11 @@ func startInitialization() (retErr error) {
151147

152148
logrus.SetOutput(logPipe)
153149
logrus.SetFormatter(new(logrus.JSONFormatter))
150+
151+
/* For debugging. */
152+
procName := append([]byte("runc:[2:INIT]"), 0)
153+
_ = unix.Prctl(unix.PR_SET_NAME, uintptr(unsafe.Pointer(&procName[0])), 0, 0, 0)
154+
154155
logrus.Debug("child process in init()")
155156

156157
// Only init processes have FIFOFD.
@@ -215,6 +216,24 @@ func startInitialization() (retErr error) {
215216
return err
216217
}
217218

219+
if _, err := unix.Setsid(); err != nil {
220+
return fmt.Errorf("setsid failed: %w", err)
221+
}
222+
223+
if err := unix.Setuid(0); err != nil {
224+
return fmt.Errorf("setuid failed %w", err)
225+
}
226+
227+
if err := unix.Setgid(0); err != nil {
228+
return fmt.Errorf("setgid failed %w", err)
229+
}
230+
231+
if !config.RootlessEUID && requiresRootOrMappingTool(config.Config.GIDMappings) {
232+
if err := unix.Setgroups([]int{0}); err != nil {
233+
return fmt.Errorf("setgroups failed %w", err)
234+
}
235+
}
236+
218237
// If init succeeds, it will not return, hence none of the defers will be called.
219238
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe)
220239
}

libcontainer/message_linux.go

+5-11
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,11 @@ import (
1111
// list of known message types we want to send to bootstrap program
1212
// The number is randomly chosen to not conflict with known netlink types
1313
const (
14-
InitMsg uint16 = 62000
15-
CloneFlagsAttr uint16 = 27281
16-
NsPathsAttr uint16 = 27282
17-
UidmapAttr uint16 = 27283
18-
GidmapAttr uint16 = 27284
19-
SetgroupAttr uint16 = 27285
20-
OomScoreAdjAttr uint16 = 27286
21-
RootlessEUIDAttr uint16 = 27287
22-
UidmapPathAttr uint16 = 27288
23-
GidmapPathAttr uint16 = 27289
24-
TimeOffsetsAttr uint16 = 27290
14+
InitMsg uint16 = 62000
15+
CloneFlagsAttr uint16 = 27281
16+
NsPathsAttr uint16 = 27282
17+
OomScoreAdjAttr uint16 = 27286
18+
TimeOffsetsAttr uint16 = 27290
2519
)
2620

2721
type Int32msg struct {

libcontainer/nsenter/log.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ void write_log(int level, const char *format, ...)
5858
if (stage == NULL)
5959
goto out;
6060
} else {
61-
ret = asprintf(&stage, "nsexec-%d", current_stage);
61+
ret = asprintf(&stage, "nsexec-%d", current_stage + 1);
6262
if (ret < 0) {
6363
stage = NULL;
6464
goto out;

0 commit comments

Comments
 (0)