
Commit c45fef7

Vminitd: Add pause command (#418)
Since we now support a pod type, and pid namespace sharing is quite common for pods, let's add a pause-container-like command to vminitd to eventually enable pid namespace sharing between containers in our variant of a pod. This changes vminitd slightly to have pause and init (default) commands, as that seemed simpler than creating a whole new binary to include in the guest image.
1 parent 5c190dc commit c45fef7
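
PauseCommand.run(log:) is added elsewhere in this commit, so its body is not part of the Application.swift diff below. For orientation only: a pause-style process in the mold of the classic pause container does little more than block until it is told to shut down, reaping any children that get re-parented to it so they do not linger as zombies in a shared pid namespace. A minimal sketch of that idea (the PauseSketch name and its body are illustrative, not the PauseCommand this commit adds) could look like:

import Logging

#if canImport(Musl)
import Musl
#elseif canImport(Glibc)
import Glibc
#endif

// Illustrative sketch only; it mirrors the run(log:) call site in the diff
// but is not the real PauseCommand from this commit.
enum PauseSketch {
    static func run(log: Logger) throws {
        log.info("pause process started")

        // Exit cleanly when the sandbox is torn down.
        signal(SIGINT) { _ in exit(0) }
        signal(SIGTERM) { _ in exit(0) }

        // Reap children re-parented to this process so they do not pile up
        // as zombies in the shared pid namespace.
        signal(SIGCHLD) { _ in
            while waitpid(-1, nil, WNOHANG) > 0 {}
        }

        // Nothing else to do: sleep until the next signal, forever.
        while true {
            pause()
        }
    }
}

Keeping this as a subcommand of the existing binary, selected purely by the first argument, is the trade-off the message above describes: no second executable has to be built into the guest image.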

File tree

4 files changed (+289, -160 lines)

vminitd/Sources/vminitd/Application.swift

Lines changed: 46 additions & 159 deletions
@@ -14,183 +14,70 @@
 // limitations under the License.
 //===----------------------------------------------------------------------===//

-import Cgroup
-import Containerization
-import ContainerizationError
 import ContainerizationOS
 import Foundation
 import Logging
-import NIOCore
-import NIOPosix
-
-#if os(Linux)
-import Musl
-import LCShim
-#endif

 @main
 struct Application {
-    private static let foregroundEnvVar = "FOREGROUND"
-    private static let vsockPort = 1024
-    private static let standardErrorLock = NSLock()
-
-    private static func runInForeground(_ log: Logger) throws {
-        log.info("running vminitd under pid1")
-
-        var command = Command("/sbin/vminitd")
-        command.attrs = .init(setsid: true)
-        command.stdin = .standardInput
-        command.stdout = .standardOutput
-        command.stderr = .standardError
-        command.environment = ["\(foregroundEnvVar)=1"]
-
-        try command.start()
-        _ = try command.wait()
-    }
-
-    private static func adjustLimits() throws {
-        var limits = rlimit()
-        guard getrlimit(RLIMIT_NOFILE, &limits) == 0 else {
-            throw POSIXError(.init(rawValue: errno)!)
-        }
-        limits.rlim_cur = 65536
-        limits.rlim_max = 65536
-        guard setrlimit(RLIMIT_NOFILE, &limits) == 0 else {
-            throw POSIXError(.init(rawValue: errno)!)
-        }
-    }
-
-    @Sendable
-    private static func standardError(label: String) -> StreamLogHandler {
-        standardErrorLock.withLock {
-            StreamLogHandler.standardError(label: label)
+    static func main() async throws {
+        LoggingSystem.bootstrap(StreamLogHandler.standardError)
+
+        // Parse command line arguments
+        let args = CommandLine.arguments
+        let command = args.count > 1 ? args[1] : "init"
+
+        switch command {
+        case "pause":
+            let log = Logger(label: "pause")
+
+            log.info("Running pause command")
+            try PauseCommand.run(log: log)
+        case "init":
+            fallthrough
+        default:
+            let log = Logger(label: "vminitd")
+
+            log.info("Running init command")
+            try Self.mountProc(log: log)
+            try await InitCommand.run(log: log)
         }
     }

-    static func main() async throws {
-        LoggingSystem.bootstrap(standardError)
-        var log = Logger(label: "vminitd")
-
-        try adjustLimits()
-
-        // when running under debug mode, launch vminitd as a sub process of pid1
-        // so that we get a chance to collect better logs and errors before pid1 exists
-        // and the kernel panics.
-        #if DEBUG
-        let environment = ProcessInfo.processInfo.environment
-        let foreground = environment[Self.foregroundEnvVar]
-        log.info("checking for shim var \(foregroundEnvVar)=\(String(describing: foreground))")
-
-        if foreground == nil {
-            try runInForeground(log)
-            exit(0)
+    // Swift seems like it has some fun issues trying to spawn threads if /proc isn't around, so we
+    // do this before calling our first async function.
+    static func mountProc(log: Logger) throws {
+        // Is it already mounted (would only be true in debug builds where we re-exec ourselves)?
+        if isProcMounted() {
+            return
        }

-        // since we are not running as pid1 in this mode we must set ourselves
-        // as a subpreaper so that all child processes are reaped by us and not
-        // passed onto our parent.
-        CZ_set_sub_reaper()
-        #endif
+        log.info("mounting /proc")

-        log.logLevel = .debug
-
-        signal(SIGPIPE, SIG_IGN)
-
-        log.info("vminitd booting")
-
-        // Set of mounts necessary to be mounted prior to taking any RPCs.
-        // 1. /proc as the sysctl rpc wouldn't make sense if it wasn't there.
-        // 2. /run as that is where we store container state.
-        // 3. /sys as we need it for /sys/fs/cgroup
-        // 4. /sys/fs/cgroup to add the agent to a cgroup, as well as containers later.
-        let mounts = [
-            ContainerizationOS.Mount(
-                type: "proc",
-                source: "proc",
-                target: "/proc",
-                options: []
-            ),
-            ContainerizationOS.Mount(
-                type: "tmpfs",
-                source: "tmpfs",
-                target: "/run",
-                options: []
-            ),
-            ContainerizationOS.Mount(
-                type: "sysfs",
-                source: "sysfs",
-                target: "/sys",
-                options: []
-            ),
-            ContainerizationOS.Mount(
-                type: "cgroup2",
-                source: "none",
-                target: "/sys/fs/cgroup",
-                options: []
-            ),
-        ]
-
-        for mnt in mounts {
-            log.info("mounting \(mnt.target)")
-
-            try mnt.mount(createWithPerms: 0o755)
-        }
-        try Binfmt.mount()
-
-        let cgManager = Cgroup2Manager(
-            group: URL(filePath: "/vminitd"),
-            logger: log
+        let mnt = ContainerizationOS.Mount(
+            type: "proc",
+            source: "proc",
+            target: "/proc",
+            options: []
        )
-        try cgManager.create()
-        try cgManager.toggleAllAvailableControllers(enable: true)
-
-        // Set memory.high threshold to 75 MiB
-        let threshold: UInt64 = 75 * 1024 * 1024
-        try cgManager.setMemoryHigh(bytes: threshold)
-        try cgManager.addProcess(pid: getpid())
+        try mnt.mount(createWithPerms: 0o755)
+    }

-        let memoryMonitor = try MemoryMonitor(
-            cgroupManager: cgManager,
-            threshold: threshold,
-            logger: log
-        ) { [log] (currentUsage, highMark) in
-            log.warning(
-                "vminitd memory threshold exceeded",
-                metadata: [
-                    "threshold_bytes": "\(threshold)",
-                    "current_bytes": "\(currentUsage)",
-                    "high_events_total": "\(highMark)",
-                ])
+    static func isProcMounted() -> Bool {
+        guard let data = try? String(contentsOfFile: "/proc/mounts", encoding: .utf8) else {
+            return false
        }

-        let t = Thread { [log] in
-            do {
-                try memoryMonitor.run()
-            } catch {
-                log.error("memory monitor failed: \(error)")
+        for line in data.split(separator: "\n") {
+            let fields = line.split(separator: " ")
+            if fields.count >= 2 {
+                let mountPoint = String(fields[1])
+                if mountPoint == "/proc" {
+                    return true
+                }
            }
        }
-        t.start()
-
-        let eg = MultiThreadedEventLoopGroup(numberOfThreads: System.coreCount)
-        let server = Initd(log: log, group: eg)
-
-        do {
-            log.info("serving vminitd API")
-            try await server.serve(port: vsockPort)
-            log.info("vminitd API returned, syncing filesystems")

-            #if os(Linux)
-            Musl.sync()
-            #endif
-        } catch {
-            log.error("vminitd boot error \(error)")
-
-            #if os(Linux)
-            Musl.sync()
-            #endif
-
-            exit(1)
-        }
+        return false
    }
 }
