|
14 | 14 | // limitations under the License. |
15 | 15 | //===----------------------------------------------------------------------===// |
16 | 16 |
|
17 | | -import Cgroup |
18 | | -import Containerization |
19 | | -import ContainerizationError |
20 | 17 | import ContainerizationOS |
21 | 18 | import Foundation |
22 | 19 | import Logging |
23 | | -import NIOCore |
24 | | -import NIOPosix |
25 | | - |
26 | | -#if os(Linux) |
27 | | -import Musl |
28 | | -import LCShim |
29 | | -#endif |
30 | 20 |
|
31 | 21 | @main |
32 | 22 | struct Application { |
33 | | - private static let foregroundEnvVar = "FOREGROUND" |
34 | | - private static let vsockPort = 1024 |
35 | | - private static let standardErrorLock = NSLock() |
36 | | - |
37 | | - private static func runInForeground(_ log: Logger) throws { |
38 | | - log.info("running vminitd under pid1") |
39 | | - |
40 | | - var command = Command("/sbin/vminitd") |
41 | | - command.attrs = .init(setsid: true) |
42 | | - command.stdin = .standardInput |
43 | | - command.stdout = .standardOutput |
44 | | - command.stderr = .standardError |
45 | | - command.environment = ["\(foregroundEnvVar)=1"] |
46 | | - |
47 | | - try command.start() |
48 | | - _ = try command.wait() |
49 | | - } |
50 | | - |
51 | | - private static func adjustLimits() throws { |
52 | | - var limits = rlimit() |
53 | | - guard getrlimit(RLIMIT_NOFILE, &limits) == 0 else { |
54 | | - throw POSIXError(.init(rawValue: errno)!) |
55 | | - } |
56 | | - limits.rlim_cur = 65536 |
57 | | - limits.rlim_max = 65536 |
58 | | - guard setrlimit(RLIMIT_NOFILE, &limits) == 0 else { |
59 | | - throw POSIXError(.init(rawValue: errno)!) |
60 | | - } |
61 | | - } |
62 | | - |
63 | | - @Sendable |
64 | | - private static func standardError(label: String) -> StreamLogHandler { |
65 | | - standardErrorLock.withLock { |
66 | | - StreamLogHandler.standardError(label: label) |
| 23 | + static func main() async throws { |
| 24 | + LoggingSystem.bootstrap(StreamLogHandler.standardError) |
| 25 | + |
| 26 | + // Parse command line arguments |
| 27 | + let args = CommandLine.arguments |
| 28 | + let command = args.count > 1 ? args[1] : "init" |
| 29 | + |
| 30 | + switch command { |
| 31 | + case "pause": |
| 32 | + let log = Logger(label: "pause") |
| 33 | + |
| 34 | + log.info("Running pause command") |
| 35 | + try PauseCommand.run(log: log) |
| 36 | + case "init": |
| 37 | + fallthrough |
| 38 | + default: |
| 39 | + let log = Logger(label: "vminitd") |
| 40 | + |
| 41 | + log.info("Running init command") |
| 42 | + try Self.mountProc(log: log) |
| 43 | + try await InitCommand.run(log: log) |
67 | 44 | } |
68 | 45 | } |
69 | 46 |
|
70 | | - static func main() async throws { |
71 | | - LoggingSystem.bootstrap(standardError) |
72 | | - var log = Logger(label: "vminitd") |
73 | | - |
74 | | - try adjustLimits() |
75 | | - |
76 | | - // when running under debug mode, launch vminitd as a sub process of pid1 |
77 | | - // so that we get a chance to collect better logs and errors before pid1 exits |
78 | | - // and the kernel panics. |
79 | | - #if DEBUG |
80 | | - let environment = ProcessInfo.processInfo.environment |
81 | | - let foreground = environment[Self.foregroundEnvVar] |
82 | | - log.info("checking for shim var \(foregroundEnvVar)=\(String(describing: foreground))") |
83 | | - |
84 | | - if foreground == nil { |
85 | | - try runInForeground(log) |
86 | | - exit(0) |
| 47 | + // Swift seems like it has some fun issues trying to spawn threads if /proc isn't around, so we |
| 48 | + // do this before calling our first async function. |
| 49 | + static func mountProc(log: Logger) throws { |
| 50 | + // Is it already mounted (would only be true in debug builds where we re-exec ourselves)? |
| 51 | + if isProcMounted() { |
| 52 | + return |
87 | 53 | } |
88 | 54 |
|
89 | | - // since we are not running as pid1 in this mode we must set ourselves |
90 | | - // as a subreaper so that all child processes are reaped by us and not |
91 | | - // passed onto our parent. |
92 | | - CZ_set_sub_reaper() |
93 | | - #endif |
| 55 | + log.info("mounting /proc") |
94 | 56 |
|
95 | | - log.logLevel = .debug |
96 | | - |
97 | | - signal(SIGPIPE, SIG_IGN) |
98 | | - |
99 | | - log.info("vminitd booting") |
100 | | - |
101 | | - // Set of mounts necessary to be mounted prior to taking any RPCs. |
102 | | - // 1. /proc as the sysctl rpc wouldn't make sense if it wasn't there. |
103 | | - // 2. /run as that is where we store container state. |
104 | | - // 3. /sys as we need it for /sys/fs/cgroup |
105 | | - // 4. /sys/fs/cgroup to add the agent to a cgroup, as well as containers later. |
106 | | - let mounts = [ |
107 | | - ContainerizationOS.Mount( |
108 | | - type: "proc", |
109 | | - source: "proc", |
110 | | - target: "/proc", |
111 | | - options: [] |
112 | | - ), |
113 | | - ContainerizationOS.Mount( |
114 | | - type: "tmpfs", |
115 | | - source: "tmpfs", |
116 | | - target: "/run", |
117 | | - options: [] |
118 | | - ), |
119 | | - ContainerizationOS.Mount( |
120 | | - type: "sysfs", |
121 | | - source: "sysfs", |
122 | | - target: "/sys", |
123 | | - options: [] |
124 | | - ), |
125 | | - ContainerizationOS.Mount( |
126 | | - type: "cgroup2", |
127 | | - source: "none", |
128 | | - target: "/sys/fs/cgroup", |
129 | | - options: [] |
130 | | - ), |
131 | | - ] |
132 | | - |
133 | | - for mnt in mounts { |
134 | | - log.info("mounting \(mnt.target)") |
135 | | - |
136 | | - try mnt.mount(createWithPerms: 0o755) |
137 | | - } |
138 | | - try Binfmt.mount() |
139 | | - |
140 | | - let cgManager = Cgroup2Manager( |
141 | | - group: URL(filePath: "/vminitd"), |
142 | | - logger: log |
| 57 | + let mnt = ContainerizationOS.Mount( |
| 58 | + type: "proc", |
| 59 | + source: "proc", |
| 60 | + target: "/proc", |
| 61 | + options: [] |
143 | 62 | ) |
144 | | - try cgManager.create() |
145 | | - try cgManager.toggleAllAvailableControllers(enable: true) |
146 | | - |
147 | | - // Set memory.high threshold to 75 MiB |
148 | | - let threshold: UInt64 = 75 * 1024 * 1024 |
149 | | - try cgManager.setMemoryHigh(bytes: threshold) |
150 | | - try cgManager.addProcess(pid: getpid()) |
| 63 | + try mnt.mount(createWithPerms: 0o755) |
| 64 | + } |
151 | 65 |
|
152 | | - let memoryMonitor = try MemoryMonitor( |
153 | | - cgroupManager: cgManager, |
154 | | - threshold: threshold, |
155 | | - logger: log |
156 | | - ) { [log] (currentUsage, highMark) in |
157 | | - log.warning( |
158 | | - "vminitd memory threshold exceeded", |
159 | | - metadata: [ |
160 | | - "threshold_bytes": "\(threshold)", |
161 | | - "current_bytes": "\(currentUsage)", |
162 | | - "high_events_total": "\(highMark)", |
163 | | - ]) |
| 66 | + static func isProcMounted() -> Bool { |
| 67 | + guard let data = try? String(contentsOfFile: "/proc/mounts", encoding: .utf8) else { |
| 68 | + return false |
164 | 69 | } |
165 | 70 |
|
166 | | - let t = Thread { [log] in |
167 | | - do { |
168 | | - try memoryMonitor.run() |
169 | | - } catch { |
170 | | - log.error("memory monitor failed: \(error)") |
| 71 | + for line in data.split(separator: "\n") { |
| 72 | + let fields = line.split(separator: " ") |
| 73 | + if fields.count >= 2 { |
| 74 | + let mountPoint = String(fields[1]) |
| 75 | + if mountPoint == "/proc" { |
| 76 | + return true |
| 77 | + } |
171 | 78 | } |
172 | 79 | } |
173 | | - t.start() |
174 | | - |
175 | | - let eg = MultiThreadedEventLoopGroup(numberOfThreads: System.coreCount) |
176 | | - let server = Initd(log: log, group: eg) |
177 | | - |
178 | | - do { |
179 | | - log.info("serving vminitd API") |
180 | | - try await server.serve(port: vsockPort) |
181 | | - log.info("vminitd API returned, syncing filesystems") |
182 | 80 |
|
183 | | - #if os(Linux) |
184 | | - Musl.sync() |
185 | | - #endif |
186 | | - } catch { |
187 | | - log.error("vminitd boot error \(error)") |
188 | | - |
189 | | - #if os(Linux) |
190 | | - Musl.sync() |
191 | | - #endif |
192 | | - |
193 | | - exit(1) |
194 | | - } |
| 81 | + return false |
195 | 82 | } |
196 | 83 | } |
0 commit comments