Skip to content

Commit 49d2d37

Browse files
committed
CA-422187: create an emergency reserve of pages
Do not let domains use up all of the available memory on the host: we have too many unexplained bugs in this area. As a workaround, try to reserve some amount (e.g. 256MiB) that domains cannot normally use from XAPI's point of view. During parallel domain construction, this emergency reserve can then be used by Xen. Signed-off-by: Edwin Török <[email protected]>
1 parent a4bc2bb commit 49d2d37

File tree

13 files changed

+22
-15
lines changed

13 files changed

+22
-15
lines changed

doc/content/design/numa.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ This function receives as arguments a domain ID and the number of nodes
112112
this domain is using (acquired using `domain_get_numa_info_node_pages`)
113113

114114
The number of NUMA nodes of the host (not domain) is reported by
115-
`Xenctrl.physinfo` which returns a value of type `physinfo`.
115+
`Xenctrlext.physinfo` which returns a value of type `physinfo`.
116116

117117
```diff
118118
index b4579862ff..491bd3fc73 100644

doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ to call:
6464
[wait_xen_free_mem](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L236-L272)
6565
to wait (if necessary), for the Xen memory scrubber to catch up reclaiming memory.
6666
It
67-
1. calls `Xenctrl.physinfo` which returns:
67+
1. calls `Xenctrlext.physinfo` which returns:
6868
- `hostinfo.free_pages` - the free and already scrubbed pages (available)
6969
- `host.scrub_pages` - the not yet scrubbed pages (not yet available)
7070
2. repeats this until a timeout as long as `free_pages` is *lower*

ocaml/libs/xenctrl-ext/xenctrlext.ml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,8 @@ let domain_claim_pages handle domid ?(numa_node = NumaNode.none) nr_pages =
131131
(** [get_nr_nodes handle] is the number of entries in the [memory] array
    reported by [numainfo handle]. *)
let get_nr_nodes handle = Array.length (numainfo handle).memory
134+
135+
(** [physinfo xc] is [Xenctrl.physinfo xc] with [free_pages] reduced by an
    emergency reserve, so that callers never account for all of the host's
    free memory.

    The reported [free_pages] is clamped at [0n]: when the host has no more
    free pages than the reserve, zero is returned instead of a negative page
    count. Every other field is passed through unchanged. *)
let physinfo xc =
  let info = Xenctrl.physinfo xc in
  (* 2^16 pages; this is 256 MiB if pages are 4 KiB -- TODO confirm page size *)
  let emergency_reserve_pages = Nativeint.shift_left 1n 16 in
  let free_pages =
    (* Subtracting unconditionally would yield a negative free-page count on
       a host that is already below the reserve. *)
    if Nativeint.compare info.free_pages emergency_reserve_pages > 0 then
      Nativeint.sub info.free_pages emergency_reserve_pages
    else
      0n
  in
  {info with free_pages}

ocaml/libs/xenctrl-ext/xenctrlext.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,5 @@ val domain_claim_pages : handle -> domid -> ?numa_node:NumaNode.t -> int -> unit
107107

108108
val get_nr_nodes : handle -> int
109109
(** Returns the count of NUMA nodes available in the system. *)
110+
111+
val physinfo : Xenctrl.handle -> Xenctrl.physinfo
(** [physinfo xc] returns the result of [Xenctrl.physinfo xc] with its
    [free_pages] field reduced by an emergency reserve of pages, so that
    callers do not treat all of the host's free memory as usable. The other
    fields are returned as reported by [Xenctrl.physinfo]. *)

ocaml/squeezed/src/squeeze_xen.ml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ let make_host ~verbose ~xc =
579579
pages -- this might cause something else to fail (eg domain builder?) *)
580580
while
581581
Int64.div
582-
((Xenctrl.physinfo xc).Xenctrl.scrub_pages |> Int64.of_nativeint)
582+
((Xenctrlext.physinfo xc).Xenctrl.scrub_pages |> Int64.of_nativeint)
583583
1024L
584584
<> 0L
585585
do
@@ -762,7 +762,7 @@ let make_host ~verbose ~xc =
762762
(* For the host free memory we sum the free pages and the pages needing
763763
scrubbing: we don't want to adjust targets simply because the scrubber is
764764
slow. *)
765-
let physinfo = Xenctrl.physinfo xc in
765+
let physinfo = Xenctrlext.physinfo xc in
766766
let free_pages_kib =
767767
Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.free_pages)
768768
and scrub_pages_kib =

ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ let dss_pcpus xc =
184184
let len = Array.length !physcpus in
185185
let newinfos =
186186
if len = 0 then (
187-
let physinfo = Xenctrl.physinfo xc in
187+
let physinfo = Xenctrlext.physinfo xc in
188188
let pcpus = physinfo.Xenctrl.nr_cpus in
189189
physcpus := if pcpus > 0 then Array.make pcpus 0L else [||] ;
190190
Xenctrl.pcpu_info xc pcpus
@@ -237,7 +237,7 @@ let count_power_state_running_domains domains =
237237
0 domains
238238

239239
let dss_hostload xc domains =
240-
let physinfo = Xenctrl.physinfo xc in
240+
let physinfo = Xenctrlext.physinfo xc in
241241
let pcpus = physinfo.Xenctrl.nr_cpus in
242242
let rec sum acc n f =
243243
match n with n when n >= 0 -> sum (acc + f n) (n - 1) f | _ -> acc
@@ -298,7 +298,7 @@ let _ =
298298
let _, domains, _ = Xenctrl_lib.domain_snapshot xc in
299299
Process.initialise () ;
300300
(* Share one page per PCPU and dom each *)
301-
let physinfo = Xenctrl.physinfo xc in
301+
let physinfo = Xenctrlext.physinfo xc in
302302
let shared_page_count =
303303
physinfo.Xenctrl.nr_cpus
304304
+ Int.max Rrd_interface.max_supported_vms (List.length domains)

ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ let generate_host_sources xc counters =
169169
in
170170
let memory_reclaimed = bytes_of_kib memory_reclaimed in
171171
let memory_possibly_reclaimed = bytes_of_kib memory_possibly_reclaimed in
172-
let physinfo = Xenctrl.physinfo xc in
172+
let physinfo = Xenctrlext.physinfo xc in
173173
let total_kib =
174174
Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.total_pages)
175175
in

ocaml/xenopsd/xc/domain.ml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib
235235
: bool =
236236
let open Memory in
237237
let rec wait accumulated_wait_time_seconds =
238-
let host_info = Xenctrl.physinfo xc in
238+
let host_info = Xenctrlext.physinfo xc in
239239
let free_memory_kib =
240240
kib_of_pages (Int64.of_nativeint host_info.Xenctrl.free_pages)
241241
in
@@ -275,7 +275,7 @@ let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib
275275
let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept
276276
num_of_vbds num_of_vifs =
277277
let open Xenctrl in
278-
let host_info = Xenctrl.physinfo xc in
278+
let host_info = Xenctrlext.physinfo xc in
279279

280280
(* Confirm that the running hypervisor supports a specific capability. *)
281281
let assert_capability cap ~on_error =

ocaml/xenopsd/xc/emu_manager.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ let non_debug_receive ?debug_callback cnx =
205205
let open Memory in
206206
let open Int64 in
207207
let open Xenctrl in
208-
let p = Xenctrl.physinfo xc in
208+
let p = Xenctrlext.physinfo xc in
209209
error "Memory F %Ld KiB S %Ld KiB T %Ld MiB"
210210
(p.free_pages |> of_nativeint |> kib_of_pages)
211211
(p.scrub_pages |> of_nativeint |> kib_of_pages)

ocaml/xenopsd/xc/memory_breakdown.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ let print_memory_field_names () =
217217

218218
(** Prints memory field values to the console. *)
219219
let print_memory_field_values xc xs =
220-
let host = Xenctrl.physinfo xc in
220+
let host = Xenctrlext.physinfo xc in
221221
let control_domain_info = Xenctrl.domain_getinfo xc 0 in
222222
let control_domain_id = control_domain_info.Xenctrl.handle in
223223
let guests =

0 commit comments

Comments
 (0)