Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions openhcl/openhcl_boot/src/arch/x86_64/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,19 @@ pub fn setup_vtl2_memory(shim_params: &ShimParams, partition_info: &PartitionInf
}
}

// For TDVMCALL based hypercalls, take the first 2 MB region from ram_buffer for
// hypercall IO pages. ram_buffer must not be used again beyond this point
// TODO: find an approach that does not require re-using the ram_buffer
if shim_params.isolation_type == IsolationType::Tdx {
// Update the VP context stored in the page of the architectural
// reset vector, such that TDX APs start with the appropriate
// page table and execution controls
crate::arch::tdx::tdx_prepare_ap_trampoline(
shim_params
.ap_page_tables
.expect("AP page tables must be provided for TDX mailbox boot")
.start(),
);

// For TDVMCALL based hypercalls, take the first 2 MB region from ram_buffer for
// hypercall IO pages. ram_buffer must not be used again beyond this point
let free_buffer = ram_buffer.as_mut_ptr() as u64;
assert!(free_buffer % X64_LARGE_PAGE_SIZE == 0);
// SAFETY: The bottom 2MB region of the ram_buffer is unused by the shim
Expand Down
6 changes: 2 additions & 4 deletions openhcl/openhcl_boot/src/arch/x86_64/tdx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ pub fn get_tdx_tsc_reftime() -> Option<u64> {
/// Update the TdxTrampolineContext, setting the necessary control registers for AP startup,
/// and ensuring that LGDT will be skipped, so the GDT page does not need to be added to the
/// e820 entries
pub fn tdx_prepare_ap_trampoline() {
pub fn tdx_prepare_ap_trampoline(cr3: u64) {
let context_ptr: *mut TdxTrampolineContext = RESET_VECTOR_PAGE as *mut TdxTrampolineContext;
// SAFETY: The TdxTrampolineContext is known to be stored at the architectural reset vector address
let tdxcontext: &mut TdxTrampolineContext = unsafe { context_ptr.as_mut().unwrap() };
Expand All @@ -191,13 +191,11 @@ pub fn tdx_prepare_ap_trampoline() {
tdxcontext.code_selector = 0;
tdxcontext.task_selector = 0;
tdxcontext.cr0 |= x86defs::X64_CR0_PG | x86defs::X64_CR0_PE | x86defs::X64_CR0_NE;
tdxcontext.cr3 = cr3;
tdxcontext.cr4 |= x86defs::X64_CR4_PAE | x86defs::X64_CR4_MCE;
}

pub fn setup_vtl2_vp(partition_info: &PartitionInfo) {
// Update the TDX Trampoline Context for AP Startup
tdx_prepare_ap_trampoline();

for cpu in 1..partition_info.cpus.len() {
hvcall()
.tdx_enable_vp_vtl2(cpu as u32)
Expand Down
6 changes: 3 additions & 3 deletions openhcl/openhcl_boot/src/host_params/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,13 +472,13 @@ impl PartitionInfo {
));
}

// Only specify pagetables as a reserved region on TDX, as they are used
// Only specify AP page tables as a reserved region on TDX, as they are used
// for AP startup via the mailbox protocol. On other platforms, the
// memory is free to be reclaimed.
if params.isolation_type == IsolationType::Tdx {
assert!(params.page_tables.is_some());
assert!(params.ap_page_tables.is_some());
address_space_builder = address_space_builder
.with_page_tables(params.page_tables.expect("always present on tdx"));
.with_page_tables(params.ap_page_tables.expect("always present on tdx"));
}

address_space_builder
Expand Down
16 changes: 8 additions & 8 deletions openhcl/openhcl_boot/src/host_params/shim_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ pub struct ShimParams {
/// Memory used by the shim.
pub used: MemoryRange,
pub bounce_buffer: Option<MemoryRange>,
/// Page tables region used by the shim.
pub page_tables: Option<MemoryRange>,
/// AP page tables region used by the shim.
pub ap_page_tables: Option<MemoryRange>,
}

impl ShimParams {
Expand All @@ -129,8 +129,8 @@ impl ShimParams {
used_end,
bounce_buffer_start,
bounce_buffer_size,
page_tables_start,
page_tables_size,
ap_page_tables_start,
ap_page_tables_size,
} = raw;

let isolation_type = get_isolation_type(supported_isolation_type);
Expand All @@ -142,11 +142,11 @@ impl ShimParams {
Some(MemoryRange::new(base..base + bounce_buffer_size))
};

let page_tables = if page_tables_size == 0 {
let ap_page_tables = if ap_page_tables_size == 0 {
None
} else {
let base = shim_base_address.wrapping_add_signed(page_tables_start);
Some(MemoryRange::new(base..base + page_tables_size))
let base = shim_base_address.wrapping_add_signed(ap_page_tables_start);
Some(MemoryRange::new(base..base + ap_page_tables_size))
};

Self {
Expand All @@ -171,7 +171,7 @@ impl ShimParams {
..shim_base_address.wrapping_add_signed(used_end),
),
bounce_buffer,
page_tables,
ap_page_tables,
}
}

Expand Down
8 changes: 4 additions & 4 deletions vm/loader/loader_defs/src/shim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ pub struct ShimParamsRaw {
pub bounce_buffer_start: i64,
/// The size of the bounce buffer range. This is 0 if unavailable.
pub bounce_buffer_size: u64,
/// The offset to the page_tables start address. This is 0 if unavailable.
pub page_tables_start: i64,
/// The size of the openhcl_boot page tables. This is 0 if unavailable.
pub page_tables_size: u64,
/// The offset to the ap_page_tables start address. This is 0 if unavailable.
pub ap_page_tables_start: i64,
/// The size of the openhcl_boot ap page tables. This is 0 if unavailable.
pub ap_page_tables_size: u64,
}

open_enum! {
Expand Down
2 changes: 1 addition & 1 deletion vm/loader/manifests/openhcl-x64-cvm-dev.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,4 @@
}
}
]
}
}
30 changes: 23 additions & 7 deletions vm/loader/page_table/src/x64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,13 @@ impl PageTableEntry {
const VALID_BITS: u64 = 0x000f_ffff_ffff_f000;

/// Set an AMD64 PDE to either represent a leaf 2MB page or PDE.
/// This sets the PTE to preset, accessed, dirty, read write execute.
pub fn set_entry(&mut self, entry_type: PageTableEntryType) {
self.entry = X64_PTE_PRESENT | X64_PTE_ACCESSED | X64_PTE_READ_WRITE;
/// This sets the PTE to present and accessed; the read/write bit is also set unless `read_only`.
pub fn set_entry(&mut self, entry_type: PageTableEntryType, read_only: bool) {
if read_only {
self.entry = X64_PTE_PRESENT | X64_PTE_ACCESSED;
} else {
self.entry = X64_PTE_PRESENT | X64_PTE_ACCESSED | X64_PTE_READ_WRITE;
}

match entry_type {
PageTableEntryType::Leaf1GbPage(address) => {
Expand Down Expand Up @@ -282,6 +286,7 @@ pub struct PageTableBuilder {
local_map: Option<(u64, u64)>,
confidential_bit: Option<u32>,
map_reset_vector: bool,
read_only: bool,
}

impl PteOps for PageTableBuilder {
Expand All @@ -307,6 +312,7 @@ impl PageTableBuilder {
size: 0,
local_map: None,
confidential_bit: None,
read_only: false,
map_reset_vector: false,
}
}
Expand All @@ -333,6 +339,12 @@ impl PageTableBuilder {
self
}

/// Map all pages as read-only: when `read_only` is `true`, the read/write
/// bit is omitted from every entry written during `build`. Defaults to
/// `false` (pages mapped read/write).
pub fn with_read_only(mut self, read_only: bool) -> Self {
    self.read_only = read_only;
    self
}

/// Build a set of X64 page tables identity mapping the given region. `size` must be less than 512GB.
/// This creates up to 3+N page tables: 1 PML4E and up to 2 PDPTE tables, and N page tables counted at 1 per GB of size,
/// for 2MB mappings.
Expand Down Expand Up @@ -504,6 +516,7 @@ pub fn build_page_tables_64(
address_bias: u64,
identity_map_size: IdentityMapSize,
pml4e_link: Option<(u64, u64)>,
read_only: bool,
) -> Vec<u8> {
// Allocate page tables. There are up to 6 total page tables:
// 1 PML4E (Level 4) (omitted if the address bias is non-zero)
Expand Down Expand Up @@ -532,13 +545,13 @@ pub fn build_page_tables_64(

// Set PML4E entry linking PML4E to PDPTE.
let output_address = page_table_gpa + pdpte_table_index as u64 * X64_PAGE_SIZE;
pml4e_table[0].set_entry(PageTableEntryType::Pde(output_address));
pml4e_table[0].set_entry(PageTableEntryType::Pde(output_address), read_only);

// Set PML4E entry to link the additional entry if specified.
if let Some((link_target_gpa, linkage_gpa)) = pml4e_link {
assert!((linkage_gpa & 0x7FFFFFFFFF) == 0);
pml4e_table[linkage_gpa as usize >> 39]
.set_entry(PageTableEntryType::Pde(link_target_gpa));
.set_entry(PageTableEntryType::Pde(link_target_gpa), read_only);
}

pdpte_table
Expand Down Expand Up @@ -568,11 +581,14 @@ pub fn build_page_tables_64(
let output_address = page_table_gpa + pde_table_index as u64 * X64_PAGE_SIZE;
let pdpte_entry = &mut pdpte_table[pdpte_index as usize];
assert!(!pdpte_entry.is_present());
pdpte_entry.set_entry(PageTableEntryType::Pde(output_address));
pdpte_entry.set_entry(PageTableEntryType::Pde(output_address), read_only);

// Set all 2MB entries in this PDE table.
for entry in pde_table.iter_mut() {
entry.set_entry(PageTableEntryType::Leaf2MbPage(current_va + address_bias));
entry.set_entry(
PageTableEntryType::Leaf2MbPage(current_va + address_bias),
read_only,
);
current_va += X64_LARGE_PAGE_SIZE;
}
}
Expand Down
1 change: 1 addition & 0 deletions vm/loader/src/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ pub fn load_config(
0,
IdentityMapSize::Size4Gb,
None,
false,
);
assert!(page_table.len() as u64 % HV_PAGE_SIZE == 0);
importer
Expand Down
95 changes: 65 additions & 30 deletions vm/loader/src/paravisor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,32 +359,10 @@ where
}
_ => None,
};

// HACK: On TDX, the kernel uses the ACPI AP Mailbox protocol to start APs.
// However, the kernel assumes that all kernel ram is identity mapped, as
// the kernel will jump to a startup routine in any arbitrary kernel ram
// range.
//
// For now, describe 3GB of memory identity mapped in the page table used by
// the mailbox assembly stub, so the kernel can start APs regardless of how
// large the initial memory size was. An upcoming change will instead have
// the bootshim modify the pagetable at runtime to guarantee all ranges
// reported in the E820 map to kernel as ram are mapped.
//
// FUTURE: A future kernel change could remove this requirement entirely by
// making the kernel spec compliant, and only require that the reset vector
// page is identity mapped.

let page_table_mapping_size = if isolation_type == IsolationType::Tdx {
3 * 1024 * 1024 * 1024
} else {
memory_size
};

let page_table_base_page_count = 5;
let page_table_dynamic_page_count = {
// Double the count to allow for simpler reconstruction.
calculate_pde_table_count(memory_start_address, page_table_mapping_size) * 2
calculate_pde_table_count(memory_start_address, memory_size) * 2
+ local_map.map_or(0, |v| calculate_pde_table_count(v.0, v.1))
};
let page_table_isolation_page_count = match isolation_type {
Expand All @@ -405,7 +383,7 @@ where
tracing::debug!(page_table_region_start, page_table_region_size);

let mut page_table_builder = PageTableBuilder::new(page_table_region_start)
.with_mapped_region(memory_start_address, page_table_mapping_size);
.with_mapped_region(memory_start_address, memory_size);

if let Some((local_map_start, size)) = local_map {
page_table_builder = page_table_builder.with_local_map(local_map_start, size);
Expand All @@ -427,8 +405,6 @@ where
let page_table_page_base = page_table_region_start / HV_PAGE_SIZE;
assert!(page_table.len() as u64 <= page_table_region_size);

let offset = offset;

if with_relocation {
// Indicate relocation information. Don't include page table region.
importer.relocation_region(
Expand All @@ -451,6 +427,55 @@ where
)?;
}

// TDX-isolated VMs require an AP page table to boot with the mailbox protocol.
//
// In the OpenHCL implementation of this protocol, we spin in the architectural reset
// vector until the kernel gives us a vector to jump to. The OpenHCL kernel can place
// this vector anywhere in the lower 4GB of GPA space, so we identity map the lower
// 4GB as R+X.
//
// NOTE(review): the builder below is constructed with `page_table_region_start`,
// but the resulting tables are imported at `ap_page_table_region_start` — confirm
// the self-referential root address is intended. Also, `with_mapped_region` is
// given `(memory_start_address, memory_size)` while `ap_page_table_page_count`
// (and this comment) size the mapping as 4GB starting at 0 — confirm which
// region is meant to be mapped.
let ap_page_table_region_start = offset;
let (
ap_page_table,
ap_page_table_page_base,
ap_page_table_region_start,
ap_page_table_region_size,
ap_page_table_page_count,
) = if isolation_type == IsolationType::Tdx {
let ap_page_table_size = 4 * 1024 * 1024 * 1024;

// TDX requires up to an extra 3 pages to map the reset vector as a
// 4K page.
let ap_page_table_page_count = (calculate_pde_table_count(0, ap_page_table_size)) + 3;

let ap_page_table_region_size = HV_PAGE_SIZE * ap_page_table_page_count;
offset += ap_page_table_region_size;

tracing::debug!(ap_page_table_region_start, ap_page_table_region_size);

let ap_page_table_builder = PageTableBuilder::new(page_table_region_start)
.with_mapped_region(memory_start_address, memory_size);

let ap_page_table = ap_page_table_builder
.with_read_only(true)
.with_reset_vector(true)
.build();

assert!(ap_page_table.len() as u64 % HV_PAGE_SIZE == 0);
let ap_page_table_page_base = ap_page_table_region_start / HV_PAGE_SIZE;
assert!(ap_page_table.len() as u64 <= ap_page_table_region_size);
(
Some(ap_page_table),
Some(ap_page_table_page_base),
Some(ap_page_table_region_start),
Some(ap_page_table_region_size),
Some(ap_page_table_page_count),
)
} else {
(None, None, None, None, None)
};

let offset = offset;

// The memory used by the loader must be smaller than the memory available.
if offset > memory_start_address + memory_size {
return Err(Error::NotEnoughMemory(offset - memory_start_address));
Expand Down Expand Up @@ -489,8 +514,8 @@ where
used_end: calculate_shim_offset(offset),
bounce_buffer_start: bounce_buffer.map_or(0, |r| calculate_shim_offset(r.start())),
bounce_buffer_size: bounce_buffer.map_or(0, |r| r.len()),
page_tables_start: calculate_shim_offset(page_table_region_start),
page_tables_size: page_table_region_size,
ap_page_tables_start: ap_page_table_region_start.map_or(0, |t| calculate_shim_offset(t)),
ap_page_tables_size: ap_page_table_region_size.unwrap_or(0),
};

tracing::debug!(boot_params_base, "shim gpa");
Expand All @@ -513,6 +538,16 @@ where
&page_table,
)?;

if isolation_type == IsolationType::Tdx {
importer.import_pages(
ap_page_table_page_base.expect("AP page tables are required for TDX"),
ap_page_table_page_count.expect("AP page tables are required for TDX"),
"underhill-ap-page-tables",
BootPageAcceptance::Exclusive,
&ap_page_table.expect("AP page tables are required for TDX"),
)?;
}

// Set selectors and control registers
// Setup two selectors and segment registers.
// ds, es, fs, gs, ss are linearSelector
Expand Down Expand Up @@ -1081,8 +1116,8 @@ where
used_end: calculate_shim_offset(next_addr),
bounce_buffer_start: 0,
bounce_buffer_size: 0,
page_tables_start: 0,
page_tables_size: 0,
ap_page_tables_start: 0,
ap_page_tables_size: 0,
};

importer
Expand Down
Loading