diff --git a/Cargo.lock b/Cargo.lock index d6ca24643..b0f4b00b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -72,6 +72,7 @@ dependencies = [ "kvm-ioctls", "libc", "smbios", + "tdx", "utils", "vm-memory", ] @@ -763,18 +764,16 @@ dependencies = [ [[package]] name = "kvm-bindings" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4933174d0cc4b77b958578cd45784071cc5ae212c2d78fbd755aaaa6dfa71a" +version = "0.9.0" +source = "git+https://github.com/jakecorrenti/kvm-bindings.git?branch=tdx#97032dd33d1509045a4195f3068df3255808d390" dependencies = [ "vmm-sys-util", ] [[package]] name = "kvm-ioctls" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "337d1afa126368bbd6a5c328048f71a69a737e9afe7e436b392a8f8d770c9171" +version = "0.16.0" +source = "git+https://github.com/jakecorrenti/kvm-ioctls.git?branch=tdx#b2cbebff5d59f73d8f6fde899fb69b100235823c" dependencies = [ "bitflags 2.6.0", "kvm-bindings", @@ -816,6 +815,7 @@ dependencies = [ "once_cell", "polly", "utils", + "vm-memory", "vmm", ] @@ -1487,6 +1487,19 @@ version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +[[package]] +name = "tdx" +version = "0.1.0" +source = "git+https://github.com/jakecorrenti/tdx.git?branch=libkrun-modifications#a6dec7d231edee81adf329baf5864a1d0f3724ff" +dependencies = [ + "bitflags 2.6.0", + "kvm-bindings", + "kvm-ioctls", + "libc", + "uuid", + "vmm-sys-util", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -1669,6 +1682,7 @@ name = "vmm" version = "0.1.0" dependencies = [ "arch", + "arch_gen", "bzip2", "codicon", "cpuid", @@ -1692,6 +1706,7 @@ dependencies = [ "serde", "serde_json", "sev", + "tdx", "utils", "vm-memory", "vmm-sys-util", diff --git a/Makefile b/Makefile index df39daabd..d926ffee5 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ SNP_INIT_SRC = init/tee/snp_attest.c 
\ init/tee/snp_attest.h \ $(KBS_INIT_SRC) \ +TDX_INIT_SRC = $(KBS_INIT_SRC) + KBS_LD_FLAGS = -lcurl -lidn2 -lssl -lcrypto -lzstd -lz -lbrotlidec-static \ -lbrotlicommon-static @@ -27,6 +29,14 @@ ifeq ($(SEV),1) INIT_SRC += $(SNP_INIT_SRC) BUILD_INIT = 0 endif +ifeq ($(TDX),1) + VARIANT = -tdx + FEATURE_FLAGS := --features intel-tdx,tee,blk,kbs-types,serde,serde_json,curl + INIT_DEFS += -DTDX=1 + INIT_DEFS += $(KBS_LD_FLAGS) + INIT_SRC += $(KBS_INIT_SRC) + BUILD_INIT = 0 +endif ifeq ($(GPU),1) FEATURE_FLAGS += --features gpu endif @@ -91,6 +101,9 @@ $(LIBRARY_RELEASE_$(OS)): $(INIT_BINARY) ifeq ($(SEV),1) mv target/release/libkrun.so target/release/$(KRUN_BASE_$(OS)) endif +ifeq ($(TDX),1) + mv target/release/libkrun.so target/release/$(KRUN_BASE_$(OS)) +endif ifeq ($(OS),Darwin) ifeq ($(EFI),1) install_name_tool -id libkrun-efi.dylib target/release/libkrun.dylib @@ -103,6 +116,9 @@ $(LIBRARY_DEBUG_$(OS)): $(INIT_BINARY) cargo build $(FEATURE_FLAGS) ifeq ($(SEV),1) mv target/debug/libkrun.so target/debug/$(KRUN_BASE_$(OS)) +endif +ifeq ($(TDX),1) + mv target/debug/libkrun.so target/debug/$(KRUN_BASE_$(OS)) endif cp target/debug/$(KRUN_BASE_$(OS)) $(LIBRARY_DEBUG_$(OS)) diff --git a/examples/Makefile b/examples/Makefile index 3eedfa471..1cdbe2fdf 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -4,6 +4,7 @@ LDFLAGS_x86_64_Linux = -lkrun LDFLAGS_aarch64_Linux = -lkrun LDFLAGS_arm64_Darwin = -L/opt/homebrew/lib -lkrun LDFLAGS_sev = -lkrun-sev +LDFLAGS_tdx = -lkrun-tdx LDFLAGS_efi = -L/opt/homebrew/lib -lkrun-efi CFLAGS = -O2 -g -I../include ROOTFS_DISTRO := fedora @@ -15,6 +16,9 @@ EXAMPLES := chroot_vm external_kernel ifeq ($(SEV),1) EXAMPLES := launch-tee endif +ifeq ($(TDX),1) + EXAMPLES := launch-tee +endif ifeq ($(EFI),1) EXAMPLES := boot_efi endif @@ -28,7 +32,12 @@ ifeq ($(OS),Darwin) endif launch-tee: launch-tee.c +ifeq ($(SEV),1) gcc -o $@ $< $(CFLAGS) $(LDFLAGS_sev) +endif +ifeq ($(TDX),1) + gcc -o $@ $< $(CFLAGS) $(LDFLAGS_tdx) +endif boot_efi: 
boot_efi.c gcc -o $@ $< $(CFLAGS) $(LDFLAGS_efi) diff --git a/examples/launch-tee.c b/examples/launch-tee.c index d7867e84d..063cdd5f3 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -120,6 +120,12 @@ int main(int argc, char *const argv[]) return -1; } + if (err = krun_split_irqchip(ctx_id, true)) { + errno = -err; + perror("Error setting split IRQCHIP property"); + return -1; + } + // Start and enter the microVM. Unless there is some error while creating the microVM // this function never returns. if (err = krun_start_enter(ctx_id)) { diff --git a/examples/tdx-config-noattest.json b/examples/tdx-config-noattest.json new file mode 100644 index 000000000..5a80f15bd --- /dev/null +++ b/examples/tdx-config-noattest.json @@ -0,0 +1,8 @@ +{ + "workload_id": "tdxtest", + "cpus": 1, + "ram_mib": 2048, + "tee": "tdx", + "tee_data": "{\"vendor_chain\": \"\", \"attestation_server_pubkey\": \"\"}", + "attestation_url": "" +} diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index 589c789ae..63284c0fb 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [features] tee = [] amd-sev = [ "tee" ] +intel-tdx = [ "tee", "tdx" ] efi = [] [dependencies] @@ -18,8 +19,9 @@ smbios = { path = "../smbios" } utils = { path = "../utils" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { git = "https://github.com/jakecorrenti/kvm-bindings.git", branch = "tdx", features = ["fam-wrappers"]} +kvm-ioctls = { git = "https://github.com/jakecorrenti/kvm-ioctls.git", branch = "tdx"} +tdx = { git = "https://github.com/jakecorrenti/tdx.git", branch = "libkrun-modifications", optional = true } [dev-dependencies] utils = { path = "../utils" } diff --git a/src/arch/src/x86_64/mod.rs b/src/arch/src/x86_64/mod.rs index 4253554fc..427b4c0e3 100644 --- a/src/arch/src/x86_64/mod.rs +++ b/src/arch/src/x86_64/mod.rs @@ -51,7 +51,10 @@ pub enum 
Error { // Where BIOS/VGA magic would live on a real PC. const EBDA_START: u64 = 0x9fc00; +#[cfg(not(feature = "intel-tdx"))] pub const RESET_VECTOR: u64 = 0xfff0; +#[cfg(feature = "intel-tdx")] +pub const RESET_VECTOR: u64 = 0xffff_fff0; pub const RESET_VECTOR_SEV_AP: u64 = 0xfff3; pub const BIOS_START: u64 = 0xffff_0000; pub const BIOS_SIZE: usize = 65536; @@ -268,12 +271,20 @@ pub fn configure_system( params.0.hdr.ramdisk_size = initrd_config.size as u32; } - #[cfg(feature = "tee")] + #[cfg(all(feature = "tee", not(feature = "intel-tdx")))] { params.0.hdr.syssize = num_cpus as u32; } - add_e820_entry(&mut params.0, 0, EBDA_START, E820_RAM)?; + #[cfg(feature = "intel-tdx")] + { + // number of 4k pages + params.0.hdr.syssize = (arch_memory_info.ram_last_addr / 4096) as u32; + // nuymber of vCPUs + params.0.hdr.root_flags = num_cpus as u16; + } + + add_e820_entry(&mut params.0, 0, EBDA_START - 0x10000, E820_RAM)?; let last_addr = GuestAddress(arch_memory_info.ram_last_addr); if last_addr < end_32bit_gap_start { diff --git a/src/cpuid/Cargo.toml b/src/cpuid/Cargo.toml index 41c53aee0..0ec0af57a 100644 --- a/src/cpuid/Cargo.toml +++ b/src/cpuid/Cargo.toml @@ -8,5 +8,5 @@ edition = "2021" vmm-sys-util = ">=0.11" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { git = "https://github.com/jakecorrenti/kvm-bindings.git", branch = "tdx", features = ["fam-wrappers"]} +kvm-ioctls = { git = "https://github.com/jakecorrenti/kvm-ioctls.git", branch = "tdx"} diff --git a/src/cpuid/src/transformer/intel.rs b/src/cpuid/src/transformer/intel.rs index bd0de741a..af86f2413 100644 --- a/src/cpuid/src/transformer/intel.rs +++ b/src/cpuid/src/transformer/intel.rs @@ -17,11 +17,56 @@ pub fn update_feature_info_entry( common::update_feature_info_entry(entry, vm_spec)?; + if entry.index == 0x1 { + println!("adjusting 0x1 index feature"); + entry.ecx &= (1 << 21); + } + 
entry.ecx.write_bit(ecx::TSC_DEADLINE_TIMER_BITINDEX, true); Ok(()) } +pub fn update_kvm_features(entry: &mut kvm_cpuid_entry2, vm_spec: &VmSpec) -> Result<(), Error> { + // KVM feature bits + const NOP_IO_RELAY: u32 = 1; + const PV_UNHALT: u32 = 1; + const PV_TLB_FLUSH: u32 = 9; + const PV_SEND_IPI: u32 = 11; + const POLL_CONTROL: u32 = 12; + const PV_SCHED_YIELD: u32 = 13; + const MSI_EXT_DEST_ID: u32 = 15; + + // These features are not supported by TDX + entry.eax &= (1 << NOP_IO_RELAY) + | (1 << PV_UNHALT) + | (1 << PV_TLB_FLUSH) + | (1 << PV_SEND_IPI) + | (1 << POLL_CONTROL) + | (1 << PV_SCHED_YIELD) + | (1 << MSI_EXT_DEST_ID); + Ok(()) +} + +pub fn update_0xd_for_tdx(entry: &mut kvm_cpuid_entry2, vm_spec: &VmSpec) -> Result<(), Error> { + if entry.function == 0xD && entry.index == 0 { + const XFEATURE_MASK_XTILE: u32 = (1 << 17) | (1 << 18); + if (entry.eax & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE { + entry.eax &= !XFEATURE_MASK_XTILE; + } + } + + if entry.function == 0xD && entry.index == 1 { + entry.ecx &= !(1 << 15); + const XFEATURE_MASK_CET: u32 = (1 << 11) | (1 << 12); + if entry.ecx & XFEATURE_MASK_CET > 0 { + entry.ecx |= XFEATURE_MASK_CET; + } + } + + Ok(()) +} + fn update_deterministic_cache_entry( entry: &mut kvm_cpuid_entry2, vm_spec: &VmSpec, @@ -146,6 +191,8 @@ impl CpuidTransformer for IntelCpuidTransformer { leaf_0x6::LEAF_NUM => Some(intel::update_power_management_entry), leaf_0xa::LEAF_NUM => Some(intel::update_perf_mon_entry), leaf_0xb::LEAF_NUM => Some(intel::update_extended_cache_topology_entry), + leaf_0xd::LEAF_NUM => Some(intel::update_0xd_for_tdx), + 0x4000_0001 => Some(intel::update_kvm_features), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), _ => None, } diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index a78c5ad16..91e4df597 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -43,8 +43,8 @@ lru = ">=0.9" [target.'cfg(target_os = "linux")'.dependencies] rutabaga_gfx = { 
path = "../rutabaga_gfx", features = ["x"], optional = true } caps = "0.5.5" -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { git = "https://github.com/jakecorrenti/kvm-bindings.git", branch = "tdx", features = ["fam-wrappers"]} +kvm-ioctls = { git = "https://github.com/jakecorrenti/kvm-ioctls.git", branch = "tdx"} [target.'cfg(target_arch = "aarch64")'.dependencies] -vm-fdt = ">= 0.2.0" \ No newline at end of file +vm-fdt = ">= 0.2.0" diff --git a/src/libkrun/Cargo.toml b/src/libkrun/Cargo.toml index 010a66db6..5a9efc14f 100644 --- a/src/libkrun/Cargo.toml +++ b/src/libkrun/Cargo.toml @@ -8,6 +8,7 @@ build = "build.rs" [features] tee = [] amd-sev = [ "blk", "tee" ] +intel-tdx = [ "blk", "tee" ] net = [] blk = [] efi = [ "blk", "net" ] @@ -22,6 +23,7 @@ libc = ">=0.2.39" libloading = "0.8" log = "0.4.0" once_cell = "1.4.1" +vm-memory = { version = ">=0.13", features = ["backend-mmap"] } devices = { path = "../devices" } polly = { path = "../polly" } @@ -32,8 +34,8 @@ vmm = { path = "../vmm" } hvf = { path = "../hvf" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.10", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { git = "https://github.com/jakecorrenti/kvm-bindings.git", branch = "tdx", features = ["fam-wrappers"]} +kvm-ioctls = { git = "https://github.com/jakecorrenti/kvm-ioctls.git", branch = "tdx"} [lib] name = "krun" diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index caf374622..84e3ea1e9 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -18,7 +18,13 @@ use std::slice; use std::sync::atomic::{AtomicI32, Ordering}; #[cfg(not(feature = "efi"))] use std::sync::LazyLock; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; + +use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryRegion}; + +use libc::{ + fallocate, madvise, FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE, MADV_DONTNEED, MADV_REMOVE, +}; 
#[cfg(target_os = "macos")] use crossbeam_channel::unbounded; @@ -60,10 +66,12 @@ const KRUN_SUCCESS: i32 = 0; const MAX_ARGS: usize = 4096; // krunfw library name for each context -#[cfg(all(target_os = "linux", not(feature = "amd-sev")))] +#[cfg(all(target_os = "linux", not(feature = "tee")))] const KRUNFW_NAME: &str = "libkrunfw.so.4"; #[cfg(all(target_os = "linux", feature = "amd-sev"))] const KRUNFW_NAME: &str = "libkrunfw-sev.so.4"; +#[cfg(all(target_os = "linux", feature = "intel-tdx"))] +const KRUNFW_NAME: &str = "libkrunfw-tdx.so.4"; #[cfg(all(target_os = "macos", not(feature = "efi")))] const KRUNFW_NAME: &str = "libkrunfw.4.dylib"; @@ -1484,6 +1492,9 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(target_os = "macos")] let (sender, receiver) = unbounded(); + #[cfg(feature = "intel-tdx")] + let (vmcall_sender, vmcall_receiver) = crossbeam_channel::unbounded(); + #[cfg(target_arch = "x86_64")] let (irq_sender, irq_receiver) = crossbeam_channel::unbounded(); @@ -1493,6 +1504,8 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { ctx_cfg.shutdown_efd, #[cfg(target_os = "macos")] sender, + #[cfg(feature = "intel-tdx")] + vmcall_sender, #[cfg(target_arch = "x86_64")] irq_sender, ) { @@ -1509,6 +1522,9 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(target_arch = "x86_64")] let irq_vmm = _vmm.clone(); + #[cfg(feature = "intel-tdx")] + let vmm = _vmm.clone(); + #[cfg(target_os = "macos")] if ctx_cfg.gpu_virgl_flags.is_some() { std::thread::Builder::new() @@ -1577,6 +1593,20 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { .unwrap(); } + #[cfg(feature = "intel-tdx")] + std::thread::Builder::new() + .name("vmcall worker".into()) + .spawn(move || loop { + match vmcall_receiver.recv() { + Err(e) => error!("Error in receiver: {:?}", e), + Ok((conversion_properties, evt_fd)) => { + let _ = convert_memory(&vmm, conversion_properties); + evt_fd.write(1).unwrap(); + } + } + }) + .unwrap(); + loop { match 
event_manager.run() { Ok(_) => {} @@ -1587,3 +1617,126 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { } } } + +#[cfg(feature = "intel-tdx")] +fn has_guest_memory( + memory_properties: &vmm::MemoryConversionProperties, + memfd_regions: &Vec, +) -> Option { + for gp in memfd_regions.iter() { + if memory_properties.gpa >= gp.gpa.0 + && memory_properties.gpa + memory_properties.size <= gp.gpa.0 + gp.size + { + return Some(gp.memfd_id as i64); + } + } + None +} + +#[cfg(feature = "intel-tdx")] +fn convert_memory( + vmm: &Arc>, + memory_properties: vmm::MemoryConversionProperties, +) -> i32 { + let mut ret = -1; + let vmm = vmm.as_ref().lock().unwrap(); + let guest_memfd_regions = vmm.guest_memfd_regions(); + + let page_size = 4096; + + // check to make sure the starting address is aligned with the page size + // check to make sure the size of the memory block is also aligned with the page size + if (memory_properties.gpa % page_size) != 0 || (memory_properties.size % page_size) != 0 { + return -1; + } + + if memory_properties.size < 1 { + return -1; + } + + let region = vmm + .guest_memory() + .find_region(GuestAddress(memory_properties.gpa)); + if let None = region { + // ignore converting non-assigned region to shared + // + // TDX requires vMMIO region to be shared to inject #VE to guest. OVMF issues + // conservatively MapGPA(shared) on 32bit PCI MMIO region, and vIO-APIC 0xFEC 4k page. 
OVMF + // assignes 32bit PCI MMIO region to [top of low memory: typically 2GB=0xC0000000, + // 0xFC000000) + if !memory_properties.to_private { + return 0; + } + + return -1; + } + let region = region.unwrap(); + + // retrive the memory region associated with the starting address and the size + // then check to make sure there is a guest memfd backing + let memfd_id = has_guest_memory(&memory_properties, guest_memfd_regions); + if memfd_id.is_none() { + println!("cannot convert non guest_memfd backed memory region"); + return -1; + } + let memfd_id = memfd_id.unwrap(); + + let attr = kvm_bindings::kvm_memory_attributes { + address: memory_properties.gpa, + size: memory_properties.size, + attributes: if memory_properties.to_private { + kvm_bindings::KVM_MEMORY_ATTRIBUTE_PRIVATE as u64 + } else { + 0 + }, + flags: 0, + }; + + vmm.vm_fd().set_memory_attributes(attr).unwrap(); + + let offset = memory_properties.gpa - region.start_addr().raw_value(); + if memory_properties.to_private { + // ram_block_discard_range + let host_addr = region + .get_host_address(vm_memory::MemoryRegionAddress(memory_properties.gpa)) + .unwrap(); + + unsafe { + let _ret = madvise( + host_addr as *mut libc::c_void, + memory_properties.size.try_into().unwrap(), + MADV_DONTNEED, + ); + + if _ret < 0 { + error!( + "error discarding memory range: gpa: 0x{:x} size: 0x{:x} res: {}", + memory_properties.gpa, + memory_properties.size, + std::io::Error::last_os_error() + ); + return _ret; + } + } + } else { + // ram_block_discard_guest_memfd_range + unsafe { + let _ret = fallocate( + memfd_id as i32, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset as i64, + memory_properties.size as i64, + ); + + if _ret < 0 { + error!( + "error discarding guest memfd memory range: {}", + std::io::Error::last_os_error() + ); + return _ret; + } + } + } + + 0 +} diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 5af297162..6114ca673 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -7,6 +7,7 @@ 
edition = "2021" [features] tee = [] amd-sev = [ "blk", "tee", "codicon", "kbs-types", "procfs", "rdrand", "serde", "serde_json", "sev", "curl" ] +intel-tdx = [ "blk", "tee", "kbs-types", "serde", "serde_json", "curl", "tdx" ] net = [] blk = [] efi = [ "blk", "net" ] @@ -23,6 +24,7 @@ log = "0.4.0" vm-memory = { version = ">=0.13", features = ["backend-mmap"] } arch = { path = "../arch" } +arch_gen = { path = "../arch_gen" } devices = { path = "../devices" } kernel = { path = "../kernel" } utils = { path = "../utils"} @@ -45,8 +47,9 @@ cpuid = { path = "../cpuid" } zstd = "0.13" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.10", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +tdx = { git = "https://github.com/jakecorrenti/tdx.git", branch = "libkrun-modifications", optional = true } +kvm-bindings = { git = "https://github.com/jakecorrenti/kvm-bindings.git", branch = "tdx", features = ["fam-wrappers"]} +kvm-ioctls = { git = "https://github.com/jakecorrenti/kvm-ioctls.git", branch = "tdx"} [target.'cfg(target_os = "macos")'.dependencies] hvf = { path = "../hvf" } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 67a1511dd..44850c3bc 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -3,6 +3,7 @@ //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM. 
+use crossbeam_channel::Sender; #[cfg(target_os = "macos")] use crossbeam_channel::{unbounded, Sender}; use kernel::cmdline::Cmdline; @@ -44,6 +45,9 @@ use hvf::MemoryMapping; #[cfg(feature = "tee")] use kbs_types::Tee; +#[cfg(feature = "intel-tdx")] +use crate::{GuestMemfdProperties, MemoryConversionProperties}; + use crate::device_manager; #[cfg(feature = "tee")] use crate::resources::TeeConfig; @@ -509,11 +513,14 @@ pub fn build_microvm( event_manager: &mut EventManager, _shutdown_efd: Option, #[cfg(target_os = "macos")] _map_sender: Sender, + #[cfg(feature = "intel-tdx")] vmcall_sender: Sender<(MemoryConversionProperties, EventFd)>, #[cfg(target_arch = "x86_64")] irq_sender: crossbeam_channel::Sender<( devices::legacy::IrqWorkerMessage, EventFd, )>, ) -> std::result::Result>, StartMicrovmError> { + #[cfg(feature = "intel-tdx")] + let mut guest_memfd_regions: Vec = vec![]; let payload = choose_payload(vm_resources)?; let (guest_memory, arch_memory_info, mut _shm_manager, payload_config) = create_guest_memory( @@ -546,14 +553,22 @@ pub fn build_microvm( let kvm = KvmContext::new() .map_err(Error::KvmContext) .map_err(StartMicrovmError::Internal)?; - let vm = setup_vm(&kvm, &guest_memory, vm_resources.tee_config())?; + let vm = setup_vm( + &kvm, + &guest_memory, + vm_resources, + #[cfg(feature = "intel-tdx")] + &mut guest_memfd_regions, + #[cfg(feature = "intel-tdx")] + vmcall_sender, + )?; (kvm, vm) }; #[cfg(feature = "tee")] let tee = vm_resources.tee_config().tee; - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] let snp_launcher = match tee { Tee::Snp => Some( vm.snp_secure_virt_prepare(&guest_memory) @@ -562,7 +577,16 @@ pub fn build_microvm( _ => None, }; - #[cfg(feature = "tee")] + #[cfg(feature = "intel-tdx")] + let _ = match tee { + Tee::Tdx => Some( + vm.tdx_secure_virt_prepare() + .map_err(StartMicrovmError::SecureVirtPrepare)?, + ), + _ => None, + }; + + #[cfg(all(feature = "tee", not(feature = "intel-tdx")))] let measured_regions = { 
println!("Injecting and measuring memory regions. This may take a while."); @@ -614,6 +638,32 @@ pub fn build_microvm( ] }; + #[cfg(feature = "intel-tdx")] + let measured_regions = { + println!("Injecting and measuring memory regions. This may take a while."); + let qboot_size = if let Some(qboot_bundle) = &vm_resources.qboot_bundle { + qboot_bundle.size + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + let m = vec![ + MeasuredRegion { + guest_addr: 0, + host_addr: guest_memory.get_host_address(GuestAddress(0)).unwrap() as u64, + size: 0x8000_0000, + }, + MeasuredRegion { + guest_addr: arch::BIOS_START, + host_addr: guest_memory + .get_host_address(GuestAddress(arch::BIOS_START)) + .unwrap() as u64, + size: qboot_size as usize, + }, + ]; + + m + }; + // On x86_64 always create a serial device, // while on aarch64 only create it if 'console=' is specified in the boot args. let serial_device = if cfg!(feature = "efi") { @@ -632,6 +682,11 @@ pub fn build_microvm( .map_err(Error::EventFd) .map_err(StartMicrovmError::Internal)?; + #[cfg(feature = "intel-tdx")] + let memory_evt = EventFd::new(utils::eventfd::EFD_SEMAPHORE) + .map_err(Error::EventFd) + .map_err(StartMicrovmError::Internal)?; + #[cfg(target_arch = "x86_64")] // Safe to unwrap 'serial_device' as it's always 'Some' on x86_64. // x86_64 uses the i8042 reset event as the Vmm exit event. 
@@ -688,6 +743,10 @@ pub fn build_microvm( payload_config.entry_addr, &pio_device_manager.io_bus, &exit_evt, + #[cfg(feature = "intel-tdx")] + vm.vmcall_sender.clone(), + #[cfg(feature = "intel-tdx")] + &memory_evt, ) .map_err(StartMicrovmError::Internal)?; } @@ -756,6 +815,8 @@ pub fn build_microvm( let mut vmm = Vmm { guest_memory, + #[cfg(feature = "intel-tdx")] + guest_memfd_regions, arch_memory_info, kernel_cmdline, vcpus_handles: Vec::new(), @@ -854,6 +915,7 @@ pub fn build_microvm( #[cfg(feature = "tee")] { match tee { + #[cfg(feature = "amd-sev")] Tee::Snp => { let cpuid = kvm .fd() @@ -869,6 +931,15 @@ pub fn build_microvm( ) .map_err(StartMicrovmError::SecureVirtAttest)?; } + #[cfg(feature = "intel-tdx")] + Tee::Tdx => { + vmm.kvm_vm() + .tdx_secure_virt_prepare_memory(&measured_regions) + .map_err(StartMicrovmError::SecureVirtPrepare)?; + vmm.kvm_vm() + .tdx_secure_virt_finalize_vm() + .map_err(StartMicrovmError::SecureVirtPrepare)?; + } _ => return Err(StartMicrovmError::InvalidTee), } @@ -1294,14 +1365,27 @@ pub(crate) fn setup_vm( pub(crate) fn setup_vm( kvm: &KvmContext, guest_memory: &GuestMemoryMmap, - tee_config: &TeeConfig, + resources: &super::resources::VmResources, + #[cfg(feature = "intel-tdx")] guest_memfd_regions: &mut Vec, + #[cfg(feature = "intel-tdx")] vmcall_sender: Sender<(MemoryConversionProperties, EventFd)>, ) -> std::result::Result { - let mut vm = Vm::new(kvm.fd(), tee_config) - .map_err(Error::Vm) - .map_err(StartMicrovmError::Internal)?; - vm.memory_init(guest_memory, kvm.max_memslots()) - .map_err(Error::Vm) - .map_err(StartMicrovmError::Internal)?; + let mut vm = Vm::new( + kvm.fd(), + resources.tee_config(), + resources.vcpu_config().vcpu_count, + #[cfg(feature = "intel-tdx")] + vmcall_sender, + ) + .map_err(Error::Vm) + .map_err(StartMicrovmError::Internal)?; + vm.memory_init( + guest_memory, + kvm.max_memslots(), + #[cfg(feature = "intel-tdx")] + guest_memfd_regions, + ) + .map_err(Error::Vm) + 
.map_err(StartMicrovmError::Internal)?; Ok(vm) } #[cfg(target_os = "macos")] @@ -1448,6 +1532,8 @@ fn create_vcpus_x86_64( entry_addr: GuestAddress, io_bus: &devices::Bus, exit_evt: &EventFd, + #[cfg(feature = "intel-tdx")] vmcall_sender: Sender<(MemoryConversionProperties, EventFd)>, + #[cfg(feature = "intel-tdx")] memory_evt: &EventFd, ) -> super::Result> { let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize); for cpu_index in 0..vcpu_config.vcpu_count { @@ -1458,12 +1544,19 @@ fn create_vcpus_x86_64( vm.supported_msrs().clone(), io_bus.clone(), exit_evt.try_clone().map_err(Error::EventFd)?, + #[cfg(feature = "intel-tdx")] + vmcall_sender.clone(), + #[cfg(feature = "intel-tdx")] + memory_evt.try_clone().map_err(Error::EventFd)?, ) .map_err(Error::Vcpu)?; vcpu.configure_x86_64(guest_mem, entry_addr, vcpu_config) .map_err(Error::Vcpu)?; + #[cfg(feature = "intel-tdx")] + vcpu.tdx_secure_virt_init().map_err(Error::Vcpu)?; + vcpus.push(vcpu); } Ok(vcpus) diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index afae58552..1e748d42e 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -190,10 +190,26 @@ pub trait VmmEventsObserver { /// Shorthand result type for internal VMM commands. pub type Result = std::result::Result; +#[cfg(feature = "intel-tdx")] +pub struct GuestMemfdProperties { + pub gpa: vm_memory::GuestAddress, + pub size: u64, + pub memfd_id: u64, +} + +#[cfg(feature = "intel-tdx")] +pub struct MemoryConversionProperties { + pub gpa: u64, + pub size: u64, + pub to_private: bool, +} + /// Contains the state and associated methods required for the Firecracker VMM. pub struct Vmm { // Guest VM core resources. 
guest_memory: GuestMemoryMmap, + #[cfg(feature = "intel-tdx")] + guest_memfd_regions: Vec, arch_memory_info: ArchMemoryInfo, kernel_cmdline: KernelCmdline, @@ -326,6 +342,15 @@ impl Vmm { &self.guest_memory } + #[cfg(feature = "intel-tdx")] + pub fn guest_memfd_regions(&self) -> &Vec { + &self.guest_memfd_regions + } + + pub fn vm_fd(&self) -> &kvm_ioctls::VmFd { + &self.vm.fd() + } + /// Injects CTRL+ALT+DEL keystroke combo in the i8042 device. #[cfg(target_arch = "x86_64")] pub fn send_ctrl_alt_del(&mut self) -> Result<()> { diff --git a/src/vmm/src/linux/tee/inteltdx.rs b/src/vmm/src/linux/tee/inteltdx.rs new file mode 100644 index 000000000..ac4a30505 --- /dev/null +++ b/src/vmm/src/linux/tee/inteltdx.rs @@ -0,0 +1,77 @@ +use tdx::launch::{TdxCapabilities, TdxVm}; + +use kvm_ioctls::VmFd; + +use std::fs::File; + +#[derive(Debug)] +pub enum Error { + CreateTdxVmStruct, + GetCapabilities, + InitVm, + InitMemoryRegions(i32), + FinalizeVm, +} + +pub struct IntelTdx { + caps: TdxCapabilities, + vm: TdxVm, +} + +impl IntelTdx { + pub fn new(vm_fd: &VmFd, vcpu_count: u8) -> Result { + let vm = TdxVm::new(vm_fd, vcpu_count as u64) + .or_else(|_| return Err(Error::CreateTdxVmStruct))?; + let caps = vm + .get_capabilities(vm_fd) + .or_else(|_| return Err(Error::GetCapabilities))?; + + Ok(IntelTdx { caps, vm }) + } + + pub fn vm_prepare( + &self, + fd: &kvm_ioctls::VmFd, + cpuid: kvm_bindings::CpuId, + ) -> Result<(), Error> { + self.vm + .init_vm(fd, cpuid) + .or_else(|_| return Err(Error::InitVm))?; + + Ok(()) + } + + pub fn configure_td_memory( + &self, + fd: &kvm_ioctls::VmFd, + regions: &Vec, + ) -> Result<(), Error> { + for region in regions { + let ext = if arch::BIOS_START == region.guest_addr { + 1 + } else { + 0 + }; + + if let Err(e) = self.vm.init_mem_region( + fd, + region.guest_addr, + (region.size / 4096) as u64, + ext, + region.host_addr, + ) { + if e.code != libc::EAGAIN { + return Err(Error::InitMemoryRegions(e.code)); + } + } + } + + Ok(()) + } + + pub 
fn finalize_vm(&self, fd: &kvm_ioctls::VmFd) -> Result<(), Error> { + self.vm + .finalize(fd) + .or_else(|_| return Err(Error::FinalizeVm)) + } +} diff --git a/src/vmm/src/linux/tee/mod.rs b/src/vmm/src/linux/tee/mod.rs index cd9d2ad6d..b4274b649 100644 --- a/src/vmm/src/linux/tee/mod.rs +++ b/src/vmm/src/linux/tee/mod.rs @@ -1,2 +1,5 @@ #[cfg(feature = "amd-sev")] pub mod amdsnp; + +#[cfg(feature = "intel-tdx")] +pub mod inteltdx; diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 0e25478c2..ca3ce7326 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -25,6 +25,9 @@ use super::super::{FC_EXIT_CODE_GENERIC_ERROR, FC_EXIT_CODE_OK}; #[cfg(feature = "amd-sev")] use super::tee::amdsnp::{AmdSnp, Error as SnpError}; +#[cfg(feature = "intel-tdx")] +use super::tee::inteltdx::{Error as TdxError, IntelTdx}; + #[cfg(feature = "tee")] use kbs_types::Tee; @@ -41,7 +44,9 @@ use kvm_bindings::{ KVM_MAX_CPUID_ENTRIES, }; use kvm_bindings::{ - kvm_userspace_memory_region, KVM_API_VERSION, KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN, + kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region, + kvm_userspace_memory_region2, KVM_API_VERSION, KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEM_GUEST_MEMFD, KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN, }; use kvm_ioctls::*; use utils::eventfd::EventFd; @@ -54,15 +59,40 @@ use vm_memory::{ #[cfg(feature = "amd-sev")] use sev::launch::snp; +#[cfg(feature = "intel-tdx")] +use crate::{GuestMemfdProperties, MemoryConversionProperties}; + /// Signal number (SIGRTMIN) used to kick Vcpus. 
// TDG.VP.VMCALL leaf numbers and return codes, per the Intel TDX
// Guest-Hypervisor Communication Interface (GHCI) specification.

/// TDG.VP.VMCALL<MapGPA>: convert a GPA range between shared and private.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_MAP_GPA: u64 = 0x10001;
/// TDG.VP.VMCALL<GetQuote>: request an attestation quote.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
/// TDG.VP.VMCALL<ReportFatalError>: guest reports an unrecoverable error.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_REPORT_FATAL_ERROR: u64 = 0x10003;
/// TDG.VP.VMCALL<SetupEventNotifyInterrupt>: register the notification vector.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;

/// Status returned to the guest in R10: operation completed.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0x0000000000000000;
/// Status: partial progress; the guest should retry from the reported GPA.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_RETRY: u64 = 0x0000000000000001;
/// Status: one of the operands was invalid.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;
/// Status: the requested GPA range is currently in use.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_GPA_INUSE: u64 = 0x8000000000000001;
/// Status: the GPA or size was not page-aligned.
#[cfg(feature = "intel-tdx")]
const TDG_VP_VMCALL_ALIGN_ERROR: u64 = 0x8000000000000002;
SnpSecVirtAttest(SnpError), + #[cfg(feature = "intel-tdx")] + /// Error initializing the Trust Domain Extensions Backend (TDX) + TdxSecVirtInit(TdxError), + #[cfg(feature = "intel-tdx")] + /// Error preparing the VM for Trust Domain Extensions (TDX) + TdxSecVirtPrepare(TdxError), + #[cfg(feature = "intel-tdx")] + /// Error initializing vCPU for Trust Domain Extensions (TDX) + TdxSecVirtInitVcpu, + #[cfg(feature = "intel-tdx")] + /// Error handling vmcall for Trust Domain Extensions (TDX) + TdxHandleVmcall, #[cfg(feature = "tee")] /// The TEE specified is not supported. InvalidTee, @@ -228,6 +276,8 @@ impl Display for Error { match self { #[cfg(target_arch = "x86_64")] CpuId(e) => write!(f, "Cpuid error: {e:?}"), + #[cfg(feature = "intel-tdx")] + CreateGuestMemfd(e) => write!(f, "Failed to create guest memfd: {e:?}",), GuestMemoryMmap(e) => write!(f, "Guest memory error: {e:?}"), #[cfg(target_arch = "x86_64")] GuestMSRs(e) => write!(f, "Retrieving supported guest MSRs fails: {e:?}"), @@ -254,22 +304,43 @@ impl Display for Error { ), SetUserMemoryRegion(e) => write!(f, "Cannot set the memory regions: {e}"), ShmMmap(e) => write!(f, "Error creating memory map for SHM region: {e}"), - #[cfg(feature = "tee")] + #[cfg(feature = "intel-tdx")] + SetUserMemoryRegion2(e) => write!(f, "Cannot set the memory regions: {e:?}",), + #[cfg(feature = "intel-tdx")] + SetMemoryAttributes(e) => write!(f, "Cannot set the memory attributes: {e:?}",), + #[cfg(feature = "amd-sev")] SnpSecVirtInit(e) => write!( f, "Error initializing the Secure Virtualization Backend (SEV): {e:?}" ), - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] SnpSecVirtPrepare(e) => write!( f, "Error preparing the VM for Secure Virtualization (SNP): {e:?}" ), - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] SnpSecVirtAttest(e) => write!(f, "Error attesting the Secure VM (SNP): {e:?}"), SignalVcpu(e) => write!(f, "Failed to signal Vcpu: {e}"), + #[cfg(feature = "intel-tdx")] + TdxSecVirtInit(e) => write!( 
// Method of `impl Vm` (vstate.rs): TDX-specific constructor.

#[cfg(feature = "intel-tdx")]
/// Builds a TDX-backed `Vm`: creates the KVM VM with the TDX VM type,
/// snapshots the host-supported CPUID entries and guest MSRs, and brings up
/// the `IntelTdx` launch backend.
///
/// `vmcall_sender` is handed to each vCPU so shared<->private memory
/// conversion requests can be forwarded to the VMM thread.
pub fn new(
    kvm: &Kvm,
    tee_config: &TeeConfig,
    vcpu_count: u8,
    vmcall_sender: crossbeam_channel::Sender<(MemoryConversionProperties, EventFd)>,
) -> Result<Self> {
    // VM-scoped fd; KVM_X86_TDX_VM selects the TDX VM type at creation.
    let fd = kvm
        .create_vm_with_type(tdx::launch::KVM_X86_TDX_VM)
        .map_err(Error::VmFd)?;

    let supported_msrs =
        arch::x86_64::msr::supported_guest_msrs(kvm).map_err(Error::GuestMSRs)?;

    // NOTE(review): a CPUID-retrieval failure is surfaced as Error::VmFd —
    // confirm this matches the non-TDX constructor's convention.
    let supported_cpuid = kvm
        .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES)
        .map_err(Error::VmFd)?;

    let tdx = Some(IntelTdx::new(&fd, vcpu_count).map_err(Error::TdxSecVirtInit)?);

    Ok(Vm {
        fd,
        next_mem_slot: 0,
        supported_cpuid,
        supported_msrs,
        tdx,
        tee_config: tee_config.tee,
        vmcall_sender,
    })
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn supported_cpuid(&self) -> &CpuId { @@ -504,6 +612,7 @@ impl Vm { &mut self, guest_mem: &GuestMemoryMmap, kvm_max_memslots: usize, + #[cfg(feature = "intel-tdx")] guest_memfd_regions: &mut Vec, ) -> Result<()> { if guest_mem.num_regions() > kvm_max_memslots { return Err(Error::NotEnoughMemorySlots); @@ -512,20 +621,74 @@ impl Vm { // It's safe to unwrap because the guest address is valid. let host_addr = guest_mem.get_host_address(region.start_addr()).unwrap(); debug!("Guest memory starts at {:x?}", host_addr); - let memory_region = kvm_userspace_memory_region { - slot: self.next_mem_slot, - guest_phys_addr: region.start_addr().raw_value(), - memory_size: region.len(), - userspace_addr: host_addr as u64, - flags: 0, - }; - // Safe because we mapped the memory region, we made sure that the regions - // are not overlapping. - unsafe { + + #[cfg(feature = "intel-tdx")] + { + let gmem = kvm_create_guest_memfd { + size: region.len(), + flags: 0, + reserved: [0; 6], + }; + let gmem = self + .fd + .create_guest_memfd(gmem) + .map_err(Error::CreateGuestMemfd)?; + + let memory_region = kvm_userspace_memory_region2 { + slot: self.next_mem_slot, + flags: KVM_MEM_GUEST_MEMFD, + guest_phys_addr: region.start_addr().raw_value(), + memory_size: region.len(), + userspace_addr: host_addr as u64, + guest_memfd_offset: 0, + guest_memfd: gmem as u32, + pad1: 0, + pad2: [0; 14], + }; + + // Safe because we mapped the memory region, we made sure that the regions + // are not overlapping. 
// Methods of `impl Vm` (vstate.rs): TDX launch-sequence wrappers.

#[cfg(feature = "intel-tdx")]
/// Runs the TDX VM-scoped initialization (KVM_TDX_INIT_VM) with the cached
/// supported CPUID entries.
///
/// Returns `Error::InvalidTee` when this `Vm` was not created with a TDX
/// backend.
pub fn tdx_secure_virt_prepare(&self) -> Result<()> {
    match &self.tdx {
        Some(t) => t
            .vm_prepare(&self.fd, self.supported_cpuid.clone())
            .map_err(Error::TdxSecVirtPrepare),
        None => Err(Error::InvalidTee),
    }
}

#[cfg(feature = "intel-tdx")]
/// Registers the guest memory regions with the TDX module
/// (KVM_TDX_INIT_MEM_REGION) prior to finalization.
///
/// NOTE(review): the element type of `regions` was lost in extraction; it
/// must expose `guest_addr`, `size` and `host_addr` — TODO confirm.
pub fn tdx_secure_virt_prepare_memory(
    &self,
    regions: &[crate::GuestRegionProperties],
) -> Result<()> {
    match &self.tdx {
        // BUGFIX: the original passed `&regions` (garbled to `®ions` in the
        // patch), taking a needless extra reference to an already-borrowed
        // slice; pass the borrow through directly.
        Some(t) => t
            .configure_td_memory(&self.fd, regions)
            .map_err(Error::TdxSecVirtPrepare),
        None => Err(Error::InvalidTee),
    }
}

#[cfg(feature = "intel-tdx")]
/// Finalizes the TD (KVM_TDX_FINALIZE_VM), sealing its measurement. Must run
/// after `tdx_secure_virt_prepare_memory`.
pub fn tdx_secure_virt_finalize_vm(&self) -> Result<()> {
    match &self.tdx {
        // NOTE(review): a finalize failure is reported as TdxSecVirtPrepare;
        // a dedicated error variant would be clearer — confirm intent.
        Some(t) => t.finalize_vm(&self.fd).map_err(Error::TdxSecVirtPrepare),
        None => Err(Error::InvalidTee),
    }
}
Vcpu { response_receiver: Option>, // The transmitting end of the responses channel owned by the vcpu side. response_sender: Sender, + + #[cfg(feature = "intel-tdx")] + vmcall_sender: Sender<(MemoryConversionProperties, EventFd)>, + + #[cfg(feature = "intel-tdx")] + memory_evt: EventFd, } impl Vcpu { @@ -790,6 +990,8 @@ impl Vcpu { msr_list: MsrList, io_bus: devices::Bus, exit_evt: EventFd, + #[cfg(feature = "intel-tdx")] vmcall_sender: Sender<(MemoryConversionProperties, EventFd)>, + #[cfg(feature = "intel-tdx")] memory_evt: EventFd, ) -> Result { let kvm_vcpu = vm_fd.create_vcpu(id as u64).map_err(Error::VcpuFd)?; let (event_sender, event_receiver) = unbounded(); @@ -808,6 +1010,10 @@ impl Vcpu { event_sender: Some(event_sender), response_receiver: Some(response_receiver), response_sender, + #[cfg(feature = "intel-tdx")] + vmcall_sender, + #[cfg(feature = "intel-tdx")] + memory_evt, }) } @@ -892,12 +1098,15 @@ impl Vcpu { .set_cpuid2(&self.cpuid) .map_err(Error::VcpuSetCpuid)?; - arch::x86_64::msr::setup_msrs(&self.fd).map_err(Error::MSRSConfiguration)?; - arch::x86_64::regs::setup_regs(&self.fd, kernel_start_addr.raw_value(), self.id) - .map_err(Error::REGSConfiguration)?; - arch::x86_64::regs::setup_fpu(&self.fd).map_err(Error::FPUConfiguration)?; - arch::x86_64::regs::setup_sregs(guest_mem, &self.fd, self.id) - .map_err(Error::SREGSConfiguration)?; + #[cfg(not(feature = "intel-tdx"))] + { + arch::x86_64::msr::setup_msrs(&self.fd).map_err(Error::MSRSConfiguration)?; + arch::x86_64::regs::setup_regs(&self.fd, kernel_start_addr.raw_value(), self.id) + .map_err(Error::REGSConfiguration)?; + arch::x86_64::regs::setup_fpu(&self.fd).map_err(Error::FPUConfiguration)?; + arch::x86_64::regs::setup_sregs(guest_mem, &self.fd, self.id) + .map_err(Error::SREGSConfiguration)?; + } arch::x86_64::interrupts::set_lint(&self.fd).map_err(Error::LocalIntConfiguration)?; Ok(()) } @@ -1139,6 +1348,100 @@ impl Vcpu { } Ok(VcpuEmulation::Stopped) } + VcpuExit::MemoryFault { flags, 
gpa, size } => { + #[cfg(feature = "intel-tdx")] + if flags & !kvm_bindings::KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 != 0 { + println!("KVM_EXIT_MEMORY_FAULT: Unknown flag {}", flags); + Err(Error::VcpuUnhandledKvmExit) + } else { + let attr = (flags & kvm_bindings::KVM_MEMORY_EXIT_FLAG_PRIVATE as u64); + + if let Err(e) = self.vmcall_sender.send(( + MemoryConversionProperties { + gpa, + size, + to_private: attr > 0, + }, + self.memory_evt.try_clone().unwrap(), + )) { + println!( + "KVM_EXIT_MEMORY_FAULT: unable to convert memory: Exit {:#?}", + e + ); + return Err(Error::VcpuUnhandledKvmExit); + } + if let Err(e) = self.memory_evt.read() { + println!( + "KVM_EXIT_MEMORY_FAULT: unable to convert memory: Exit {:#?}", + e + ); + return Err(Error::VcpuUnhandledKvmExit); + } + Ok(VcpuEmulation::Handled) + } + + #[cfg(not(feature = "intel-tdx"))] + { + error!("Received KVM_EXIT_MEMORY_FAULT: flags=0x{:x}, gpa=0x{:x}, size=0x{:x})", flags, gpa, size); + Err(Error::VcpuUnhandledKvmExit) + } + } + #[cfg(feature = "intel-tdx")] + VcpuExit::Tdx => { + let kvm_run = self.fd.get_kvm_run(); + let tdx = unsafe { &mut kvm_run.__bindgen_anon_1.tdx }; + + const KVM_EXIT_TDX_VMCALL: u32 = 1; + + // check if the exit type is KVM_EXIT_TDX_VMCALL + if tdx.type_ != KVM_EXIT_TDX_VMCALL { + error!("unknown tdx exit type 0x{:x}", tdx.type_); + return Err(Error::VcpuUnhandledKvmExit); + } + + // handle the vmcall + let mut vmcall = unsafe { tdx.u.vmcall }; + unsafe { + vmcall.__bindgen_anon_4.status_code = TDG_VP_VMCALL_INVALID_OPERAND; + + if vmcall.__bindgen_anon_2.type_ != 0 { + error!( + "unknown TDG.VP.VMCALL type 0x{:x} subfunction 0x{:x}", + vmcall.__bindgen_anon_2.type_, vmcall.__bindgen_anon_3.subfunction + ); + return Err(Error::VcpuUnhandledKvmExit); + } + + match vmcall.__bindgen_anon_3.subfunction { + TDG_VP_VMCALL_MAP_GPA => { + let ret = self.tdx_handle_map_gpa(&mut vmcall); + if ret < 0 { + return Err(Error::VcpuUnhandledKvmExit); + } + return Ok(VcpuEmulation::Handled); + } + 
// Methods of `impl Vcpu` (vstate.rs): TDG.VP.VMCALL handlers + TDX vCPU init.

#[cfg(feature = "intel-tdx")]
/// Handles TDG.VP.VMCALL<MapGPA>: converts a GPA range between shared and
/// private by forwarding the request to the VMM thread over `vmcall_sender`
/// and blocking on `memory_evt` until the conversion completes.
///
/// Returns a negative value only on conversion-channel failure; protocol
/// errors are reported to the guest via `status_code` with a `0` return.
///
/// # Safety
/// `vmcall` aliases the live `kvm_run` TDX-exit union; the caller must
/// guarantee it is the active variant.
unsafe fn tdx_handle_map_gpa(
    &self,
    vmcall: &mut kvm_bindings::kvm_tdx_exit__bindgen_ty_1_kvm_tdx_vmcall,
) -> i32 {
    // Largest range converted per call; larger requests get TDVMCALL_RETRY.
    const TDX_MAP_GPA_MAX_LEN: u64 = 64 * 1024 * 1024;

    // Host physical-address width selects which GPA bit is the "shared" bit.
    let phys_bits = unsafe { std::arch::x86_64::__cpuid(0x8000_0008).eax } & 0xff;
    let shared_bit: u64 = if phys_bits > 48 { 1 << 51 } else { 1 << 47 };

    let gpa = vmcall.in_r12 & !shared_bit;
    // Shared bit set in the request means "convert to shared".
    let private = vmcall.in_r12 & shared_bit == 0;
    let mut size = vmcall.in_r13;

    // Default status; overwritten on success/retry below.
    vmcall.__bindgen_anon_4.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

    if gpa % 4096 != 0 || size % 4096 != 0 {
        vmcall.__bindgen_anon_4.status_code = TDG_VP_VMCALL_ALIGN_ERROR;
        return 0;
    }

    // BUGFIX: the original probed overflow with `gpa + size < gpa`, which
    // itself panics on overflow in debug builds; use checked_add instead.
    let end = match gpa.checked_add(size) {
        Some(end) => end,
        None => return 0,
    };
    if gpa >= (1 << phys_bits) || end >= (1 << phys_bits) {
        return 0;
    }

    // Clamp oversized requests; the guest is told to retry for the rest.
    let retry = size > TDX_MAP_GPA_MAX_LEN;
    if retry {
        size = TDX_MAP_GPA_MAX_LEN;
    }

    if size > 0 {
        // BUGFIX: the original recorded failures in a local `ret` but then
        // unconditionally returned 0, so the caller's `ret < 0` check could
        // never fire; return -1 directly. Early return also avoids blocking
        // on memory_evt after a failed send.
        if let Err(e) = self.vmcall_sender.send((
            MemoryConversionProperties {
                gpa,
                size,
                to_private: private,
            },
            self.memory_evt.try_clone().unwrap(),
        )) {
            error!("MapGPA: failed to send conversion request: {:#?}", e);
            return -1;
        }
        if let Err(e) = self.memory_evt.read() {
            error!("MapGPA: failed waiting for conversion completion: {:#?}", e);
            return -1;
        }
    }

    vmcall.__bindgen_anon_4.status_code = if retry {
        // Partial progress: report where the guest should resume, restoring
        // the shared bit for shared-direction conversions.
        vmcall.out_r11 = gpa + size;
        if !private {
            vmcall.out_r11 |= shared_bit;
        }
        TDG_VP_VMCALL_RETRY
    } else {
        TDG_VP_VMCALL_SUCCESS
    };

    0
}

#[cfg(feature = "intel-tdx")]
/// Handles TDG.VP.VMCALL<SetupEventNotifyInterrupt>. Valid notification
/// vectors are 32..=255 (vectors below 32 are reserved for exceptions).
///
/// # Safety
/// `vmcall` must be the active variant of the kvm_run TDX-exit union.
unsafe fn tdx_handle_setup_event_notify_interrupt(
    vmcall: &mut kvm_bindings::kvm_tdx_exit__bindgen_ty_1_kvm_tdx_vmcall,
) {
    let vector = vmcall.in_r12;

    // BUGFIX: the original tested `32 <= vector && vector >= 255`, which
    // rejects the valid 32..=254 range and accepts everything >= 255; the
    // intended check is 32 <= vector <= 255.
    vmcall.__bindgen_anon_4.status_code = if (32..=255).contains(&vector) {
        // Normally this is where the runtime state would record the vector
        // and the APIC id of the cpu.
        TDG_VP_VMCALL_SUCCESS
    } else {
        TDG_VP_VMCALL_INVALID_OPERAND
    };
}

#[cfg(feature = "intel-tdx")]
/// Handles TDG.VP.VMCALL<ReportFatalError>: validates the error code, decodes
/// the optional panic message packed into eight GPRs, and logs everything.
///
/// # Safety
/// `vmcall` must be the active variant of the kvm_run TDX-exit union.
unsafe fn tdx_handle_report_fatal_error(
    vmcall: &mut kvm_bindings::kvm_tdx_exit__bindgen_ty_1_kvm_tdx_vmcall,
) -> Result<()> {
    const GUEST_PANIC_INFO_TDX_MESSAGE_MAX: usize = 64;
    const TDX_REPORT_FATAL_ERROR_GPA_VALID: u64 = 1 << 63;

    let error_code = vmcall.in_r12;

    // The low 16 bits must be clear for a valid error code.
    if (error_code & 0xffff) > 0 {
        error!(
            "TDX: REPORT_FATAL_ERROR: invalid error code: 0x{:x}",
            error_code
        );
        return Err(Error::TdxHandleVmcall);
    }

    // Optional message: up to 64 bytes packed little-endian into
    // r14, r15, rbx, rdi, rsi, r8, r9, rdx (8 bytes per register).
    // BUGFIX: the original stored one `char` per 64-bit register, dropping 7
    // of every 8 message bytes and leaving index 0 as NUL; decode each
    // register's little-endian bytes instead and trim trailing NUL padding.
    let message = if vmcall.in_r14 > 0 {
        let regs = [
            vmcall.in_r14,
            vmcall.in_r15,
            vmcall.in_rbx,
            vmcall.in_rdi,
            vmcall.in_rsi,
            vmcall.in_r8,
            vmcall.in_r9,
            vmcall.in_rdx,
        ];
        let mut bytes = Vec::with_capacity(GUEST_PANIC_INFO_TDX_MESSAGE_MAX);
        for reg in regs {
            bytes.extend_from_slice(&reg.to_le_bytes());
        }
        String::from_utf8_lossy(&bytes)
            .trim_end_matches('\0')
            .to_string()
    } else {
        String::new()
    };

    // GPA of additional panic info, only meaningful when the valid bit is set.
    let gpa = if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) > 0 {
        vmcall.in_r13
    } else {
        0
    };

    error!(
        "TDX: REPORT_FATAL_ERROR: message: {} error_code: {} gpa: 0x{:x}",
        message, error_code, gpa
    );

    Ok(())
}

#[cfg(feature = "intel-tdx")]
/// Runs KVM_TDX_INIT_VCPU for this vCPU (initial parameter value 0).
pub fn tdx_secure_virt_init(&self) -> Result<()> {
    tdx::launch::TdxVcpu::init(&self.fd, 0).map_err(|_| Error::TdxSecVirtInitVcpu)
}