From b1e040ad5c628796403267bbaef1cdbf1334d8e9 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:39:13 -0700 Subject: [PATCH 1/6] refactor: move regs submodules into x86_64 subdirectory Move debug_regs, fpu, special_regs, and standard_regs into regs/x86_64/ and re-export through a new x86_64/mod.rs. Update regs.rs to delegate to the x86_64 submodule. This prepares the regs module for architecture-specific register definitions. Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/hypervisor/regs.rs | 13 +++------ .../regs/{ => x86_64}/debug_regs.rs | 0 .../src/hypervisor/regs/{ => x86_64}/fpu.rs | 0 .../src/hypervisor/regs/x86_64/mod.rs | 28 +++++++++++++++++++ .../regs/{ => x86_64}/special_regs.rs | 0 .../regs/{ => x86_64}/standard_regs.rs | 0 6 files changed, 32 insertions(+), 9 deletions(-) rename src/hyperlight_host/src/hypervisor/regs/{ => x86_64}/debug_regs.rs (100%) rename src/hyperlight_host/src/hypervisor/regs/{ => x86_64}/fpu.rs (100%) create mode 100644 src/hyperlight_host/src/hypervisor/regs/x86_64/mod.rs rename src/hyperlight_host/src/hypervisor/regs/{ => x86_64}/special_regs.rs (100%) rename src/hyperlight_host/src/hypervisor/regs/{ => x86_64}/standard_regs.rs (100%) diff --git a/src/hyperlight_host/src/hypervisor/regs.rs b/src/hyperlight_host/src/hypervisor/regs.rs index 5d940ba69..ac806e658 100644 --- a/src/hyperlight_host/src/hypervisor/regs.rs +++ b/src/hyperlight_host/src/hypervisor/regs.rs @@ -14,19 +14,14 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -mod debug_regs; -mod fpu; -mod special_regs; -mod standard_regs; +#[cfg(target_arch = "x86_64")] +mod x86_64; +#[cfg(target_arch = "x86_64")] +pub(crate) use x86_64::*; #[cfg(target_os = "windows")] use std::collections::HashSet; -pub(crate) use debug_regs::*; -pub(crate) use fpu::*; -pub(crate) use special_regs::*; -pub(crate) use standard_regs::*; - #[cfg(target_os = "windows")] #[derive(Debug, PartialEq)] pub(crate) enum FromWhpRegisterError { diff --git a/src/hyperlight_host/src/hypervisor/regs/debug_regs.rs b/src/hyperlight_host/src/hypervisor/regs/x86_64/debug_regs.rs similarity index 100% rename from src/hyperlight_host/src/hypervisor/regs/debug_regs.rs rename to src/hyperlight_host/src/hypervisor/regs/x86_64/debug_regs.rs diff --git a/src/hyperlight_host/src/hypervisor/regs/fpu.rs b/src/hyperlight_host/src/hypervisor/regs/x86_64/fpu.rs similarity index 100% rename from src/hyperlight_host/src/hypervisor/regs/fpu.rs rename to src/hyperlight_host/src/hypervisor/regs/x86_64/fpu.rs diff --git a/src/hyperlight_host/src/hypervisor/regs/x86_64/mod.rs b/src/hyperlight_host/src/hypervisor/regs/x86_64/mod.rs new file mode 100644 index 000000000..88724d95a --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/regs/x86_64/mod.rs @@ -0,0 +1,28 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +mod debug_regs; +mod fpu; +mod special_regs; +mod standard_regs; + +pub(crate) use debug_regs::*; +pub(crate) use fpu::*; +pub(crate) use special_regs::*; +pub(crate) use standard_regs::*; + +#[cfg(target_os = "windows")] +pub(crate) use super::FromWhpRegisterError; diff --git a/src/hyperlight_host/src/hypervisor/regs/special_regs.rs b/src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs similarity index 100% rename from src/hyperlight_host/src/hypervisor/regs/special_regs.rs rename to src/hyperlight_host/src/hypervisor/regs/x86_64/special_regs.rs diff --git a/src/hyperlight_host/src/hypervisor/regs/standard_regs.rs b/src/hyperlight_host/src/hypervisor/regs/x86_64/standard_regs.rs similarity index 100% rename from src/hyperlight_host/src/hypervisor/regs/standard_regs.rs rename to src/hyperlight_host/src/hypervisor/regs/x86_64/standard_regs.rs From 78d232d967cd78085ab31e40aee48cc48f361562 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:39:53 -0700 Subject: [PATCH 2/6] refactor: move kvm.rs and mshv.rs into subdirectories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move kvm.rs → kvm/x86_64.rs and mshv.rs → mshv/x86_64.rs, adding mod.rs files that re-export through the x86_64 submodule. This prepares the virtual machine backends for architecture-specific implementations. 
Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/virtual_machine/kvm/mod.rs | 20 +++++++++++++++++++ .../virtual_machine/{kvm.rs => kvm/x86_64.rs} | 0 .../hypervisor/virtual_machine/mshv/mod.rs | 20 +++++++++++++++++++ .../{mshv.rs => mshv/x86_64.rs} | 0 4 files changed, 40 insertions(+) create mode 100644 src/hyperlight_host/src/hypervisor/virtual_machine/kvm/mod.rs rename src/hyperlight_host/src/hypervisor/virtual_machine/{kvm.rs => kvm/x86_64.rs} (100%) create mode 100644 src/hyperlight_host/src/hypervisor/virtual_machine/mshv/mod.rs rename src/hyperlight_host/src/hypervisor/virtual_machine/{mshv.rs => mshv/x86_64.rs} (100%) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/mod.rs new file mode 100644 index 000000000..3c886e176 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/mod.rs @@ -0,0 +1,20 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#[cfg(target_arch = "x86_64")] +mod x86_64; +#[cfg(target_arch = "x86_64")] +pub(crate) use x86_64::*; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs similarity index 100% rename from src/hyperlight_host/src/hypervisor/virtual_machine/kvm.rs rename to src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/mod.rs new file mode 100644 index 000000000..3c886e176 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/mod.rs @@ -0,0 +1,20 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#[cfg(target_arch = "x86_64")] +mod x86_64; +#[cfg(target_arch = "x86_64")] +pub(crate) use x86_64::*; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs similarity index 100% rename from src/hyperlight_host/src/hypervisor/virtual_machine/mshv.rs rename to src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs From a205a4576434fce09c8b80ad72c18890b67110c9 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:40:19 -0700 Subject: [PATCH 3/6] refactor: convert hyperlight_vm.rs to module directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename hyperlight_vm.rs → hyperlight_vm/mod.rs with no content changes. This prepares for splitting architecture- specific code into separate submodules. Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/{hyperlight_vm.rs => hyperlight_vm/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/hyperlight_host/src/hypervisor/{hyperlight_vm.rs => hyperlight_vm/mod.rs} (100%) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs similarity index 100% rename from src/hyperlight_host/src/hypervisor/hyperlight_vm.rs rename to src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs From dcad902ae7b6068371f69a841229ccbb5e0216c5 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:41:35 -0700 Subject: [PATCH 4/6] refactor: split hyperlight_vm into shared and x86_64-specific code Extract x86_64-specific methods (new, initialise, dispatch_call_from_host, reset_vcpu, get_root_pt, get_snapshot_sregs, handle_debug, crashdump_context) and the debug submodule into hyperlight_vm/x86_64.rs. 
Keep shared code in mod.rs: struct definition, error types, helper functions, and architecture-independent methods (map_region, unmap_region, run, handle_io, etc.). Struct fields changed from private to pub(super) so the x86_64 submodule can access them. Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/hyperlight_vm/mod.rs | 2312 +---------------- .../src/hypervisor/hyperlight_vm/x86_64.rs | 2288 ++++++++++++++++ 2 files changed, 2329 insertions(+), 2271 deletions(-) create mode 100644 src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs index d053d15da..0b30e2ed2 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs @@ -14,60 +14,40 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +#[cfg(target_arch = "x86_64")] +mod x86_64; + +#[cfg(target_arch = "aarch64")] +mod aarch64; #[cfg(gdb)] use std::collections::HashMap; -#[cfg(crashdump)] -use std::path::Path; use std::str::FromStr; -#[cfg(any(kvm, mshv3))] -use std::sync::atomic::AtomicBool; -use std::sync::atomic::AtomicU8; -#[cfg(any(kvm, mshv3))] -use std::sync::atomic::AtomicU64; use std::sync::{Arc, Mutex}; use hyperlight_common::log_level::GuestLogFilter; -use tracing::{Span, instrument}; use tracing_core::LevelFilter; +use crate::HyperlightError; #[cfg(gdb)] -use super::gdb::arch::VcpuStopReasonError; +use crate::hypervisor::gdb::DebuggableVm; #[cfg(gdb)] -use super::gdb::{ - DebugCommChannel, DebugMsg, DebugResponse, DebuggableVm, GdbTargetError, VcpuStopReason, arch, -}; -use super::regs::{CommonFpu, CommonRegisters}; -#[cfg(target_os = "windows")] -use super::{PartitionState, WindowsInterruptHandle}; -use crate::HyperlightError; -#[cfg(any(kvm, mshv3))] -use crate::hypervisor::LinuxInterruptHandle; -#[cfg(crashdump)] -use crate::hypervisor::crashdump; +use crate::hypervisor::gdb::arch::VcpuStopReasonError; #[cfg(gdb)] -use crate::hypervisor::gdb::{DebugError, DebugMemoryAccessError}; +use crate::hypervisor::gdb::{ + DebugCommChannel, DebugError, DebugMsg, DebugResponse, GdbTargetError, VcpuStopReason, +}; #[cfg(gdb)] -use crate::hypervisor::hyperlight_vm::debug::ProcessDebugRequestError; -use crate::hypervisor::regs::{CommonDebugRegs, CommonSpecialRegisters}; +use crate::hypervisor::hyperlight_vm::x86_64::debug::ProcessDebugRequestError; #[cfg(not(gdb))] use crate::hypervisor::virtual_machine::VirtualMachine; -#[cfg(kvm)] -use crate::hypervisor::virtual_machine::kvm::KvmVm; -#[cfg(mshv3)] -use crate::hypervisor::virtual_machine::mshv::MshvVm; -#[cfg(target_os = "windows")] -use crate::hypervisor::virtual_machine::whp::WhpVm; use crate::hypervisor::virtual_machine::{ - HypervisorType, MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, VmError, VmExit, - 
get_available_hypervisor, + MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, VmError, VmExit, }; use crate::hypervisor::{InterruptHandle, InterruptHandleImpl}; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::{GuestSharedMemory, HostSharedMemory, SharedMemory}; use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION}; -use crate::sandbox::SandboxConfiguration; use crate::sandbox::host_funcs::FunctionRegistry; use crate::sandbox::outb::{HandleOutbError, handle_outb}; use crate::sandbox::snapshot::NextAction; @@ -131,7 +111,7 @@ fn get_max_log_level_filter(rust_log: String) -> LevelFilter { /// Converts a given [`Option`] to a `u64` value to be passed to the guest entrypoint /// If the provided filter is `None`, it uses the `RUST_LOG` environment variable to determine the /// maximum log level filter for the guest and converts it to a `u64` value. -fn get_guest_log_filter(guest_max_log_level: Option) -> u64 { +pub(super) fn get_guest_log_filter(guest_max_log_level: Option) -> u64 { let guest_log_level_filter = match guest_max_log_level { Some(level) => level, None => get_max_log_level_filter(std::env::var("RUST_LOG").unwrap_or_default()), @@ -147,40 +127,41 @@ fn get_guest_log_filter(guest_max_log_level: Option) -> u64 { /// - The vCPU execution loop and handling of VM exits (I/O, MMIO, interrupts). 
pub(crate) struct HyperlightVm { #[cfg(gdb)] - vm: Box, + pub(super) vm: Box, #[cfg(not(gdb))] - vm: Box, - page_size: usize, - entrypoint: NextAction, // only present if this vm has not yet been initialised - rsp_gva: u64, - interrupt_handle: Arc, + pub(super) vm: Box, + pub(super) page_size: usize, + pub(super) entrypoint: NextAction, // only present if this vm has not yet been initialised + pub(super) rsp_gva: u64, + pub(super) interrupt_handle: Arc, - next_slot: u32, // Monotonically increasing slot number - freed_slots: Vec, // Reusable slots from unmapped regions + pub(super) next_slot: u32, // Monotonically increasing slot number + pub(super) freed_slots: Vec, // Reusable slots from unmapped regions - snapshot_slot: u32, + pub(super) snapshot_slot: u32, // The current snapshot region, used to keep it alive as long as // it is used & when unmapping - snapshot_memory: Option, - scratch_slot: u32, // The slot number used for the scratch region + pub(super) snapshot_memory: Option, + pub(super) scratch_slot: u32, // The slot number used for the scratch region // The current scratch region, used to keep it alive as long as it // is used & when unmapping - scratch_memory: Option, + pub(super) scratch_memory: Option, - mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region) + pub(super) mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region) - pending_tlb_flush: bool, + pub(super) pending_tlb_flush: bool, #[cfg(gdb)] - gdb_conn: Option>, + pub(super) gdb_conn: Option>, #[cfg(gdb)] - sw_breakpoints: HashMap, // addr -> original instruction + pub(super) sw_breakpoints: HashMap, // addr -> original instruction #[cfg(feature = "mem_profile")] - trace_info: MemTraceInfo, + pub(super) trace_info: MemTraceInfo, #[cfg(crashdump)] - rt_cfg: SandboxRuntimeConfig, + pub(super) rt_cfg: SandboxRuntimeConfig, } + /// DispatchGuestCall error #[derive(Debug, thiserror::Error)] pub enum DispatchGuestCallError { @@ -412,187 
+393,7 @@ pub enum HyperlightVmError { #[error("Access page table error: {0}")] AccessPageTable(#[from] AccessPageTableError), } - impl HyperlightVm { - /// Create a new HyperlightVm instance (will not run vm until calling `initialise`) - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - #[allow(clippy::too_many_arguments)] - pub(crate) fn new( - snapshot_mem: GuestSharedMemory, - scratch_mem: GuestSharedMemory, - _pml4_addr: u64, - entrypoint: NextAction, - rsp_gva: u64, - #[cfg_attr(target_os = "windows", allow(unused_variables))] config: &SandboxConfiguration, - #[cfg(gdb)] gdb_conn: Option>, - #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, - #[cfg(feature = "mem_profile")] trace_info: MemTraceInfo, - ) -> std::result::Result { - #[cfg(gdb)] - type VmType = Box; - #[cfg(not(gdb))] - type VmType = Box; - - let vm: VmType = match get_available_hypervisor() { - #[cfg(kvm)] - Some(HypervisorType::Kvm) => Box::new(KvmVm::new().map_err(VmError::CreateVm)?), - #[cfg(mshv3)] - Some(HypervisorType::Mshv) => Box::new(MshvVm::new().map_err(VmError::CreateVm)?), - #[cfg(target_os = "windows")] - Some(HypervisorType::Whp) => Box::new(WhpVm::new().map_err(VmError::CreateVm)?), - None => return Err(CreateHyperlightVmError::NoHypervisorFound), - }; - - #[cfg(not(feature = "nanvix-unstable"))] - vm.set_sregs(&CommonSpecialRegisters::standard_64bit_defaults(_pml4_addr)) - .map_err(VmError::Register)?; - #[cfg(feature = "nanvix-unstable")] - vm.set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults()) - .map_err(VmError::Register)?; - - #[cfg(any(kvm, mshv3))] - let interrupt_handle: Arc = Arc::new(LinuxInterruptHandle { - state: AtomicU8::new(0), - #[cfg(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - ))] - tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), - #[cfg(not(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" 
- )))] - tid: AtomicU64::new(unsafe { libc::pthread_self() }), - retry_delay: config.get_interrupt_retry_delay(), - sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), - dropped: AtomicBool::new(false), - }); - - #[cfg(target_os = "windows")] - let interrupt_handle: Arc = Arc::new(WindowsInterruptHandle { - state: AtomicU8::new(0), - partition_state: std::sync::RwLock::new(PartitionState { - handle: vm.partition_handle(), - dropped: false, - }), - }); - - let snapshot_slot = 0u32; - let scratch_slot = 1u32; - #[cfg_attr(not(gdb), allow(unused_mut))] - let mut ret = Self { - vm, - entrypoint, - rsp_gva, - interrupt_handle, - page_size: 0, // Will be set in `initialise` - - next_slot: scratch_slot + 1, - freed_slots: Vec::new(), - - snapshot_slot, - snapshot_memory: None, - scratch_slot, - scratch_memory: None, - - mmap_regions: Vec::new(), - - pending_tlb_flush: false, - - #[cfg(gdb)] - gdb_conn, - #[cfg(gdb)] - sw_breakpoints: HashMap::new(), - #[cfg(feature = "mem_profile")] - trace_info, - #[cfg(crashdump)] - rt_cfg, - }; - - ret.update_snapshot_mapping(snapshot_mem)?; - ret.update_scratch_mapping(scratch_mem)?; - - // Send the interrupt handle to the GDB thread if debugging is enabled - // This is used to allow the GDB thread to stop the vCPU - #[cfg(gdb)] - if ret.gdb_conn.is_some() { - ret.send_dbg_msg(DebugResponse::InterruptHandle(ret.interrupt_handle.clone()))?; - // Add breakpoint to the entry point address, if we are going to initialise - ret.vm.set_debug(true).map_err(VmError::Debug)?; - if let NextAction::Initialise(initialise) = entrypoint { - ret.vm - .add_hw_breakpoint(initialise) - .map_err(CreateHyperlightVmError::AddHwBreakpoint)?; - } - } - - Ok(ret) - } - - /// Initialise the internally stored vCPU with the given PEB address and - /// random number seed, then run it until a HLT instruction. 
- #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - #[allow(clippy::too_many_arguments)] - pub(crate) fn initialise( - &mut self, - peb_addr: RawPtr, - seed: u64, - page_size: u32, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - guest_max_log_level: Option, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> std::result::Result<(), InitializeError> { - let NextAction::Initialise(initialise) = self.entrypoint else { - return Ok(()); - }; - - self.page_size = page_size as usize; - - let regs = CommonRegisters { - rip: initialise, - // We usually keep the top of the stack 16-byte - // aligned. However, the ABI requirement is that the stack - // be aligned _before a call instruction_, which means - // that the stack needs to actually be ≡ 8 mod 16 at the - // first instruction (since, on x64, a call instruction - // automatically pushes a return address). - rsp: self.rsp_gva - 8, - - // function args - rdi: peb_addr.into(), - rsi: seed, - rdx: page_size.into(), - rcx: get_guest_log_filter(guest_max_log_level), - rflags: 1 << 1, - - ..Default::default() - }; - self.vm.set_regs(&regs)?; - - self.run( - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_fn, - ) - .map_err(InitializeError::Run)?; - - let regs = self.vm.regs()?; - // todo(portability): this is architecture-specific - if !regs.rsp.is_multiple_of(16) { - return Err(InitializeError::InvalidStackPointer(regs.rsp)); - } - self.rsp_gva = regs.rsp; - self.entrypoint = NextAction::Call(regs.rax); - - Ok(()) - } - /// Map a region of host memory into the sandbox. /// /// Safety: The caller must ensure that the region points to valid memory and @@ -689,30 +490,6 @@ impl HyperlightVm { Ok(()) } - /// Get the current base page table physical address. - /// - /// By default, reads CR3 from the vCPU special registers. - /// With `nanvix-unstable`, returns 0 (identity-mapped, no page tables). 
- pub(crate) fn get_root_pt(&self) -> Result { - #[cfg(not(feature = "nanvix-unstable"))] - { - let sregs = self.vm.sregs()?; - // Mask off the flags bits - Ok(sregs.cr3 & !0xfff_u64) - } - #[cfg(feature = "nanvix-unstable")] - { - Ok(0) - } - } - - /// Get the special registers that need to be stored in a snapshot. - pub(crate) fn get_snapshot_sregs( - &mut self, - ) -> Result { - Ok(self.vm.sregs()?) - } - /// Get the current stack top virtual address pub(crate) fn get_stack_top(&mut self) -> u64 { self.rsp_gva @@ -733,62 +510,6 @@ impl HyperlightVm { self.entrypoint = entrypoint } - /// Dispatch a call from the host to the guest using the given pointer - /// to the dispatch function _in the guest's address space_. - /// - /// Do this by setting the instruction pointer to `dispatch_func_addr` - /// and then running the execution loop until a halt instruction. - /// - /// Returns `Ok` if the call succeeded, and an `Err` if it failed - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn dispatch_call_from_host( - &mut self, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> std::result::Result<(), DispatchGuestCallError> { - let NextAction::Call(dispatch_func_addr) = self.entrypoint else { - return Err(DispatchGuestCallError::Uninitialized); - }; - let mut rflags = 1 << 1; // RFLAGS.1 is RES1 - if self.pending_tlb_flush { - rflags |= 1 << 6; // set ZF if we need a tlb flush done before anything else executes - self.pending_tlb_flush = false; - } - // set RIP and RSP, reset others - let regs = CommonRegisters { - rip: dispatch_func_addr, - // We usually keep the top of the stack 16-byte - // aligned. 
Since the usual ABI requirement is that the - // stack be aligned _before a call instruction_, one might - // expect that the stack pointer here needs to actually be - // ≡ 8 mod 16 at the first instruction (since, on x64, a - // call instruction automatically pushes a return - // address). However, the x64 entry stub in - // hyperlight_guest::arch::dispatch handles this itself, - // so we do use the aligned address here. - rsp: self.rsp_gva, - rflags, - ..Default::default() - }; - self.vm - .set_regs(&regs) - .map_err(DispatchGuestCallError::SetupRegs)?; - - // reset fpu - self.vm - .set_fpu(&CommonFpu::default()) - .map_err(DispatchGuestCallError::SetupRegs)?; - - self.run( - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_fn, - ) - .map_err(DispatchGuestCallError::Run) - } - pub(crate) fn interrupt_handle(&self) -> Arc { self.interrupt_handle.clone() } @@ -797,7 +518,7 @@ impl HyperlightVm { self.interrupt_handle.clear_cancel(); } - fn run( + pub(super) fn run( &mut self, mem_mgr: &mut SandboxMemoryManager, host_funcs: &Arc>, @@ -880,8 +601,12 @@ impl HyperlightVm { _ => 0, }; // Handle debug event (breakpoints) - let stop_reason = - arch::vcpu_stop_reason(self.vm.as_mut(), dr6, initialise, exception)?; + let stop_reason = crate::hypervisor::gdb::arch::vcpu_stop_reason( + self.vm.as_mut(), + dr6, + initialise, + exception, + )?; if let Err(e) = self.handle_debug(dbg_mem_access_fn.clone(), stop_reason) { break Err(e.into()); } @@ -976,7 +701,7 @@ impl HyperlightVm { Err(e) => { #[cfg(crashdump)] if self.rt_cfg.guest_core_dump { - crashdump::generate_crashdump(self, mem_mgr, None) + crate::hypervisor::crashdump::generate_crashdump(self, mem_mgr, None) .map_err(|e| RunVmError::CrashdumpGeneration(Box::new(e)))?; } @@ -1024,267 +749,6 @@ impl HyperlightVm { Ok(()) } - - /// Resets the following vCPU state: - /// - General purpose registers - /// - Debug registers - /// - XSAVE (includes FPU/SSE state with proper FCW and MXCSR defaults) - /// - Special registers 
(restored from snapshot, with CR3 updated to new page table location) - // TODO: check if other state needs to be reset - pub(crate) fn reset_vcpu( - &mut self, - cr3: u64, - sregs: &CommonSpecialRegisters, - ) -> std::result::Result<(), RegisterError> { - self.vm.set_regs(&CommonRegisters { - rflags: 1 << 1, // Reserved bit always set - ..Default::default() - })?; - self.vm.set_debug_regs(&CommonDebugRegs::default())?; - self.vm.reset_xsave()?; - - #[cfg(not(feature = "nanvix-unstable"))] - { - // Restore the full special registers from snapshot, but update CR3 - // to point to the new (relocated) page tables - let mut sregs = *sregs; - sregs.cr3 = cr3; - self.pending_tlb_flush = true; - self.vm.set_sregs(&sregs)?; - } - #[cfg(feature = "nanvix-unstable")] - { - let _ = (cr3, sregs); // suppress unused warnings - // TODO: This is probably not correct. - // Let's deal with it when we clean up the nanvix-unstable feature - self.vm - .set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults())?; - } - - Ok(()) - } - - // Handle a debug exit - #[cfg(gdb)] - fn handle_debug( - &mut self, - dbg_mem_access_fn: Arc>>, - stop_reason: VcpuStopReason, - ) -> std::result::Result<(), HandleDebugError> { - use crate::hypervisor::gdb::DebugMemoryAccess; - use crate::hypervisor::hyperlight_vm::debug::ProcessDebugRequestError; - - if self.gdb_conn.is_none() { - return Err(HandleDebugError::DebugNotEnabled); - } - - let mem_access = DebugMemoryAccess { - // TODO: dbg_mem_access_fn could be out of sync with the - // actual snapshot/scratch regions, if a snapshot restore - // has caused either of those to change. - dbg_mem_access_fn, - guest_mmap_regions: self.get_mapped_regions().cloned().collect(), - }; - - match stop_reason { - // If the vCPU stopped because of a crash, we need to handle it differently - // We do not want to allow resuming execution or placing breakpoints - // because the guest has crashed. 
- // We only allow reading registers and memory - VcpuStopReason::Crash => { - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason))?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - // Flag to store if we should deny continue or step requests - let mut deny_continue = false; - // Flag to store if we should detach from the gdb session - let mut detach = false; - - let response = match req { - // Allow the detach request to disable debugging by continuing resuming - // hypervisor crash error reporting - DebugMsg::DisableDebug => { - detach = true; - DebugResponse::DisableDebug - } - // Do not allow continue or step requests - DebugMsg::Continue | DebugMsg::Step => { - deny_continue = true; - DebugResponse::NotAllowed - } - // Do not allow adding/removing breakpoints and writing to memory or registers - DebugMsg::AddHwBreakpoint(_) - | DebugMsg::AddSwBreakpoint(_) - | DebugMsg::RemoveHwBreakpoint(_) - | DebugMsg::RemoveSwBreakpoint(_) - | DebugMsg::WriteAddr(_, _) - | DebugMsg::WriteRegisters(_) => DebugResponse::NotAllowed, - - // For all other requests, we will process them normally - _ => { - let result = self.process_dbg_request(req, &mem_access); - match result { - Ok(response) => response, - // Treat non-fatal errors separately so the guest doesn't fail - Err(ProcessDebugRequestError::ReadMemory( - DebugMemoryAccessError::TranslateGuestAddress(_), - )) - | Err(ProcessDebugRequestError::Debug(DebugError::TranslateGva( - _, - ))) => DebugResponse::ErrorOccurred, - Err(e) => { - log::error!("Error processing debug request: {:?}", e); - return Err(HandleDebugError::ProcessRequest(e)); - } - } - } - }; - - // Send the response to the request back to gdb - self.send_dbg_msg(response)?; - - // If we are denying continue or step requests, the debugger assumes the - // execution started so we need to report a stop reason as a crash and let - // it request to read 
registers/memory to figure out what happened - if deny_continue { - self.send_dbg_msg(DebugResponse::VcpuStopped(VcpuStopReason::Crash))?; - } - - // If we are detaching, we will break the loop and the Hypervisor will continue - // to handle the Crash reason - if detach { - break; - } - } - } - // If the vCPU stopped because of any other reason except a crash, we can handle it - // normally - _ => { - // Send the stop reason to the gdb thread - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason))?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - let result = self.process_dbg_request(req, &mem_access); - - let response = match result { - Ok(response) => response, - // Treat non-fatal errors separately so the guest doesn't fail - Err(ProcessDebugRequestError::ReadMemory( - DebugMemoryAccessError::TranslateGuestAddress(_), - )) - | Err(ProcessDebugRequestError::Debug(DebugError::TranslateGva(_))) => { - DebugResponse::ErrorOccurred - } - Err(e) => { - return Err(HandleDebugError::ProcessRequest(e)); - } - }; - - let cont = matches!( - response, - DebugResponse::Continue | DebugResponse::Step | DebugResponse::DisableDebug - ); - - self.send_dbg_msg(response)?; - - // Check if we should continue execution - // We continue if the response is one of the following: Step, Continue, or DisableDebug - if cont { - break; - } - } - } - } - - Ok(()) - } - - #[cfg(crashdump)] - pub(crate) fn crashdump_context( - &self, - mem_mgr: &mut SandboxMemoryManager, - ) -> std::result::Result, CrashDumpError> { - if self.rt_cfg.guest_core_dump { - let mut regs = [0; 27]; - - let vcpu_regs = self.vm.regs()?; - let sregs = self.vm.sregs()?; - let xsave = self.vm.xsave()?; - - // Set up the registers for the crash dump - regs[0] = vcpu_regs.r15; // r15 - regs[1] = vcpu_regs.r14; // r14 - regs[2] = vcpu_regs.r13; // r13 - regs[3] = vcpu_regs.r12; // r12 - regs[4] = vcpu_regs.rbp; // rbp - regs[5] = 
vcpu_regs.rbx; // rbx - regs[6] = vcpu_regs.r11; // r11 - regs[7] = vcpu_regs.r10; // r10 - regs[8] = vcpu_regs.r9; // r9 - regs[9] = vcpu_regs.r8; // r8 - regs[10] = vcpu_regs.rax; // rax - regs[11] = vcpu_regs.rcx; // rcx - regs[12] = vcpu_regs.rdx; // rdx - regs[13] = vcpu_regs.rsi; // rsi - regs[14] = vcpu_regs.rdi; // rdi - regs[15] = 0; // orig rax - regs[16] = vcpu_regs.rip; // rip - regs[17] = sregs.cs.selector as u64; // cs - regs[18] = vcpu_regs.rflags; // eflags - regs[19] = vcpu_regs.rsp; // rsp - regs[20] = sregs.ss.selector as u64; // ss - regs[21] = sregs.fs.base; // fs_base - regs[22] = sregs.gs.base; // gs_base - regs[23] = sregs.ds.selector as u64; // ds - regs[24] = sregs.es.selector as u64; // es - regs[25] = sregs.fs.selector as u64; // fs - regs[26] = sregs.gs.selector as u64; // gs - - // Get the filename from the binary path - let filename = self.rt_cfg.binary_path.clone().and_then(|path| { - Path::new(&path) - .file_name() - .and_then(|name| name.to_os_string().into_string().ok()) - }); - - // Use the stored entry point address from the runtime config. - // This is the original entry point (load_addr + ELF entry offset) - // which GDB needs for AT_ENTRY to compute the PIE load offset. - // We cannot use self.entrypoint here because it transitions from - // Initialise(addr) to Call(dispatch_addr) after guest init. 
- let initialise = self.rt_cfg.entry_point.unwrap_or_else(|| { - tracing::warn!( - "entry_point was never set in SandboxRuntimeConfig; AT_ENTRY will be 0" - ); - 0 - }); - let mmap_regions: Vec = self.get_mapped_regions().cloned().collect(); - let root_pt = self.get_root_pt()?; - - let regions = mem_mgr - .get_guest_memory_regions(root_pt, &mmap_regions) - .map_err(|e| CrashDumpError::AccessPageTable(Box::new(e)))?; - - Ok(Some(crashdump::CrashDumpContext::new( - regions, - regs, - xsave.to_vec(), - initialise, - self.rt_cfg.binary_path.clone(), - filename, - ))) - } else { - Ok(None) - } - } } impl Drop for HyperlightVm { @@ -1314,1697 +778,3 @@ fn get_memory_access_violation<'a>( // Treat as a generic access violation for now, unsure if this is reachable. None } - -#[cfg(gdb)] -mod debug { - use hyperlight_common::mem::PAGE_SIZE; - - use super::HyperlightVm; - use crate::hypervisor::gdb::arch::{SW_BP, SW_BP_SIZE}; - use crate::hypervisor::gdb::{ - DebugError, DebugMemoryAccess, DebugMemoryAccessError, DebugMsg, DebugResponse, - }; - use crate::hypervisor::virtual_machine::VmError; - - /// Errors that can occur during GDB debug request processing - #[derive(Debug, thiserror::Error)] - pub enum ProcessDebugRequestError { - #[error("Debug is not enabled")] - DebugNotEnabled, - #[error("Failed to acquire lock at {0}:{1}")] - TryLockError(&'static str, u32), - #[error("VM operation error: {0}")] - Vm(#[from] VmError), - #[error("Debug operation error: {0}")] - Debug(#[from] DebugError), - #[error("Address {0:#x} is not a software breakpoint")] - SwBreakpointNotFound(u64), - #[error("Failed to read memory: {0}")] - ReadMemory(#[from] DebugMemoryAccessError), - #[error("Failed to write memory: {0}")] - WriteMemory(DebugMemoryAccessError), - } - - impl HyperlightVm { - pub(crate) fn process_dbg_request( - &mut self, - req: DebugMsg, - mem_access: &DebugMemoryAccess, - ) -> std::result::Result { - if self.gdb_conn.is_some() { - match req { - 
DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( - self.vm - .add_hw_breakpoint(addr) - .map_err(|e| { - log::error!("Failed to add hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( - self.add_sw_breakpoint(addr, mem_access) - .map_err(|e| { - log::error!("Failed to add sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Continue => { - self.vm.set_single_step(false).map_err(|e| { - log::error!("Failed to continue execution: {:?}", e); - - e - })?; - - Ok(DebugResponse::Continue) - } - DebugMsg::DisableDebug => { - self.vm.set_debug(false).map_err(|e| { - log::error!("Failed to disable debugging: {:?}", e); - e - })?; - - Ok(DebugResponse::DisableDebug) - } - DebugMsg::GetCodeSectionOffset => { - let offset = mem_access - .dbg_mem_access_fn - .try_lock() - .map_err(|_| ProcessDebugRequestError::TryLockError(file!(), line!()))? - .layout - .get_guest_code_address(); - - Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) - } - DebugMsg::ReadAddr(addr, len) => { - let mut data = vec![0u8; len]; - - self.read_addrs(addr, &mut data, mem_access).map_err(|e| { - log::error!("Failed to read from address: {:?}", e); - - e - })?; - - Ok(DebugResponse::ReadAddr(data)) - } - DebugMsg::ReadRegisters => { - let regs = self.vm.regs().map_err(VmError::Register)?; - let fpu = self.vm.fpu().map_err(VmError::Register)?; - Ok(DebugResponse::ReadRegisters(Box::new((regs, fpu)))) - } - DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( - self.vm - .remove_hw_breakpoint(addr) - .map_err(|e| { - log::error!("Failed to remove hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( - self.remove_sw_breakpoint(addr, mem_access) - .map_err(|e| { - log::error!("Failed to remove sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Step => { - 
self.vm.set_single_step(true).map_err(|e| { - log::error!("Failed to enable step instruction: {:?}", e); - - e - })?; - - Ok(DebugResponse::Step) - } - DebugMsg::WriteAddr(addr, data) => { - self.write_addrs(addr, &data, mem_access).map_err(|e| { - log::error!("Failed to write to address: {:?}", e); - - e - })?; - - Ok(DebugResponse::WriteAddr) - } - DebugMsg::WriteRegisters(boxed_regs) => { - let (regs, fpu) = boxed_regs.as_ref(); - self.vm.set_regs(regs).map_err(VmError::Register)?; - self.vm.set_fpu(fpu).map_err(VmError::Register)?; - - Ok(DebugResponse::WriteRegisters) - } - } - } else { - Err(ProcessDebugRequestError::DebugNotEnabled) - } - } - - pub(crate) fn recv_dbg_msg( - &mut self, - ) -> std::result::Result { - use super::RecvDbgMsgError; - - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or(RecvDbgMsgError::DebugNotEnabled)?; - - Ok(gdb_conn.recv()?) - } - - pub(crate) fn send_dbg_msg( - &mut self, - cmd: DebugResponse, - ) -> std::result::Result<(), super::SendDbgMsgError> { - use super::SendDbgMsgError; - - log::debug!("Sending {:?}", cmd); - - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or(SendDbgMsgError::DebugNotEnabled)?; - - Ok(gdb_conn.send(cmd)?) 
- } - - fn read_addrs( - &mut self, - mut gva: u64, - mut data: &mut [u8], - mem_access: &DebugMemoryAccess, - ) -> std::result::Result<(), ProcessDebugRequestError> { - let data_len = data.len(); - log::debug!("Read addr: {:X} len: {:X}", gva, data_len); - - while !data.is_empty() { - let gpa = self.vm.translate_gva(gva)?; - - let read_len = std::cmp::min( - data.len(), - (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), - ); - - mem_access.read(&mut data[..read_len], gpa)?; - - data = &mut data[read_len..]; - gva += read_len as u64; - } - - Ok(()) - } - - /// Copies the data from the provided slice to the guest memory address - /// The address is checked to be a valid guest address - fn write_addrs( - &mut self, - mut gva: u64, - mut data: &[u8], - mem_access: &DebugMemoryAccess, - ) -> std::result::Result<(), ProcessDebugRequestError> { - let data_len = data.len(); - log::debug!("Write addr: {:X} len: {:X}", gva, data_len); - - while !data.is_empty() { - let gpa = self.vm.translate_gva(gva)?; - - let write_len = std::cmp::min( - data.len(), - (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), - ); - - // Use the memory access to write to guest memory - mem_access - .write(&data[..write_len], gpa) - .map_err(ProcessDebugRequestError::WriteMemory)?; - - data = &data[write_len..]; - gva += write_len as u64; - } - - Ok(()) - } - - // Must be idempotent! 
- fn add_sw_breakpoint( - &mut self, - gva: u64, - mem_access: &DebugMemoryAccess, - ) -> std::result::Result<(), ProcessDebugRequestError> { - // Check if breakpoint already exists - if self.sw_breakpoints.contains_key(&gva) { - return Ok(()); - } - - // Write breakpoint OP code to write to guest memory - let mut save_data = [0; SW_BP_SIZE]; - self.read_addrs(gva, &mut save_data[..], mem_access)?; - self.write_addrs(gva, &SW_BP, mem_access)?; - - // Save guest memory to restore when breakpoint is removed - self.sw_breakpoints.insert(gva, save_data[0]); - - Ok(()) - } - - fn remove_sw_breakpoint( - &mut self, - gva: u64, - mem_access: &DebugMemoryAccess, - ) -> std::result::Result<(), ProcessDebugRequestError> { - if let Some(saved_data) = self.sw_breakpoints.remove(&gva) { - // Restore saved data to the guest's memory - self.write_addrs(gva, &[saved_data], mem_access)?; - - Ok(()) - } else { - Err(ProcessDebugRequestError::SwBreakpointNotFound(gva)) - } - } - } -} - -#[cfg(test)] -#[cfg(not(feature = "nanvix-unstable"))] -#[allow(clippy::needless_range_loop)] -mod tests { - use std::sync::{Arc, Mutex}; - - use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind}; - use rand::RngExt; - - use super::*; - #[cfg(kvm)] - use crate::hypervisor::regs::FP_CONTROL_WORD_DEFAULT; - use crate::hypervisor::regs::{CommonSegmentRegister, CommonTableRegister, MXCSR_DEFAULT}; - use crate::hypervisor::virtual_machine::VirtualMachine; - use crate::mem::layout::SandboxMemoryLayout; - use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags}; - use crate::mem::mgr::{GuestPageTableBuffer, SandboxMemoryManager}; - use crate::mem::ptr::RawPtr; - use crate::mem::shared_mem::ExclusiveSharedMemory; - use crate::sandbox::SandboxConfiguration; - use crate::sandbox::host_funcs::FunctionRegistry; - #[cfg(any(crashdump, gdb))] - use crate::sandbox::uninitialized::SandboxRuntimeConfig; - use crate::sandbox::uninitialized_evolve::set_up_hypervisor_partition; - - /// Test 
context holding an initialized VM with memory manager. - /// Used by tests that need to interact with guest memory after execution. - struct TestVmContext { - vm: HyperlightVm, - hshm: SandboxMemoryManager, - host_funcs: Arc>, - #[cfg(gdb)] - dbg_mem_access_hdl: Arc>>, - } - - // ========================================================================== - // Dirty State Builders - Create non-default vCPU state for testing reset - // ========================================================================== - - /// Build dirty general purpose registers for testing reset_vcpu. - fn dirty_regs() -> CommonRegisters { - CommonRegisters { - rax: 0x1111111111111111, - rbx: 0x2222222222222222, - rcx: 0x3333333333333333, - rdx: 0x4444444444444444, - rsi: 0x5555555555555555, - rdi: 0x6666666666666666, - rsp: 0x7777777777777777, - rbp: 0x8888888888888888, - r8: 0x9999999999999999, - r9: 0xAAAAAAAAAAAAAAAA, - r10: 0xBBBBBBBBBBBBBBBB, - r11: 0xCCCCCCCCCCCCCCCC, - r12: 0xDDDDDDDDDDDDDDDD, - r13: 0xEEEEEEEEEEEEEEEE, - r14: 0xFFFFFFFFFFFFFFFF, - r15: 0x0123456789ABCDEF, - rip: 0xFEDCBA9876543210, - rflags: 0x202, // IF + reserved bit 1 - } - } - - /// Build dirty FPU state for testing reset_vcpu. - fn dirty_fpu() -> CommonFpu { - CommonFpu { - fpr: [[0xAB; 16]; 8], - fcw: 0x0F7F, // Different from default 0x037F - fsw: 0x1234, - ftwx: 0xAB, - last_opcode: 0x0123, - last_ip: 0xDEADBEEF00000000, - last_dp: 0xCAFEBABE00000000, - xmm: [[0xCD; 16]; 16], - mxcsr: 0x3F80, // Different from default 0x1F80 - } - } - - /// Build dirty special registers for testing reset_vcpu. - /// Must be consistent for 64-bit long mode (CR0/CR4/EFER). 
- fn dirty_sregs(_pml4_addr: u64) -> CommonSpecialRegisters { - let segment = CommonSegmentRegister { - base: 0x1000, - limit: 0xFFFF, - selector: 0x10, - type_: 3, // data segment, read/write, accessed - present: 1, - dpl: 0, - db: 1, - s: 1, - l: 0, - g: 1, - avl: 1, - unusable: 0, - padding: 0, - }; - // CS segment - 64-bit code segment - let cs_segment = CommonSegmentRegister { - base: 0, - limit: 0xFFFF, - selector: 0x08, - type_: 0b1011, // code segment, execute/read, accessed - present: 1, - dpl: 0, - db: 0, // must be 0 in 64-bit mode - s: 1, - l: 1, // 64-bit mode - g: 1, - avl: 0, - unusable: 0, - padding: 0, - }; - let table = CommonTableRegister { - base: 0xDEAD0000, - limit: 0xFFFF, - }; - CommonSpecialRegisters { - cs: cs_segment, - ds: segment, - es: segment, - fs: segment, - gs: segment, - ss: segment, - tr: CommonSegmentRegister { - type_: 0b1011, // busy TSS - present: 1, - ..segment - }, - ldt: segment, - gdt: table, - idt: table, - cr0: 0x80000011, // PE + ET + PG - cr2: 0xBADC0DE, - // MSHV validates cr3 and rejects bogus values; use valid _pml4_addr for MSHV - cr3: match get_available_hypervisor() { - #[cfg(mshv3)] - Some(HypervisorType::Mshv) => _pml4_addr, - _ => 0x12345000, - }, - cr4: 0x20, // PAE - cr8: 0x5, - efer: 0x500, // LME + LMA - apic_base: 0xFEE00900, - interrupt_bitmap: [0; 4], // fails if non-zero on MSHV - } - } - - /// Build dirty debug registers for testing reset_vcpu. 
- /// - /// DR6 bit layout (Intel SDM / AMD APM): - /// Bits 0-3 (B0-B3): Breakpoint condition detected - software writable/clearable - /// Bits 4-10: Reserved, read as 1s on modern processors (read-only) - /// Bit 11 (BLD): Bus Lock Trap - cleared by processor, read-only on older CPUs - /// Bit 12: Reserved, always 0 - /// Bit 13 (BD): Debug Register Access Detected - software clearable - /// Bit 14 (BS): Single-Step - software clearable - /// Bit 15 (BT): Task Switch breakpoint - software clearable - /// Bit 16 (RTM): TSX-related, read-only (1 if no TSX) - /// Bits 17-31: Reserved, read as 1s on modern processors (read-only) - /// Bits 32-63: Reserved, must be 0 - /// - /// Writable bits: 0-3, 13, 14, 15 = mask 0xE00F - /// Reserved 1s: 4-10, 11 (if no BLD), 16 (if no TSX), 17-31 = ~0xE00F on lower 32 bits - const DR6_WRITABLE_MASK: u64 = 0xE00F; // B0-B3, BD, BS, BT - - /// DR7 bit layout: - /// Bits 0-7 (L0-L3, G0-G3): Local/global breakpoint enables - writable - /// Bits 8-9 (LE, GE): Local/Global Exact (386 only, ignored on modern) - writable - /// Bit 10: Reserved, must be 1 (read-only) - /// Bits 11-12: Reserved (RTM/TSX on some CPUs), must be 0 (read-only) - /// Bit 13 (GD): General Detect Enable - writable - /// Bits 14-15: Reserved, must be 0 (read-only) - /// Bits 16-31 (R/W0-3, LEN0-3): Breakpoint conditions and lengths - writable - /// Bits 32-63: Reserved, must be 0 (read-only) - /// - /// Writable bits: 0-9, 13, 16-31 = mask 0xFFFF23FF - const DR7_WRITABLE_MASK: u64 = 0xFFFF_23FF; - - fn dirty_debug_regs() -> CommonDebugRegs { - CommonDebugRegs { - dr0: 0xDEADBEEF00001000, - dr1: 0xDEADBEEF00002000, - dr2: 0xDEADBEEF00003000, - dr3: 0xDEADBEEF00004000, - // Set all writable bits: B0-B3 (0-3), BD (13), BS (14), BT (15) - dr6: DR6_WRITABLE_MASK, - // Set writable bits: L0-L3, G0-G3 (0-7), LE/GE (8-9), GD (13), conditions (16-31) - dr7: DR7_WRITABLE_MASK, - } - } - - /// Returns default test values for reset_vcpu parameters. 
- /// Uses standard 64-bit defaults since reset_vcpu now restores full sregs from snapshot. - fn default_sregs() -> CommonSpecialRegisters { - CommonSpecialRegisters::standard_64bit_defaults(0) - } - - // ========================================================================== - // Normalizers - Handle hypervisor-specific quirks when comparing vCPU state - // ========================================================================== - - /// Normalize debug registers for comparison by applying writable masks. - /// Reserved bits in DR6/DR7 are read-only (set by CPU), so we copy them from actual. - fn normalize_debug_regs(expected: &mut CommonDebugRegs, actual: &CommonDebugRegs) { - expected.dr6 = (expected.dr6 & DR6_WRITABLE_MASK) | (actual.dr6 & !DR6_WRITABLE_MASK); - expected.dr7 = (expected.dr7 & DR7_WRITABLE_MASK) | (actual.dr7 & !DR7_WRITABLE_MASK); - } - - /// Normalize segment hidden cache fields that hypervisors report differently. - /// Applies to: unusable, granularity (g), and ss.db fields. - /// Does NOT normalize type_ - use this when verifying explicitly-set dirty state. - fn normalize_sregs_hidden_cache( - expected: &mut CommonSpecialRegisters, - actual: &CommonSpecialRegisters, - ) { - expected.ss.db = actual.ss.db; - expected.cs.unusable = actual.cs.unusable; - expected.cs.g = actual.cs.g; - expected.ds.unusable = actual.ds.unusable; - expected.ds.g = actual.ds.g; - expected.es.unusable = actual.es.unusable; - expected.es.g = actual.es.g; - expected.fs.unusable = actual.fs.unusable; - expected.fs.g = actual.fs.g; - expected.gs.unusable = actual.gs.unusable; - expected.gs.g = actual.gs.g; - expected.ss.unusable = actual.ss.unusable; - expected.ss.g = actual.ss.g; - expected.tr.unusable = actual.tr.unusable; - expected.tr.g = actual.tr.g; - expected.ldt.unusable = actual.ldt.unusable; - expected.ldt.g = actual.ldt.g; - } - - /// Normalize sregs for verifying reset state. 
- /// - /// Handles hypervisor-specific differences in segment descriptor fields: - /// - Hidden cache fields (unusable, granularity bits) vary between KVM/MSHV/WHP - /// - For unusable segments (DS/ES/FS/GS/SS in 64-bit mode), the type_ field - /// is ignored by the CPU and varies between hypervisors - fn normalize_sregs_for_reset( - expected: &mut CommonSpecialRegisters, - actual: &CommonSpecialRegisters, - ) { - normalize_sregs_hidden_cache(expected, actual); - // type_ for unusable segments: hypervisors return different defaults - // (KVM returns type_=1, WHP returns type_=0). - expected.ds.type_ = actual.ds.type_; - expected.es.type_ = actual.es.type_; - expected.fs.type_ = actual.fs.type_; - expected.gs.type_ = actual.gs.type_; - expected.ss.type_ = actual.ss.type_; - } - - /// Normalize sregs for tests that run actual guest code. - /// - /// Handles hypervisor-specific differences in segment descriptor fields: - /// - Hidden cache fields (unusable, db) vary between KVM/MSHV/WHP - /// - For unusable segments (DS/ES/FS/GS/SS in 64-bit mode), the type_ field - /// is ignored by the CPU and varies between hypervisors - fn normalize_sregs_for_run_tests( - expected: &mut CommonSpecialRegisters, - actual: &CommonSpecialRegisters, - ) { - expected.ss.db = actual.ss.db; - expected.cs.unusable = actual.cs.unusable; - expected.ds.unusable = actual.ds.unusable; - expected.ds.type_ = actual.ds.type_; - expected.es.unusable = actual.es.unusable; - expected.es.type_ = actual.es.type_; - expected.fs.unusable = actual.fs.unusable; - expected.fs.type_ = actual.fs.type_; - expected.gs.unusable = actual.gs.unusable; - expected.gs.type_ = actual.gs.type_; - expected.ss.unusable = actual.ss.unusable; - expected.ss.type_ = actual.ss.type_; - expected.tr.unusable = actual.tr.unusable; - expected.ldt.unusable = actual.ldt.unusable; - } - - /// Normalize FPU MXCSR for KVM quirk. 
- /// KVM doesn't preserve MXCSR via set_fpu/fpu(), so we need to set it manually - /// when comparing FPU state. - #[cfg_attr(not(kvm), allow(unused_variables))] - fn normalize_fpu_mxcsr_for_kvm(fpu: &mut CommonFpu, expected_mxcsr: u32) { - #[cfg(kvm)] - if *get_available_hypervisor().as_ref().unwrap() == HypervisorType::Kvm { - fpu.mxcsr = expected_mxcsr; - } - } - - /// Normalize FPU state for reset comparison. - /// - /// When ftwx == 0, all x87 FPU registers are marked empty. In this state: - /// - `fpr`: Contents are architecturally undefined since registers are empty - /// - `last_ip`, `last_dp`, `last_opcode`: Track the last FPU instruction location. - /// On WHP, the register read API may return stale values even after - /// reset_xsave() properly zeroes the XSAVE area. This is a WHP API quirk - - /// the guest-visible state (via FXSAVE/XSAVE instructions) IS properly reset. - /// - /// IMPORTANT: The `reset_vcpu_fpu_guest_visible_state` test verifies actual - /// guest-visible FPU state by running real guest code with FXSAVE, providing - /// defense-in-depth against hypervisor API quirks masking real issues. - fn normalize_fpu_for_reset(expected: &mut CommonFpu, actual: &CommonFpu) { - if actual.ftwx == 0 { - expected.fpr = actual.fpr; - expected.last_ip = actual.last_ip; - expected.last_dp = actual.last_dp; - expected.last_opcode = actual.last_opcode; - } - } - - // ========================================================================== - // Assertion Helpers - Verify vCPU state after reset - // ========================================================================== - - /// Assert that debug registers are in reset state. - /// Reserved bits in DR6/DR7 are read-only (set by CPU), so we only check - /// that writable bits are cleared to 0 and DR0-DR3 are zeroed. 
- fn assert_debug_regs_reset(vm: &dyn VirtualMachine) { - let debug_regs = vm.debug_regs().unwrap(); - let expected = CommonDebugRegs { - dr0: 0, - dr1: 0, - dr2: 0, - dr3: 0, - dr6: debug_regs.dr6 & !DR6_WRITABLE_MASK, - dr7: debug_regs.dr7 & !DR7_WRITABLE_MASK, - }; - assert_eq!(debug_regs, expected); - } - - /// Assert that general-purpose registers are in reset state. - /// After reset, all registers should be zeroed except rflags which has - /// reserved bit 1 always set. - fn assert_regs_reset(vm: &dyn VirtualMachine) { - assert_eq!( - vm.regs().unwrap(), - CommonRegisters { - rflags: 1 << 1, // Reserved bit 1 is always set - ..Default::default() - } - ); - } - - /// Assert that FPU state is in reset state. - /// Handles hypervisor-specific quirks (KVM MXCSR, empty FPU registers). - fn assert_fpu_reset(vm: &dyn VirtualMachine) { - let fpu = vm.fpu().unwrap(); - let mut expected_fpu = CommonFpu::default(); - normalize_fpu_mxcsr_for_kvm(&mut expected_fpu, fpu.mxcsr); - normalize_fpu_for_reset(&mut expected_fpu, &fpu); - assert_eq!(fpu, expected_fpu); - } - - /// Assert that special registers are in reset state. - /// Handles hypervisor-specific differences in hidden descriptor cache fields. - fn assert_sregs_reset(vm: &dyn VirtualMachine, pml4_addr: u64) { - let defaults = CommonSpecialRegisters::standard_64bit_defaults(pml4_addr); - let sregs = vm.sregs().unwrap(); - let mut expected_sregs = defaults; - // Normalize hypervisor implementation-specific fields. - // These are part of the hidden descriptor cache. While guests can write them - // indirectly (by loading segments from a crafted GDT), guests cannot read them back - // (e.g., `mov ax, ds` only returns the selector, not the hidden cache). - // KVM and MSHV reset to different default values, but both properly reset so there's - // no information leakage between tenants. 
- normalize_sregs_for_reset(&mut expected_sregs, &sregs); - assert_eq!(sregs, expected_sregs); - } - - // ========================================================================== - // XSAVE Helpers - Build dirty XSAVE state for testing extended CPU state - // ========================================================================== - - /// Query CPUID.0DH.n for XSAVE component info. - /// Returns (size, offset, align_64) for the given component: - /// - size: CPUID.0DH.n:EAX - size in bytes - /// - offset: CPUID.0DH.n:EBX - offset from XSAVE base (standard format only) - /// - align_64: CPUID.0DH.n:ECX bit 1 - true if 64-byte aligned (compacted format) - fn xsave_component_info(comp_id: u32) -> (usize, usize, bool) { - let result = unsafe { std::arch::x86_64::__cpuid_count(0xD, comp_id) }; - let size = result.eax as usize; - let offset = result.ebx as usize; - let align_64 = (result.ecx & 0b10) != 0; - (size, offset, align_64) - } - - /// Query CPUID.0DH.00H for the bitmap of supported user state components. - /// EDX:EAX forms a 64-bit bitmap where bit i indicates support for component i. - fn xsave_supported_components() -> u64 { - let result = unsafe { std::arch::x86_64::__cpuid_count(0xD, 0) }; - (result.edx as u64) << 32 | (result.eax as u64) - } - - /// Dirty extended state components using compacted XSAVE format (MSHV/WHP). - /// Components are stored contiguously starting at byte 576, with alignment - /// requirements from CPUID.0DH.n:ECX[1]. - /// Returns a bitmask of components that were actually dirtied. 
- fn dirty_xsave_extended_compacted( - xsave: &mut [u32], - xcomp_bv: u64, - supported_components: u64, - ) -> u64 { - let mut dirtied_mask = 0u64; - let mut offset = 576usize; - - for comp_id in 2..63u32 { - // Skip if component not supported by CPU or not enabled in XCOMP_BV - if (supported_components & (1u64 << comp_id)) == 0 { - continue; - } - if (xcomp_bv & (1u64 << comp_id)) == 0 { - continue; - } - - let (size, _, align_64) = xsave_component_info(comp_id); - - // ECX[1]=1 means 64-byte aligned; ECX[1]=0 means immediately after previous - if align_64 { - offset = offset.next_multiple_of(64); - } - - // Dirty this component's data area (only if it fits in the buffer) - let start_idx = offset / 4; - let end_idx = (offset + size) / 4; - if end_idx <= xsave.len() { - for i in start_idx..end_idx { - xsave[i] = 0x12345678 ^ comp_id.wrapping_mul(0x11111111); - } - dirtied_mask |= 1u64 << comp_id; - } - - offset += size; - } - - dirtied_mask - } - - /// Dirty extended state components using standard XSAVE format (KVM). - /// Components are at fixed offsets from CPUID.0DH.n:EBX. - /// Returns a bitmask of components that were actually dirtied. - fn dirty_xsave_extended_standard(xsave: &mut [u32], supported_components: u64) -> u64 { - let mut dirtied_mask = 0u64; - - for comp_id in 2..63u32 { - // Skip if component not supported by CPU - if (supported_components & (1u64 << comp_id)) == 0 { - continue; - } - - let (size, fixed_offset, _) = xsave_component_info(comp_id); - - let start_idx = fixed_offset / 4; - let end_idx = (fixed_offset + size) / 4; - if end_idx <= xsave.len() { - for i in start_idx..end_idx { - xsave[i] = 0x12345678 ^ comp_id.wrapping_mul(0x11111111); - } - dirtied_mask |= 1u64 << comp_id; - } - } - - dirtied_mask - } - - /// Dirty the legacy XSAVE region (bytes 0-511) for testing reset_vcpu. - /// This includes FPU/x87 state, SSE state, and reserved areas. 
- /// - /// Layout (from Intel SDM Table 13-1): - /// Bytes 0-1: FCW, 2-3: FSW, 4: FTW, 5: reserved, 6-7: FOP - /// Bytes 8-15: FIP, 16-23: FDP - /// Bytes 24-27: MXCSR, 28-31: MXCSR_MASK (preserve - hardware defined) - /// Bytes 32-159: ST0-ST7/MM0-MM7 (8 regs × 16 bytes) - /// Bytes 160-415: XMM0-XMM15 (16 regs × 16 bytes) - /// Bytes 416-511: Reserved - fn dirty_xsave_legacy(xsave: &mut [u32], current_xsave: &[u8]) { - // FCW (bytes 0-1) + FSW (bytes 2-3) - pack into xsave[0] - // FCW = 0x0F7F (different from default 0x037F), FSW = 0x1234 - xsave[0] = 0x0F7F | (0x1234 << 16); - // FTW (byte 4) + reserved (byte 5) + FOP (bytes 6-7) - pack into xsave[1] - // FTW = 0xAB, FOP = 0x0123 - xsave[1] = 0xAB | (0x0123 << 16); - // FIP (bytes 8-15) - xsave[2] and xsave[3] - xsave[2] = 0xDEAD0001; - xsave[3] = 0xBEEF0002; - // FDP (bytes 16-23) - xsave[4] and xsave[5] - xsave[4] = 0xCAFE0003; - xsave[5] = 0xBABE0004; - // MXCSR (bytes 24-27) - xsave[6], use valid value different from default - xsave[6] = 0x3F80; - // xsave[7] is MXCSR_MASK - preserve from current (hardware defined, read-only) - if current_xsave.len() >= 32 { - xsave[7] = u32::from_le_bytes(current_xsave[28..32].try_into().unwrap()); - } - - // ST0-ST7/MM0-MM7 (bytes 32-159, indices 8-39) - for i in 8..40 { - xsave[i] = 0xCAFEBABE; - } - // XMM0-XMM15 (bytes 160-415, indices 40-103) - for i in 40..104 { - xsave[i] = 0xDEADBEEF; - } - - // Reserved area (bytes 416-511, indices 104-127) - for i in 104..128 { - xsave[i] = 0xABCDEF12; - } - } - - /// Preserve XSAVE header (bytes 512-575) from current state. - /// This includes XSTATE_BV and XCOMP_BV which hypervisors require. 
- fn preserve_xsave_header(xsave: &mut [u32], current_xsave: &[u8]) { - for i in 128..144 { - let byte_offset = i * 4; - xsave[i] = u32::from_le_bytes( - current_xsave[byte_offset..byte_offset + 4] - .try_into() - .unwrap(), - ); - } - } - - fn dirty_xsave(current_xsave: &[u8]) -> Vec<u32> { - let mut xsave = vec![0u32; current_xsave.len() / 4]; - - dirty_xsave_legacy(&mut xsave, current_xsave); - preserve_xsave_header(&mut xsave, current_xsave); - - let xcomp_bv = u64::from_le_bytes(current_xsave[520..528].try_into().unwrap()); - let supported_components = xsave_supported_components(); - - // Dirty extended components and get mask of what was actually dirtied - let extended_mask = if (xcomp_bv & (1u64 << 63)) != 0 { - // Compacted format (MSHV/WHP) - dirty_xsave_extended_compacted(&mut xsave, xcomp_bv, supported_components) - } else { - // Standard format (KVM) - dirty_xsave_extended_standard(&mut xsave, supported_components) - }; - - // UPDATE XSTATE_BV to indicate dirtied components have valid data. - // WHP validates consistency between XSTATE_BV and actual data in the buffer. - // Bits 0,1 = legacy x87/SSE (always set after dirty_xsave_legacy) - // Bits 2+ = extended components that we actually dirtied - let xstate_bv = 0x3 | extended_mask; - - // Write XSTATE_BV to bytes 512-519 (u32 indices 128-129) - xsave[128] = (xstate_bv & 0xFFFFFFFF) as u32; - xsave[129] = (xstate_bv >> 32) as u32; - - xsave - } - - // ========================================================================== - // Test VM Setup - // ========================================================================== - - /// Creates a test VM with the given code. This is the shared setup logic used by - /// both `hyperlight_vm()` and `create_test_vm_context()`.
- fn create_test_vm_context(code: &[u8]) -> TestVmContext { - let config: SandboxConfiguration = Default::default(); - #[cfg(any(crashdump, gdb))] - let rt_cfg: SandboxRuntimeConfig = Default::default(); - - let mut layout = SandboxMemoryLayout::new(config, code.len(), 4096, None).unwrap(); - - let pt_base_gpa = layout.get_pt_base_gpa(); - let pt_buf = GuestPageTableBuffer::new(pt_base_gpa as usize); - - for rgn in layout - .get_memory_regions_::(()) - .unwrap() - .iter() - { - let readable = rgn.flags.contains(MemoryRegionFlags::READ); - let writable = rgn.flags.contains(MemoryRegionFlags::WRITE); - let executable = rgn.flags.contains(MemoryRegionFlags::EXECUTE); - let mapping = Mapping { - phys_base: rgn.guest_region.start as u64, - virt_base: rgn.guest_region.start as u64, - len: rgn.guest_region.len() as u64, - kind: MappingKind::Basic(BasicMapping { - readable, - writable, - executable, - }), - }; - unsafe { vmem::map(&pt_buf, mapping) }; - } - - // Map the scratch region at the top of the address space - let scratch_size = config.get_scratch_size(); - let scratch_gpa = hyperlight_common::layout::scratch_base_gpa(scratch_size); - let scratch_gva = hyperlight_common::layout::scratch_base_gva(scratch_size); - let scratch_mapping = Mapping { - phys_base: scratch_gpa, - virt_base: scratch_gva, - len: scratch_size as u64, - kind: MappingKind::Basic(BasicMapping { - readable: true, - writable: true, - executable: true, // Match regular codepath (map_specials) - }), - }; - unsafe { vmem::map(&pt_buf, scratch_mapping) }; - - let pt_bytes = pt_buf.into_bytes(); - layout.set_pt_size(pt_bytes.len()).unwrap(); - - let mem_size = layout.get_memory_size().unwrap(); - let mut eshm = ExclusiveSharedMemory::new(mem_size).unwrap(); - let snapshot_pt_start = mem_size - layout.get_pt_size(); - eshm.copy_from_slice(&pt_bytes, snapshot_pt_start).unwrap(); - eshm.copy_from_slice(code, layout.get_guest_code_offset()) - .unwrap(); - - let scratch_mem = 
ExclusiveSharedMemory::new(config.get_scratch_size()).unwrap(); - let mut mem_mgr = SandboxMemoryManager::new( - layout, - eshm, - scratch_mem, - NextAction::Initialise(layout.get_guest_code_address() as u64), - ); - mem_mgr.write_memory_layout().unwrap(); - - let (mut hshm, gshm) = mem_mgr.build().unwrap(); - - let peb_address = gshm.layout.peb_address; - let stack_top_gva = hyperlight_common::layout::MAX_GVA as u64 - - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET - + 1; - let mut vm = set_up_hypervisor_partition( - gshm, - &config, - stack_top_gva, - #[cfg(any(crashdump, gdb))] - rt_cfg, - crate::mem::exe::LoadInfo::dummy(), - ) - .unwrap(); - - let seed = rand::rng().random::(); - let peb_addr = RawPtr::from(u64::try_from(peb_address).unwrap()); - let page_size = u32::try_from(page_size::get()).unwrap(); - - #[cfg(gdb)] - let dbg_mem_access_hdl = Arc::new(Mutex::new(hshm.clone())); - - let host_funcs = Arc::new(Mutex::new(FunctionRegistry::default())); - - vm.initialise( - peb_addr, - seed, - page_size, - &mut hshm, - &host_funcs, - None, - #[cfg(gdb)] - dbg_mem_access_hdl.clone(), - ) - .unwrap(); - - TestVmContext { - vm, - hshm, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_hdl, - } - } - - /// Simple helper that returns just the VM for tests that don't need memory access. 
- fn hyperlight_vm(code: &[u8]) -> HyperlightVm { - create_test_vm_context(code).vm - } - - // ========================================================================== - // Tests - // ========================================================================== - - #[test] - fn reset_vcpu_simple() { - // push rax; hlt - aligns stack to 16 bytes - const CODE: [u8; 2] = [0x50, 0xf4]; - let mut hyperlight_vm = hyperlight_vm(&CODE); - let available_hv = *get_available_hypervisor().as_ref().unwrap(); - - // Get the initial CR3 value before dirtying sregs - let initial_cr3 = hyperlight_vm.vm.sregs().unwrap().cr3; - - // Set all vCPU state to dirty values - let regs = dirty_regs(); - let fpu = dirty_fpu(); - let sregs = dirty_sregs(initial_cr3); - let current_xsave = hyperlight_vm.vm.xsave().unwrap(); - let xsave = dirty_xsave(&current_xsave); - let debug_regs = dirty_debug_regs(); - - hyperlight_vm.vm.set_xsave(&xsave).unwrap(); - hyperlight_vm.vm.set_regs(&regs).unwrap(); - hyperlight_vm.vm.set_fpu(&fpu).unwrap(); - hyperlight_vm.vm.set_sregs(&sregs).unwrap(); - hyperlight_vm.vm.set_debug_regs(&debug_regs).unwrap(); - - // Verify regs were set - assert_eq!(hyperlight_vm.vm.regs().unwrap(), regs); - - // Verify fpu was set - let mut got_fpu = hyperlight_vm.vm.fpu().unwrap(); - let mut expected_fpu = fpu; - // KVM doesn't preserve mxcsr via set_fpu/fpu(), copy expected to got - normalize_fpu_mxcsr_for_kvm(&mut got_fpu, fpu.mxcsr); - // fpr only uses 80 bits per register. Normalize upper bits for comparison. - for i in 0..8 { - expected_fpu.fpr[i][10..16].copy_from_slice(&got_fpu.fpr[i][10..16]); - } - assert_eq!(got_fpu, expected_fpu); - - // Verify xsave was set by checking key dirty values in the legacy region. - // Note: set_fpu() is called after set_xsave(), so XMM registers reflect fpu state (0xCD pattern).
- let got_xsave = hyperlight_vm.vm.xsave().unwrap(); - // FCW (bytes 0-1) should be 0x0F7F (set by both xsave and fpu) - let got_fcw = u16::from_le_bytes(got_xsave[0..2].try_into().unwrap()); - assert_eq!(got_fcw, 0x0F7F, "xsave FCW should be dirty"); - // MXCSR (bytes 24-27) should be 0x3F80 (set by xsave; fpu doesn't update it on KVM) - let got_mxcsr = u32::from_le_bytes(got_xsave[24..28].try_into().unwrap()); - assert_eq!(got_mxcsr, 0x3F80, "xsave MXCSR should be dirty"); - // XMM0-XMM15 (bytes 160-415): set_fpu overwrites with 0xCD pattern from dirty_fpu() - for i in 0..16 { - let offset = 160 + i * 16; - let xmm_word = u32::from_le_bytes(got_xsave[offset..offset + 4].try_into().unwrap()); - assert_eq!( - xmm_word, 0xCDCDCDCD, - "xsave XMM{i} should match fpu dirty value" - ); - } - - // Verify debug regs were set - let got_debug_regs = hyperlight_vm.vm.debug_regs().unwrap(); - let mut expected_debug_regs = debug_regs; - normalize_debug_regs(&mut expected_debug_regs, &got_debug_regs); - assert_eq!(got_debug_regs, expected_debug_regs); - - // Verify sregs were set - let got_sregs = hyperlight_vm.vm.sregs().unwrap(); - let mut expected_sregs = sregs; - normalize_sregs_hidden_cache(&mut expected_sregs, &got_sregs); - assert_eq!(got_sregs, expected_sregs); - - // Reset the vCPU - hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap(); - - // Verify registers are reset to defaults - assert_regs_reset(hyperlight_vm.vm.as_ref()); - - // Verify FPU is reset to defaults - assert_fpu_reset(hyperlight_vm.vm.as_ref()); - - // Verify debug registers are reset to defaults - assert_debug_regs_reset(hyperlight_vm.vm.as_ref()); - - // Verify xsave is reset - should be zeroed except for hypervisor-specific fields - let reset_xsave = hyperlight_vm.vm.xsave().unwrap(); - // Build expected xsave: all zeros with fpu specific defaults. 
Then copy hypervisor-specific fields from actual - let mut expected_xsave = vec![0u8; reset_xsave.len()]; - #[cfg(mshv3)] - if available_hv == HypervisorType::Mshv { - // FCW (offset 0-1): When XSTATE_BV.LegacyX87 = 0 (init state), the hypervisor - // skips copying the FPU legacy region entirely, leaving zeros in the buffer. - // The actual guest FCW register is 0x037F (verified via fpu() assertion above), - // but xsave() doesn't report it because XSTATE_BV=0 means "init state, buffer - // contents undefined." We copy from actual to handle this. - expected_xsave[0..2].copy_from_slice(&reset_xsave[0..2]); - } - #[cfg(target_os = "windows")] - if available_hv == HypervisorType::Whp { - // FCW (offset 0-1): When XSTATE_BV.LegacyX87 = 0 (init state), the hypervisor - // skips copying the FPU legacy region entirely, leaving zeros in the buffer. - // The actual guest FCW register is 0x037F (verified via fpu() assertion above), - // but xsave() doesn't report it because XSTATE_BV=0 means "init state, buffer - // contents undefined." We copy from actual to handle this. - expected_xsave[0..2].copy_from_slice(&reset_xsave[0..2]); - } - #[cfg(kvm)] - if available_hv == HypervisorType::Kvm { - expected_xsave[0..2].copy_from_slice(&FP_CONTROL_WORD_DEFAULT.to_le_bytes()); - } - - // - MXCSR at offset 24-27: default FPU state set by hypervisor - expected_xsave[24..28].copy_from_slice(&MXCSR_DEFAULT.to_le_bytes()); - // - MXCSR_MASK at offset 28-31: hardware-defined, read-only - expected_xsave[28..32].copy_from_slice(&reset_xsave[28..32]); - // - Reserved bytes at offset 464-511: These are in the reserved/padding area of the legacy - // FXSAVE region (after XMM registers which end at byte 416). On KVM/Intel, these bytes - // may contain hypervisor-specific metadata that isn't cleared during vCPU reset. - // Since this is not guest-visible computational state, we copy from actual to expected. 
- expected_xsave[464..512].copy_from_slice(&reset_xsave[464..512]); - // - XSAVE header at offset 512-575: contains XSTATE_BV and XCOMP_BV (hypervisor-managed) - // XSTATE_BV (512-519): Bitmap indicating which state components have valid data in the - // buffer. When a bit is 0, the hypervisor uses the architectural init value for that - // component. After reset, xsave() may still return non-zero XSTATE_BV since the - // hypervisor reports which components it manages, not which have been modified. - // XCOMP_BV (520-527): Compaction bitmap. Bit 63 indicates compacted format (used by MSHV/WHP). - // When set, the XSAVE area uses a compact layout where only enabled components are stored - // contiguously. This is a format indicator, not state data, so it's preserved across reset. - // Both fields are managed by the hypervisor to describe the XSAVE area format and capabilities, - // not guest-visible computational state, so they don't need to be zeroed on reset. - if reset_xsave.len() >= 576 { - expected_xsave[512..576].copy_from_slice(&reset_xsave[512..576]); - } - assert_eq!( - reset_xsave, expected_xsave, - "xsave should be zeroed except for hypervisor-specific fields" - ); - - // Verify sregs are reset to defaults (CR3 is 0 as passed to reset_vcpu) - assert_sregs_reset(hyperlight_vm.vm.as_ref(), 0); - } - - /// Tests that actually runs code, as opposed to just setting vCPU state. 
- mod run_tests { - use iced_x86::code_asm::*; - - use super::*; - - #[test] - fn reset_vcpu_regs() { - let mut a = CodeAssembler::new(64).unwrap(); - a.push(rax).unwrap(); // Align stack to 16 bytes - a.mov(rax, 0x1111111111111111u64).unwrap(); - a.mov(rbx, 0x2222222222222222u64).unwrap(); - a.mov(rcx, 0x3333333333333333u64).unwrap(); - a.mov(rdx, 0x4444444444444444u64).unwrap(); - a.mov(rsi, 0x5555555555555555u64).unwrap(); - a.mov(rdi, 0x6666666666666666u64).unwrap(); - a.mov(rbp, 0x7777777777777777u64).unwrap(); - a.mov(r8, 0x8888888888888888u64).unwrap(); - a.mov(r9, 0x9999999999999999u64).unwrap(); - a.mov(r10, 0xAAAAAAAAAAAAAAAAu64).unwrap(); - a.mov(r11, 0xBBBBBBBBBBBBBBBBu64).unwrap(); - a.mov(r12, 0xCCCCCCCCCCCCCCCCu64).unwrap(); - a.mov(r13, 0xDDDDDDDDDDDDDDDDu64).unwrap(); - a.mov(r14, 0xEEEEEEEEEEEEEEEEu64).unwrap(); - a.mov(r15, 0xFFFFFFFFFFFFFFFFu64).unwrap(); - a.hlt().unwrap(); - let code = a.assemble(0).unwrap(); - - let mut hyperlight_vm = hyperlight_vm(&code); - - // After run, check registers match expected dirty state - let regs = hyperlight_vm.vm.regs().unwrap(); - let mut expected_dirty = CommonRegisters { - rax: 0x1111111111111111, - rbx: 0x2222222222222222, - rcx: 0x3333333333333333, - rdx: 0x4444444444444444, - rsi: 0x5555555555555555, - rdi: 0x6666666666666666, - rsp: 0, - rbp: 0x7777777777777777, - r8: 0x8888888888888888, - r9: 0x9999999999999999, - r10: 0xAAAAAAAAAAAAAAAA, - r11: 0xBBBBBBBBBBBBBBBB, - r12: 0xCCCCCCCCCCCCCCCC, - r13: 0xDDDDDDDDDDDDDDDD, - r14: 0xEEEEEEEEEEEEEEEE, - r15: 0xFFFFFFFFFFFFFFFF, - rip: 0, - rflags: 0, - }; - // rip, rsp, and rflags are set by the CPU, we don't expect those to match our expected values - expected_dirty.rip = regs.rip; - expected_dirty.rsp = regs.rsp; - expected_dirty.rflags = regs.rflags; - assert_eq!(regs, expected_dirty); - - // Reset vcpu - hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap(); - - // Check registers are reset to defaults - assert_regs_reset(hyperlight_vm.vm.as_ref()); - } 
- - #[test] - fn reset_vcpu_fpu() { - #[cfg(kvm)] - use crate::hypervisor::regs::MXCSR_DEFAULT; - - #[cfg(kvm)] - let available_hv = *get_available_hypervisor().as_ref().unwrap(); - - // Build code to dirty XMM registers, x87 FPU, and MXCSR - let mut a = CodeAssembler::new(64).unwrap(); - a.push(rax).unwrap(); // Align stack to 16 bytes - - // xmm0-xmm7: use movd + pshufd to fill with pattern - let xmm_regs_low = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]; - let patterns_low: [u32; 8] = [ - 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, - 0x88888888, - ]; - for (xmm, pat) in xmm_regs_low.iter().zip(patterns_low.iter()) { - a.mov(eax, *pat).unwrap(); - a.movd(*xmm, eax).unwrap(); - a.pshufd(*xmm, *xmm, 0).unwrap(); - } - - // xmm8-xmm15: upper XMM registers - let xmm_regs_high = [xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15]; - let patterns_high: [u32; 8] = [ - 0x99999999, 0xAAAAAAAA, 0xBBBBBBBB, 0xCCCCCCCC, 0xDDDDDDDD, 0xEEEEEEEE, 0xFFFFFFFF, - 0x12345678, - ]; - for (xmm, pat) in xmm_regs_high.iter().zip(patterns_high.iter()) { - a.mov(eax, *pat).unwrap(); - a.movd(*xmm, eax).unwrap(); - a.pshufd(*xmm, *xmm, 0).unwrap(); - } - - // Use 7 FLDs so TOP=1 after execution, different from default TOP=0. - // This ensures reset properly clears TOP, not just register contents. 
- a.fldz().unwrap(); // 0.0 - a.fldl2e().unwrap(); // log2(e) - a.fldl2t().unwrap(); // log2(10) - a.fldlg2().unwrap(); // log10(2) - a.fldln2().unwrap(); // ln(2) - a.fldpi().unwrap(); // pi - // Push a memory value to also dirty last_dp - a.mov(rax, 0xDEADBEEFu64).unwrap(); - a.push(rax).unwrap(); - a.fld(qword_ptr(rsp)).unwrap(); // dirties last_dp - a.pop(rax).unwrap(); - - // Dirty FCW (0x0F7F, different from default 0x037F) - a.mov(eax, 0x0F7Fu32).unwrap(); - a.push(rax).unwrap(); - a.fldcw(word_ptr(rsp)).unwrap(); - a.pop(rax).unwrap(); - - // Dirty MXCSR (0x3F80, different from default 0x1F80) - a.mov(eax, 0x3F80u32).unwrap(); - a.push(rax).unwrap(); - a.ldmxcsr(dword_ptr(rsp)).unwrap(); - a.pop(rax).unwrap(); - - a.hlt().unwrap(); - let code = a.assemble(0).unwrap(); - - let mut hyperlight_vm = hyperlight_vm(&code); - - // After run, check FPU state matches expected dirty values - let fpu = hyperlight_vm.vm.fpu().unwrap(); - - #[cfg_attr(not(kvm), allow(unused_mut))] - let mut expected_dirty = CommonFpu { - fcw: 0x0F7F, - ftwx: 0xFE, // 7 registers valid (bit 0 empty after 7 pushes with TOP=1) - xmm: [ - 0x11111111111111111111111111111111_u128.to_le_bytes(), - 0x22222222222222222222222222222222_u128.to_le_bytes(), - 0x33333333333333333333333333333333_u128.to_le_bytes(), - 0x44444444444444444444444444444444_u128.to_le_bytes(), - 0x55555555555555555555555555555555_u128.to_le_bytes(), - 0x66666666666666666666666666666666_u128.to_le_bytes(), - 0x77777777777777777777777777777777_u128.to_le_bytes(), - 0x88888888888888888888888888888888_u128.to_le_bytes(), - 0x99999999999999999999999999999999_u128.to_le_bytes(), - 0xAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA_u128.to_le_bytes(), - 0xBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB_u128.to_le_bytes(), - 0xCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC_u128.to_le_bytes(), - 0xDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD_u128.to_le_bytes(), - 0xEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE_u128.to_le_bytes(), - 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF_u128.to_le_bytes(), - 
0x12345678123456781234567812345678_u128.to_le_bytes(), - ], - mxcsr: 0x3F80, - fsw: 0x0802, // TOP=1 after 7 pushes (bits 11-13), DE flag from denormal load - // fpr: 80-bit values with 6 bytes padding; may vary between CPU vendors - fpr: fpu.fpr, - // last_opcode: FPU Opcode update varies by CPU (may only update on unmasked exceptions) - last_opcode: fpu.last_opcode, - // last_ip: code is loaded at runtime-determined address - last_ip: fpu.last_ip, - // last_dp: points to stack (rsp) which is runtime-determined - last_dp: fpu.last_dp, - }; - // KVM doesn't preserve mxcsr via fpu(), copy from actual - normalize_fpu_mxcsr_for_kvm(&mut expected_dirty, fpu.mxcsr); - assert_eq!(fpu, expected_dirty); - - // KVM's get_fpu/set_fpu ioctls don't include MXCSR (it's in the SSE state, - // not x87 FPU state). We must use xsave to verify MXCSR on KVM. - #[cfg(kvm)] - if available_hv == HypervisorType::Kvm { - let xsave = hyperlight_vm.vm.xsave().unwrap(); - let mxcsr = u32::from_le_bytes(xsave[24..28].try_into().unwrap()); - assert_eq!(mxcsr, 0x3F80, "MXCSR in XSAVE should be dirty"); - } - - // Reset vcpu - hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap(); - - // Check FPU is reset to defaults - assert_fpu_reset(hyperlight_vm.vm.as_ref()); - - // Verify MXCSR via xsave on KVM (fpu() doesn't include it) - #[cfg(kvm)] - if available_hv == HypervisorType::Kvm { - let xsave = hyperlight_vm.vm.xsave().unwrap(); - let mxcsr = u32::from_le_bytes(xsave[24..28].try_into().unwrap()); - assert_eq!(mxcsr, MXCSR_DEFAULT, "MXCSR in XSAVE should be reset"); - } - } - - #[test] - fn reset_vcpu_debug_regs() { - let mut a = CodeAssembler::new(64).unwrap(); - a.push(rax).unwrap(); // Align stack to 16 bytes - a.mov(rax, 0xDEAD_BEEF_0000_0000u64).unwrap(); - a.mov(dr0, rax).unwrap(); - a.mov(rax, 0xDEAD_BEEF_0000_0001u64).unwrap(); - a.mov(dr1, rax).unwrap(); - a.mov(rax, 0xDEAD_BEEF_0000_0002u64).unwrap(); - a.mov(dr2, rax).unwrap(); - a.mov(rax, 0xDEAD_BEEF_0000_0003u64).unwrap(); - 
a.mov(dr3, rax).unwrap(); - a.mov(rax, 1u64).unwrap(); - a.mov(dr6, rax).unwrap(); - a.mov(rax, 0xFFu64).unwrap(); - a.mov(dr7, rax).unwrap(); - a.hlt().unwrap(); - let code = a.assemble(0).unwrap(); - - let mut hyperlight_vm = hyperlight_vm(&code); - - // Verify debug registers are dirty - let debug_regs = hyperlight_vm.vm.debug_regs().unwrap(); - let expected_dirty = CommonDebugRegs { - dr0: 0xDEAD_BEEF_0000_0000, - dr1: 0xDEAD_BEEF_0000_0001, - dr2: 0xDEAD_BEEF_0000_0002, - dr3: 0xDEAD_BEEF_0000_0003, - // dr6: guest set B0 (bit 0) = 1, reserved bits vary by CPU - dr6: (debug_regs.dr6 & !DR6_WRITABLE_MASK) | 0x1, - // dr7: guest set lower byte = 0xFF, reserved bits vary by CPU - dr7: (debug_regs.dr7 & !DR7_WRITABLE_MASK) | 0xFF, - }; - assert_eq!(debug_regs, expected_dirty); - - // Reset vcpu - hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap(); - - // Check debug registers are reset to default values - assert_debug_regs_reset(hyperlight_vm.vm.as_ref()); - } - - #[test] - fn reset_vcpu_sregs() { - // Build code that modifies special registers and halts - // We can modify CR0.WP, CR2, CR4.TSD, and CR8 from guest code in ring 0 - let mut a = CodeAssembler::new(64).unwrap(); - a.push(rax).unwrap(); // Align stack to 16 bytes - // Set CR0.WP (Write Protect, bit 16) - a.mov(rax, cr0).unwrap(); - a.or(rax, 0x10000i32).unwrap(); - a.mov(cr0, rax).unwrap(); - // Set CR2 - a.mov(rax, 0xDEADBEEFu64).unwrap(); - a.mov(cr2, rax).unwrap(); - // Set CR4.TSD (Time Stamp Disable, bit 2) - a.mov(rax, cr4).unwrap(); - a.or(rax, 0x4i32).unwrap(); - a.mov(cr4, rax).unwrap(); - // Set CR8 - a.mov(rax, 5u64).unwrap(); - a.mov(cr8, rax).unwrap(); - a.hlt().unwrap(); - let code = a.assemble(0).unwrap(); - - let mut hyperlight_vm = hyperlight_vm(&code); - - // Get the initial CR3 value and expected defaults - let initial_cr3 = hyperlight_vm.vm.sregs().unwrap().cr3; - let defaults = CommonSpecialRegisters::standard_64bit_defaults(initial_cr3); - - // Verify registers are dirty 
(CR0.WP, CR2, CR4.TSD and CR8 modified by our code) - let sregs = hyperlight_vm.vm.sregs().unwrap(); - let mut expected_dirty = CommonSpecialRegisters { - cr0: defaults.cr0 | 0x10000, // WP bit set - cr2: 0xDEADBEEF, - cr4: defaults.cr4 | 0x4, // TSD bit set - cr8: 0x5, - ..defaults - }; - normalize_sregs_for_run_tests(&mut expected_dirty, &sregs); - assert_eq!(sregs, expected_dirty); - - // Reset vcpu - hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap(); - - // Check registers are reset to defaults (CR3 is 0 as passed to reset_vcpu) - let sregs = hyperlight_vm.vm.sregs().unwrap(); - let mut expected_reset = CommonSpecialRegisters::standard_64bit_defaults(0); - normalize_sregs_for_run_tests(&mut expected_reset, &sregs); - assert_eq!(sregs, expected_reset); - } - - /// Verifies guest-visible FPU state (via FXSAVE) is properly reset. - /// Unlike tests using hypervisor API, this runs actual guest code with FXSAVE. - #[test] - fn reset_vcpu_fpu_guest_visible_state() { - let mut ctx = hyperlight_vm_with_mem_mgr_fxsave(); - - // Verify FPU was dirtied after first run - let fpu_before_reset = ctx.ctx.vm.vm.fpu().unwrap(); - assert_eq!( - fpu_before_reset.fcw, 0x0F7F, - "FCW should be dirty after first run" - ); - assert_ne!( - fpu_before_reset.ftwx, 0, - "FTW should indicate valid registers after first run" - ); - - let fxsave_before = ctx.read_fxsave(); - let fcw_before = u16::from_le_bytes(fxsave_before[0..2].try_into().unwrap()); - assert_eq!(fcw_before, 0x0F7F, "Guest FXSAVE FCW should be dirty"); - let mxcsr_before = u32::from_le_bytes(fxsave_before[24..28].try_into().unwrap()); - assert_eq!(mxcsr_before, 0x3F80, "Guest FXSAVE MXCSR should be dirty"); - let xmm0_before = u32::from_le_bytes(fxsave_before[160..164].try_into().unwrap()); - assert_eq!(xmm0_before, 0x11111111, "Guest FXSAVE XMM0 should be dirty"); - - let root_pt_addr = ctx.ctx.vm.get_root_pt().unwrap(); - let segment_state = ctx.ctx.vm.get_snapshot_sregs().unwrap(); - - 
ctx.ctx.vm.reset_vcpu(root_pt_addr, &segment_state).unwrap(); - - // Re-run from entrypoint (flag=1 means guest skips dirty phase, just does FXSAVE) - // Use stack_top - 8 to match initialise()'s behavior (simulates call pushing return addr) - let NextAction::Call(rip) = ctx.ctx.vm.entrypoint else { - panic!("entrypoint should be call"); - }; - let regs = CommonRegisters { - rip, - rsp: ctx.stack_top_gva() - 8, - rflags: 1 << 1, - ..Default::default() - }; - ctx.ctx.vm.vm.set_regs(®s).unwrap(); - ctx.run(); - - // Verify guest-visible state is reset - let fxsave_after = ctx.read_fxsave(); - let fcw_after = u16::from_le_bytes(fxsave_after[0..2].try_into().unwrap()); - assert_eq!( - fcw_after, 0x037F, - "Guest FXSAVE FCW should be reset to default 0x037F, got 0x{:04X}", - fcw_after - ); - - let fsw_after = u16::from_le_bytes(fxsave_after[2..4].try_into().unwrap()); - assert_eq!(fsw_after, 0, "FSW should be reset"); - - let ftw_after = fxsave_after[4]; - assert_eq!(ftw_after, 0, "FTW should be 0 (all empty)"); - - let fop_after = u16::from_le_bytes(fxsave_after[6..8].try_into().unwrap()); - assert_eq!(fop_after, 0, "FOP should be 0"); - - let fip_after = u64::from_le_bytes(fxsave_after[8..16].try_into().unwrap()); - assert_eq!(fip_after, 0, "FIP should be 0"); - - let fdp_after = u64::from_le_bytes(fxsave_after[16..24].try_into().unwrap()); - assert_eq!(fdp_after, 0, "FDP should be 0"); - - let mxcsr_after = u32::from_le_bytes(fxsave_after[24..28].try_into().unwrap()); - assert_eq!( - mxcsr_after, MXCSR_DEFAULT, - "Guest FXSAVE MXCSR should be reset to 0x{:08X}, got 0x{:08X}", - MXCSR_DEFAULT, mxcsr_after - ); - - // ST0-ST7 should be zeroed - for i in 0..8 { - let offset = 32 + i * 16; - let st_bytes = &fxsave_after[offset..offset + 10]; - assert!(st_bytes.iter().all(|&b| b == 0), "ST{} should be zeroed", i); - } - - // XMM0-XMM15 should be zeroed - for i in 0..16 { - let offset = 160 + i * 16; - let xmm_bytes = &fxsave_after[offset..offset + 16]; - assert!( - 
xmm_bytes.iter().all(|&b| b == 0), - "XMM{} should be zeroed", - i - ); - } - } - - /// Extended test context for FXSAVE tests that need to read memory at a specific offset. - struct FxsaveTestContext { - ctx: TestVmContext, - /// Offset in shared memory where FXSAVE data is stored (output_data region) - fxsave_offset: usize, - } - - impl FxsaveTestContext { - fn run(&mut self) { - self.ctx - .vm - .run( - &mut self.ctx.hshm, - &self.ctx.host_funcs, - #[cfg(gdb)] - self.ctx.dbg_mem_access_hdl.clone(), - ) - .unwrap(); - } - - fn read_fxsave(&self) -> [u8; 512] { - let mut fxsave = [0u8; 512]; - self.ctx - .hshm - .scratch_mem - .copy_to_slice(&mut fxsave, self.fxsave_offset) - .unwrap(); - fxsave - } - - /// Get the stack top GVA, same as the regular codepath. - fn stack_top_gva(&self) -> u64 { - hyperlight_common::layout::MAX_GVA as u64 - - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET - + 1 - } - } - - /// Creates VM with guest code that: dirtys FPU (if flag==0), does FXSAVE to buffer, sets flag=1. - /// Uses output_data region for FXSAVE buffer (like regular guest output), scratch for stack. - fn hyperlight_vm_with_mem_mgr_fxsave() -> FxsaveTestContext { - use iced_x86::code_asm::*; - - // Compute fixed addresses for FXSAVE buffer and flag. - // These are in the output_data region which starts at a known offset. - // We use a default SandboxConfiguration to get the same layout as create_test_vm_context. 
- let config: SandboxConfiguration = Default::default(); - let layout = SandboxMemoryLayout::new(config, 512, 4096, None).unwrap(); - let fxsave_offset = layout.get_output_data_buffer_scratch_host_offset(); - let fxsave_gva = layout.get_output_data_buffer_gva(); - let flag_gva = fxsave_gva + 512; - - let mut a = CodeAssembler::new(64).unwrap(); - a.push(rax).unwrap(); // Align stack to 16 bytes - - // Check flag at fixed address: if flag != 0, skip dirty phase - a.mov(rax, flag_gva).unwrap(); - a.mov(al, byte_ptr(rax)).unwrap(); - a.test(al, al).unwrap(); - let mut skip_dirty = a.create_label(); - a.jnz(skip_dirty).unwrap(); - - // Dirty x87 FPU (7 pushes so TOP=1) - a.fldz().unwrap(); - a.fldl2e().unwrap(); - a.fldl2t().unwrap(); - a.fldlg2().unwrap(); - a.fldln2().unwrap(); - a.fldpi().unwrap(); - a.fld1().unwrap(); - - // Dirty FCW (0x0F7F vs default 0x037F) - a.sub(rsp, 16i32).unwrap(); - a.mov(dword_ptr(rsp), 0x0F7Fu32).unwrap(); - a.fldcw(word_ptr(rsp)).unwrap(); - a.add(rsp, 16i32).unwrap(); - - // Dirty MXCSR (0x3F80 vs default 0x1F80) - a.sub(rsp, 16i32).unwrap(); - a.mov(dword_ptr(rsp), 0x3F80u32).unwrap(); - a.ldmxcsr(dword_ptr(rsp)).unwrap(); - a.add(rsp, 16i32).unwrap(); - - // Dirty XMM0-7 - let xmm_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]; - for (i, xmm) in xmm_regs.iter().enumerate() { - a.mov(eax, 0x11111111u32 * (i as u32 + 1)).unwrap(); - a.movd(*xmm, eax).unwrap(); - a.pshufd(*xmm, *xmm, 0).unwrap(); - } - - // Set flag = 1 at fixed address - a.mov(rax, flag_gva).unwrap(); - a.mov(byte_ptr(rax), 1u32).unwrap(); - - // FXSAVE to buffer at fixed address (runs on both executions) - a.set_label(&mut skip_dirty).unwrap(); - a.mov(rax, fxsave_gva).unwrap(); - a.fxsave(ptr(rax)).unwrap(); - - // Return dispatch ptr - a.mov(rax, layout.get_guest_code_address() as u64).unwrap(); - - a.hlt().unwrap(); - - let code = a.assemble(0).unwrap(); - - // Reuse common test setup - initialise() will run the code - let ctx = 
create_test_vm_context(&code); - - FxsaveTestContext { ctx, fxsave_offset } - } - } - - /// ======================================================================== - /// Misc tests - /// ======================================================================== - #[test] - fn test_get_max_log_level_filter_both_guest_and_host() { - let rust_log = "hyperlight_guest=trace,hyperlight_host=debug".to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!(filter, LevelFilter::TRACE, "Max log level should be Trace"); - } - #[test] - fn test_get_max_log_level_filter_only_guest() { - let rust_log = "hyperlight_guest=info".to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!(filter, LevelFilter::INFO, "Max log level should be Info"); - } - #[test] - fn test_get_max_log_level_filter_only_host() { - let rust_log = "hyperlight_host=debug".to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!(filter, LevelFilter::DEBUG, "Max log level should be Debug"); - } - #[test] - fn test_get_max_log_level_filter_only_general() { - let rust_log = "trace".to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!(filter, LevelFilter::TRACE, "Max log level should be Trace"); - } - #[test] - fn test_get_max_log_level_filter_complex_rust_log_00() { - let rust_log = - "error,hyperlight_guest=debug,hyperlight_host=info,hyperlight_guest_bin=trace" - .to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!(filter, LevelFilter::DEBUG, "Max log level should be Debug"); - } - #[test] - fn test_get_max_log_level_filter_complex_rust_log_01() { - let rust_log = - "error,hyperlight_host=info,hyperlight_guest=debug,hyperlight_guest_bin=trace" - .to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!(filter, LevelFilter::DEBUG, "Max log level should be Debug"); - } - #[test] - fn test_get_max_log_level_filter_complex_rust_log_02() { - let rust_log = - 
"hyperlight_host=info,error,hyperlight_guest=debug,hyperlight_guest_bin=trace" - .to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!(filter, LevelFilter::DEBUG, "Max log level should be Debug"); - } - #[test] - fn test_get_max_log_level_filter_general_and_others() { - let rust_log = - "trace,hyperlight_component_macro=debug,hyperlight_component_util=error".to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!(filter, LevelFilter::TRACE, "Max log level should be Trace"); - } - #[test] - fn test_get_max_log_level_filter_default() { - let rust_log = "hyperlight_common=debug,hyperlight_component_util=info".to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!( - filter, - LevelFilter::ERROR, - "Max log level should default to Error" - ); - } - #[test] - fn test_get_max_log_level_filter_invalid_rust_log() { - let rust_log = "this is an invalid rust log string".to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!( - filter, - LevelFilter::ERROR, - "Max log level should default to Error" - ); - } - #[test] - fn test_get_max_log_level_filter_empty_rust_log() { - let rust_log = "".to_string(); - let filter = get_max_log_level_filter(rust_log); - - assert_eq!( - filter, - LevelFilter::ERROR, - "Max log level should default to Error" - ); - } -} diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs new file mode 100644 index 000000000..57b09fc20 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -0,0 +1,2288 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#[cfg(gdb)] +use std::collections::HashMap; +#[cfg(crashdump)] +use std::path::Path; +#[cfg(any(kvm, mshv3))] +use std::sync::atomic::AtomicBool; +use std::sync::atomic::AtomicU8; +#[cfg(any(kvm, mshv3))] +use std::sync::atomic::AtomicU64; +use std::sync::{Arc, Mutex}; + +use tracing::{Span, instrument}; +use tracing_core::LevelFilter; + +use super::*; +use crate::hypervisor::InterruptHandleImpl; +#[cfg(any(kvm, mshv3))] +use crate::hypervisor::LinuxInterruptHandle; +#[cfg(crashdump)] +use crate::hypervisor::crashdump; +#[cfg(gdb)] +use crate::hypervisor::gdb::{ + DebugCommChannel, DebugMsg, DebugResponse, DebuggableVm, VcpuStopReason, +}; +#[cfg(gdb)] +use crate::hypervisor::gdb::{DebugError, DebugMemoryAccessError}; +use crate::hypervisor::regs::{ + CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, +}; +#[cfg(not(gdb))] +use crate::hypervisor::virtual_machine::VirtualMachine; +#[cfg(kvm)] +use crate::hypervisor::virtual_machine::kvm::KvmVm; +#[cfg(mshv3)] +use crate::hypervisor::virtual_machine::mshv::MshvVm; +#[cfg(target_os = "windows")] +use crate::hypervisor::virtual_machine::whp::WhpVm; +use crate::hypervisor::virtual_machine::{ + HypervisorType, RegisterError, VmError, get_available_hypervisor, +}; +#[cfg(target_os = "windows")] +use crate::hypervisor::{PartitionState, WindowsInterruptHandle}; +#[cfg(crashdump)] +use crate::mem::memory_region::MemoryRegion; +use crate::mem::mgr::SandboxMemoryManager; +use crate::mem::ptr::RawPtr; +use crate::mem::shared_mem::{GuestSharedMemory, HostSharedMemory}; +use crate::sandbox::SandboxConfiguration; 
+use crate::sandbox::host_funcs::FunctionRegistry; +use crate::sandbox::snapshot::NextAction; +#[cfg(feature = "mem_profile")] +use crate::sandbox::trace::MemTraceInfo; +#[cfg(crashdump)] +use crate::sandbox::uninitialized::SandboxRuntimeConfig; + +impl HyperlightVm { + /// Create a new HyperlightVm instance (will not run vm until calling `initialise`) + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + snapshot_mem: GuestSharedMemory, + scratch_mem: GuestSharedMemory, + _pml4_addr: u64, + entrypoint: NextAction, + rsp_gva: u64, + #[cfg_attr(target_os = "windows", allow(unused_variables))] config: &SandboxConfiguration, + #[cfg(gdb)] gdb_conn: Option>, + #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, + #[cfg(feature = "mem_profile")] trace_info: MemTraceInfo, + ) -> std::result::Result { + #[cfg(gdb)] + type VmType = Box; + #[cfg(not(gdb))] + type VmType = Box; + + let vm: VmType = match get_available_hypervisor() { + #[cfg(kvm)] + Some(HypervisorType::Kvm) => Box::new(KvmVm::new().map_err(VmError::CreateVm)?), + #[cfg(mshv3)] + Some(HypervisorType::Mshv) => Box::new(MshvVm::new().map_err(VmError::CreateVm)?), + #[cfg(target_os = "windows")] + Some(HypervisorType::Whp) => Box::new(WhpVm::new().map_err(VmError::CreateVm)?), + None => return Err(CreateHyperlightVmError::NoHypervisorFound), + }; + + #[cfg(not(feature = "nanvix-unstable"))] + vm.set_sregs(&CommonSpecialRegisters::standard_64bit_defaults(_pml4_addr)) + .map_err(VmError::Register)?; + #[cfg(feature = "nanvix-unstable")] + vm.set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults()) + .map_err(VmError::Register)?; + + #[cfg(any(kvm, mshv3))] + let interrupt_handle: Arc = Arc::new(LinuxInterruptHandle { + state: AtomicU8::new(0), + #[cfg(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" + ))] + tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), 
+ #[cfg(not(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" + )))] + tid: AtomicU64::new(unsafe { libc::pthread_self() }), + retry_delay: config.get_interrupt_retry_delay(), + sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), + dropped: AtomicBool::new(false), + }); + + #[cfg(target_os = "windows")] + let interrupt_handle: Arc = Arc::new(WindowsInterruptHandle { + state: AtomicU8::new(0), + partition_state: std::sync::RwLock::new(PartitionState { + handle: vm.partition_handle(), + dropped: false, + }), + }); + + let snapshot_slot = 0u32; + let scratch_slot = 1u32; + #[cfg_attr(not(gdb), allow(unused_mut))] + let mut ret = Self { + vm, + entrypoint, + rsp_gva, + interrupt_handle, + page_size: 0, // Will be set in `initialise` + + next_slot: scratch_slot + 1, + freed_slots: Vec::new(), + + snapshot_slot, + snapshot_memory: None, + scratch_slot, + scratch_memory: None, + + mmap_regions: Vec::new(), + + pending_tlb_flush: false, + + #[cfg(gdb)] + gdb_conn, + #[cfg(gdb)] + sw_breakpoints: HashMap::new(), + #[cfg(feature = "mem_profile")] + trace_info, + #[cfg(crashdump)] + rt_cfg, + }; + + ret.update_snapshot_mapping(snapshot_mem)?; + ret.update_scratch_mapping(scratch_mem)?; + + // Send the interrupt handle to the GDB thread if debugging is enabled + // This is used to allow the GDB thread to stop the vCPU + #[cfg(gdb)] + if ret.gdb_conn.is_some() { + ret.send_dbg_msg(DebugResponse::InterruptHandle(ret.interrupt_handle.clone()))?; + // Add breakpoint to the entry point address, if we are going to initialise + ret.vm.set_debug(true).map_err(VmError::Debug)?; + if let NextAction::Initialise(initialise) = entrypoint { + ret.vm + .add_hw_breakpoint(initialise) + .map_err(CreateHyperlightVmError::AddHwBreakpoint)?; + } + } + + Ok(ret) + } + + /// Initialise the internally stored vCPU with the given PEB address and + /// random number seed, then run it until a HLT instruction. 
+ #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + #[allow(clippy::too_many_arguments)] + pub(crate) fn initialise( + &mut self, + peb_addr: RawPtr, + seed: u64, + page_size: u32, + mem_mgr: &mut SandboxMemoryManager, + host_funcs: &Arc>, + guest_max_log_level: Option, + #[cfg(gdb)] dbg_mem_access_fn: Arc>>, + ) -> std::result::Result<(), InitializeError> { + let NextAction::Initialise(initialise) = self.entrypoint else { + return Ok(()); + }; + + self.page_size = page_size as usize; + + let regs = CommonRegisters { + rip: initialise, + // We usually keep the top of the stack 16-byte + // aligned. However, the ABI requirement is that the stack + // be aligned _before a call instruction_, which means + // that the stack needs to actually be ≡ 8 mod 16 at the + // first instruction (since, on x64, a call instruction + // automatically pushes a return address). + rsp: self.rsp_gva - 8, + + // function args + rdi: peb_addr.into(), + rsi: seed, + rdx: page_size.into(), + rcx: get_guest_log_filter(guest_max_log_level), + rflags: 1 << 1, + + ..Default::default() + }; + self.vm.set_regs(®s)?; + + self.run( + mem_mgr, + host_funcs, + #[cfg(gdb)] + dbg_mem_access_fn, + ) + .map_err(InitializeError::Run)?; + + let regs = self.vm.regs()?; + // todo(portability): this is architecture-specific + if !regs.rsp.is_multiple_of(16) { + return Err(InitializeError::InvalidStackPointer(regs.rsp)); + } + self.rsp_gva = regs.rsp; + self.entrypoint = NextAction::Call(regs.rax); + + Ok(()) + } + + /// Get the current base page table physical address. + /// + /// By default, reads CR3 from the vCPU special registers. + /// With `nanvix-unstable`, returns 0 (identity-mapped, no page tables). 
+ pub(crate) fn get_root_pt(&self) -> Result { + #[cfg(not(feature = "nanvix-unstable"))] + { + let sregs = self.vm.sregs()?; + // Mask off the flags bits + Ok(sregs.cr3 & !0xfff_u64) + } + #[cfg(feature = "nanvix-unstable")] + { + Ok(0) + } + } + + /// Get the special registers that need to be stored in a snapshot. + pub(crate) fn get_snapshot_sregs( + &mut self, + ) -> Result { + Ok(self.vm.sregs()?) + } + + /// Dispatch a call from the host to the guest using the given pointer + /// to the dispatch function _in the guest's address space_. + /// + /// Do this by setting the instruction pointer to `dispatch_func_addr` + /// and then running the execution loop until a halt instruction. + /// + /// Returns `Ok` if the call succeeded, and an `Err` if it failed + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn dispatch_call_from_host( + &mut self, + mem_mgr: &mut SandboxMemoryManager, + host_funcs: &Arc>, + #[cfg(gdb)] dbg_mem_access_fn: Arc>>, + ) -> std::result::Result<(), DispatchGuestCallError> { + let NextAction::Call(dispatch_func_addr) = self.entrypoint else { + return Err(DispatchGuestCallError::Uninitialized); + }; + let mut rflags = 1 << 1; // RFLAGS.1 is RES1 + if self.pending_tlb_flush { + rflags |= 1 << 6; // set ZF if we need a tlb flush done before anything else executes + self.pending_tlb_flush = false; + } + // set RIP and RSP, reset others + let regs = CommonRegisters { + rip: dispatch_func_addr, + // We usually keep the top of the stack 16-byte + // aligned. Since the usual ABI requirement is that the + // stack be aligned _before a call instruction_, one might + // expect that the stack pointer here needs to actually be + // ≡ 8 mod 16 at the first instruction (since, on x64, a + // call instruction automatically pushes a return + // address). However, the x64 entry stub in + // hyperlight_guest::arch::dispatch handles this itself, + // so we do use the aligned address here. 
+ rsp: self.rsp_gva, + rflags, + ..Default::default() + }; + self.vm + .set_regs(®s) + .map_err(DispatchGuestCallError::SetupRegs)?; + + // reset fpu + self.vm + .set_fpu(&CommonFpu::default()) + .map_err(DispatchGuestCallError::SetupRegs)?; + + self.run( + mem_mgr, + host_funcs, + #[cfg(gdb)] + dbg_mem_access_fn, + ) + .map_err(DispatchGuestCallError::Run) + } + + /// Resets the following vCPU state: + /// - General purpose registers + /// - Debug registers + /// - XSAVE (includes FPU/SSE state with proper FCW and MXCSR defaults) + /// - Special registers (restored from snapshot, with CR3 updated to new page table location) + // TODO: check if other state needs to be reset + pub(crate) fn reset_vcpu( + &mut self, + cr3: u64, + sregs: &CommonSpecialRegisters, + ) -> std::result::Result<(), RegisterError> { + self.vm.set_regs(&CommonRegisters { + rflags: 1 << 1, // Reserved bit always set + ..Default::default() + })?; + self.vm.set_debug_regs(&CommonDebugRegs::default())?; + self.vm.reset_xsave()?; + + #[cfg(not(feature = "nanvix-unstable"))] + { + // Restore the full special registers from snapshot, but update CR3 + // to point to the new (relocated) page tables + let mut sregs = *sregs; + sregs.cr3 = cr3; + self.pending_tlb_flush = true; + self.vm.set_sregs(&sregs)?; + } + #[cfg(feature = "nanvix-unstable")] + { + let _ = (cr3, sregs); // suppress unused warnings + // TODO: This is probably not correct. 
+ // Let's deal with it when we clean up the nanvix-unstable feature + self.vm + .set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults())?; + } + + Ok(()) + } + + // Handle a debug exit + #[cfg(gdb)] + pub(super) fn handle_debug( + &mut self, + dbg_mem_access_fn: Arc>>, + stop_reason: VcpuStopReason, + ) -> std::result::Result<(), HandleDebugError> { + use debug::ProcessDebugRequestError; + + use crate::hypervisor::gdb::DebugMemoryAccess; + + if self.gdb_conn.is_none() { + return Err(HandleDebugError::DebugNotEnabled); + } + + let mem_access = DebugMemoryAccess { + // TODO: dbg_mem_access_fn could be out of sync with the + // actual snapshot/scratch regions, if a snapshot restore + // has caused either of those to change. + dbg_mem_access_fn, + guest_mmap_regions: self.get_mapped_regions().cloned().collect(), + }; + + match stop_reason { + // If the vCPU stopped because of a crash, we need to handle it differently + // We do not want to allow resuming execution or placing breakpoints + // because the guest has crashed. 
+ // We only allow reading registers and memory + VcpuStopReason::Crash => { + self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason))?; + + loop { + log::debug!("Debug wait for event to resume vCPU"); + // Wait for a message from gdb + let req = self.recv_dbg_msg()?; + + // Flag to store if we should deny continue or step requests + let mut deny_continue = false; + // Flag to store if we should detach from the gdb session + let mut detach = false; + + let response = match req { + // Allow the detach request to disable debugging by continuing resuming + // hypervisor crash error reporting + DebugMsg::DisableDebug => { + detach = true; + DebugResponse::DisableDebug + } + // Do not allow continue or step requests + DebugMsg::Continue | DebugMsg::Step => { + deny_continue = true; + DebugResponse::NotAllowed + } + // Do not allow adding/removing breakpoints and writing to memory or registers + DebugMsg::AddHwBreakpoint(_) + | DebugMsg::AddSwBreakpoint(_) + | DebugMsg::RemoveHwBreakpoint(_) + | DebugMsg::RemoveSwBreakpoint(_) + | DebugMsg::WriteAddr(_, _) + | DebugMsg::WriteRegisters(_) => DebugResponse::NotAllowed, + + // For all other requests, we will process them normally + _ => { + let result = self.process_dbg_request(req, &mem_access); + match result { + Ok(response) => response, + // Treat non-fatal errors separately so the guest doesn't fail + Err(ProcessDebugRequestError::ReadMemory( + DebugMemoryAccessError::TranslateGuestAddress(_), + )) + | Err(ProcessDebugRequestError::Debug(DebugError::TranslateGva( + _, + ))) => DebugResponse::ErrorOccurred, + Err(e) => { + log::error!("Error processing debug request: {:?}", e); + return Err(HandleDebugError::ProcessRequest(e)); + } + } + } + }; + + // Send the response to the request back to gdb + self.send_dbg_msg(response)?; + + // If we are denying continue or step requests, the debugger assumes the + // execution started so we need to report a stop reason as a crash and let + // it request to read 
registers/memory to figure out what happened + if deny_continue { + self.send_dbg_msg(DebugResponse::VcpuStopped(VcpuStopReason::Crash))?; + } + + // If we are detaching, we will break the loop and the Hypervisor will continue + // to handle the Crash reason + if detach { + break; + } + } + } + // If the vCPU stopped because of any other reason except a crash, we can handle it + // normally + _ => { + // Send the stop reason to the gdb thread + self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason))?; + + loop { + log::debug!("Debug wait for event to resume vCPU"); + // Wait for a message from gdb + let req = self.recv_dbg_msg()?; + + let result = self.process_dbg_request(req, &mem_access); + + let response = match result { + Ok(response) => response, + // Treat non-fatal errors separately so the guest doesn't fail + Err(ProcessDebugRequestError::ReadMemory( + DebugMemoryAccessError::TranslateGuestAddress(_), + )) + | Err(ProcessDebugRequestError::Debug(DebugError::TranslateGva(_))) => { + DebugResponse::ErrorOccurred + } + Err(e) => { + return Err(HandleDebugError::ProcessRequest(e)); + } + }; + + let cont = matches!( + response, + DebugResponse::Continue | DebugResponse::Step | DebugResponse::DisableDebug + ); + + self.send_dbg_msg(response)?; + + // Check if we should continue execution + // We continue if the response is one of the following: Step, Continue, or DisableDebug + if cont { + break; + } + } + } + } + + Ok(()) + } + + #[cfg(crashdump)] + pub(crate) fn crashdump_context( + &self, + mem_mgr: &mut SandboxMemoryManager, + ) -> std::result::Result, CrashDumpError> + { + if self.rt_cfg.guest_core_dump { + let mut regs = [0; 27]; + + let vcpu_regs = self.vm.regs()?; + let sregs = self.vm.sregs()?; + let xsave = self.vm.xsave()?; + + // Set up the registers for the crash dump + regs[0] = vcpu_regs.r15; // r15 + regs[1] = vcpu_regs.r14; // r14 + regs[2] = vcpu_regs.r13; // r13 + regs[3] = vcpu_regs.r12; // r12 + regs[4] = vcpu_regs.rbp; // rbp + regs[5] = 
vcpu_regs.rbx; // rbx + regs[6] = vcpu_regs.r11; // r11 + regs[7] = vcpu_regs.r10; // r10 + regs[8] = vcpu_regs.r9; // r9 + regs[9] = vcpu_regs.r8; // r8 + regs[10] = vcpu_regs.rax; // rax + regs[11] = vcpu_regs.rcx; // rcx + regs[12] = vcpu_regs.rdx; // rdx + regs[13] = vcpu_regs.rsi; // rsi + regs[14] = vcpu_regs.rdi; // rdi + regs[15] = 0; // orig rax + regs[16] = vcpu_regs.rip; // rip + regs[17] = sregs.cs.selector as u64; // cs + regs[18] = vcpu_regs.rflags; // eflags + regs[19] = vcpu_regs.rsp; // rsp + regs[20] = sregs.ss.selector as u64; // ss + regs[21] = sregs.fs.base; // fs_base + regs[22] = sregs.gs.base; // gs_base + regs[23] = sregs.ds.selector as u64; // ds + regs[24] = sregs.es.selector as u64; // es + regs[25] = sregs.fs.selector as u64; // fs + regs[26] = sregs.gs.selector as u64; // gs + + // Get the filename from the binary path + let filename = self.rt_cfg.binary_path.clone().and_then(|path| { + Path::new(&path) + .file_name() + .and_then(|name| name.to_os_string().into_string().ok()) + }); + + // Use the stored entry point address from the runtime config. + // This is the original entry point (load_addr + ELF entry offset) + // which GDB needs for AT_ENTRY to compute the PIE load offset. + // We cannot use self.entrypoint here because it transitions from + // Initialise(addr) to Call(dispatch_addr) after guest init. 
+ let initialise = self.rt_cfg.entry_point.unwrap_or_else(|| { + tracing::warn!( + "entry_point was never set in SandboxRuntimeConfig; AT_ENTRY will be 0" + ); + 0 + }); + let mmap_regions: Vec = self.get_mapped_regions().cloned().collect(); + let root_pt = self.get_root_pt()?; + + let regions = mem_mgr + .get_guest_memory_regions(root_pt, &mmap_regions) + .map_err(|e| CrashDumpError::AccessPageTable(Box::new(e)))?; + + Ok(Some(crashdump::CrashDumpContext::new( + regions, + regs, + xsave.to_vec(), + initialise, + self.rt_cfg.binary_path.clone(), + filename, + ))) + } else { + Ok(None) + } + } +} + +#[cfg(gdb)] +pub(super) mod debug { + use hyperlight_common::mem::PAGE_SIZE; + + use super::HyperlightVm; + use crate::hypervisor::gdb::arch::{SW_BP, SW_BP_SIZE}; + use crate::hypervisor::gdb::{ + DebugError, DebugMemoryAccess, DebugMemoryAccessError, DebugMsg, DebugResponse, + }; + use crate::hypervisor::virtual_machine::VmError; + + /// Errors that can occur during GDB debug request processing + #[derive(Debug, thiserror::Error)] + pub enum ProcessDebugRequestError { + #[error("Debug is not enabled")] + DebugNotEnabled, + #[error("Failed to acquire lock at {0}:{1}")] + TryLockError(&'static str, u32), + #[error("VM operation error: {0}")] + Vm(#[from] VmError), + #[error("Debug operation error: {0}")] + Debug(#[from] DebugError), + #[error("Address {0:#x} is not a software breakpoint")] + SwBreakpointNotFound(u64), + #[error("Failed to read memory: {0}")] + ReadMemory(#[from] DebugMemoryAccessError), + #[error("Failed to write memory: {0}")] + WriteMemory(DebugMemoryAccessError), + } + + impl HyperlightVm { + pub(crate) fn process_dbg_request( + &mut self, + req: DebugMsg, + mem_access: &DebugMemoryAccess, + ) -> std::result::Result { + if self.gdb_conn.is_some() { + match req { + DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( + self.vm + .add_hw_breakpoint(addr) + .map_err(|e| { + log::error!("Failed to add hw breakpoint: {:?}", e); + + e + }) + 
.is_ok(), + )), + DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( + self.add_sw_breakpoint(addr, mem_access) + .map_err(|e| { + log::error!("Failed to add sw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::Continue => { + self.vm.set_single_step(false).map_err(|e| { + log::error!("Failed to continue execution: {:?}", e); + + e + })?; + + Ok(DebugResponse::Continue) + } + DebugMsg::DisableDebug => { + self.vm.set_debug(false).map_err(|e| { + log::error!("Failed to disable debugging: {:?}", e); + e + })?; + + Ok(DebugResponse::DisableDebug) + } + DebugMsg::GetCodeSectionOffset => { + let offset = mem_access + .dbg_mem_access_fn + .try_lock() + .map_err(|_| ProcessDebugRequestError::TryLockError(file!(), line!()))? + .layout + .get_guest_code_address(); + + Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) + } + DebugMsg::ReadAddr(addr, len) => { + let mut data = vec![0u8; len]; + + self.read_addrs(addr, &mut data, mem_access).map_err(|e| { + log::error!("Failed to read from address: {:?}", e); + + e + })?; + + Ok(DebugResponse::ReadAddr(data)) + } + DebugMsg::ReadRegisters => { + let regs = self.vm.regs().map_err(VmError::Register)?; + let fpu = self.vm.fpu().map_err(VmError::Register)?; + Ok(DebugResponse::ReadRegisters(Box::new((regs, fpu)))) + } + DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( + self.vm + .remove_hw_breakpoint(addr) + .map_err(|e| { + log::error!("Failed to remove hw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( + self.remove_sw_breakpoint(addr, mem_access) + .map_err(|e| { + log::error!("Failed to remove sw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::Step => { + self.vm.set_single_step(true).map_err(|e| { + log::error!("Failed to enable step instruction: {:?}", e); + + e + })?; + + Ok(DebugResponse::Step) + } + DebugMsg::WriteAddr(addr, data) => { + self.write_addrs(addr, 
&data, mem_access).map_err(|e| { + log::error!("Failed to write to address: {:?}", e); + + e + })?; + + Ok(DebugResponse::WriteAddr) + } + DebugMsg::WriteRegisters(boxed_regs) => { + let (regs, fpu) = boxed_regs.as_ref(); + self.vm.set_regs(regs).map_err(VmError::Register)?; + self.vm.set_fpu(fpu).map_err(VmError::Register)?; + + Ok(DebugResponse::WriteRegisters) + } + } + } else { + Err(ProcessDebugRequestError::DebugNotEnabled) + } + } + + pub(crate) fn recv_dbg_msg( + &mut self, + ) -> std::result::Result { + use super::RecvDbgMsgError; + + let gdb_conn = self + .gdb_conn + .as_mut() + .ok_or(RecvDbgMsgError::DebugNotEnabled)?; + + Ok(gdb_conn.recv()?) + } + + pub(crate) fn send_dbg_msg( + &mut self, + cmd: DebugResponse, + ) -> std::result::Result<(), super::SendDbgMsgError> { + use super::SendDbgMsgError; + + log::debug!("Sending {:?}", cmd); + + let gdb_conn = self + .gdb_conn + .as_mut() + .ok_or(SendDbgMsgError::DebugNotEnabled)?; + + Ok(gdb_conn.send(cmd)?) + } + + fn read_addrs( + &mut self, + mut gva: u64, + mut data: &mut [u8], + mem_access: &DebugMemoryAccess, + ) -> std::result::Result<(), ProcessDebugRequestError> { + let data_len = data.len(); + log::debug!("Read addr: {:X} len: {:X}", gva, data_len); + + while !data.is_empty() { + let gpa = self.vm.translate_gva(gva)?; + + let read_len = std::cmp::min( + data.len(), + (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), + ); + + mem_access.read(&mut data[..read_len], gpa)?; + + data = &mut data[read_len..]; + gva += read_len as u64; + } + + Ok(()) + } + + /// Copies the data from the provided slice to the guest memory address + /// The address is checked to be a valid guest address + fn write_addrs( + &mut self, + mut gva: u64, + mut data: &[u8], + mem_access: &DebugMemoryAccess, + ) -> std::result::Result<(), ProcessDebugRequestError> { + let data_len = data.len(); + log::debug!("Write addr: {:X} len: {:X}", gva, data_len); + + while !data.is_empty() { + let gpa = 
self.vm.translate_gva(gva)?; + + let write_len = std::cmp::min( + data.len(), + (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), + ); + + // Use the memory access to write to guest memory + mem_access + .write(&data[..write_len], gpa) + .map_err(ProcessDebugRequestError::WriteMemory)?; + + data = &data[write_len..]; + gva += write_len as u64; + } + + Ok(()) + } + + // Must be idempotent! + fn add_sw_breakpoint( + &mut self, + gva: u64, + mem_access: &DebugMemoryAccess, + ) -> std::result::Result<(), ProcessDebugRequestError> { + // Check if breakpoint already exists + if self.sw_breakpoints.contains_key(&gva) { + return Ok(()); + } + + // Write breakpoint OP code to write to guest memory + let mut save_data = [0; SW_BP_SIZE]; + self.read_addrs(gva, &mut save_data[..], mem_access)?; + self.write_addrs(gva, &SW_BP, mem_access)?; + + // Save guest memory to restore when breakpoint is removed + self.sw_breakpoints.insert(gva, save_data[0]); + + Ok(()) + } + + fn remove_sw_breakpoint( + &mut self, + gva: u64, + mem_access: &DebugMemoryAccess, + ) -> std::result::Result<(), ProcessDebugRequestError> { + if let Some(saved_data) = self.sw_breakpoints.remove(&gva) { + // Restore saved data to the guest's memory + self.write_addrs(gva, &[saved_data], mem_access)?; + + Ok(()) + } else { + Err(ProcessDebugRequestError::SwBreakpointNotFound(gva)) + } + } + } +} + +#[cfg(test)] +#[cfg(not(feature = "nanvix-unstable"))] +#[allow(clippy::needless_range_loop)] +mod tests { + use std::sync::{Arc, Mutex}; + + use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind}; + use rand::RngExt; + + use super::*; + #[cfg(kvm)] + use crate::hypervisor::regs::FP_CONTROL_WORD_DEFAULT; + use crate::hypervisor::regs::{CommonSegmentRegister, CommonTableRegister, MXCSR_DEFAULT}; + use crate::hypervisor::virtual_machine::VirtualMachine; + use crate::mem::layout::SandboxMemoryLayout; + use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags}; + use 
crate::mem::mgr::{GuestPageTableBuffer, SandboxMemoryManager}; + use crate::mem::ptr::RawPtr; + use crate::mem::shared_mem::ExclusiveSharedMemory; + use crate::sandbox::SandboxConfiguration; + use crate::sandbox::host_funcs::FunctionRegistry; + #[cfg(any(crashdump, gdb))] + use crate::sandbox::uninitialized::SandboxRuntimeConfig; + use crate::sandbox::uninitialized_evolve::set_up_hypervisor_partition; + + /// Test context holding an initialized VM with memory manager. + /// Used by tests that need to interact with guest memory after execution. + struct TestVmContext { + vm: HyperlightVm, + hshm: SandboxMemoryManager, + host_funcs: Arc>, + #[cfg(gdb)] + dbg_mem_access_hdl: Arc>>, + } + + // ========================================================================== + // Dirty State Builders - Create non-default vCPU state for testing reset + // ========================================================================== + + /// Build dirty general purpose registers for testing reset_vcpu. + fn dirty_regs() -> CommonRegisters { + CommonRegisters { + rax: 0x1111111111111111, + rbx: 0x2222222222222222, + rcx: 0x3333333333333333, + rdx: 0x4444444444444444, + rsi: 0x5555555555555555, + rdi: 0x6666666666666666, + rsp: 0x7777777777777777, + rbp: 0x8888888888888888, + r8: 0x9999999999999999, + r9: 0xAAAAAAAAAAAAAAAA, + r10: 0xBBBBBBBBBBBBBBBB, + r11: 0xCCCCCCCCCCCCCCCC, + r12: 0xDDDDDDDDDDDDDDDD, + r13: 0xEEEEEEEEEEEEEEEE, + r14: 0xFFFFFFFFFFFFFFFF, + r15: 0x0123456789ABCDEF, + rip: 0xFEDCBA9876543210, + rflags: 0x202, // IF + reserved bit 1 + } + } + + /// Build dirty FPU state for testing reset_vcpu. + fn dirty_fpu() -> CommonFpu { + CommonFpu { + fpr: [[0xAB; 16]; 8], + fcw: 0x0F7F, // Different from default 0x037F + fsw: 0x1234, + ftwx: 0xAB, + last_opcode: 0x0123, + last_ip: 0xDEADBEEF00000000, + last_dp: 0xCAFEBABE00000000, + xmm: [[0xCD; 16]; 16], + mxcsr: 0x3F80, // Different from default 0x1F80 + } + } + + /// Build dirty special registers for testing reset_vcpu. 
+ /// Must be consistent for 64-bit long mode (CR0/CR4/EFER). + fn dirty_sregs(_pml4_addr: u64) -> CommonSpecialRegisters { + let segment = CommonSegmentRegister { + base: 0x1000, + limit: 0xFFFF, + selector: 0x10, + type_: 3, // data segment, read/write, accessed + present: 1, + dpl: 0, + db: 1, + s: 1, + l: 0, + g: 1, + avl: 1, + unusable: 0, + padding: 0, + }; + // CS segment - 64-bit code segment + let cs_segment = CommonSegmentRegister { + base: 0, + limit: 0xFFFF, + selector: 0x08, + type_: 0b1011, // code segment, execute/read, accessed + present: 1, + dpl: 0, + db: 0, // must be 0 in 64-bit mode + s: 1, + l: 1, // 64-bit mode + g: 1, + avl: 0, + unusable: 0, + padding: 0, + }; + let table = CommonTableRegister { + base: 0xDEAD0000, + limit: 0xFFFF, + }; + CommonSpecialRegisters { + cs: cs_segment, + ds: segment, + es: segment, + fs: segment, + gs: segment, + ss: segment, + tr: CommonSegmentRegister { + type_: 0b1011, // busy TSS + present: 1, + ..segment + }, + ldt: segment, + gdt: table, + idt: table, + cr0: 0x80000011, // PE + ET + PG + cr2: 0xBADC0DE, + // MSHV validates cr3 and rejects bogus values; use valid _pml4_addr for MSHV + cr3: match get_available_hypervisor() { + #[cfg(mshv3)] + Some(HypervisorType::Mshv) => _pml4_addr, + _ => 0x12345000, + }, + cr4: 0x20, // PAE + cr8: 0x5, + efer: 0x500, // LME + LMA + apic_base: 0xFEE00900, + interrupt_bitmap: [0; 4], // fails if non-zero on MSHV + } + } + + /// Build dirty debug registers for testing reset_vcpu. 
+ /// + /// DR6 bit layout (Intel SDM / AMD APM): + /// Bits 0-3 (B0-B3): Breakpoint condition detected - software writable/clearable + /// Bits 4-10: Reserved, read as 1s on modern processors (read-only) + /// Bit 11 (BLD): Bus Lock Trap - cleared by processor, read-only on older CPUs + /// Bit 12: Reserved, always 0 + /// Bit 13 (BD): Debug Register Access Detected - software clearable + /// Bit 14 (BS): Single-Step - software clearable + /// Bit 15 (BT): Task Switch breakpoint - software clearable + /// Bit 16 (RTM): TSX-related, read-only (1 if no TSX) + /// Bits 17-31: Reserved, read as 1s on modern processors (read-only) + /// Bits 32-63: Reserved, must be 0 + /// + /// Writable bits: 0-3, 13, 14, 15 = mask 0xE00F + /// Reserved 1s: 4-10, 11 (if no BLD), 16 (if no TSX), 17-31 = ~0xE00F on lower 32 bits + const DR6_WRITABLE_MASK: u64 = 0xE00F; // B0-B3, BD, BS, BT + + /// DR7 bit layout: + /// Bits 0-7 (L0-L3, G0-G3): Local/global breakpoint enables - writable + /// Bits 8-9 (LE, GE): Local/Global Exact (386 only, ignored on modern) - writable + /// Bit 10: Reserved, must be 1 (read-only) + /// Bits 11-12: Reserved (RTM/TSX on some CPUs), must be 0 (read-only) + /// Bit 13 (GD): General Detect Enable - writable + /// Bits 14-15: Reserved, must be 0 (read-only) + /// Bits 16-31 (R/W0-3, LEN0-3): Breakpoint conditions and lengths - writable + /// Bits 32-63: Reserved, must be 0 (read-only) + /// + /// Writable bits: 0-9, 13, 16-31 = mask 0xFFFF23FF + const DR7_WRITABLE_MASK: u64 = 0xFFFF_23FF; + + fn dirty_debug_regs() -> CommonDebugRegs { + CommonDebugRegs { + dr0: 0xDEADBEEF00001000, + dr1: 0xDEADBEEF00002000, + dr2: 0xDEADBEEF00003000, + dr3: 0xDEADBEEF00004000, + // Set all writable bits: B0-B3 (0-3), BD (13), BS (14), BT (15) + dr6: DR6_WRITABLE_MASK, + // Set writable bits: L0-L3, G0-G3 (0-7), LE/GE (8-9), GD (13), conditions (16-31) + dr7: DR7_WRITABLE_MASK, + } + } + + /// Returns default test values for reset_vcpu parameters. 
+ /// Uses standard 64-bit defaults since reset_vcpu now restores full sregs from snapshot. + fn default_sregs() -> CommonSpecialRegisters { + CommonSpecialRegisters::standard_64bit_defaults(0) + } + + // ========================================================================== + // Normalizers - Handle hypervisor-specific quirks when comparing vCPU state + // ========================================================================== + + /// Normalize debug registers for comparison by applying writable masks. + /// Reserved bits in DR6/DR7 are read-only (set by CPU), so we copy them from actual. + fn normalize_debug_regs(expected: &mut CommonDebugRegs, actual: &CommonDebugRegs) { + expected.dr6 = (expected.dr6 & DR6_WRITABLE_MASK) | (actual.dr6 & !DR6_WRITABLE_MASK); + expected.dr7 = (expected.dr7 & DR7_WRITABLE_MASK) | (actual.dr7 & !DR7_WRITABLE_MASK); + } + + /// Normalize segment hidden cache fields that hypervisors report differently. + /// Applies to: unusable, granularity (g), and ss.db fields. + /// Does NOT normalize type_ - use this when verifying explicitly-set dirty state. + fn normalize_sregs_hidden_cache( + expected: &mut CommonSpecialRegisters, + actual: &CommonSpecialRegisters, + ) { + expected.ss.db = actual.ss.db; + expected.cs.unusable = actual.cs.unusable; + expected.cs.g = actual.cs.g; + expected.ds.unusable = actual.ds.unusable; + expected.ds.g = actual.ds.g; + expected.es.unusable = actual.es.unusable; + expected.es.g = actual.es.g; + expected.fs.unusable = actual.fs.unusable; + expected.fs.g = actual.fs.g; + expected.gs.unusable = actual.gs.unusable; + expected.gs.g = actual.gs.g; + expected.ss.unusable = actual.ss.unusable; + expected.ss.g = actual.ss.g; + expected.tr.unusable = actual.tr.unusable; + expected.tr.g = actual.tr.g; + expected.ldt.unusable = actual.ldt.unusable; + expected.ldt.g = actual.ldt.g; + } + + /// Normalize sregs for verifying reset state. 
+ /// + /// Handles hypervisor-specific differences in segment descriptor fields: + /// - Hidden cache fields (unusable, granularity bits) vary between KVM/MSHV/WHP + /// - For unusable segments (DS/ES/FS/GS/SS in 64-bit mode), the type_ field + /// is ignored by the CPU and varies between hypervisors + fn normalize_sregs_for_reset( + expected: &mut CommonSpecialRegisters, + actual: &CommonSpecialRegisters, + ) { + normalize_sregs_hidden_cache(expected, actual); + // type_ for unusable segments: hypervisors return different defaults + // (KVM returns type_=1, WHP returns type_=0). + expected.ds.type_ = actual.ds.type_; + expected.es.type_ = actual.es.type_; + expected.fs.type_ = actual.fs.type_; + expected.gs.type_ = actual.gs.type_; + expected.ss.type_ = actual.ss.type_; + } + + /// Normalize sregs for tests that run actual guest code. + /// + /// Handles hypervisor-specific differences in segment descriptor fields: + /// - Hidden cache fields (unusable, db) vary between KVM/MSHV/WHP + /// - For unusable segments (DS/ES/FS/GS/SS in 64-bit mode), the type_ field + /// is ignored by the CPU and varies between hypervisors + fn normalize_sregs_for_run_tests( + expected: &mut CommonSpecialRegisters, + actual: &CommonSpecialRegisters, + ) { + expected.ss.db = actual.ss.db; + expected.cs.unusable = actual.cs.unusable; + expected.ds.unusable = actual.ds.unusable; + expected.ds.type_ = actual.ds.type_; + expected.es.unusable = actual.es.unusable; + expected.es.type_ = actual.es.type_; + expected.fs.unusable = actual.fs.unusable; + expected.fs.type_ = actual.fs.type_; + expected.gs.unusable = actual.gs.unusable; + expected.gs.type_ = actual.gs.type_; + expected.ss.unusable = actual.ss.unusable; + expected.ss.type_ = actual.ss.type_; + expected.tr.unusable = actual.tr.unusable; + expected.ldt.unusable = actual.ldt.unusable; + } + + /// Normalize FPU MXCSR for KVM quirk. 
+ /// KVM doesn't preserve MXCSR via set_fpu/fpu(), so we need to set it manually + /// when comparing FPU state. + #[cfg_attr(not(kvm), allow(unused_variables))] + fn normalize_fpu_mxcsr_for_kvm(fpu: &mut CommonFpu, expected_mxcsr: u32) { + #[cfg(kvm)] + if *get_available_hypervisor().as_ref().unwrap() == HypervisorType::Kvm { + fpu.mxcsr = expected_mxcsr; + } + } + + /// Normalize FPU state for reset comparison. + /// + /// When ftwx == 0, all x87 FPU registers are marked empty. In this state: + /// - `fpr`: Contents are architecturally undefined since registers are empty + /// - `last_ip`, `last_dp`, `last_opcode`: Track the last FPU instruction location. + /// On WHP, the register read API may return stale values even after + /// reset_xsave() properly zeroes the XSAVE area. This is a WHP API quirk - + /// the guest-visible state (via FXSAVE/XSAVE instructions) IS properly reset. + /// + /// IMPORTANT: The `reset_vcpu_fpu_guest_visible_state` test verifies actual + /// guest-visible FPU state by running real guest code with FXSAVE, providing + /// defense-in-depth against hypervisor API quirks masking real issues. + fn normalize_fpu_for_reset(expected: &mut CommonFpu, actual: &CommonFpu) { + if actual.ftwx == 0 { + expected.fpr = actual.fpr; + expected.last_ip = actual.last_ip; + expected.last_dp = actual.last_dp; + expected.last_opcode = actual.last_opcode; + } + } + + // ========================================================================== + // Assertion Helpers - Verify vCPU state after reset + // ========================================================================== + + /// Assert that debug registers are in reset state. + /// Reserved bits in DR6/DR7 are read-only (set by CPU), so we only check + /// that writable bits are cleared to 0 and DR0-DR3 are zeroed. 
+ fn assert_debug_regs_reset(vm: &dyn VirtualMachine) { + let debug_regs = vm.debug_regs().unwrap(); + let expected = CommonDebugRegs { + dr0: 0, + dr1: 0, + dr2: 0, + dr3: 0, + dr6: debug_regs.dr6 & !DR6_WRITABLE_MASK, + dr7: debug_regs.dr7 & !DR7_WRITABLE_MASK, + }; + assert_eq!(debug_regs, expected); + } + + /// Assert that general-purpose registers are in reset state. + /// After reset, all registers should be zeroed except rflags which has + /// reserved bit 1 always set. + fn assert_regs_reset(vm: &dyn VirtualMachine) { + assert_eq!( + vm.regs().unwrap(), + CommonRegisters { + rflags: 1 << 1, // Reserved bit 1 is always set + ..Default::default() + } + ); + } + + /// Assert that FPU state is in reset state. + /// Handles hypervisor-specific quirks (KVM MXCSR, empty FPU registers). + fn assert_fpu_reset(vm: &dyn VirtualMachine) { + let fpu = vm.fpu().unwrap(); + let mut expected_fpu = CommonFpu::default(); + normalize_fpu_mxcsr_for_kvm(&mut expected_fpu, fpu.mxcsr); + normalize_fpu_for_reset(&mut expected_fpu, &fpu); + assert_eq!(fpu, expected_fpu); + } + + /// Assert that special registers are in reset state. + /// Handles hypervisor-specific differences in hidden descriptor cache fields. + fn assert_sregs_reset(vm: &dyn VirtualMachine, pml4_addr: u64) { + let defaults = CommonSpecialRegisters::standard_64bit_defaults(pml4_addr); + let sregs = vm.sregs().unwrap(); + let mut expected_sregs = defaults; + // Normalize hypervisor implementation-specific fields. + // These are part of the hidden descriptor cache. While guests can write them + // indirectly (by loading segments from a crafted GDT), guests cannot read them back + // (e.g., `mov ax, ds` only returns the selector, not the hidden cache). + // KVM and MSHV reset to different default values, but both properly reset so there's + // no information leakage between tenants. 
+ normalize_sregs_for_reset(&mut expected_sregs, &sregs); + assert_eq!(sregs, expected_sregs); + } + + // ========================================================================== + // XSAVE Helpers - Build dirty XSAVE state for testing extended CPU state + // ========================================================================== + + /// Query CPUID.0DH.n for XSAVE component info. + /// Returns (size, offset, align_64) for the given component: + /// - size: CPUID.0DH.n:EAX - size in bytes + /// - offset: CPUID.0DH.n:EBX - offset from XSAVE base (standard format only) + /// - align_64: CPUID.0DH.n:ECX bit 1 - true if 64-byte aligned (compacted format) + fn xsave_component_info(comp_id: u32) -> (usize, usize, bool) { + let result = unsafe { std::arch::x86_64::__cpuid_count(0xD, comp_id) }; + let size = result.eax as usize; + let offset = result.ebx as usize; + let align_64 = (result.ecx & 0b10) != 0; + (size, offset, align_64) + } + + /// Query CPUID.0DH.00H for the bitmap of supported user state components. + /// EDX:EAX forms a 64-bit bitmap where bit i indicates support for component i. + fn xsave_supported_components() -> u64 { + let result = unsafe { std::arch::x86_64::__cpuid_count(0xD, 0) }; + (result.edx as u64) << 32 | (result.eax as u64) + } + + /// Dirty extended state components using compacted XSAVE format (MSHV/WHP). + /// Components are stored contiguously starting at byte 576, with alignment + /// requirements from CPUID.0DH.n:ECX[1]. + /// Returns a bitmask of components that were actually dirtied. 
+    fn dirty_xsave_extended_compacted(
+        xsave: &mut [u32],
+        xcomp_bv: u64,
+        supported_components: u64,
+    ) -> u64 {
+        let mut dirtied = 0u64;
+        // Byte position of the next component; extended state starts after the header.
+        let mut cursor = 576usize;
+
+        for comp_id in 2..63u32 {
+            let bit = 1u64 << comp_id;
+            // A component is laid out only if the CPU supports it and XCOMP_BV enables it.
+            if (supported_components & bit) == 0 || (xcomp_bv & bit) == 0 {
+                continue;
+            }
+
+            let (size, _, align_64) = xsave_component_info(comp_id);
+
+            // ECX[1]=1 means 64-byte aligned; ECX[1]=0 means immediately after previous
+            if align_64 {
+                cursor = cursor.next_multiple_of(64);
+            }
+
+            // Fill the component's words with its pattern, but only if it fits.
+            let words = cursor / 4..(cursor + size) / 4;
+            if words.end <= xsave.len() {
+                xsave[words].fill(0x12345678 ^ comp_id.wrapping_mul(0x11111111));
+                dirtied |= bit;
+            }
+
+            // The layout position advances even when the component was skipped for size.
+            cursor += size;
+        }
+
+        dirtied
+    }
+
+    /// Dirty extended state components using standard XSAVE format (KVM).
+    /// Each component sits at the fixed offset reported by CPUID.0DH.n:EBX.
+    /// Returns a bitmask of the components that were actually dirtied.
+    fn dirty_xsave_extended_standard(xsave: &mut [u32], supported_components: u64) -> u64 {
+        let mut dirtied = 0u64;
+
+        // Only components the CPU reports as supported are considered.
+        let supported = (2..63u32).filter(|&id| (supported_components & (1u64 << id)) != 0);
+        for comp_id in supported {
+            let (size, fixed_offset, _) = xsave_component_info(comp_id);
+
+            let words = fixed_offset / 4..(fixed_offset + size) / 4;
+            if words.end > xsave.len() {
+                // Component does not fit in the buffer; leave it untouched.
+                continue;
+            }
+            xsave[words].fill(0x12345678 ^ comp_id.wrapping_mul(0x11111111));
+            dirtied |= 1u64 << comp_id;
+        }
+
+        dirtied
+    }
+
+    /// Dirty the legacy XSAVE region (bytes 0-511) for testing reset_vcpu.
+    /// Covers the x87 FPU state, the SSE state, and the reserved tail.
+    ///
+    /// Layout (from Intel SDM Table 13-1):
+    ///   Bytes 0-1: FCW, 2-3: FSW, 4: FTW, 5: reserved, 6-7: FOP
+    ///   Bytes 8-15: FIP, 16-23: FDP
+    ///   Bytes 24-27: MXCSR, 28-31: MXCSR_MASK (preserve - hardware defined)
+    ///   Bytes 32-159: ST0-ST7/MM0-MM7 (8 regs × 16 bytes)
+    ///   Bytes 160-415: XMM0-XMM15 (16 regs × 16 bytes)
+    ///   Bytes 416-511: Reserved
+    fn dirty_xsave_legacy(xsave: &mut [u32], current_xsave: &[u8]) {
+        // Words 0-6 (bytes 0-27), one u32 per entry.
+        let control_words: [u32; 7] = [
+            0x0F7F | (0x1234 << 16), // FCW = 0x0F7F (default is 0x037F), FSW = 0x1234
+            0xAB | (0x0123 << 16),   // FTW = 0xAB, reserved byte = 0, FOP = 0x0123
+            0xDEAD0001,              // FIP, low half
+            0xBEEF0002,              // FIP, high half
+            0xCAFE0003,              // FDP, low half
+            0xBABE0004,              // FDP, high half
+            0x3F80,                  // MXCSR: valid value different from default
+        ];
+        xsave[..7].copy_from_slice(&control_words);
+
+        // Word 7 is MXCSR_MASK - preserve from current (hardware defined, read-only)
+        if let Some(mask_bytes) = current_xsave.get(28..32) {
+            xsave[7] = u32::from_le_bytes(mask_bytes.try_into().unwrap());
+        }
+
+        // ST0-ST7/MM0-MM7 (bytes 32-159, indices 8-39)
+        xsave[8..40].fill(0xCAFEBABE);
+        // XMM0-XMM15 (bytes 160-415, indices 40-103)
+        xsave[40..104].fill(0xDEADBEEF);
+        // Reserved area (bytes 416-511, indices 104-127)
+        xsave[104..128].fill(0xABCDEF12);
+    }
+
+    /// Preserve XSAVE header (bytes 512-575) from current state.
+    /// This includes XSTATE_BV and XCOMP_BV which hypervisors require.
+    fn preserve_xsave_header(xsave: &mut [u32], current_xsave: &[u8]) {
+        // u32 indices 128..144 correspond to bytes 512..576 of the XSAVE area.
+        for i in 128..144 {
+            let byte_offset = i * 4;
+            xsave[i] = u32::from_le_bytes(
+                current_xsave[byte_offset..byte_offset + 4]
+                    .try_into()
+                    .unwrap(),
+            );
+        }
+    }
+
+    /// Builds a fully dirtied XSAVE image, as `u32` words, sized like `current_xsave`.
+    ///
+    /// The legacy region is dirtied, the header is copied from `current_xsave`, and
+    /// the extended components are dirtied in whichever format (compacted or standard)
+    /// bit 63 of XCOMP_BV selects. XSTATE_BV is then rewritten to cover exactly the
+    /// components that were dirtied.
+    ///
+    /// Panics if `current_xsave` is too short to contain the legacy region and header.
+    fn dirty_xsave(current_xsave: &[u8]) -> Vec<u32> {
+        let mut xsave = vec![0u32; current_xsave.len() / 4];
+
+        dirty_xsave_legacy(&mut xsave, current_xsave);
+        preserve_xsave_header(&mut xsave, current_xsave);
+
+        // XCOMP_BV lives at bytes 520-527 of the header.
+        let xcomp_bv = u64::from_le_bytes(current_xsave[520..528].try_into().unwrap());
+        let supported_components = xsave_supported_components();
+
+        // Dirty extended components and get mask of what was actually dirtied
+        let extended_mask = if (xcomp_bv & (1u64 << 63)) != 0 {
+            // Compacted format (MSHV/WHP)
+            dirty_xsave_extended_compacted(&mut xsave, xcomp_bv, supported_components)
+        } else {
+            // Standard format (KVM)
+            dirty_xsave_extended_standard(&mut xsave, supported_components)
+        };
+
+        // UPDATE XSTATE_BV to indicate dirtied components have valid data.
+        // WHP validates consistency between XSTATE_BV and actual data in the buffer.
+        // Bits 0,1 = legacy x87/SSE (always set after dirty_xsave_legacy)
+        // Bits 2+ = extended components that we actually dirtied
+        let xstate_bv = 0x3 | extended_mask;
+
+        // Write XSTATE_BV to bytes 512-519 (u32 indices 128-129)
+        xsave[128] = (xstate_bv & 0xFFFFFFFF) as u32;
+        xsave[129] = (xstate_bv >> 32) as u32;
+
+        xsave
+    }
+
+    // ==========================================================================
+    // Test VM Setup
+    // ==========================================================================
+
+    /// Creates a test VM with the given code. This is the shared setup logic used by
+    /// both `hyperlight_vm()` and `hyperlight_vm_with_mem_mgr_fxsave()`.
+    fn create_test_vm_context(code: &[u8]) -> TestVmContext {
+        let config: SandboxConfiguration = Default::default();
+        #[cfg(any(crashdump, gdb))]
+        let rt_cfg: SandboxRuntimeConfig = Default::default();
+
+        let mut layout = SandboxMemoryLayout::new(config, code.len(), 4096, None).unwrap();
+
+        let pt_base_gpa = layout.get_pt_base_gpa();
+        let pt_buf = GuestPageTableBuffer::new(pt_base_gpa as usize);
+
+        // Identity-map (virt == phys) every region of the layout, carrying over its
+        // read/write/execute flags.
+        // NOTE(review): the turbofish below has lost its type argument (`::<…>`),
+        // apparently stripped during extraction - restore it from the upstream source.
+        for rgn in layout
+            .get_memory_regions_::(())
+            .unwrap()
+            .iter()
+        {
+            let readable = rgn.flags.contains(MemoryRegionFlags::READ);
+            let writable = rgn.flags.contains(MemoryRegionFlags::WRITE);
+            let executable = rgn.flags.contains(MemoryRegionFlags::EXECUTE);
+            let mapping = Mapping {
+                phys_base: rgn.guest_region.start as u64,
+                virt_base: rgn.guest_region.start as u64,
+                len: rgn.guest_region.len() as u64,
+                kind: MappingKind::Basic(BasicMapping {
+                    readable,
+                    writable,
+                    executable,
+                }),
+            };
+            unsafe { vmem::map(&pt_buf, mapping) };
+        }
+
+        // Map the scratch region at the top of the address space
+        let scratch_size = config.get_scratch_size();
+        let scratch_gpa = hyperlight_common::layout::scratch_base_gpa(scratch_size);
+        let scratch_gva = hyperlight_common::layout::scratch_base_gva(scratch_size);
+        let scratch_mapping = Mapping {
+            phys_base: scratch_gpa,
+            virt_base: scratch_gva,
+            len: scratch_size as u64,
+            kind: MappingKind::Basic(BasicMapping {
+                readable: true,
+                writable: true,
+                executable: true, // Match regular codepath (map_specials)
+            }),
+        };
+        unsafe { vmem::map(&pt_buf, scratch_mapping) };
+
+        // The page-table size is only known once all mappings exist; record it in the
+        // layout before asking for the total memory size.
+        let pt_bytes = pt_buf.into_bytes();
+        layout.set_pt_size(pt_bytes.len()).unwrap();
+
+        // Page tables are copied to the end of the memory, the guest code to its
+        // layout-defined offset.
+        let mem_size = layout.get_memory_size().unwrap();
+        let mut eshm = ExclusiveSharedMemory::new(mem_size).unwrap();
+        let snapshot_pt_start = mem_size - layout.get_pt_size();
+        eshm.copy_from_slice(&pt_bytes, snapshot_pt_start).unwrap();
+        eshm.copy_from_slice(code, layout.get_guest_code_offset())
+            .unwrap();
+
+        let scratch_mem = ExclusiveSharedMemory::new(config.get_scratch_size()).unwrap();
+        let mut mem_mgr = SandboxMemoryManager::new(
+            layout,
+            eshm,
+            scratch_mem,
+            NextAction::Initialise(layout.get_guest_code_address() as u64),
+        );
+        mem_mgr.write_memory_layout().unwrap();
+
+        // Split the manager into its host-side (`hshm`) and guest-side (`gshm`) halves.
+        let (mut hshm, gshm) = mem_mgr.build().unwrap();
+
+        let peb_address = gshm.layout.peb_address;
+        let stack_top_gva = hyperlight_common::layout::MAX_GVA as u64
+            - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET
+            + 1;
+        let mut vm = set_up_hypervisor_partition(
+            gshm,
+            &config,
+            stack_top_gva,
+            #[cfg(any(crashdump, gdb))]
+            rt_cfg,
+            crate::mem::exe::LoadInfo::dummy(),
+        )
+        .unwrap();
+
+        // NOTE(review): `random::()` has also lost its turbofish type argument;
+        // restore the seed type from the upstream source.
+        let seed = rand::rng().random::();
+        let peb_addr = RawPtr::from(u64::try_from(peb_address).unwrap());
+        let page_size = u32::try_from(page_size::get()).unwrap();
+
+        #[cfg(gdb)]
+        let dbg_mem_access_hdl = Arc::new(Mutex::new(hshm.clone()));
+
+        let host_funcs = Arc::new(Mutex::new(FunctionRegistry::default()));
+
+        // `initialise` runs the supplied guest code, so the returned context already
+        // reflects one execution of `code`.
+        vm.initialise(
+            peb_addr,
+            seed,
+            page_size,
+            &mut hshm,
+            &host_funcs,
+            None,
+            #[cfg(gdb)]
+            dbg_mem_access_hdl.clone(),
+        )
+        .unwrap();
+
+        TestVmContext {
+            vm,
+            hshm,
+            host_funcs,
+            #[cfg(gdb)]
+            dbg_mem_access_hdl,
+        }
+    }
+
+    /// Simple helper that returns just the VM for tests that don't need memory access.
+    fn hyperlight_vm(code: &[u8]) -> HyperlightVm {
+        create_test_vm_context(code).vm
+    }
+
+    // ==========================================================================
+    // Tests
+    // ==========================================================================
+
+    /// Dirties every piece of vCPU state through the hypervisor API (regs, FPU, sregs,
+    /// XSAVE, debug regs), checks that the dirty values stuck, then calls `reset_vcpu`
+    /// and checks that each piece of state is back at its reset value.
+    #[test]
+    fn reset_vcpu_simple() {
+        // push rax; hlt - aligns stack to 16 bytes
+        const CODE: [u8; 2] = [0x50, 0xf4];
+        let mut hyperlight_vm = hyperlight_vm(&CODE);
+        let available_hv = *get_available_hypervisor().as_ref().unwrap();
+
+        // Get the initial CR3 value before dirtying sregs
+        let initial_cr3 = hyperlight_vm.vm.sregs().unwrap().cr3;
+
+        // Set all vCPU state to dirty values
+        let regs = dirty_regs();
+        let fpu = dirty_fpu();
+        let sregs = dirty_sregs(initial_cr3);
+        let current_xsave = hyperlight_vm.vm.xsave().unwrap();
+        let xsave = dirty_xsave(&current_xsave);
+        let debug_regs = dirty_debug_regs();
+
+        // Order matters: set_fpu() runs after set_xsave(), so the FPU values win for
+        // the state both of them cover (see the XMM check below).
+        hyperlight_vm.vm.set_xsave(&xsave).unwrap();
+        hyperlight_vm.vm.set_regs(&regs).unwrap();
+        hyperlight_vm.vm.set_fpu(&fpu).unwrap();
+        hyperlight_vm.vm.set_sregs(&sregs).unwrap();
+        hyperlight_vm.vm.set_debug_regs(&debug_regs).unwrap();
+
+        // Verify regs were set
+        assert_eq!(hyperlight_vm.vm.regs().unwrap(), regs);
+
+        // Verify fpu was set
+        let mut got_fpu = hyperlight_vm.vm.fpu().unwrap();
+        let mut expected_fpu = fpu;
+        // KVM doesn't preserve mxcsr via set_fpu/fpu(), copy expected to got
+        normalize_fpu_mxcsr_for_kvm(&mut got_fpu, fpu.mxcsr);
+        // fpr only uses 80 bits per register. Normalize upper bits for comparison.
+        for i in 0..8 {
+            expected_fpu.fpr[i][10..16].copy_from_slice(&got_fpu.fpr[i][10..16]);
+        }
+        assert_eq!(got_fpu, expected_fpu);
+
+        // Verify xsave was set by checking key dirty values in the legacy region.
+        // Note: set_fpu() is called after set_xsave(), so XMM registers reflect fpu state (0xCD pattern).
+        let got_xsave = hyperlight_vm.vm.xsave().unwrap();
+        // FCW (bytes 0-1) should be 0x0F7F (set by both xsave and fpu)
+        let got_fcw = u16::from_le_bytes(got_xsave[0..2].try_into().unwrap());
+        assert_eq!(got_fcw, 0x0F7F, "xsave FCW should be dirty");
+        // MXCSR (bytes 24-27) should be 0x3F80 (set by xsave; fpu doesn't update it on KVM)
+        let got_mxcsr = u32::from_le_bytes(got_xsave[24..28].try_into().unwrap());
+        assert_eq!(got_mxcsr, 0x3F80, "xsave MXCSR should be dirty");
+        // XMM0-XMM15 (bytes 160-415): set_fpu overwrites with 0xCD pattern from dirty_fpu()
+        for i in 0..16 {
+            let offset = 160 + i * 16;
+            let xmm_word = u32::from_le_bytes(got_xsave[offset..offset + 4].try_into().unwrap());
+            assert_eq!(
+                xmm_word, 0xCDCDCDCD,
+                "xsave XMM{i} should match fpu dirty value"
+            );
+        }
+
+        // Verify debug regs were set
+        let got_debug_regs = hyperlight_vm.vm.debug_regs().unwrap();
+        let mut expected_debug_regs = debug_regs;
+        normalize_debug_regs(&mut expected_debug_regs, &got_debug_regs);
+        assert_eq!(got_debug_regs, expected_debug_regs);
+
+        // Verify sregs were set
+        let got_sregs = hyperlight_vm.vm.sregs().unwrap();
+        let mut expected_sregs = sregs;
+        normalize_sregs_hidden_cache(&mut expected_sregs, &got_sregs);
+        assert_eq!(got_sregs, expected_sregs);
+
+        // Reset the vCPU
+        hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap();
+
+        // Verify registers are reset to defaults
+        assert_regs_reset(hyperlight_vm.vm.as_ref());
+
+        // Verify FPU is reset to defaults
+        assert_fpu_reset(hyperlight_vm.vm.as_ref());
+
+        // Verify debug registers are reset to defaults
+        assert_debug_regs_reset(hyperlight_vm.vm.as_ref());
+
+        // Verify xsave is reset - should be zeroed except for hypervisor-specific fields
+        let reset_xsave = hyperlight_vm.vm.xsave().unwrap();
+        // Build expected xsave: all zeros with fpu specific defaults. Then copy hypervisor-specific fields from actual
+        let mut expected_xsave = vec![0u8; reset_xsave.len()];
+        #[cfg(mshv3)]
+        if available_hv == HypervisorType::Mshv {
+            // FCW (offset 0-1): When XSTATE_BV.LegacyX87 = 0 (init state), the hypervisor
+            // skips copying the FPU legacy region entirely, leaving zeros in the buffer.
+            // The actual guest FCW register is 0x037F (verified via fpu() assertion above),
+            // but xsave() doesn't report it because XSTATE_BV=0 means "init state, buffer
+            // contents undefined." We copy from actual to handle this.
+            expected_xsave[0..2].copy_from_slice(&reset_xsave[0..2]);
+        }
+        #[cfg(target_os = "windows")]
+        if available_hv == HypervisorType::Whp {
+            // FCW (offset 0-1): When XSTATE_BV.LegacyX87 = 0 (init state), the hypervisor
+            // skips copying the FPU legacy region entirely, leaving zeros in the buffer.
+            // The actual guest FCW register is 0x037F (verified via fpu() assertion above),
+            // but xsave() doesn't report it because XSTATE_BV=0 means "init state, buffer
+            // contents undefined." We copy from actual to handle this.
+            expected_xsave[0..2].copy_from_slice(&reset_xsave[0..2]);
+        }
+        #[cfg(kvm)]
+        if available_hv == HypervisorType::Kvm {
+            expected_xsave[0..2].copy_from_slice(&FP_CONTROL_WORD_DEFAULT.to_le_bytes());
+        }
+
+        // - MXCSR at offset 24-27: default FPU state set by hypervisor
+        expected_xsave[24..28].copy_from_slice(&MXCSR_DEFAULT.to_le_bytes());
+        // - MXCSR_MASK at offset 28-31: hardware-defined, read-only
+        expected_xsave[28..32].copy_from_slice(&reset_xsave[28..32]);
+        // - Reserved bytes at offset 464-511: These are in the reserved/padding area of the legacy
+        //   FXSAVE region (after XMM registers which end at byte 416). On KVM/Intel, these bytes
+        //   may contain hypervisor-specific metadata that isn't cleared during vCPU reset.
+        //   Since this is not guest-visible computational state, we copy from actual to expected.
+        expected_xsave[464..512].copy_from_slice(&reset_xsave[464..512]);
+        // - XSAVE header at offset 512-575: contains XSTATE_BV and XCOMP_BV (hypervisor-managed)
+        //   XSTATE_BV (512-519): Bitmap indicating which state components have valid data in the
+        //   buffer. When a bit is 0, the hypervisor uses the architectural init value for that
+        //   component. After reset, xsave() may still return non-zero XSTATE_BV since the
+        //   hypervisor reports which components it manages, not which have been modified.
+        //   XCOMP_BV (520-527): Compaction bitmap. Bit 63 indicates compacted format (used by MSHV/WHP).
+        //   When set, the XSAVE area uses a compact layout where only enabled components are stored
+        //   contiguously. This is a format indicator, not state data, so it's preserved across reset.
+        //   Both fields are managed by the hypervisor to describe the XSAVE area format and capabilities,
+        //   not guest-visible computational state, so they don't need to be zeroed on reset.
+        if reset_xsave.len() >= 576 {
+            expected_xsave[512..576].copy_from_slice(&reset_xsave[512..576]);
+        }
+        assert_eq!(
+            reset_xsave, expected_xsave,
+            "xsave should be zeroed except for hypervisor-specific fields"
+        );
+
+        // Verify sregs are reset to defaults (CR3 is 0 as passed to reset_vcpu)
+        assert_sregs_reset(hyperlight_vm.vm.as_ref(), 0);
+    }
+
+    /// Tests that actually run code, as opposed to just setting vCPU state.
+    mod run_tests {
+        use iced_x86::code_asm::*;
+
+        use super::*;
+
+        /// Runs guest code that loads a distinct pattern into every general-purpose
+        /// register, then checks that `reset_vcpu` returns them to the reset state.
+        #[test]
+        fn reset_vcpu_regs() {
+            let mut a = CodeAssembler::new(64).unwrap();
+            a.push(rax).unwrap(); // Align stack to 16 bytes
+            a.mov(rax, 0x1111111111111111u64).unwrap();
+            a.mov(rbx, 0x2222222222222222u64).unwrap();
+            a.mov(rcx, 0x3333333333333333u64).unwrap();
+            a.mov(rdx, 0x4444444444444444u64).unwrap();
+            a.mov(rsi, 0x5555555555555555u64).unwrap();
+            a.mov(rdi, 0x6666666666666666u64).unwrap();
+            a.mov(rbp, 0x7777777777777777u64).unwrap();
+            a.mov(r8, 0x8888888888888888u64).unwrap();
+            a.mov(r9, 0x9999999999999999u64).unwrap();
+            a.mov(r10, 0xAAAAAAAAAAAAAAAAu64).unwrap();
+            a.mov(r11, 0xBBBBBBBBBBBBBBBBu64).unwrap();
+            a.mov(r12, 0xCCCCCCCCCCCCCCCCu64).unwrap();
+            a.mov(r13, 0xDDDDDDDDDDDDDDDDu64).unwrap();
+            a.mov(r14, 0xEEEEEEEEEEEEEEEEu64).unwrap();
+            a.mov(r15, 0xFFFFFFFFFFFFFFFFu64).unwrap();
+            a.hlt().unwrap();
+            let code = a.assemble(0).unwrap();
+
+            let mut hyperlight_vm = hyperlight_vm(&code);
+
+            // After run, check registers match expected dirty state
+            let regs = hyperlight_vm.vm.regs().unwrap();
+            let mut expected_dirty = CommonRegisters {
+                rax: 0x1111111111111111,
+                rbx: 0x2222222222222222,
+                rcx: 0x3333333333333333,
+                rdx: 0x4444444444444444,
+                rsi: 0x5555555555555555,
+                rdi: 0x6666666666666666,
+                rsp: 0,
+                rbp: 0x7777777777777777,
+                r8: 0x8888888888888888,
+                r9: 0x9999999999999999,
+                r10: 0xAAAAAAAAAAAAAAAA,
+                r11: 0xBBBBBBBBBBBBBBBB,
+                r12: 0xCCCCCCCCCCCCCCCC,
+                r13: 0xDDDDDDDDDDDDDDDD,
+                r14: 0xEEEEEEEEEEEEEEEE,
+                r15: 0xFFFFFFFFFFFFFFFF,
+                rip: 0,
+                rflags: 0,
+            };
+            // rip, rsp, and rflags are set by the CPU, we don't expect those to match our expected values
+            expected_dirty.rip = regs.rip;
+            expected_dirty.rsp = regs.rsp;
+            expected_dirty.rflags = regs.rflags;
+            assert_eq!(regs, expected_dirty);
+
+            // Reset vcpu
+            hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap();
+
+            // Check registers are reset to defaults
+            assert_regs_reset(hyperlight_vm.vm.as_ref());
+        }
+
+        /// Runs guest code that dirties XMM0-15, the x87 stack, FCW and MXCSR, then
+        /// checks that `reset_vcpu` restores the FPU reset state.
+        #[test]
+        fn reset_vcpu_fpu() {
+            #[cfg(kvm)]
+            use crate::hypervisor::regs::MXCSR_DEFAULT;
+
+            #[cfg(kvm)]
+            let available_hv = *get_available_hypervisor().as_ref().unwrap();
+
+            // Build code to dirty XMM registers, x87 FPU, and MXCSR
+            let mut a = CodeAssembler::new(64).unwrap();
+            a.push(rax).unwrap(); // Align stack to 16 bytes
+
+            // xmm0-xmm7: use movd + pshufd to fill with pattern
+            let xmm_regs_low = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7];
+            let patterns_low: [u32; 8] = [
+                0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777,
+                0x88888888,
+            ];
+            for (xmm, pat) in xmm_regs_low.iter().zip(patterns_low.iter()) {
+                a.mov(eax, *pat).unwrap();
+                a.movd(*xmm, eax).unwrap();
+                a.pshufd(*xmm, *xmm, 0).unwrap();
+            }
+
+            // xmm8-xmm15: upper XMM registers
+            let xmm_regs_high = [xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15];
+            let patterns_high: [u32; 8] = [
+                0x99999999, 0xAAAAAAAA, 0xBBBBBBBB, 0xCCCCCCCC, 0xDDDDDDDD, 0xEEEEEEEE, 0xFFFFFFFF,
+                0x12345678,
+            ];
+            for (xmm, pat) in xmm_regs_high.iter().zip(patterns_high.iter()) {
+                a.mov(eax, *pat).unwrap();
+                a.movd(*xmm, eax).unwrap();
+                a.pshufd(*xmm, *xmm, 0).unwrap();
+            }
+
+            // Use 7 FLDs so TOP=1 after execution, different from default TOP=0.
+            // This ensures reset properly clears TOP, not just register contents.
+            a.fldz().unwrap(); // 0.0
+            a.fldl2e().unwrap(); // log2(e)
+            a.fldl2t().unwrap(); // log2(10)
+            a.fldlg2().unwrap(); // log10(2)
+            a.fldln2().unwrap(); // ln(2)
+            a.fldpi().unwrap(); // pi
+            // Push a memory value to also dirty last_dp
+            a.mov(rax, 0xDEADBEEFu64).unwrap();
+            a.push(rax).unwrap();
+            a.fld(qword_ptr(rsp)).unwrap(); // dirties last_dp
+            a.pop(rax).unwrap();
+
+            // Dirty FCW (0x0F7F, different from default 0x037F)
+            a.mov(eax, 0x0F7Fu32).unwrap();
+            a.push(rax).unwrap();
+            a.fldcw(word_ptr(rsp)).unwrap();
+            a.pop(rax).unwrap();
+
+            // Dirty MXCSR (0x3F80, different from default 0x1F80)
+            a.mov(eax, 0x3F80u32).unwrap();
+            a.push(rax).unwrap();
+            a.ldmxcsr(dword_ptr(rsp)).unwrap();
+            a.pop(rax).unwrap();
+
+            a.hlt().unwrap();
+            let code = a.assemble(0).unwrap();
+
+            let mut hyperlight_vm = hyperlight_vm(&code);
+
+            // After run, check FPU state matches expected dirty values
+            let fpu = hyperlight_vm.vm.fpu().unwrap();
+
+            #[cfg_attr(not(kvm), allow(unused_mut))]
+            let mut expected_dirty = CommonFpu {
+                fcw: 0x0F7F,
+                ftwx: 0xFE, // 7 registers valid (bit 0 empty after 7 pushes with TOP=1)
+                xmm: [
+                    0x11111111111111111111111111111111_u128.to_le_bytes(),
+                    0x22222222222222222222222222222222_u128.to_le_bytes(),
+                    0x33333333333333333333333333333333_u128.to_le_bytes(),
+                    0x44444444444444444444444444444444_u128.to_le_bytes(),
+                    0x55555555555555555555555555555555_u128.to_le_bytes(),
+                    0x66666666666666666666666666666666_u128.to_le_bytes(),
+                    0x77777777777777777777777777777777_u128.to_le_bytes(),
+                    0x88888888888888888888888888888888_u128.to_le_bytes(),
+                    0x99999999999999999999999999999999_u128.to_le_bytes(),
+                    0xAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA_u128.to_le_bytes(),
+                    0xBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB_u128.to_le_bytes(),
+                    0xCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC_u128.to_le_bytes(),
+                    0xDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD_u128.to_le_bytes(),
+                    0xEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE_u128.to_le_bytes(),
+                    0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF_u128.to_le_bytes(),
+                    0x12345678123456781234567812345678_u128.to_le_bytes(),
+                ],
+                mxcsr: 0x3F80,
+                fsw: 0x0802, // TOP=1 after 7 pushes (bits 11-13), DE flag from denormal load
+                // fpr: 80-bit values with 6 bytes padding; may vary between CPU vendors
+                fpr: fpu.fpr,
+                // last_opcode: FPU Opcode update varies by CPU (may only update on unmasked exceptions)
+                last_opcode: fpu.last_opcode,
+                // last_ip: code is loaded at runtime-determined address
+                last_ip: fpu.last_ip,
+                // last_dp: points to stack (rsp) which is runtime-determined
+                last_dp: fpu.last_dp,
+            };
+            // KVM doesn't preserve mxcsr via fpu(), copy from actual
+            normalize_fpu_mxcsr_for_kvm(&mut expected_dirty, fpu.mxcsr);
+            assert_eq!(fpu, expected_dirty);
+
+            // KVM's get_fpu/set_fpu ioctls don't include MXCSR (it's in the SSE state,
+            // not x87 FPU state). We must use xsave to verify MXCSR on KVM.
+            #[cfg(kvm)]
+            if available_hv == HypervisorType::Kvm {
+                let xsave = hyperlight_vm.vm.xsave().unwrap();
+                let mxcsr = u32::from_le_bytes(xsave[24..28].try_into().unwrap());
+                assert_eq!(mxcsr, 0x3F80, "MXCSR in XSAVE should be dirty");
+            }
+
+            // Reset vcpu
+            hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap();
+
+            // Check FPU is reset to defaults
+            assert_fpu_reset(hyperlight_vm.vm.as_ref());
+
+            // Verify MXCSR via xsave on KVM (fpu() doesn't include it)
+            #[cfg(kvm)]
+            if available_hv == HypervisorType::Kvm {
+                let xsave = hyperlight_vm.vm.xsave().unwrap();
+                let mxcsr = u32::from_le_bytes(xsave[24..28].try_into().unwrap());
+                assert_eq!(mxcsr, MXCSR_DEFAULT, "MXCSR in XSAVE should be reset");
+            }
+        }
+
+        /// Runs guest code that writes DR0-DR3, DR6 and DR7, then checks that
+        /// `reset_vcpu` restores the debug registers.
+        #[test]
+        fn reset_vcpu_debug_regs() {
+            let mut a = CodeAssembler::new(64).unwrap();
+            a.push(rax).unwrap(); // Align stack to 16 bytes
+            a.mov(rax, 0xDEAD_BEEF_0000_0000u64).unwrap();
+            a.mov(dr0, rax).unwrap();
+            a.mov(rax, 0xDEAD_BEEF_0000_0001u64).unwrap();
+            a.mov(dr1, rax).unwrap();
+            a.mov(rax, 0xDEAD_BEEF_0000_0002u64).unwrap();
+            a.mov(dr2, rax).unwrap();
+            a.mov(rax, 0xDEAD_BEEF_0000_0003u64).unwrap();
+            a.mov(dr3, rax).unwrap();
+            a.mov(rax, 1u64).unwrap();
+            a.mov(dr6, rax).unwrap();
+            a.mov(rax, 0xFFu64).unwrap();
+            a.mov(dr7, rax).unwrap();
+            a.hlt().unwrap();
+            let code = a.assemble(0).unwrap();
+
+            let mut hyperlight_vm = hyperlight_vm(&code);
+
+            // Verify debug registers are dirty
+            let debug_regs = hyperlight_vm.vm.debug_regs().unwrap();
+            let expected_dirty = CommonDebugRegs {
+                dr0: 0xDEAD_BEEF_0000_0000,
+                dr1: 0xDEAD_BEEF_0000_0001,
+                dr2: 0xDEAD_BEEF_0000_0002,
+                dr3: 0xDEAD_BEEF_0000_0003,
+                // dr6: guest set B0 (bit 0) = 1, reserved bits vary by CPU
+                dr6: (debug_regs.dr6 & !DR6_WRITABLE_MASK) | 0x1,
+                // dr7: guest set lower byte = 0xFF, reserved bits vary by CPU
+                dr7: (debug_regs.dr7 & !DR7_WRITABLE_MASK) | 0xFF,
+            };
+            assert_eq!(debug_regs, expected_dirty);
+
+            // Reset vcpu
+            hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap();
+
+            // Check debug registers are reset to default values
+            assert_debug_regs_reset(hyperlight_vm.vm.as_ref());
+        }
+
+        /// Runs guest code that modifies CR0, CR2, CR4 and CR8, then checks that
+        /// `reset_vcpu` restores the special registers.
+        #[test]
+        fn reset_vcpu_sregs() {
+            // Build code that modifies special registers and halts
+            // We can modify CR0.WP, CR2, CR4.TSD, and CR8 from guest code in ring 0
+            let mut a = CodeAssembler::new(64).unwrap();
+            a.push(rax).unwrap(); // Align stack to 16 bytes
+            // Set CR0.WP (Write Protect, bit 16)
+            a.mov(rax, cr0).unwrap();
+            a.or(rax, 0x10000i32).unwrap();
+            a.mov(cr0, rax).unwrap();
+            // Set CR2
+            a.mov(rax, 0xDEADBEEFu64).unwrap();
+            a.mov(cr2, rax).unwrap();
+            // Set CR4.TSD (Time Stamp Disable, bit 2)
+            a.mov(rax, cr4).unwrap();
+            a.or(rax, 0x4i32).unwrap();
+            a.mov(cr4, rax).unwrap();
+            // Set CR8
+            a.mov(rax, 5u64).unwrap();
+            a.mov(cr8, rax).unwrap();
+            a.hlt().unwrap();
+            let code = a.assemble(0).unwrap();
+
+            let mut hyperlight_vm = hyperlight_vm(&code);
+
+            // Get the initial CR3 value and expected defaults
+            let initial_cr3 = hyperlight_vm.vm.sregs().unwrap().cr3;
+            let defaults = CommonSpecialRegisters::standard_64bit_defaults(initial_cr3);
+
+            // Verify registers are dirty (CR0.WP, CR2, CR4.TSD and CR8 modified by our code)
+            let sregs = hyperlight_vm.vm.sregs().unwrap();
+            let mut expected_dirty = CommonSpecialRegisters {
+                cr0: defaults.cr0 | 0x10000, // WP bit set
+                cr2: 0xDEADBEEF,
+                cr4: defaults.cr4 | 0x4, // TSD bit set
+                cr8: 0x5,
+                ..defaults
+            };
+            normalize_sregs_for_run_tests(&mut expected_dirty, &sregs);
+            assert_eq!(sregs, expected_dirty);
+
+            // Reset vcpu
+            hyperlight_vm.reset_vcpu(0, &default_sregs()).unwrap();
+
+            // Check registers are reset to defaults (CR3 is 0 as passed to reset_vcpu)
+            let sregs = hyperlight_vm.vm.sregs().unwrap();
+            let mut expected_reset = CommonSpecialRegisters::standard_64bit_defaults(0);
+            normalize_sregs_for_run_tests(&mut expected_reset, &sregs);
+            assert_eq!(sregs, expected_reset);
+        }
+
+        /// Verifies guest-visible FPU state (via FXSAVE) is properly reset.
+        /// Unlike tests using hypervisor API, this runs actual guest code with FXSAVE.
+        #[test]
+        fn reset_vcpu_fpu_guest_visible_state() {
+            let mut ctx = hyperlight_vm_with_mem_mgr_fxsave();
+
+            // Verify FPU was dirtied after first run
+            let fpu_before_reset = ctx.ctx.vm.vm.fpu().unwrap();
+            assert_eq!(
+                fpu_before_reset.fcw, 0x0F7F,
+                "FCW should be dirty after first run"
+            );
+            assert_ne!(
+                fpu_before_reset.ftwx, 0,
+                "FTW should indicate valid registers after first run"
+            );
+
+            let fxsave_before = ctx.read_fxsave();
+            let fcw_before = u16::from_le_bytes(fxsave_before[0..2].try_into().unwrap());
+            assert_eq!(fcw_before, 0x0F7F, "Guest FXSAVE FCW should be dirty");
+            let mxcsr_before = u32::from_le_bytes(fxsave_before[24..28].try_into().unwrap());
+            assert_eq!(mxcsr_before, 0x3F80, "Guest FXSAVE MXCSR should be dirty");
+            let xmm0_before = u32::from_le_bytes(fxsave_before[160..164].try_into().unwrap());
+            assert_eq!(xmm0_before, 0x11111111, "Guest FXSAVE XMM0 should be dirty");
+
+            let root_pt_addr = ctx.ctx.vm.get_root_pt().unwrap();
+            let segment_state = ctx.ctx.vm.get_snapshot_sregs().unwrap();
+
+            ctx.ctx.vm.reset_vcpu(root_pt_addr, &segment_state).unwrap();
+
+            // Re-run from entrypoint (flag=1 means guest skips dirty phase, just does FXSAVE)
+            // Use stack_top - 8 to match initialise()'s behavior (simulates call pushing return addr)
+            let NextAction::Call(rip) = ctx.ctx.vm.entrypoint else {
+                panic!("entrypoint should be call");
+            };
+            let regs = CommonRegisters {
+                rip,
+                rsp: ctx.stack_top_gva() - 8,
+                rflags: 1 << 1,
+                ..Default::default()
+            };
+            ctx.ctx.vm.vm.set_regs(&regs).unwrap();
+            ctx.run();
+
+            // Verify guest-visible state is reset
+            let fxsave_after = ctx.read_fxsave();
+            let fcw_after = u16::from_le_bytes(fxsave_after[0..2].try_into().unwrap());
+            assert_eq!(
+                fcw_after, 0x037F,
+                "Guest FXSAVE FCW should be reset to default 0x037F, got 0x{:04X}",
+                fcw_after
+            );
+
+            let fsw_after = u16::from_le_bytes(fxsave_after[2..4].try_into().unwrap());
+            assert_eq!(fsw_after, 0, "FSW should be reset");
+
+            let ftw_after = fxsave_after[4];
+            assert_eq!(ftw_after, 0, "FTW should be 0 (all empty)");
+
+            let fop_after = u16::from_le_bytes(fxsave_after[6..8].try_into().unwrap());
+            assert_eq!(fop_after, 0, "FOP should be 0");
+
+            let fip_after = u64::from_le_bytes(fxsave_after[8..16].try_into().unwrap());
+            assert_eq!(fip_after, 0, "FIP should be 0");
+
+            let fdp_after = u64::from_le_bytes(fxsave_after[16..24].try_into().unwrap());
+            assert_eq!(fdp_after, 0, "FDP should be 0");
+
+            let mxcsr_after = u32::from_le_bytes(fxsave_after[24..28].try_into().unwrap());
+            assert_eq!(
+                mxcsr_after, MXCSR_DEFAULT,
+                "Guest FXSAVE MXCSR should be reset to 0x{:08X}, got 0x{:08X}",
+                MXCSR_DEFAULT, mxcsr_after
+            );
+
+            // ST0-ST7 should be zeroed
+            for i in 0..8 {
+                let offset = 32 + i * 16;
+                let st_bytes = &fxsave_after[offset..offset + 10];
+                assert!(st_bytes.iter().all(|&b| b == 0), "ST{} should be zeroed", i);
+            }
+
+            // XMM0-XMM15 should be zeroed
+            for i in 0..16 {
+                let offset = 160 + i * 16;
+                let xmm_bytes = &fxsave_after[offset..offset + 16];
+                assert!(
+                    xmm_bytes.iter().all(|&b| b == 0),
+                    "XMM{} should be zeroed",
+                    i
+                );
+            }
+        }
+
+        /// Extended test context for FXSAVE tests that need to read memory at a specific offset.
+        struct FxsaveTestContext {
+            ctx: TestVmContext,
+            /// Offset in shared memory where FXSAVE data is stored (output_data region)
+            fxsave_offset: usize,
+        }
+
+        impl FxsaveTestContext {
+            /// Runs the VM once, panicking if the run fails.
+            fn run(&mut self) {
+                self.ctx
+                    .vm
+                    .run(
+                        &mut self.ctx.hshm,
+                        &self.ctx.host_funcs,
+                        #[cfg(gdb)]
+                        self.ctx.dbg_mem_access_hdl.clone(),
+                    )
+                    .unwrap();
+            }
+
+            /// Copies the 512-byte FXSAVE image the guest wrote out of scratch memory.
+            fn read_fxsave(&self) -> [u8; 512] {
+                let mut fxsave = [0u8; 512];
+                self.ctx
+                    .hshm
+                    .scratch_mem
+                    .copy_to_slice(&mut fxsave, self.fxsave_offset)
+                    .unwrap();
+                fxsave
+            }
+
+            /// Get the stack top GVA, same as the regular codepath.
+            fn stack_top_gva(&self) -> u64 {
+                hyperlight_common::layout::MAX_GVA as u64
+                    - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET
+                    + 1
+            }
+        }
+
+        /// Creates VM with guest code that: dirties the FPU (if flag==0), does FXSAVE to buffer, sets flag=1.
+        /// Uses output_data region for FXSAVE buffer (like regular guest output), scratch for stack.
+        fn hyperlight_vm_with_mem_mgr_fxsave() -> FxsaveTestContext {
+            use iced_x86::code_asm::*;
+
+            // Compute fixed addresses for FXSAVE buffer and flag.
+            // These are in the output_data region which starts at a known offset.
+            // We use a default SandboxConfiguration to get the same layout as create_test_vm_context.
+            let config: SandboxConfiguration = Default::default();
+            let layout = SandboxMemoryLayout::new(config, 512, 4096, None).unwrap();
+            let fxsave_offset = layout.get_output_data_buffer_scratch_host_offset();
+            let fxsave_gva = layout.get_output_data_buffer_gva();
+            let flag_gva = fxsave_gva + 512;
+
+            let mut a = CodeAssembler::new(64).unwrap();
+            a.push(rax).unwrap(); // Align stack to 16 bytes
+
+            // Check flag at fixed address: if flag != 0, skip dirty phase
+            a.mov(rax, flag_gva).unwrap();
+            a.mov(al, byte_ptr(rax)).unwrap();
+            a.test(al, al).unwrap();
+            let mut skip_dirty = a.create_label();
+            a.jnz(skip_dirty).unwrap();
+
+            // Dirty x87 FPU (7 pushes so TOP=1)
+            a.fldz().unwrap();
+            a.fldl2e().unwrap();
+            a.fldl2t().unwrap();
+            a.fldlg2().unwrap();
+            a.fldln2().unwrap();
+            a.fldpi().unwrap();
+            a.fld1().unwrap();
+
+            // Dirty FCW (0x0F7F vs default 0x037F)
+            a.sub(rsp, 16i32).unwrap();
+            a.mov(dword_ptr(rsp), 0x0F7Fu32).unwrap();
+            a.fldcw(word_ptr(rsp)).unwrap();
+            a.add(rsp, 16i32).unwrap();
+
+            // Dirty MXCSR (0x3F80 vs default 0x1F80)
+            a.sub(rsp, 16i32).unwrap();
+            a.mov(dword_ptr(rsp), 0x3F80u32).unwrap();
+            a.ldmxcsr(dword_ptr(rsp)).unwrap();
+            a.add(rsp, 16i32).unwrap();
+
+            // Dirty XMM0-7
+            let xmm_regs = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7];
+            for (i, xmm) in xmm_regs.iter().enumerate() {
+                a.mov(eax, 0x11111111u32 * (i as u32 + 1)).unwrap();
+                a.movd(*xmm, eax).unwrap();
+                a.pshufd(*xmm, *xmm, 0).unwrap();
+            }
+
+            // Set flag = 1 at fixed address
+            a.mov(rax, flag_gva).unwrap();
+            a.mov(byte_ptr(rax), 1u32).unwrap();
+
+            // FXSAVE to buffer at fixed address (runs on both executions)
+            a.set_label(&mut skip_dirty).unwrap();
+            a.mov(rax, fxsave_gva).unwrap();
+            a.fxsave(ptr(rax)).unwrap();
+
+            // Return dispatch ptr
+            a.mov(rax, layout.get_guest_code_address() as u64).unwrap();
+
+            a.hlt().unwrap();
+
+            let code = a.assemble(0).unwrap();
+
+            // Reuse common test setup - initialise() will run the code
+            let ctx = create_test_vm_context(&code);
+
+            FxsaveTestContext { ctx, fxsave_offset }
+        }
+    }
+
+    /// ========================================================================
+    /// Misc tests
+    /// ========================================================================
+    #[test]
+    fn test_get_max_log_level_filter_both_guest_and_host() {
+        let rust_log = "hyperlight_guest=trace,hyperlight_host=debug".to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(filter, LevelFilter::TRACE, "Max log level should be Trace");
+    }
+    #[test]
+    fn test_get_max_log_level_filter_only_guest() {
+        let rust_log = "hyperlight_guest=info".to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(filter, LevelFilter::INFO, "Max log level should be Info");
+    }
+    #[test]
+    fn test_get_max_log_level_filter_only_host() {
+        let rust_log = "hyperlight_host=debug".to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(filter, LevelFilter::DEBUG, "Max log level should be Debug");
+    }
+    #[test]
+    fn test_get_max_log_level_filter_only_general() {
+        let rust_log = "trace".to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(filter, LevelFilter::TRACE, "Max log level should be Trace");
+    }
+    #[test]
+    fn test_get_max_log_level_filter_complex_rust_log_00() {
+        let rust_log =
+            "error,hyperlight_guest=debug,hyperlight_host=info,hyperlight_guest_bin=trace"
+                .to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(filter, LevelFilter::DEBUG, "Max log level should be Debug");
+    }
+    #[test]
+    fn test_get_max_log_level_filter_complex_rust_log_01() {
+        let rust_log =
+            "error,hyperlight_host=info,hyperlight_guest=debug,hyperlight_guest_bin=trace"
+                .to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(filter, LevelFilter::DEBUG, "Max log level should be Debug");
+    }
+    #[test]
+    fn test_get_max_log_level_filter_complex_rust_log_02() {
+        let rust_log =
+            "hyperlight_host=info,error,hyperlight_guest=debug,hyperlight_guest_bin=trace"
+                .to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(filter, LevelFilter::DEBUG, "Max log level should be Debug");
+    }
+    #[test]
+    fn test_get_max_log_level_filter_general_and_others() {
+        let rust_log =
+            "trace,hyperlight_component_macro=debug,hyperlight_component_util=error".to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(filter, LevelFilter::TRACE, "Max log level should be Trace");
+    }
+    #[test]
+    fn test_get_max_log_level_filter_default() {
+        let rust_log = "hyperlight_common=debug,hyperlight_component_util=info".to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(
+            filter,
+            LevelFilter::ERROR,
+            "Max log level should default to Error"
+        );
+    }
+    #[test]
+    fn test_get_max_log_level_filter_invalid_rust_log() {
+        let rust_log = "this is an invalid rust log string".to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(
+            filter,
+            LevelFilter::ERROR,
+            "Max log level should default to Error"
+        );
+    }
+    #[test]
+    fn test_get_max_log_level_filter_empty_rust_log() {
+        let rust_log = "".to_string();
+        let filter = get_max_log_level_filter(rust_log);
+
+        assert_eq!(
+            filter,
+            LevelFilter::ERROR,
+            "Max log level should default to Error"
+        );
+    }
+}
From 14635dcf765ffee608b46a3c1720c2f57993d4f7 Mon Sep 17 00:00:00 2001
From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
Date: Mon, 9 Mar 2026 13:42:55 -0700
Subject: [PATCH 5/6] feat: enable hyperlight-host cross-compilation for aarch64

Add conditional compilation gates and aarch64 stub modules so that
cargo build --target aarch64-unknown-linux-gnu -p hyperlight-host
succeeds.

No aarch64 implementation is added. All aarch64 code paths are stubs
that panic at runtime.

Changes:
- Add aarch64 register stub types (CommonRegisters, etc.)
- Add aarch64 KVM and MSHV backend stubs - Add aarch64 HyperlightVm method stubs (new, initialise, etc.) - Add aarch64 layout and vmem stubs in hyperlight_common - Gate VmExit::Debug fields with target_arch - Add hv_arm64_memory_intercept_message support - Update Cargo.toml dependencies for aarch64 - Update build.rs cfg aliases for aarch64 Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/arch/aarch64/layout.rs | 25 +++++ .../src/arch/aarch64/vmem.rs | 52 ++++++++++ src/hyperlight_common/src/layout.rs | 6 +- src/hyperlight_common/src/vmem.rs | 1 + src/hyperlight_host/Cargo.toml | 4 +- src/hyperlight_host/build.rs | 4 +- .../src/hypervisor/hyperlight_vm/aarch64.rs | 99 +++++++++++++++++++ .../src/hypervisor/hyperlight_vm/mod.rs | 2 + src/hyperlight_host/src/hypervisor/regs.rs | 5 + .../src/hypervisor/regs/aarch64/mod.rs | 37 +++++++ .../hypervisor/virtual_machine/kvm/aarch64.rs | 40 ++++++++ .../src/hypervisor/virtual_machine/kvm/mod.rs | 5 + .../src/hypervisor/virtual_machine/mod.rs | 11 ++- .../virtual_machine/mshv/aarch64.rs | 38 +++++++ .../hypervisor/virtual_machine/mshv/mod.rs | 5 + src/hyperlight_host/src/mem/memory_region.rs | 15 ++- 16 files changed, 339 insertions(+), 10 deletions(-) create mode 100644 src/hyperlight_common/src/arch/aarch64/layout.rs create mode 100644 src/hyperlight_common/src/arch/aarch64/vmem.rs create mode 100644 src/hyperlight_host/src/hypervisor/hyperlight_vm/aarch64.rs create mode 100644 src/hyperlight_host/src/hypervisor/regs/aarch64/mod.rs create mode 100644 src/hyperlight_host/src/hypervisor/virtual_machine/kvm/aarch64.rs create mode 100644 src/hyperlight_host/src/hypervisor/virtual_machine/mshv/aarch64.rs diff --git a/src/hyperlight_common/src/arch/aarch64/layout.rs b/src/hyperlight_common/src/arch/aarch64/layout.rs new file mode 100644 index 000000000..20f17026c --- /dev/null +++ b/src/hyperlight_common/src/arch/aarch64/layout.rs @@ -0,0 +1,25 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +// TODO(aarch64): change these, they are only provided in order to compile +pub const MAX_GVA: usize = 0xffff_ffff_ffff_efff; +pub const SNAPSHOT_PT_GVA_MIN: usize = 0xffff_8000_0000_0000; +pub const SNAPSHOT_PT_GVA_MAX: usize = 0xffff_80ff_ffff_ffff; +pub const MAX_GPA: usize = 0x0000_000f_ffff_ffff; + +pub fn min_scratch_size(_input_data_size: usize, _output_data_size: usize) -> usize { + unimplemented!("min_scratch_size") +} diff --git a/src/hyperlight_common/src/arch/aarch64/vmem.rs b/src/hyperlight_common/src/arch/aarch64/vmem.rs new file mode 100644 index 000000000..3803251d2 --- /dev/null +++ b/src/hyperlight_common/src/arch/aarch64/vmem.rs @@ -0,0 +1,52 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ */ + +// TODO(aarch64): implement real page table operations + +use crate::vmem::{Mapping, TableOps, TableReadOps, Void}; + +pub const PAGE_SIZE: usize = 4096; +pub const PAGE_TABLE_SIZE: usize = 4096; +pub type PageTableEntry = u64; +pub type VirtAddr = u64; +pub type PhysAddr = u64; + +/// # Safety +/// See `TableOps` documentation. +#[allow(clippy::missing_safety_doc)] +pub unsafe fn map(_op: &Op, _mapping: Mapping) { + unimplemented!("map") +} + +/// # Safety +/// See `TableReadOps` documentation. +#[allow(clippy::missing_safety_doc)] +pub unsafe fn virt_to_phys<'a, Op: TableReadOps + 'a>( + _op: impl core::convert::AsRef + Copy + 'a, + _address: u64, + _len: u64, +) -> impl Iterator + 'a { + unimplemented!("virt_to_phys"); + #[allow(unreachable_code)] + core::iter::empty() +} + +pub trait TableMovability {} +impl> TableMovability + for crate::vmem::MayMoveTable +{ +} +impl TableMovability for crate::vmem::MayNotMoveTable {} diff --git a/src/hyperlight_common/src/layout.rs b/src/hyperlight_common/src/layout.rs index 1d40f8fc0..b53897bb1 100644 --- a/src/hyperlight_common/src/layout.rs +++ b/src/hyperlight_common/src/layout.rs @@ -23,10 +23,14 @@ limitations under the License. all(target_arch = "x86_64", feature = "nanvix-unstable"), path = "arch/i686/layout.rs" )] +#[cfg_attr(target_arch = "aarch64", path = "arch/aarch64/layout.rs")] mod arch; pub use arch::{MAX_GPA, MAX_GVA}; -#[cfg(all(target_arch = "x86_64", not(feature = "nanvix-unstable")))] +#[cfg(any( + all(target_arch = "x86_64", not(feature = "nanvix-unstable")), + target_arch = "aarch64" +))] pub use arch::{SNAPSHOT_PT_GVA_MAX, SNAPSHOT_PT_GVA_MIN}; // offsets down from the top of scratch memory for various things diff --git a/src/hyperlight_common/src/vmem.rs b/src/hyperlight_common/src/vmem.rs index be67658a3..c72a6b9af 100644 --- a/src/hyperlight_common/src/vmem.rs +++ b/src/hyperlight_common/src/vmem.rs @@ -16,6 +16,7 @@ limitations under the License. 
#[cfg_attr(target_arch = "x86_64", path = "arch/amd64/vmem.rs")] #[cfg_attr(target_arch = "x86", path = "arch/i686/vmem.rs")] +#[cfg_attr(target_arch = "aarch64", path = "arch/aarch64/vmem.rs")] mod arch; /// This is always the page size that the /guest/ is being compiled diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 031961ed6..28fcba460 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -49,7 +49,7 @@ chrono = { version = "0.4", optional = true } anyhow = "1.0" metrics = "0.24.3" serde_json = "1.0" -elfcore = "2.0" +elfcore = { version = "2.0", optional = true } uuid = { version = "1.22.0", features = ["v4"] } [target.'cfg(windows)'.dependencies] @@ -128,7 +128,7 @@ executable_heap = [] # This feature enables printing of debug information to stdout in debug builds print_debug = [] # Dumps the VM state to a file on unexpected errors or crashes. The path of the file will be printed on stdout and logged. -crashdump = ["dep:chrono"] +crashdump = ["dep:chrono", "dep:elfcore"] trace_guest = ["dep:opentelemetry", "dep:tracing-opentelemetry", "dep:hyperlight-guest-tracing", "hyperlight-common/trace_guest"] mem_profile = [ "trace_guest", "dep:framehop", "dep:fallible-iterator", "hyperlight-common/mem_profile" ] kvm = ["dep:kvm-bindings", "dep:kvm-ioctls"] diff --git a/src/hyperlight_host/build.rs b/src/hyperlight_host/build.rs index 953bfda29..d599bedc1 100644 --- a/src/hyperlight_host/build.rs +++ b/src/hyperlight_host/build.rs @@ -99,10 +99,10 @@ fn main() -> Result<()> { // Essentially the kvm and mshv3 features are ignored on windows as long as you use #[cfg(kvm)] and not #[cfg(feature = "kvm")]. // You should never use #[cfg(feature = "kvm")] or #[cfg(feature = "mshv3")] in the codebase. cfg_aliases::cfg_aliases! 
{ - gdb: { all(feature = "gdb", debug_assertions) }, + gdb: { all(feature = "gdb", debug_assertions, target_arch = "x86_64") }, kvm: { all(feature = "kvm", target_os = "linux") }, mshv3: { all(feature = "mshv3", target_os = "linux") }, - crashdump: { all(feature = "crashdump") }, + crashdump: { all(feature = "crashdump", target_arch = "x86_64") }, // print_debug feature is aliased with debug_assertions to make it only available in debug-builds. print_debug: { all(feature = "print_debug", debug_assertions) }, } diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/aarch64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/aarch64.rs new file mode 100644 index 000000000..c4288ae60 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/aarch64.rs @@ -0,0 +1,99 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// TODO(aarch64): implement arch-specific HyperlightVm methods + +use std::sync::Arc; + +use super::{ + AccessPageTableError, CreateHyperlightVmError, DispatchGuestCallError, HyperlightVm, + InitializeError, +}; +#[cfg(gdb)] +use crate::hypervisor::gdb::{DebugCommChannel, DebugMsg, DebugResponse}; +use crate::hypervisor::regs::CommonSpecialRegisters; +use crate::hypervisor::virtual_machine::RegisterError; +use crate::mem::mgr::SandboxMemoryManager; +use crate::mem::shared_mem::{GuestSharedMemory, HostSharedMemory}; +use crate::sandbox::SandboxConfiguration; +use crate::sandbox::host_funcs::FunctionRegistry; +use crate::sandbox::snapshot::NextAction; +#[cfg(feature = "mem_profile")] +use crate::sandbox::trace::MemTraceInfo; +#[cfg(crashdump)] +use crate::sandbox::uninitialized::SandboxRuntimeConfig; + +impl HyperlightVm { + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + _snapshot_mem: GuestSharedMemory, + _scratch_mem: GuestSharedMemory, + _pml4_addr: u64, + _entrypoint: NextAction, + _rsp_gva: u64, + _config: &SandboxConfiguration, + #[cfg(gdb)] _gdb_conn: Option>, + #[cfg(crashdump)] _rt_cfg: SandboxRuntimeConfig, + #[cfg(feature = "mem_profile")] _trace_info: MemTraceInfo, + ) -> std::result::Result { + unimplemented!("new") + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn initialise( + &mut self, + _peb_addr: crate::mem::ptr::RawPtr, + _seed: u64, + _page_size: u32, + _mem_mgr: &mut SandboxMemoryManager, + _host_funcs: &Arc>, + _guest_max_log_level: Option, + #[cfg(gdb)] _dbg_mem_access_fn: Arc< + std::sync::Mutex>, + >, + ) -> Result<(), InitializeError> { + unimplemented!("initialise") + } + + pub(crate) fn dispatch_call_from_host( + &mut self, + _mem_mgr: &mut SandboxMemoryManager, + _host_funcs: &Arc>, + #[cfg(gdb)] _dbg_mem_access_fn: Arc< + std::sync::Mutex>, + >, + ) -> Result<(), DispatchGuestCallError> { + unimplemented!("dispatch_call_from_host") + } + + pub(crate) fn get_root_pt(&self) -> Result { + 
unimplemented!("get_root_pt") + } + + pub(crate) fn get_snapshot_sregs( + &mut self, + ) -> Result { + unimplemented!("get_snapshot_sregs") + } + + pub(crate) fn reset_vcpu( + &mut self, + _cr3: u64, + _sregs: &CommonSpecialRegisters, + ) -> std::result::Result<(), RegisterError> { + unimplemented!("reset_vcpu") + } +} diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs index 0b30e2ed2..82c095feb 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs @@ -24,6 +24,8 @@ use std::collections::HashMap; use std::str::FromStr; use std::sync::{Arc, Mutex}; +#[cfg(target_arch = "aarch64")] +pub(crate) use aarch64::*; use hyperlight_common::log_level::GuestLogFilter; use tracing_core::LevelFilter; diff --git a/src/hyperlight_host/src/hypervisor/regs.rs b/src/hyperlight_host/src/hypervisor/regs.rs index ac806e658..828bdf558 100644 --- a/src/hyperlight_host/src/hypervisor/regs.rs +++ b/src/hyperlight_host/src/hypervisor/regs.rs @@ -19,9 +19,14 @@ mod x86_64; #[cfg(target_arch = "x86_64")] pub(crate) use x86_64::*; +#[cfg(target_arch = "aarch64")] +mod aarch64; #[cfg(target_os = "windows")] use std::collections::HashSet; +#[cfg(target_arch = "aarch64")] +pub(crate) use aarch64::*; + #[cfg(target_os = "windows")] #[derive(Debug, PartialEq)] pub(crate) enum FromWhpRegisterError { diff --git a/src/hyperlight_host/src/hypervisor/regs/aarch64/mod.rs b/src/hyperlight_host/src/hypervisor/regs/aarch64/mod.rs new file mode 100644 index 000000000..8f91c634d --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/regs/aarch64/mod.rs @@ -0,0 +1,37 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// TODO(aarch64): implement real register definitions + +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(crate) struct CommonRegisters { + _placeholder: u64, +} + +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(crate) struct CommonSpecialRegisters { + _placeholder: u64, +} + +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(crate) struct CommonFpu { + _placeholder: u64, +} + +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(crate) struct CommonDebugRegs { + _placeholder: u64, +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/aarch64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/aarch64.rs new file mode 100644 index 000000000..39ecb775d --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/aarch64.rs @@ -0,0 +1,40 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// TODO(aarch64): implement KVM backend + +use tracing::{Span, instrument}; + +use crate::hypervisor::virtual_machine::CreateVmError; + +/// Return `true` if the KVM API is available +#[instrument(skip_all, parent = Span::current(), level = "Trace")] +pub(crate) fn is_hypervisor_present() -> bool { + // TODO(aarch64): implement KVM detection + false +} + +/// A KVM implementation of a single-vcpu VM +#[derive(Debug)] +pub(crate) struct KvmVm { + _placeholder: (), +} + +impl KvmVm { + pub(crate) fn new() -> std::result::Result { + unimplemented!("KvmVm::new") + } +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/mod.rs index 3c886e176..b2adf372c 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/mod.rs @@ -18,3 +18,8 @@ limitations under the License. mod x86_64; #[cfg(target_arch = "x86_64")] pub(crate) use x86_64::*; + +#[cfg(target_arch = "aarch64")] +mod aarch64; +#[cfg(target_arch = "aarch64")] +pub(crate) use aarch64::*; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index d739ad0d4..b3bed769e 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -111,8 +111,8 @@ pub(crate) const XSAVE_MIN_SIZE: usize = 576; #[cfg(all(any(kvm, mshv3), test, not(feature = "nanvix-unstable")))] pub(crate) const XSAVE_BUFFER_SIZE: usize = 4096; -// Compiler error if no hypervisor type is available -#[cfg(not(any(kvm, mshv3, target_os = "windows")))] +// Compiler error if no hypervisor type is available (not applicable on aarch64 yet) +#[cfg(not(any(kvm, mshv3, target_os = "windows", target_arch = "aarch64")))] compile_error!( "No hypervisor type is available for the current platform. Please enable either the `kvm` or `mshv3` cargo feature." 
); @@ -121,7 +121,12 @@ compile_error!( pub(crate) enum VmExit { /// The vCPU has exited due to a debug event (usually breakpoint) #[cfg(gdb)] - Debug { dr6: u64, exception: u32 }, + Debug { + #[cfg(target_arch = "x86_64")] + dr6: u64, + #[cfg(target_arch = "x86_64")] + exception: u32, + }, /// The vCPU has halted Halt(), /// The vCPU has issued a write to the given port with the given value diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/aarch64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/aarch64.rs new file mode 100644 index 000000000..79d816b15 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/aarch64.rs @@ -0,0 +1,38 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +use tracing::{Span, instrument}; + +use crate::hypervisor::virtual_machine::CreateVmError; + +/// Return `true` if the MSHV API is available +#[instrument(skip_all, parent = Span::current(), level = "Trace")] +pub(crate) fn is_hypervisor_present() -> bool { + // TODO(aarch64): implement MSHV detection + false +} + +/// An MSHV implementation of a single-vcpu VM +#[derive(Debug)] +pub(crate) struct MshvVm { + _placeholder: (), +} + +impl MshvVm { + pub(crate) fn new() -> std::result::Result { + unimplemented!("MshvVm::new") + } +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/mod.rs index 3c886e176..b2adf372c 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/mod.rs @@ -18,3 +18,8 @@ limitations under the License. mod x86_64; #[cfg(target_arch = "x86_64")] pub(crate) use x86_64::*; + +#[cfg(target_arch = "aarch64")] +mod aarch64; +#[cfg(target_arch = "aarch64")] +pub(crate) use aarch64::*; diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index 1883b0251..ba193e533 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -26,7 +26,9 @@ use kvm_bindings::{KVM_MEM_READONLY, kvm_userspace_memory_region}; use mshv_bindings::{ MSHV_SET_MEM_BIT_EXECUTABLE, MSHV_SET_MEM_BIT_UNMAP, MSHV_SET_MEM_BIT_WRITABLE, }; -#[cfg(mshv3)] +#[cfg(all(mshv3, target_arch = "aarch64"))] +use mshv_bindings::{hv_arm64_memory_intercept_message, mshv_user_mem_region}; +#[cfg(all(mshv3, target_arch = "x86_64"))] use mshv_bindings::{hv_x64_memory_intercept_message, mshv_user_mem_region}; #[cfg(target_os = "windows")] use windows::Win32::System::Hypervisor::{self, WHV_MEMORY_ACCESS_TYPE}; @@ -95,7 +97,7 @@ impl TryFrom for MemoryRegionFlags { } } -#[cfg(mshv3)] +#[cfg(all(mshv3, target_arch = "x86_64"))] impl 
TryFrom for MemoryRegionFlags { type Error = crate::HyperlightError; @@ -112,6 +114,15 @@ impl TryFrom for MemoryRegionFlags { } } +#[cfg(all(mshv3, target_arch = "aarch64"))] +impl TryFrom for MemoryRegionFlags { + type Error = crate::HyperlightError; + + fn try_from(_msg: hv_arm64_memory_intercept_message) -> crate::Result { + unimplemented!("try_from") + } +} + // NOTE: In the future, all host-side knowledge about memory region types // should collapse down to Snapshot vs Scratch (see shared_mem.rs). // Until then, these variants help distinguish regions for diagnostics From c70b153cd0361c44c63c0db8cea2ae2faccb546f Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 9 Mar 2026 14:05:03 -0700 Subject: [PATCH 6/6] refactor: move error types before struct in hyperlight_vm Move all error type definitions (DispatchGuestCallError, InitializeError, RunVmError, etc.) above the HyperlightVm struct definition to group related error types together. Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/hyperlight_vm/mod.rs | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs index 82c095feb..4fa9fba36 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs @@ -121,49 +121,6 @@ pub(super) fn get_guest_log_filter(guest_max_log_level: Option) -> GuestLogFilter::from(guest_log_level_filter).into() } -/// Represents a Hyperlight Virtual Machine instance. -/// -/// This struct manages the lifecycle of the VM, including: -/// - The underlying hypervisor implementation (e.g., KVM, MSHV, WHP). -/// - Memory management, including initial sandbox regions and dynamic mappings. -/// - The vCPU execution loop and handling of VM exits (I/O, MMIO, interrupts). 
-pub(crate) struct HyperlightVm { - #[cfg(gdb)] - pub(super) vm: Box, - #[cfg(not(gdb))] - pub(super) vm: Box, - pub(super) page_size: usize, - pub(super) entrypoint: NextAction, // only present if this vm has not yet been initialised - pub(super) rsp_gva: u64, - pub(super) interrupt_handle: Arc, - - pub(super) next_slot: u32, // Monotonically increasing slot number - pub(super) freed_slots: Vec, // Reusable slots from unmapped regions - - pub(super) snapshot_slot: u32, - // The current snapshot region, used to keep it alive as long as - // it is used & when unmapping - pub(super) snapshot_memory: Option, - pub(super) scratch_slot: u32, // The slot number used for the scratch region - // The current scratch region, used to keep it alive as long as it - // is used & when unmapping - pub(super) scratch_memory: Option, - - pub(super) mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region) - - pub(super) pending_tlb_flush: bool, - - #[cfg(gdb)] - pub(super) gdb_conn: Option>, - #[cfg(gdb)] - pub(super) sw_breakpoints: HashMap, // addr -> original instruction - #[cfg(feature = "mem_profile")] - pub(super) trace_info: MemTraceInfo, - #[cfg(crashdump)] - pub(super) rt_cfg: SandboxRuntimeConfig, -} - - /// DispatchGuestCall error #[derive(Debug, thiserror::Error)] pub enum DispatchGuestCallError { @@ -395,6 +352,49 @@ pub enum HyperlightVmError { #[error("Access page table error: {0}")] AccessPageTable(#[from] AccessPageTableError), } + +/// Represents a Hyperlight Virtual Machine instance. +/// +/// This struct manages the lifecycle of the VM, including: +/// - The underlying hypervisor implementation (e.g., KVM, MSHV, WHP). +/// - Memory management, including initial sandbox regions and dynamic mappings. +/// - The vCPU execution loop and handling of VM exits (I/O, MMIO, interrupts). 
+pub(crate) struct HyperlightVm { + #[cfg(gdb)] + pub(super) vm: Box, + #[cfg(not(gdb))] + pub(super) vm: Box, + pub(super) page_size: usize, + pub(super) entrypoint: NextAction, // only present if this vm has not yet been initialised + pub(super) rsp_gva: u64, + pub(super) interrupt_handle: Arc, + + pub(super) next_slot: u32, // Monotonically increasing slot number + pub(super) freed_slots: Vec, // Reusable slots from unmapped regions + + pub(super) snapshot_slot: u32, + // The current snapshot region, used to keep it alive as long as + // it is used & when unmapping + pub(super) snapshot_memory: Option, + pub(super) scratch_slot: u32, // The slot number used for the scratch region + // The current scratch region, used to keep it alive as long as it + // is used & when unmapping + pub(super) scratch_memory: Option, + + pub(super) mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region) + + pub(super) pending_tlb_flush: bool, + + #[cfg(gdb)] + pub(super) gdb_conn: Option>, + #[cfg(gdb)] + pub(super) sw_breakpoints: HashMap, // addr -> original instruction + #[cfg(feature = "mem_profile")] + pub(super) trace_info: MemTraceInfo, + #[cfg(crashdump)] + pub(super) rt_cfg: SandboxRuntimeConfig, +} + impl HyperlightVm { /// Map a region of host memory into the sandbox. ///