diff --git a/src/vmm/src/arch/aarch64/fdt.rs b/src/vmm/src/arch/aarch64/fdt.rs index 9946d3516cc..a84e457f37b 100644 --- a/src/vmm/src/arch/aarch64/fdt.rs +++ b/src/vmm/src/arch/aarch64/fdt.rs @@ -9,7 +9,7 @@ use std::ffi::CString; use std::fmt::Debug; use vm_fdt::{Error as VmFdtError, FdtWriter, FdtWriterNode}; -use vm_memory::GuestMemoryError; +use vm_memory::{GuestMemoryError, GuestMemoryRegion}; use super::cache_info::{CacheEntry, read_cache_config}; use super::gic::GICDevice; @@ -22,7 +22,7 @@ use crate::device_manager::mmio::MMIODeviceInfo; use crate::device_manager::pci_mngr::PciDevices; use crate::devices::acpi::vmgenid::{VMGENID_MEM_SIZE, VmGenId}; use crate::initrd::InitrdConfig; -use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap}; +use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestRegionType}; // This is a value for uniquely identifying the FDT node declaring the interrupt controller. const GIC_PHANDLE: u32 = 1; @@ -227,14 +227,16 @@ fn create_memory_node(fdt: &mut FdtWriter, guest_mem: &GuestMemoryMmap) -> Resul // The reason we do this is that Linux does not allow remapping system memory. However, without // remap, kernel drivers cannot get virtual addresses to read data from device memory. Leaving // this memory region out allows Linux kernel modules to remap and thus read this region. - let mem_size = guest_mem.last_addr().raw_value() - - super::layout::DRAM_MEM_START - - super::layout::SYSTEM_MEM_SIZE - + 1; - let mem_reg_prop = &[ - super::layout::DRAM_MEM_START + super::layout::SYSTEM_MEM_SIZE, - mem_size, - ]; + // Pick the first (and only) memory region + let dram_region = guest_mem + .iter() + .find(|region| region.region_type == GuestRegionType::Dram) + .unwrap(); + let start_addr = dram_region + .start_addr() + .unchecked_add(super::layout::SYSTEM_MEM_SIZE); + let mem_size = dram_region.last_addr().unchecked_offset_from(start_addr) + 1; + let mem_reg_prop = &[start_addr.raw_value(), mem_size]; let mem = fdt.begin_node("memory@ram")?; fdt.property_string("device_type", "memory")?; fdt.property_array_u64("reg", mem_reg_prop)?; diff --git a/src/vmm/src/arch/aarch64/mod.rs b/src/vmm/src/arch/aarch64/mod.rs index 74c5204af0e..4e82a7d3d56 100644 --- a/src/vmm/src/arch/aarch64/mod.rs +++ b/src/vmm/src/arch/aarch64/mod.rs @@ -22,7 +22,7 @@ use std::fs::File; use linux_loader::loader::pe::PE as Loader; use linux_loader::loader::{Cmdline, KernelLoader}; -use vm_memory::GuestMemoryError; +use vm_memory::{GuestMemoryError, GuestMemoryRegion}; use crate::arch::{BootProtocol, EntryPoint, arch_memory_regions_with_gap}; use crate::cpu_config::aarch64::{CpuConfiguration, CpuConfigurationError}; @@ -30,7 +30,9 @@ use crate::cpu_config::templates::CustomCpuTemplate; use crate::initrd::InitrdConfig; use crate::utils::{align_up, u64_to_usize, usize_to_u64}; use crate::vmm_config::machine_config::MachineConfig; -use crate::vstate::memory::{Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; +use crate::vstate::memory::{ + Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap, GuestRegionType, +}; use crate::vstate::vcpu::KvmVcpuError; use crate::{DeviceManager, Kvm, Vcpu, VcpuConfig, Vm, logger}; @@ -151,31 +153,29 @@ pub fn initrd_load_addr(guest_mem: &GuestMemoryMmap, initrd_size: usize) -> Opti usize_to_u64(initrd_size), usize_to_u64(super::GUEST_PAGE_SIZE), ); - match GuestAddress(get_fdt_addr(guest_mem)).checked_sub(rounded_size) { - Some(offset) => { - if guest_mem.address_in_range(offset) { - Some(offset.raw_value()) - } else { - 
None - } - } - None => None, - } + GuestAddress(get_fdt_addr(guest_mem)) + .checked_sub(rounded_size) + .filter(|&addr| guest_mem.address_in_range(addr)) + .map(|addr| addr.raw_value()) } // Auxiliary function to get the address where the device tree blob is loaded. fn get_fdt_addr(mem: &GuestMemoryMmap) -> u64 { + // Find the first (and only) DRAM region. + let dram_region = mem + .iter() + .find(|region| region.region_type == GuestRegionType::Dram) + .unwrap(); + // If the memory allocated is smaller than the size allocated for the FDT, // we return the start of the DRAM so that // we allow the code to try and load the FDT. - - if let Some(addr) = mem.last_addr().checked_sub(layout::FDT_MAX_SIZE as u64 - 1) - && mem.address_in_range(addr) - { - return addr.raw_value(); - } - - layout::DRAM_MEM_START + dram_region + .last_addr() + .checked_sub(layout::FDT_MAX_SIZE as u64 - 1) + .filter(|&addr| mem.address_in_range(addr)) + .map(|addr| addr.raw_value()) + .unwrap_or(layout::DRAM_MEM_START) } /// Load linux kernel into guest memory. diff --git a/src/vmm/src/arch/x86_64/mod.rs b/src/vmm/src/arch/x86_64/mod.rs index b18267c6a1e..4d6b906768e 100644 --- a/src/vmm/src/arch/x86_64/mod.rs +++ b/src/vmm/src/arch/x86_64/mod.rs @@ -31,12 +31,13 @@ pub mod xstate; #[allow(missing_docs)] pub mod generated; +use std::cmp::max; use std::fs::File; use kvm::Kvm; use layout::{ - CMDLINE_START, FIRST_ADDR_PAST_32BITS, FIRST_ADDR_PAST_64BITS_MMIO, MMIO32_MEM_SIZE, - MMIO32_MEM_START, MMIO64_MEM_SIZE, MMIO64_MEM_START, PCI_MMCONFIG_SIZE, PCI_MMCONFIG_START, + CMDLINE_START, MMIO32_MEM_SIZE, MMIO32_MEM_START, MMIO64_MEM_SIZE, MMIO64_MEM_START, + PCI_MMCONFIG_SIZE, PCI_MMCONFIG_START, }; use linux_loader::configurator::linux::LinuxBootConfigurator; use linux_loader::configurator::pvh::PvhBootConfigurator; @@ -59,7 +60,7 @@ use crate::initrd::InitrdConfig; use crate::utils::{align_down, u64_to_usize, usize_to_u64}; use crate::vmm_config::machine_config::MachineConfig; use crate::vstate::memory::{ - Address, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion, + Address, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion, GuestRegionType, }; use crate::vstate::vcpu::KvmVcpuConfigureError; use crate::{Vcpu, VcpuConfig, Vm, logger}; @@ -253,10 +254,6 @@ fn configure_pvh( initrd: &Option, ) -> Result<(), ConfigurationError> { const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336e_c578; - let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS); - let end_32bit_gap_start = GuestAddress(MMIO32_MEM_START); - let first_addr_past_64bits = GuestAddress(FIRST_ADDR_PAST_64BITS_MMIO); - let end_64bit_gap_start = GuestAddress(MMIO64_MEM_START); let himem_start = GuestAddress(layout::HIMEM_START); // Vector to hold modules (currently either empty or holding initrd). 
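// [Editor's sketch - not part of the patch] With regions now carrying a type, the
// aarch64 code above picks the single Dram region and derives the FDT "memory@ram"
// reg property from that region's own bounds: the node starts SYSTEM_MEM_SIZE past
// the region start (the reserved system area is left out so guest drivers can remap
// it), and the size is last_addr - start + 1 since last_addr is inclusive. The
// x86_64 hunks further down apply the same per-DRAM-region idea to the PVH memmap
// and e820 tables, clamping each entry's start with max(HIMEM_START, region start).
// A minimal stand-alone sketch of the arithmetic on plain u64 addresses:
fn memory_node_reg(region_start: u64, region_last: u64, system_mem_size: u64) -> [u64; 2] {
    // Skip the reserved system area at the bottom of DRAM.
    let start = region_start + system_mem_size;
    // `region_last` is the last valid (inclusive) address, hence the +1.
    let size = region_last - start + 1;
    [start, size]
}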
@@ -294,36 +291,21 @@ fn configure_pvh( type_: E820_RESERVED, ..Default::default() }); - let last_addr = guest_mem.last_addr(); - if last_addr > first_addr_past_64bits { - memmap.push(hvm_memmap_table_entry { - addr: first_addr_past_64bits.raw_value(), - size: last_addr.unchecked_offset_from(first_addr_past_64bits) + 1, - type_: MEMMAP_TYPE_RAM, - ..Default::default() - }); - } - - if last_addr > first_addr_past_32bits { + for region in guest_mem + .iter() + .filter(|region| region.region_type == GuestRegionType::Dram) + { + // the first 1MB is reserved for the kernel + let addr = max(himem_start, region.start_addr()); memmap.push(hvm_memmap_table_entry { - addr: first_addr_past_32bits.raw_value(), - size: (end_64bit_gap_start.unchecked_offset_from(first_addr_past_32bits)) - .min(last_addr.unchecked_offset_from(first_addr_past_32bits) + 1), + addr: addr.raw_value(), + size: region.last_addr().unchecked_offset_from(addr) + 1, type_: MEMMAP_TYPE_RAM, ..Default::default() }); } - memmap.push(hvm_memmap_table_entry { - addr: himem_start.raw_value(), - size: end_32bit_gap_start - .unchecked_offset_from(himem_start) - .min(last_addr.unchecked_offset_from(himem_start) + 1), - type_: MEMMAP_TYPE_RAM, - ..Default::default() - }); - // Construct the hvm_start_info structure and serialize it into // boot_params. This will be stored at PVH_INFO_START address, and %rbx // will be initialized to contain PVH_INFO_START prior to starting the @@ -368,10 +350,6 @@ fn configure_64bit_boot( const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; const KERNEL_LOADER_OTHER: u8 = 0xff; const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x0100_0000; // Must be non-zero. - let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS); - let end_32bit_gap_start = GuestAddress(MMIO32_MEM_START); - let first_addr_past_64bits = GuestAddress(FIRST_ADDR_PAST_64BITS_MMIO); - let end_64bit_gap_start = GuestAddress(MMIO64_MEM_START); let himem_start = GuestAddress(layout::HIMEM_START); @@ -409,35 +387,20 @@ fn configure_64bit_boot( E820_RESERVED, )?; - let last_addr = guest_mem.last_addr(); - - if last_addr > first_addr_past_64bits { - add_e820_entry( - &mut params, - first_addr_past_64bits.raw_value(), - last_addr.unchecked_offset_from(first_addr_past_64bits) + 1, - E820_RAM, - )?; - } - - if last_addr > first_addr_past_32bits { + for region in guest_mem + .iter() + .filter(|region| region.region_type == GuestRegionType::Dram) + { + // the first 1MB is reserved for the kernel + let addr = max(himem_start, region.start_addr()); add_e820_entry( &mut params, - first_addr_past_32bits.raw_value(), - (end_64bit_gap_start.unchecked_offset_from(first_addr_past_32bits)) - .min(last_addr.unchecked_offset_from(first_addr_past_32bits) + 1), + addr.raw_value(), + region.last_addr().unchecked_offset_from(addr) + 1, E820_RAM, )?; } - add_e820_entry( - &mut params, - himem_start.raw_value(), - (last_addr.unchecked_offset_from(himem_start) + 1) - .min(end_32bit_gap_start.unchecked_offset_from(himem_start)), - E820_RAM, - )?; - LinuxBootConfigurator::write_bootparams( &BootParams::new(¶ms, GuestAddress(layout::ZERO_PAGE_START)), guest_mem, @@ -573,6 +536,7 @@ mod tests { use linux_loader::loader::bootparam::boot_e820_entry; use super::*; + use crate::arch::x86_64::layout::FIRST_ADDR_PAST_32BITS; use crate::test_utils::{arch_mem, single_region_mem}; use crate::utils::mib_to_bytes; use crate::vstate::resources::ResourceAllocator; diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 6bd81c46f18..aa9c9590dfc 100644 --- a/src/vmm/src/builder.rs +++ 
b/src/vmm/src/builder.rs @@ -168,7 +168,7 @@ pub fn build_microvm_for_boot( // Build custom CPU config if a custom template is provided. let mut vm = Vm::new(&kvm)?; let (mut vcpus, vcpus_exit_evt) = vm.create_vcpus(vm_resources.machine_config.vcpu_count)?; - vm.register_memory_regions(guest_memory)?; + vm.register_dram_memory_regions(guest_memory)?; let mut device_manager = DeviceManager::new( event_manager, @@ -411,7 +411,7 @@ pub fn build_microvm_from_snapshot( .create_vcpus(vm_resources.machine_config.vcpu_count) .map_err(StartMicrovmError::Vm)?; - vm.register_memory_regions(guest_memory) + vm.restore_memory_regions(guest_memory, µvm_state.vm_state.memory) .map_err(StartMicrovmError::Vm)?; #[cfg(target_arch = "x86_64")] @@ -462,7 +462,6 @@ pub fn build_microvm_from_snapshot( event_manager, vm_resources, instance_id: &instance_info.id, - restored_from_file: uffd.is_none(), vcpus_exit_evt: &vcpus_exit_evt, }; #[allow(unused_mut)] diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 46accb637b0..e560204c9d2 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -586,7 +586,7 @@ pub(crate) mod tests { let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.register_memory_regions(guest_mem).unwrap(); + vm.register_dram_memory_regions(guest_mem).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); @@ -632,7 +632,7 @@ pub(crate) mod tests { let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.register_memory_regions(guest_mem).unwrap(); + vm.register_dram_memory_regions(guest_mem).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); @@ -685,7 +685,7 @@ pub(crate) mod tests { let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.register_memory_regions(guest_mem).unwrap(); + vm.register_dram_memory_regions(guest_mem).unwrap(); #[cfg(target_arch = "x86_64")] vm.setup_irqchip().unwrap(); diff --git a/src/vmm/src/device_manager/mod.rs b/src/vmm/src/device_manager/mod.rs index d7052422a3a..5cdb6ad80ca 100644 --- a/src/vmm/src/device_manager/mod.rs +++ b/src/vmm/src/device_manager/mod.rs @@ -439,7 +439,6 @@ pub struct DeviceRestoreArgs<'a> { pub vcpus_exit_evt: &'a EventFd, pub vm_resources: &'a mut VmResources, pub instance_id: &'a str, - pub restored_from_file: bool, } impl std::fmt::Debug for DeviceRestoreArgs<'_> { @@ -449,7 +448,6 @@ impl std::fmt::Debug for DeviceRestoreArgs<'_> { .field("vm", &self.vm) .field("vm_resources", &self.vm_resources) .field("instance_id", &self.instance_id) - .field("restored_from_file", &self.restored_from_file) .finish() } } @@ -487,7 +485,6 @@ impl<'a> Persist<'a> for DeviceManager { event_manager: constructor_args.event_manager, vm_resources: constructor_args.vm_resources, instance_id: constructor_args.instance_id, - restored_from_file: constructor_args.restored_from_file, }; let mmio_devices = MMIODeviceManager::restore(mmio_ctor_args, &state.mmio_state)?; @@ -505,7 +502,6 @@ impl<'a> Persist<'a> for DeviceManager { mem: constructor_args.mem, vm_resources: 
constructor_args.vm_resources, instance_id: constructor_args.instance_id, - restored_from_file: constructor_args.restored_from_file, event_manager: constructor_args.event_manager, }; let pci_devices = PciDevices::restore(pci_ctor_args, &state.pci_state)?; diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index b8371d82a83..a600c4e19b3 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -247,7 +247,6 @@ pub struct PciDevicesConstructorArgs<'a> { pub mem: &'a GuestMemoryMmap, pub vm_resources: &'a mut VmResources, pub instance_id: &'a str, - pub restored_from_file: bool, pub event_manager: &'a mut EventManager, } @@ -258,7 +257,6 @@ impl<'a> Debug for PciDevicesConstructorArgs<'a> { .field("mem", &self.mem) .field("vm_resources", &self.vm_resources) .field("instance_id", &self.instance_id) - .field("restored_from_file", &self.restored_from_file) .finish() } } @@ -410,10 +408,7 @@ impl<'a> Persist<'a> for PciDevices { if let Some(balloon_state) = &state.balloon_device { let device = Arc::new(Mutex::new( Balloon::restore( - BalloonConstructorArgs { - mem: mem.clone(), - restored_from_file: constructor_args.restored_from_file, - }, + BalloonConstructorArgs { mem: mem.clone() }, &balloon_state.device_state, ) .unwrap(), @@ -668,7 +663,6 @@ mod tests { mem: vmm.vm.guest_memory(), vm_resources, instance_id: "microvm-id", - restored_from_file: true, event_manager: &mut event_manager, }; let _restored_dev_manager = diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index d6d46fff0f5..4414ddddd07 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -144,7 +144,6 @@ pub struct MMIODevManagerConstructorArgs<'a> { pub event_manager: &'a mut EventManager, pub vm_resources: &'a mut VmResources, pub instance_id: &'a str, - pub restored_from_file: bool, } impl fmt::Debug for MMIODevManagerConstructorArgs<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -412,10 +411,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { if let Some(balloon_state) = &state.balloon_device { let device = Arc::new(Mutex::new(Balloon::restore( - BalloonConstructorArgs { - mem: mem.clone(), - restored_from_file: constructor_args.restored_from_file, - }, + BalloonConstructorArgs { mem: mem.clone() }, &balloon_state.device_state, )?)); @@ -686,7 +682,6 @@ mod tests { event_manager: &mut event_manager, vm_resources, instance_id: "microvm-id", - restored_from_file: true, }; let _restored_dev_manager = MMIODeviceManager::restore(restore_args, &device_manager_state.mmio_state).unwrap(); diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index 87a82c4fa9d..f0ecf77bc9e 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -14,7 +14,7 @@ use super::super::ActivateError; use super::super::device::{DeviceState, VirtioDevice}; use super::super::queue::Queue; use super::metrics::METRICS; -use super::util::{compact_page_frame_numbers, remove_range}; +use super::util::compact_page_frame_numbers; use super::{ BALLOON_DEV_ID, BALLOON_NUM_QUEUES, BALLOON_QUEUE_SIZES, DEFLATE_INDEX, INFLATE_INDEX, MAX_PAGE_COMPACT_BUFFER, MAX_PAGES_IN_DESC, MIB_TO_4K_PAGES, STATS_INDEX, @@ -32,7 +32,9 @@ use crate::devices::virtio::queue::InvalidAvailIdx; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::IncMetric; use 
crate::utils::u64_to_usize; -use crate::vstate::memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryMmap}; +use crate::vstate::memory::{ + Address, ByteValued, Bytes, GuestAddress, GuestMemoryExtension, GuestMemoryMmap, +}; use crate::{impl_device_type, mem_size_mib}; const SIZE_OF_U32: usize = std::mem::size_of::(); @@ -171,7 +173,6 @@ pub struct Balloon { pub(crate) device_state: DeviceState, // Implementation specific fields. - pub(crate) restored_from_file: bool, pub(crate) stats_polling_interval_s: u16, pub(crate) stats_timer: TimerFd, // The index of the previous stats descriptor is saved because @@ -188,7 +189,6 @@ impl Balloon { amount_mib: u32, deflate_on_oom: bool, stats_polling_interval_s: u16, - restored_from_file: bool, ) -> Result { let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; @@ -228,7 +228,6 @@ impl Balloon { queues, device_state: DeviceState::Inactive, activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, - restored_from_file, stats_polling_interval_s, stats_timer, stats_desc_index: None, @@ -335,10 +334,9 @@ impl Balloon { let guest_addr = GuestAddress(u64::from(page_frame_number) << VIRTIO_BALLOON_PFN_SHIFT); - if let Err(err) = remove_range( - mem, - (guest_addr, u64::from(range_len) << VIRTIO_BALLOON_PFN_SHIFT), - self.restored_from_file, + if let Err(err) = mem.discard_range( + guest_addr, + usize::try_from(range_len).unwrap() << VIRTIO_BALLOON_PFN_SHIFT, ) { error!("Error removing memory range: {:?}", err); } @@ -739,7 +737,7 @@ pub(crate) mod tests { // Test all feature combinations. for deflate_on_oom in [true, false].iter() { for stats_interval in [0, 1].iter() { - let mut balloon = Balloon::new(0, *deflate_on_oom, *stats_interval, false).unwrap(); + let mut balloon = Balloon::new(0, *deflate_on_oom, *stats_interval).unwrap(); assert_eq!(balloon.device_type(), VIRTIO_ID_BALLOON); let features: u64 = (1u64 << VIRTIO_F_VERSION_1) @@ -766,7 +764,7 @@ pub(crate) mod tests { #[test] fn test_virtio_read_config() { - let balloon = Balloon::new(0x10, true, 0, false).unwrap(); + let balloon = Balloon::new(0x10, true, 0).unwrap(); let cfg = BalloonConfig { amount_mib: 16, @@ -800,7 +798,7 @@ pub(crate) mod tests { #[test] fn test_virtio_write_config() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0).unwrap(); let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; @@ -826,7 +824,7 @@ pub(crate) mod tests { #[test] fn test_invalid_request() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); // Only initialize the inflate queue to demonstrate invalid request handling. 
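// [Editor's sketch - not part of the patch] The inflate path above no longer needs a
// `restored_from_file` flag: each compacted (pfn, range_len) pair is converted to a
// byte range and handed to GuestMemoryExtension::discard_range(), which decides per
// region how to release the pages (see the vstate/memory.rs hunk below). The PFN
// arithmetic, assuming VIRTIO_BALLOON_PFN_SHIFT is 12 (4 KiB balloon pages):
fn pfn_range_to_bytes(page_frame_number: u32, range_len: u32) -> (u64, usize) {
    const VIRTIO_BALLOON_PFN_SHIFT: u32 = 12; // assumed value of the crate constant
    let guest_addr = u64::from(page_frame_number) << VIRTIO_BALLOON_PFN_SHIFT;
    let len = usize::try_from(range_len).unwrap() << VIRTIO_BALLOON_PFN_SHIFT;
    (guest_addr, len)
}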
@@ -887,7 +885,7 @@ pub(crate) mod tests { #[test] fn test_inflate() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -959,7 +957,7 @@ pub(crate) mod tests { #[test] fn test_deflate() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let defq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1009,7 +1007,7 @@ pub(crate) mod tests { #[test] fn test_stats() { - let mut balloon = Balloon::new(0, true, 1, false).unwrap(); + let mut balloon = Balloon::new(0, true, 1).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let statsq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1101,7 +1099,7 @@ pub(crate) mod tests { #[test] fn test_process_balloon_queues() { - let mut balloon = Balloon::new(0x10, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0x10, true, 0).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); @@ -1116,7 +1114,7 @@ pub(crate) mod tests { #[test] fn test_update_stats_interval() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0).unwrap(); let mem = default_mem(); let q = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, q.create_queue()); @@ -1129,7 +1127,7 @@ pub(crate) mod tests { ); balloon.update_stats_polling_interval(0).unwrap(); - let mut balloon = Balloon::new(0, true, 1, false).unwrap(); + let mut balloon = Balloon::new(0, true, 1).unwrap(); let mem = default_mem(); let q = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, q.create_queue()); @@ -1147,14 +1145,14 @@ pub(crate) mod tests { #[test] fn test_cannot_update_inactive_device() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0).unwrap(); // Assert that we can't update an inactive device. balloon.update_size(1).unwrap_err(); } #[test] fn test_num_pages() { - let mut balloon = Balloon::new(0, true, 0, false).unwrap(); + let mut balloon = Balloon::new(0, true, 0).unwrap(); // Switch the state to active. balloon.device_state = DeviceState::Activated(ActiveState { mem: single_region_mem(32 << 20), diff --git a/src/vmm/src/devices/virtio/balloon/event_handler.rs b/src/vmm/src/devices/virtio/balloon/event_handler.rs index 3922b4b8385..6fdd00c434c 100644 --- a/src/vmm/src/devices/virtio/balloon/event_handler.rs +++ b/src/vmm/src/devices/virtio/balloon/event_handler.rs @@ -142,7 +142,7 @@ pub mod tests { #[test] fn test_event_handler() { let mut event_manager = EventManager::new().unwrap(); - let mut balloon = Balloon::new(0, true, 10, false).unwrap(); + let mut balloon = Balloon::new(0, true, 10).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); diff --git a/src/vmm/src/devices/virtio/balloon/persist.rs b/src/vmm/src/devices/virtio/balloon/persist.rs index c5fa227c620..e92356c394e 100644 --- a/src/vmm/src/devices/virtio/balloon/persist.rs +++ b/src/vmm/src/devices/virtio/balloon/persist.rs @@ -95,7 +95,6 @@ pub struct BalloonState { pub struct BalloonConstructorArgs { /// Pointer to guest memory. 
pub mem: GuestMemoryMmap, - pub restored_from_file: bool, } impl Persist<'_> for Balloon { @@ -122,12 +121,7 @@ impl Persist<'_> for Balloon { ) -> Result { // We can safely create the balloon with arbitrary flags and // num_pages because we will overwrite them after. - let mut balloon = Balloon::new( - 0, - false, - state.stats_polling_interval_s, - constructor_args.restored_from_file, - )?; + let mut balloon = Balloon::new(0, false, state.stats_polling_interval_s)?; let mut num_queues = BALLOON_NUM_QUEUES; // As per the virtio 1.1 specification, the statistics queue @@ -184,7 +178,7 @@ mod tests { let mut mem = vec![0; 4096]; // Create and save the balloon device. - let balloon = Balloon::new(0x42, false, 2, false).unwrap(); + let balloon = Balloon::new(0x42, false, 2).unwrap(); Snapshot::new(balloon.save()) .save(&mut mem.as_mut_slice()) @@ -192,10 +186,7 @@ mod tests { // Deserialize and restore the balloon device. let restored_balloon = Balloon::restore( - BalloonConstructorArgs { - mem: guest_mem, - restored_from_file: true, - }, + BalloonConstructorArgs { mem: guest_mem }, &Snapshot::load_without_crc_check(mem.as_slice()) .unwrap() .data, @@ -203,7 +194,6 @@ mod tests { .unwrap(); assert_eq!(restored_balloon.device_type(), VIRTIO_ID_BALLOON); - assert!(restored_balloon.restored_from_file); assert_eq!(restored_balloon.acked_features, balloon.acked_features); assert_eq!(restored_balloon.avail_features, balloon.avail_features); diff --git a/src/vmm/src/devices/virtio/balloon/util.rs b/src/vmm/src/devices/virtio/balloon/util.rs index 35ef69f972f..1d00891783e 100644 --- a/src/vmm/src/devices/virtio/balloon/util.rs +++ b/src/vmm/src/devices/virtio/balloon/util.rs @@ -65,57 +65,6 @@ pub(crate) fn compact_page_frame_numbers(v: &mut [u32]) -> Vec<(u32, u32)> { result } -pub(crate) fn remove_range( - guest_memory: &GuestMemoryMmap, - range: (GuestAddress, u64), - restored_from_file: bool, -) -> Result<(), RemoveRegionError> { - let (guest_address, range_len) = range; - - if let Some(region) = guest_memory.find_region(guest_address) { - if guest_address.0 + range_len > region.start_addr().0 + region.len() { - return Err(RemoveRegionError::MalformedRange); - } - let phys_address = guest_memory - .get_host_address(guest_address) - .map_err(|_| RemoveRegionError::AddressTranslation)?; - - // Mmap a new anonymous region over the present one in order to create a hole. - // This workaround is (only) needed after resuming from a snapshot because the guest memory - // is mmaped from file as private and there is no `madvise` flag that works for this case. - if restored_from_file { - // SAFETY: The address and length are known to be valid. - let ret = unsafe { - libc::mmap( - phys_address.cast(), - u64_to_usize(range_len), - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, - -1, - 0, - ) - }; - if ret == libc::MAP_FAILED { - return Err(RemoveRegionError::MmapFail(io::Error::last_os_error())); - } - }; - - // Madvise the region in order to mark it as not used. - // SAFETY: The address and length are known to be valid. 
- let ret = unsafe { - let range_len = u64_to_usize(range_len); - libc::madvise(phys_address.cast(), range_len, libc::MADV_DONTNEED) - }; - if ret < 0 { - return Err(RemoveRegionError::MadviseFail(io::Error::last_os_error())); - } - - Ok(()) - } else { - Err(RemoveRegionError::RegionNotFound) - } -} - #[cfg(test)] mod tests { use std::fmt::Debug; @@ -174,88 +123,6 @@ mod tests { ); } - #[test] - fn test_remove_range() { - let page_size: usize = 0x1000; - let mem = single_region_mem(2 * page_size); - - // Fill the memory with ones. - let ones = vec![1u8; 2 * page_size]; - mem.write(&ones[..], GuestAddress(0)).unwrap(); - - // Remove the first page. - remove_range(&mem, (GuestAddress(0), page_size as u64), false).unwrap(); - - // Check that the first page is zeroed. - let mut actual_page = vec![0u8; page_size]; - mem.read(actual_page.as_mut_slice(), GuestAddress(0)) - .unwrap(); - assert_eq!(vec![0u8; page_size], actual_page); - // Check that the second page still contains ones. - mem.read(actual_page.as_mut_slice(), GuestAddress(page_size as u64)) - .unwrap(); - assert_eq!(vec![1u8; page_size], actual_page); - - // Malformed range: the len is too big. - assert_match!( - remove_range(&mem, (GuestAddress(0), 0x10000), false).unwrap_err(), - RemoveRegionError::MalformedRange - ); - - // Region not mapped. - assert_match!( - remove_range(&mem, (GuestAddress(0x10000), 0x10), false).unwrap_err(), - RemoveRegionError::RegionNotFound - ); - - // Madvise fail: the guest address is not aligned to the page size. - assert_match!( - remove_range(&mem, (GuestAddress(0x20), page_size as u64), false).unwrap_err(), - RemoveRegionError::MadviseFail(_) - ); - } - - #[test] - fn test_remove_range_on_restored() { - let page_size: usize = 0x1000; - let mem = single_region_mem(2 * page_size); - - // Fill the memory with ones. - let ones = vec![1u8; 2 * page_size]; - mem.write(&ones[..], GuestAddress(0)).unwrap(); - - // Remove the first page. - remove_range(&mem, (GuestAddress(0), page_size as u64), true).unwrap(); - - // Check that the first page is zeroed. - let mut actual_page = vec![0u8; page_size]; - mem.read(actual_page.as_mut_slice(), GuestAddress(0)) - .unwrap(); - assert_eq!(vec![0u8; page_size], actual_page); - // Check that the second page still contains ones. - mem.read(actual_page.as_mut_slice(), GuestAddress(page_size as u64)) - .unwrap(); - assert_eq!(vec![1u8; page_size], actual_page); - - // Malformed range: the len is too big. - assert_match!( - remove_range(&mem, (GuestAddress(0), 0x10000), true).unwrap_err(), - RemoveRegionError::MalformedRange - ); - - // Region not mapped. - assert_match!( - remove_range(&mem, (GuestAddress(0x10000), 0x10), true).unwrap_err(), - RemoveRegionError::RegionNotFound - ); - - // Mmap fail: the guest address is not aligned to the page size. 
- assert_match!( - remove_range(&mem, (GuestAddress(0x20), page_size as u64), true).unwrap_err(), - RemoveRegionError::MmapFail(_) - ); - } - /// ------------------------------------- /// BEGIN PROPERTY BASED TESTING use proptest::prelude::*; diff --git a/src/vmm/src/devices/virtio/block/virtio/io/mod.rs b/src/vmm/src/devices/virtio/block/virtio/io/mod.rs index 09cc7c4e31d..eef347b51ce 100644 --- a/src/vmm/src/devices/virtio/block/virtio/io/mod.rs +++ b/src/vmm/src/devices/virtio/block/virtio/io/mod.rs @@ -179,6 +179,7 @@ pub mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::ffi::OsStrExt; + use vm_memory::GuestMemoryRegion; use vmm_sys_util::tempfile::TempFile; use super::*; @@ -186,7 +187,7 @@ pub mod tests { use crate::utils::u64_to_usize; use crate::vmm_config::machine_config::HugePageConfig; use crate::vstate::memory; - use crate::vstate::memory::{Bitmap, Bytes, GuestMemory}; + use crate::vstate::memory::{Bitmap, Bytes, GuestMemory, GuestRegionMmapExt}; const FILE_LEN: u32 = 1024; // 2 pages of memory should be enough to test read/write ops and also dirty tracking. @@ -227,7 +228,10 @@ pub mod tests { true, HugePageConfig::None, ) - .unwrap(), + .unwrap() + .into_iter() + .map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0)) + .collect(), ) .unwrap() } diff --git a/src/vmm/src/devices/virtio/vhost_user.rs b/src/vmm/src/devices/virtio/vhost_user.rs index bd1260fad40..8619b481edb 100644 --- a/src/vmm/src/devices/virtio/vhost_user.rs +++ b/src/vmm/src/devices/virtio/vhost_user.rs @@ -380,7 +380,7 @@ impl VhostUserHandleImpl { let vhost_user_net_reg = VhostUserMemoryRegionInfo { guest_phys_addr: region.start_addr().raw_value(), memory_size: region.len(), - userspace_addr: region.as_ptr() as u64, + userspace_addr: region.inner.as_ptr() as u64, mmap_offset, mmap_handle, }; @@ -480,7 +480,7 @@ pub(crate) mod tests { use crate::devices::virtio::test_utils::default_interrupt; use crate::test_utils::create_tmp_socket; use crate::vstate::memory; - use crate::vstate::memory::GuestAddress; + use crate::vstate::memory::{GuestAddress, GuestRegionMmapExt}; pub(crate) fn create_mem(file: File, regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap { GuestMemoryMmap::from_regions( @@ -490,7 +490,10 @@ pub(crate) mod tests { Some(file), false, ) - .unwrap(), + .unwrap() + .into_iter() + .map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0)) + .collect(), ) .unwrap() } @@ -800,7 +803,7 @@ pub(crate) mod tests { .map(|region| VhostUserMemoryRegionInfo { guest_phys_addr: region.start_addr().raw_value(), memory_size: region.len(), - userspace_addr: region.as_ptr() as u64, + userspace_addr: region.inner.as_ptr() as u64, mmap_offset: region.file_offset().unwrap().start(), mmap_handle: region.file_offset().unwrap().file().as_raw_fd(), }) diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 7a168db0146..1abd5aad186 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -106,6 +106,8 @@ pub mod snapshot; pub mod test_utils; /// Utility functions and struct pub mod utils; +/// Vendored vm memory (vm-memory#312) +pub mod vm_memory_vendored; /// Wrappers over structures used to configure the VMM. pub mod vmm_config; /// Module with virtual state structs. 
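// [Editor's sketch - not part of the patch] GuestMemoryMmap is now an alias for
// GuestRegionCollection<GuestRegionMmapExt> (see vm_memory_vendored.rs and
// vstate/memory.rs below), so the test helpers above must wrap each plain
// GuestRegionMmap in the extended type before building the collection. The helper
// name below is illustrative; it mirrors the `into_region_ext` helper added to the
// memory tests later in this diff, using a placeholder KVM slot because these tests
// never register the memory with KVM:
use crate::vstate::memory::{GuestMemoryMmap, GuestRegionMmap, GuestRegionMmapExt};

fn into_dram_collection(regions: Vec<GuestRegionMmap>) -> GuestMemoryMmap {
    GuestMemoryMmap::from_regions(
        regions
            .into_iter()
            .map(|r| GuestRegionMmapExt::dram_from_mmap_region(r, 0)) // dummy slot 0
            .collect(),
    )
    .unwrap()
}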
diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 212b6105831..ee76bf6800b 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -588,7 +588,7 @@ mod tests { use crate::vmm_config::balloon::BalloonDeviceConfig; use crate::vmm_config::net::NetworkInterfaceConfig; use crate::vmm_config::vsock::tests::default_config; - use crate::vstate::memory::GuestMemoryRegionState; + use crate::vstate::memory::{GuestMemoryRegionState, GuestRegionType}; fn default_vmm_with_devices() -> Vmm { let mut event_manager = EventManager::new().expect("Cannot create EventManager"); @@ -693,6 +693,7 @@ mod tests { regions: vec![GuestMemoryRegionState { base_address: 0, size: 0x20000, + region_type: GuestRegionType::Dram, }], }; diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 0d2f4bbed22..7d2318fa14d 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -6,6 +6,7 @@ use std::path::PathBuf; use std::sync::{Arc, Mutex, MutexGuard}; use serde::{Deserialize, Serialize}; +use vm_memory::GuestAddress; use crate::cpu_config::templates::CustomCpuTemplate; use crate::device_manager::persist::SharedDeviceType; @@ -463,11 +464,14 @@ impl VmResources { Ok(()) } - /// Allocates guest memory in a configuration most appropriate for these [`VmResources`]. + /// Allocates the given guest memory regions. /// /// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise /// prefers anonymous memory for performance reasons. - pub fn allocate_guest_memory(&self) -> Result, MemoryError> { + fn allocate_memory_regions( + &self, + regions: &[(GuestAddress, usize)], + ) -> Result, MemoryError> { let vhost_user_device_used = self .block .devices @@ -483,22 +487,27 @@ impl VmResources { // because that would require running a backend process. If in the future we converge to // a single way of backing guest memory for vhost-user and non-vhost-user cases, // that would not be worth the effort. - let regions = - crate::arch::arch_memory_regions(mib_to_bytes(self.machine_config.mem_size_mib)); if vhost_user_device_used { memory::memfd_backed( - regions.as_ref(), + regions, self.machine_config.track_dirty_pages, self.machine_config.huge_pages, ) } else { memory::anonymous( - regions.into_iter(), + regions.iter().copied(), self.machine_config.track_dirty_pages, self.machine_config.huge_pages, ) } } + + /// Allocates guest memory in a configuration most appropriate for these [`VmResources`]. 
+ pub fn allocate_guest_memory(&self) -> Result, MemoryError> { + let regions = + crate::arch::arch_memory_regions(mib_to_bytes(self.machine_config.mem_size_mib)); + self.allocate_memory_regions(®ions) + } } impl From<&VmResources> for VmmConfig { @@ -1511,7 +1520,7 @@ mod tests { .unwrap(); let err = vm_resources .update_from_restored_device(SharedDeviceType::Balloon(Arc::new(Mutex::new( - Balloon::new(128, false, 0, true).unwrap(), + Balloon::new(128, false, 0).unwrap(), )))) .unwrap_err(); assert!( diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index 2cfcc274b5d..6fe66cdbadb 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -12,11 +12,11 @@ use crate::builder::build_microvm_for_boot; use crate::resources::VmResources; use crate::seccomp::get_empty_filters; use crate::test_utils::mock_resources::{MockBootSourceConfig, MockVmConfig, MockVmResources}; +use crate::vm_memory_vendored::GuestRegionCollection; use crate::vmm_config::boot_source::BootSourceConfig; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::HugePageConfig; -use crate::vstate::memory; -use crate::vstate::memory::{GuestMemoryMmap, GuestRegionMmap}; +use crate::vstate::memory::{self, GuestMemoryMmap, GuestRegionMmap, GuestRegionMmapExt}; use crate::{EventManager, Vmm}; pub mod mock_resources; @@ -43,9 +43,12 @@ pub fn single_region_mem_at_raw(at: u64, size: usize) -> Vec { /// Creates a [`GuestMemoryMmap`] with multiple regions and without dirty page tracking. pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap { - GuestMemoryMmap::from_regions( + GuestRegionCollection::from_regions( memory::anonymous(regions.iter().copied(), false, HugePageConfig::None) - .expect("Cannot initialize memory"), + .expect("Cannot initialize memory") + .into_iter() + .map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0)) + .collect(), ) .unwrap() } diff --git a/src/vmm/src/vm_memory_vendored.rs b/src/vmm/src/vm_memory_vendored.rs new file mode 100644 index 00000000000..7e3ecb81682 --- /dev/null +++ b/src/vmm/src/vm_memory_vendored.rs @@ -0,0 +1,265 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Module temporarily containing vendored in-review vm-memory features +//! +//! TODO: To be removed once https://github.com/rust-vmm/vm-memory/pull/312 is merged + +#![allow(clippy::cast_possible_truncation)] // vm-memory has different clippy configuration + +use std::io::{Read, Write}; +use std::sync::Arc; +use std::sync::atomic::Ordering; + +use vm_memory::guest_memory::Result; +use vm_memory::{ + Address, AtomicAccess, Bytes, Error, GuestAddress, GuestMemory, GuestMemoryError, + GuestMemoryRegion, MemoryRegionAddress, +}; + +use crate::vstate::memory::GuestRegionMmapExt; + +/// [`GuestMemory`](trait.GuestMemory.html) implementation based on a homogeneous collection +/// of [`GuestMemoryRegion`] implementations. +/// +/// Represents a sorted set of non-overlapping physical guest memory regions. +#[derive(Debug)] +pub struct GuestRegionCollection { + regions: Vec>, +} + +impl Default for GuestRegionCollection { + fn default() -> Self { + Self { + regions: Vec::new(), + } + } +} + +impl Clone for GuestRegionCollection { + fn clone(&self) -> Self { + GuestRegionCollection { + regions: self.regions.iter().map(Arc::clone).collect(), + } + } +} + +impl GuestRegionCollection { + /// Creates a new [`GuestRegionCollection`] from a vector of regions. 
+ /// + /// # Arguments + /// + /// * `regions` - The vector of regions. The regions shouldn't overlap, and they should be + /// sorted by the starting address. + pub fn from_regions(mut regions: Vec) -> std::result::Result { + Self::from_arc_regions(regions.drain(..).map(Arc::new).collect()) + } + + /// Creates a new [`GuestRegionCollection`] from a vector of Arc regions. + /// + /// Similar to the constructor `from_regions()` as it returns a + /// [`GuestRegionCollection`]. The need for this constructor is to provide a way for + /// consumer of this API to create a new [`GuestRegionCollection`] based on existing + /// regions coming from an existing [`GuestRegionCollection`] instance. + /// + /// # Arguments + /// + /// * `regions` - The vector of `Arc` regions. The regions shouldn't overlap and they should be + /// sorted by the starting address. + pub fn from_arc_regions(regions: Vec>) -> std::result::Result { + if regions.is_empty() { + return Err(Error::NoMemoryRegion); + } + + for window in regions.windows(2) { + let prev = &window[0]; + let next = &window[1]; + + if prev.start_addr() > next.start_addr() { + return Err(Error::UnsortedMemoryRegions); + } + + if prev.last_addr() >= next.start_addr() { + return Err(Error::MemoryRegionOverlap); + } + } + + Ok(Self { regions }) + } + + /// Insert a region into the `GuestMemoryMmap` object and return a new `GuestMemoryMmap`. + /// + /// # Arguments + /// * `region`: the memory region to insert into the guest memory object. + pub fn insert_region( + &self, + region: Arc, + ) -> std::result::Result, Error> { + let mut regions = self.regions.clone(); + regions.push(region); + regions.sort_by_key(|x| x.start_addr()); + + Self::from_arc_regions(regions) + } +} + +impl GuestMemory for GuestRegionCollection { + type R = R; + + fn num_regions(&self) -> usize { + self.regions.len() + } + + fn find_region(&self, addr: GuestAddress) -> Option<&R> { + let index = match self.regions.binary_search_by_key(&addr, |x| x.start_addr()) { + Ok(x) => Some(x), + // Within the closest region with starting address < addr + Err(x) if (x > 0 && addr <= self.regions[x - 1].last_addr()) => Some(x - 1), + _ => None, + }; + index.map(|x| self.regions[x].as_ref()) + } + + fn iter(&self) -> impl Iterator { + self.regions.iter().map(AsRef::as_ref) + } +} + +// This impl will be subsumed by the default impl in vm-memory#312 +impl Bytes for GuestRegionMmapExt { + type E = GuestMemoryError; + + /// # Examples + /// * Write a slice at guest address 0x1200. + /// + /// ``` + /// # #[cfg(feature = "backend-mmap")] + /// # use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap}; + /// # + /// # #[cfg(feature = "backend-mmap")] + /// # { + /// # let start_addr = GuestAddress(0x1000); + /// # let mut gm = GuestMemoryMmap::<()>::from_ranges(&vec![(start_addr, 0x400)]) + /// # .expect("Could not create guest memory"); + /// # + /// let res = gm + /// .write(&[1, 2, 3, 4, 5], GuestAddress(0x1200)) + /// .expect("Could not write to guest memory"); + /// assert_eq!(5, res); + /// # } + /// ``` + fn write(&self, buf: &[u8], addr: MemoryRegionAddress) -> Result { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice()? + .write(buf, maddr) + .map_err(Into::into) + } + + /// # Examples + /// * Read a slice of length 16 at guestaddress 0x1200. 
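// [Editor's note - not part of the patch] find_region() above depends on the sort
// order enforced by from_arc_regions(): an exact binary-search hit (Ok(x)) means
// `addr` is the first address of region x, while a miss (Err(x)) leaves only one
// candidate, the region just before the insertion point, which contains `addr`
// precisely when addr <= regions[x - 1].last_addr(); anything else is a hole and
// returns None.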
+ /// + /// ``` + /// # #[cfg(feature = "backend-mmap")] + /// # use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap}; + /// # + /// # #[cfg(feature = "backend-mmap")] + /// # { + /// # let start_addr = GuestAddress(0x1000); + /// # let mut gm = GuestMemoryMmap::<()>::from_ranges(&vec![(start_addr, 0x400)]) + /// # .expect("Could not create guest memory"); + /// # + /// let buf = &mut [0u8; 16]; + /// let res = gm + /// .read(buf, GuestAddress(0x1200)) + /// .expect("Could not read from guest memory"); + /// assert_eq!(16, res); + /// # } + /// ``` + fn read(&self, buf: &mut [u8], addr: MemoryRegionAddress) -> Result { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice()? + .read(buf, maddr) + .map_err(Into::into) + } + + fn write_slice(&self, buf: &[u8], addr: MemoryRegionAddress) -> Result<()> { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice()? + .write_slice(buf, maddr) + .map_err(Into::into) + } + + fn read_slice(&self, buf: &mut [u8], addr: MemoryRegionAddress) -> Result<()> { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice()? + .read_slice(buf, maddr) + .map_err(Into::into) + } + + fn store( + &self, + val: T, + addr: MemoryRegionAddress, + order: Ordering, + ) -> Result<()> { + self.as_volatile_slice().and_then(|s| { + s.store(val, addr.raw_value() as usize, order) + .map_err(Into::into) + }) + } + + fn load(&self, addr: MemoryRegionAddress, order: Ordering) -> Result { + self.as_volatile_slice() + .and_then(|s| s.load(addr.raw_value() as usize, order).map_err(Into::into)) + } + + // All remaining functions are deprecated and have been removed in vm-memory/main. + // Firecracker does not use them, so no point in writing out implementations here. + fn read_from( + &self, + _addr: MemoryRegionAddress, + _src: &mut F, + _count: usize, + ) -> std::result::Result + where + F: Read, + { + unimplemented!() + } + + fn read_exact_from( + &self, + _addr: MemoryRegionAddress, + _src: &mut F, + _count: usize, + ) -> std::result::Result<(), Self::E> + where + F: Read, + { + unimplemented!() + } + + fn write_to( + &self, + _addr: MemoryRegionAddress, + _dst: &mut F, + _count: usize, + ) -> std::result::Result + where + F: Write, + { + unimplemented!() + } + + fn write_all_to( + &self, + _addr: MemoryRegionAddress, + _dst: &mut F, + _count: usize, + ) -> std::result::Result<(), Self::E> + where + F: Write, + { + unimplemented!() + } +} diff --git a/src/vmm/src/vmm_config/balloon.rs b/src/vmm/src/vmm_config/balloon.rs index 83d419c49db..e56430d6dc6 100644 --- a/src/vmm/src/vmm_config/balloon.rs +++ b/src/vmm/src/vmm_config/balloon.rs @@ -88,9 +88,6 @@ impl BalloonBuilder { cfg.amount_mib, cfg.deflate_on_oom, cfg.stats_polling_interval_s, - // `restored` flag is false because this code path - // is never called by snapshot restore functionality. 
- false, )?))); Ok(()) @@ -178,7 +175,7 @@ pub(crate) mod tests { #[test] fn test_set_device() { let mut builder = BalloonBuilder::new(); - let balloon = Balloon::new(0, true, 0, true).unwrap(); + let balloon = Balloon::new(0, true, 0).unwrap(); builder.set_device(Arc::new(Mutex::new(balloon))); assert!(builder.inner.is_some()); } diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 38ee7cc2ce6..1bf7cda6342 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -9,6 +9,8 @@ use std::fs::File; use std::io::SeekFrom; use std::sync::Arc; +use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region}; +use log::error; use serde::{Deserialize, Serialize}; pub use vm_memory::bitmap::{AtomicBitmap, BS, Bitmap, BitmapSlice}; pub use vm_memory::mmap::MmapRegionBuilder; @@ -17,7 +19,7 @@ pub use vm_memory::{ Address, ByteValued, Bytes, FileOffset, GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize, MemoryRegionAddress, MmapRegion, address, }; -use vm_memory::{Error as VmMemoryError, GuestMemoryError, WriteVolatile}; +use vm_memory::{Error as VmMemoryError, GuestMemoryError, VolatileSlice, WriteVolatile}; use vmm_sys_util::errno; use crate::DirtyBitmap; @@ -25,7 +27,7 @@ use crate::utils::{get_page_size, u64_to_usize}; use crate::vmm_config::machine_config::HugePageConfig; /// Type of GuestMemoryMmap. -pub type GuestMemoryMmap = vm_memory::GuestMemoryMmap>; +pub type GuestMemoryMmap = crate::vm_memory_vendored::GuestRegionCollection; /// Type of GuestRegionMmap. pub type GuestRegionMmap = vm_memory::GuestRegionMmap>; /// Type of GuestMmapRegion. @@ -50,6 +52,156 @@ pub enum MemoryError { OffsetTooLarge, } +/// Type of the guest region +#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum GuestRegionType { + /// Guest DRAM + Dram, +} + +/// An extension to GuestMemoryRegion that stores the type of region, and the KVM slot +/// number. +#[derive(Debug)] +pub struct GuestRegionMmapExt { + /// the wrapped GuestRegionMmap + pub inner: GuestRegionMmap, + /// the type of region + pub region_type: GuestRegionType, + /// the KVM slot number assigned to this region + pub slot: u32, +} + +impl GuestRegionMmapExt { + pub(crate) fn dram_from_mmap_region(region: GuestRegionMmap, slot: u32) -> Self { + GuestRegionMmapExt { + inner: region, + region_type: GuestRegionType::Dram, + slot, + } + } + + pub(crate) fn from_state( + region: GuestRegionMmap, + state: &GuestMemoryRegionState, + slot: u32, + ) -> Result { + Ok(GuestRegionMmapExt { + inner: region, + region_type: state.region_type, + slot, + }) + } + + pub(crate) fn kvm_userspace_memory_region(&self) -> kvm_userspace_memory_region { + let flags = if self.inner.bitmap().is_some() { + KVM_MEM_LOG_DIRTY_PAGES + } else { + 0 + }; + + kvm_userspace_memory_region { + flags, + slot: self.slot, + guest_phys_addr: self.inner.start_addr().raw_value(), + memory_size: self.inner.len(), + userspace_addr: self.inner.as_ptr() as u64, + } + } + + pub(crate) fn discard_range( + &self, + caddr: MemoryRegionAddress, + len: usize, + ) -> Result { + let phys_address = self.get_host_address(caddr)?; + + match (self.inner.file_offset(), self.inner.flags()) { + // If and only if we are resuming from a snapshot file, we have a file and it's mapped + // private + (Some(_), flags) if flags & libc::MAP_PRIVATE != 0 => { + // Mmap a new anonymous region over the present one in order to create a hole + // with zero pages. 
+ // This workaround is (only) needed after resuming from a snapshot file because the + // guest memory is mmaped from file as private. In this case, MADV_DONTNEED on the + // file only drops any anonymous pages in range, but subsequent accesses would read + // whatever page is stored on the backing file. Mmapping anonymous pages ensures + // it's zeroed. + // SAFETY: The address and length are known to be valid. + let ret = unsafe { + libc::mmap( + phys_address.cast(), + len, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, + -1, + 0, + ) + }; + if ret == libc::MAP_FAILED { + let os_error = std::io::Error::last_os_error(); + error!("discard_range: mmap failed: {:?}", os_error); + Err(GuestMemoryError::IOError(os_error)) + } else { + Ok(len) + } + } + // TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd + // (or in general MAP_SHARED of a fd). In those cases we should use + // fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE). + // We keep falling to the madvise branch to keep the previous behaviour. + (None, _) | (Some(_), _) => { + // Madvise the region in order to mark it as not used. + // SAFETY: The address and length are known to be valid. + let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) }; + if ret < 0 { + let os_error = std::io::Error::last_os_error(); + error!("discard_range: madvise failed: {:?}", os_error); + Err(GuestMemoryError::IOError(os_error)) + } else { + Ok(len) + } + } + } + } +} + +#[allow(clippy::cast_possible_wrap)] +#[allow(clippy::cast_possible_truncation)] +impl GuestMemoryRegion for GuestRegionMmapExt { + type B = Option; + + fn len(&self) -> GuestUsize { + self.inner.len() + } + + fn start_addr(&self) -> GuestAddress { + self.inner.start_addr() + } + + fn bitmap(&self) -> &Self::B { + self.inner.bitmap() + } + + fn get_host_address( + &self, + addr: MemoryRegionAddress, + ) -> vm_memory::guest_memory::Result<*mut u8> { + self.inner.get_host_address(addr) + } + + fn file_offset(&self) -> Option<&FileOffset> { + self.inner.file_offset() + } + + fn get_slice( + &self, + offset: MemoryRegionAddress, + count: usize, + ) -> vm_memory::guest_memory::Result>> { + self.inner.get_slice(offset, count) + } +} + /// Creates a `Vec` of `GuestRegionMmap` with the given configuration pub fn create( regions: impl Iterator, @@ -158,6 +310,9 @@ where /// Store the dirty bitmap in internal store fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize); + + /// Discards a memory range, freeing up memory pages + fn discard_range(&self, addr: GuestAddress, range_len: usize) -> Result<(), GuestMemoryError>; } /// State of a guest memory region saved to file/buffer. @@ -169,6 +324,8 @@ pub struct GuestMemoryRegionState { pub base_address: u64, /// Region size. pub size: usize, + /// Region type + pub region_type: GuestRegionType, } /// Describes guest memory regions and their snapshot file mappings. 
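// [Editor's note - not part of the patch] GuestRegionMmapExt::discard_range() above
// absorbs the old balloon::util::remove_range() and drops its `restored_from_file`
// flag by inspecting the region itself: a region that is file-backed *and* mapped
// MAP_PRIVATE can only come from a snapshot file, and there MADV_DONTNEED alone
// would let later reads fault the old file contents back in, so the range is
// remapped as anonymous memory to guarantee zero pages. Every other case keeps the
// previous madvise(MADV_DONTNEED) behaviour. Roughly:
//
//     match (region.file_offset(), region.flags()) {
//         (Some(_), flags) if flags & MAP_PRIVATE != 0 => mmap_anonymous_over(range),
//         _ => madvise_dontneed(range),
//     }
//
// The TODO in the second arm notes that MAP_SHARED/memfd-backed memory would really
// need fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) to return pages to the
// host; this patch intentionally keeps the old behaviour there.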
@@ -196,6 +353,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { guest_memory_state.regions.push(GuestMemoryRegionState { base_address: region.start_addr().0, size: u64_to_usize(region.len()), + region_type: region.region_type, }); }); guest_memory_state @@ -227,8 +385,8 @@ impl GuestMemoryExtension for GuestMemoryMmap { let mut writer_offset = 0; let page_size = get_page_size().map_err(MemoryError::PageSize)?; - let write_result = self.iter().zip(0..).try_for_each(|(region, slot)| { - let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); + let write_result = self.iter().try_for_each(|region| { + let kvm_bitmap = dirty_bitmap.get(®ion.slot).unwrap(); let firecracker_bitmap = region.bitmap(); let mut write_size = 0; let mut dirty_batch_start: u64 = 0; @@ -291,8 +449,8 @@ impl GuestMemoryExtension for GuestMemoryMmap { /// Stores the dirty bitmap inside into the internal bitmap fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize) { - self.iter().zip(0..).for_each(|(region, slot)| { - let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); + self.iter().for_each(|region| { + let kvm_bitmap = dirty_bitmap.get(®ion.slot).unwrap(); let firecracker_bitmap = region.bitmap(); for (i, v) in kvm_bitmap.iter().enumerate() { @@ -308,6 +466,25 @@ impl GuestMemoryExtension for GuestMemoryMmap { } }); } + + fn discard_range(&self, addr: GuestAddress, range_len: usize) -> Result<(), GuestMemoryError> { + let res = self.try_access( + range_len, + addr, + |_offset, len, caddr, region| -> Result { + region.discard_range(caddr, len) + }, + ); + + match res { + Ok(count) if count == range_len => Ok(()), + Ok(count) => Err(GuestMemoryError::PartialBuffer { + expected: range_len, + completed: count, + }), + Err(e) => Err(e), + } + } } fn create_memfd( @@ -345,14 +522,26 @@ mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::collections::HashMap; - use std::io::{Read, Seek}; + use std::io::{Read, Seek, Write}; use vmm_sys_util::tempfile::TempFile; use super::*; use crate::snapshot::Snapshot; + use crate::test_utils::single_region_mem; use crate::utils::{get_page_size, mib_to_bytes}; + fn into_region_ext(regions: Vec) -> GuestMemoryMmap { + GuestMemoryMmap::from_regions( + regions + .into_iter() + .zip(0u32..) 
// assign dummy slots + .map(|(region, slot)| GuestRegionMmapExt::dram_from_mmap_region(region, slot)) + .collect(), + ) + .unwrap() + } + #[test] fn test_anonymous() { for dirty_page_tracking in [true, false] { @@ -386,10 +575,8 @@ mod tests { (GuestAddress(region_size as u64), region_size), // pages 3-5 (GuestAddress(region_size as u64 * 2), region_size), // pages 6-8 ]; - let guest_memory = GuestMemoryMmap::from_regions( - anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap(), - ) - .unwrap(); + let guest_memory = + into_region_ext(anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap()); let dirty_map = [ // page 0: not dirty @@ -430,7 +617,7 @@ mod tests { } } - fn check_serde(guest_memory: &GuestMemoryMmap) { + fn check_serde(guest_memory: &M) { let mut snapshot_data = vec![0u8; 10000]; let original_state = guest_memory.describe(); Snapshot::new(&original_state) @@ -448,15 +635,14 @@ mod tests { let region_size = page_size * 3; // Test with a single region - let guest_memory = GuestMemoryMmap::from_regions( + let guest_memory = into_region_ext( anonymous( [(GuestAddress(0), region_size)].into_iter(), false, HugePageConfig::None, ) .unwrap(), - ) - .unwrap(); + ); check_serde(&guest_memory); // Test with some regions @@ -465,10 +651,8 @@ mod tests { (GuestAddress(region_size as u64), region_size), // pages 3-5 (GuestAddress(region_size as u64 * 2), region_size), // pages 6-8 ]; - let guest_memory = GuestMemoryMmap::from_regions( - anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap(), - ) - .unwrap(); + let guest_memory = + into_region_ext(anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap()); check_serde(&guest_memory); } @@ -481,20 +665,21 @@ mod tests { (GuestAddress(0), page_size), (GuestAddress(page_size as u64 * 2), page_size), ]; - let guest_memory = GuestMemoryMmap::from_regions( + let guest_memory = into_region_ext( anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(), - ) - .unwrap(); + ); let expected_memory_state = GuestMemoryState { regions: vec![ GuestMemoryRegionState { base_address: 0, size: page_size, + region_type: GuestRegionType::Dram, }, GuestMemoryRegionState { base_address: page_size as u64 * 2, size: page_size, + region_type: GuestRegionType::Dram, }, ], }; @@ -507,20 +692,21 @@ mod tests { (GuestAddress(0), page_size * 3), (GuestAddress(page_size as u64 * 4), page_size * 3), ]; - let guest_memory = GuestMemoryMmap::from_regions( + let guest_memory = into_region_ext( anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(), - ) - .unwrap(); + ); let expected_memory_state = GuestMemoryState { regions: vec![ GuestMemoryRegionState { base_address: 0, size: page_size * 3, + region_type: GuestRegionType::Dram, }, GuestMemoryRegionState { base_address: page_size as u64 * 4, size: page_size * 3, + region_type: GuestRegionType::Dram, }, ], }; @@ -541,10 +727,9 @@ mod tests { (region_1_address, region_size), (region_2_address, region_size), ]; - let guest_memory = GuestMemoryMmap::from_regions( + let guest_memory = into_region_ext( anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(), - ) - .unwrap(); + ); // Check that Firecracker bitmap is clean. 
guest_memory.iter().for_each(|r| { assert!(!r.bitmap().dirty_at(0)); @@ -566,10 +751,8 @@ mod tests { let mut memory_file = TempFile::new().unwrap().into_file(); guest_memory.dump(&mut memory_file).unwrap(); - let restored_guest_memory = GuestMemoryMmap::from_regions( - snapshot_file(memory_file, memory_state.regions(), false).unwrap(), - ) - .unwrap(); + let restored_guest_memory = + into_region_ext(snapshot_file(memory_file, memory_state.regions(), false).unwrap()); // Check that the region contents are the same. let mut restored_region = vec![0u8; page_size * 2]; @@ -596,10 +779,9 @@ mod tests { (region_1_address, region_size), (region_2_address, region_size), ]; - let guest_memory = GuestMemoryMmap::from_regions( + let guest_memory = into_region_ext( anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(), - ) - .unwrap(); + ); // Check that Firecracker bitmap is clean. guest_memory.iter().for_each(|r| { assert!(!r.bitmap().dirty_at(0)); @@ -628,10 +810,8 @@ mod tests { guest_memory.dump_dirty(&mut file, &dirty_bitmap).unwrap(); // We can restore from this because this is the first dirty dump. - let restored_guest_memory = GuestMemoryMmap::from_regions( - snapshot_file(file, memory_state.regions(), false).unwrap(), - ) - .unwrap(); + let restored_guest_memory = + into_region_ext(snapshot_file(file, memory_state.regions(), false).unwrap()); // Check that the region contents are the same. let mut restored_region = vec![0u8; region_size]; @@ -687,10 +867,9 @@ mod tests { (region_1_address, region_size), (region_2_address, region_size), ]; - let guest_memory = GuestMemoryMmap::from_regions( + let guest_memory = into_region_ext( anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(), - ) - .unwrap(); + ); // Check that Firecracker bitmap is clean. guest_memory.iter().for_each(|r| { @@ -726,4 +905,106 @@ mod tests { seals.insert(memfd::FileSeal::SealGrow); memfd.add_seals(&seals).unwrap_err(); } + + /// This asserts that $lhs matches $rhs. + macro_rules! assert_match { + ($lhs:expr, $rhs:pat) => {{ assert!(matches!($lhs, $rhs)) }}; + } + + #[test] + fn test_discard_range() { + let page_size: usize = 0x1000; + let mem = single_region_mem(2 * page_size); + + // Fill the memory with ones. + let ones = vec![1u8; 2 * page_size]; + mem.write(&ones[..], GuestAddress(0)).unwrap(); + + // Remove the first page. + mem.discard_range(GuestAddress(0), page_size).unwrap(); + + // Check that the first page is zeroed. + let mut actual_page = vec![0u8; page_size]; + mem.read(actual_page.as_mut_slice(), GuestAddress(0)) + .unwrap(); + assert_eq!(vec![0u8; page_size], actual_page); + // Check that the second page still contains ones. + mem.read(actual_page.as_mut_slice(), GuestAddress(page_size as u64)) + .unwrap(); + assert_eq!(vec![1u8; page_size], actual_page); + + // Malformed range: the len is too big. + assert_match!( + mem.discard_range(GuestAddress(0), 0x10000).unwrap_err(), + GuestMemoryError::PartialBuffer { .. } + ); + + // Region not mapped. + assert_match!( + mem.discard_range(GuestAddress(0x10000), 0x10).unwrap_err(), + GuestMemoryError::InvalidGuestAddress(_) + ); + + // Madvise fail: the guest address is not aligned to the page size. 
+ assert_match!( + mem.discard_range(GuestAddress(0x20), page_size) + .unwrap_err(), + GuestMemoryError::IOError(_) + ); + } + + #[test] + fn test_discard_range_on_file() { + let page_size: usize = 0x1000; + let mut memory_file = TempFile::new().unwrap().into_file(); + memory_file.set_len(2 * page_size as u64).unwrap(); + memory_file.write_all(&vec![2u8; 2 * page_size]).unwrap(); + let mem = into_region_ext( + snapshot_file( + memory_file, + std::iter::once((GuestAddress(0), 2 * page_size)), + false, + ) + .unwrap(), + ); + + // Fill the memory with ones. + let ones = vec![1u8; 2 * page_size]; + mem.write(&ones[..], GuestAddress(0)).unwrap(); + + // Remove the first page. + mem.discard_range(GuestAddress(0), page_size).unwrap(); + + // Check that the first page is zeroed. + let mut actual_page = vec![0u8; page_size]; + mem.read(actual_page.as_mut_slice(), GuestAddress(0)) + .unwrap(); + assert_eq!(vec![0u8; page_size], actual_page); + // Check that the second page still contains ones. + mem.read(actual_page.as_mut_slice(), GuestAddress(page_size as u64)) + .unwrap(); + assert_eq!(vec![1u8; page_size], actual_page); + + // Malformed range: the len is too big. + assert_match!( + mem.discard_range(GuestAddress(0), 0x10000).unwrap_err(), + GuestMemoryError::PartialBuffer { + expected: _, + completed: _ + } + ); + + // Region not mapped. + assert_match!( + mem.discard_range(GuestAddress(0x10000), 0x10).unwrap_err(), + GuestMemoryError::InvalidGuestAddress(_) + ); + + // Mmap fail: the guest address is not aligned to the page size. + assert_match!( + mem.discard_range(GuestAddress(0x20), page_size) + .unwrap_err(), + GuestMemoryError::IOError(_) + ); + } } diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 27d43176367..a3752eb5ae8 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -15,8 +15,8 @@ use std::sync::{Arc, Mutex, MutexGuard}; #[cfg(target_arch = "x86_64")] use kvm_bindings::KVM_IRQCHIP_IOAPIC; use kvm_bindings::{ - KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MSI_VALID_DEVID, - KvmIrqRouting, kvm_irq_routing_entry, kvm_userspace_memory_region, + KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MSI_VALID_DEVID, KvmIrqRouting, + kvm_irq_routing_entry, kvm_userspace_memory_region, }; use kvm_ioctls::VmFd; use log::{debug, error}; @@ -36,7 +36,8 @@ use crate::snapshot::Persist; use crate::utils::u64_to_usize; use crate::vmm_config::snapshot::SnapshotType; use crate::vstate::memory::{ - Address, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, + GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestMemoryState, + GuestRegionMmap, GuestRegionMmapExt, MemoryError, }; use crate::vstate::resources::ResourceAllocator; use crate::vstate::vcpu::VcpuError; @@ -241,6 +242,8 @@ pub struct VmCommon { /// The KVM file descriptor used to access this Vm. pub fd: VmFd, max_memslots: u32, + /// Next free kvm slot + next_slot: u32, /// The guest memory of this Vm. 
     pub guest_memory: GuestMemoryMmap,
     /// Interrupts used by Vm's devices
@@ -275,7 +278,9 @@ pub enum VmError {
     /// Error calling mincore: {0}
     Mincore(vmm_sys_util::errno::Error),
     /// ResourceAllocator error: {0}
-    ResourceAllocator(#[from] vm_allocator::Error)
+    ResourceAllocator(#[from] vm_allocator::Error),
+    /// Memory error: {0}
+    MemoryError(#[from] MemoryError),
 }
 
 /// Contains Vm functions that are usable across CPU architectures
@@ -321,6 +326,7 @@ impl Vm {
         Ok(VmCommon {
             fd,
             max_memslots: kvm.max_nr_memslots(),
+            next_slot: 0,
             guest_memory: GuestMemoryMmap::default(),
             interrupts: Mutex::new(HashMap::with_capacity(GSI_MSI_END as usize + 1)),
             resource_allocator: Mutex::new(ResourceAllocator::new()),
@@ -348,53 +354,80 @@ impl Vm {
         Ok((vcpus, exit_evt))
     }
 
-    /// Register a list of new memory regions to this [`Vm`].
-    pub fn register_memory_regions(
-        &mut self,
-        regions: Vec<GuestRegionMmap>,
+    /// Call `set_user_memory_region` on the VM fd.
+    pub(crate) fn set_user_memory_region(
+        &self,
+        region: kvm_userspace_memory_region,
     ) -> Result<(), VmError> {
-        for region in regions {
-            self.register_memory_region(region)?
+        // SAFETY: Safe because the fd is a valid KVM file descriptor.
+        unsafe {
+            self.fd()
+                .set_user_memory_region(region)
+                .map_err(VmError::SetUserMemoryRegion)?;
         }
         Ok(())
     }
 
-    /// Register a new memory region to this [`Vm`].
-    pub fn register_memory_region(&mut self, region: GuestRegionMmap) -> Result<(), VmError> {
-        let next_slot = self
-            .guest_memory()
-            .num_regions()
-            .try_into()
-            .expect("Number of existing memory regions exceeds u32::MAX");
-        if self.common.max_memslots <= next_slot {
-            return Err(VmError::NotEnoughMemorySlots(self.common.max_memslots));
-        }
+    /// Allocates num_slots consecutive slot IDs and returns the first one.
+    fn allocate_slot_ids(&mut self, num_slots: u32) -> Result<u32, VmError> {
+        let slot = self.common.next_slot;
+        self.common.next_slot += num_slots;
 
-        let flags = if region.bitmap().is_some() {
-            KVM_MEM_LOG_DIRTY_PAGES
+        if self.common.next_slot > self.common.max_memslots {
+            Err(VmError::NotEnoughMemorySlots(self.common.max_memslots))
         } else {
-            0
-        };
+            Ok(slot)
+        }
+    }
 
-        let memory_region = kvm_userspace_memory_region {
-            slot: next_slot,
-            guest_phys_addr: region.start_addr().raw_value(),
-            memory_size: region.len(),
-            userspace_addr: region.as_ptr() as u64,
-            flags,
-        };
+    fn _register_memory_region(&mut self, region: Arc<GuestRegionMmapExt>) -> Result<(), VmError> {
+        let new_guest_memory = self
+            .common
+            .guest_memory
+            .insert_region(Arc::clone(&region))?;
 
-        let new_guest_memory = self.common.guest_memory.insert_region(Arc::new(region))?;
+        self.set_user_memory_region(region.kvm_userspace_memory_region())?;
 
-        // SAFETY: Safe because the fd is a valid KVM file descriptor.
-        unsafe {
-            self.fd()
-                .set_user_memory_region(memory_region)
-                .map_err(VmError::SetUserMemoryRegion)?;
+        self.common.guest_memory = new_guest_memory;
+
+        Ok(())
+    }
+
+    /// Register a list of new memory regions to this [`Vm`].
+    pub fn register_dram_memory_regions(
+        &mut self,
+        regions: Vec<GuestRegionMmap>,
+    ) -> Result<(), VmError> {
+        for region in regions {
+            let arcd_region = Arc::new(GuestRegionMmapExt::dram_from_mmap_region(
+                region,
+                self.allocate_slot_ids(1)?,
+            ));
+
+            self._register_memory_region(arcd_region)?
         }
-        self.common.guest_memory = new_guest_memory;
+
+        Ok(())
+    }
+
+    /// Restore a list of memory regions to this [`Vm`] from a snapshot.
+    ///
+    /// Note: `regions` and `state.regions` need to be in the same order.
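+    ///
+    /// Each restored region is registered with a freshly allocated KVM slot (via
+    /// `allocate_slot_ids`), just like regions added through `register_dram_memory_regions`.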
+    pub fn restore_memory_regions(
+        &mut self,
+        regions: Vec<GuestRegionMmap>,
+        state: &GuestMemoryState,
+    ) -> Result<(), VmError> {
+        for (region, state) in regions.into_iter().zip(state.regions.iter()) {
+            let arcd_region = Arc::new(GuestRegionMmapExt::from_state(
+                region,
+                state,
+                self.allocate_slot_ids(1)?,
+            )?);
+
+            self._register_memory_region(arcd_region)?
+        }
         Ok(())
     }
@@ -419,28 +452,26 @@ impl Vm {
 
     /// Resets the KVM dirty bitmap for each of the guest's memory regions.
     pub fn reset_dirty_bitmap(&self) {
-        self.guest_memory()
-            .iter()
-            .zip(0u32..)
-            .for_each(|(region, slot)| {
-                let _ = self.fd().get_dirty_log(slot, u64_to_usize(region.len()));
-            });
+        self.guest_memory().iter().for_each(|region| {
+            let _ = self
+                .fd()
+                .get_dirty_log(region.slot, u64_to_usize(region.len()));
+        });
     }
 
     /// Retrieves the KVM dirty bitmap for each of the guest's memory regions.
     pub fn get_dirty_bitmap(&self) -> Result<DirtyBitmap, VmError> {
         self.guest_memory()
             .iter()
-            .zip(0u32..)
-            .map(|(region, slot)| {
+            .map(|region| {
                 let bitmap = match region.bitmap() {
                     Some(_) => self
                         .fd()
-                        .get_dirty_log(slot, u64_to_usize(region.len()))
+                        .get_dirty_log(region.slot, u64_to_usize(region.len()))
                         .map_err(VmError::GetDirtyLog)?,
-                    None => mincore_bitmap(region)?,
+                    None => mincore_bitmap(&region.inner)?,
                 };
-                Ok((slot, bitmap))
+                Ok((region.slot, bitmap))
             })
             .collect()
     }
@@ -700,7 +731,7 @@ pub(crate) mod tests {
     pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm) {
         let (kvm, mut vm) = setup_vm();
         let gm = single_region_mem_raw(mem_size);
-        vm.register_memory_regions(gm).unwrap();
+        vm.register_dram_memory_regions(gm).unwrap();
         (kvm, vm)
     }
 
@@ -718,14 +749,14 @@ pub(crate) mod tests {
         // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
         // will result in error.
         let gm = single_region_mem_raw(0x10);
-        let res = vm.register_memory_regions(gm);
+        let res = vm.register_dram_memory_regions(gm);
         assert_eq!(
             res.unwrap_err().to_string(),
             "Cannot set the memory regions: Invalid argument (os error 22)"
         );
 
         let gm = single_region_mem_raw(0x1000);
-        let res = vm.register_memory_regions(gm);
+        let res = vm.register_dram_memory_regions(gm);
         res.unwrap();
     }
 
@@ -760,7 +791,7 @@ pub(crate) mod tests {
             let region =
                 GuestRegionMmap::new(region, GuestAddress(i as u64 * 0x1000)).unwrap();
 
-            let res = vm.register_memory_region(region);
+            let res = vm.register_dram_memory_regions(vec![region]);
 
            if max_nr_regions <= i {
                assert!(