Skip to content

Commit

Permalink
fix: Retry KVM_CREATE_VM on EINTR
Browse files Browse the repository at this point in the history
It is known that KVM_CREATE_VM fails with EINTR on heavily loaded
machines with many VMs. It might be a kernel bug but apparently has not
been fixed. To mitigate it, QEMU retries indefinitely on EINTR.
Similarly, retry up to 5 times.

Signed-off-by: Takahiro Itazuri <[email protected]>
  • Loading branch information
zulinx86 committed Feb 21, 2025
1 parent 8757b06 commit f292393
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 2 deletions.
29 changes: 28 additions & 1 deletion src/vmm/src/vstate/vm/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,34 @@ pub enum ArchVmError {
impl ArchVm {
/// Create a new `Vm` struct.
pub fn new(kvm: &Kvm) -> Result<ArchVm, VmError> {
let fd = kvm.fd.create_vm().map_err(VmError::CreateVm)?;
// It is known that KVM_CREATE_VM spuriously fails with EINTR on heavily loaded machines
// with many VMs. It might be a kernel bug but apparently has not been fixed.
//
// To mitigate it, QEMU does an infinite retry on EINTR:
// - https://github.com/qemu/qemu/commit/94ccff133820552a859c0fb95e33a539e0b90a75
// - https://github.com/qemu/qemu/commit/bbde13cd14ad4eec18529ce0bf5876058464e124
//
// Similarly, we do retries up to 5 times until the bug is fixed.
const MAX_ATTEMPTS: u8 = 5;
let mut attempts = 0;
let fd = loop {
match kvm.fd.create_vm() {
Ok(fd) => break fd,
Err(err) if err.errno() == libc::EINTR => {
attempts += 1;
info!("KVM_CREATE_VM returned EINTR: {attempts} of {MAX_ATTEMPTS} attempts");
if attempts == MAX_ATTEMPTS {
return Err(VmError::CreateVm(err));
}
// Exponential backoff (1us, 2us, 4us, 8us => 15us in total)
std::thread::sleep(std::time::Duration::from_micros(
2u64.pow(attempts as u32 - 1),
));
}
Err(err) => return Err(VmError::CreateVm(err)),
}
};

Ok(ArchVm {
fd,
irqchip_handle: None,
Expand Down
30 changes: 29 additions & 1 deletion src/vmm/src/vstate/vm/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use kvm_ioctls::VmFd;
use serde::{Deserialize, Serialize};

use crate::arch::x86_64::msr::MsrError;
use crate::logger::info;
use crate::utils::u64_to_usize;
use crate::vstate::vm::VmError;

Expand Down Expand Up @@ -53,7 +54,34 @@ pub struct ArchVm {
impl ArchVm {
/// Create a new `Vm` struct.
pub fn new(kvm: &crate::vstate::kvm::Kvm) -> Result<ArchVm, VmError> {
let fd = kvm.fd.create_vm().map_err(VmError::CreateVm)?;
// It is known that KVM_CREATE_VM spuriously fails with EINTR on heavily loaded machines
// with many VMs. It might be a kernel bug but apparently has not been fixed.
//
// To mitigate it, QEMU does an infinite retry on EINTR:
// - https://github.com/qemu/qemu/commit/94ccff133820552a859c0fb95e33a539e0b90a75
// - https://github.com/qemu/qemu/commit/bbde13cd14ad4eec18529ce0bf5876058464e124
//
// Similarly, we do retries up to 5 times until the bug is fixed.
const MAX_ATTEMPTS: u8 = 5;
let mut attempts = 0;
let fd = loop {
match kvm.fd.create_vm() {
Ok(fd) => break fd,
Err(err) if err.errno() == libc::EINTR => {
attempts += 1;
info!("KVM_CREATE_VM returned EINTR: {attempts} of {MAX_ATTEMPTS} attempts");
if attempts == MAX_ATTEMPTS {
return Err(VmError::CreateVm(err));
}
// Exponential backoff (1us, 2us, 4us, 8us => 15us in total)
std::thread::sleep(std::time::Duration::from_micros(
2u64.pow(attempts as u32 - 1),
));
}
Err(err) => return Err(VmError::CreateVm(err)),
}
};

let msrs_to_save = kvm.msrs_to_save().map_err(ArchVmError::GetMsrsToSave)?;

fd.set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS))
Expand Down

0 comments on commit f292393

Please sign in to comment.