Alioth Code Coverage

board.rs 8.67%

1// Copyright 2024 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#[cfg(target_arch = "aarch64")]
16#[path = "board_aarch64.rs"]
17mod aarch64;
18#[cfg(target_arch = "x86_64")]
19#[path = "board_x86_64/board_x86_64.rs"]
20mod x86_64;
21
22#[cfg(target_os = "linux")]
23use std::collections::HashMap;
24use std::ffi::CStr;
25use std::sync::Arc;
26use std::sync::mpsc::Sender;
27use std::thread::JoinHandle;
28
29use libc::{MAP_PRIVATE, MAP_SHARED};
30use parking_lot::{Condvar, Mutex, RwLock, RwLockReadGuard};
31use serde::Deserialize;
32use serde_aco::Help;
33use snafu::{ResultExt, Snafu};
34
35#[cfg(target_arch = "x86_64")]
36use crate::arch::cpuid::CpuidIn;
37#[cfg(target_arch = "x86_64")]
38use crate::arch::layout::PORT_PCI_ADDRESS;
39use crate::arch::layout::{
40 MEM_64_START, PCIE_CONFIG_START, PCIE_MMIO_32_NON_PREFETCHABLE_END,
41 PCIE_MMIO_32_NON_PREFETCHABLE_START, PCIE_MMIO_32_PREFETCHABLE_END,
42 PCIE_MMIO_32_PREFETCHABLE_START, RAM_32_SIZE,
43};
44use crate::device::MmioDev;
45#[cfg(target_arch = "x86_64")]
46use crate::device::fw_cfg::FwCfg;
47use crate::errors::{DebugTrace, trace_error};
48use crate::hv::{Coco, Hypervisor, Vcpu, Vm, VmConfig, VmEntry, VmExit};
49#[cfg(target_arch = "x86_64")]
50use crate::loader::xen;
51use crate::loader::{Executable, InitState, Payload, linux};
52use crate::mem::mapped::ArcMemPages;
53use crate::mem::{MemBackend, MemConfig, MemRegion, MemRegionType, Memory};
54use crate::pci::bus::PciBus;
55#[cfg(target_os = "linux")]
56use crate::vfio::container::Container;
57#[cfg(target_os = "linux")]
58use crate::vfio::iommu::Ioas;
59
60#[cfg(target_arch = "aarch64")]
61use self::aarch64::ArchBoard;
62#[cfg(target_arch = "x86_64")]
63use self::x86_64::ArchBoard;
64
65#[trace_error]
66#[derive(Snafu, DebugTrace)]
67#[snafu(module, context(suffix(false)))]
68pub enum Error {
69 #[snafu(display("Hypervisor internal error"), context(false))]
70 HvError { source: Box<crate::hv::Error> },
71 #[snafu(display("Failed to access guest memory"), context(false))]
72 Memory { source: Box<crate::mem::Error> },
73 #[snafu(display("Failed to load payload"), context(false))]
74 Loader { source: Box<crate::loader::Error> },
75 #[snafu(display("Invalid CPU topology"))]
76 InvalidCpuTopology,
77 #[snafu(display("Failed to create VCPU-{index}"))]
78 CreateVcpu {
79 index: u16,
80 source: Box<crate::hv::Error>,
81 },
82 #[snafu(display("Failed to run VCPU-{index}"))]
83 RunVcpu {
84 index: u16,
85 source: Box<crate::hv::Error>,
86 },
87 #[snafu(display("Failed to stop VCPU-{index}"))]
88 StopVcpu {
89 index: u16,
90 source: Box<crate::hv::Error>,
91 },
92 #[snafu(display("Failed to reset PCI devices"))]
93 ResetPci { source: Box<crate::pci::Error> },
94 #[snafu(display("Failed to configure firmware"))]
95 FwCfg { error: std::io::Error },
96 #[snafu(display("Missing payload"))]
97 MissingPayload,
98 #[snafu(display("Failed to notify the VMM thread"))]
99 NotifyVmm,
100 #[snafu(display("Another VCPU thread has signaled failure"))]
101 PeerFailure,
102 #[snafu(display("Unexpected state: {state:?}, want {want:?}"))]
103 UnexpectedState { state: BoardState, want: BoardState },
104 #[cfg(target_arch = "x86_64")]
105 #[snafu(display("Missing CPUID leaf {leaf:x?}"))]
106 MissingCpuid { leaf: CpuidIn },
107 #[snafu(display("Firmware error"), context(false))]
108 Firmware { source: Box<crate::firmware::Error> },
109 #[snafu(display("Unknown firmware metadata"))]
110 UnknownFirmwareMetadata,
111}
112
113type Result<T, E = Error> = std::result::Result<T, E>;
114
115#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Help)]
116pub struct CpuTopology {
117 #[serde(default)]
118 /// Enable SMT (Hyperthreading).
119 pub smt: bool,
120 #[serde(default)]
121 /// Number of cores per socket.
122 pub cores: u16,
123 #[serde(default)]
124 /// Number of sockets.
125 pub sockets: u8,
126}
127
128impl CpuTopology {
129 pub fn encode(&self, index: u16) -> (u8, u16, u8) {42x
130 let total_cores = self.cores * self.sockets as u16;42x
131 let thread_id = index / total_cores;42x
132 let core_id = index % total_cores % self.cores;42x
133 let socket_id = index % total_cores / self.cores;42x
134 (socket_id as u8, core_id, thread_id as u8)42x
135 }42x
136
137 pub fn decode(&self, socket_id: u8, core_id: u16, thread_id: u8) -> u16 {27x
138 let total_cores = self.cores * self.sockets as u16;27x
139 thread_id as u16 * total_cores + core_id + socket_id as u16 * self.cores27x
140 }27x
141}
142
/// Serde default for `CpuConfig::count`: a single VCPU.
const fn default_cpu_count() -> u16 {
    1
}
146
147#[derive(Debug, Default, PartialEq, Eq, Deserialize, Help)]
148pub struct CpuConfig {
149 /// Number of VCPUs assigned to the guest. [default: 1]
150 #[serde(default = "default_cpu_count")]
151 pub count: u16,
152 /// Architecture specific CPU topology.
153 #[serde(default)]
154 pub topology: CpuTopology,
155}
156
157impl CpuConfig {
158 pub fn fixup(&mut self) -> Result<()> {6x
159 if self.topology.sockets == 0 {6x
160 self.topology.sockets = 1;3x
161 }3x
162 let vcpus_per_core = 1 + self.topology.smt as u16;6x
163 if self.topology.cores == 0 {6x
164 self.topology.cores = self.count / self.topology.sockets as u16 / vcpus_per_core;3x
165 }3x
166 let vcpus_per_socket = self.topology.cores * vcpus_per_core;6x
167 let count = self.topology.sockets as u16 * vcpus_per_socket;6x
168 if count != self.count {6x
169 return error::InvalidCpuTopology.fail();3x
170 }3x
171 Ok(())3x
172 }6x
173}
174
/// Run state shared by all VCPU threads of a board.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoardState {
    /// VCPU threads are parked; this is the state a new board starts in.
    Paused,
    /// VCPU threads are executing guest code.
    Running,
    /// The board is shutting down; VCPU threads exit their run loop.
    Shutdown,
    /// A reboot was requested; VCPU threads reset and start over.
    RebootPending,
}
182
183#[derive(Debug)]
184struct MpSync {
185 state: BoardState,
186 fatal: bool,
187 count: u16,
188}
189
/// Size in bytes (2^40, i.e. 1 TiB) of the 64-bit PCIe MMIO window.
pub const PCIE_MMIO_64_SIZE: u64 = 1 << 40;
191
192#[derive(Debug, Default, PartialEq, Eq, Deserialize)]
193pub struct BoardConfig {
194 pub mem: MemConfig,
195 pub cpu: CpuConfig,
196 pub coco: Option<Coco>,
197}
198
199impl BoardConfig {
200 pub fn pcie_mmio_64_start(&self) -> u64 {
201 (self.mem.size.saturating_sub(RAM_32_SIZE) + MEM_64_START).next_power_of_two()
202 }
203
204 pub fn config_fixup(&mut self) -> Result<()> {
205 self.cpu.fixup()
206 }
207}
208
209type VcpuGuard<'a> = RwLockReadGuard<'a, Vec<VcpuHandle>>;
210type VcpuHandle = JoinHandle<Result<()>>;
211
212pub struct Board<V>
213where
214 V: Vm,
215{
216 pub vm: V,
217 pub memory: Memory,
218 pub vcpus: Arc<RwLock<Vec<VcpuHandle>>>,
219 pub arch: ArchBoard<V>,
220 pub config: BoardConfig,
221 pub payload: RwLock<Option<Payload>>,
222 pub io_devs: RwLock<Vec<(u16, Arc<dyn MmioDev>)>>,
223 pub mmio_devs: RwLock<Vec<(u64, Arc<dyn MmioDev>)>>,
224 pub pci_bus: PciBus,
225 #[cfg(target_arch = "x86_64")]
226 pub fw_cfg: Mutex<Option<Arc<Mutex<FwCfg>>>>,
227 #[cfg(target_os = "linux")]
228 pub vfio_ioases: Mutex<HashMap<Box<str>, Arc<Ioas>>>,
229 #[cfg(target_os = "linux")]
230 pub vfio_containers: Mutex<HashMap<Box<str>, Arc<Container>>>,
231
232 mp_sync: Mutex<MpSync>,
233 cond_var: Condvar,
234}
235
236impl<V> Board<V>
237where
238 V: Vm,
239{
240 pub fn new<H>(hv: &H, mut config: BoardConfig) -> Result<Self>
241 where
242 H: Hypervisor<Vm = V>,
243 {
244 config.config_fixup()?;
245
246 let vm_config = VmConfig {
247 coco: config.coco.clone(),
248 };
249 let mut vm = hv.create_vm(&vm_config)?;
250 let vm_memory = Arc::new(vm.create_vm_memory()?);
251 let arch = ArchBoard::new(hv, &vm, &config)?;
252
253 let board = Board {
254 vm,
255 memory: Memory::new(vm_memory.clone()),
256 arch,
257 config,
258 payload: RwLock::new(None),
259 vcpus: Arc::new(RwLock::new(Vec::new())),
260 io_devs: RwLock::new(Vec::new()),
261 mmio_devs: RwLock::new(Vec::new()),
262 pci_bus: PciBus::new(),
263 #[cfg(target_arch = "x86_64")]
264 fw_cfg: Mutex::new(None),
265 #[cfg(target_os = "linux")]
266 vfio_ioases: Mutex::new(HashMap::new()),
267 #[cfg(target_os = "linux")]
268 vfio_containers: Mutex::new(HashMap::new()),
269
270 mp_sync: Mutex::new(MpSync {
271 state: BoardState::Paused,
272 count: 0,
273 fatal: false,
274 }),
275 cond_var: Condvar::new(),
276 };
277
278 board.coco_init(vm_memory)?;
279
280 Ok(board)
281 }
282
283 pub fn boot(&self) -> Result<()> {
284 self.resume()
285 }
286
287 pub fn resume(&self) -> Result<()> {
288 let mut mp_sync = self.mp_sync.lock();
289 if mp_sync.state == BoardState::Paused {
290 mp_sync.state = BoardState::Running;
291 } else {
292 return error::UnexpectedState {
293 state: mp_sync.state,
294 want: BoardState::Paused,
295 }
296 .fail();
297 }
298 self.cond_var.notify_all();
299 Ok(())
300 }
301
302 pub fn pause(&self) -> Result<()> {
303 let vcpus = self.vcpus.read();
304 let mut mp_sync = self.mp_sync.lock();
305 if mp_sync.state != BoardState::Running {
306 return error::UnexpectedState {
307 state: mp_sync.state,
308 want: BoardState::Running,
309 }
310 .fail();
311 }
312 mp_sync.state = BoardState::Paused;
313 self.stop_other_vcpus(None, &vcpus)?;
314 Ok(())
315 }
316
317 fn load_payload(&self, vcpu: &mut V::Vcpu) -> Result<InitState, Error> {
318 let payload = self.payload.read();
319 let Some(payload) = payload.as_ref() else {
320 return error::MissingPayload.fail();
321 };
322
323 if let Some(fw) = payload.firmware.as_ref() {
324 return self.setup_firmware(fw, payload, vcpu);
325 }
326
327 let Some(exec) = &payload.executable else {
328 return error::MissingPayload.fail();
329 };
330 let mem_regions = self.memory.mem_region_entries();
331 let init_state = match exec {
332 Executable::Linux(image) => linux::load(
333 &self.memory.ram_bus(),
334 &mem_regions,
335 image.as_ref(),
336 payload.cmdline.as_deref(),
337 payload.initramfs.as_deref(),
338 ),
339 #[cfg(target_arch = "x86_64")]
340 Executable::Pvh(image) => xen::load(
341 &self.memory.ram_bus(),
342 &mem_regions,
343 image.as_ref(),
344 payload.cmdline.as_deref(),
345 payload.initramfs.as_deref(),
346 ),
347 }?;
348 Ok(init_state)
349 }
350
351 fn add_pci_devs(&self) -> Result<()> {
352 #[cfg(target_arch = "x86_64")]
353 self.memory
354 .add_io_dev(PORT_PCI_ADDRESS, self.pci_bus.io_bus.clone())?;
355 self.memory.add_region(
356 PCIE_CONFIG_START,
357 Arc::new(MemRegion::with_emulated(
358 self.pci_bus.segment.clone(),
359 MemRegionType::Reserved,
360 )),
361 )?;
362 let pcie_mmio_64_start = self.config.pcie_mmio_64_start();
363 self.pci_bus.segment.assign_resources(&[
364 (0x1000, 0x10000),
365 (
366 PCIE_MMIO_32_NON_PREFETCHABLE_START,
367 PCIE_MMIO_32_NON_PREFETCHABLE_END,
368 ),
369 (
370 PCIE_MMIO_32_PREFETCHABLE_START,
371 PCIE_MMIO_32_PREFETCHABLE_END,
372 ),
373 (pcie_mmio_64_start, pcie_mmio_64_start + PCIE_MMIO_64_SIZE),
374 ]);
375 Ok(())
376 }
377
378 fn vcpu_loop(&self, vcpu: &mut <V as Vm>::Vcpu, index: u16) -> Result<BoardState> {
379 let mut vm_entry = VmEntry::None;
380 loop {
381 let vm_exit = vcpu.run(vm_entry).context(error::RunVcpu { index })?;
382 vm_entry = match vm_exit {
383 #[cfg(target_arch = "x86_64")]
384 VmExit::Io { port, write, size } => self.memory.handle_io(port, write, size)?,
385 VmExit::Mmio { addr, write, size } => self.memory.handle_mmio(addr, write, size)?,
386 VmExit::Shutdown => break Ok(BoardState::Shutdown),
387 VmExit::Reboot => break Ok(BoardState::RebootPending),
388 VmExit::Paused => break Ok(BoardState::Paused),
389 VmExit::Interrupted => {
390 let mp_sync = self.mp_sync.lock();
391 match mp_sync.state {
392 BoardState::Shutdown => VmEntry::Shutdown,
393 BoardState::RebootPending => VmEntry::Reboot,
394 BoardState::Paused => VmEntry::Pause,
395 BoardState::Running => VmEntry::None,
396 }
397 }
398 VmExit::ConvertMemory { gpa, size, private } => {
399 self.memory.mark_private_memory(gpa, size, private)?;
400 VmEntry::None
401 }
402 };
403 }
404 }
405
406 fn sync_vcpus(&self, vcpus: &VcpuGuard) -> Result<()> {
407 let mut mp_sync = self.mp_sync.lock();
408 if mp_sync.fatal {
409 return error::PeerFailure.fail();
410 }
411
412 mp_sync.count += 1;
413 if mp_sync.count == vcpus.len() as u16 {
414 mp_sync.count = 0;
415 self.cond_var.notify_all();
416 } else {
417 self.cond_var.wait(&mut mp_sync)
418 }
419
420 if mp_sync.fatal {
421 return error::PeerFailure.fail();
422 }
423
424 Ok(())
425 }
426
427 fn notify_vmm(&self, index: u16, event_tx: &Sender<u16>) -> Result<()> {
428 if event_tx.send(index).is_err() {
429 error::NotifyVmm.fail()
430 } else {
431 Ok(())
432 }
433 }
434
435 fn boot_init_sync(&self, index: u16, vcpu: &mut V::Vcpu) -> Result<()> {
436 let vcpus = self.vcpus.read();
437 if index == 0 {
438 self.create_ram()?;
439 for (port, dev) in self.io_devs.read().iter() {
440 self.memory.add_io_dev(*port, dev.clone())?;
441 }
442 for (addr, dev) in self.mmio_devs.read().iter() {
443 self.memory.add_mmio_dev(*addr, dev.clone())?;
444 }
445 self.add_pci_devs()?;
446 let init_state = self.load_payload(vcpu)?;
447 self.init_boot_vcpu(vcpu, &init_state)?;
448 self.create_firmware_data(&init_state)?;
449 }
450 self.init_ap(index, vcpu, &vcpus)?;
451 self.coco_finalize(index, &vcpus)?;
452 self.sync_vcpus(&vcpus)
453 }
454
455 fn stop_other_vcpus(&self, current: Option<u16>, vcpus: &VcpuGuard) -> Result<()> {
456 for (index, handle) in vcpus.iter().enumerate() {
457 let index = index as u16;
458 if let Some(current) = current {
459 if current == index {
460 continue;
461 }
462 log::info!("VCPU-{current}: stopping VCPU-{index}");
463 } else {
464 log::info!("Stopping VCPU-{index}");
465 }
466 let identity = self.encode_cpu_identity(index);
467 self.vm
468 .stop_vcpu(identity, handle)
469 .context(error::StopVcpu { index })?;
470 }
471 Ok(())
472 }
473
474 fn run_vcpu_inner(&self, index: u16, event_tx: &Sender<u16>) -> Result<(), Error> {
475 let mut vcpu = self.create_vcpu(index)?;
476 self.notify_vmm(index, event_tx)?;
477 self.init_vcpu(index, &mut vcpu)?;
478
479 'reboot: loop {
480 let mut mp_sync = self.mp_sync.lock();
481 loop {
482 match mp_sync.state {
483 BoardState::Paused => self.cond_var.wait(&mut mp_sync),
484 BoardState::Running => break,
485 BoardState::Shutdown => break 'reboot Ok(()),
486 BoardState::RebootPending => mp_sync.state = BoardState::Running,
487 }
488 }
489 drop(mp_sync);
490
491 self.boot_init_sync(index, &mut vcpu)?;
492
493 let request = 'pause: loop {
494 let request = self.vcpu_loop(&mut vcpu, index);
495
496 let vcpus = self.vcpus.read();
497 let mut mp_sync = self.mp_sync.lock();
498 if mp_sync.state == BoardState::Running {
499 mp_sync.state = match request {
500 Ok(BoardState::RebootPending) => BoardState::RebootPending,
501 Ok(BoardState::Paused) => BoardState::Paused,
502 _ => BoardState::Shutdown,
503 };
504 log::trace!("VCPU-{index}: change state to {:?}", mp_sync.state);
505 self.stop_other_vcpus(Some(index), &vcpus)?;
506 }
507 loop {
508 match mp_sync.state {
509 BoardState::Running => break,
510 BoardState::Paused => self.cond_var.wait(&mut mp_sync),
511 BoardState::RebootPending | BoardState::Shutdown => break 'pause request,
512 }
513 }
514 };
515
516 if index == 0 {
517 self.pci_bus.segment.reset().context(error::ResetPci)?;
518 self.memory.reset()?;
519 }
520 self.reset_vcpu(index, &mut vcpu)?;
521
522 request?;
523
524 let vcpus = self.vcpus.read();
525 self.sync_vcpus(&vcpus)?;
526 }
527 }
528
529 fn create_vcpu(&self, index: u16) -> Result<V::Vcpu> {
530 let identity = self.encode_cpu_identity(index);
531 let vcpu = self
532 .vm
533 .create_vcpu(index, identity)
534 .context(error::CreateVcpu { index })?;
535 Ok(vcpu)
536 }
537
538 pub fn run_vcpu(&self, index: u16, event_tx: Sender<u16>) -> Result<(), Error> {
539 let ret = self.run_vcpu_inner(index, &event_tx);
540
541 let _ = self.notify_vmm(index, &event_tx);
542
543 if matches!(ret, Ok(_) | Err(Error::PeerFailure { .. })) {
544 return Ok(());
545 }
546
547 log::warn!("VCPU-{index} reported error {ret:?}, unblocking other VCPUs...");
548 let mut mp_sync = self.mp_sync.lock();
549 mp_sync.fatal = true;
550 if mp_sync.count > 0 {
551 self.cond_var.notify_all();
552 }
553 ret
554 }
555
556 fn create_ram_pages(
557 &self,
558 size: u64,
559 #[cfg_attr(not(target_os = "linux"), allow(unused_variables))] name: &CStr,
560 ) -> Result<ArcMemPages> {
561 let mmap_flag = if self.config.mem.shared {
562 Some(MAP_SHARED)
563 } else {
564 Some(MAP_PRIVATE)
565 };
566 let pages = match self.config.mem.backend {
567 #[cfg(target_os = "linux")]
568 MemBackend::Memfd => ArcMemPages::from_memfd(name, size as usize, None),
569 MemBackend::Anonymous => ArcMemPages::from_anonymous(size as usize, None, mmap_flag),
570 }?;
571 #[cfg(target_os = "linux")]
572 if self.config.mem.transparent_hugepage {
573 pages.madvise_hugepage()?;
574 }
575 Ok(pages)
576 }
577}
578
579#[cfg(test)]
580#[path = "board_test.rs"]
581mod tests;
582