blk.rs0.00%
1
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15
use std::fs::{File, OpenOptions};16
use std::io::{IoSlice, IoSliceMut, Read, Write};17
#[cfg(target_os = "linux")]18
use std::os::fd::AsRawFd;19
use std::os::unix::fs::FileExt;20
use std::path::Path;21
use std::sync::Arc;22
use std::sync::mpsc::Receiver;23
use std::thread::JoinHandle;24
25
#[cfg(target_os = "linux")]26
use io_uring::cqueue::Entry as Cqe;27
#[cfg(target_os = "linux")]28
use io_uring::opcode;29
#[cfg(target_os = "linux")]30
use io_uring::types::Fd;31
use mio::Registry;32
use mio::event::Event;33
use serde::Deserialize;34
use serde_aco::Help;35
use snafu::ResultExt;36
use zerocopy::{FromBytes, FromZeros, Immutable, IntoBytes};37
38
use crate::hv::IoeventFd;39
use crate::mem::mapped::RamBus;40
use crate::sync::notifier::Notifier;41
use crate::virtio::dev::{DevParam, Virtio, WakeEvent};42
use crate::virtio::queue::{DescChain, QueueReg, Status as QStatus, VirtQueue};43
use crate::virtio::worker::WorkerApi;44
#[cfg(target_os = "linux")]45
use crate::virtio::worker::io_uring::{ActiveIoUring, BufferAction, IoUring, VirtioIoUring};46
use crate::virtio::worker::mio::{ActiveMio, Mio, VirtioMio};47
use crate::virtio::{DeviceId, FEATURE_BUILT_IN, IrqSender, Result, error};48
use crate::{bitflags, consts, impl_mmio_for_zerocopy};49
50
consts! {51
#[derive(FromBytes)]52
pub struct RequestType(u32) {53
IN = 0;54
OUT = 1;55
FLUSH = 4;56
GET_ID = 8;57
GET_LIFETIME = 10;58
DISCARD = 11;59
WRITE_ZEROES = 13;60
SECURE_ERASE = 14;61
}62
}63
64
consts! {65
#[derive(FromBytes)]66
pub struct Status(u8) {67
OK = 0;68
IOERR = 1;69
UNSUPP = 2;70
}71
}72
73
#[repr(C)]74
#[derive(Debug, FromBytes)]75
pub struct Request {76
type_: RequestType,77
reserved: u32,78
sector: u64,79
}80
81
/// Presumably the maximum length, in bytes, of the identifier returned for a
/// `GET_ID` request; it is not referenced by the code visible in this file.
pub const VIRTIO_BLK_ID_SIZE: usize = 20;
83
/// Size in bytes of one sector; request offsets and the reported capacity are
/// expressed in this unit.
const SECTOR_SIZE: usize = 512;
85
bitflags! {86
pub struct BlockFeature(u128) {87
SIZE_MAX = 1 << 1;88
SEG_MAX = 1 << 2;89
GEOMETRY = 1 << 4;90
RO = 1 << 5;91
BLK_SIZE = 1 << 6;92
FLUSH = 1 << 9;93
TOPOLOGY = 1 << 10;94
CONFIG_WCE = 1 << 11;95
MQ = 1 << 12;96
DISCARD = 1 << 13;97
WRITE_ZEROS = 1 << 14;98
LIFETIME = 1 << 15;99
SECURE_ERASE = 1 << 16;100
}101
}102
103
#[derive(Debug, Default, FromZeros, Immutable, IntoBytes)]104
#[repr(C)]105
pub struct BlockConfig {106
capacity: u64,107
size_max: u32,108
seg_max: u32,109
110
// geometry111
cylinders: u16,112
heads: u8,113
sectors: u8,114
115
blk_size: u32,116
117
// topology118
physical_block_exp: u8,119
alignment_offset: u8,120
min_io_size: u16,121
opt_io_size: u32,122
123
writeback: u8,124
unused0: u8,125
num_queues: u16,126
max_discard_sectors: u32,127
max_discard_seg: u32,128
discard_sector_alignment: u32,129
max_write_zeroes_sectors: u32,130
max_write_zeroes_seg: u32,131
write_zeroes_may_unmap: u8,132
_unused1: [u8; 3],133
max_secure_erase_sectors: u32,134
max_secure_erase_seg: u32,135
secure_erase_sector_alignment: u32,136
}137
impl_mmio_for_zerocopy!(BlockConfig);138
139
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Help)]140
pub struct BlkFileParam {141
/// Path to a raw-formatted disk image.142
pub path: Box<Path>,143
/// Set the device as readonly. [default: false]144
#[serde(default)]145
pub readonly: bool,146
/// System API for asynchronous IO.147
#[serde(default)]148
pub api: WorkerApi,149
}150
151
impl DevParam for BlkFileParam {152
type Device = Block;153
154
fn build(self, name: impl Into<Arc<str>>) -> Result<Block> {155
Block::new(self, name)156
}157
}158
159
/// Outcome of parsing one descriptor chain in `Block::handle_desc`.
///
/// `'d` is the borrow of the descriptor chain and `'m` the lifetime of the
/// buffers it refers to. All variants except `Done` still need I/O against the
/// backing file and carry a reference to the status byte to fill in afterwards.
enum BlkRequest<'d, 'm> {
    /// The request was fully answered during parsing; `written` is the number
    /// of bytes to report as written to the guest.
    Done {
        written: u32,
    },
    /// Read from the disk at byte `offset` into `data`.
    In {
        data: &'d mut IoSliceMut<'m>,
        offset: u64,
        status: &'d mut u8,
    },
    /// Write `data` to the disk at byte `offset`.
    Out {
        data: &'d IoSlice<'m>,
        offset: u64,
        status: &'d mut u8,
    },
    /// Flush the backing file.
    Flush {
        status: &'d mut u8,
    },
}
178
#[derive(Debug)]179
pub struct Block {180
name: Arc<str>,181
config: Arc<BlockConfig>,182
disk: File,183
feature: BlockFeature,184
api: WorkerApi,185
}186
187
impl Block {188
pub fn new(param: BlkFileParam, name: impl Into<Arc<str>>) -> Result<Self> {189
let access_disk = error::AccessFile {190
path: param.path.as_ref(),191
};192
let disk = OpenOptions::new()193
.read(true)194
.write(!param.readonly)195
.open(¶m.path)196
.context(access_disk)?;197
198
let ctx_lock = error::LockFile {199
path: param.path.as_ref(),200
};201
if param.readonly {202
disk.try_lock_shared().context(ctx_lock)203
} else {204
disk.try_lock().context(ctx_lock)205
}?;206
207
let len = disk.metadata().context(access_disk)?.len();208
let config = BlockConfig {209
capacity: len / SECTOR_SIZE as u64,210
num_queues: 1,211
..Default::default()212
};213
let config = Arc::new(config);214
let mut feature = BlockFeature::FLUSH;215
if param.readonly {216
feature |= BlockFeature::RO;217
}218
Ok(Block {219
name: name.into(),220
disk,221
config,222
feature,223
api: param.api,224
})225
}226
227
fn handle_desc<'d, 'm>(&self, desc: &'d mut DescChain<'m>) -> Result<BlkRequest<'d, 'm>> {228
let [hdr, data_out @ ..] = &desc.readable[..] else {229
return error::InvalidBuffer.fail();230
};231
let Ok(request) = Request::read_from_bytes(hdr) else {232
return error::InvalidBuffer.fail();233
};234
let [data_in @ .., status_buf] = &mut desc.writable[..] else {235
return error::InvalidBuffer.fail();236
};237
let [status] = &mut status_buf[..] else {238
return error::InvalidBuffer.fail();239
};240
let offset = request.sector * SECTOR_SIZE as u64;241
match request.type_ {242
RequestType::IN => {243
let [data] = data_in else {244
return error::InvalidBuffer.fail();245
};246
Ok(BlkRequest::In {247
data,248
offset,249
status,250
})251
}252
RequestType::OUT => {253
if self.feature.contains(BlockFeature::RO) {254
log::error!("{}: attempt to write to a read-only device", self.name);255
*status = Status::IOERR.into();256
return Ok(BlkRequest::Done { written: 1 });257
}258
let [data] = data_out else {259
return error::InvalidBuffer.fail();260
};261
Ok(BlkRequest::Out {262
data,263
offset,264
status,265
})266
}267
RequestType::FLUSH => Ok(BlkRequest::Flush { status }),268
RequestType::GET_ID => {269
let mut name_bytes = self.name.as_bytes();270
let count = name_bytes.read_vectored(data_in)? as u32;271
*status = Status::OK.into();272
Ok(BlkRequest::Done { written: 1 + count })273
}274
unknown => {275
log::error!("{}: unimplemented op: {unknown:#x?}", self.name);276
*status = Status::UNSUPP.into();277
Ok(BlkRequest::Done { written: 1 })278
}279
}280
}281
}282
283
impl Virtio for Block {284
type Config = BlockConfig;285
type Feature = BlockFeature;286
287
fn id(&self) -> DeviceId {288
DeviceId::BLOCK289
}290
291
fn name(&self) -> &str {292
&self.name293
}294
295
fn num_queues(&self) -> u16 {296
self.config.num_queues297
}298
299
fn config(&self) -> Arc<BlockConfig> {300
self.config.clone()301
}302
303
fn feature(&self) -> u128 {304
self.feature.bits() | FEATURE_BUILT_IN305
}306
307
fn spawn_worker<S, E>(308
self,309
event_rx: Receiver<WakeEvent<S, E>>,310
memory: Arc<RamBus>,311
queue_regs: Arc<[QueueReg]>,312
) -> Result<(JoinHandle<()>, Arc<Notifier>)>313
where314
S: IrqSender,315
E: IoeventFd,316
{317
match self.api {318
#[cfg(target_os = "linux")]319
WorkerApi::IoUring => IoUring::spawn_worker(self, event_rx, memory, queue_regs),320
WorkerApi::Mio => Mio::spawn_worker(self, event_rx, memory, queue_regs),321
}322
}323
}324
325
impl VirtioMio for Block {326
fn reset(&mut self, _registry: &Registry) {}327
328
fn activate<'m, Q, S, E>(329
&mut self,330
_feature: u128,331
_active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,332
) -> Result<()>333
where334
Q: VirtQueue<'m>,335
S: IrqSender,336
E: IoeventFd,337
{338
Ok(())339
}340
341
fn handle_event<'a, 'm, Q, S, E>(342
&mut self,343
_event: &Event,344
_active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,345
) -> Result<()>346
where347
Q: VirtQueue<'m>,348
S: IrqSender,349
E: IoeventFd,350
{351
Ok(())352
}353
354
fn handle_queue<'m, Q, S, E>(355
&mut self,356
index: u16,357
active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,358
) -> Result<()>359
where360
Q: VirtQueue<'m>,361
S: IrqSender,362
E: IoeventFd,363
{364
let Some(Some(queue)) = active_mio.queues.get_mut(index as usize) else {365
log::error!("{}: invalid queue index {index}", self.name);366
return Ok(());367
};368
let mut disk = &self.disk;369
queue.handle_desc(index, active_mio.irq_sender, |chain| {370
let written_len = match Block::handle_desc(self, chain) {371
Err(e) => {372
log::error!("{}: handle descriptor: {e}", self.name);373
0374
}375
Ok(BlkRequest::Done { written }) => written,376
Ok(BlkRequest::In {377
data,378
offset,379
status,380
}) => match disk.read_exact_at(data, offset) {381
Ok(_) => {382
*status = Status::OK.into();383
data.len() as u32 + 1384
}385
Err(e) => {386
log::error!("{}: read: {e}", self.name);387
*status = Status::IOERR.into();388
1389
}390
},391
Ok(BlkRequest::Out {392
data,393
offset,394
status,395
}) => {396
match disk.write_all_at(data, offset) {397
Ok(_) => *status = Status::OK.into(),398
Err(e) => {399
log::error!("{}: write: {e}", self.name);400
*status = Status::IOERR.into();401
}402
}403
1404
}405
Ok(BlkRequest::Flush { status }) => {406
match disk.flush() {407
Ok(_) => *status = Status::OK.into(),408
Err(e) => {409
log::error!("{}: flush: {e}", self.name);410
*status = Status::IOERR.into();411
}412
}413
1414
}415
};416
Ok(QStatus::Done { len: written_len })417
})418
}419
}420
421
#[cfg(target_os = "linux")]422
impl VirtioIoUring for Block {423
fn activate<'m, Q, S, E>(424
&mut self,425
_feature: u128,426
_ring: &mut ActiveIoUring<'_, '_, 'm, Q, S, E>,427
) -> Result<()>428
where429
S: IrqSender,430
Q: VirtQueue<'m>,431
E: IoeventFd,432
{433
Ok(())434
}435
436
fn handle_desc(&mut self, _q_index: u16, chain: &mut DescChain) -> Result<BufferAction> {437
let fd = Fd(self.disk.as_raw_fd());438
let action = match Block::handle_desc(self, chain)? {439
BlkRequest::Done { written } => BufferAction::Written(written),440
BlkRequest::In { data, offset, .. } => {441
let read = opcode::Read::new(fd, data.as_mut_ptr(), data.len() as u32)442
.offset(offset)443
.build();444
BufferAction::Sqe(read)445
}446
BlkRequest::Out { data, offset, .. } => {447
let write = opcode::Write::new(fd, data.as_ptr(), data.len() as u32)448
.offset(offset)449
.build();450
BufferAction::Sqe(write)451
}452
BlkRequest::Flush { .. } => {453
let flush = opcode::Fsync::new(fd).build();454
BufferAction::Sqe(flush)455
}456
};457
Ok(action)458
}459
460
fn complete_desc(&mut self, q_index: u16, chain: &mut DescChain, cqe: &Cqe) -> Result<u32> {461
let result = cqe.result();462
let status_code = if result >= 0 {463
Status::OK464
} else {465
let err = std::io::Error::from_raw_os_error(-result);466
log::error!("{}: queue-{q_index} io error: {err}", self.name,);467
Status::IOERR468
};469
match Block::handle_desc(self, chain)? {470
BlkRequest::Done { .. } => unreachable!(),471
BlkRequest::Flush { status } => {472
*status = status_code.into();473
Ok(1)474
}475
BlkRequest::In { data, status, .. } => {476
*status = status_code.into();477
Ok(data.len() as u32 + 1)478
}479
BlkRequest::Out { status, .. } => {480
*status = status_code.into();481
Ok(1)482
}483
}484
}485
}486