tap.rs0.00%
1
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15
use std::cmp::max;16
use std::fmt::Debug;17
use std::fs::{File, OpenOptions};18
use std::io::{ErrorKind, IoSlice};19
use std::mem::MaybeUninit;20
use std::os::fd::{AsFd, AsRawFd};21
use std::os::unix::prelude::OpenOptionsExt;22
use std::path::Path;23
use std::sync::Arc;24
use std::sync::mpsc::Receiver;25
use std::thread::JoinHandle;26
27
use io_uring::cqueue::Entry as Cqe;28
use io_uring::opcode;29
use io_uring::types::Fd;30
use libc::{IFF_MULTI_QUEUE, IFF_NO_PI, IFF_TAP, IFF_VNET_HDR, O_NONBLOCK};31
use mio::event::Event;32
use mio::unix::SourceFd;33
use mio::{Interest, Registry, Token};34
use serde::Deserialize;35
use serde_aco::Help;36
use zerocopy::{FromBytes, IntoBytes};37
38
use crate::device::net::MacAddr;39
use crate::hv::IoeventFd;40
use crate::mem::mapped::RamBus;41
use crate::sync::notifier::Notifier;42
use crate::sys::if_tun::{TunFeature, tun_set_iff, tun_set_offload, tun_set_vnet_hdr_sz};43
use crate::virtio::dev::net::{44
CtrlAck, CtrlClass, CtrlHdr, CtrlMq, CtrlMqParisSet, NetConfig, NetFeature, VirtioNetHdr,45
};46
use crate::virtio::dev::{DevParam, DeviceId, Result, Virtio, WakeEvent};47
use crate::virtio::queue::{48
DescChain, QueueReg, Status, VirtQueue, copy_from_reader, copy_to_writer,49
};50
use crate::virtio::worker::WorkerApi;51
use crate::virtio::worker::io_uring::{ActiveIoUring, BufferAction, IoUring, VirtioIoUring};52
use crate::virtio::worker::mio::{ActiveMio, Mio, VirtioMio};53
use crate::virtio::{FEATURE_BUILT_IN, IrqSender, error};54
55
#[derive(Debug)]56
pub struct Net {57
name: Arc<str>,58
config: Arc<NetConfig>,59
tap_sockets: Vec<File>,60
feature: NetFeature,61
driver_feature: NetFeature,62
dev_tap: Option<Box<Path>>,63
if_name: Option<String>,64
api: WorkerApi,65
}66
67
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Help)]68
pub struct NetTapParam {69
/// MAC address of the virtual NIC, e.g. 06:3a:76:53:da:3d.70
pub mac: MacAddr,71
/// Maximum transmission unit.72
pub mtu: u16,73
/// Number of pairs of transmit/receive queues. [default: 1]74
#[serde(alias = "qp", default)]75
pub queue_pairs: u16,76
/// Path to the character device file of a tap interface.77
///78
/// Required for MacVTap and IPVTap, e.g. /dev/tapX.79
/// Optional for TUN/TAP. [default: /dev/net/tun]80
pub tap: Option<Box<Path>>,81
/// Name of a tap interface, e.g. tapX.82
///83
/// Required for TUN/TAP. Optional for MacVTap and IPVTap.84
#[serde(alias = "if")]85
pub if_name: Option<String>,86
/// System API for asynchronous IO.87
#[serde(default)]88
pub api: WorkerApi,89
}90
91
impl DevParam for NetTapParam {92
type Device = Net;93
94
fn build(self, name: impl Into<Arc<str>>) -> Result<Net> {95
Net::new(self, name)96
}97
}98
99
fn new_socket(dev_tap: Option<&Path>, blocking: bool) -> Result<File> {100
let tap_dev = dev_tap.unwrap_or(Path::new("/dev/net/tun"));101
let mut opt = OpenOptions::new();102
opt.read(true).write(true);103
if !blocking {104
opt.custom_flags(O_NONBLOCK);105
}106
let socket = opt.open(tap_dev)?;107
Ok(socket)108
}109
110
impl Net {111
pub fn new(param: NetTapParam, name: impl Into<Arc<str>>) -> Result<Self> {112
let mut socket = new_socket(113
param.tap.as_deref(),114
matches!(param.api, WorkerApi::IoUring),115
)?;116
let max_queue_pairs = max(param.queue_pairs, 1);117
setup_socket(&mut socket, param.if_name.as_deref(), max_queue_pairs > 1)?;118
let mut dev_feat = NetFeature::MAC119
| NetFeature::MTU120
| NetFeature::CSUM121
| NetFeature::HOST_TSO4122
| NetFeature::HOST_TSO6123
| NetFeature::HOST_ECN124
| NetFeature::HOST_UFO125
| NetFeature::HOST_USO126
| NetFeature::CTRL_VQ127
| detect_tap_offload(&socket);128
if max_queue_pairs > 1 {129
dev_feat |= NetFeature::MQ;130
}131
let net = Net {132
name: name.into(),133
config: Arc::new(NetConfig {134
mac: param.mac,135
max_queue_pairs,136
mtu: param.mtu,137
..Default::default()138
}),139
tap_sockets: vec![socket],140
feature: dev_feat,141
driver_feature: NetFeature::empty(),142
dev_tap: param.tap,143
if_name: param.if_name,144
api: param.api,145
};146
Ok(net)147
}148
149
fn handle_ctrl_queue(150
&mut self,151
desc: &mut DescChain,152
registry: Option<&Registry>,153
) -> Result<u32> {154
let Some(header) = desc155
.readable156
.first()157
.and_then(|b| CtrlHdr::read_from_bytes(b).ok())158
else {159
return error::InvalidBuffer.fail();160
};161
let Some(ack_byte) = desc.writable.first_mut().and_then(|v| v.first_mut()) else {162
return error::InvalidBuffer.fail();163
};164
let ack = match header.class {165
CtrlClass::MQ => match CtrlMq(header.command) {166
CtrlMq::VQ_PARIS_SET => {167
let to_set = |b: &IoSlice| CtrlMqParisSet::read_from_bytes(b).ok();168
let Some(data) = desc.readable.get(1).and_then(to_set) else {169
return error::InvalidBuffer.fail();170
};171
let pairs = data.virtq_pairs as usize;172
self.tap_sockets.truncate(pairs);173
for index in self.tap_sockets.len()..pairs {174
let mut socket = new_socket(175
self.dev_tap.as_deref(),176
matches!(self.api, WorkerApi::IoUring),177
)?;178
setup_socket(&mut socket, self.if_name.as_deref(), true)?;179
enable_tap_offload(&mut socket, self.driver_feature)?;180
if let Some(r) = registry {181
r.register(182
&mut SourceFd(&socket.as_raw_fd()),183
Token(index),184
Interest::READABLE | Interest::WRITABLE,185
)?;186
}187
self.tap_sockets.push(socket);188
}189
log::info!("{}: using {pairs} pairs of queues", self.name);190
CtrlAck::OK191
}192
_ => CtrlAck::ERR,193
},194
_ => CtrlAck::ERR,195
};196
*ack_byte = ack.raw();197
Ok(1)198
}199
}200
201
impl Virtio for Net {202
type Config = NetConfig;203
type Feature = NetFeature;204
205
fn id(&self) -> DeviceId {206
DeviceId::NET207
}208
209
fn name(&self) -> &str {210
&self.name211
}212
213
fn num_queues(&self) -> u16 {214
let data_queues = self.config.max_queue_pairs << 1;215
if self.feature.contains(NetFeature::CTRL_VQ) {216
data_queues + 1217
} else {218
data_queues219
}220
}221
222
fn config(&self) -> Arc<NetConfig> {223
self.config.clone()224
}225
226
fn feature(&self) -> u128 {227
self.feature.bits() | FEATURE_BUILT_IN228
}229
230
fn spawn_worker<S, E>(231
self,232
event_rx: Receiver<WakeEvent<S, E>>,233
memory: Arc<RamBus>,234
queue_regs: Arc<[QueueReg]>,235
) -> Result<(JoinHandle<()>, Arc<Notifier>)>236
where237
S: IrqSender,238
E: IoeventFd,239
{240
match self.api {241
WorkerApi::Mio => Mio::spawn_worker(self, event_rx, memory, queue_regs),242
WorkerApi::IoUring => IoUring::spawn_worker(self, event_rx, memory, queue_regs),243
}244
}245
}246
247
impl VirtioMio for Net {248
fn reset(&mut self, registry: &Registry) {249
self.tap_sockets.truncate(1);250
let _ = registry.deregister(&mut SourceFd(&self.tap_sockets[0].as_raw_fd()));251
}252
253
fn activate<'m, Q, S, E>(254
&mut self,255
feature: u128,256
active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,257
) -> Result<()>258
where259
Q: VirtQueue<'m>,260
S: IrqSender,261
E: IoeventFd,262
{263
self.driver_feature = NetFeature::from_bits_retain(feature);264
let socket = &mut self.tap_sockets[0];265
enable_tap_offload(socket, self.driver_feature)?;266
active_mio.poll.registry().register(267
&mut SourceFd(&socket.as_raw_fd()),268
Token(0),269
Interest::READABLE | Interest::WRITABLE,270
)?;271
Ok(())272
}273
274
fn handle_event<'a, 'm, Q, S, E>(275
&mut self,276
event: &Event,277
active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,278
) -> Result<()>279
where280
Q: VirtQueue<'m>,281
S: IrqSender,282
E: IoeventFd,283
{284
let token = event.token().0;285
let irq_sender = active_mio.irq_sender;286
if event.is_readable() {287
let rx_queue_index = token << 1;288
let Some(Some(queue)) = active_mio.queues.get_mut(rx_queue_index) else {289
log::error!("{}: cannot find rx queue {rx_queue_index}", self.name);290
return Ok(());291
};292
let Some(socket) = self.tap_sockets.get(token) else {293
log::error!("{}: cannot find tap queue {token}", self.name);294
return Ok(());295
};296
queue.handle_desc(rx_queue_index as u16, irq_sender, copy_from_reader(socket))?;297
}298
if event.is_writable() {299
let tx_queue_index = (token << 1) + 1;300
let Some(Some(queue)) = active_mio.queues.get_mut(tx_queue_index) else {301
log::error!("{}: cannot find tx queue {tx_queue_index}", self.name);302
return Ok(());303
};304
let Some(socket) = self.tap_sockets.get(token) else {305
log::error!("{}: cannot find tap queue {token}", self.name);306
return Ok(());307
};308
queue.handle_desc(tx_queue_index as u16, irq_sender, copy_to_writer(socket))?;309
}310
Ok(())311
}312
313
fn handle_queue<'m, Q, S, E>(314
&mut self,315
index: u16,316
active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,317
) -> Result<()>318
where319
Q: VirtQueue<'m>,320
S: IrqSender,321
E: IoeventFd,322
{323
let Some(Some(queue)) = active_mio.queues.get_mut(index as usize) else {324
log::error!("{}: invalid queue index {index}", self.name);325
return Ok(());326
};327
let irq_sender = active_mio.irq_sender;328
let registry = active_mio.poll.registry();329
if index == self.config.max_queue_pairs * 2 {330
return queue.handle_desc(index, irq_sender, |chain| {331
let len = self.handle_ctrl_queue(chain, Some(registry))?;332
Ok(Status::Done { len })333
});334
}335
let Some(socket) = self.tap_sockets.get(index as usize >> 1) else {336
log::error!("{}: invalid tap queue {}", self.name, index >> 1);337
return Ok(());338
};339
if index & 1 == 0 {340
queue.handle_desc(index, irq_sender, copy_from_reader(socket))341
} else {342
queue.handle_desc(index, irq_sender, copy_to_writer(socket))343
}344
}345
}346
347
impl VirtioIoUring for Net {348
fn activate<'m, Q, S, E>(349
&mut self,350
feature: u128,351
_ring: &mut ActiveIoUring<'_, '_, 'm, Q, S, E>,352
) -> Result<()>353
where354
S: IrqSender,355
Q: VirtQueue<'m>,356
E: IoeventFd,357
{358
self.driver_feature = NetFeature::from_bits_retain(feature);359
let socket = &mut self.tap_sockets[0];360
enable_tap_offload(socket, self.driver_feature)?;361
Ok(())362
}363
364
fn handle_desc(&mut self, q_index: u16, chain: &mut DescChain) -> Result<BufferAction> {365
if q_index == self.config.max_queue_pairs * 2 {366
let len = self.handle_ctrl_queue(chain, None)?;367
return Ok(BufferAction::Written(len));368
}369
let Some(socket) = self.tap_sockets.get(q_index as usize >> 1) else {370
log::error!("{}: invalid tap queue {}", self.name, q_index >> 1);371
return Ok(BufferAction::Written(0));372
};373
let entry = if q_index & 1 == 0 {374
let writable = &chain.writable;375
opcode::Readv::new(376
Fd(socket.as_raw_fd()),377
writable.as_ptr() as *const _,378
writable.len() as _,379
)380
.build()381
} else {382
let readable = &chain.readable;383
opcode::Writev::new(384
Fd(socket.as_raw_fd()),385
readable.as_ptr() as *const _,386
readable.len() as _,387
)388
.build()389
};390
Ok(BufferAction::Sqe(entry))391
}392
393
fn complete_desc(&mut self, q_index: u16, _chain: &mut DescChain, cqe: &Cqe) -> Result<u32> {394
let ret = cqe.result();395
if ret < 0 {396
let err = std::io::Error::from_raw_os_error(-ret);397
log::error!("{}: failed to send/receive packet: {err}", self.name,);398
return Ok(0);399
}400
if q_index & 1 == 0 {401
Ok(ret as u32)402
} else {403
Ok(0)404
}405
}406
}407
408
fn setup_socket(file: &mut File, if_name: Option<&str>, mq: bool) -> Result<()> {409
let mut tap_ifconfig = unsafe { MaybeUninit::<libc::ifreq>::zeroed().assume_init() };410
411
if let Some(name) = if_name {412
let name_len = std::cmp::min(tap_ifconfig.ifr_name.len() - 1, name.len());413
tap_ifconfig.ifr_name.as_mut_bytes()[0..name_len]414
.copy_from_slice(&name.as_bytes()[0..name_len]);415
}416
417
let mut flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;418
if mq {419
flags |= IFF_MULTI_QUEUE;420
}421
tap_ifconfig.ifr_ifru.ifru_flags = flags as i16;422
423
unsafe { tun_set_iff(file, &tap_ifconfig) }.or_else(|e| {424
if e.kind() == ErrorKind::InvalidInput && !mq {425
flags |= IFF_MULTI_QUEUE;426
tap_ifconfig.ifr_ifru.ifru_flags = flags as i16;427
unsafe { tun_set_iff(file, &tap_ifconfig) }428
} else {429
Err(e)430
}431
})?;432
433
unsafe { tun_set_vnet_hdr_sz(file, &(size_of::<VirtioNetHdr>() as _)) }?;434
Ok(())435
}436
437
fn detect_tap_offload(tap: &impl AsFd) -> NetFeature {438
let mut tap_feature = TunFeature::all();439
let mut dev_feat = NetFeature::GUEST_CSUM440
| NetFeature::GUEST_TSO4441
| NetFeature::GUEST_TSO6442
| NetFeature::GUEST_ECN443
| NetFeature::GUEST_UFO444
| NetFeature::GUEST_USO4445
| NetFeature::GUEST_USO6;446
if unsafe { tun_set_offload(tap, tap_feature) }.is_ok() {447
return dev_feat;448
}449
tap_feature &= !(TunFeature::USO4 | TunFeature::USO6);450
dev_feat &= !(NetFeature::GUEST_USO4 | NetFeature::GUEST_USO6);451
if unsafe { tun_set_offload(tap, tap_feature) }.is_ok() {452
return dev_feat;453
}454
tap_feature &= !(TunFeature::UFO);455
dev_feat &= !NetFeature::GUEST_UFO;456
if unsafe { tun_set_offload(tap, tap_feature) }.is_ok() {457
return dev_feat;458
}459
NetFeature::empty()460
}461
462
fn enable_tap_offload(tap: &mut File, feature: NetFeature) -> Result<()> {463
let mut tap_feature = TunFeature::empty();464
if feature.contains(NetFeature::GUEST_CSUM) {465
tap_feature |= TunFeature::CSUM;466
}467
if feature.contains(NetFeature::GUEST_TSO4) {468
tap_feature |= TunFeature::TSO4;469
}470
if feature.contains(NetFeature::GUEST_TSO6) {471
tap_feature |= TunFeature::TSO6;472
}473
if feature.contains(NetFeature::GUEST_ECN) {474
tap_feature |= TunFeature::TSO_ECN;475
}476
if feature.contains(NetFeature::GUEST_UFO) {477
tap_feature |= TunFeature::UFO;478
}479
if feature.contains(NetFeature::GUEST_USO4) {480
tap_feature |= TunFeature::USO4;481
}482
if feature.contains(NetFeature::GUEST_USO6) {483
tap_feature |= TunFeature::USO6;484
}485
unsafe { tun_set_offload(tap, tap_feature) }?;486
Ok(())487
}488