Alioth Code Coverage

tap.rs — 0.00%

1// Copyright 2024 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::cmp::max;
16use std::fmt::Debug;
17use std::fs::{File, OpenOptions};
18use std::io::{ErrorKind, IoSlice};
19use std::mem::MaybeUninit;
20use std::os::fd::{AsFd, AsRawFd};
21use std::os::unix::prelude::OpenOptionsExt;
22use std::path::Path;
23use std::sync::Arc;
24use std::sync::mpsc::Receiver;
25use std::thread::JoinHandle;
26
27use io_uring::cqueue::Entry as Cqe;
28use io_uring::opcode;
29use io_uring::types::Fd;
30use libc::{IFF_MULTI_QUEUE, IFF_NO_PI, IFF_TAP, IFF_VNET_HDR, O_NONBLOCK};
31use mio::event::Event;
32use mio::unix::SourceFd;
33use mio::{Interest, Registry, Token};
34use serde::Deserialize;
35use serde_aco::Help;
36use zerocopy::{FromBytes, IntoBytes};
37
38use crate::device::net::MacAddr;
39use crate::hv::IoeventFd;
40use crate::mem::mapped::RamBus;
41use crate::sync::notifier::Notifier;
42use crate::sys::if_tun::{TunFeature, tun_set_iff, tun_set_offload, tun_set_vnet_hdr_sz};
43use crate::virtio::dev::net::{
44 CtrlAck, CtrlClass, CtrlHdr, CtrlMq, CtrlMqParisSet, NetConfig, NetFeature, VirtioNetHdr,
45};
46use crate::virtio::dev::{DevParam, DeviceId, Result, Virtio, WakeEvent};
47use crate::virtio::queue::{
48 DescChain, QueueReg, Status, VirtQueue, copy_from_reader, copy_to_writer,
49};
50use crate::virtio::worker::WorkerApi;
51use crate::virtio::worker::io_uring::{ActiveIoUring, BufferAction, IoUring, VirtioIoUring};
52use crate::virtio::worker::mio::{ActiveMio, Mio, VirtioMio};
53use crate::virtio::{FEATURE_BUILT_IN, IrqSender, error};
54
55#[derive(Debug)]
56pub struct Net {
57 name: Arc<str>,
58 config: Arc<NetConfig>,
59 tap_sockets: Vec<File>,
60 feature: NetFeature,
61 driver_feature: NetFeature,
62 dev_tap: Option<Box<Path>>,
63 if_name: Option<String>,
64 api: WorkerApi,
65}
66
67#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Help)]
68pub struct NetTapParam {
69 /// MAC address of the virtual NIC, e.g. 06:3a:76:53:da:3d.
70 pub mac: MacAddr,
71 /// Maximum transmission unit.
72 pub mtu: u16,
73 /// Number of pairs of transmit/receive queues. [default: 1]
74 #[serde(alias = "qp", default)]
75 pub queue_pairs: u16,
76 /// Path to the character device file of a tap interface.
77 ///
78 /// Required for MacVTap and IPVTap, e.g. /dev/tapX.
79 /// Optional for TUN/TAP. [default: /dev/net/tun]
80 pub tap: Option<Box<Path>>,
81 /// Name of a tap interface, e.g. tapX.
82 ///
83 /// Required for TUN/TAP. Optional for MacVTap and IPVTap.
84 #[serde(alias = "if")]
85 pub if_name: Option<String>,
86 /// System API for asynchronous IO.
87 #[serde(default)]
88 pub api: WorkerApi,
89}
90
91impl DevParam for NetTapParam {
92 type Device = Net;
93
94 fn build(self, name: impl Into<Arc<str>>) -> Result<Net> {
95 Net::new(self, name)
96 }
97}
98
99fn new_socket(dev_tap: Option<&Path>, blocking: bool) -> Result<File> {
100 let tap_dev = dev_tap.unwrap_or(Path::new("/dev/net/tun"));
101 let mut opt = OpenOptions::new();
102 opt.read(true).write(true);
103 if !blocking {
104 opt.custom_flags(O_NONBLOCK);
105 }
106 let socket = opt.open(tap_dev)?;
107 Ok(socket)
108}
109
110impl Net {
111 pub fn new(param: NetTapParam, name: impl Into<Arc<str>>) -> Result<Self> {
112 let mut socket = new_socket(
113 param.tap.as_deref(),
114 matches!(param.api, WorkerApi::IoUring),
115 )?;
116 let max_queue_pairs = max(param.queue_pairs, 1);
117 setup_socket(&mut socket, param.if_name.as_deref(), max_queue_pairs > 1)?;
118 let mut dev_feat = NetFeature::MAC
119 | NetFeature::MTU
120 | NetFeature::CSUM
121 | NetFeature::HOST_TSO4
122 | NetFeature::HOST_TSO6
123 | NetFeature::HOST_ECN
124 | NetFeature::HOST_UFO
125 | NetFeature::HOST_USO
126 | NetFeature::CTRL_VQ
127 | detect_tap_offload(&socket);
128 if max_queue_pairs > 1 {
129 dev_feat |= NetFeature::MQ;
130 }
131 let net = Net {
132 name: name.into(),
133 config: Arc::new(NetConfig {
134 mac: param.mac,
135 max_queue_pairs,
136 mtu: param.mtu,
137 ..Default::default()
138 }),
139 tap_sockets: vec![socket],
140 feature: dev_feat,
141 driver_feature: NetFeature::empty(),
142 dev_tap: param.tap,
143 if_name: param.if_name,
144 api: param.api,
145 };
146 Ok(net)
147 }
148
149 fn handle_ctrl_queue(
150 &mut self,
151 desc: &mut DescChain,
152 registry: Option<&Registry>,
153 ) -> Result<u32> {
154 let Some(header) = desc
155 .readable
156 .first()
157 .and_then(|b| CtrlHdr::read_from_bytes(b).ok())
158 else {
159 return error::InvalidBuffer.fail();
160 };
161 let Some(ack_byte) = desc.writable.first_mut().and_then(|v| v.first_mut()) else {
162 return error::InvalidBuffer.fail();
163 };
164 let ack = match header.class {
165 CtrlClass::MQ => match CtrlMq(header.command) {
166 CtrlMq::VQ_PARIS_SET => {
167 let to_set = |b: &IoSlice| CtrlMqParisSet::read_from_bytes(b).ok();
168 let Some(data) = desc.readable.get(1).and_then(to_set) else {
169 return error::InvalidBuffer.fail();
170 };
171 let pairs = data.virtq_pairs as usize;
172 self.tap_sockets.truncate(pairs);
173 for index in self.tap_sockets.len()..pairs {
174 let mut socket = new_socket(
175 self.dev_tap.as_deref(),
176 matches!(self.api, WorkerApi::IoUring),
177 )?;
178 setup_socket(&mut socket, self.if_name.as_deref(), true)?;
179 enable_tap_offload(&mut socket, self.driver_feature)?;
180 if let Some(r) = registry {
181 r.register(
182 &mut SourceFd(&socket.as_raw_fd()),
183 Token(index),
184 Interest::READABLE | Interest::WRITABLE,
185 )?;
186 }
187 self.tap_sockets.push(socket);
188 }
189 log::info!("{}: using {pairs} pairs of queues", self.name);
190 CtrlAck::OK
191 }
192 _ => CtrlAck::ERR,
193 },
194 _ => CtrlAck::ERR,
195 };
196 *ack_byte = ack.raw();
197 Ok(1)
198 }
199}
200
201impl Virtio for Net {
202 type Config = NetConfig;
203 type Feature = NetFeature;
204
205 fn id(&self) -> DeviceId {
206 DeviceId::NET
207 }
208
209 fn name(&self) -> &str {
210 &self.name
211 }
212
213 fn num_queues(&self) -> u16 {
214 let data_queues = self.config.max_queue_pairs << 1;
215 if self.feature.contains(NetFeature::CTRL_VQ) {
216 data_queues + 1
217 } else {
218 data_queues
219 }
220 }
221
222 fn config(&self) -> Arc<NetConfig> {
223 self.config.clone()
224 }
225
226 fn feature(&self) -> u128 {
227 self.feature.bits() | FEATURE_BUILT_IN
228 }
229
230 fn spawn_worker<S, E>(
231 self,
232 event_rx: Receiver<WakeEvent<S, E>>,
233 memory: Arc<RamBus>,
234 queue_regs: Arc<[QueueReg]>,
235 ) -> Result<(JoinHandle<()>, Arc<Notifier>)>
236 where
237 S: IrqSender,
238 E: IoeventFd,
239 {
240 match self.api {
241 WorkerApi::Mio => Mio::spawn_worker(self, event_rx, memory, queue_regs),
242 WorkerApi::IoUring => IoUring::spawn_worker(self, event_rx, memory, queue_regs),
243 }
244 }
245}
246
247impl VirtioMio for Net {
248 fn reset(&mut self, registry: &Registry) {
249 self.tap_sockets.truncate(1);
250 let _ = registry.deregister(&mut SourceFd(&self.tap_sockets[0].as_raw_fd()));
251 }
252
253 fn activate<'m, Q, S, E>(
254 &mut self,
255 feature: u128,
256 active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,
257 ) -> Result<()>
258 where
259 Q: VirtQueue<'m>,
260 S: IrqSender,
261 E: IoeventFd,
262 {
263 self.driver_feature = NetFeature::from_bits_retain(feature);
264 let socket = &mut self.tap_sockets[0];
265 enable_tap_offload(socket, self.driver_feature)?;
266 active_mio.poll.registry().register(
267 &mut SourceFd(&socket.as_raw_fd()),
268 Token(0),
269 Interest::READABLE | Interest::WRITABLE,
270 )?;
271 Ok(())
272 }
273
274 fn handle_event<'a, 'm, Q, S, E>(
275 &mut self,
276 event: &Event,
277 active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,
278 ) -> Result<()>
279 where
280 Q: VirtQueue<'m>,
281 S: IrqSender,
282 E: IoeventFd,
283 {
284 let token = event.token().0;
285 let irq_sender = active_mio.irq_sender;
286 if event.is_readable() {
287 let rx_queue_index = token << 1;
288 let Some(Some(queue)) = active_mio.queues.get_mut(rx_queue_index) else {
289 log::error!("{}: cannot find rx queue {rx_queue_index}", self.name);
290 return Ok(());
291 };
292 let Some(socket) = self.tap_sockets.get(token) else {
293 log::error!("{}: cannot find tap queue {token}", self.name);
294 return Ok(());
295 };
296 queue.handle_desc(rx_queue_index as u16, irq_sender, copy_from_reader(socket))?;
297 }
298 if event.is_writable() {
299 let tx_queue_index = (token << 1) + 1;
300 let Some(Some(queue)) = active_mio.queues.get_mut(tx_queue_index) else {
301 log::error!("{}: cannot find tx queue {tx_queue_index}", self.name);
302 return Ok(());
303 };
304 let Some(socket) = self.tap_sockets.get(token) else {
305 log::error!("{}: cannot find tap queue {token}", self.name);
306 return Ok(());
307 };
308 queue.handle_desc(tx_queue_index as u16, irq_sender, copy_to_writer(socket))?;
309 }
310 Ok(())
311 }
312
313 fn handle_queue<'m, Q, S, E>(
314 &mut self,
315 index: u16,
316 active_mio: &mut ActiveMio<'_, '_, 'm, Q, S, E>,
317 ) -> Result<()>
318 where
319 Q: VirtQueue<'m>,
320 S: IrqSender,
321 E: IoeventFd,
322 {
323 let Some(Some(queue)) = active_mio.queues.get_mut(index as usize) else {
324 log::error!("{}: invalid queue index {index}", self.name);
325 return Ok(());
326 };
327 let irq_sender = active_mio.irq_sender;
328 let registry = active_mio.poll.registry();
329 if index == self.config.max_queue_pairs * 2 {
330 return queue.handle_desc(index, irq_sender, |chain| {
331 let len = self.handle_ctrl_queue(chain, Some(registry))?;
332 Ok(Status::Done { len })
333 });
334 }
335 let Some(socket) = self.tap_sockets.get(index as usize >> 1) else {
336 log::error!("{}: invalid tap queue {}", self.name, index >> 1);
337 return Ok(());
338 };
339 if index & 1 == 0 {
340 queue.handle_desc(index, irq_sender, copy_from_reader(socket))
341 } else {
342 queue.handle_desc(index, irq_sender, copy_to_writer(socket))
343 }
344 }
345}
346
347impl VirtioIoUring for Net {
348 fn activate<'m, Q, S, E>(
349 &mut self,
350 feature: u128,
351 _ring: &mut ActiveIoUring<'_, '_, 'm, Q, S, E>,
352 ) -> Result<()>
353 where
354 S: IrqSender,
355 Q: VirtQueue<'m>,
356 E: IoeventFd,
357 {
358 self.driver_feature = NetFeature::from_bits_retain(feature);
359 let socket = &mut self.tap_sockets[0];
360 enable_tap_offload(socket, self.driver_feature)?;
361 Ok(())
362 }
363
364 fn handle_desc(&mut self, q_index: u16, chain: &mut DescChain) -> Result<BufferAction> {
365 if q_index == self.config.max_queue_pairs * 2 {
366 let len = self.handle_ctrl_queue(chain, None)?;
367 return Ok(BufferAction::Written(len));
368 }
369 let Some(socket) = self.tap_sockets.get(q_index as usize >> 1) else {
370 log::error!("{}: invalid tap queue {}", self.name, q_index >> 1);
371 return Ok(BufferAction::Written(0));
372 };
373 let entry = if q_index & 1 == 0 {
374 let writable = &chain.writable;
375 opcode::Readv::new(
376 Fd(socket.as_raw_fd()),
377 writable.as_ptr() as *const _,
378 writable.len() as _,
379 )
380 .build()
381 } else {
382 let readable = &chain.readable;
383 opcode::Writev::new(
384 Fd(socket.as_raw_fd()),
385 readable.as_ptr() as *const _,
386 readable.len() as _,
387 )
388 .build()
389 };
390 Ok(BufferAction::Sqe(entry))
391 }
392
393 fn complete_desc(&mut self, q_index: u16, _chain: &mut DescChain, cqe: &Cqe) -> Result<u32> {
394 let ret = cqe.result();
395 if ret < 0 {
396 let err = std::io::Error::from_raw_os_error(-ret);
397 log::error!("{}: failed to send/receive packet: {err}", self.name,);
398 return Ok(0);
399 }
400 if q_index & 1 == 0 {
401 Ok(ret as u32)
402 } else {
403 Ok(0)
404 }
405 }
406}
407
408fn setup_socket(file: &mut File, if_name: Option<&str>, mq: bool) -> Result<()> {
409 let mut tap_ifconfig = unsafe { MaybeUninit::<libc::ifreq>::zeroed().assume_init() };
410
411 if let Some(name) = if_name {
412 let name_len = std::cmp::min(tap_ifconfig.ifr_name.len() - 1, name.len());
413 tap_ifconfig.ifr_name.as_mut_bytes()[0..name_len]
414 .copy_from_slice(&name.as_bytes()[0..name_len]);
415 }
416
417 let mut flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
418 if mq {
419 flags |= IFF_MULTI_QUEUE;
420 }
421 tap_ifconfig.ifr_ifru.ifru_flags = flags as i16;
422
423 unsafe { tun_set_iff(file, &tap_ifconfig) }.or_else(|e| {
424 if e.kind() == ErrorKind::InvalidInput && !mq {
425 flags |= IFF_MULTI_QUEUE;
426 tap_ifconfig.ifr_ifru.ifru_flags = flags as i16;
427 unsafe { tun_set_iff(file, &tap_ifconfig) }
428 } else {
429 Err(e)
430 }
431 })?;
432
433 unsafe { tun_set_vnet_hdr_sz(file, &(size_of::<VirtioNetHdr>() as _)) }?;
434 Ok(())
435}
436
437fn detect_tap_offload(tap: &impl AsFd) -> NetFeature {
438 let mut tap_feature = TunFeature::all();
439 let mut dev_feat = NetFeature::GUEST_CSUM
440 | NetFeature::GUEST_TSO4
441 | NetFeature::GUEST_TSO6
442 | NetFeature::GUEST_ECN
443 | NetFeature::GUEST_UFO
444 | NetFeature::GUEST_USO4
445 | NetFeature::GUEST_USO6;
446 if unsafe { tun_set_offload(tap, tap_feature) }.is_ok() {
447 return dev_feat;
448 }
449 tap_feature &= !(TunFeature::USO4 | TunFeature::USO6);
450 dev_feat &= !(NetFeature::GUEST_USO4 | NetFeature::GUEST_USO6);
451 if unsafe { tun_set_offload(tap, tap_feature) }.is_ok() {
452 return dev_feat;
453 }
454 tap_feature &= !(TunFeature::UFO);
455 dev_feat &= !NetFeature::GUEST_UFO;
456 if unsafe { tun_set_offload(tap, tap_feature) }.is_ok() {
457 return dev_feat;
458 }
459 NetFeature::empty()
460}
461
462fn enable_tap_offload(tap: &mut File, feature: NetFeature) -> Result<()> {
463 let mut tap_feature = TunFeature::empty();
464 if feature.contains(NetFeature::GUEST_CSUM) {
465 tap_feature |= TunFeature::CSUM;
466 }
467 if feature.contains(NetFeature::GUEST_TSO4) {
468 tap_feature |= TunFeature::TSO4;
469 }
470 if feature.contains(NetFeature::GUEST_TSO6) {
471 tap_feature |= TunFeature::TSO6;
472 }
473 if feature.contains(NetFeature::GUEST_ECN) {
474 tap_feature |= TunFeature::TSO_ECN;
475 }
476 if feature.contains(NetFeature::GUEST_UFO) {
477 tap_feature |= TunFeature::UFO;
478 }
479 if feature.contains(NetFeature::GUEST_USO4) {
480 tap_feature |= TunFeature::USO4;
481 }
482 if feature.contains(NetFeature::GUEST_USO6) {
483 tap_feature |= TunFeature::USO6;
484 }
485 unsafe { tun_set_offload(tap, tap_feature) }?;
486 Ok(())
487}
488