diff --git a/test/integration-test/src/lib.rs b/test/integration-test/src/lib.rs index 702c71908..184a9bbf8 100644 --- a/test/integration-test/src/lib.rs +++ b/test/integration-test/src/lib.rs @@ -64,6 +64,8 @@ bpf_file!( UPROBE_COOKIE => "uprobe_cookie", ); +#[cfg(test)] +mod netlink; #[cfg(test)] mod tests; #[cfg(test)] diff --git a/test/integration-test/src/netlink.rs b/test/integration-test/src/netlink.rs new file mode 100644 index 000000000..8841a01fc --- /dev/null +++ b/test/integration-test/src/netlink.rs @@ -0,0 +1,537 @@ +//! Minimal netlink helpers for integration-test network setup. +//! +//! These live here (rather than in the `aya` crate) to avoid adding test-only +//! public API surface to the production library. + +use std::{ + collections::HashMap, + ffi::CStr, + io, mem, + net::Ipv4Addr, + os::fd::{AsRawFd as _, FromRawFd as _, OwnedFd, RawFd}, + ptr, slice, +}; + +use libc::{ + AF_INET, AF_NETLINK, AF_UNSPEC, IFA_ADDRESS, IFA_LOCAL, IFF_UP, IFLA_ADDRESS, IFLA_IFNAME, + IFLA_INFO_DATA, IFLA_INFO_KIND, IFLA_LINKINFO, IFLA_NET_NS_FD, NDA_DST, NDA_LLADDR, + NETLINK_ROUTE, NLA_ALIGNTO, NLA_F_NESTED, NLA_TYPE_MASK, NLM_F_ACK, NLM_F_CREATE, NLM_F_EXCL, + NLM_F_REQUEST, NLMSG_DONE, NLMSG_ERROR, NTF_SELF, NUD_PERMANENT, RTM_GETLINK, RTM_NEWADDR, + RTM_NEWLINK, RTM_NEWNEIGH, RTM_SETLINK, SOCK_RAW, SOL_NETLINK, nlattr, nlmsgerr, nlmsghdr, +}; + +/// `NLMSG_ALIGNTO` from the kernel; value is always 4. +const NLMSG_ALIGNTO: usize = 4; +const NLA_HDR_LEN: usize = align_to(size_of::(), NLA_ALIGNTO as usize); +/// `VETH_INFO_PEER` from `linux/veth.h`; not exported by libc. +const VETH_INFO_PEER: u16 = 1; + +/// `struct ifinfomsg` from `linux/if_link.h`. +#[derive(Copy, Clone)] +#[repr(C)] +struct Ifinfomsg { + ifi_family: u8, + _ifi_pad: u8, + ifi_type: u16, + ifi_index: i32, + ifi_flags: u32, + ifi_change: u32, +} + +/// `struct ifaddrmsg` from `linux/if_addr.h`. +#[derive(Copy, Clone)] +#[repr(C)] +struct Ifaddrmsg { + family: u8, + prefixlen: u8, + flags: u8, + scope: u8, + index: u32, +} + +/// `struct ndmsg` from `linux/neighbour.h`. +#[derive(Copy, Clone)] +#[repr(C)] +struct Ndmsg { + family: u8, + pad1: u8, + pad2: u16, + ifindex: i32, + state: u16, + flags: u8, + r#type: u8, +} + +#[derive(Copy, Clone)] +#[repr(C)] +struct LinkRequest { + header: nlmsghdr, + if_info: Ifinfomsg, + attrs: [u8; 64], +} + +#[derive(Copy, Clone)] +#[repr(C)] +struct VethRequest { + header: nlmsghdr, + if_info: Ifinfomsg, + attrs: [u8; 128], +} + +#[derive(Copy, Clone)] +#[repr(C)] +struct AddrRequest { + header: nlmsghdr, + addr_info: Ifaddrmsg, + attrs: [u8; 24], +} + +#[derive(Copy, Clone)] +#[repr(C)] +struct NeighRequest { + header: nlmsghdr, + neigh_info: Ndmsg, + attrs: [u8; 24], +} + +const fn align_to(v: usize, align: usize) -> usize { + v.next_multiple_of(align) +} + +/// Reinterpret a `#[repr(C)]` struct as a byte slice. +/// +/// # Safety +/// +/// `T` must be `#[repr(C)]` with no padding that could leak uninitialised bytes +/// into the returned slice. All request structs in this module satisfy this +/// because they are either fully initialised or their padding bytes are +/// explicitly zeroed via `mem::zeroed()`. +unsafe fn bytes_of(val: &T) -> &[u8] { + unsafe { slice::from_raw_parts(ptr::from_ref(val).cast(), size_of::()) } +} + +/// Return a mutable slice over the attribute area of a request struct. +/// +/// # Safety +/// +/// `msg_len` must be the offset of the attribute area within `T`. +unsafe fn request_attributes(req: &mut T, msg_len: usize) -> &mut [u8] { + let req: *mut u8 = ptr::from_mut(req).cast(); + let attrs_addr = unsafe { req.add(msg_len) }; + let align_offset = attrs_addr.align_offset(NLMSG_ALIGNTO); + let attrs_addr = unsafe { attrs_addr.add(align_offset) }; + let len = size_of::() - msg_len - align_offset; + unsafe { slice::from_raw_parts_mut(attrs_addr, len) } +} + +fn write_attr_bytes( + buf: &mut [u8], + offset: usize, + attr_type: u16, + value: &[u8], +) -> Result { + let attr = nlattr { + nla_type: attr_type, + nla_len: (NLA_HDR_LEN + value.len()) as u16, + }; + write_attr_header(buf, offset, attr)?; + let value_len = write_bytes(buf, offset + NLA_HDR_LEN, value)?; + Ok(NLA_HDR_LEN + value_len) +} + +fn write_attr_header(buf: &mut [u8], offset: usize, attr: nlattr) -> Result { + let attr = unsafe { bytes_of(&attr) }; + write_bytes(buf, offset, attr)?; + Ok(NLA_HDR_LEN) +} + +fn write_bytes(buf: &mut [u8], offset: usize, value: &[u8]) -> Result { + let align_len = align_to(value.len(), NLA_ALIGNTO as usize); + if offset + align_len > buf.len() { + return Err(io::Error::other( + "no space left in netlink attribute buffer", + )); + } + buf[offset..offset + value.len()].copy_from_slice(value); + Ok(align_len) +} + +struct NetlinkSocket { + sock: OwnedFd, +} + +impl NetlinkSocket { + fn open() -> io::Result { + let sock = unsafe { libc::socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) }; + if sock < 0 { + return Err(io::Error::last_os_error()); + } + let sock = unsafe { OwnedFd::from_raw_fd(sock) }; + + let enable = 1i32; + unsafe { + if libc::setsockopt( + sock.as_raw_fd(), + SOL_NETLINK, + libc::NETLINK_EXT_ACK, + ptr::from_ref(&enable).cast(), + size_of_val(&enable) as u32, + ) < 0 + { + return Err(io::Error::last_os_error()); + } + if libc::setsockopt( + sock.as_raw_fd(), + SOL_NETLINK, + libc::NETLINK_CAP_ACK, + ptr::from_ref(&enable).cast(), + size_of_val(&enable) as u32, + ) < 0 + { + return Err(io::Error::last_os_error()); + } + } + Ok(Self { sock }) + } + + /// Send a request and collect the response messages. + fn execute(&self, msg: &[u8]) -> io::Result> { + if unsafe { libc::send(self.sock.as_raw_fd(), msg.as_ptr().cast(), msg.len(), 0) } < 0 { + return Err(io::Error::last_os_error()); + } + self.recv() + } + + fn recv(&self) -> io::Result> { + let mut buf = [0u8; 4096]; + let mut messages = Vec::new(); + let mut multipart = true; + 'out: while multipart { + multipart = false; + let len = + unsafe { libc::recv(self.sock.as_raw_fd(), buf.as_mut_ptr().cast(), buf.len(), 0) }; + if len < 0 { + return Err(io::Error::last_os_error()); + } + if len == 0 { + break; + } + let len = len as usize; + let mut offset = 0; + while offset < len { + let message = NetlinkMessage::read(&buf[offset..])?; + offset += align_to(message.header.nlmsg_len as usize, NLMSG_ALIGNTO); + multipart = (message.header.nlmsg_flags & (libc::NLM_F_MULTI as u16)) != 0; + match i32::from(message.header.nlmsg_type) { + NLMSG_ERROR => { + let err = message.error.unwrap(); + if err.error == 0 { + // ACK + continue; + } + return Err(io::Error::from_raw_os_error(-err.error)); + } + NLMSG_DONE => break 'out, + _ => messages.push(message), + } + } + } + Ok(messages) + } +} + +struct NetlinkMessage { + header: nlmsghdr, + data: Vec, + error: Option, +} + +impl NetlinkMessage { + fn read(buf: &[u8]) -> io::Result { + if size_of::() > buf.len() { + return Err(io::Error::other("buffer smaller than nlmsghdr")); + } + let header: nlmsghdr = unsafe { ptr::read_unaligned(buf.as_ptr().cast()) }; + let msg_len = header.nlmsg_len as usize; + if msg_len < size_of::() || msg_len > buf.len() { + return Err(io::Error::other("invalid nlmsg_len")); + } + let data_offset = align_to(size_of::(), NLMSG_ALIGNTO); + if data_offset >= buf.len() { + return Err(io::Error::other("need more data")); + } + let (rest, error) = if header.nlmsg_type == NLMSG_ERROR as u16 { + if data_offset + size_of::() > buf.len() { + return Err(io::Error::other( + "NLMSG_ERROR but not enough space for nlmsgerr", + )); + } + ( + &buf[data_offset + size_of::()..msg_len], + Some(unsafe { ptr::read_unaligned(buf[data_offset..].as_ptr().cast()) }), + ) + } else { + (&buf[data_offset..msg_len], None) + }; + Ok(Self { + header, + data: rest.to_vec(), + error, + }) + } +} + +fn parse_attrs(buf: &[u8]) -> HashMap { + let mut attrs = HashMap::new(); + let mut offset = 0; + while offset < buf.len() { + let remaining = &buf[offset..]; + if NLA_HDR_LEN > remaining.len() { + break; + } + let attr: nlattr = unsafe { ptr::read_unaligned(remaining.as_ptr().cast()) }; + let len = attr.nla_len as usize; + if len < NLA_HDR_LEN { + break; + } + let align_len = align_to(len, NLA_ALIGNTO as usize); + if align_len > remaining.len() { + break; + } + attrs.insert( + attr.nla_type & (NLA_TYPE_MASK as u16), + &remaining[NLA_HDR_LEN..len], + ); + offset += align_len; + } + attrs +} + +/// Set a network interface up (`IFF_UP`). +pub(crate) fn set_link_up(if_index: i32) -> io::Result<()> { + let sock = NetlinkSocket::open()?; + let mut req = unsafe { mem::zeroed::() }; + + let nlmsg_len = size_of::() + size_of::(); + req.header = nlmsghdr { + nlmsg_len: nlmsg_len as u32, + nlmsg_flags: (NLM_F_REQUEST | NLM_F_ACK) as u16, + nlmsg_type: RTM_SETLINK, + nlmsg_pid: 0, + nlmsg_seq: 1, + }; + req.if_info.ifi_family = AF_UNSPEC as u8; + req.if_info.ifi_index = if_index; + req.if_info.ifi_flags = IFF_UP as u32; + req.if_info.ifi_change = IFF_UP as u32; + + sock.execute(unsafe { &bytes_of(&req)[..req.header.nlmsg_len as usize] })?; + Ok(()) +} + +/// Create a veth pair. +pub(crate) fn create_veth_pair(if_name: &CStr, peer_name: &CStr) -> io::Result<()> { + let sock = NetlinkSocket::open()?; + let mut req = unsafe { mem::zeroed::() }; + + let nlmsg_len = size_of::() + size_of::(); + req.header = nlmsghdr { + nlmsg_len: nlmsg_len as u32, + nlmsg_flags: (NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL) as u16, + nlmsg_type: RTM_NEWLINK, + nlmsg_pid: 0, + nlmsg_seq: 1, + }; + req.if_info.ifi_family = AF_UNSPEC as u8; + + let attrs_buf = unsafe { request_attributes(&mut req, nlmsg_len) }; + let attr_len = write_veth_attrs(attrs_buf, if_name, peer_name)?; + req.header.nlmsg_len += align_to(attr_len, NLA_ALIGNTO as usize) as u32; + + sock.execute(unsafe { &bytes_of(&req)[..req.header.nlmsg_len as usize] })?; + Ok(()) +} + +fn write_veth_attrs(buf: &mut [u8], if_name: &CStr, peer_name: &CStr) -> Result { + let mut offset = 0usize; + + // IFLA_IFNAME for the first interface. + offset += write_attr_bytes(buf, offset, IFLA_IFNAME, if_name.to_bytes_with_nul())?; + + // IFLA_LINKINFO (nested): contains IFLA_INFO_KIND + IFLA_INFO_DATA. + let linkinfo_start = offset; + offset += NLA_HDR_LEN; // reserve; back-filled below + + // IFLA_INFO_KIND = "veth" + offset += write_attr_bytes(buf, offset, IFLA_INFO_KIND, c"veth".to_bytes_with_nul())?; + + // IFLA_INFO_DATA (nested): contains VETH_INFO_PEER. + let info_data_start = offset; + offset += NLA_HDR_LEN; + + // VETH_INFO_PEER (nested): contains an Ifinfomsg + IFLA_IFNAME. + let peer_start = offset; + offset += NLA_HDR_LEN; + + // Embedded Ifinfomsg for the peer (all zeros from mem::zeroed). + let ifinfo_size = align_to(size_of::(), NLA_ALIGNTO as usize); + if offset + ifinfo_size > buf.len() { + return Err(io::Error::other( + "no space left in netlink attribute buffer", + )); + } + offset += ifinfo_size; + + // IFLA_IFNAME for the peer. + offset += write_attr_bytes(buf, offset, IFLA_IFNAME, peer_name.to_bytes_with_nul())?; + + // Back-fill headers. + write_attr_header( + buf, + peer_start, + nlattr { + nla_type: (NLA_F_NESTED as u16) | VETH_INFO_PEER, + nla_len: (offset - peer_start) as u16, + }, + )?; + write_attr_header( + buf, + info_data_start, + nlattr { + nla_type: (NLA_F_NESTED as u16) | IFLA_INFO_DATA, + nla_len: (offset - info_data_start) as u16, + }, + )?; + write_attr_header( + buf, + linkinfo_start, + nlattr { + nla_type: (NLA_F_NESTED as u16) | IFLA_LINKINFO, + nla_len: (offset - linkinfo_start) as u16, + }, + )?; + + Ok(offset) +} + +/// Move a network interface to another network namespace. +pub(crate) fn set_link_ns(if_index: i32, netns_fd: RawFd) -> io::Result<()> { + let sock = NetlinkSocket::open()?; + let mut req = unsafe { mem::zeroed::() }; + + let nlmsg_len = size_of::() + size_of::(); + req.header = nlmsghdr { + nlmsg_len: nlmsg_len as u32, + nlmsg_flags: (NLM_F_REQUEST | NLM_F_ACK) as u16, + nlmsg_type: RTM_SETLINK, + nlmsg_pid: 0, + nlmsg_seq: 1, + }; + req.if_info.ifi_family = AF_UNSPEC as u8; + req.if_info.ifi_index = if_index; + + let attrs_buf = unsafe { request_attributes(&mut req, nlmsg_len) }; + let attr_len = write_attr_bytes(attrs_buf, 0, IFLA_NET_NS_FD, unsafe { bytes_of(&netns_fd) })?; + req.header.nlmsg_len += align_to(attr_len, NLA_ALIGNTO as usize) as u32; + + sock.execute(unsafe { &bytes_of(&req)[..req.header.nlmsg_len as usize] })?; + Ok(()) +} + +/// Add an IPv4 address to a network interface. +pub(crate) fn add_addr_v4(if_index: i32, addr: Ipv4Addr, prefix_len: u8) -> io::Result<()> { + let sock = NetlinkSocket::open()?; + let mut req = unsafe { mem::zeroed::() }; + + let nlmsg_len = size_of::() + size_of::(); + req.header = nlmsghdr { + nlmsg_len: nlmsg_len as u32, + nlmsg_flags: (NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL) as u16, + nlmsg_type: RTM_NEWADDR, + nlmsg_pid: 0, + nlmsg_seq: 1, + }; + req.addr_info = Ifaddrmsg { + family: AF_INET as u8, + prefixlen: prefix_len, + flags: 0, + scope: 0, + index: if_index as u32, + }; + + let attrs_buf = unsafe { request_attributes(&mut req, nlmsg_len) }; + let addr_bytes = addr.octets(); + let mut offset = 0; + offset += write_attr_bytes(attrs_buf, offset, IFA_LOCAL, &addr_bytes)?; + offset += write_attr_bytes(attrs_buf, offset, IFA_ADDRESS, &addr_bytes)?; + req.header.nlmsg_len += align_to(offset, NLA_ALIGNTO as usize) as u32; + + sock.execute(unsafe { &bytes_of(&req)[..req.header.nlmsg_len as usize] })?; + Ok(()) +} + +/// Read the MAC address (`IFLA_ADDRESS`) of a network interface. +pub(crate) fn get_link_mac(if_index: i32) -> io::Result<[u8; 6]> { + let sock = NetlinkSocket::open()?; + let mut req = unsafe { mem::zeroed::() }; + + let nlmsg_len = size_of::() + size_of::(); + req.header = nlmsghdr { + nlmsg_len: nlmsg_len as u32, + nlmsg_flags: NLM_F_REQUEST as u16, + nlmsg_type: RTM_GETLINK, + nlmsg_pid: 0, + nlmsg_seq: 1, + }; + req.if_info.ifi_family = AF_UNSPEC as u8; + req.if_info.ifi_index = if_index; + + for msg in sock.execute(unsafe { &bytes_of(&req)[..req.header.nlmsg_len as usize] })? { + if msg.header.nlmsg_type != RTM_NEWLINK { + continue; + } + let attrs = parse_attrs(&msg.data[size_of::()..]); + if let Some(data) = attrs.get(&IFLA_ADDRESS) { + if let Ok(mac) = <[u8; 6]>::try_from(*data) { + return Ok(mac); + } + } + } + + Err(io::Error::other( + "IFLA_ADDRESS not found in RTM_GETLINK response", + )) +} + +/// Add a static neighbor (ARP) entry. +pub(crate) fn add_neigh_v4(if_index: i32, dst_addr: Ipv4Addr, lladdr: [u8; 6]) -> io::Result<()> { + let sock = NetlinkSocket::open()?; + let mut req = unsafe { mem::zeroed::() }; + + let nlmsg_len = size_of::() + size_of::(); + req.header = nlmsghdr { + nlmsg_len: nlmsg_len as u32, + nlmsg_flags: (NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL) as u16, + nlmsg_type: RTM_NEWNEIGH, + nlmsg_pid: 0, + nlmsg_seq: 1, + }; + req.neigh_info = Ndmsg { + family: AF_INET as u8, + pad1: 0, + pad2: 0, + ifindex: if_index, + state: NUD_PERMANENT, + flags: NTF_SELF, + r#type: 0, + }; + + let attrs_buf = unsafe { request_attributes(&mut req, nlmsg_len) }; + let mut offset = 0; + offset += write_attr_bytes(attrs_buf, offset, NDA_DST, &dst_addr.octets())?; + offset += write_attr_bytes(attrs_buf, offset, NDA_LLADDR, &lladdr)?; + req.header.nlmsg_len += align_to(offset, NLA_ALIGNTO as usize) as u32; + + sock.execute(unsafe { &bytes_of(&req)[..req.header.nlmsg_len as usize] })?; + Ok(()) +} diff --git a/test/integration-test/src/tests/load.rs b/test/integration-test/src/tests/load.rs index 8ff890975..67eb7360f 100644 --- a/test/integration-test/src/tests/load.rs +++ b/test/integration-test/src/tests/load.rs @@ -21,11 +21,15 @@ use aya::{ }; use aya_obj::programs::XdpAttachType; +use crate::utils::NetNsGuard; + const MAX_RETRIES: usize = 100; pub(crate) const RETRY_DURATION: Duration = Duration::from_millis(10); #[test_log::test] fn long_name() { + let _netns = NetNsGuard::new(); + let mut bpf = Ebpf::load(crate::NAME_TEST).unwrap(); let name_prog: &mut Xdp = bpf .program_mut("ihaveaverylongname") @@ -33,7 +37,9 @@ fn long_name() { .try_into() .unwrap(); name_prog.load().unwrap(); - name_prog.attach("lo", XdpFlags::default()).unwrap(); + name_prog + .attach(NetNsGuard::IFACE, XdpFlags::default()) + .unwrap(); // We used to be able to assert with bpftool that the program name was short. // It seem though that it now uses the name from the ELF symbol table instead. @@ -263,10 +269,12 @@ impl_unload_program_ops!(FlowDissector, FlowDissectorLinkId, FlowDissectorLink); #[test_log::test] fn unload_xdp() { + let _netns = NetNsGuard::new(); + type P = Xdp; let program_name = "pass"; - let attach = |prog: &mut P| prog.attach("lo", XdpFlags::default()).unwrap(); + let attach = |prog: &mut P| prog.attach(NetNsGuard::IFACE, XdpFlags::default()).unwrap(); run_unload_program_test( crate::TEST, program_name, @@ -387,10 +395,12 @@ fn basic_flow_dissector() { #[test_log::test] fn pin_link() { + let _netns = NetNsGuard::new(); + type P = Xdp; let program_name = "pass"; - let attach = |prog: &mut P| prog.attach("lo", XdpFlags::default()).unwrap(); + let attach = |prog: &mut P| prog.attach(NetNsGuard::IFACE, XdpFlags::default()).unwrap(); let mut bpf = Ebpf::load(crate::TEST).unwrap(); let prog: &mut P = bpf.program_mut(program_name).unwrap().try_into().unwrap(); @@ -400,7 +410,7 @@ fn pin_link() { assert_loaded(program_name); let fd_link: FdLink = link.try_into().unwrap(); - let pinned = fd_link.pin("/sys/fs/bpf/aya-xdp-test-lo").unwrap(); + let pinned = fd_link.pin("/sys/fs/bpf/aya-xdp-test-veth0").unwrap(); // because of the pin, the program is still attached prog.unload().unwrap(); @@ -424,18 +434,17 @@ fn pin_tcx_link() { return; } - use crate::utils::NetNsGuard; let _netns = NetNsGuard::new(); let program_name = "tcx_next"; - let pin_path = "/sys/fs/bpf/aya-tcx-test-lo"; + let pin_path = "/sys/fs/bpf/aya-tcx-test-veth0"; let mut bpf = Ebpf::load(crate::TCX).unwrap(); let prog: &mut SchedClassifier = bpf.program_mut(program_name).unwrap().try_into().unwrap(); prog.load().unwrap(); let link_id = prog .attach_with_options( - "lo", + NetNsGuard::IFACE, TcAttachType::Ingress, TcAttachOptions::TcxOrder(LinkOrder::default()), ) @@ -493,12 +502,14 @@ impl_pin_program_ops!(UProbe); #[test_log::test] fn pin_lifecycle() { + let _netns = NetNsGuard::new(); + type P = Xdp; let program_name = "pass"; - let attach = |prog: &mut P| prog.attach("lo", XdpFlags::default()).unwrap(); + let attach = |prog: &mut P| prog.attach(NetNsGuard::IFACE, XdpFlags::default()).unwrap(); let program_pin = "/sys/fs/bpf/aya-xdp-test-prog"; - let link_pin = "/sys/fs/bpf/aya-xdp-test-lo"; + let link_pin = "/sys/fs/bpf/aya-xdp-test-veth0"; let from_pin = |program_pin: &str| P::from_pin(program_pin, XdpAttachType::Interface).unwrap(); let kernel_version = KernelVersion::current().unwrap(); diff --git a/test/integration-test/src/tests/smoke.rs b/test/integration-test/src/tests/smoke.rs index 8f6200c4f..d8cd1e4b0 100644 --- a/test/integration-test/src/tests/smoke.rs +++ b/test/integration-test/src/tests/smoke.rs @@ -16,7 +16,7 @@ fn modprobe() { // correctly within the test environment. let _netns = NetNsGuard::new(); - tc::qdisc_add_clsact("lo").unwrap(); + tc::qdisc_add_clsact(NetNsGuard::IFACE).unwrap(); } #[test_log::test] @@ -34,7 +34,9 @@ fn xdp() { let mut bpf = Ebpf::load(crate::PASS).unwrap(); let dispatcher: &mut Xdp = bpf.program_mut("pass").unwrap().try_into().unwrap(); dispatcher.load().unwrap(); - dispatcher.attach("lo", XdpFlags::default()).unwrap(); + dispatcher + .attach(NetNsGuard::IFACE, XdpFlags::default()) + .unwrap(); } #[test_log::test] @@ -72,7 +74,7 @@ fn extension() { let mut bpf = Ebpf::load(crate::MAIN).unwrap(); let pass: &mut Xdp = bpf.program_mut("xdp_pass").unwrap().try_into().unwrap(); pass.load().unwrap(); - pass.attach("lo", XdpFlags::default()).unwrap(); + pass.attach(NetNsGuard::IFACE, XdpFlags::default()).unwrap(); let mut bpf = EbpfLoader::new() .extension("xdp_drop") diff --git a/test/integration-test/src/tests/tcx.rs b/test/integration-test/src/tests/tcx.rs index 774a9b80a..6b185a3e3 100644 --- a/test/integration-test/src/tests/tcx.rs +++ b/test/integration-test/src/tests/tcx.rs @@ -38,7 +38,7 @@ fn tcx() { attach_program_with_link_order_inner!($program_name, $link_order); $program_name .attach_with_options( - "lo", + NetNsGuard::IFACE, TcAttachType::Ingress, TcAttachOptions::TcxOrder($link_order), ) @@ -48,7 +48,7 @@ fn tcx() { attach_program_with_link_order_inner!($program_name, $link_order); let $link_id_name = $program_name .attach_with_options( - "lo", + NetNsGuard::IFACE, TcAttachType::Ingress, TcAttachOptions::TcxOrder($link_order), ) @@ -92,7 +92,8 @@ fn tcx() { .map(|program| program.info().unwrap().id()) .collect::>(); - let (revision, got_order) = SchedClassifier::query_tcx("lo", TcAttachType::Ingress).unwrap(); + let (revision, got_order) = + SchedClassifier::query_tcx(NetNsGuard::IFACE, TcAttachType::Ingress).unwrap(); assert_eq!(revision, (expected_order.len() + 1) as u64); assert_eq!( got_order diff --git a/test/integration-test/src/tests/xdp.rs b/test/integration-test/src/tests/xdp.rs index 192538b91..9730c5598 100644 --- a/test/integration-test/src/tests/xdp.rs +++ b/test/integration-test/src/tests/xdp.rs @@ -4,13 +4,35 @@ use assert_matches::assert_matches; use aya::{ Ebpf, maps::{Array, CpuMap, XskMap}, - programs::{ProgramError, Xdp, XdpError, XdpFlags, xdp::XdpLinkId}, + programs::{ProgramError, Xdp, XdpError, XdpFlags}, util::KernelVersion, }; use object::{Object as _, ObjectSection as _, ObjectSymbol as _, SymbolSection}; use xdpilone::{BufIdx, IfInfo, Socket, SocketConfig, Umem, UmemConfig}; -use crate::utils::NetNsGuard; +use crate::utils::{NetNsGuard, PeerNsGuard}; + +// Sanity-check the veth + PeerNsGuard plumbing without any BPF programs. +#[test_log::test] +fn veth_connectivity() { + // peer must be declared after netns so it drops first (see PeerNsGuard docs). + let netns = NetNsGuard::new(); + let peer = PeerNsGuard::new(&netns); + + let sock = UdpSocket::bind(format!("{}:0", NetNsGuard::IFACE_ADDR)).unwrap(); + let addr = sock.local_addr().unwrap(); + sock.set_read_timeout(Some(Duration::from_secs(10))) + .unwrap(); + + peer.run(|| { + let sock = UdpSocket::bind(format!("{}:0", NetNsGuard::PEER_ADDR)).unwrap(); + sock.send_to(b"veth ok", addr).unwrap(); + }); + + let mut buf = [0u8; 16]; + let n = sock.recv(&mut buf).unwrap(); + assert_eq!(&buf[..n], b"veth ok"); +} #[test_log::test] #[expect( @@ -18,7 +40,9 @@ use crate::utils::NetNsGuard; reason = "packet headers are encoded in network byte order" )] fn af_xdp() { - let _netns = NetNsGuard::new(); + // peer must be declared after netns so it drops first (see PeerNsGuard docs). + let netns = NetNsGuard::new(); + let peer = PeerNsGuard::new(&netns); let mut bpf = Ebpf::load(crate::REDIRECT).unwrap(); let mut socks: XskMap<_> = bpf.take_map("SOCKS").unwrap().try_into().unwrap(); @@ -29,7 +53,7 @@ fn af_xdp() { .try_into() .unwrap(); xdp.load().unwrap(); - xdp.attach("lo", XdpFlags::default()).unwrap(); + xdp.attach(NetNsGuard::IFACE, XdpFlags::default()).unwrap(); const SIZE: usize = 2 * 4096; @@ -47,7 +71,7 @@ fn af_xdp() { }; let mut iface = IfInfo::invalid(); - iface.from_name(c"lo").unwrap(); + iface.from_name(c"veth0").unwrap(); let sock = match Socket::with_shared(&iface, &umem) { Ok(sock) => sock, Err(err) => { @@ -81,9 +105,13 @@ fn af_xdp() { writer.insert_once(frame1.offset); writer.commit(); - let sock = UdpSocket::bind("127.0.0.1:0").unwrap(); - let port = sock.local_addr().unwrap().port(); - sock.send_to(b"hello AF_XDP", "127.0.0.1:1777").unwrap(); + let dst = format!("{}:1777", NetNsGuard::IFACE_ADDR); + let port = peer.run(|| { + let sock = UdpSocket::bind(format!("{}:0", NetNsGuard::PEER_ADDR)).unwrap(); + let port = sock.local_addr().unwrap().port(); + sock.send_to(b"hello AF_XDP", &dst).unwrap(); + port + }); assert_eq!(rx.available(), 1); let desc = rx.receive(1).read().unwrap(); @@ -97,20 +125,26 @@ fn af_xdp() { assert_eq!(ip[9], 17); // UDP let (udp, payload) = buf.split_at(8); let ports = &udp[..4]; - let (src, dst) = ports.split_at(2); + let (src, dst_port) = ports.split_at(2); assert_eq!(src, port.to_be_bytes().as_slice()); // Source - assert_eq!(dst, 1777u16.to_be_bytes().as_slice()); // Dest + assert_eq!(dst_port, 1777u16.to_be_bytes().as_slice()); // Dest assert_eq!(payload, b"hello AF_XDP"); assert_eq!(rx.available(), 1); // Removes socket from map, no more packets will be redirected. socks.unset(0).unwrap(); assert_eq!(rx.available(), 1); - sock.send_to(b"hello AF_XDP", "127.0.0.1:1777").unwrap(); + peer.run(|| { + let sock = UdpSocket::bind(format!("{}:0", NetNsGuard::PEER_ADDR)).unwrap(); + sock.send_to(b"hello AF_XDP", &dst).unwrap(); + }); assert_eq!(rx.available(), 1); // Adds socket to map again, packets will be redirected again. socks.set(0, rx.as_raw_fd(), 0).unwrap(); - sock.send_to(b"hello AF_XDP", "127.0.0.1:1777").unwrap(); + peer.run(|| { + let sock = UdpSocket::bind(format!("{}:0", NetNsGuard::PEER_ADDR)).unwrap(); + sock.send_to(b"hello AF_XDP", &dst).unwrap(); + }); assert_eq!(rx.available(), 2); } @@ -182,34 +216,35 @@ fn cpumap_chain() { }; cpus.set(0, 2048, Some(xdp_chain_fd), 0).unwrap(); - // Load the main program + // Load the main program and attach to loopback (generic/SKB-mode XDP). + // We use loopback instead of veth because cpumap chaining does not + // reliably deliver packets through veth on arm64. let xdp: &mut Xdp = bpf.program_mut("redirect_cpu").unwrap().try_into().unwrap(); xdp.load().unwrap(); - let result = xdp.attach("lo", XdpFlags::default()); + // Generic devices did not support cpumap XDP programs until 5.15. - // // See https://github.com/torvalds/linux/commit/11941f8a85362f612df61f4aaab0e41b64d2111d. + let result = xdp.attach("lo", XdpFlags::default()); if KernelVersion::current().unwrap() < KernelVersion::new(5, 15, 0) { assert_matches!(result, Err(ProgramError::XdpError(XdpError::NetlinkError(err))) => { assert_eq!(err.raw_os_error(), Some(libc::EINVAL)) }); - eprintln!( - "skipping test - cpumap attachment not supported on generic (loopback) interface" - ); + eprintln!("skipping test - cpumap on generic XDP requires kernel >= 5.15"); return; } - let _unused: XdpLinkId = result.unwrap(); + result.unwrap(); const PAYLOAD: &str = "hello cpumap"; + // Send a UDP packet to ourselves over loopback so it hits our XDP program. let sock = UdpSocket::bind("127.0.0.1:0").unwrap(); let addr = sock.local_addr().unwrap(); sock.set_read_timeout(Some(Duration::from_secs(60))) .unwrap(); sock.send_to(PAYLOAD.as_bytes(), addr).unwrap(); - // Read back the packet to ensure it went through the entire network stack, including our two - // probes. + // Read back the packet to ensure it traversed the full XDP pipeline: + // redirect_cpu -> cpumap -> redirect_cpu_chain -> network stack. let mut buf = [0u8; PAYLOAD.len() + 1]; let n = sock.recv(&mut buf).unwrap(); diff --git a/test/integration-test/src/utils.rs b/test/integration-test/src/utils.rs index 04aa50413..b867ff09b 100644 --- a/test/integration-test/src/utils.rs +++ b/test/integration-test/src/utils.rs @@ -6,15 +6,18 @@ use std::{ ffi::CString, fs, io::{self, Write as _}, + net::Ipv4Addr, + os::fd::AsRawFd as _, path::Path, process, sync::atomic::{AtomicU64, Ordering}, }; use anyhow::{Context as _, Result}; -use aya::netlink_set_link_up; use libc::if_nametoindex; +use crate::netlink; + const CGROUP_ROOT: &str = "/sys/fs/cgroup"; const CGROUP_PROCS: &str = "cgroup.procs"; @@ -142,6 +145,14 @@ pub(crate) struct NetNsGuard { impl NetNsGuard { const PERSIST_DIR: &str = "/var/run/netns/"; + pub(crate) const IFACE: &str = "veth0"; + const PEER_IFACE: &str = "veth1"; + pub(crate) const IFACE_ADDR: &str = "10.0.0.1"; + pub(crate) const PEER_ADDR: &str = "10.0.0.2"; + + pub(crate) fn name(&self) -> &str { + &self.name + } #[expect( clippy::print_stdout, @@ -188,10 +199,43 @@ impl NetNsGuard { ns.name, io::Error::last_os_error() ); - netlink_set_link_up(idx as i32) + netlink::set_link_up(idx as i32) .unwrap_or_else(|e| panic!("failed to set `lo` up in netns {}: {e}", ns.name)); } + // Create a veth pair for tests that attach XDP/TC programs. Veth supports + // native XDP (unlike loopback which only supports SKB/generic mode), so tests + // exercise the same code path used in production. + netlink::create_veth_pair( + &CString::new(Self::IFACE).unwrap(), + &CString::new(Self::PEER_IFACE).unwrap(), + ) + .unwrap_or_else(|e| { + panic!( + "failed to create veth pair {}/{} in netns {}: {e}", + Self::IFACE, + Self::PEER_IFACE, + ns.name + ) + }); + + // Bring up both ends. + for iface in [Self::IFACE, Self::PEER_IFACE] { + let name = CString::new(iface).unwrap(); + unsafe { + let idx = if_nametoindex(name.as_ptr()); + assert!( + idx != 0, + "interface `{iface}` not found in netns {}: {}", + ns.name, + io::Error::last_os_error() + ); + netlink::set_link_up(idx as i32).unwrap_or_else(|e| { + panic!("failed to set `{iface}` up in netns {}: {e}", ns.name) + }); + } + } + ns } } @@ -231,6 +275,242 @@ impl Drop for NetNsGuard { } } +/// Run a closure inside a network namespace identified by a file handle. +/// +/// Uses a scoped thread because `setns` changes the network namespace of the +/// *calling* thread — running it on a disposable thread avoids polluting the +/// test thread's namespace. +fn run_in_netns(ns_file: &fs::File, f: F) -> R +where + F: FnOnce() -> R + Send, + R: Send, +{ + std::thread::scope(|s| { + s.spawn(|| { + nix::sched::setns(ns_file, nix::sched::CloneFlags::CLONE_NEWNET) + .expect("setns to target netns"); + f() + }) + .join() + .unwrap() + }) +} + +/// A second network namespace connected to the test namespace via the veth pair. +/// +/// Creates a topology where `veth0` (10.0.0.1) lives in the test namespace and +/// `veth1` (10.0.0.2) is moved into the peer namespace. Static ARP entries are +/// installed on both sides so that no ARP traffic interferes with XDP programs. +/// +/// Must be declared *after* `NetNsGuard` so that it is dropped first (Rust drops +/// locals in reverse declaration order). Dropping the peer namespace also destroys +/// its end of the veth pair. +pub(crate) struct PeerNsGuard { + name: String, +} + +impl PeerNsGuard { + pub(crate) fn new(netns: &NetNsGuard) -> Self { + let name = format!("{}-peer", netns.name()); + + // Create peer netns: create a persist file, then use a scoped thread + // to unshare(CLONE_NEWNET) and bind-mount the new ns over the file. + let ns_path = Path::new(NetNsGuard::PERSIST_DIR).join(&name); + let _unused: fs::File = fs::File::create(&ns_path) + .unwrap_or_else(|err| panic!("fs::File::create(\"{}\"): {err:?}", ns_path.display())); + std::thread::scope(|s| { + s.spawn(|| { + nix::sched::unshare(nix::sched::CloneFlags::CLONE_NEWNET) + .expect("unshare(CLONE_NEWNET) for peer ns"); + let new_ns_path = format!("/proc/self/task/{}/ns/net", nix::unistd::gettid()); + nix::mount::mount( + Some(new_ns_path.as_str()), + &ns_path, + Some("none"), + nix::mount::MsFlags::MS_BIND, + None::<&str>, + ) + .expect("mount-bind peer netns"); + }) + .join() + .unwrap(); + }); + + // Move veth1 into peer netns. + let peer_iface = CString::new(NetNsGuard::PEER_IFACE).unwrap(); + let peer_ns = fs::File::open(&ns_path) + .unwrap_or_else(|e| panic!("open(\"{}\"): {e}", ns_path.display())); + unsafe { + let idx = if_nametoindex(peer_iface.as_ptr()); + assert!( + idx != 0, + "interface `{}` not found: {}", + NetNsGuard::PEER_IFACE, + io::Error::last_os_error() + ); + netlink::set_link_ns(idx as i32, peer_ns.as_raw_fd()).unwrap_or_else(|e| { + panic!( + "failed to move `{}` to netns {name}: {e}", + NetNsGuard::PEER_IFACE + ) + }); + } + + // Assign IP to veth0 in test netns. + let iface = CString::new(NetNsGuard::IFACE).unwrap(); + let iface_addr: Ipv4Addr = NetNsGuard::IFACE_ADDR.parse().unwrap(); + let peer_addr: Ipv4Addr = NetNsGuard::PEER_ADDR.parse().unwrap(); + unsafe { + let idx = if_nametoindex(iface.as_ptr()); + assert!( + idx != 0, + "interface `{}` not found: {}", + NetNsGuard::IFACE, + io::Error::last_os_error() + ); + netlink::add_addr_v4(idx as i32, iface_addr, 24) + .unwrap_or_else(|e| panic!("failed to add addr to `{}`: {e}", NetNsGuard::IFACE)); + } + + // Configure veth1 in peer netns: add addr, set link up, set lo up, get MAC. + let veth1_mac = run_in_netns(&peer_ns, || { + let peer_iface = CString::new(NetNsGuard::PEER_IFACE).unwrap(); + let lo = CString::new("lo").unwrap(); + unsafe { + let idx = if_nametoindex(peer_iface.as_ptr()); + assert!( + idx != 0, + "interface `{}` not found in peer netns: {}", + NetNsGuard::PEER_IFACE, + io::Error::last_os_error() + ); + netlink::add_addr_v4(idx as i32, peer_addr, 24).unwrap_or_else(|e| { + panic!( + "failed to add addr to `{}` in peer netns: {e}", + NetNsGuard::PEER_IFACE + ) + }); + netlink::set_link_up(idx as i32).unwrap_or_else(|e| { + panic!( + "failed to set `{}` up in peer netns: {e}", + NetNsGuard::PEER_IFACE + ) + }); + + let lo_idx = if_nametoindex(lo.as_ptr()); + assert!( + lo_idx != 0, + "interface `lo` not found in peer netns: {}", + io::Error::last_os_error() + ); + netlink::set_link_up(lo_idx as i32) + .unwrap_or_else(|e| panic!("failed to set `lo` up in peer netns: {e}")); + + netlink::get_link_mac(idx as i32).unwrap_or_else(|e| { + panic!("failed to get MAC of `{}`: {e}", NetNsGuard::PEER_IFACE) + }) + } + }); + + // Read veth0 MAC in test netns. + let veth0_mac = unsafe { + let idx = if_nametoindex(iface.as_ptr()); + assert!( + idx != 0, + "interface `{}` not found: {}", + NetNsGuard::IFACE, + io::Error::last_os_error() + ); + netlink::get_link_mac(idx as i32) + .unwrap_or_else(|e| panic!("failed to get MAC of `{}`: {e}", NetNsGuard::IFACE)) + }; + + // Static ARP in test netns: peer IP -> veth1 MAC. + unsafe { + let idx = if_nametoindex(iface.as_ptr()); + netlink::add_neigh_v4(idx as i32, peer_addr, veth1_mac).unwrap_or_else(|e| { + panic!( + "failed to add neigh entry for {} on `{}`: {e}", + NetNsGuard::PEER_ADDR, + NetNsGuard::IFACE + ) + }); + } + + // Static ARP in peer netns: test IP -> veth0 MAC. + run_in_netns(&peer_ns, || { + let peer_iface = CString::new(NetNsGuard::PEER_IFACE).unwrap(); + unsafe { + let idx = if_nametoindex(peer_iface.as_ptr()); + assert!( + idx != 0, + "interface `{}` not found in peer netns: {}", + NetNsGuard::PEER_IFACE, + io::Error::last_os_error() + ); + netlink::add_neigh_v4(idx as i32, iface_addr, veth0_mac).unwrap_or_else(|e| { + panic!( + "failed to add neigh entry for {} on `{}`: {e}", + NetNsGuard::IFACE_ADDR, + NetNsGuard::PEER_IFACE + ) + }); + } + }); + + Self { name } + } + + /// Run a closure inside the peer network namespace. + /// + /// Uses a scoped thread because `setns` changes the network namespace of the + /// *calling* thread — running it on a disposable thread avoids polluting the + /// test thread's namespace. + pub(crate) fn run(&self, f: F) -> R + where + F: FnOnce() -> R + Send, + R: Send, + { + let peer_ns_path = Path::new(NetNsGuard::PERSIST_DIR).join(&self.name); + let peer_ns = fs::File::open(&peer_ns_path) + .unwrap_or_else(|e| panic!("open(\"{}\"): {e}", peer_ns_path.display())); + run_in_netns(&peer_ns, f) + } +} + +impl Drop for PeerNsGuard { + #[expect( + clippy::print_stderr, + reason = "drop handlers avoid panic-in-panic by logging errors" + )] + #[expect( + clippy::use_debug, + reason = "debug formatting preserves error context in drop" + )] + fn drop(&mut self) { + let Self { name } = self; + match (|| -> Result<()> { + let ns_path = Path::new(NetNsGuard::PERSIST_DIR).join(&name); + nix::mount::umount2(&ns_path, nix::mount::MntFlags::MNT_DETACH).with_context(|| { + format!("nix::mount::umount2(\"{}\", MNT_DETACH)", ns_path.display()) + })?; + fs::remove_file(&ns_path) + .with_context(|| format!("fs::remove_file(\"{}\")", ns_path.display()))?; + Ok(()) + })() { + Ok(()) => (), + Err(err) => { + // Avoid panic in panic. + if std::thread::panicking() { + eprintln!("{err:?}"); + } else { + panic!("{err:?}"); + } + } + } + } +} + /// If the `KernelVersion::current >= $version`, `assert!($cond)`, else `assert!(!$cond)`. macro_rules! kernel_assert { ($cond:expr, $version:expr $(,)?) => {