265 lines
7.0 KiB
C
265 lines
7.0 KiB
C
|
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||
|
|
||
|
/*
 * Test suite of lwt BPF programs that reroute packets.
 * The tests focus not only on whether these programs work as expected under
 * normal conditions, but also on whether they handle abnormal situations
 * gracefully. This test suite currently only covers the lwt_xmit hook. lwt_in
 * tests have not been implemented.
 *
 * WARNING
 * -------
 *  This test suite can crash the kernel, thus should be run in a VM.
 *
 * Setup:
 * ---------
 *  All tests are performed in a single netns. A lwt encap route is set up for
 *  each subtest:
 *
 *    ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err
 *
 *  Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains
 *  a single test program entry. This program sets the packet mark from the
 *  last byte of the IPv4 daddr. For example, a packet going to 1.2.3.4 will
 *  receive a skb mark 4. A packet will only be marked once, and IP x.x.x.0
 *  will be skipped to avoid a route loop. We didn't use a generated BPF
 *  skeleton since the attachment of lwt programs is not supported by libbpf
 *  yet.
 *
 *  The test program will bring up a tun device, and set up the following
 *  routes:
 *
 *    ip rule add pref 100 from all fwmark <tun_index> lookup 100
 *    ip route add table 100 default dev tun0
 *
 *  For normal testing, a ping command is run in the test netns:
 *
 *    ping 10.0.0.<tun_index> -c 1 -W 1 -s 100
 *
 *  For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP
 *  socket will try to overflow the fq queue and trigger a qdisc drop error.
 *
 * Scenarios:
 * --------------------------------
 *  1. Reroute to a running tun device
 *  2. Reroute to a device where the qdisc drops packets
 *
 *  For case 1, ping packets should be received by the tun device.
 *
 *  For case 2, force UDP packets to overflow the fq limit. As long as the
 *  kernel has not crashed, it is considered successful.
 */
|
||
|
#define NETNS "ns_lwt_reroute"
|
||
|
#include <netinet/in.h>
|
||
|
#include "lwt_helpers.h"
|
||
|
#include "network_helpers.h"
|
||
|
#include <linux/net_tstamp.h>
|
||
|
|
||
|
/* BPF object file handed to "ip route ... encap bpf xmit obj <file>". */
#define BPF_OBJECT	"test_lwt_reroute.bpf.o"
/* Address assigned to lo (as a /32) inside the test namespace. */
#define LOCAL_SRC	"10.0.0.1"
/* Destination prefix routed through the lwt BPF encap route. */
#define TEST_CIDR	"10.0.0.0/24"
/* lwt hook keyword and program section name; neither macro is referenced in
 * the visible part of this file, where both strings are spelled out in the
 * route command instead.
 */
#define XMIT_HOOK	"xmit"
#define XMIT_SECTION	"lwt_xmit"
/* Nanoseconds per second, used to build SCM_TXTIME timestamps. */
#define NSEC_PER_SEC	1000000000ULL
|
||
|
|
||
|
/* send a ping to be rerouted to the target device */
|
||
|
static void ping_once(const char *ip)
|
||
|
{
|
||
|
/* We won't get a reply. Don't fail here */
|
||
|
SYS_NOFAIL("ping %s -c1 -W1 -s %d",
|
||
|
ip, ICMP_PAYLOAD_SIZE);
|
||
|
}
|
||
|
|
||
|
/* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop
|
||
|
* error. This is done via TX tstamp to force buffering delayed packets.
|
||
|
*/
|
||
|
static int overflow_fq(int snd_target, const char *target_ip)
|
||
|
{
|
||
|
struct sockaddr_in addr = {
|
||
|
.sin_family = AF_INET,
|
||
|
.sin_port = htons(1234),
|
||
|
};
|
||
|
|
||
|
char data_buf[8]; /* only #pkts matter, so use a random small buffer */
|
||
|
char control_buf[CMSG_SPACE(sizeof(uint64_t))];
|
||
|
struct iovec iov = {
|
||
|
.iov_base = data_buf,
|
||
|
.iov_len = sizeof(data_buf),
|
||
|
};
|
||
|
int err = -1;
|
||
|
int s = -1;
|
||
|
struct sock_txtime txtime_on = {
|
||
|
.clockid = CLOCK_MONOTONIC,
|
||
|
.flags = 0,
|
||
|
};
|
||
|
struct msghdr msg = {
|
||
|
.msg_name = &addr,
|
||
|
.msg_namelen = sizeof(addr),
|
||
|
.msg_control = control_buf,
|
||
|
.msg_controllen = sizeof(control_buf),
|
||
|
.msg_iovlen = 1,
|
||
|
.msg_iov = &iov,
|
||
|
};
|
||
|
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
|
||
|
|
||
|
memset(data_buf, 0, sizeof(data_buf));
|
||
|
|
||
|
s = socket(AF_INET, SOCK_DGRAM, 0);
|
||
|
if (!ASSERT_GE(s, 0, "socket"))
|
||
|
goto out;
|
||
|
|
||
|
err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on));
|
||
|
if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)"))
|
||
|
goto out;
|
||
|
|
||
|
err = inet_pton(AF_INET, target_ip, &addr.sin_addr);
|
||
|
if (!ASSERT_EQ(err, 1, "inet_pton"))
|
||
|
goto out;
|
||
|
|
||
|
while (snd_target > 0) {
|
||
|
struct timespec now;
|
||
|
|
||
|
memset(control_buf, 0, sizeof(control_buf));
|
||
|
cmsg->cmsg_type = SCM_TXTIME;
|
||
|
cmsg->cmsg_level = SOL_SOCKET;
|
||
|
cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t));
|
||
|
|
||
|
err = clock_gettime(CLOCK_MONOTONIC, &now);
|
||
|
if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) {
|
||
|
err = -1;
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
*(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC +
|
||
|
now.tv_nsec;
|
||
|
|
||
|
/* we will intentionally send more than fq limit, so ignore
|
||
|
* the error here.
|
||
|
*/
|
||
|
sendmsg(s, &msg, MSG_NOSIGNAL);
|
||
|
snd_target--;
|
||
|
}
|
||
|
|
||
|
/* no kernel crash so far is considered success */
|
||
|
err = 0;
|
||
|
|
||
|
out:
|
||
|
if (s >= 0)
|
||
|
close(s);
|
||
|
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
static int setup(const char *tun_dev)
|
||
|
{
|
||
|
int target_index = -1;
|
||
|
int tap_fd = -1;
|
||
|
|
||
|
tap_fd = open_tuntap(tun_dev, false);
|
||
|
if (!ASSERT_GE(tap_fd, 0, "open_tun"))
|
||
|
return -1;
|
||
|
|
||
|
target_index = if_nametoindex(tun_dev);
|
||
|
if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
|
||
|
return -1;
|
||
|
|
||
|
SYS(fail, "ip link add link_err type dummy");
|
||
|
SYS(fail, "ip link set lo up");
|
||
|
SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
|
||
|
SYS(fail, "ip link set link_err up");
|
||
|
SYS(fail, "ip link set %s up", tun_dev);
|
||
|
|
||
|
SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit",
|
||
|
TEST_CIDR, BPF_OBJECT);
|
||
|
|
||
|
SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100",
|
||
|
target_index);
|
||
|
SYS(fail, "ip route add t 100 default dev %s", tun_dev);
|
||
|
|
||
|
return tap_fd;
|
||
|
|
||
|
fail:
|
||
|
if (tap_fd >= 0)
|
||
|
close(tap_fd);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
static void test_lwt_reroute_normal_xmit(void)
|
||
|
{
|
||
|
const char *tun_dev = "tun0";
|
||
|
int tun_fd = -1;
|
||
|
int ifindex = -1;
|
||
|
char ip[256];
|
||
|
struct timeval timeo = {
|
||
|
.tv_sec = 0,
|
||
|
.tv_usec = 250000,
|
||
|
};
|
||
|
|
||
|
tun_fd = setup(tun_dev);
|
||
|
if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
|
||
|
return;
|
||
|
|
||
|
ifindex = if_nametoindex(tun_dev);
|
||
|
if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
|
||
|
return;
|
||
|
|
||
|
snprintf(ip, 256, "10.0.0.%d", ifindex);
|
||
|
|
||
|
/* ping packets should be received by the tun device */
|
||
|
ping_once(ip);
|
||
|
|
||
|
if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1,
|
||
|
"wait_for_packet"))
|
||
|
log_err("%s xmit", __func__);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Test the failure case when the skb is dropped at the qdisc. This is a
|
||
|
* regression prevention at the xmit hook only.
|
||
|
*/
|
||
|
static void test_lwt_reroute_qdisc_dropped(void)
|
||
|
{
|
||
|
const char *tun_dev = "tun0";
|
||
|
int tun_fd = -1;
|
||
|
int ifindex = -1;
|
||
|
char ip[256];
|
||
|
|
||
|
tun_fd = setup(tun_dev);
|
||
|
if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
|
||
|
goto fail;
|
||
|
|
||
|
SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev);
|
||
|
|
||
|
ifindex = if_nametoindex(tun_dev);
|
||
|
if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
|
||
|
return;
|
||
|
|
||
|
snprintf(ip, 256, "10.0.0.%d", ifindex);
|
||
|
ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq");
|
||
|
|
||
|
fail:
|
||
|
if (tun_fd >= 0)
|
||
|
close(tun_fd);
|
||
|
}
|
||
|
|
||
|
static void *test_lwt_reroute_run(void *arg)
|
||
|
{
|
||
|
netns_delete();
|
||
|
RUN_TEST(lwt_reroute_normal_xmit);
|
||
|
RUN_TEST(lwt_reroute_qdisc_dropped);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
void test_lwt_reroute(void)
|
||
|
{
|
||
|
pthread_t test_thread;
|
||
|
int err;
|
||
|
|
||
|
/* Run the tests in their own thread to isolate the namespace changes
|
||
|
* so they do not affect the environment of other tests.
|
||
|
* (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
|
||
|
*/
|
||
|
err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL);
|
||
|
if (ASSERT_OK(err, "pthread_create"))
|
||
|
ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
|
||
|
}
|