DPDK BPF

DPDK 自版本 18.05 已集成了 librte_bpf, 主要利用rte_eth_rx_burst/rte_eth_tx_burst 回调函数机制, 执行eBPF字节码. 当前支持以下特性:

base eBPF ISA (except tail-pointer)
JIT (x86_64 and arm64 only)
eBPF code verifier
user-defined helper functions (64-bit only)
RX/TX filter (加载 eBPF prog 作为 DPDK RX/TX 回调函数处理数据包, 单独跟每个 RX/TX 队列绑定)
rte_mbuf access (64-bit only)

不支持的功能特性:

cBPF
eBPF MAP
tail-pointer calls
external function calls for 32-bit platforms

DPDK BPF 执行流程

在这里插入图片描述

Fedora

sudo dnf install -y git gcc ncurses-devel elfutils-libelf-devel bc openssl-devel libcap-devel clang llvm graphviz bison flex glibc-static

generate bpf prog

examples/bpf/t1.c 提供了一个处理原始数据报文的例子: 当报文为未分片的 IPv4/UDP 报文, 目的IP地址为 1.2.3.4 且UDP目的端口为 5000 时返回 1, 否则返回 0 (作为 RX/TX 过滤器加载时, 返回 0 的报文会被丢弃):

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

/*
 * eBPF program sample.
 * Accepts pointer to first segment packet data as an input parameter.
 * analog of tcpdump -s 1 -d 'dst 1.2.3.4 && udp && dst port 5000'
 * (000) ldh      [12]
 * (001) jeq      #0x800           jt 2    jf 12
 * (002) ld       [30]
 * (003) jeq      #0x1020304       jt 4    jf 12
 * (004) ldb      [23]
 * (005) jeq      #0x11            jt 6    jf 12
 * (006) ldh      [20]
 * (007) jset     #0x1fff          jt 12   jf 8
 * (008) ldxb     4*([14]&0xf)
 * (009) ldh      [x + 16]
 * (010) jeq      #0x1388          jt 11   jf 12
 * (011) ret      #1
 * (012) ret      #0
 *
 * To compile on x86:
 * clang -O2 -U __GNUC__ -target bpf -c t1.c
 *
 * To compile on ARM:
 * clang -O2 -I/usr/include/aarch64-linux-gnu/ -target bpf -c t1.c
 */

#include <stdint.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <arpa/inet.h>

/*
 * Classify one raw Ethernet frame.
 *
 * pkt points at the first byte of the Ethernet header.
 * Returns 1 when the frame is IPv4 (EtherType 0x0800), carries protocol 17
 * (UDP), has a zero frag_off field, is addressed to 1.2.3.4 and has UDP
 * destination port 5000; returns 0 in every other case.
 */
uint64_t
entry(void *pkt)
{
	const struct ether_header *eth = pkt;
	const struct iphdr *ip;
	const struct udphdr *udp;

	/* Anything that is not IPv4 is rejected straight away. */
	if (eth->ether_type != htons(0x0800))
		return 0;

	/* The IP header starts right behind the Ethernet header. */
	ip = (const void *)(eth + 1);

	if (ip->protocol != 17)
		return 0;

	/*
	 * NOTE(review): the mask 0x1ffff differs from the 0x1fff used by the
	 * tcpdump analog in the header comment; kept as-is to preserve the
	 * sample's behavior.
	 */
	if ((ip->frag_off & 0x1ffff) != 0)
		return 0;

	if (ip->daddr != htonl(0x1020304))
		return 0;

	/* ihl counts 32-bit words, so the UDP header is ihl * 4 bytes further. */
	udp = (const void *)((const uint8_t *)ip + ip->ihl * 4);

	return udp->dest == htons(5000) ? 1 : 0;
}

编译bpf字节码:

# clang -O2 -U __GNUC__ -I${RTE_SDK}/${RTE_TARGET}/include -target bpf  -Wno-int-to-void-pointer-cast -c t1.c
# llvm-objdump  --arch=bpf -S t1.o

t1.o:	file format elf64-bpf

Disassembly of section .text:

0000000000000000 <entry>:
0:	b7 00 00 00 00 00 00 00	r0 = 0
1:	69 12 0c 00 00 00 00 00	r2 = *(u16 *)(r1 + 12)
2:	55 02 0f 00 08 00 00 00	if r2 != 8 goto +15 <LBB0_6>
3:	71 12 17 00 00 00 00 00	r2 = *(u8 *)(r1 + 23)
4:	55 02 0d 00 11 00 00 00	if r2 != 17 goto +13 <LBB0_6>
5:	69 12 14 00 00 00 00 00	r2 = *(u16 *)(r1 + 20)
6:	55 02 0b 00 00 00 00 00	if r2 != 0 goto +11 <LBB0_6>
7:	61 12 1e 00 00 00 00 00	r2 = *(u32 *)(r1 + 30)
8:	55 02 09 00 01 02 03 04	if r2 != 67305985 goto +9 <LBB0_6>
9:	07 01 00 00 0e 00 00 00	r1 += 14
10:	71 12 00 00 00 00 00 00	r2 = *(u8 *)(r1 + 0)
11:	67 02 00 00 02 00 00 00	r2 <<= 2
12:	57 02 00 00 3c 00 00 00	r2 &= 60
13:	0f 21 00 00 00 00 00 00	r1 += r2
14:	69 11 02 00 00 00 00 00	r1 = *(u16 *)(r1 + 2)
15:	b7 00 00 00 01 00 00 00	r0 = 1
16:	15 01 01 00 13 88 00 00	if r1 == 34835 goto +1 <LBB0_6>
17:	b7 00 00 00 00 00 00 00	r0 = 0

0000000000000090 <LBB0_6>:
18:	95 00 00 00 00 00 00 00	exit

load/unload bpf prog

testpmd 提供了一组bpf命令用于验证bpf功能:

testpmd> bpf-load rx|tx <portid> <queueid> <load-flags> <filename>
testpmd> bpf-unload rx|tx <portid> <queueid>

bpf with rte_mbuf*

bpf入参为 rte_mbuf *

bpf-load rx 0 0 M <path>/t3.o
...
bpf-load rx 0 n M <path>/t3.o

bpf with raw packet

bpf入参为原始报文数据

bpf-load rx 0 0 J <path>/t4.o
...
bpf-load rx 0 n J <path>/t4.o

bpf with vm

bpf入参为原始报文数据, 使用 bpf vm 执行字节码:

bpf-load rx 0 0 - <path>/t5.o
...
bpf-load rx 0 n - <path>/t5.o

unload bpf

bpf-unload rx 0 0
...
bpf-unload rx 0 n

Performance

硬件

CPU: Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz
Mellanox Technologies MT2892 Family [ConnectX-6 Dx]

dpdk 21.05 testpmd:

#!/bin/sh
# Launches dpdk-testpmd 21.05 in interactive mode with icmpecho forwarding.

# Optional extra EAL logging; left commented out, so ${EAL_ARGS} expands to
# nothing unless it is set in the environment.
#EAL_ARGS+=" --log-level="lib.eal":8 --log-level=pmd:8 --log-level="pmd.net.mlx5":3 "

# Number of RX and TX queues passed to --rxq/--txq.
NR_Q=18
APP=./dpdk-testpmd-21.05

$APP -l 24-47 --socket-mem=4096,4096 -n 4  -w '54:00.1,dv_flow_en=1,mprq_en=1,rxqs_min_mprq=1,rx_vec_en=1' ${EAL_ARGS}  -- \
    -i  --rxq=${NR_Q} --txq=${NR_Q} --nb-cores=23 --forward-mode icmpecho --no-numa --enable-rx-cksum --auto-start --rxd=2048 --txd=2048 --burst=64

bpf prog

bpf 丢弃UDP目的端口为5000所有数据报文: t1.c 简化版, 移除了IP地址判断:

#include <stdint.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <arpa/inet.h>

/*
 * Simplified packet filter: single out IPv4/UDP frames whose UDP destination
 * port is 5000.
 *
 * pkt points at the first byte of the Ethernet header.
 * Returns 0 for an IPv4 (EtherType 0x0800) frame carrying protocol 17 (UDP)
 * with destination port 5000, and 1 for everything else. The return value is
 * consumed by the DPDK BPF RX/TX filter callback, where 0 is expected to mean
 * "drop this packet" (see the DPDK BPF library documentation).
 */
uint64_t
entry(void *pkt)
{
	const struct ether_header *ether_header = pkt;

	/* Non-IPv4 traffic is passed through untouched. */
	if (ether_header->ether_type != htons(0x0800))
		return 1;

	const struct iphdr *iphdr = (const void *)(ether_header + 1);

	/* Only UDP (IP protocol 17) is inspected further. */
	if (iphdr->protocol != 17)
		return 1;

	/* ihl counts 32-bit words; convert to bytes to find the UDP header. */
	int hlen = iphdr->ihl * 4;
	const struct udphdr *udphdr =
		(const void *)((const uint8_t *)iphdr + hlen);

	/*
	 * Other UDP ports must be kept: the original returned 0 on both
	 * branches here, which singled out every UDP packet instead of only
	 * those addressed to port 5000.
	 */
	if (udphdr->dest != htons(5000))
		return 1;

	return 0;
}

编译:

clang -O2 -U __GNUC__ -I${RTE_SDK}/${RTE_TARGET}/include -target bpf  -Wno-int-to-void-pointer-cast -c t4.c

Load:

bpf-load rx 0 0 J <path>/t4.o
bpf-load rx 0 1 J <path>/t4.o
bpf-load rx 0 2 J <path>/t4.o
bpf-load rx 0 3 J <path>/t4.o
bpf-load rx 0 4 J <path>/t4.o
bpf-load rx 0 5 J <path>/t4.o
bpf-load rx 0 6 J <path>/t4.o
bpf-load rx 0 7 J <path>/t4.o
bpf-load rx 0 8 J <path>/t4.o
bpf-load rx 0 9 J <path>/t4.o
bpf-load rx 0 10 J <path>/t4.o
bpf-load rx 0 11 J <path>/t4.o
bpf-load rx 0 12 J <path>/t4.o
bpf-load rx 0 13 J <path>/t4.o
bpf-load rx 0 14 J <path>/t4.o
bpf-load rx 0 15 J <path>/t4.o
bpf-load rx 0 16 J <path>/t4.o
bpf-load rx 0 17 J <path>/t4.o

result

在当前测试硬件环境下, icmpecho 为 RX-DROP处理模式, 执行bpf字节码只做简单丢弃, 这种方式对性能几乎无影响, 可考虑用于插件处理数据包:

testpmd> show port stats all
######################## NIC statistics for port 0  ########################
RX-packets: 81360790320 RX-missed: 8141       RX-bytes:  4881647419320
RX-errors: 0
RX-nombuf:  0
TX-packets: 4          TX-errors: 0          TX-bytes:  360
Throughput (since last show)
Rx-pps:    149155140          Rx-bps:  71594467312
Tx-pps:            0          Tx-bps:            0
############################################################################
bpf-unload rx 0 0
bpf-unload rx 0 1
bpf-unload rx 0 2
bpf-unload rx 0 3
bpf-unload rx 0 4
bpf-unload rx 0 5
bpf-unload rx 0 6
bpf-unload rx 0 7
bpf-unload rx 0 8
bpf-unload rx 0 9
bpf-unload rx 0 10
bpf-unload rx 0 11
bpf-unload rx 0 12
bpf-unload rx 0 13
bpf-unload rx 0 14
bpf-unload rx 0 15
bpf-unload rx 0 16
bpf-unload rx 0 17
testpmd> show port stats all
######################## NIC statistics for port 0  ########################
RX-packets: 60151600493 RX-missed: 8141       RX-bytes:  3609096029700
RX-errors: 0
RX-nombuf:  0
TX-packets: 4          TX-errors: 0          TX-bytes:  360
Throughput (since last show)
Rx-pps:    149159900          Rx-bps:  71596752112
Tx-pps:            0          Tx-bps:            0
############################################################################
testpmd>