DPDK BPF
DPDK 自版本 18.05 起已集成 librte_bpf, 主要利用 rte_eth_rx_burst/rte_eth_tx_burst 的回调函数机制执行 eBPF 字节码. 当前支持以下特性:
- base eBPF ISA (except tail-pointer)
- JIT (x86_64 and arm64 only)
- eBPF code verifier
- user-defined helper functions (64-bit only)
- RX/TX filter (加载 eBPF prog 作为 DPDK RX/TX 回调函数处理数据包, 需单独与每个 RX/TX 队列绑定)
- rte_mbuf access (64-bit only)
不支持的功能特性:
- cBPF
- eBPF MAP
- tail-pointer calls
- external function calls for 32-bit platforms
DPDK BPF 执行流程
Fedora
sudo dnf install -y git gcc ncurses-devel elfutils-libelf-devel bc \
    openssl-devel libcap-devel clang llvm graphviz bison flex glibc-static
generate bpf prog
examples/bpf/t1.c 提供了一个处理原始数据报文的例子: 当报文为 IPv4/UDP、目的 IP 地址为 1.2.3.4 且 UDP 目的端口为 5000 时返回 1 (匹配), 否则返回 0; 作为 RX/TX filter 使用时, 返回 0 的报文会被丢弃:
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

/*
 * eBPF program sample.
 * Accepts pointer to first segment packet data as an input parameter.
 * analog of tcpdump -s 1 -d 'dst 1.2.3.4 && udp && dst port 5000'
 * (000) ldh      [12]
 * (001) jeq      #0x800           jt 2    jf 12
 * (002) ld       [30]
 * (003) jeq      #0x1020304       jt 4    jf 12
 * (004) ldb      [23]
 * (005) jeq      #0x11            jt 6    jf 12
 * (006) ldh      [20]
 * (007) jset     #0x1fff          jt 12   jf 8
 * (008) ldxb     4*([14]&0xf)
 * (009) ldh      [x + 16]
 * (010) jeq      #0x1388          jt 11   jf 12
 * (011) ret      #1
 * (012) ret      #0
 *
 * To compile on x86:
 * clang -O2 -U __GNUC__ -target bpf -c t1.c
 *
 * To compile on ARM:
 * clang -O2 -I/usr/include/aarch64-linux-gnu/ -target bpf -c t1.c
 */

#include <stdint.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <arpa/inet.h>

/*
 * Classify one raw Ethernet frame.
 *
 * pkt points at the first byte of the Ethernet header.
 *
 * Returns 1 when the frame is IPv4 (ethertype 0x0800), carries UDP
 * (protocol 17), has a zero frag_off field, is addressed to 1.2.3.4 and
 * has UDP destination port 5000; returns 0 in every other case.
 *
 * No length checks are performed on the packet data.
 */
uint64_t
entry(void *pkt)
{
	struct ether_header *eth = pkt;
	struct iphdr *ip;
	struct udphdr *udp;
	int ip_hdr_len;

	/* Only IPv4 frames are of interest. */
	if (eth->ether_type != htons(0x0800))
		return 0;

	/* The IPv4 header starts right after the Ethernet header. */
	ip = (void *)(eth + 1);

	if (ip->protocol != 17)
		return 0;

	/*
	 * NOTE(review): frag_off is a 16-bit field, so the 0x1ffff mask
	 * covers all of its bits; a packet with any flag set (e.g. DF) is
	 * rejected here. The cBPF listing above tests only 0x1fff, so the
	 * intent was presumably htons(0x1fff) - confirm before changing.
	 */
	if ((ip->frag_off & 0x1ffff) != 0)
		return 0;

	if (ip->daddr != htonl(0x1020304))
		return 0;

	/* ihl counts 32-bit words; the UDP header follows the IP header. */
	ip_hdr_len = ip->ihl * 4;
	udp = (void *)((uint8_t *)ip + ip_hdr_len);

	return (udp->dest == htons(5000)) ? 1 : 0;
}
编译bpf字节码:
# clang -O2 -U __GNUC__ -I${RTE_SDK}/${RTE_TARGET}/include -target bpf -Wno-int-to-void-pointer-cast -c t1.c
# llvm-objdump --arch=bpf -S t1.o

t1.o: file format elf64-bpf

Disassembly of section .text:

0000000000000000 <entry>:
       0: b7 00 00 00 00 00 00 00 r0 = 0
       1: 69 12 0c 00 00 00 00 00 r2 = *(u16 *)(r1 + 12)
       2: 55 02 0f 00 08 00 00 00 if r2 != 8 goto +15 <LBB0_6>
       3: 71 12 17 00 00 00 00 00 r2 = *(u8 *)(r1 + 23)
       4: 55 02 0d 00 11 00 00 00 if r2 != 17 goto +13 <LBB0_6>
       5: 69 12 14 00 00 00 00 00 r2 = *(u16 *)(r1 + 20)
       6: 55 02 0b 00 00 00 00 00 if r2 != 0 goto +11 <LBB0_6>
       7: 61 12 1e 00 00 00 00 00 r2 = *(u32 *)(r1 + 30)
       8: 55 02 09 00 01 02 03 04 if r2 != 67305985 goto +9 <LBB0_6>
       9: 07 01 00 00 0e 00 00 00 r1 += 14
      10: 71 12 00 00 00 00 00 00 r2 = *(u8 *)(r1 + 0)
      11: 67 02 00 00 02 00 00 00 r2 <<= 2
      12: 57 02 00 00 3c 00 00 00 r2 &= 60
      13: 0f 21 00 00 00 00 00 00 r1 += r2
      14: 69 11 02 00 00 00 00 00 r1 = *(u16 *)(r1 + 2)
      15: b7 00 00 00 01 00 00 00 r0 = 1
      16: 15 01 01 00 13 88 00 00 if r1 == 34835 goto +1 <LBB0_6>
      17: b7 00 00 00 00 00 00 00 r0 = 0

0000000000000090 <LBB0_6>:
      18: 95 00 00 00 00 00 00 00 exit
load/unload bpf prog
testpmd 提供了一组bpf命令用于验证bpf功能:
testpmd> bpf-load rx|tx <portid> <queueid> <load-flags> <filename>
testpmd> bpf-unload rx|tx <portid> <queueid>
bpf with rte_mbuf*
bpf入参为 rte_mbuf *
bpf-load rx 0 0 M <path>/t3.o
...
bpf-load rx 0 n M <path>/t3.o
bpf with raw packet
bpf入参为原始报文数据
bpf-load rx 0 0 J <path>/t4.o
...
bpf-load rx 0 n J <path>/t4.o
bpf with vm
bpf入参为原始报文数据, 使用 bpf vm 执行字节码:
bpf-load rx 0 0 - <path>/t5.o
...
bpf-load rx 0 n - <path>/t5.o
unload bpf
bpf-unload rx 0 0
...
bpf-unload rx 0 n
Performance
硬件
CPU: Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz
Mellanox Technologies MT2892 Family [ConnectX-6 Dx]
dpdk 21.05 testpmd:
#!/bin/sh
# Launch dpdk-testpmd in icmpecho forwarding mode for the BPF RX-filter test.

# Optional extra EAL arguments (verbose logging); disabled by default.
#EAL_ARGS+=" --log-level="lib.eal":8 --log-level=pmd:8 --log-level="pmd.net.mlx5":3 "

# Number of RX and TX queues, passed to --rxq/--txq below.
NR_Q=18
# testpmd binary to run.
APP=./dpdk-testpmd-21.05

# ${EAL_ARGS} is left unquoted on purpose so that it splits into separate
# arguments (and disappears entirely when unset).
$APP -l 24-47 --socket-mem=4096,4096 -n 4 -w '54:00.1,dv_flow_en=1,mprq_en=1,rxqs_min_mprq=1,rx_vec_en=1' ${EAL_ARGS} -- \
	-i --rxq=${NR_Q} --txq=${NR_Q} --nb-cores=23 --forward-mode icmpecho --no-numa --enable-rx-cksum --auto-start --rxd=2048 --txd=2048 --burst=64
bpf prog
bpf 丢弃 UDP 目的端口为 5000 的所有数据报文 (t4.c, 为 t1.c 的简化版, 移除了 IP 地址判断):
#include <stdint.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <arpa/inet.h>

/*
 * Simplified variant of t1.c without the destination-address check.
 *
 * pkt points at the first byte of the Ethernet header.
 *
 * Returns 0 for IPv4/UDP packets whose destination port is 5000 and 1 for
 * every other packet. Per the accompanying description the program is used
 * as an RX filter that drops port-5000 UDP traffic, i.e. a zero result
 * marks the packet to be dropped.
 *
 * No length checks are performed on the packet data.
 */
uint64_t
entry(void *pkt)
{
	struct ether_header *ether_header = (void *)pkt;

	/* Not IPv4: let the packet through. */
	if (ether_header->ether_type != htons(0x0800))
		return 1;

	struct iphdr *iphdr = (void *)(ether_header + 1);

	/* Not UDP (protocol 17): let the packet through. */
	if (iphdr->protocol != 17)
		return 1;

	/* ihl counts 32-bit words; the UDP header follows the IP header. */
	int hlen = iphdr->ihl * 4;
	struct udphdr *udphdr = (void *)((uint8_t *)iphdr + hlen);

	/*
	 * Only destination port 5000 is filtered out. The previous version
	 * returned 0 on both sides of this test, so every UDP packet got 0.
	 */
	if (udphdr->dest != htons(5000))
		return 1;

	return 0;
}
编译:
clang -O2 -U __GNUC__ -I${RTE_SDK}/${RTE_TARGET}/include -target bpf -Wno-int-to-void-pointer-cast -c t4.c
Load:
bpf-load rx 0 0 J <path>/t4.o
bpf-load rx 0 1 J <path>/t4.o
bpf-load rx 0 2 J <path>/t4.o
bpf-load rx 0 3 J <path>/t4.o
bpf-load rx 0 4 J <path>/t4.o
bpf-load rx 0 5 J <path>/t4.o
bpf-load rx 0 6 J <path>/t4.o
bpf-load rx 0 7 J <path>/t4.o
bpf-load rx 0 8 J <path>/t4.o
bpf-load rx 0 9 J <path>/t4.o
bpf-load rx 0 10 J <path>/t4.o
bpf-load rx 0 11 J <path>/t4.o
bpf-load rx 0 12 J <path>/t4.o
bpf-load rx 0 13 J <path>/t4.o
bpf-load rx 0 14 J <path>/t4.o
bpf-load rx 0 15 J <path>/t4.o
bpf-load rx 0 16 J <path>/t4.o
bpf-load rx 0 17 J <path>/t4.o
result
在当前测试硬件环境下, icmpecho 为 RX-DROP 处理模式, 执行 bpf 字节码只做简单丢弃, 这种方式对性能几乎无影响, 可考虑用于插件处理数据包 (下面先给出加载 bpf 时的统计, 再给出卸载 bpf 后的统计):
testpmd> show port stats all

  ######################## NIC statistics for port 0 ########################
  RX-packets: 81360790320 RX-missed: 8141 RX-bytes: 4881647419320
  RX-errors: 0
  RX-nombuf: 0
  TX-packets: 4 TX-errors: 0 TX-bytes: 360

  Throughput (since last show)
  Rx-pps: 149155140 Rx-bps: 71594467312
  Tx-pps: 0 Tx-bps: 0
  ############################################################################
bpf-unload rx 0 0
bpf-unload rx 0 1
bpf-unload rx 0 2
bpf-unload rx 0 3
bpf-unload rx 0 4
bpf-unload rx 0 5
bpf-unload rx 0 6
bpf-unload rx 0 7
bpf-unload rx 0 8
bpf-unload rx 0 9
bpf-unload rx 0 10
bpf-unload rx 0 11
bpf-unload rx 0 12
bpf-unload rx 0 13
bpf-unload rx 0 14
bpf-unload rx 0 15
bpf-unload rx 0 16
bpf-unload rx 0 17
testpmd> show port stats all

  ######################## NIC statistics for port 0 ########################
  RX-packets: 60151600493 RX-missed: 8141 RX-bytes: 3609096029700
  RX-errors: 0
  RX-nombuf: 0
  TX-packets: 4 TX-errors: 0 TX-bytes: 360

  Throughput (since last show)
  Rx-pps: 149159900 Rx-bps: 71596752112
  Tx-pps: 0 Tx-bps: 0
  ############################################################################
testpmd>
Reference
eBPF spec
DPDK- Berkeley Packet Filter Library
Awesome eBPF
Cilium - BPF and XDP Reference Guide