Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2897,8 +2897,34 @@ union bpf_attr {
*
* * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
* **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
* Indicate the new IP header version after decapsulating the outer
* IP header. Used when the inner and outer IP versions are different.
* Indicate the new IP header version after decapsulating the
* outer IP header. Used when the inner and outer IP versions
* are different. These flags only trigger a protocol change
* without clearing any tunnel-specific GSO flags.
*
* * **BPF_F_ADJ_ROOM_DECAP_L4_GRE**:
* Clear GRE tunnel GSO flags (SKB_GSO_GRE and SKB_GSO_GRE_CSUM)
* when decapsulating a GRE tunnel.
*
* * **BPF_F_ADJ_ROOM_DECAP_L4_UDP**:
* Clear UDP tunnel GSO flags (SKB_GSO_UDP_TUNNEL and
* SKB_GSO_UDP_TUNNEL_CSUM) when decapsulating a UDP tunnel.
*
* * **BPF_F_ADJ_ROOM_DECAP_IPXIP4**:
* Clear IPIP/SIT tunnel GSO flag (SKB_GSO_IPXIP4) when decapsulating
* a tunnel with an outer IPv4 header (IPv4-in-IPv4 or IPv6-in-IPv4).
*
* * **BPF_F_ADJ_ROOM_DECAP_IPXIP6**:
* Clear IPv6 encapsulation tunnel GSO flag (SKB_GSO_IPXIP6) when
* decapsulating a tunnel with an outer IPv6 header (IPv6-in-IPv6
* or IPv4-in-IPv6).
*
* When using the L4 or IPXIP decapsulation flags above, the
* skb->encapsulation flag is automatically cleared. For GSO
* packets, it is cleared only if no tunnel-specific GSO flags
* (SKB_GSO_UDP_TUNNEL, SKB_GSO_UDP_TUNNEL_CSUM, SKB_GSO_GRE,
* SKB_GSO_GRE_CSUM, SKB_GSO_IPXIP4, SKB_GSO_IPXIP6, SKB_GSO_ESP)
* remain on the packet, i.e. all tunnel layers have been
* decapsulated. For non-GSO packets, it is cleared unconditionally.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
Expand Down Expand Up @@ -6087,7 +6113,7 @@ enum {
};

/* BPF_FUNC_skb_adjust_room flags. */
enum {
enum bpf_adj_room_flags {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
Expand All @@ -6097,6 +6123,10 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
BPF_F_ADJ_ROOM_DECAP_L4_GRE = (1ULL << 9),
BPF_F_ADJ_ROOM_DECAP_L4_UDP = (1ULL << 10),
BPF_F_ADJ_ROOM_DECAP_IPXIP4 = (1ULL << 11),
BPF_F_ADJ_ROOM_DECAP_IPXIP6 = (1ULL << 12),
};

enum {
Expand Down
119 changes: 102 additions & 17 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/gre.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
Expand Down Expand Up @@ -3420,14 +3421,27 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
BPF_F_ADJ_ROOM_DECAP_L3_IPV6)

#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
#define BPF_F_ADJ_ROOM_DECAP_L4_MASK (BPF_F_ADJ_ROOM_DECAP_L4_UDP | \
BPF_F_ADJ_ROOM_DECAP_L4_GRE)

#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK (BPF_F_ADJ_ROOM_DECAP_IPXIP4 | \
BPF_F_ADJ_ROOM_DECAP_IPXIP6)

#define BPF_F_ADJ_ROOM_ENCAP_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
BPF_F_ADJ_ROOM_ENCAP_L2( \
BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
BPF_F_ADJ_ROOM_DECAP_L3_MASK)
BPF_ADJ_ROOM_ENCAP_L2_MASK))

#define BPF_F_ADJ_ROOM_DECAP_MASK (BPF_F_ADJ_ROOM_DECAP_L3_MASK | \
BPF_F_ADJ_ROOM_DECAP_L4_MASK | \
BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)

#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_MASK | \
BPF_F_ADJ_ROOM_DECAP_MASK | \
BPF_F_ADJ_ROOM_NO_CSUM_RESET)

static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
Expand Down Expand Up @@ -3547,8 +3561,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
{
int ret;

if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
BPF_F_ADJ_ROOM_DECAP_L3_MASK |
if (unlikely(flags & ~(BPF_F_ADJ_ROOM_DECAP_MASK |
BPF_F_ADJ_ROOM_FIXED_GSO |
BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
return -EINVAL;

Expand Down Expand Up @@ -3582,9 +3596,48 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
skb_increase_gso_size(shinfo, len_diff);

/* Selective GSO flag clearing based on decap type.
* Only clear the flags for the tunnel layer being removed.
*/
if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
(shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM)))
shinfo->gso_type &= ~(SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM);
if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
(shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
shinfo->gso_type &= ~(SKB_GSO_GRE |
SKB_GSO_GRE_CSUM);
if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
(shinfo->gso_type & SKB_GSO_IPXIP4))
shinfo->gso_type &= ~SKB_GSO_IPXIP4;
if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
(shinfo->gso_type & SKB_GSO_IPXIP6))
shinfo->gso_type &= ~SKB_GSO_IPXIP6;

/* Clear encapsulation flag only when no tunnel GSO flags remain */
if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
if (!(shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
SKB_GSO_IPXIP4 |
SKB_GSO_IPXIP6 |
SKB_GSO_ESP)))
if (skb->encapsulation)
skb->encapsulation = 0;
}

/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= SKB_GSO_DODGY;
shinfo->gso_segs = 0;
} else {
/* For non-GSO packets, clear encapsulation if decap flags are set */
if ((flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) &&
skb->encapsulation)
skb->encapsulation = 0;
}

return 0;
Expand Down Expand Up @@ -3644,8 +3697,7 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32 off;
int ret;

if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
return -EINVAL;
if (unlikely(len_diff_abs > 0xfffU))
return -EFAULT;
Expand All @@ -3664,20 +3716,53 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOTSUPP;
}

if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
if (flags & BPF_F_ADJ_ROOM_DECAP_MASK) {
u32 len_decap_min = 0;

if (!shrink)
return -EINVAL;

switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
/* Reject mutually exclusive decap flag pairs. */
if ((flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) ==
BPF_F_ADJ_ROOM_DECAP_L3_MASK)
return -EINVAL;

if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK) ==
BPF_F_ADJ_ROOM_DECAP_L4_MASK)
return -EINVAL;

if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK) ==
BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)
return -EINVAL;

/* Reject mutually exclusive decap tunnel type flags. */
if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK) &&
(flags & BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK))
return -EINVAL;

if (flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK)
len_decap_min += bpf_skb_net_base_len(skb);

if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP)
len_decap_min += sizeof(struct udphdr);

if (flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE)
len_decap_min += sizeof(struct gre_base_hdr);

if (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4)
len_decap_min += sizeof(struct iphdr);

if (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6)
len_decap_min += sizeof(struct ipv6hdr);

if (len_diff_abs < len_decap_min)
return -EINVAL;

if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
len_min = sizeof(struct iphdr);
break;
case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:

if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
len_min = sizeof(struct ipv6hdr);
break;
default:
return -EINVAL;
}
}

len_cur = skb->len - skb_network_offset(skb);
Expand Down
36 changes: 33 additions & 3 deletions tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2897,8 +2897,34 @@ union bpf_attr {
*
* * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
* **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
* Indicate the new IP header version after decapsulating the outer
* IP header. Used when the inner and outer IP versions are different.
* Indicate the new IP header version after decapsulating the
* outer IP header. Used when the inner and outer IP versions
* are different. These flags only trigger a protocol change
* without clearing any tunnel-specific GSO flags.
*
* * **BPF_F_ADJ_ROOM_DECAP_L4_GRE**:
* Clear GRE tunnel GSO flags (SKB_GSO_GRE and SKB_GSO_GRE_CSUM)
* when decapsulating a GRE tunnel.
*
* * **BPF_F_ADJ_ROOM_DECAP_L4_UDP**:
* Clear UDP tunnel GSO flags (SKB_GSO_UDP_TUNNEL and
* SKB_GSO_UDP_TUNNEL_CSUM) when decapsulating a UDP tunnel.
*
* * **BPF_F_ADJ_ROOM_DECAP_IPXIP4**:
* Clear IPIP/SIT tunnel GSO flag (SKB_GSO_IPXIP4) when decapsulating
* a tunnel with an outer IPv4 header (IPv4-in-IPv4 or IPv6-in-IPv4).
*
* * **BPF_F_ADJ_ROOM_DECAP_IPXIP6**:
* Clear IPv6 encapsulation tunnel GSO flag (SKB_GSO_IPXIP6) when
* decapsulating a tunnel with an outer IPv6 header (IPv6-in-IPv6
* or IPv4-in-IPv6).
*
* When using the L4 or IPXIP decapsulation flags above, the
* skb->encapsulation flag is automatically cleared. For GSO
* packets, it is cleared only if no tunnel-specific GSO flags
* (SKB_GSO_UDP_TUNNEL, SKB_GSO_UDP_TUNNEL_CSUM, SKB_GSO_GRE,
* SKB_GSO_GRE_CSUM, SKB_GSO_IPXIP4, SKB_GSO_IPXIP6, SKB_GSO_ESP)
* remain on the packet, i.e. all tunnel layers have been
* decapsulated. For non-GSO packets, it is cleared unconditionally.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
Expand Down Expand Up @@ -6087,7 +6113,7 @@ enum {
};

/* BPF_FUNC_skb_adjust_room flags. */
enum {
enum bpf_adj_room_flags {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
Expand All @@ -6097,6 +6123,10 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
BPF_F_ADJ_ROOM_DECAP_L4_GRE = (1ULL << 9),
BPF_F_ADJ_ROOM_DECAP_L4_UDP = (1ULL << 10),
BPF_F_ADJ_ROOM_DECAP_IPXIP4 = (1ULL << 11),
BPF_F_ADJ_ROOM_DECAP_IPXIP6 = (1ULL << 12),
};

enum {
Expand Down
23 changes: 16 additions & 7 deletions tools/testing/selftests/bpf/progs/test_tc_tunnel.c
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,8 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}

static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto,
__u64 ipxip_flag)
{
__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
struct ipv6_opt_hdr ip6_opt_hdr;
Expand All @@ -607,28 +608,33 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)

switch (proto) {
case IPPROTO_IPIP:
flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 |
ipxip_flag;
break;
case IPPROTO_IPV6:
flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 |
ipxip_flag;
break;
case NEXTHDR_DEST:
if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
sizeof(ip6_opt_hdr)) < 0)
return TC_ACT_OK;
switch (ip6_opt_hdr.nexthdr) {
case IPPROTO_IPIP:
flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 |
ipxip_flag;
break;
case IPPROTO_IPV6:
flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 |
ipxip_flag;
break;
default:
return TC_ACT_OK;
}
break;
case IPPROTO_GRE:
olen += sizeof(struct gre_hdr);
flags |= BPF_F_ADJ_ROOM_DECAP_L4_GRE;
if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
return TC_ACT_OK;
switch (bpf_ntohs(greh.protocol)) {
Expand All @@ -642,6 +648,7 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
break;
case IPPROTO_UDP:
olen += sizeof(struct udphdr);
flags |= BPF_F_ADJ_ROOM_DECAP_L4_UDP;
if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
return TC_ACT_OK;
switch (bpf_ntohs(udph.dest)) {
Expand Down Expand Up @@ -678,7 +685,8 @@ static int decap_ipv4(struct __sk_buff *skb)
return TC_ACT_OK;

return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
iph_outer.protocol);
iph_outer.protocol,
BPF_F_ADJ_ROOM_DECAP_IPXIP4);
}

static int decap_ipv6(struct __sk_buff *skb)
Expand All @@ -690,7 +698,8 @@ static int decap_ipv6(struct __sk_buff *skb)
return TC_ACT_OK;

return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
iph_outer.nexthdr);
iph_outer.nexthdr,
BPF_F_ADJ_ROOM_DECAP_IPXIP6);
}

SEC("decap")
Expand Down
Loading