10 May 2011 23:19
Re: [PATCH 2/2] datapath: Hash only the part of struct sw_flow_key populated by flow_extract().
On Mon, 2011-05-02 at 15:37 -0700, Jesse Gross wrote:
> I think what you had previously is actually along the right lines and
> we definitely need to include L4 headers in the length. The one
> difference is that once we start to parse a header group the length
> needs to be included even if it ultimately turns out to be invalid. A
> header group is something that is implied by a next header in the
> previous layer (i.e. IPv6 is implied by the EtherType).
>
> I think the header groups are:
> * L1/L2 (tun_id, in_port, dl_tci, dl_src, dl_dst)
> * IPv4 (nw_proto, nw_tos, ipv4.src, ipv4.dst)
> * IPv6 (nw_proto, nw_tos, ipv6.src, ipv6.dst)
> * ARP (nw_proto, nw_tos, arp.sha, arp.tha)
> * TCP/UDP/ICMP over IPv4 (ipv4.tp.src, ipv4.tp.dst)
> * TCP/UDP/ICMP over IPv6 (ipv6.tp.src, ipv6.tp.dst)
> * IPv6 ND (ipv6.nd_target, ipv6.nd_sll, ipv6.nd_tll)
>
> If we break it more finely than that, then we run into the problems
> with invalid packets partially matching flows. However, by breaking
> it at the layer boundary we know that flows can't have additional data
> farther on because that would imply that they have the same current
> layer headers as us, which are zeroed.
That makes sense to me. Ok, here is an incremental diff that should
return flow key lengths at the end of these boundaries, even when errors
occur:
diff --git a/datapath/flow.c b/datapath/flow.c
index 241822a..0f851d6 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
<at> <at> -101,7 +101,11 <at> <at> u64 flow_used_time(unsigned long flow_jiffies)
return cur_ms - idle_ms;
}
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
+#define SW_FLOW_KEY_OFFSET(field) \
+ offsetof(struct sw_flow_key, field) + \
+ FIELD_SIZEOF(struct sw_flow_key, field)
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, int *key_lenp)
{
unsigned int nh_ofs = skb_network_offset(skb);
unsigned int nh_len;
<at> <at> -109,6 +113,8 <at> <at> static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
struct ipv6hdr *nh;
uint8_t nexthdr;
+ *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.dst);
+
if (unlikely(skb->len < nh_ofs + sizeof(*nh)))
return -EINVAL;
<at> <at> -304,10 +310,6 <at> <at> static __be16 parse_ethertype(struct sk_buff *skb)
return llc->ethertype;
}
-#define SW_FLOW_KEY_OFFSET(field) \
- offsetof(struct sw_flow_key, field) + \
- FIELD_SIZEOF(struct sw_flow_key, field)
-
static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
int *key_lenp, int nh_len)
{
<at> <at> -343,6 +345,7 <at> <at> static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
nd = (struct nd_msg *)skb_transport_header(skb);
ipv6_addr_copy(&key->ipv6.nd_target, &nd->target);
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_tll);
icmp_len -= sizeof(*nd);
offset = 0;
<at> <at> -472,7 +475,7 <at> <at> int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
if (key->dl_type == htons(ETH_P_IP)) {
struct iphdr *nh;
- key_len = SW_FLOW_KEY_OFFSET(ipv4);
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.dst);
error = check_iphdr(skb);
if (unlikely(error)) {
<at> <at> -493,18 +496,21 <at> <at> int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
if (!(nh->frag_off & htons(IP_MF | IP_OFFSET)) &&
!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) {
if (key->nw_proto == IPPROTO_TCP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
}
} else if (key->nw_proto == IPPROTO_UDP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv4.tp.src = udp->source;
key->ipv4.tp.dst = udp->dest;
}
} else if (key->nw_proto == IPPROTO_ICMP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
<at> <at> -540,6 +546,7 <at> <at> int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
memcpy(&key->ipv4.dst, arp->ar_tip, sizeof(key->ipv4.dst));
memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
}
}
} else if (key->dl_type == htons(ETH_P_IPV6)) {
<at> <at> -547,7 +554,7 <at> <at> int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
key_len = SW_FLOW_KEY_OFFSET(ipv6);
- nh_len = parse_ipv6hdr(skb, key);
+ nh_len = parse_ipv6hdr(skb, key, &key_len);
if (unlikely(nh_len < 0)) {
if (nh_len == -EINVAL)
skb->transport_header = skb->network_header;
<at> <at> -558,21 +565,23 <at> <at> int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
/* Transport layer. */
if (key->nw_proto == NEXTHDR_TCP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp_dst);
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv6.tp_src = tcp->source;
key->ipv6.tp_dst = tcp->dest;
}
} else if (key->nw_proto == NEXTHDR_UDP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp_dst);
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv6.tp_src = udp->source;
key->ipv6.tp_dst = udp->dest;
}
} else if (key->nw_proto == NEXTHDR_ICMP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp_dst);
if (icmp6hdr_ok(skb)) {
- error = parse_icmpv6(skb, key, &key_len,
- nh_len);
+ error = parse_icmpv6(skb, key, &key_len, nh_len);
if (error < 0)
goto out;
}
<at> <at> -614,13 +623,14 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
const struct nlattr *attr)
{
int error = 0;
- int key_len = SW_FLOW_KEY_OFFSET(nw_tos);
const struct nlattr *nla;
u16 prev_type;
int rem;
+ int key_len;
memset(swkey, 0, sizeof(*swkey));
swkey->dl_type = htons(ETH_P_802_2);
+ key_len = SW_FLOW_KEY_OFFSET(dl_type);
prev_type = ODP_KEY_ATTR_UNSPEC;
nla_for_each_nested(nla, attr, rem) {
<at> <at> -693,7 +703,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_ETHERTYPE, ODP_KEY_ATTR_IPV4):
- key_len = SW_FLOW_KEY_OFFSET(ipv4);
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.dst);
if (swkey->dl_type != htons(ETH_P_IP))
goto invalid;
ipv4_key = nla_data(nla);
<at> <at> -721,6 +731,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_TCP):
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (swkey->nw_proto != IPPROTO_TCP)
goto invalid;
tcp_key = nla_data(nla);
<at> <at> -729,6 +740,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_TCP):
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp_dst);
if (swkey->nw_proto != IPPROTO_TCP)
goto invalid;
tcp_key = nla_data(nla);
<at> <at> -737,6 +749,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_UDP):
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (swkey->nw_proto != IPPROTO_UDP)
goto invalid;
udp_key = nla_data(nla);
<at> <at> -745,6 +758,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_UDP):
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp_dst);
if (swkey->nw_proto != IPPROTO_UDP)
goto invalid;
udp_key = nla_data(nla);
<at> <at> -753,6 +767,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_IPV4, ODP_KEY_ATTR_ICMP):
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (swkey->nw_proto != IPPROTO_ICMP)
goto invalid;
icmp_key = nla_data(nla);
<at> <at> -761,6 +776,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_IPV6, ODP_KEY_ATTR_ICMPV6):
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp_dst);
if (swkey->nw_proto != IPPROTO_ICMPV6)
goto invalid;
icmpv6_key = nla_data(nla);
<at> <at> -769,7 +785,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_ETHERTYPE, ODP_KEY_ATTR_ARP):
- key_len = SW_FLOW_KEY_OFFSET(ipv4);
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
if (swkey->dl_type != htons(ETH_P_ARP))
goto invalid;
arp_key = nla_data(nla);
<at> <at> -783,6 +799,7 <at> <at> int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
break;
case TRANSITION(ODP_KEY_ATTR_ICMPV6, ODP_KEY_ATTR_ND):
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_tll);
if (swkey->ipv6.tp_src != htons(NDISC_NEIGHBOUR_SOLICITATION)
&& swkey->ipv6.tp_src != htons(NDISC_NEIGHBOUR_ADVERTISEMENT))
goto invalid;
RSS Feed