2
0
mirror of https://github.com/openvswitch/ovs synced 2025-09-01 06:45:17 +00:00

ofproto-dpif-upcall: Echo HASH attribute back to datapath.

The kernel datapath may sent upcall with hash info,
ovs-vswitchd should get it from upcall and then send
it back.

The reason is that:
| When using the kernel datapath, the upcall don't
| include skb hash info relatived. That will introduce
| some problem, because the hash of skb is important
| in kernel stack. For example, VXLAN module uses
| it to select UDP src port. The tx queue selection
| may also use the hash in stack.
|
| Hash is computed in different ways. Hash is random
| for a TCP socket, and hash may be computed in hardware,
| or software stack. Recalculation hash is not easy.
|
| There will be one upcall, without information of skb
| hash, to ovs-vswitchd, for the first packet of a TCP
| session. The rest packets will be processed in Open vSwitch
| modules, hash kept. If this tcp session is forward to
| VXLAN module, then the UDP src port of first tcp packet
| is different from rest packets.
|
| TCP packets may come from the host or dockers, to Open vSwitch.
| To fix it, we store the hash info to upcall, and restore hash
| when packets sent back.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
Tonghao Zhang
2019-11-15 10:58:59 +08:00
committed by Ben Pfaff
parent df711aae1f
commit 0442bfb11d
4 changed files with 17 additions and 4 deletions

View File

@@ -210,6 +210,7 @@ enum ovs_packet_attr {
error logging should be suppressed. */
OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */
OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */
OVS_PACKET_ATTR_HASH, /* Packet hash. */
__OVS_PACKET_ATTR_MAX
};

View File

@@ -1777,6 +1777,10 @@ dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
if (d_exec->mtu) {
nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
}
if (d_exec->hash) {
nl_msg_put_u64(buf, OVS_PACKET_ATTR_HASH, d_exec->hash);
}
}
/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
@@ -2461,7 +2465,8 @@ parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
[OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
[OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
[OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
[OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
[OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true },
[OVS_PACKET_ATTR_HASH] = { .type = NL_A_U64, .optional = true }
};
struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
@@ -2494,6 +2499,7 @@ parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
upcall->mru = a[OVS_PACKET_ATTR_MRU];
upcall->hash = a[OVS_PACKET_ATTR_HASH];
/* Allow overwriting the netlink attribute header without reallocating. */
dp_packet_use_stub(&upcall->packet,

View File

@@ -712,6 +712,7 @@ struct dpif_execute {
bool probe; /* Suppress error messages. */
unsigned int mtu; /* Maximum transmission unit to fragment.
0 if not a fragmented packet */
uint64_t hash;
const struct flow *flow; /* Flow extracted from 'packet'. */
/* Input, but possibly modified as a side effect of execution. */
@@ -802,6 +803,7 @@ struct dpif_upcall {
size_t key_len; /* Length of 'key' in bytes. */
ovs_u128 ufid; /* Unique flow identifier for 'key'. */
struct nlattr *mru; /* Maximum receive unit. */
struct nlattr *hash; /* Packet hash. */
struct nlattr *cutlen; /* Number of bytes shrink from the end. */
/* DPIF_UC_ACTION only. */

View File

@@ -217,6 +217,7 @@ struct upcall {
ofp_port_t ofp_in_port; /* OpenFlow in port, or OFPP_NONE. */
uint16_t mru; /* If !0, Maximum receive unit of
fragmented IP packet */
uint64_t hash;
enum upcall_type type; /* Type of the upcall. */
const struct nlattr *actions; /* Flow actions in DPIF_UC_ACTION Upcalls. */
@@ -784,7 +785,7 @@ recv_upcalls(struct handler *handler)
struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
struct upcall *upcall = &upcalls[n_upcalls];
struct flow *flow = &flows[n_upcalls];
unsigned int mru;
unsigned int mru = 0;
int error;
ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
@@ -802,8 +803,10 @@ recv_upcalls(struct handler *handler)
if (dupcall->mru) {
mru = nl_attr_get_u16(dupcall->mru);
} else {
mru = 0;
}
if (dupcall->hash) {
upcall->hash = nl_attr_get_u64(dupcall->hash);
}
error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
@@ -1600,6 +1603,7 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls,
op->dop.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0;
op->dop.execute.probe = false;
op->dop.execute.mtu = upcall->mru;
op->dop.execute.hash = upcall->hash;
}
}