mirror of
https://github.com/openvswitch/ovs
synced 2025-08-30 22:05:19 +00:00
dpif-linux: Prevent a single port from monopolizing upcalls.
Currently it is possible for a client on a single port to generate a huge number of packets that miss in the kernel flow table and monopolize the userspace/kernel communication path. This effectively causes a denial of service on the machine because no new flow setups can take place. This commit adds some additional fairness by separating each upcall type for each object in the datapath onto a separate socket, each with its own queue. Userspace then reads round-robin from each socket so other flow setups can still succeed. Since the number of objects can potentially be large, we don't always have a unique socket for each. Instead, we create 16 sockets and spread the load around them in a round-robin fashion. It's theoretically possible to do better than this with some kind of active load-balancing scheme, but this seems like a good place to start. Feature #6485
This commit is contained in:
3
NEWS
3
NEWS
@@ -15,6 +15,9 @@ Post-v1.2.0
|
|||||||
- CAPWAP tunneling now supports an extension to transport a 64-bit key. By
|
- CAPWAP tunneling now supports an extension to transport a 64-bit key. By
|
||||||
default it remains compatible with the old version and other
|
default it remains compatible with the old version and other
|
||||||
standards-based implementations.
|
standards-based implementations.
|
||||||
|
- Flow setups are now processed in a round-robin manner across ports
|
||||||
|
to prevent any single client from monopolizing the CPU and conducting
|
||||||
|
a denial of service attack.
|
||||||
|
|
||||||
v1.2.0 - 03 Aug 2011
|
v1.2.0 - 03 Aug 2011
|
||||||
------------------------
|
------------------------
|
||||||
|
199
lib/dpif-linux.c
199
lib/dpif-linux.c
@@ -48,6 +48,7 @@
|
|||||||
#include "openvswitch/tunnel.h"
|
#include "openvswitch/tunnel.h"
|
||||||
#include "packets.h"
|
#include "packets.h"
|
||||||
#include "poll-loop.h"
|
#include "poll-loop.h"
|
||||||
|
#include "random.h"
|
||||||
#include "shash.h"
|
#include "shash.h"
|
||||||
#include "sset.h"
|
#include "sset.h"
|
||||||
#include "unaligned.h"
|
#include "unaligned.h"
|
||||||
@@ -60,6 +61,9 @@ enum { LRU_MAX_PORTS = 1024 };
|
|||||||
enum { LRU_MASK = LRU_MAX_PORTS - 1};
|
enum { LRU_MASK = LRU_MAX_PORTS - 1};
|
||||||
BUILD_ASSERT_DECL(IS_POW2(LRU_MAX_PORTS));
|
BUILD_ASSERT_DECL(IS_POW2(LRU_MAX_PORTS));
|
||||||
|
|
||||||
|
enum { N_UPCALL_SOCKS = 16 };
|
||||||
|
BUILD_ASSERT_DECL(IS_POW2(N_UPCALL_SOCKS));
|
||||||
|
|
||||||
/* This ethtool flag was introduced in Linux 2.6.24, so it might be
|
/* This ethtool flag was introduced in Linux 2.6.24, so it might be
|
||||||
* missing if we have old headers. */
|
* missing if we have old headers. */
|
||||||
#define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
|
#define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
|
||||||
@@ -133,7 +137,8 @@ struct dpif_linux {
|
|||||||
int dp_ifindex;
|
int dp_ifindex;
|
||||||
|
|
||||||
/* Upcall messages. */
|
/* Upcall messages. */
|
||||||
struct nl_sock *upcall_sock;
|
struct nl_sock *upcall_socks[N_UPCALL_SOCKS];
|
||||||
|
int last_read_upcall;
|
||||||
unsigned int listen_mask;
|
unsigned int listen_mask;
|
||||||
|
|
||||||
/* Change notification. */
|
/* Change notification. */
|
||||||
@@ -164,6 +169,9 @@ static int dpif_linux_init(void);
|
|||||||
static void open_dpif(const struct dpif_linux_dp *, struct dpif **);
|
static void open_dpif(const struct dpif_linux_dp *, struct dpif **);
|
||||||
static bool dpif_linux_nln_parse(struct ofpbuf *, void *);
|
static bool dpif_linux_nln_parse(struct ofpbuf *, void *);
|
||||||
static void dpif_linux_port_changed(const void *vport, void *dpif);
|
static void dpif_linux_port_changed(const void *vport, void *dpif);
|
||||||
|
static uint32_t get_upcall_pid_port(struct dpif_linux *, uint32_t port);
|
||||||
|
static uint32_t get_upcall_pid_flow(struct dpif_linux *,
|
||||||
|
const struct nlattr *key, size_t key_len);
|
||||||
|
|
||||||
static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *,
|
static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *,
|
||||||
struct ofpbuf *);
|
struct ofpbuf *);
|
||||||
@@ -256,21 +264,17 @@ open_dpif(const struct dpif_linux_dp *dp, struct dpif **dpifp)
|
|||||||
struct dpif_linux *dpif;
|
struct dpif_linux *dpif;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
dpif = xmalloc(sizeof *dpif);
|
dpif = xzalloc(sizeof *dpif);
|
||||||
dpif->port_notifier = nln_notifier_create(nln, dpif_linux_port_changed,
|
dpif->port_notifier = nln_notifier_create(nln, dpif_linux_port_changed,
|
||||||
dpif);
|
dpif);
|
||||||
|
|
||||||
dpif_init(&dpif->dpif, &dpif_linux_class, dp->name,
|
dpif_init(&dpif->dpif, &dpif_linux_class, dp->name,
|
||||||
dp->dp_ifindex, dp->dp_ifindex);
|
dp->dp_ifindex, dp->dp_ifindex);
|
||||||
|
|
||||||
dpif->upcall_sock = NULL;
|
|
||||||
dpif->listen_mask = 0;
|
|
||||||
dpif->dp_ifindex = dp->dp_ifindex;
|
dpif->dp_ifindex = dp->dp_ifindex;
|
||||||
sset_init(&dpif->changed_ports);
|
sset_init(&dpif->changed_ports);
|
||||||
dpif->change_error = false;
|
|
||||||
*dpifp = &dpif->dpif;
|
*dpifp = &dpif->dpif;
|
||||||
|
|
||||||
dpif->lru_head = dpif->lru_tail = 0;
|
|
||||||
dpif->lru_bitmap = bitmap_allocate(LRU_MAX_PORTS);
|
dpif->lru_bitmap = bitmap_allocate(LRU_MAX_PORTS);
|
||||||
bitmap_set1(dpif->lru_bitmap, OVSP_LOCAL);
|
bitmap_set1(dpif->lru_bitmap, OVSP_LOCAL);
|
||||||
for (i = 1; i < LRU_MAX_PORTS; i++) {
|
for (i = 1; i < LRU_MAX_PORTS; i++) {
|
||||||
@@ -278,13 +282,24 @@ open_dpif(const struct dpif_linux_dp *dp, struct dpif **dpifp)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
destroy_upcall_socks(struct dpif_linux *dpif)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < N_UPCALL_SOCKS; i++) {
|
||||||
|
nl_sock_destroy(dpif->upcall_socks[i]);
|
||||||
|
dpif->upcall_socks[i] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
dpif_linux_close(struct dpif *dpif_)
|
dpif_linux_close(struct dpif *dpif_)
|
||||||
{
|
{
|
||||||
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
||||||
|
|
||||||
nln_notifier_destroy(dpif->port_notifier);
|
nln_notifier_destroy(dpif->port_notifier);
|
||||||
nl_sock_destroy(dpif->upcall_sock);
|
destroy_upcall_socks(dpif);
|
||||||
sset_destroy(&dpif->changed_ports);
|
sset_destroy(&dpif->changed_ports);
|
||||||
free(dpif->lru_bitmap);
|
free(dpif->lru_bitmap);
|
||||||
free(dpif);
|
free(dpif);
|
||||||
@@ -398,13 +413,15 @@ dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
|
|||||||
/* Loop until we find a port that isn't used. */
|
/* Loop until we find a port that isn't used. */
|
||||||
do {
|
do {
|
||||||
request.port_no = dpif_linux_pop_port(dpif);
|
request.port_no = dpif_linux_pop_port(dpif);
|
||||||
if (dpif->upcall_sock) {
|
request.upcall_pid = get_upcall_pid_port(dpif, request.port_no);
|
||||||
request.upcall_pid = nl_sock_pid(dpif->upcall_sock);
|
|
||||||
}
|
|
||||||
error = dpif_linux_vport_transact(&request, &reply, &buf);
|
error = dpif_linux_vport_transact(&request, &reply, &buf);
|
||||||
|
|
||||||
if (!error) {
|
if (!error) {
|
||||||
*port_nop = reply.port_no;
|
*port_nop = reply.port_no;
|
||||||
|
VLOG_DBG("%s: assigning port %"PRIu32" to netlink "
|
||||||
|
"pid %"PRIu32,
|
||||||
|
dpif_name(dpif_), request.port_no,
|
||||||
|
request.upcall_pid);
|
||||||
}
|
}
|
||||||
ofpbuf_delete(buf);
|
ofpbuf_delete(buf);
|
||||||
} while (request.port_no != UINT32_MAX
|
} while (request.port_no != UINT32_MAX
|
||||||
@@ -659,9 +676,7 @@ dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags,
|
|||||||
/* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
|
/* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
|
||||||
request.actions = actions ? actions : &dummy_action;
|
request.actions = actions ? actions : &dummy_action;
|
||||||
request.actions_len = actions_len;
|
request.actions_len = actions_len;
|
||||||
if (dpif->upcall_sock) {
|
request.upcall_pid = get_upcall_pid_flow(dpif, key, key_len);
|
||||||
request.upcall_pid = nl_sock_pid(dpif->upcall_sock);
|
|
||||||
}
|
|
||||||
if (flags & DPIF_FP_ZERO_STATS) {
|
if (flags & DPIF_FP_ZERO_STATS) {
|
||||||
request.clear = true;
|
request.clear = true;
|
||||||
}
|
}
|
||||||
@@ -827,11 +842,7 @@ dpif_linux_execute(struct dpif *dpif_,
|
|||||||
const struct ofpbuf *packet)
|
const struct ofpbuf *packet)
|
||||||
{
|
{
|
||||||
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
||||||
uint32_t upcall_pid = 0;
|
uint32_t upcall_pid = get_upcall_pid_flow(dpif, key, key_len);
|
||||||
|
|
||||||
if (dpif->upcall_sock) {
|
|
||||||
upcall_pid = nl_sock_pid(dpif->upcall_sock);
|
|
||||||
}
|
|
||||||
|
|
||||||
return dpif_linux_execute__(dpif->dp_ifindex, upcall_pid, key, key_len,
|
return dpif_linux_execute__(dpif->dp_ifindex, upcall_pid, key, key_len,
|
||||||
actions, actions_len, packet);
|
actions, actions_len, packet);
|
||||||
@@ -845,45 +856,77 @@ dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static uint32_t
|
||||||
dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
|
get_upcall_pid_port__(struct dpif_linux *dpif, uint32_t port)
|
||||||
{
|
{
|
||||||
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
int idx = port & (N_UPCALL_SOCKS - 1);
|
||||||
int error;
|
return nl_sock_pid(dpif->upcall_socks[idx]);
|
||||||
|
}
|
||||||
|
|
||||||
if (listen_mask == dpif->listen_mask) {
|
static uint32_t
|
||||||
|
get_upcall_pid_port(struct dpif_linux *dpif, uint32_t port)
|
||||||
|
{
|
||||||
|
if (!(dpif->listen_mask & (1u << DPIF_UC_MISS))) {
|
||||||
return 0;
|
return 0;
|
||||||
} else if (!listen_mask) {
|
}
|
||||||
nl_sock_destroy(dpif->upcall_sock);
|
|
||||||
dpif->upcall_sock = NULL;
|
return get_upcall_pid_port__(dpif, port);
|
||||||
} else if (!dpif->upcall_sock) {
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
get_upcall_pid_flow(struct dpif_linux *dpif,
|
||||||
|
const struct nlattr *key, size_t key_len)
|
||||||
|
{
|
||||||
|
const struct nlattr *nla;
|
||||||
|
uint32_t port;
|
||||||
|
|
||||||
|
if (!(dpif->listen_mask &
|
||||||
|
((1u << DPIF_UC_ACTION) | (1u << DPIF_UC_SAMPLE)))) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
nla = nl_attr_find__(key, key_len, OVS_KEY_ATTR_IN_PORT);
|
||||||
|
if (nla) {
|
||||||
|
port = nl_attr_get_u32(nla);
|
||||||
|
} else {
|
||||||
|
port = random_uint32();
|
||||||
|
}
|
||||||
|
|
||||||
|
return get_upcall_pid_port__(dpif, port);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
set_upcall_pids(struct dpif_linux *dpif)
|
||||||
|
{
|
||||||
struct dpif_port port;
|
struct dpif_port port;
|
||||||
struct dpif_port_dump port_dump;
|
struct dpif_port_dump port_dump;
|
||||||
struct dpif_flow_dump flow_dump;
|
struct dpif_flow_dump flow_dump;
|
||||||
const struct nlattr *key;
|
const struct nlattr *key;
|
||||||
size_t key_len;
|
size_t key_len;
|
||||||
|
int error;
|
||||||
|
|
||||||
error = nl_sock_create(NETLINK_GENERIC, &dpif->upcall_sock);
|
DPIF_PORT_FOR_EACH (&port, &port_dump, &dpif->dpif) {
|
||||||
if (error) {
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
DPIF_PORT_FOR_EACH (&port, &port_dump, dpif_) {
|
|
||||||
struct dpif_linux_vport vport_request;
|
struct dpif_linux_vport vport_request;
|
||||||
|
|
||||||
dpif_linux_vport_init(&vport_request);
|
dpif_linux_vport_init(&vport_request);
|
||||||
vport_request.cmd = OVS_VPORT_CMD_SET;
|
vport_request.cmd = OVS_VPORT_CMD_SET;
|
||||||
vport_request.dp_ifindex = dpif->dp_ifindex;
|
vport_request.dp_ifindex = dpif->dp_ifindex;
|
||||||
vport_request.port_no = port.port_no;
|
vport_request.port_no = port.port_no;
|
||||||
vport_request.upcall_pid = nl_sock_pid(dpif->upcall_sock);
|
vport_request.upcall_pid = get_upcall_pid_port(dpif,
|
||||||
|
vport_request.port_no);
|
||||||
error = dpif_linux_vport_transact(&vport_request, NULL, NULL);
|
error = dpif_linux_vport_transact(&vport_request, NULL, NULL);
|
||||||
if (error) {
|
if (!error) {
|
||||||
VLOG_WARN_RL(&error_rl, "%s: failed to set upcall pid on "
|
VLOG_DBG("%s: assigning port %"PRIu32" to netlink "
|
||||||
"port: %s", dpif_name(dpif_), strerror(error));
|
"pid %"PRIu32,
|
||||||
|
dpif_name(&dpif->dpif), vport_request.port_no,
|
||||||
|
vport_request.upcall_pid);
|
||||||
|
} else {
|
||||||
|
VLOG_WARN_RL(&error_rl, "%s: failed to set upcall pid on port: %s",
|
||||||
|
dpif_name(&dpif->dpif), strerror(error));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dpif_flow_dump_start(&flow_dump, dpif_);
|
dpif_flow_dump_start(&flow_dump, &dpif->dpif);
|
||||||
while (dpif_flow_dump_next(&flow_dump, &key, &key_len,
|
while (dpif_flow_dump_next(&flow_dump, &key, &key_len,
|
||||||
NULL, NULL, NULL)) {
|
NULL, NULL, NULL)) {
|
||||||
struct dpif_linux_flow flow_request;
|
struct dpif_linux_flow flow_request;
|
||||||
@@ -893,17 +936,43 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
|
|||||||
flow_request.dp_ifindex = dpif->dp_ifindex;
|
flow_request.dp_ifindex = dpif->dp_ifindex;
|
||||||
flow_request.key = key;
|
flow_request.key = key;
|
||||||
flow_request.key_len = key_len;
|
flow_request.key_len = key_len;
|
||||||
flow_request.upcall_pid = nl_sock_pid(dpif->upcall_sock);
|
flow_request.upcall_pid = get_upcall_pid_flow(dpif, key, key_len);
|
||||||
error = dpif_linux_flow_transact(&flow_request, NULL, NULL);
|
error = dpif_linux_flow_transact(&flow_request, NULL, NULL);
|
||||||
if (error) {
|
if (error) {
|
||||||
VLOG_WARN_RL(&error_rl, "%s: failed to set upcall pid on "
|
VLOG_WARN_RL(&error_rl, "%s: failed to set upcall pid on flow: %s",
|
||||||
"flow: %s", dpif_name(dpif_), strerror(error));
|
dpif_name(&dpif->dpif), strerror(error));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dpif_flow_dump_done(&flow_dump);
|
dpif_flow_dump_done(&flow_dump);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
|
||||||
|
{
|
||||||
|
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
||||||
|
|
||||||
|
if (listen_mask == dpif->listen_mask) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!listen_mask) {
|
||||||
|
destroy_upcall_socks(dpif);
|
||||||
|
} else if (!dpif->listen_mask) {
|
||||||
|
int i;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
for (i = 0; i < N_UPCALL_SOCKS; i++) {
|
||||||
|
error = nl_sock_create(NETLINK_GENERIC, &dpif->upcall_socks[i]);
|
||||||
|
if (error) {
|
||||||
|
destroy_upcall_socks(dpif);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dpif->listen_mask = listen_mask;
|
dpif->listen_mask = listen_mask;
|
||||||
|
set_upcall_pids(dpif);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1020,19 +1089,34 @@ static int
|
|||||||
dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
|
dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
|
||||||
{
|
{
|
||||||
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
||||||
struct ofpbuf *buf;
|
|
||||||
int error;
|
|
||||||
int i;
|
int i;
|
||||||
|
int read_tries = 0;
|
||||||
|
|
||||||
if (!dpif->upcall_sock) {
|
if (!dpif->listen_mask) {
|
||||||
return EAGAIN;
|
return EAGAIN;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < 50; i++) {
|
for (i = 0; i < N_UPCALL_SOCKS; i++) {
|
||||||
|
struct nl_sock *upcall_sock;
|
||||||
|
dpif->last_read_upcall = (dpif->last_read_upcall + 1) &
|
||||||
|
(N_UPCALL_SOCKS - 1);
|
||||||
|
upcall_sock = dpif->upcall_socks[dpif->last_read_upcall];
|
||||||
|
|
||||||
|
if (nl_sock_woke(upcall_sock)) {
|
||||||
int dp_ifindex;
|
int dp_ifindex;
|
||||||
|
|
||||||
error = nl_sock_recv(dpif->upcall_sock, &buf, false);
|
for (;;) {
|
||||||
if (error) {
|
struct ofpbuf *buf;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
if (++read_tries > 50) {
|
||||||
|
return EAGAIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = nl_sock_recv(upcall_sock, &buf, false);
|
||||||
|
if (error == EAGAIN) {
|
||||||
|
break;
|
||||||
|
} else if (error) {
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1048,6 +1132,8 @@ dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return EAGAIN;
|
return EAGAIN;
|
||||||
}
|
}
|
||||||
@@ -1056,8 +1142,14 @@ static void
|
|||||||
dpif_linux_recv_wait(struct dpif *dpif_)
|
dpif_linux_recv_wait(struct dpif *dpif_)
|
||||||
{
|
{
|
||||||
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
||||||
if (dpif->upcall_sock) {
|
int i;
|
||||||
nl_sock_wait(dpif->upcall_sock, POLLIN);
|
|
||||||
|
if (!dpif->listen_mask) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < N_UPCALL_SOCKS; i++) {
|
||||||
|
nl_sock_wait(dpif->upcall_socks[i], POLLIN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1065,9 +1157,14 @@ static void
|
|||||||
dpif_linux_recv_purge(struct dpif *dpif_)
|
dpif_linux_recv_purge(struct dpif *dpif_)
|
||||||
{
|
{
|
||||||
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
|
||||||
|
int i;
|
||||||
|
|
||||||
if (dpif->upcall_sock) {
|
if (!dpif->listen_mask) {
|
||||||
nl_sock_drain(dpif->upcall_sock);
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < N_UPCALL_SOCKS; i++) {
|
||||||
|
nl_sock_drain(dpif->upcall_socks[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user