From a7f33fdbfb67a7b3f6cc24c6eab88b33fc815363 Mon Sep 17 00:00:00 2001 From: Darrell Ball Date: Tue, 3 Dec 2019 09:14:17 -0800 Subject: [PATCH] conntrack: Support zone limits. Signed-off-by: Darrell Ball Signed-off-by: Ben Pfaff --- Documentation/faq/releases.rst | 2 +- NEWS | 1 + lib/conntrack-private.h | 8 ++ lib/conntrack.c | 137 +++++++++++++++++++++++++++++++ lib/conntrack.h | 18 ++++ lib/dpctl.man | 8 +- lib/dpif-netdev.c | 88 +++++++++++++++++++- tests/system-kmod-macros.at | 7 -- tests/system-traffic.at | 1 - tests/system-userspace-macros.at | 9 -- 10 files changed, 253 insertions(+), 26 deletions(-) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index 9c5ee03fc..6702c58a2 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -118,7 +118,7 @@ Q: Are all features available with all datapaths? Connection tracking 4.3 2.5 2.6 YES Conntrack Fragment Reass. 4.3 2.6 2.12 YES Conntrack Timeout Policies 5.2 2.12 NO NO - Conntrack Zone Limit 4.18 2.10 NO YES + Conntrack Zone Limit 4.18 2.10 2.13 YES Conntrack NAT 4.6 2.6 2.8 YES Tunnel - LISP NO 2.11 NO NO Tunnel - STT NO 2.4 NO YES diff --git a/NEWS b/NEWS index 222280b1a..17f92babf 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,7 @@ Post-v2.12.0 - Userspace datapath: * Add option to enable, disable and query TCP sequence checking in conntrack. + * Add support for conntrack zone limits. - AF_XDP: * New option 'use-need-wakeup' for netdev-afxdp to control enabling of corresponding 'need_wakeup' flag in AF_XDP rings. Enabled by default diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index 590f1399e..b04e4cd77 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -105,6 +105,12 @@ struct conn { long long expiration; uint32_t mark; int seq_skew; + + /* Immutable data. */ + int32_t admit_zone; /* The zone for managing zone limit counts. */ + uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ + + /* Mutable data. */ bool seq_skew_dir; /* TCP sequence skew direction due to NATTing of FTP * control messages; true if reply direction. */ bool cleaned; /* True if cleaned from expiry lists. */ @@ -155,6 +161,7 @@ struct conntrack { struct ovs_mutex ct_lock; /* Protects 2 following fields. */ struct cmap conns OVS_GUARDED; struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED; + struct hmap zone_limits OVS_GUARDED; uint32_t hash_basis; /* Salt for hashing a connection key. */ pthread_t clean_thread; /* Periodically cleans up connection tracker. */ struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */ @@ -172,6 +179,7 @@ struct conntrack { * control context. */ struct ipf *ipf; /* Fragmentation handling context. */ + uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ atomic_bool tcp_seq_chk; /* Check TCP sequence numbers. */ }; diff --git a/lib/conntrack.c b/lib/conntrack.c index df7b9fa7a..b80080e72 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -76,6 +76,11 @@ enum ct_alg_ctl_type { CT_ALG_CTL_SIP, }; +struct zone_limit { + struct hmap_node node; + struct conntrack_zone_limit czl; +}; + static bool conn_key_extract(struct conntrack *, struct dp_packet *, ovs_be16 dl_type, struct conn_lookup_ctx *, uint16_t zone); @@ -305,6 +310,8 @@ conntrack_init(void) for (unsigned i = 0; i < ARRAY_SIZE(ct->exp_lists); i++) { ovs_list_init(&ct->exp_lists[i]); } + hmap_init(&ct->zone_limits); + ct->zone_limit_seq = 0; ovs_mutex_unlock(&ct->ct_lock); ct->hash_basis = random_uint32(); @@ -318,6 +325,111 @@ conntrack_init(void) return ct; } +static uint32_t +zone_key_hash(int32_t zone, uint32_t basis) +{ + size_t hash = hash_int((OVS_FORCE uint32_t) zone, basis); + return hash; +} + +static struct zone_limit * +zone_limit_lookup(struct conntrack *ct, int32_t zone) + OVS_REQUIRES(ct->ct_lock) +{ + uint32_t hash = zone_key_hash(zone, ct->hash_basis); + struct zone_limit *zl; + HMAP_FOR_EACH_IN_BUCKET (zl, node, hash, &ct->zone_limits) { + if (zl->czl.zone == zone) { + return zl; + } + } + return NULL; +} + +static struct zone_limit * +zone_limit_lookup_or_default(struct conntrack *ct, int32_t zone) + OVS_REQUIRES(ct->ct_lock) +{ + struct zone_limit *zl = zone_limit_lookup(ct, zone); + return zl ? zl : zone_limit_lookup(ct, DEFAULT_ZONE); +} + +struct conntrack_zone_limit +zone_limit_get(struct conntrack *ct, int32_t zone) +{ + ovs_mutex_lock(&ct->ct_lock); + struct conntrack_zone_limit czl = {DEFAULT_ZONE, 0, 0, 0}; + struct zone_limit *zl = zone_limit_lookup_or_default(ct, zone); + if (zl) { + czl = zl->czl; + } + ovs_mutex_unlock(&ct->ct_lock); + return czl; +} + +static int +zone_limit_create(struct conntrack *ct, int32_t zone, uint32_t limit) + OVS_REQUIRES(ct->ct_lock) +{ + if (zone >= DEFAULT_ZONE && zone <= MAX_ZONE) { + struct zone_limit *zl = xzalloc(sizeof *zl); + zl->czl.limit = limit; + zl->czl.zone = zone; + zl->czl.zone_limit_seq = ct->zone_limit_seq++; + uint32_t hash = zone_key_hash(zone, ct->hash_basis); + hmap_insert(&ct->zone_limits, &zl->node, hash); + return 0; + } else { + return EINVAL; + } +} + +int +zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit) +{ + int err = 0; + ovs_mutex_lock(&ct->ct_lock); + struct zone_limit *zl = zone_limit_lookup(ct, zone); + if (zl) { + zl->czl.limit = limit; + VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone); + } else { + err = zone_limit_create(ct, zone, limit); + if (!err) { + VLOG_INFO("Created zone limit of %u for zone %d", limit, zone); + } else { + VLOG_WARN("Request to create zone limit for invalid zone %d", + zone); + } + } + ovs_mutex_unlock(&ct->ct_lock); + return err; +} + +static void +zone_limit_clean(struct conntrack *ct, struct zone_limit *zl) + OVS_REQUIRES(ct->ct_lock) +{ + hmap_remove(&ct->zone_limits, &zl->node); + free(zl); +} + +int +zone_limit_delete(struct conntrack *ct, uint16_t zone) +{ + ovs_mutex_lock(&ct->ct_lock); + struct zone_limit *zl = zone_limit_lookup(ct, zone); + if (zl) { + zone_limit_clean(ct, zl); + VLOG_INFO("Deleted zone limit for zone %d", zone); + } else { + VLOG_INFO("Attempted delete of non-existent zone limit: zone %d", + zone); + } + ovs_mutex_unlock(&ct->ct_lock); + return 0; +} + static void conn_clean_cmn(struct conntrack *ct, struct conn *conn) OVS_REQUIRES(ct->ct_lock) @@ -328,6 +440,11 @@ conn_clean_cmn(struct conntrack *ct, struct conn *conn) uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis); cmap_remove(&ct->conns, &conn->cm_node, hash); + + struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone); + if (zl && zl->czl.zone_limit_seq == conn->zone_limit_seq) { + zl->czl.count--; + } } /* Must be called with 'conn' of 'conn_type' CT_CONN_TYPE_DEFAULT. Also @@ -378,6 +495,13 @@ conntrack_destroy(struct conntrack *ct) conn_clean_one(ct, conn); } cmap_destroy(&ct->conns); + + struct zone_limit *zl; + HMAP_FOR_EACH_POP (zl, node, &ct->zone_limits) { + free(zl); + } + hmap_destroy(&ct->zone_limits); + ovs_mutex_unlock(&ct->ct_lock); ovs_mutex_destroy(&ct->ct_lock); @@ -850,6 +974,12 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, } if (commit) { + struct zone_limit *zl = zone_limit_lookup_or_default(ct, + ctx->key.zone); + if (zl && zl->czl.count >= zl->czl.limit) { + return nc; + } + unsigned int n_conn_limit; atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit); if (atomic_count_get(&ct->n_conn) >= n_conn_limit) { @@ -915,6 +1045,13 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, cmap_insert(&ct->conns, &nc->cm_node, ctx->hash); atomic_count_inc(&ct->n_conn); ctx->conn = nc; /* For completeness. */ + if (zl) { + nc->admit_zone = zl->czl.zone; + nc->zone_limit_seq = zl->czl.zone_limit_seq; + zl->czl.count++; + } else { + nc->admit_zone = INVALID_ZONE; + } } return nc; diff --git a/lib/conntrack.h b/lib/conntrack.h index 75409ba35..b0d0fc8d9 100644 --- a/lib/conntrack.h +++ b/lib/conntrack.h @@ -104,6 +104,20 @@ struct conntrack_dump { uint16_t zone; }; +struct conntrack_zone_limit { + int32_t zone; + uint32_t limit; + uint32_t count; + uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ +}; + +enum { + INVALID_ZONE = -2, + DEFAULT_ZONE = -1, /* Default zone for zone limit management. */ + MIN_ZONE = 0, + MAX_ZONE = 0xFFFF, +}; + struct ct_dpif_entry; struct ct_dpif_tuple; @@ -121,5 +135,9 @@ int conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns); int conntrack_set_tcp_seq_chk(struct conntrack *ct, bool enabled); bool conntrack_get_tcp_seq_chk(struct conntrack *ct); struct ipf *conntrack_ipf_ctx(struct conntrack *ct); +struct conntrack_zone_limit zone_limit_get(struct conntrack *ct, + int32_t zone); +int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit); +int zone_limit_delete(struct conntrack *ct, uint16_t zone); #endif /* conntrack.h */ diff --git a/lib/dpctl.man b/lib/dpctl.man index 6529baedc..090c3b960 100644 --- a/lib/dpctl.man +++ b/lib/dpctl.man @@ -346,18 +346,16 @@ particular zone is not specified in the datapath, it defaults to the default per-zone limit. A default zone may be specified with the \fBdefault=\fIdefault_limit\fR argument. Initially, the default per-zone limit is unlimited. An unlimited number of entries may be set -with \fB0\fR limit. Only supported for Linux kernel datapath. +with \fB0\fR limit. . .TP \*(DX\fBct\-del\-limits\fR [\fIdp\fR] \fBzone=\fIzone[,zone]\fR... Deletes the connection tracking limit for \fIzone\fR. Multiple zones may -be specified with a comma-separated list. Only supported for Linux -kernel datapath. +be specified with a comma-separated list. . .TP \*(DX\fBct\-get\-limits\fR [\fIdp\fR] [\fBzone=\fIzone\fR[\fB,\fIzone\fR]...] Retrieves the maximum allowed number of connections and current counts per-zone. If \fIzone\fR is given, only the specified zone(s) are printed. If no zones are specified, all the zone limits and counts are -provided. The command always displays the default zone limit. Only -supported for Linux kernel datapath. +provided. The command always displays the default zone limit. diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 885a4df36..1e5493615 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -7470,6 +7470,88 @@ dpif_netdev_ct_get_tcp_seq_chk(struct dpif *dpif, bool *enabled) return 0; } +static int +dpif_netdev_ct_set_limits(struct dpif *dpif OVS_UNUSED, + const uint32_t *default_limits, + const struct ovs_list *zone_limits) +{ + int err = 0; + struct dp_netdev *dp = get_dp_netdev(dpif); + if (default_limits) { + err = zone_limit_update(dp->conntrack, DEFAULT_ZONE, *default_limits); + if (err != 0) { + return err; + } + } + + struct ct_dpif_zone_limit *zone_limit; + LIST_FOR_EACH (zone_limit, node, zone_limits) { + err = zone_limit_update(dp->conntrack, zone_limit->zone, + zone_limit->limit); + if (err != 0) { + break; + } + } + return err; +} + +static int +dpif_netdev_ct_get_limits(struct dpif *dpif OVS_UNUSED, + uint32_t *default_limit, + const struct ovs_list *zone_limits_request, + struct ovs_list *zone_limits_reply) +{ + struct dp_netdev *dp = get_dp_netdev(dpif); + struct conntrack_zone_limit czl; + + czl = zone_limit_get(dp->conntrack, DEFAULT_ZONE); + if (czl.zone == DEFAULT_ZONE) { + *default_limit = czl.limit; + } else { + return EINVAL; + } + + if (!ovs_list_is_empty(zone_limits_request)) { + struct ct_dpif_zone_limit *zone_limit; + LIST_FOR_EACH (zone_limit, node, zone_limits_request) { + czl = zone_limit_get(dp->conntrack, zone_limit->zone); + if (czl.zone == zone_limit->zone || czl.zone == DEFAULT_ZONE) { + ct_dpif_push_zone_limit(zone_limits_reply, zone_limit->zone, + czl.limit, czl.count); + } else { + return EINVAL; + } + } + } else { + for (int z = MIN_ZONE; z <= MAX_ZONE; z++) { + czl = zone_limit_get(dp->conntrack, z); + if (czl.zone == z) { + ct_dpif_push_zone_limit(zone_limits_reply, z, czl.limit, + czl.count); + } + } + } + + return 0; +} + +static int +dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED, + const struct ovs_list *zone_limits) +{ + int err = 0; + struct dp_netdev *dp = get_dp_netdev(dpif); + struct ct_dpif_zone_limit *zone_limit; + LIST_FOR_EACH (zone_limit, node, zone_limits) { + err = zone_limit_delete(dp->conntrack, zone_limit->zone); + if (err != 0) { + break; + } + } + + return err; +} + static int dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable) { @@ -7576,9 +7658,9 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_ct_get_nconns, dpif_netdev_ct_set_tcp_seq_chk, dpif_netdev_ct_get_tcp_seq_chk, - NULL, /* ct_set_limits */ - NULL, /* ct_get_limits */ - NULL, /* ct_del_limits */ + dpif_netdev_ct_set_limits, + dpif_netdev_ct_get_limits, + dpif_netdev_ct_del_limits, NULL, /* ct_set_timeout_policy */ NULL, /* ct_get_timeout_policy */ NULL, /* ct_del_timeout_policy */ diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index 9e89aec43..daf66bdec 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -110,13 +110,6 @@ m4_define([CHECK_CONNTRACK_TIMEOUT], on_exit 'modprobe -r nfnetlink_cttimeout' ]) -# CHECK_CT_DPIF_PER_ZONE_LIMIT() -# -# Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per -# zone. The kernel datapath does support this feature. Will remove this check -# after both kernel and userspace datapath support it. -m4_define([CHECK_CT_DPIF_PER_ZONE_LIMIT]) - # CHECK_CT_DPIF_SET_GET_MAXCONNS() # # Perform requirements checks for running ovs-dpctl ct-set-maxconns or diff --git a/tests/system-traffic.at b/tests/system-traffic.at index cde7429f3..0fb7aacfa 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -3574,7 +3574,6 @@ AT_CLEANUP AT_SETUP([conntrack - limit by zone]) CHECK_CONNTRACK() -CHECK_CT_DPIF_PER_ZONE_LIMIT() OVS_TRAFFIC_VSWITCHD_START() ADD_NAMESPACES(at_ns0, at_ns1) diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index a419f30c1..ba7f4102f 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -106,15 +106,6 @@ m4_define([CHECK_CONNTRACK_TIMEOUT], AT_SKIP_IF([:]) ]) -# CHECK_CT_DPIF_PER_ZONE_LIMIT() -# -# Perform requirements checks for running ovs-dpctl ct-[set|get|del]-limits per -# zone. The userspace datapath does not support this feature yet. -m4_define([CHECK_CT_DPIF_PER_ZONE_LIMIT], -[ - AT_SKIP_IF([:]) -]) - # CHECK_CT_DPIF_SET_GET_MAXCONNS() # # Perform requirements checks for running ovs-dpctl ct-set-maxconns or