mirror of
https://github.com/openvswitch/ovs
synced 2025-09-02 15:25:22 +00:00
classifier: Prevent tries vs n_tries race leading to NULL dereference.
Currently classifier tries and n_tries can be updated not atomically, there is a race condition which can lead to NULL dereference. The race can happen when main thread updates a classifier tries and n_tries in classifier_set_prefix_fields() and at the same time revalidator or handler thread try to lookup them in classifier_lookup__(). Such race can be triggered when user changes prefixes of flow_table. Race(user changes flow_table prefixes: ip_dst,ip_src => none): [main thread] [revalidator/handler thread] =========================================================== /* cls->n_tries == 2 */ for (int i = 0; i < cls->n_tries; i++) { trie_init(cls, i, NULL); /* n_tries == 0 */ cls->n_tries = n_tries; /* cls->tries[i]->feild is NULL */ trie_ctx_init(&trie_ctx[i],&cls->tries[i]); /* trie->field is NULL */ ctx->be32ofs = trie->field->flow_be32ofs; To prevent the race, instead of re-introducing internal mutex implemented in the commitfccd7c092e
("classifier: Remove internal mutex."), this patch makes trie field RCU protected and checks it after read. Fixes:fccd7c092e
("classifier: Remove internal mutex.") Signed-off-by: Eiichi Tsukata <eiichi.tsukata@nutanix.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
committed by
Ilya Maximets
parent
8c2c503bdb
commit
a611705990
@@ -393,7 +393,9 @@ classifier_set_prefix_fields(struct classifier *cls,
|
|||||||
bitmap_set1(fields.bm, trie_fields[i]);
|
bitmap_set1(fields.bm, trie_fields[i]);
|
||||||
|
|
||||||
new_fields[n_tries] = NULL;
|
new_fields[n_tries] = NULL;
|
||||||
if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
|
const struct mf_field *cls_field
|
||||||
|
= ovsrcu_get(struct mf_field *, &cls->tries[n_tries].field);
|
||||||
|
if (n_tries >= cls->n_tries || field != cls_field) {
|
||||||
new_fields[n_tries] = field;
|
new_fields[n_tries] = field;
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
@@ -454,7 +456,7 @@ trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
|
|||||||
} else {
|
} else {
|
||||||
ovsrcu_set_hidden(&trie->root, NULL);
|
ovsrcu_set_hidden(&trie->root, NULL);
|
||||||
}
|
}
|
||||||
trie->field = field;
|
ovsrcu_set_hidden(&trie->field, CONST_CAST(struct mf_field *, field));
|
||||||
|
|
||||||
/* Add existing rules to the new trie. */
|
/* Add existing rules to the new trie. */
|
||||||
CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
|
CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
|
||||||
@@ -839,7 +841,6 @@ classifier_remove_assert(struct classifier *cls,
|
|||||||
struct trie_ctx {
|
struct trie_ctx {
|
||||||
const struct cls_trie *trie;
|
const struct cls_trie *trie;
|
||||||
bool lookup_done; /* Status of the lookup. */
|
bool lookup_done; /* Status of the lookup. */
|
||||||
uint8_t be32ofs; /* U32 offset of the field in question. */
|
|
||||||
unsigned int maskbits; /* Prefix length needed to avoid false matches. */
|
unsigned int maskbits; /* Prefix length needed to avoid false matches. */
|
||||||
union trie_prefix match_plens; /* Bitmask of prefix lengths with possible
|
union trie_prefix match_plens; /* Bitmask of prefix lengths with possible
|
||||||
* matches. */
|
* matches. */
|
||||||
@@ -849,7 +850,6 @@ static void
|
|||||||
trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
|
trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
|
||||||
{
|
{
|
||||||
ctx->trie = trie;
|
ctx->trie = trie;
|
||||||
ctx->be32ofs = trie->field->flow_be32ofs;
|
|
||||||
ctx->lookup_done = false;
|
ctx->lookup_done = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1531,8 +1531,10 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
|
|||||||
*CONST_CAST(uint8_t *, &subtable->n_indices) = index;
|
*CONST_CAST(uint8_t *, &subtable->n_indices) = index;
|
||||||
|
|
||||||
for (i = 0; i < cls->n_tries; i++) {
|
for (i = 0; i < cls->n_tries; i++) {
|
||||||
subtable->trie_plen[i] = minimask_get_prefix_len(mask,
|
const struct mf_field *field
|
||||||
cls->tries[i].field);
|
= ovsrcu_get(struct mf_field *, &cls->tries[i].field);
|
||||||
|
subtable->trie_plen[i]
|
||||||
|
= field ? minimask_get_prefix_len(mask, field) : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ports trie. */
|
/* Ports trie. */
|
||||||
@@ -1575,11 +1577,17 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
|
|||||||
* fields using the prefix tries. The trie checks are done only as
|
* fields using the prefix tries. The trie checks are done only as
|
||||||
* needed to avoid folding in additional bits to the wildcards mask. */
|
* needed to avoid folding in additional bits to the wildcards mask. */
|
||||||
for (j = 0; j < n_tries; j++) {
|
for (j = 0; j < n_tries; j++) {
|
||||||
/* Is the trie field relevant for this subtable, and
|
/* Is the trie field relevant for this subtable? */
|
||||||
is the trie field within the current range of fields? */
|
if (field_plen[j]) {
|
||||||
if (field_plen[j] &&
|
|
||||||
flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) {
|
|
||||||
struct trie_ctx *ctx = &trie_ctx[j];
|
struct trie_ctx *ctx = &trie_ctx[j];
|
||||||
|
const struct mf_field *ctx_field
|
||||||
|
= ovsrcu_get(struct mf_field *, &ctx->trie->field);
|
||||||
|
|
||||||
|
/* Is the trie field within the current range of fields? */
|
||||||
|
if (!ctx_field
|
||||||
|
|| !flowmap_is_set(&range_map, ctx_field->flow_be32ofs / 2)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* On-demand trie lookup. */
|
/* On-demand trie lookup. */
|
||||||
if (!ctx->lookup_done) {
|
if (!ctx->lookup_done) {
|
||||||
@@ -1601,14 +1609,16 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
|
|||||||
* than this subtable would otherwise. */
|
* than this subtable would otherwise. */
|
||||||
if (ctx->maskbits <= field_plen[j]) {
|
if (ctx->maskbits <= field_plen[j]) {
|
||||||
/* Unwildcard the bits and skip the rest. */
|
/* Unwildcard the bits and skip the rest. */
|
||||||
mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits);
|
mask_set_prefix_bits(wc, ctx_field->flow_be32ofs,
|
||||||
|
ctx->maskbits);
|
||||||
/* Note: Prerequisite already unwildcarded, as the only
|
/* Note: Prerequisite already unwildcarded, as the only
|
||||||
* prerequisite of the supported trie lookup fields is
|
* prerequisite of the supported trie lookup fields is
|
||||||
* the ethertype, which is always unwildcarded. */
|
* the ethertype, which is always unwildcarded. */
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
/* Can skip if the field is already unwildcarded. */
|
/* Can skip if the field is already unwildcarded. */
|
||||||
if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) {
|
if (mask_prefix_bits_set(wc, ctx_field->flow_be32ofs,
|
||||||
|
ctx->maskbits)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2001,12 +2011,12 @@ static unsigned int
|
|||||||
trie_lookup(const struct cls_trie *trie, const struct flow *flow,
|
trie_lookup(const struct cls_trie *trie, const struct flow *flow,
|
||||||
union trie_prefix *plens)
|
union trie_prefix *plens)
|
||||||
{
|
{
|
||||||
const struct mf_field *mf = trie->field;
|
const struct mf_field *mf = ovsrcu_get(struct mf_field *, &trie->field);
|
||||||
|
|
||||||
/* Check that current flow matches the prerequisites for the trie
|
/* Check that current flow matches the prerequisites for the trie
|
||||||
* field. Some match fields are used for multiple purposes, so we
|
* field. Some match fields are used for multiple purposes, so we
|
||||||
* must check that the trie is relevant for this flow. */
|
* must check that the trie is relevant for this flow. */
|
||||||
if (mf_are_prereqs_ok(mf, flow, NULL)) {
|
if (mf && mf_are_prereqs_ok(mf, flow, NULL)) {
|
||||||
return trie_lookup_value(&trie->root,
|
return trie_lookup_value(&trie->root,
|
||||||
&((ovs_be32 *)flow)[mf->flow_be32ofs],
|
&((ovs_be32 *)flow)[mf->flow_be32ofs],
|
||||||
&plens->be32, mf->n_bits);
|
&plens->be32, mf->n_bits);
|
||||||
@@ -2053,8 +2063,9 @@ minimask_get_prefix_len(const struct minimask *minimask,
|
|||||||
* happened to be zeros.
|
* happened to be zeros.
|
||||||
*/
|
*/
|
||||||
static const ovs_be32 *
|
static const ovs_be32 *
|
||||||
minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
|
minimatch_get_prefix(const struct minimatch *match, rcu_field_ptr *field)
|
||||||
{
|
{
|
||||||
|
struct mf_field *mf = ovsrcu_get_protected(struct mf_field *, field);
|
||||||
size_t u64_ofs = mf->flow_be32ofs / 2;
|
size_t u64_ofs = mf->flow_be32ofs / 2;
|
||||||
|
|
||||||
return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs)
|
return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs)
|
||||||
@@ -2068,7 +2079,7 @@ static void
|
|||||||
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
|
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
|
||||||
{
|
{
|
||||||
trie_insert_prefix(&trie->root,
|
trie_insert_prefix(&trie->root,
|
||||||
minimatch_get_prefix(&rule->match, trie->field), mlen);
|
minimatch_get_prefix(&rule->match, &trie->field), mlen);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -2123,7 +2134,7 @@ static void
|
|||||||
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
|
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
|
||||||
{
|
{
|
||||||
trie_remove_prefix(&trie->root,
|
trie_remove_prefix(&trie->root,
|
||||||
minimatch_get_prefix(&rule->match, trie->field), mlen);
|
minimatch_get_prefix(&rule->match, &trie->field), mlen);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
|
/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
|
||||||
|
@@ -314,12 +314,14 @@ extern "C" {
|
|||||||
struct cls_subtable;
|
struct cls_subtable;
|
||||||
struct cls_match;
|
struct cls_match;
|
||||||
|
|
||||||
|
struct mf_field;
|
||||||
|
typedef OVSRCU_TYPE(struct mf_field *) rcu_field_ptr;
|
||||||
struct trie_node;
|
struct trie_node;
|
||||||
typedef OVSRCU_TYPE(struct trie_node *) rcu_trie_ptr;
|
typedef OVSRCU_TYPE(struct trie_node *) rcu_trie_ptr;
|
||||||
|
|
||||||
/* Prefix trie for a 'field' */
|
/* Prefix trie for a 'field' */
|
||||||
struct cls_trie {
|
struct cls_trie {
|
||||||
const struct mf_field *field; /* Trie field, or NULL. */
|
rcu_field_ptr field; /* Trie field, or NULL. */
|
||||||
rcu_trie_ptr root; /* NULL if none. */
|
rcu_trie_ptr root; /* NULL if none. */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -512,8 +512,9 @@ verify_tries(struct classifier *cls)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < cls->n_tries; i++) {
|
for (i = 0; i < cls->n_tries; i++) {
|
||||||
n_rules += trie_verify(&cls->tries[i].root, 0,
|
const struct mf_field * cls_field
|
||||||
cls->tries[i].field->n_bits);
|
= ovsrcu_get(struct mf_field *, &cls->tries[i].field);
|
||||||
|
n_rules += trie_verify(&cls->tries[i].root, 0, cls_field->n_bits);
|
||||||
}
|
}
|
||||||
assert(n_rules <= cls->n_rules);
|
assert(n_rules <= cls->n_rules);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user