MEDIUM: pattern: link all final elements from the reference

There is a data model issue in the current pattern design that makes
pattern deletion extremely expensive: there's no direct way from a
reference to access all indexed occurrences. As such, the only way
to remove all indexed entries corresponding to a reference update
is to scan all expressions's lists and trees to find a link to the
reference. While this was possibly OK when map removal was not
common and most maps were small, this is not conceivable anymore
with GeoIP maps containing 10M+ entries and del-map operations that
are triggered from http-request rulesets.

This patch introduces two list heads from the pattern reference, one
for the objects linked by lists and one for those linked by tree node.
Ideally a single list would be enough but the linked elements are too
much unrelated to be distinguished at the moment, so we'll need two
lists. However for the long term a single-linked list will suffice but
for now it's not possible due to the way elements are removed from
expressions. As such this patch adds 32 bytes of memory usage per
reference plus 16 per indexed entry, but both will be cut in half
later.

The links are not yet used for deletion, this patch only ensures the
list is always consistent.
This commit is contained in:
Willy Tarreau 2020-11-02 12:10:48 +01:00
parent 6d8a68914e
commit 4bdd0a13d6
2 changed files with 29 additions and 2 deletions

View File

@ -111,12 +111,15 @@ struct pat_ref {
__decl_thread(HA_SPINLOCK_T lock); /* Lock used to protect pat ref elements */
};
/* This is a part of struct pat_ref. Each entry contain one
* pattern and one associated value as original string.
/* This is a part of struct pat_ref. Each entry contains one pattern and one
* associated value as original string. All derivative forms (via exprs) are
* accessed from list_head or tree_head.
*/
struct pat_ref_elt {
struct list list; /* Used to chain elements. */
struct list back_refs; /* list of users tracking this pat ref */
struct list list_head; /* all pattern_list derived from this reference */
struct list tree_head; /* all pattern_tree derived from this reference */
char *pattern;
char *sample;
int line;
@ -126,6 +129,7 @@ struct pat_ref_elt {
* "sample" with a tree entry. It is used with maps.
*/
struct pattern_tree {
struct list from_ref; // pattern_tree linked from pat_ref_elt
struct sample_data *data;
struct pat_ref_elt *ref;
struct ebmb_node node;
@ -170,6 +174,7 @@ struct pattern {
/* This struct is just used for chaining patterns */
struct pattern_list {
struct list from_ref; // pattern_tree linked from pat_ref_elt
struct list list;
struct pattern pat;
};

View File

@ -1082,6 +1082,7 @@ void free_pattern_tree(struct eb_root *root)
next = eb_next(node);
eb_delete(node);
elt = container_of(node, struct pattern_tree, node);
LIST_DEL(&elt->from_ref);
free(elt->data);
free(elt);
node = next;
@ -1094,6 +1095,7 @@ void pat_prune_gen(struct pattern_expr *expr)
list_for_each_entry_safe(pat, tmp, &expr->patterns, list) {
LIST_DEL(&pat->list);
LIST_DEL(&pat->from_ref);
if (pat->pat.sflags & PAT_SF_REGFREE)
regex_free(pat->pat.ptr.ptr);
else
@ -1130,6 +1132,8 @@ int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err)
/* chain pattern in the expression */
LIST_ADDQ(&expr->patterns, &patl->list);
/* and from the reference */
LIST_ADDQ(&pat->ref->list_head, &patl->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@ -1159,6 +1163,8 @@ int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err)
/* chain pattern in the expression */
LIST_ADDQ(&expr->patterns, &patl->list);
/* and from the reference */
LIST_ADDQ(&pat->ref->list_head, &patl->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@ -1189,6 +1195,8 @@ int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err)
/* chain pattern in the expression */
LIST_ADDQ(&expr->patterns, &patl->list);
/* and from the reference */
LIST_ADDQ(&pat->ref->list_head, &patl->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@ -1219,6 +1227,8 @@ int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap
/* chain pattern in the expression */
LIST_ADDQ(&expr->patterns, &patl->list);
/* and from the reference */
LIST_ADDQ(&pat->ref->list_head, &patl->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@ -1268,6 +1278,7 @@ int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err)
/* Insert the entry. */
ebmb_insert_prefix(&expr->pattern_tree, &node->node, 4);
LIST_ADDQ(&pat->ref->tree_head, &node->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@ -1296,6 +1307,7 @@ int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err)
/* Insert the entry. */
ebmb_insert_prefix(&expr->pattern_tree_2, &node->node, 16);
LIST_ADDQ(&pat->ref->tree_head, &node->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@ -1340,6 +1352,7 @@ int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err)
/* index the new node */
ebst_insert(&expr->pattern_tree, &node->node);
LIST_ADDQ(&pat->ref->tree_head, &node->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@ -1382,6 +1395,7 @@ int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err)
/* index the new node */
ebmb_insert_prefix(&expr->pattern_tree, &node->node, len);
LIST_ADDQ(&pat->ref->tree_head, &node->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@ -1400,6 +1414,7 @@ void pat_del_list_gen(struct pattern_expr *expr, struct pat_ref_elt *ref)
/* Delete and free entry. */
LIST_DEL(&pat->list);
LIST_DEL(&pat->from_ref);
if (pat->pat.sflags & PAT_SF_REGFREE)
regex_free(pat->pat.ptr.reg);
else
@ -1428,6 +1443,7 @@ void pat_del_tree_ip(struct pattern_expr *expr, struct pat_ref_elt *ref)
/* Delete and free entry. */
ebmb_delete(node);
LIST_DEL(&elt->from_ref);
free(elt->data);
free(elt);
}
@ -1448,6 +1464,7 @@ void pat_del_tree_ip(struct pattern_expr *expr, struct pat_ref_elt *ref)
/* Delete and free entry. */
ebmb_delete(node);
LIST_DEL(&elt->from_ref);
free(elt->data);
free(elt);
}
@ -1476,6 +1493,7 @@ void pat_del_tree_str(struct pattern_expr *expr, struct pat_ref_elt *ref)
/* Delete and free entry. */
ebmb_delete(node);
LIST_DEL(&elt->from_ref);
free(elt->data);
free(elt);
}
@ -1858,6 +1876,8 @@ struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, const char *pattern, con
}
LIST_INIT(&elt->back_refs);
LIST_INIT(&elt->list_head);
LIST_INIT(&elt->tree_head);
LIST_ADDQ(&ref->head, &elt->list);
return elt;
fail:
@ -1975,6 +1995,8 @@ void pat_ref_reload(struct pat_ref *ref, struct pat_ref *replace)
bref->ref = NULL;
}
LIST_DEL(&elt->list);
LIST_DEL(&elt->list_head);
LIST_DEL(&elt->tree_head);
free(elt->pattern);
free(elt->sample);
free(elt);