From 2c7b71db56895626cf2395da5f8c1a4090c29b6a Mon Sep 17 00:00:00 2001 From: wanghuizhao Date: Wed, 8 Mar 2023 17:53:08 +0800 Subject: [PATCH] libselinux: performance optimization for duplicate detection When semodule -i some.pp to install a module package, duplicate items are detected for the module. The detection function is nodups_specs in libselinux/src/label_file.c. The algorithm complexity of implementing this function is O(M*(N^2)). M is a symbol related to the length of a string. N indicates the number of data->nspec. In scenarios where N is very large, the efficiency is very low. To solve this problem, I propose to use the hash table to detect duplicates. The algorithm complexity of new implementing is O(M*N). The execution efficiency will be greatly improved. Comparison between the execution time of the nodups_specs function. Old double-layer loop implementation O(M*(N^2)): semodule -i myapp1.pp nodups_specs data->nspec: 5002 nodups_specs start: 11785.242s nodups_specs end: 11785.588s nodups_specs consumes: 0.346s semodule -i myapp2.pp nodups_specs data->nspec: 10002 nodups_specs start: 11804.280s nodups_specs end: 11806.546s nodups_specs consumes: 2.266s semodule -i myapp3.pp nodups_specs data->nspec: 20002 nodups_specs start: 11819.106s nodups_specs end: 11830.892s nodups_specs consumes: 11.786s New hash table implementation O(M*N): semodule -i myapp1.pp nodups_specs data->nspec: 5002 nodups_specs start: 11785.588s nodups_specs end: 11785.590s nodups_specs consumes: 0.002s semodule -i myapp2.pp nodups_specs data->nspec: 10002 nodups_specs start: 11806.546s nodups_specs end: 11806.552s nodups_specs consumes: 0.006s semodule -i myapp3.pp nodups_specs data->nspec: 20002 nodups_specs start: 11830.892s nodups_specs end: 11830.905s nodups_specs consumes: 0.013s Signed-off-by: wanghuizhao Acked-by: James Carter --- libselinux/src/label_file.c | 120 ++++++++++++++++++++++++++++-------- 1 file changed, 96 insertions(+), 24 deletions(-) diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c index 74ae9b9f..d489c1f7 100644 --- a/libselinux/src/label_file.c +++ b/libselinux/src/label_file.c @@ -19,10 +19,19 @@ #include #include +#include "hashtab.h" #include "callbacks.h" #include "label_internal.h" #include "label_file.h" +/* controls the shrink multiple of the hashtab length */ +#define SHRINK_MULTIS 1 + +struct chkdups_key { + char *regex; + unsigned int mode; +}; + /* * Internals, mostly moved over from matchpathcon.c */ @@ -56,41 +65,104 @@ static int find_stem_from_file(struct saved_data *data, const char *key) return -1; } +/* + * hash calculation and key comparison of hash table + */ + +static unsigned int symhash(hashtab_t h, const_hashtab_key_t key) +{ + const struct chkdups_key *k = (const struct chkdups_key *)key; + const char *p = NULL; + size_t size; + unsigned int val = 0; + + size = strlen(k->regex); + for (p = k->regex; ((size_t) (p - k->regex)) < size; p++) + val = + ((val << 4) | (val >> (8 * sizeof(unsigned int) - 4))) ^ (*p); + return val % h->size; +} + +static int symcmp(hashtab_t h + __attribute__ ((unused)), const_hashtab_key_t key1, + const_hashtab_key_t key2) +{ + const struct chkdups_key *a = (const struct chkdups_key *)key1; + const struct chkdups_key *b = (const struct chkdups_key *)key2; + + return strcmp(a->regex, b->regex) || (a->mode && b->mode && a->mode != b->mode); +} + +static int destroy_chkdups_key(hashtab_key_t key) +{ + free(key); + + return 0; +} + /* * Warn about duplicate specifications. */ static int nodups_specs(struct saved_data *data, const char *path) { - int rc = 0; - unsigned int ii, jj; + int rc = 0, ret = 0; + unsigned int ii; struct spec *curr_spec, *spec_arr = data->spec_arr; + struct chkdups_key *new = NULL; + unsigned int hashtab_len = (data->nspec / SHRINK_MULTIS) ? data->nspec / SHRINK_MULTIS : 1; + hashtab_t hash_table = hashtab_create(symhash, symcmp, hashtab_len); + if (!hash_table) { + rc = -1; + COMPAT_LOG(SELINUX_ERROR, "%s: hashtab create failed.\n", path); + return rc; + } for (ii = 0; ii < data->nspec; ii++) { - curr_spec = &spec_arr[ii]; - for (jj = ii + 1; jj < data->nspec; jj++) { - if ((!strcmp(spec_arr[jj].regex_str, - curr_spec->regex_str)) - && (!spec_arr[jj].mode || !curr_spec->mode - || spec_arr[jj].mode == curr_spec->mode)) { - rc = -1; - errno = EINVAL; - if (strcmp(spec_arr[jj].lr.ctx_raw, - curr_spec->lr.ctx_raw)) { - COMPAT_LOG - (SELINUX_ERROR, - "%s: Multiple different specifications for %s (%s and %s).\n", - path, curr_spec->regex_str, - spec_arr[jj].lr.ctx_raw, - curr_spec->lr.ctx_raw); - } else { - COMPAT_LOG - (SELINUX_ERROR, - "%s: Multiple same specifications for %s.\n", - path, curr_spec->regex_str); - } + new = (struct chkdups_key *)malloc(sizeof(struct chkdups_key)); + if (!new) { + rc = -1; + hashtab_destroy_key(hash_table, destroy_chkdups_key); + COMPAT_LOG(SELINUX_ERROR, "%s: hashtab key create failed.\n", path); + return rc; + } + new->regex = spec_arr[ii].regex_str; + new->mode = spec_arr[ii].mode; + ret = hashtab_insert(hash_table, (hashtab_key_t)new, &spec_arr[ii]); + if (ret == HASHTAB_SUCCESS) + continue; + if (ret == HASHTAB_PRESENT) { + curr_spec = + (struct spec *)hashtab_search(hash_table, (hashtab_key_t)new); + rc = -1; + errno = EINVAL; + free(new); + if (strcmp(spec_arr[ii].lr.ctx_raw, curr_spec->lr.ctx_raw)) { + COMPAT_LOG + (SELINUX_ERROR, + "%s: Multiple different specifications for %s (%s and %s).\n", + path, curr_spec->regex_str, + spec_arr[ii].lr.ctx_raw, + curr_spec->lr.ctx_raw); + } else { + COMPAT_LOG + (SELINUX_ERROR, + "%s: Multiple same specifications for %s.\n", + path, curr_spec->regex_str); } } + if (ret == HASHTAB_OVERFLOW) { + rc = -1; + free(new); + COMPAT_LOG + (SELINUX_ERROR, + "%s: hashtab happen memory error.\n", + path); + break; + } } + + hashtab_destroy_key(hash_table, destroy_chkdups_key); + return rc; }