libselinux: performance optimization for duplicate detection

When semodule -i some.pp to install a module package, duplicate items are
detected for the module. The detection function is nodups_specs in
libselinux/src/label_file.c. The algorithm complexity of implementing
this function is O(M*(N^2)). M is a symbol related to the length of a string.
N indicates the number of data->nspec. In scenarios where N is very large, the
efficiency is very low.

To solve this problem, I propose to use the hash table to detect duplicates.
The algorithm complexity of new implementing is O(M*N). The execution
efficiency will be greatly improved.

Comparison between the execution time of the nodups_specs function.

Old double-layer loop implementation O(M*(N^2)):

semodule -i myapp1.pp
nodups_specs data->nspec: 5002
nodups_specs start: 11785.242s
nodups_specs end:   11785.588s
nodups_specs consumes:  0.346s

semodule -i myapp2.pp
nodups_specs data->nspec: 10002
nodups_specs start: 11804.280s
nodups_specs end:   11806.546s
nodups_specs consumes:  2.266s

semodule -i myapp3.pp
nodups_specs data->nspec: 20002
nodups_specs start: 11819.106s
nodups_specs end:   11830.892s
nodups_specs consumes: 11.786s

New hash table implementation O(M*N):

semodule -i myapp1.pp
nodups_specs data->nspec: 5002
nodups_specs start: 11785.588s
nodups_specs end:   11785.590s
nodups_specs consumes:  0.002s

semodule -i myapp2.pp
nodups_specs data->nspec: 10002
nodups_specs start: 11806.546s
nodups_specs end:   11806.552s
nodups_specs consumes:  0.006s

semodule -i myapp3.pp
nodups_specs data->nspec: 20002
nodups_specs start: 11830.892s
nodups_specs end:   11830.905s
nodups_specs consumes:  0.013s

Signed-off-by: wanghuizhao <wanghuizhao1@huawei.com>
Acked-by: James Carter <jwcart2@gmail.com>
This commit is contained in:
wanghuizhao 2023-03-08 17:53:08 +08:00 committed by James Carter
parent 4a420508a9
commit 2c7b71db56
1 changed files with 96 additions and 24 deletions

View File

@ -19,10 +19,19 @@
#include <sys/types.h>
#include <sys/stat.h>
#include "hashtab.h"
#include "callbacks.h"
#include "label_internal.h"
#include "label_file.h"
/* controls the shrink multiple of the hashtab length */
#define SHRINK_MULTIS 1
struct chkdups_key {
char *regex;
unsigned int mode;
};
/*
* Internals, mostly moved over from matchpathcon.c
*/
@ -56,41 +65,104 @@ static int find_stem_from_file(struct saved_data *data, const char *key)
return -1;
}
/*
* hash calculation and key comparison of hash table
*/
static unsigned int symhash(hashtab_t h, const_hashtab_key_t key)
{
const struct chkdups_key *k = (const struct chkdups_key *)key;
const char *p = NULL;
size_t size;
unsigned int val = 0;
size = strlen(k->regex);
for (p = k->regex; ((size_t) (p - k->regex)) < size; p++)
val =
((val << 4) | (val >> (8 * sizeof(unsigned int) - 4))) ^ (*p);
return val % h->size;
}
static int symcmp(hashtab_t h
__attribute__ ((unused)), const_hashtab_key_t key1,
const_hashtab_key_t key2)
{
const struct chkdups_key *a = (const struct chkdups_key *)key1;
const struct chkdups_key *b = (const struct chkdups_key *)key2;
return strcmp(a->regex, b->regex) || (a->mode && b->mode && a->mode != b->mode);
}
static int destroy_chkdups_key(hashtab_key_t key)
{
free(key);
return 0;
}
/*
* Warn about duplicate specifications.
*/
static int nodups_specs(struct saved_data *data, const char *path)
{
int rc = 0;
unsigned int ii, jj;
int rc = 0, ret = 0;
unsigned int ii;
struct spec *curr_spec, *spec_arr = data->spec_arr;
struct chkdups_key *new = NULL;
unsigned int hashtab_len = (data->nspec / SHRINK_MULTIS) ? data->nspec / SHRINK_MULTIS : 1;
hashtab_t hash_table = hashtab_create(symhash, symcmp, hashtab_len);
if (!hash_table) {
rc = -1;
COMPAT_LOG(SELINUX_ERROR, "%s: hashtab create failed.\n", path);
return rc;
}
for (ii = 0; ii < data->nspec; ii++) {
curr_spec = &spec_arr[ii];
for (jj = ii + 1; jj < data->nspec; jj++) {
if ((!strcmp(spec_arr[jj].regex_str,
curr_spec->regex_str))
&& (!spec_arr[jj].mode || !curr_spec->mode
|| spec_arr[jj].mode == curr_spec->mode)) {
rc = -1;
errno = EINVAL;
if (strcmp(spec_arr[jj].lr.ctx_raw,
curr_spec->lr.ctx_raw)) {
COMPAT_LOG
(SELINUX_ERROR,
"%s: Multiple different specifications for %s (%s and %s).\n",
path, curr_spec->regex_str,
spec_arr[jj].lr.ctx_raw,
curr_spec->lr.ctx_raw);
} else {
COMPAT_LOG
(SELINUX_ERROR,
"%s: Multiple same specifications for %s.\n",
path, curr_spec->regex_str);
}
new = (struct chkdups_key *)malloc(sizeof(struct chkdups_key));
if (!new) {
rc = -1;
hashtab_destroy_key(hash_table, destroy_chkdups_key);
COMPAT_LOG(SELINUX_ERROR, "%s: hashtab key create failed.\n", path);
return rc;
}
new->regex = spec_arr[ii].regex_str;
new->mode = spec_arr[ii].mode;
ret = hashtab_insert(hash_table, (hashtab_key_t)new, &spec_arr[ii]);
if (ret == HASHTAB_SUCCESS)
continue;
if (ret == HASHTAB_PRESENT) {
curr_spec =
(struct spec *)hashtab_search(hash_table, (hashtab_key_t)new);
rc = -1;
errno = EINVAL;
free(new);
if (strcmp(spec_arr[ii].lr.ctx_raw, curr_spec->lr.ctx_raw)) {
COMPAT_LOG
(SELINUX_ERROR,
"%s: Multiple different specifications for %s (%s and %s).\n",
path, curr_spec->regex_str,
spec_arr[ii].lr.ctx_raw,
curr_spec->lr.ctx_raw);
} else {
COMPAT_LOG
(SELINUX_ERROR,
"%s: Multiple same specifications for %s.\n",
path, curr_spec->regex_str);
}
}
if (ret == HASHTAB_OVERFLOW) {
rc = -1;
free(new);
COMPAT_LOG
(SELINUX_ERROR,
"%s: hashtab happen memory error.\n",
path);
break;
}
}
hashtab_destroy_key(hash_table, destroy_chkdups_key);
return rc;
}