mirror of
https://pagure.io/numad.git
synced 2025-01-05 06:29:30 +00:00
Update to 20121130
This commit is contained in:
parent
a1f2291dd9
commit
87b49d5588
43
numad.8
43
numad.8
@ -1,7 +1,8 @@
|
||||
.TH "numad" "8" "1.0.0" "Bill Gray" "Administration"
|
||||
.SH "numad"
|
||||
.LP
|
||||
numad \- A user\-level daemon that provides advice and managment for optimum use of CPUs and memory on systems with NUMA topology.
|
||||
numad \- A user\-level daemon that provides placement advice and process
|
||||
management for efficient use of CPUs and memory on systems with NUMA topology.
|
||||
.SH "SYNTAX"
|
||||
.LP
|
||||
numad [\fI\-dhvV\fP]
|
||||
@ -13,6 +14,9 @@ numad [\fI\-D non-standard-cgroup-mount-point\fP]
|
||||
numad [\fI\-i [min_interval:]max_interval\fP]
|
||||
.br
|
||||
.LP
|
||||
numad [\fI\-K 0|1\fP]
|
||||
.br
|
||||
.LP
|
||||
numad [\fI\-l log_level\fP]
|
||||
.br
|
||||
.LP
|
||||
@ -36,11 +40,18 @@ numad [\fI\-x PID\fP]
|
||||
|
||||
.SH "DESCRIPTION"
|
||||
.LP
|
||||
Numad is a system daemon that monitors NUMA topology and usage. It will attempt
|
||||
to locate processes for optimum NUMA locality and affinity, dynamically
|
||||
adjusting to changing system conditions. Numad also provides guidance to assist
|
||||
management applications with initial manual binding of CPU and memory resources
|
||||
for their processes.
|
||||
Numad is a system daemon that monitors NUMA topology and resource usage. It
|
||||
will attempt to locate processes for efficient NUMA locality and affinity,
|
||||
dynamically adjusting to changing system conditions. Numad also provides
|
||||
guidance to assist management applications with initial manual binding of CPU
|
||||
and memory resources for their processes. Note that numad is primarily
|
||||
intended for server consolidation environments, where there might be multiple
|
||||
applications or multiple virtual guests running on the same server system.
|
||||
Numad is most likely to have a positive effect when processes can be localized
|
||||
in a subset of the system's NUMA nodes. If the entire system is dedicated to a
|
||||
large in-memory database application, for example -- especially if memory
|
||||
accesses will likely remain unpredictable -- numad will probably not improve
|
||||
performance.
|
||||
.SH "OPTIONS"
|
||||
.LP
|
||||
.TP
|
||||
@ -61,6 +72,16 @@ Sets the time interval that numad waits between system scans, in seconds to
|
||||
cause the daemon to exit. (This is the normal mechanism to terminate the
|
||||
daemon.) A bigger <\fImax_interval\fP> will decrease numad overhead but also
|
||||
decrease responsiveness to changing loads.
|
||||
.TP
|
||||
\fB\-K\fR <\fI0|1\fP>
|
||||
This option controls whether numad keeps interleaved memory spread across NUMA
|
||||
nodes, or attempts to merge interleaved memory to local NUMA nodes. The
|
||||
default is to merge interleaved memory. This is the appropriate setting to
|
||||
localize processes in a subset of the system's NUMA nodes. If you are running
|
||||
a large, single-instance application that allocates interleaved memory because
|
||||
the workload will have continuous unpredictable memory access patterns (e.g. a
|
||||
large in-memory database), you might get better results by specifying \fI\-K
|
||||
1\fP to instruct numad to keep interleaved memory distributed.
|
||||
.TP
|
||||
\fB\-l\fR <\fIlog_level\fP>
|
||||
Sets the log level to <\fIlog_level\fP>. Reasonable choices are 5, 6, or 7.
|
||||
@ -69,15 +90,15 @@ The default value is 5.
|
||||
\fB\-p\fR <\fIPID\fP>
|
||||
Add PID to explicit inclusion list of processes to consider for managing, if
|
||||
the process also uses significant resources. Multiple \fI\-p PID\fP options
|
||||
can be specified at daemon start, but after deamon start, only one PID can be
|
||||
can be specified at daemon start, but after daemon start, only one PID can be
|
||||
added to the inclusion list per subsequent numad invocation. Use with \-S to
|
||||
precisely control the scope of processes numad can manage. Note that the
|
||||
specified process will not necessarily be actively managed unless it also meets
|
||||
numad's significance threshold -- which is currently 300MB and half a CPU.
|
||||
numad's significance threshold -- which is currently 300MB and half of a CPU.
|
||||
.TP
|
||||
\fB\-r\fR <\fIPID\fP>
|
||||
Remove PID from both the explicit inclusion and the exclusion lists of
|
||||
processes. After deamon start, only one PID can be removed from the explicit
|
||||
processes. After daemon start, only one PID can be removed from the explicit
|
||||
process lists per subsequent numad invocation. Use with \-S and \-p and \-x to
|
||||
precisely control the scope of processes numad can manage.
|
||||
.TP
|
||||
@ -110,7 +131,7 @@ Queries numad for the best NUMA nodes to bind an entity that needs
|
||||
be specified as well <\fI:MB\fP> so numad can recommend NUMA nodes with
|
||||
available CPU capacity and adequate free memory. This query option can be used
|
||||
regardless of whether numad is running as a daemon. (An invocation using this
|
||||
option when numad is not running as a daemon, will not cause the deamon to
|
||||
option when numad is not running as a daemon, will not cause the daemon to
|
||||
start.) Output of this option is a string that contains a NUMA node list. For
|
||||
example: 2\-3,6. The recommended node list could be saved in a shell variable
|
||||
(e.g., NODES) and then used as the node list parameter in a
|
||||
@ -122,7 +143,7 @@ command. See numactl(8).
|
||||
\fB\-x\fR <\fIPID\fP>
|
||||
Add PID to explicit exclusion list of processes to blacklist from managing.
|
||||
Multiple \fI\-x PID\fP options can be specified at daemon start, but after
|
||||
deamon start, only one PID can be added to the exclusion list per subsequent
|
||||
daemon start, only one PID can be added to the exclusion list per subsequent
|
||||
numad invocation. Use with \-S to precisely control the scope of processes
|
||||
numad can manage.
|
||||
.SH "FILES"
|
||||
|
300
numad.c
300
numad.c
@ -54,7 +54,7 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#include <values.h>
|
||||
|
||||
|
||||
#define VERSION_STRING "20121015"
|
||||
#define VERSION_STRING "20121130"
|
||||
|
||||
|
||||
#define VAR_RUN_FILE "/var/run/numad.pid"
|
||||
@ -112,6 +112,7 @@ int min_interval = MIN_INTERVAL;
|
||||
int max_interval = MAX_INTERVAL;
|
||||
int target_utilization = TARGET_UTILIZATION_PERCENT;
|
||||
int scan_all_processes = 1;
|
||||
int keep_interleaved_memory = 0;
|
||||
|
||||
pthread_mutex_t pid_list_mutex;
|
||||
pthread_mutex_t node_info_mutex;
|
||||
@ -197,7 +198,7 @@ void init_msg_queue() {
|
||||
msg_qid = msgget(msg_key, msg_flg);
|
||||
if (msg_qid < 0) {
|
||||
numad_log(LOG_CRIT, "msgget failed\n");
|
||||
exit(EXIT_FAILURE);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
flush_msg_queue();
|
||||
}
|
||||
@ -205,7 +206,7 @@ void init_msg_queue() {
|
||||
void recv_msg(msg_p m) {
|
||||
if (msgrcv(msg_qid, m, sizeof(msg_body_t), getpid(), 0) < 0) {
|
||||
numad_log(LOG_CRIT, "msgrcv failed\n");
|
||||
exit(EXIT_FAILURE);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
// printf("Received: >>%s<< from process %d\n", m->body.text, m->body.src_pid);
|
||||
}
|
||||
@ -273,8 +274,8 @@ typedef struct id_list {
|
||||
|
||||
int add_ids_to_list_from_str(id_list_p list_p, char *s) {
|
||||
if (list_p == NULL) {
|
||||
numad_log(LOG_CRIT, "Cannot add to NULL list\n");
|
||||
exit(EXIT_FAILURE);
|
||||
numad_log(LOG_CRIT, "Cannot add to NULL list\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if ((s == NULL) || (strlen(s) == 0)) {
|
||||
goto return_list;
|
||||
@ -308,8 +309,8 @@ return_list:
|
||||
int str_from_id_list(char *str_p, int str_size, id_list_p list_p) {
|
||||
char *p = str_p;
|
||||
if ((p == NULL) || (str_size < 3)) {
|
||||
numad_log(LOG_CRIT, "Bad string for ID listing\n");
|
||||
exit(EXIT_FAILURE);
|
||||
numad_log(LOG_CRIT, "Bad string for ID listing\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
int n;
|
||||
if ((list_p == NULL) || ((n = NUM_IDS_IN_LIST(list_p)) == 0)) {
|
||||
@ -378,7 +379,7 @@ typedef struct process_data {
|
||||
|
||||
|
||||
// Hash table size must always be a power of two
|
||||
#define MIN_PROCESS_HASH_TABLE_SIZE 64
|
||||
#define MIN_PROCESS_HASH_TABLE_SIZE 16
|
||||
int process_hash_table_size = 0;
|
||||
int process_hash_collisions = 0;
|
||||
process_data_p process_hash_table = NULL;
|
||||
@ -467,6 +468,7 @@ int process_hash_update(process_data_p newp) {
|
||||
return new_hash_table_entry;
|
||||
}
|
||||
|
||||
|
||||
int process_hash_rehash(int old_ix) {
|
||||
// Given the index of a table entry that would otherwise be orphaned by
|
||||
// process_hash_remove(), reinsert into table using PID from existing record.
|
||||
@ -486,12 +488,16 @@ int process_hash_remove(int pid) {
|
||||
if (ix >= 0) {
|
||||
// remove the target
|
||||
process_data_p dp = &process_hash_table[ix];
|
||||
if (dp->comm) { free(dp->comm); }
|
||||
if (dp->comm) { free(dp->comm); }
|
||||
if (dp->cpuset_name) { free(dp->cpuset_name); }
|
||||
// if (dp->node_list_p) { FREE_LIST(dp->node_list_p); }
|
||||
memset(dp, 0, sizeof(process_data_t));
|
||||
// bubble up the collision chain
|
||||
while ((pid = process_hash_table[++ix].pid) > 0) {
|
||||
// bubble up the collision chain and rehash if neeeded
|
||||
for (;;) {
|
||||
ix += 1;
|
||||
ix &= (process_hash_table_size - 1);
|
||||
if ((pid = process_hash_table[ix].pid) <= 0) {
|
||||
break;
|
||||
}
|
||||
if (process_hash_lookup(pid) < 0) {
|
||||
if (process_hash_rehash(ix) < 0) {
|
||||
numad_log(LOG_ERR, "rehash fail\n");
|
||||
@ -512,6 +518,7 @@ void process_hash_table_expand() {
|
||||
} else {
|
||||
process_hash_table_size = MIN_PROCESS_HASH_TABLE_SIZE;
|
||||
}
|
||||
numad_log(LOG_DEBUG, "Expanding hash table size: %d\n", process_hash_table_size);
|
||||
process_hash_table = malloc(process_hash_table_size * sizeof(process_data_t));
|
||||
if (process_hash_table == NULL) {
|
||||
numad_log(LOG_CRIT, "hash table malloc failed\n");
|
||||
@ -531,6 +538,18 @@ void process_hash_table_expand() {
|
||||
}
|
||||
}
|
||||
|
||||
void process_hash_table_dump() {
|
||||
for (int ix = 0; (ix < process_hash_table_size); ix++) {
|
||||
process_data_p p = &process_hash_table[ix];
|
||||
if (p->pid) {
|
||||
numad_log(LOG_DEBUG,
|
||||
"ix: %d PID: %d %s Thds: %d CPU %ld MBs: %ld Data TS: %ld Bind TS: %ld\n",
|
||||
ix, p->pid, ((p->comm != NULL) ? p->comm : "(Null)"), p->num_threads,
|
||||
p->CPUs_used, p->MBs_used, p->data_time_stamp, p->bind_time_stamp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void process_hash_table_cleanup(uint64_t update_time) {
|
||||
int cpusets_removed = 0;
|
||||
int num_hash_entries_used = 0;
|
||||
@ -591,8 +610,10 @@ pid_list_p insert_pid_into_pid_list(pid_list_p list_ptr, long pid) {
|
||||
if (process_hash_table != NULL) {
|
||||
int hash_ix = process_hash_lookup(pid);
|
||||
if ((hash_ix >= 0) && (list_ptr == include_pid_list)) {
|
||||
// Clear dup_bind_count, in case user wants it to be re-evaluated soon
|
||||
// Clear dup_bind_count and interleaved flag,
|
||||
// in case user wants it to be re-evaluated soon
|
||||
process_hash_table[hash_ix].dup_bind_count = 0;
|
||||
process_hash_table[hash_ix].flags &= ~PROCESS_FLAG_INTERLEAVED;
|
||||
}
|
||||
}
|
||||
// Check for duplicate pid first
|
||||
@ -661,6 +682,8 @@ void print_usage_and_exit(char *prog_name) {
|
||||
fprintf(stderr, "-D <CGROUP_MOUNT_POINT> to specify cgroup mount point\n");
|
||||
fprintf(stderr, "-h to print this usage info\n");
|
||||
fprintf(stderr, "-i [<MIN>:]<MAX> to specify interval seconds\n");
|
||||
fprintf(stderr, "-K 1 to keep interleaved memory spread across nodes\n");
|
||||
fprintf(stderr, "-K 0 to merge interleaved memory to local NUMA nodes\n");
|
||||
fprintf(stderr, "-l <N> to specify logging level (usually 5, 6, or 7)\n");
|
||||
fprintf(stderr, "-p <PID> to add PID to inclusion pid list\n");
|
||||
fprintf(stderr, "-r <PID> to remove PID from explicit pid lists\n");
|
||||
@ -724,7 +747,7 @@ void check_prereqs(char *prog_name) {
|
||||
fprintf(stderr, "Looks like transparent hugepage scan time in %s is %d ms.\n", thp_scan_fname, ms);
|
||||
fprintf(stderr, "Consider increasing the frequency of THP scanning,\n");
|
||||
fprintf(stderr, "by echoing a smaller number (e.g. 100) to %s\n", thp_scan_fname);
|
||||
fprintf(stderr, "to more agressively (re)construct THPs. For example:\n");
|
||||
fprintf(stderr, "to more aggressively (re)construct THPs. For example:\n");
|
||||
fprintf(stderr, "# echo 100 > /sys/kernel/mm/redhat_transparent_hugepage/khugepaged/scan_sleep_millisecs\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
@ -857,19 +880,19 @@ int get_huge_page_size_in_bytes() {
|
||||
int huge_page_size = 0;;
|
||||
FILE *fs = fopen("/proc/meminfo", "r");
|
||||
if (!fs) {
|
||||
numad_log(LOG_CRIT, "Can't open /proc/meminfo\n");
|
||||
exit(EXIT_FAILURE);
|
||||
numad_log(LOG_CRIT, "Can't open /proc/meminfo\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
char buf[BUF_SIZE];
|
||||
while (fgets(buf, BUF_SIZE, fs)) {
|
||||
if (!strncmp("Hugepagesize", buf, 12)) {
|
||||
char *p = &buf[12];
|
||||
while ((!isdigit(*p)) && (p < buf + BUF_SIZE)) {
|
||||
p++;
|
||||
}
|
||||
huge_page_size = atoi(p);
|
||||
break;
|
||||
}
|
||||
if (!strncmp("Hugepagesize", buf, 12)) {
|
||||
char *p = &buf[12];
|
||||
while ((!isdigit(*p)) && (p < buf + BUF_SIZE)) {
|
||||
p++;
|
||||
}
|
||||
huge_page_size = atoi(p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(fs);
|
||||
return huge_page_size * KILOBYTE;
|
||||
@ -1099,8 +1122,8 @@ int node_and_digits(const struct dirent *dptr) {
|
||||
if (*p++ != 'd') return 0;
|
||||
if (*p++ != 'e') return 0;
|
||||
do {
|
||||
if (!isdigit(*p++))
|
||||
return 0;
|
||||
if (!isdigit(*p++))
|
||||
return 0;
|
||||
} while (*p != '\0');
|
||||
return 1;
|
||||
}
|
||||
@ -1458,11 +1481,23 @@ id_list_p pick_numa_nodes(int pid, int cpus, int mbs) {
|
||||
int num_existing_mems = 0;
|
||||
static id_list_p existing_mems_list_p;
|
||||
CLEAR_LIST(existing_mems_list_p);
|
||||
uint64_t time_stamp = get_time_stamp();
|
||||
static node_data_p tmp_node;
|
||||
static uint64_t *process_MBs;
|
||||
static uint64_t *saved_magnitude_for_node;
|
||||
static int process_MBs_num_nodes;
|
||||
uint64_t time_stamp = get_time_stamp();
|
||||
// See if dynamic structures need to grow.
|
||||
if (process_MBs_num_nodes < num_nodes + 1) {
|
||||
process_MBs_num_nodes = num_nodes + 1;
|
||||
// The "+1 node" is for accumulating interleaved memory
|
||||
process_MBs = realloc(process_MBs, process_MBs_num_nodes * sizeof(uint64_t));
|
||||
tmp_node = realloc(tmp_node, num_nodes * sizeof(node_data_t) );
|
||||
saved_magnitude_for_node = realloc(saved_magnitude_for_node, num_nodes * sizeof(uint64_t));
|
||||
if ((process_MBs == NULL) || (tmp_node == NULL) || (saved_magnitude_for_node == NULL)) {
|
||||
numad_log(LOG_CRIT, "process_MBs realloc failed\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
// For existing processes, get miscellaneous process specific details
|
||||
int pid_ix;
|
||||
process_data_p p = NULL;
|
||||
@ -1487,7 +1522,7 @@ id_list_p pick_numa_nodes(int pid, int cpus, int mbs) {
|
||||
}
|
||||
if (!fgets(buf, BUF_SIZE, fs)) {
|
||||
numad_log(LOG_WARNING, "Tried to research PID %d cpuset, but it apparently went away.\n", p->pid);
|
||||
fclose(fs);
|
||||
fclose(fs);
|
||||
return NULL; // Assume the process terminated?
|
||||
}
|
||||
fclose(fs);
|
||||
@ -1569,18 +1604,6 @@ id_list_p pick_numa_nodes(int pid, int cpus, int mbs) {
|
||||
// is expensive and should be minimized. Also, old kernels dismantle
|
||||
// transparent huge pages while producing the numa_maps memory
|
||||
// information!
|
||||
// Check to see if dynamic structures need to grow.
|
||||
if (process_MBs_num_nodes < num_nodes + 1) {
|
||||
process_MBs_num_nodes = num_nodes + 1;
|
||||
// The "+1 node" is for accumulating interleaved memory
|
||||
process_MBs = realloc(process_MBs, process_MBs_num_nodes * sizeof(uint64_t));
|
||||
tmp_node = realloc(tmp_node, num_nodes * sizeof(node_data_t) );
|
||||
saved_magnitude_for_node = realloc(saved_magnitude_for_node, num_nodes * sizeof(uint64_t));
|
||||
if ((process_MBs == NULL) || (tmp_node == NULL) || (saved_magnitude_for_node == NULL)) {
|
||||
numad_log(LOG_CRIT, "process_MBs realloc failed\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
memset(process_MBs, 0, process_MBs_num_nodes * sizeof(uint64_t));
|
||||
snprintf(fname, FNAME_SIZE, "/proc/%d/numa_maps", pid);
|
||||
fs = fopen(fname, "r");
|
||||
@ -1626,8 +1649,9 @@ id_list_p pick_numa_nodes(int pid, int cpus, int mbs) {
|
||||
numad_log(LOG_DEBUG, "PROCESS_MBs[%d]: %ld\n", ix, process_MBs[ix]);
|
||||
}
|
||||
}
|
||||
if (process_has_interleaved_memory) {
|
||||
// Mark this process as having interleaved memory, and stamp it as done.
|
||||
if ((process_has_interleaved_memory) && (keep_interleaved_memory)) {
|
||||
// Mark this process as having interleaved memory so we do not
|
||||
// merge the interleaved memory. Time stamp it as done.
|
||||
p->flags |= PROCESS_FLAG_INTERLEAVED;
|
||||
p->bind_time_stamp = get_time_stamp();
|
||||
if (log_level >= LOG_DEBUG) {
|
||||
@ -1690,8 +1714,15 @@ id_list_p pick_numa_nodes(int pid, int cpus, int mbs) {
|
||||
int prev_node_used = -1;
|
||||
// Continue to allocate more resources until request are met.
|
||||
// OK if not not quite all the CPU request is met.
|
||||
// FIXME: ?? Is the following too much CPU flexing?
|
||||
while ((mbs > 0) || (cpus > (tmp_node[0].CPUs_total / 4))) {
|
||||
// FIXME: ?? Is half of the utilization margin a good amount of CPU flexing?
|
||||
int cpu_flex = ((100 - target_utilization) * tmp_node[0].CPUs_total) / 200;
|
||||
if (pid <= 0) {
|
||||
// If trying to find resources for pre-placement advice request, do not
|
||||
// underestimate the amount of CPUs needed. Instead, err on the side
|
||||
// of providing too many resources. So, no flexing here...
|
||||
cpu_flex = 0;
|
||||
}
|
||||
while ((mbs > 0) || (cpus > cpu_flex)) {
|
||||
if (log_level >= LOG_DEBUG) {
|
||||
numad_log(LOG_DEBUG, "MBs: %d, CPUs: %d\n", mbs, cpus);
|
||||
}
|
||||
@ -1834,6 +1865,10 @@ id_list_p pick_numa_nodes(int pid, int cpus, int mbs) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
if ((pid <= 0) && (num_target_nodes <= 0)) {
|
||||
// Always provide at least one node for pre-placement advice
|
||||
ADD_ID_TO_LIST(node[0].node_id, target_node_list_p);
|
||||
}
|
||||
try_memory_move_again:
|
||||
str_from_id_list(buf, BUF_SIZE, existing_mems_list_p);
|
||||
str_from_id_list(buf2, BUF_SIZE, target_node_list_p);
|
||||
@ -1974,89 +2009,112 @@ void *set_dynamic_options(void *arg) {
|
||||
// int arg_value = *(int *)arg;
|
||||
char buf[BUF_SIZE];
|
||||
for (;;) {
|
||||
// Loop here forever waiting for a msg to do something...
|
||||
msg_t msg;
|
||||
recv_msg(&msg);
|
||||
switch (msg.body.cmd) {
|
||||
case 'i':
|
||||
min_interval = msg.body.arg1;
|
||||
max_interval = msg.body.arg2;
|
||||
if (max_interval <= 0) {
|
||||
shut_down_numad();
|
||||
}
|
||||
numad_log(LOG_NOTICE, "Changing interval to %d:%d\n", msg.body.arg1, msg.body.arg2);
|
||||
break;
|
||||
case 'l':
|
||||
numad_log(LOG_NOTICE, "Changing log level to %d\n", msg.body.arg1);
|
||||
log_level = msg.body.arg1;
|
||||
break;
|
||||
case 'p':
|
||||
numad_log(LOG_NOTICE, "Adding PID %d to inclusion PID list\n", msg.body.arg1);
|
||||
pthread_mutex_lock(&pid_list_mutex);
|
||||
exclude_pid_list = remove_pid_from_pid_list(exclude_pid_list, msg.body.arg1);
|
||||
include_pid_list = insert_pid_into_pid_list(include_pid_list, msg.body.arg1);
|
||||
pthread_mutex_unlock(&pid_list_mutex);
|
||||
break;
|
||||
case 'r':
|
||||
numad_log(LOG_NOTICE, "Removing PID %d from explicit PID lists\n", msg.body.arg1);
|
||||
pthread_mutex_lock(&pid_list_mutex);
|
||||
include_pid_list = remove_pid_from_pid_list(include_pid_list, msg.body.arg1);
|
||||
exclude_pid_list = remove_pid_from_pid_list(exclude_pid_list, msg.body.arg1);
|
||||
pthread_mutex_unlock(&pid_list_mutex);
|
||||
break;
|
||||
case 'S':
|
||||
scan_all_processes = (msg.body.arg1 != 0);
|
||||
if (scan_all_processes) {
|
||||
numad_log(LOG_NOTICE, "Scanning all processes\n");
|
||||
} else {
|
||||
numad_log(LOG_NOTICE, "Scanning only explicit PID list processes\n");
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
numad_log(LOG_NOTICE, "Changing target utilization to %d\n", msg.body.arg1);
|
||||
target_utilization = msg.body.arg1;
|
||||
break;
|
||||
case 'w':
|
||||
numad_log(LOG_NOTICE, "Getting NUMA pre-placement advice for %d CPUs and %d MBs\n",
|
||||
// Loop here forever waiting for a msg to do something...
|
||||
msg_t msg;
|
||||
recv_msg(&msg);
|
||||
switch (msg.body.cmd) {
|
||||
case 'i':
|
||||
min_interval = msg.body.arg1;
|
||||
max_interval = msg.body.arg2;
|
||||
if (max_interval <= 0) {
|
||||
shut_down_numad();
|
||||
}
|
||||
numad_log(LOG_NOTICE, "Changing interval to %d:%d\n", msg.body.arg1, msg.body.arg2);
|
||||
break;
|
||||
case 'K':
|
||||
keep_interleaved_memory = (msg.body.arg1 != 0);
|
||||
if (keep_interleaved_memory) {
|
||||
numad_log(LOG_NOTICE, "Keeping interleaved memory spread across nodes\n");
|
||||
} else {
|
||||
numad_log(LOG_NOTICE, "Merging interleaved memory to localized NUMA nodes\n");
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
numad_log(LOG_NOTICE, "Changing log level to %d\n", msg.body.arg1);
|
||||
log_level = msg.body.arg1;
|
||||
break;
|
||||
case 'p':
|
||||
numad_log(LOG_NOTICE, "Adding PID %d to inclusion PID list\n", msg.body.arg1);
|
||||
pthread_mutex_lock(&pid_list_mutex);
|
||||
exclude_pid_list = remove_pid_from_pid_list(exclude_pid_list, msg.body.arg1);
|
||||
include_pid_list = insert_pid_into_pid_list(include_pid_list, msg.body.arg1);
|
||||
pthread_mutex_unlock(&pid_list_mutex);
|
||||
break;
|
||||
case 'r':
|
||||
numad_log(LOG_NOTICE, "Removing PID %d from explicit PID lists\n", msg.body.arg1);
|
||||
pthread_mutex_lock(&pid_list_mutex);
|
||||
include_pid_list = remove_pid_from_pid_list(include_pid_list, msg.body.arg1);
|
||||
exclude_pid_list = remove_pid_from_pid_list(exclude_pid_list, msg.body.arg1);
|
||||
pthread_mutex_unlock(&pid_list_mutex);
|
||||
break;
|
||||
case 'S':
|
||||
scan_all_processes = (msg.body.arg1 != 0);
|
||||
if (scan_all_processes) {
|
||||
numad_log(LOG_NOTICE, "Scanning all processes\n");
|
||||
} else {
|
||||
numad_log(LOG_NOTICE, "Scanning only explicit PID list processes\n");
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
numad_log(LOG_NOTICE, "Changing target utilization to %d\n", msg.body.arg1);
|
||||
target_utilization = msg.body.arg1;
|
||||
break;
|
||||
case 'w':
|
||||
numad_log(LOG_NOTICE, "Getting NUMA pre-placement advice for %d CPUs and %d MBs\n",
|
||||
msg.body.arg1, msg.body.arg2);
|
||||
pthread_mutex_lock(&node_info_mutex);
|
||||
update_nodes();
|
||||
id_list_p node_list_p = pick_numa_nodes(-1, (msg.body.arg1 * ONE_HUNDRED), msg.body.arg2);
|
||||
str_from_id_list(buf, BUF_SIZE, node_list_p);
|
||||
pthread_mutex_unlock(&node_info_mutex);
|
||||
send_msg(msg.body.src_pid, 'w', requested_cpus, requested_mbs, buf);
|
||||
break;
|
||||
case 'x':
|
||||
numad_log(LOG_NOTICE, "Adding PID %d to exclusion PID list\n", msg.body.arg1);
|
||||
pthread_mutex_lock(&pid_list_mutex);
|
||||
include_pid_list = remove_pid_from_pid_list(include_pid_list, msg.body.arg1);
|
||||
exclude_pid_list = insert_pid_into_pid_list(exclude_pid_list, msg.body.arg1);
|
||||
pthread_mutex_unlock(&pid_list_mutex);
|
||||
break;
|
||||
default:
|
||||
numad_log(LOG_WARNING, "Unexpected msg command: %c %d %d %s from PID %d\n",
|
||||
pthread_mutex_lock(&node_info_mutex);
|
||||
update_nodes();
|
||||
id_list_p node_list_p = pick_numa_nodes(-1, msg.body.arg1, msg.body.arg2);
|
||||
str_from_id_list(buf, BUF_SIZE, node_list_p);
|
||||
pthread_mutex_unlock(&node_info_mutex);
|
||||
send_msg(msg.body.src_pid, 'w', 0, 0, buf);
|
||||
break;
|
||||
case 'x':
|
||||
numad_log(LOG_NOTICE, "Adding PID %d to exclusion PID list\n", msg.body.arg1);
|
||||
pthread_mutex_lock(&pid_list_mutex);
|
||||
include_pid_list = remove_pid_from_pid_list(include_pid_list, msg.body.arg1);
|
||||
exclude_pid_list = insert_pid_into_pid_list(exclude_pid_list, msg.body.arg1);
|
||||
pthread_mutex_unlock(&pid_list_mutex);
|
||||
break;
|
||||
default:
|
||||
numad_log(LOG_WARNING, "Unexpected msg command: %c %d %d %s from PID %d\n",
|
||||
msg.body.cmd, msg.body.arg1, msg.body.arg1, msg.body.text,
|
||||
msg.body.src_pid);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} // for (;;)
|
||||
}
|
||||
|
||||
|
||||
|
||||
void parse_two_arg_values(char *p, int *first_ptr, int *second_ptr, int first_is_optional) {
|
||||
void parse_two_arg_values(char *p, int *first_ptr, int *second_ptr, int first_is_optional, int first_scale_digits) {
|
||||
char *orig_p = p;
|
||||
char *q = NULL;
|
||||
int second = -1;
|
||||
int first = (int)strtol(p, &p, 10);
|
||||
if (p == orig_p) {
|
||||
errno = 0;
|
||||
int first = (int) strtol(p, &p, 10);
|
||||
if ((errno != 0) || (p == orig_p) || (first < 0)) {
|
||||
fprintf(stderr, "Can't parse arg value(s): %s\n", orig_p);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (*p == '.') {
|
||||
p++;
|
||||
while ((first_scale_digits > 0) && (isdigit(*p))) {
|
||||
first *= 10;
|
||||
first += (*p++ - '0');
|
||||
first_scale_digits -= 1;
|
||||
}
|
||||
while (isdigit(*p)) { p++; }
|
||||
}
|
||||
while (first_scale_digits > 0) {
|
||||
first *= 10;
|
||||
first_scale_digits -= 1;
|
||||
}
|
||||
if (*p == ':') {
|
||||
q = p + 1;
|
||||
second = (int)strtol(q, &p, 10);
|
||||
if (p == q) {
|
||||
errno = 0;
|
||||
second = (int) strtol(q, &p, 10);
|
||||
if ((errno != 0) || (p == q) || (second < 0)) {
|
||||
fprintf(stderr, "Can't parse arg value(s): %s\n", orig_p);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
@ -2078,6 +2136,7 @@ int main(int argc, char *argv[]) {
|
||||
int opt;
|
||||
int d_flag = 0;
|
||||
int i_flag = 0;
|
||||
int K_flag = 0;
|
||||
int l_flag = 0;
|
||||
int p_flag = 0;
|
||||
int r_flag = 0;
|
||||
@ -2087,7 +2146,7 @@ int main(int argc, char *argv[]) {
|
||||
int w_flag = 0;
|
||||
int x_flag = 0;
|
||||
long list_pid = 0;
|
||||
while ((opt = getopt(argc, argv, "dD:hi:l:p:r:S:u:vVw:x:")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "dD:hi:K:l:p:r:S:u:vVw:x:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'd':
|
||||
d_flag = 1;
|
||||
@ -2101,7 +2160,11 @@ int main(int argc, char *argv[]) {
|
||||
break;
|
||||
case 'i':
|
||||
i_flag = 1;
|
||||
parse_two_arg_values(optarg, &min_interval, &max_interval, 1);
|
||||
parse_two_arg_values(optarg, &min_interval, &max_interval, 1, 0);
|
||||
break;
|
||||
case 'K':
|
||||
K_flag = 1;
|
||||
keep_interleaved_memory = (atoi(optarg) != 0);
|
||||
break;
|
||||
case 'l':
|
||||
l_flag = 1;
|
||||
@ -2137,7 +2200,7 @@ int main(int argc, char *argv[]) {
|
||||
break;
|
||||
case 'w':
|
||||
w_flag = 1;
|
||||
parse_two_arg_values(optarg, &requested_cpus, &requested_mbs, 0);
|
||||
parse_two_arg_values(optarg, &requested_cpus, &requested_mbs, 0, 2);
|
||||
break;
|
||||
case 'x':
|
||||
x_flag = 1;
|
||||
@ -2151,8 +2214,8 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
if (argc > optind) {
|
||||
fprintf(stderr, "Unexpected arg = %s\n", argv[optind]);
|
||||
exit(EXIT_FAILURE);
|
||||
fprintf(stderr, "Unexpected arg = %s\n", argv[optind]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (i_flag) {
|
||||
if ((max_interval < min_interval) && (max_interval != 0)) {
|
||||
@ -2174,6 +2237,9 @@ int main(int argc, char *argv[]) {
|
||||
if (i_flag) {
|
||||
send_msg(daemon_pid, 'i', min_interval, max_interval, "");
|
||||
}
|
||||
if (K_flag) {
|
||||
send_msg(daemon_pid, 'K', keep_interleaved_memory, 0, "");
|
||||
}
|
||||
if (d_flag || l_flag || v_flag) {
|
||||
send_msg(daemon_pid, 'l', log_level, 0, "");
|
||||
}
|
||||
@ -2204,7 +2270,7 @@ int main(int argc, char *argv[]) {
|
||||
sleep(2);
|
||||
update_nodes();
|
||||
numad_log(LOG_NOTICE, "Getting NUMA pre-placement advice for %d CPUs and %d MBs\n", requested_cpus, requested_mbs);
|
||||
id_list_p node_list_p = pick_numa_nodes(-1, (requested_cpus * ONE_HUNDRED), requested_mbs);
|
||||
id_list_p node_list_p = pick_numa_nodes(-1, requested_cpus, requested_mbs);
|
||||
str_from_id_list(buf, BUF_SIZE, node_list_p);
|
||||
fprintf(stdout, "%s\n", buf);
|
||||
close_log_file();
|
||||
|
Loading…
Reference in New Issue
Block a user