diff --git a/scripts/testing/sleep-accuracy/Makefile b/scripts/testing/sleep-accuracy/Makefile new file mode 100644 index 000000000..ca2001c98 --- /dev/null +++ b/scripts/testing/sleep-accuracy/Makefile @@ -0,0 +1,5 @@ +sleep-accuracy: sleep-accuracy.c + gcc -O2 -o $@ $< -lm -lpthread + +clean: + $(RM) sleep-accuracy diff --git a/scripts/testing/sleep-accuracy/sleep-accuracy.c b/scripts/testing/sleep-accuracy/sleep-accuracy.c new file mode 100644 index 000000000..052ecd22f --- /dev/null +++ b/scripts/testing/sleep-accuracy/sleep-accuracy.c @@ -0,0 +1,1010 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sleep-accuracy - Measure the accuracy of nanosleep under various conditions. + +/* + Debug tips: + + CPU affinity and toggling can be observed with: + + SLEEP_PID=$(pgrep sleep-accuracy | sort -n | head -n 1) + sudo bpftrace -e "tracepoint:sched:sched_stat_runtime{ if(args->pid == $SLEEP_PID) { @run[cpu]+=args->runtime } } interval:ms:100{ print(@run); }" +*/ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define uint64_t u_int64_t + +#define NS_PER_SEC 1000000000ULL +#define MICROSECOND 1000ULL +#define MILLISECOND 1000000ULL + +// MAX_COMB - maximum number of combinations for cpus, pol/prio, busy, sleep... +#define MAX_COMB 10 + +pid_t main_thread_pid = 0; + +typedef enum { + BENCHMARK_NANOSLEEP, + BENCHMARK_NETWORKING, + BENCHMARK_FUTEX, +} benchmark_type_t; + +// options +typedef struct { + int cpus[MAX_COMB][2]; // CPUs to pin or toggle + int cpu_count; // Number of CPU cores + int polprio[MAX_COMB][2]; // Scheduling policy and priority pairs + int polprio_count; // Number of policy/priority pairs + int cpuidle_minmax[MAX_COMB][2]; // cpuidle min/max state pairs + int cpuidle_count; // Number of cpuidle min/max pairs + int cpufreq_minmax[MAX_COMB][2]; // cpufreq min/max [kHz] pairs + int cpufreq_count; // Number of cpufreq min/max pairs + int64_t busy_times[MAX_COMB]; // Busy durations in nanoseconds + int busy_count; // Number of busy durations + int64_t sleep_times[MAX_COMB]; // Sleep durations in nanoseconds + int sleep_count; // Number of sleep durations + int64_t toggle_intervals[MAX_COMB]; // CPU toggling intervals [ns] + int toggle_count; // Number of CPU toggling intervals + int64_t iterations; // Number of iterations per measurement + int repeats; // Number of repetitions for each measurement + benchmark_type_t benchmarks[MAX_COMB]; // Benchmarks to run + int benchmark_count; // Number of benchmarks +} options_t; + +options_t options = {}; + +void print_usage() { + printf( + "sleep-accuracy - Measure the accuracy of nanosleep under various conditions.\n" + "\n" + "Usage: sleep-accuracy [options]\n" + "Options:\n" + " -b Comma-separated list of benchmarks to run: nanosleep,networking,futex (default: nanosleep)\n" + " -c Comma-separated list of CPUs to pin benchmark thread(s) (default: no pinning)\n" + " -c Comma-separated list of multi-CPU affinity.\n" + " nanosleep: CPU affinity of the only benchmark thread toggles cpu0 and cpu1 in -t intervals\n" + " networking: client and server pinned to cpu0 and cpu1 respectively, or both pinned to cpu0 if cpu1 is -1\n" + " futex: thread 1 and thread 2 pinned to cpu0 and cpu1 respectively, or both pinned to cpu0 if cpu1 is -1\n" + " -t Comma-separated list of CPU toggling intervals [ns], if CPU toggling is used with -c cpu0/cpu1 (default: 1000000)\n" + " -p Comma-separated list of Scheduling policy/priority.\n" + " 0=OTHER, 1=FIFO, 2=RR, 3=BATCH, 5=IDLE (default: 0/0), see sched_setscheduler(2)\n" + " -f Comma-separated list of cpufreq min/max [kHz] pairs (default: 0/9999999)\n" + " -i Comma-separated list of cpuidle min/max state pairs (default: 0/99)\n" + " -B Comma-separated list of busy durations [ns] (default: 0,1000,1000000)\n" + " -s Comma-separated list of sleep durations [ns] (default: 0,1000,1000000)\n" + " -r Number of repetitions for each measurement (default: 1)\n" + " -I Number of iterations per measurement (default: 1000)\n" + " -h Show this help message\n" + "\n" + "Example:\n" + " sleep-accuracy -c 3/13,3,13 -t 1000000,100000 -p 0/0,1/1 -f 1200000/1200000,0/9999999 -i -1/-1,0/1,0/9 -B 20000 -s 50000 -I 10000 -r 5\n" + " report requested sleep accuracy when...\n" + " -c 3/13,3,13: migrating between CPUs 3 and 13 or running only on CPU 3 or 13\n" + " -t 1000000,10000: ...migrating every 1 ms or 100 us,\n" + " -p 0/0,1/1: ...with SCHED_OTHER prio0 or SCHED_FIFO prio1,\n" + " -f 1200000/1200000,0/9999999: ...with CPU(s) fixed at 1.2 GHz or platforms min/max frequencies,\n" + " -i -1/-1,0/1,0/9: ...with no states, only states 0 and 1, or all idle states enabled\n" + " -B 20000: ...running busy for 20us before each sleep,\n" + " -s 50000: ...requesting 50us sleep,\n" + " -I 10000: ...repeating each measurement 10k times to get statistically significant results,\n" + " -r 5: ...and repeating the whole measurement 5 times to see variation between runs.\n" + ); +} + +// delay - sleep for specified nanoseconds +void delay(uint64_t ns) { + struct timespec req, rem; + req.tv_sec = ns / NS_PER_SEC; + req.tv_nsec = ns % NS_PER_SEC; + while (nanosleep(&req, &rem) == -1) { + req = rem; // continue sleeping for the remaining time if interrupted + } +} + +// set_cpu_affinity - set CPU affinity of the main thread to a specific CPU +void set_cpu_affinity(int cpu) { + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + if (sched_setaffinity(main_thread_pid, sizeof(cpuset), &cpuset) == -1) { + perror("sched_setaffinity"); + exit(EXIT_FAILURE); + } +} + +// set_scheduler - set scheduling policy and priority +void set_scheduler(int policy, int priority) { + struct sched_param param; + param.sched_priority = priority; + if (sched_setscheduler(0, policy, ¶m) == -1) { + perror("sched_setscheduler"); + exit(EXIT_FAILURE); + } +} + +// set_cpuidle_minmax - enable/disable cpuidle/stateX's +void set_cpuidle_minmax(int cpu, int min, int max) { + char disable_filename[1024]; + int state = 0; + FILE *f = NULL; + while (1) { + sprintf(disable_filename, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/disable", cpu, state); + FILE *f = fopen(disable_filename, "w"); + if (!f) { + if (state == 0 && max != 99) { + perror("cannot open for writing: cpuidle/state0/disable"); + } + break; // all cpuidle states processed + } + fprintf(f, "%d\n", (state < min || state > max) ? 1 : 0); + fflush(f); + fsync(fileno(f)); + fclose(f); + state++; + } + if (f) fclose(f); +} + +// get_cpuidle_minmax - read min and max cpuidle states for the CPU from sysfs +void get_cpuidle_minmax(int cpu, int *min, int *max) { + char disable_filename[1024]; + int state = 0; + FILE *f = NULL; + *min = -1; + *max = -1; + while (1) { + sprintf(disable_filename, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/disable", cpu, state); + f = fopen(disable_filename, "r"); + if (!f) { + break; // all cpuidle states processed + } + int disabled = 0; + fscanf(f, "%d", &disabled); + fclose(f); + if (!disabled) { + if (*min == -1) *min = state; + *max = state; + } + state++; + } +} + +// set_cpufreq_minmax - set min and max cpufreq for the CPU in sysfs +void set_cpufreq_minmax(int cpu, int min, int max) { + char freq_filename[1024]; + FILE *f = NULL; + + sprintf(freq_filename, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + f = fopen(freq_filename, "w"); + if (f) { + fprintf(f, "%d\n", max); + fflush(f); + fsync(fileno(f)); + fclose(f); + } else { + perror("cannot open for writing: cpufreq/scaling_max_freq"); + } + + sprintf(freq_filename, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + f = fopen(freq_filename, "w"); + if (f) { + fprintf(f, "%d\n", min); + fflush(f); + fsync(fileno(f)); + fclose(f); + } else { + perror("cannot open for writing: cpufreq/scaling_min_freq"); + } +} + +// get_cpufreq_minmax - read min and max cpufreq for the CPU from sysfs +void get_cpufreq_minmax(int cpu, int *min, int *max) { + char freq_filename[1024]; + FILE *f = NULL; + + sprintf(freq_filename, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + f = fopen(freq_filename, "r"); + if (f) { + fscanf(f, "%d", max); + fclose(f); + } else { + perror("cannot open for reading: cpufreq/scaling_max_freq"); + } + + sprintf(freq_filename, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + f = fopen(freq_filename, "r"); + if (f) { + fscanf(f, "%d", min); + fclose(f); + } else { + perror("cannot open for reading: cpufreq/scaling_min_freq"); + } +} + +// get_time_ns - get current time in nanoseconds +uint64_t get_time_ns() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * NS_PER_SEC + (uint64_t)ts.tv_nsec; +} + +// compare_uint64 - comparison function for qsort +int compare_uint64(const void *a, const void *b) { + uint64_t val1 = *(const uint64_t *)a; + uint64_t val2 = *(const uint64_t *)b; + if (val1 < val2) return -1; + if (val1 > val2) return 1; + return 0; +} + +// busy-wait for a specified duration +void busy_wait(uint64_t duration_ns) { + u_int64_t start = get_time_ns(); + while (get_time_ns() - start < duration_ns); +} + +// reset_latencies - initialize latencies array with error value (-1) +void reset_latencies(int64_t *latencies) { + for (int i = 0; i < options.iterations; i++) { + latencies[i] = -1; + } +} + +// measure_nanosleep - perform measurements (all iterations) of nanosleep latency +void measure_nanosleep(int64_t busy_ns, int64_t sleep_ns, int64_t *out_latencies, int cpu, int cpu_other, int64_t toggle_ns) { + int64_t iters = options.iterations; + pid_t toggler_pid = -1; + + // Set CPU affinity for measurement thread (cpuX) + if (cpu != -1) { + set_cpu_affinity(cpu); + } + + // Launch CPU toggler process if needed (when cpu_other is specified) + if (cpu != -1 && cpu_other != -1 && toggle_ns > 0) { + toggler_pid = fork(); + if (toggler_pid < 0) { + perror("measure_nanosleep: fork for CPU toggler failed"); + goto out; + } + + if (toggler_pid == 0) { + // Child process - CPU toggler + // This process toggles the CPU affinity of the parent (measurement) thread. + // set_cpu_affinity() uses main_thread_pid, which refers to the parent process. + prctl(PR_SET_PDEATHSIG, SIGTERM); // ensure child exits when parent exits + if (getppid() == 1) { + exit(0); // parent already exited, so do we + } + while (1) { + set_cpu_affinity(cpu); + delay(toggle_ns); + set_cpu_affinity(cpu_other); + delay(toggle_ns); + } + } + + // Parent process - give toggler some time to start + delay(toggle_ns * 2); + } + + // Perform measurements + for (int i = 0; i < iters; i++) { + if (busy_ns > 0) { + busy_wait(busy_ns); // Simulate work before sleep + } + int64_t sleep_start = get_time_ns(); + + // request a short sleep using nanosleep, even if sleep_ns is 0 + if (sleep_ns >= 0) { + struct timespec req = {0, sleep_ns}; + nanosleep(&req, NULL); + } + + int64_t sleep_end = get_time_ns(); + int64_t actual_sleep = sleep_end - sleep_start; + int64_t latency = actual_sleep - sleep_ns; + + out_latencies[i] = latency; + } + +out: + // Ensure CPU toggler exits before returning + if (toggler_pid > 0) { + kill(toggler_pid, SIGTERM); + waitpid(toggler_pid, NULL, 0); + } +} + +// measure_networking - measure networking latency using loopback socket communication +// Server and client run in separate processes with optional CPU affinity +void measure_networking(int64_t busy_ns, int64_t sleep_ns, int64_t *out_latencies, int cpu, int cpu_other) { + int64_t iters = options.iterations; + int server_fd = -1, client_fd = -1, conn_fd = -1; + struct sockaddr_in server_addr, client_addr; + socklen_t addr_len = sizeof(client_addr); + char buffer[1]; + int pipe_fd[2] = {-1, -1}; + pid_t pid = -1; + int opt = 1; + + // Create pipe for synchronization + if (pipe(pipe_fd) < 0) { + perror("pipe creation failed"); + goto out; + } + + // Create server socket + server_fd = socket(AF_INET, SOCK_STREAM, 0); + if (server_fd < 0) { + perror("socket creation failed"); + goto out_close_pipe; + } + + setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT, &opt, sizeof(opt)); + + memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sin_family = AF_INET; + server_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + server_addr.sin_port = 0; // Let OS assign a port + + if (bind(server_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) { + perror("bind failed"); + goto out_close_server; + } + + if (listen(server_fd, 1) < 0) { + perror("listen failed"); + goto out_close_server; + } + + // Get the assigned port + addr_len = sizeof(server_addr); + getsockname(server_fd, (struct sockaddr *)&server_addr, &addr_len); + + // Fork to create server and client processes + pid = fork(); + if (pid < 0) { + perror("fork failed"); + goto out_close_server; + } + + if (pid == 0) { + // Child process - Server + close(pipe_fd[0]); // Close read end + pipe_fd[0] = -1; + + // Set CPU affinity for server (cpuX) + if (cpu != -1) { + set_cpu_affinity(cpu); + } + + // Signal parent that server is ready and listening + write(pipe_fd[1], "R", 1); + + // Accept connection + conn_fd = accept(server_fd, (struct sockaddr *)&client_addr, &addr_len); + if (conn_fd < 0) { + perror("accept failed"); + close(pipe_fd[1]); + close(server_fd); + exit(1); + } + + // Server echo loop + for (int i = 0; i < iters; i++) { + // Receive from client + if (recv(conn_fd, buffer, 1, 0) <= 0) { + break; + } + buffer[0] = 'y'; + // Echo back + if (send(conn_fd, buffer, 1, 0) <= 0) { + break; + } + } + + close(conn_fd); + close(server_fd); + close(pipe_fd[1]); + exit(0); + } + + // Parent process - Client + close(pipe_fd[1]); // Close write end + pipe_fd[1] = -1; + + // Set CPU affinity for client (cpuY if specified, otherwise cpuX) + if (cpu_other != -1) { + set_cpu_affinity(cpu_other); + } else if (cpu != -1) { + set_cpu_affinity(cpu); + } + + // Wait for server to be ready + char ready_signal; + if (read(pipe_fd[0], &ready_signal, 1) <= 0) { + perror("Failed to receive ready signal from server"); + goto out_kill_server; + } + + // Create client socket + client_fd = socket(AF_INET, SOCK_STREAM, 0); + if (client_fd < 0) { + perror("client socket creation failed"); + goto out_kill_server; + } + + // Connect to server + if (connect(client_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) { + perror("connect failed"); + goto out_close_client; + } + + // Measure round-trip latency + for (int i = 0; i < iters; i++) { + if (busy_ns > 0) { + busy_wait(busy_ns); + } + + if (sleep_ns > 0) { + struct timespec req = {0, sleep_ns}; + nanosleep(&req, NULL); + } + + int64_t start = get_time_ns(); + + // Send one byte. + // In case of error, skip writing out_latencies[i] (keep it as -1) + buffer[0] = 'x'; + if (send(client_fd, buffer, 1, 0) < 0) { + continue; + } + + // Receive echo back + if (recv(client_fd, buffer, 1, 0) < 0) { + continue; + } + + int64_t end = get_time_ns(); + int64_t latency = (end - start) / 2; // Divide by 2 for one-way latency approximation + if (buffer[0] != 'y') { + continue; // Invalid response, skip recording latency + } + out_latencies[i] = latency; + } + +out_close_client: + if (client_fd >= 0) + close(client_fd); +out_kill_server: + if (pid > 0) { + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + } + if (pipe_fd[0] >= 0) + close(pipe_fd[0]); +out_close_server: + if (server_fd >= 0) + close(server_fd); +out_close_pipe: + if (pipe_fd[1] >= 0) + close(pipe_fd[1]); +out: +} + +// futex wrapper +static long futex(uint32_t *uaddr, int futex_op, uint32_t val, + const struct timespec *timeout, uint32_t *uaddr2, uint32_t val3) { + return syscall(SYS_futex, uaddr, futex_op, val, timeout, uaddr2, val3); +} + +// Thread context for futex benchmark +typedef struct { + uint32_t *futex_word1; + uint32_t *futex_word2; + uint32_t *value1; + uint32_t *value2; + int64_t *latencies; + int64_t busy_ns; + int64_t sleep_ns; + int cpu; + int iterations; + int is_thread1; +} futex_thread_args_t; + +// measure_futex - measure futex synchronization latency between two threads +// Thread 1 and thread 2 use futex for synchronization with optional CPU affinity +void* futex_thread_func(void *arg) { + futex_thread_args_t *args = (futex_thread_args_t *)arg; + int iters = args->iterations; + + // Set CPU affinity for this thread + if (args->cpu != -1) { + set_cpu_affinity(args->cpu); + } + + if (args->is_thread1) { + // Thread 1: initiates the ping-pong + for (int i = 0; i < iters; i++) { + if (args->sleep_ns > 0) { + struct timespec req = {0, args->sleep_ns}; + nanosleep(&req, NULL); + } + + if (args->busy_ns > 0) { + busy_wait(args->busy_ns); + } + + int64_t start = get_time_ns(); + + // Signal thread 2 by changing value and waking it + *args->value1 = i + 1; + __atomic_store_n(args->futex_word1, 1, __ATOMIC_SEQ_CST); + futex(args->futex_word1, FUTEX_WAKE, 1, NULL, NULL, 0); + + // Wait for thread 2 to respond + while (__atomic_load_n(args->futex_word2, __ATOMIC_SEQ_CST) == 0) { + futex(args->futex_word2, FUTEX_WAIT, 0, NULL, NULL, 0); + } + __atomic_store_n(args->futex_word2, 0, __ATOMIC_SEQ_CST); + + int64_t end = get_time_ns(); + + // Verify that thread 2 changed the value as expected + if (*args->value2 != i + 1) { + args->latencies[i] = -1; // Mark as error + continue; + } + + int64_t latency = (end - start) / 2; // Divide by 2 for one-way latency + args->latencies[i] = latency; + } + } else { + // Thread 2: responds to thread 1 + for (int i = 0; i < iters; i++) { + // Wait for thread 1 to signal + while (__atomic_load_n(args->futex_word1, __ATOMIC_SEQ_CST) == 0) { + futex(args->futex_word1, FUTEX_WAIT, 0, NULL, NULL, 0); + } + __atomic_store_n(args->futex_word1, 0, __ATOMIC_SEQ_CST); + + // Verify that thread 1 changed the value as expected + if (*args->value1 != i + 1) { + // Error - just continue, thread 1 will mark the error + __atomic_store_n(args->futex_word2, 1, __ATOMIC_SEQ_CST); + futex(args->futex_word2, FUTEX_WAKE, 1, NULL, NULL, 0); + continue; + } + + // Respond by changing value and waking thread 1 + *args->value2 = i + 1; + __atomic_store_n(args->futex_word2, 1, __ATOMIC_SEQ_CST); + futex(args->futex_word2, FUTEX_WAKE, 1, NULL, NULL, 0); + } + } + + return NULL; +} + +void measure_futex(int64_t busy_ns, int64_t sleep_ns, int64_t *out_latencies, int cpu, int cpu_other) { + pthread_t thread1 = 0, thread2 = 0; + uint32_t *futex_word1 = NULL, *futex_word2 = NULL; + uint32_t *value1 = NULL, *value2 = NULL; + futex_thread_args_t args1, args2; + int ret; + void *thread_ret; + + // Allocate shared memory for futex words and values + futex_word1 = malloc(sizeof(uint32_t)); + if (!futex_word1) { + perror("malloc futex_word1 failed"); + goto out; + } + + futex_word2 = malloc(sizeof(uint32_t)); + if (!futex_word2) { + perror("malloc futex_word2 failed"); + goto out_free_futex1; + } + + value1 = malloc(sizeof(uint32_t)); + if (!value1) { + perror("malloc value1 failed"); + goto out_free_futex2; + } + + value2 = malloc(sizeof(uint32_t)); + if (!value2) { + perror("malloc value2 failed"); + goto out_free_value1; + } + + // Initialize futex words and values + *futex_word1 = 0; + *futex_word2 = 0; + *value1 = 0; + *value2 = 0; + + // Setup arguments for thread 1 + args1.futex_word1 = futex_word1; + args1.futex_word2 = futex_word2; + args1.value1 = value1; + args1.value2 = value2; + args1.latencies = out_latencies; + args1.busy_ns = busy_ns; + args1.sleep_ns = sleep_ns; + args1.cpu = cpu; + args1.iterations = options.iterations; + args1.is_thread1 = 1; + + // Setup arguments for thread 2 + args2.futex_word1 = futex_word1; + args2.futex_word2 = futex_word2; + args2.value1 = value1; + args2.value2 = value2; + args2.latencies = NULL; // Thread 2 doesn't record latencies + args2.busy_ns = 0; // Only thread 1 does busy-wait + args2.sleep_ns = 0; // Only thread 1 does sleep + args2.cpu = (cpu_other != -1) ? cpu_other : cpu; + args2.iterations = options.iterations; + args2.is_thread1 = 0; + + // Create thread 2 first (responder) + ret = pthread_create(&thread2, NULL, futex_thread_func, &args2); + if (ret != 0) { + perror("pthread_create thread2 failed"); + thread2 = 0; + goto out_free_value2; + } + + // Create thread 1 (initiator) + ret = pthread_create(&thread1, NULL, futex_thread_func, &args1); + if (ret != 0) { + perror("pthread_create thread1 failed"); + thread1 = 0; + goto out_join_thread2; + } + + // Wait for thread 1 to complete + pthread_join(thread1, &thread_ret); + thread1 = 0; + +out_join_thread2: + if (thread2 != 0) + pthread_join(thread2, &thread_ret); +out_free_value2: + if (value2) + free(value2); +out_free_value1: + if (value1) + free(value1); +out_free_futex2: + if (futex_word2) + free(futex_word2); +out_free_futex1: + if (futex_word1) + free(futex_word1); +out: + return; +} + +const char* benchmark_name(benchmark_type_t type) { + switch (type) { + case BENCHMARK_NANOSLEEP: return "nanosleep"; + case BENCHMARK_NETWORKING: return "networking"; + case BENCHMARK_FUTEX: return "futex"; + default: return "unknown"; + } +} + +void print_latencies(int64_t *latencies) { + uint64_t total_latency = 0; + int64_t iters = options.iterations; + for (int i = 0; i < iters; i++) { + total_latency += latencies[i]; + } + + // Sort latencies for percentile calculation + qsort(latencies, iters, sizeof(uint64_t), compare_uint64); + + double avg_latency = (double)total_latency / iters; + + // Calculate percentiles + int64_t min = latencies[0]; + int64_t p5 = latencies[(int)(iters * 0.05)]; + int64_t p50 = latencies[(int)(iters * 0.5)]; + int64_t p80 = latencies[(int)(iters * 0.8)]; + int64_t p90 = latencies[(int)(iters * 0.9)]; + int64_t p95 = latencies[(int)(iters * 0.95)]; + int64_t p99 = latencies[(int)(iters * 0.99)]; + int64_t p999 = latencies[(int)(iters * 0.999)]; + int64_t max = latencies[iters - 1]; + + // Print results + printf("%ld %ld %ld %ld %ld %ld %ld %ld %ld %.0f", min, p5, p50, p80, p90, p95, p99, p999, max, avg_latency); +} + +void parse_options(int argc, char *argv[]) { + // Default values + options.cpu_count = 0; + options.polprio_count = 0; + options.busy_count = 0; + options.sleep_count = 0; + options.toggle_count = 0; + options.benchmark_count = 0; + options.iterations = 1000; + options.repeats = 1; + + options.polprio[options.polprio_count][0] = 0; // Default policy OTHER + options.polprio[options.polprio_count++][1] = 0; // Default priority 0 + + options.cpuidle_minmax[options.cpuidle_count][0] = 0; // Default cpuidle min state + options.cpuidle_minmax[options.cpuidle_count++][1] = 99; // Default cpuidle max state + + options.cpufreq_minmax[options.cpufreq_count][0] = 0; // Default cpufreq min [kHz] + options.cpufreq_minmax[options.cpufreq_count++][1] = 9999999; // Default cpufreq max [kHz] + + options.benchmarks[options.benchmark_count++] = BENCHMARK_NANOSLEEP; // Default benchmark + + options.busy_times[options.busy_count++] = 0; + options.busy_times[options.busy_count++] = 1000; + options.busy_times[options.busy_count++] = 1000000; + + options.sleep_times[options.sleep_count++] = 0; + options.sleep_times[options.sleep_count++] = 1000; + options.sleep_times[options.sleep_count++] = 1000000; + + options.toggle_intervals[options.toggle_count++] = 1000000; // Default 1 ms + + // Parse command-line arguments + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { + char *token = strtok(argv[++i], ","); + while (token && options.cpu_count < MAX_COMB) { + char *slash = strchr(token, '/'); + if (slash) { + *slash = '\0'; + options.cpus[options.cpu_count][0] = atoi(token); + options.cpus[options.cpu_count++][1] = atoi(slash + 1); + } else { + options.cpus[options.cpu_count++][0] = atoi(token); + options.cpus[options.cpu_count - 1][1] = -1; // indicate single CPU pinning + } + token = strtok(NULL, ","); + } + } else if (strcmp(argv[i], "-p") == 0 && i + 1 < argc) { + options.polprio_count = 0; // Reset defaults + char *token = strtok(argv[++i], ","); + while (token && options.polprio_count < MAX_COMB) { + char *slash = strchr(token, '/'); + if (slash) { + *slash = '\0'; + options.polprio[options.polprio_count][0] = atoi(token); + options.polprio[options.polprio_count++][1] = atoi(slash + 1); + } + token = strtok(NULL, ","); + } + } else if (strcmp(argv[i], "-i") == 0 && i + 1 < argc) { + options.cpuidle_count = 0; // Reset defaults + char *token = strtok(argv[++i], ","); + while (token && options.cpuidle_count < MAX_COMB) { + char *slash = strchr(token, '/'); + if (slash) { + *slash = '\0'; + options.cpuidle_minmax[options.cpuidle_count][0] = atoi(token); + options.cpuidle_minmax[options.cpuidle_count++][1] = atoi(slash + 1); + } + token = strtok(NULL, ","); + } + } else if (strcmp(argv[i], "-f") == 0 && i + 1 < argc) { + options.cpufreq_count = 0; // Reset defaults + char *token = strtok(argv[++i], ","); + while (token && options.cpufreq_count < MAX_COMB) { + char *slash = strchr(token, '/'); + if (slash) { + *slash = '\0'; + options.cpufreq_minmax[options.cpufreq_count][0] = atoi(token); + options.cpufreq_minmax[options.cpufreq_count++][1] = atoi(slash + 1); + } + token = strtok(NULL, ","); + } + } else if (strcmp(argv[i], "-b") == 0 && i + 1 < argc) { + options.benchmark_count = 0; // Reset defaults + char *token = strtok(argv[++i], ","); + while (token && options.benchmark_count < MAX_COMB) { + if (strcmp(token, "nanosleep") == 0) { + options.benchmarks[options.benchmark_count++] = BENCHMARK_NANOSLEEP; + } else if (strcmp(token, "networking") == 0) { + options.benchmarks[options.benchmark_count++] = BENCHMARK_NETWORKING; + } else if (strcmp(token, "futex") == 0) { + options.benchmarks[options.benchmark_count++] = BENCHMARK_FUTEX; + } else { + fprintf(stderr, "Unknown benchmark: %s\n", token); + exit(EXIT_FAILURE); + } + token = strtok(NULL, ","); + } + } else if (strcmp(argv[i], "-B") == 0 && i + 1 < argc) { + options.busy_count = 0; // Reset defaults + char *token = strtok(argv[++i], ","); + while (token && options.busy_count < MAX_COMB) { + options.busy_times[options.busy_count++] = strtoull(token, NULL, 10); + token = strtok(NULL, ","); + } + } else if (strcmp(argv[i], "-s") == 0 && i + 1 < argc) { + options.sleep_count = 0; // Reset defaults + char *token = strtok(argv[++i], ","); + while (token && options.sleep_count < MAX_COMB) { + options.sleep_times[options.sleep_count++] = strtoull(token, NULL, 10); + token = strtok(NULL, ","); + } + } else if (strcmp(argv[i], "-t") == 0 && i + 1 < argc) { + options.toggle_count = 0; // Reset defaults + char *token = strtok(argv[++i], ","); + while (token && options.toggle_count < MAX_COMB) { + options.toggle_intervals[options.toggle_count++] = strtoull(token, NULL, 10); + token = strtok(NULL, ","); + } + } else if (strcmp(argv[i], "-I") == 0 && i + 1 < argc) { + options.iterations = atoi(argv[++i]); + } else if (strcmp(argv[i], "-r") == 0 && i + 1 < argc) { + options.repeats = atoi(argv[++i]); + } else if (strcmp(argv[i], "-h") == 0) { + print_usage(); + exit(0); + } else { + fprintf(stderr, "Unknown option: %s\n", argv[i]); + exit(EXIT_FAILURE); + } + } +} + + + +int main(int argc, char *argv[]) { + int64_t *latencies; + parse_options(argc, argv); + + latencies = malloc(sizeof(int64_t) * options.iterations); + if(!latencies) { + perror("allocating memory for latencies failed"); + exit(EXIT_FAILURE); + } + + main_thread_pid = getpid(); + + printf("benchmark round cpu0 cpu1 cpumigr_ns schedpol schedprio idlemin idlemax freqmin freqmax busy_ns sleep_ns min p5 p50 p80 p90 p95 p99 p999 max avg\n"); + + for (int r = 0; r < options.repeats; r++) { + + for (int bench_idx = 0; bench_idx < options.benchmark_count; bench_idx++) { + benchmark_type_t benchmark = options.benchmarks[bench_idx]; + + for (int toggle_idx = 0; toggle_idx < options.toggle_count; toggle_idx++) { + int toggle_ns = options.toggle_intervals[toggle_idx]; + + for (int cpu_idx = 0; cpu_idx < (options.cpu_count ? options.cpu_count : 1); cpu_idx++) { + int cpu = options.cpu_count ? options.cpus[cpu_idx][0] : -1; + int cpu_other = options.cpu_count ? options.cpus[cpu_idx][1] : -1; + + + for (int pp_idx = 0; pp_idx < options.polprio_count; pp_idx++) { + set_scheduler(options.polprio[pp_idx][0], options.polprio[pp_idx][1]); + + for (int cpuidle_idx = 0; cpuidle_idx < options.cpuidle_count; cpuidle_idx++) { + int cpuidle_min = -1; + int cpuidle_max = -1; + if (cpu != -1) { + cpuidle_min = options.cpuidle_minmax[cpuidle_idx][0]; + cpuidle_max = options.cpuidle_minmax[cpuidle_idx][1]; + set_cpuidle_minmax(cpu, cpuidle_min, cpuidle_max); + if (cpu_other != -1) { + set_cpuidle_minmax(cpu_other, cpuidle_min, cpuidle_max); + } + } + + for (int cpufreq_idx = 0; cpufreq_idx < options.cpufreq_count; cpufreq_idx++) { + int cpufreq_min = -1; + int cpufreq_max = -1; + if (cpu != -1) { + cpufreq_min = options.cpufreq_minmax[cpufreq_idx][0]; + cpufreq_max = options.cpufreq_minmax[cpufreq_idx][1]; + set_cpufreq_minmax(cpu, cpufreq_min, cpufreq_max); + if (cpu_other != -1) { + set_cpufreq_minmax(cpu_other, cpufreq_min, cpufreq_max); + } + } + + for (int b_idx = 0; b_idx < options.busy_count; b_idx++) { + + for (int s_idx = 0; s_idx < options.sleep_count; s_idx++) { + + // Reset latencies array to -1 before measurement + reset_latencies(latencies); + + if (cpu != -1) { + delay(10 * MILLISECOND); // give kernel some time for affinity, cpufreq and cpuidle settings to take effect + get_cpufreq_minmax(cpu, &cpufreq_min, &cpufreq_max); + get_cpuidle_minmax(cpu, &cpuidle_min, &cpuidle_max); + } + + // Call the appropriate benchmark function + switch (benchmark) { + case BENCHMARK_NANOSLEEP: + measure_nanosleep(options.busy_times[b_idx], options.sleep_times[s_idx], latencies, cpu, cpu_other, toggle_ns); + break; + case BENCHMARK_NETWORKING: + measure_networking(options.busy_times[b_idx], options.sleep_times[s_idx], latencies, cpu, cpu_other); + break; + case BENCHMARK_FUTEX: + measure_futex(options.busy_times[b_idx], options.sleep_times[s_idx], latencies, cpu, cpu_other); + break; + } + + // print measurement parameters and results + printf("%s %d %d %d %ld %d %d %d %d %d %d %ld %ld ", + benchmark_name(benchmark), + r + 1, + cpu, + cpu_other, + cpu_other != -1 ? toggle_ns : -1, + options.polprio[pp_idx][0], + options.polprio[pp_idx][1], + cpuidle_min, + cpuidle_max, + cpufreq_min, + cpufreq_max, + options.busy_times[b_idx], + options.sleep_times[s_idx]); + print_latencies(latencies); + printf("\n"); + fflush(stdout); + } + } + + if (cpu != -1) set_cpufreq_minmax(cpu, 0, 9999999); // reset cpufreq + if (cpu_other != -1) set_cpufreq_minmax(cpu_other, 0, 9999999); // reset cpufreq + } + + if (cpu != -1) set_cpuidle_minmax(cpu, 0, 99); // reset cpuidle + if (cpu_other != -1) set_cpuidle_minmax(cpu_other, 0, 99); // reset cpuidle + } + } + } + } + } + } +}