diff --git a/.gitignore b/.gitignore index 94e0890..8246631 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,13 @@ *.swp -# Skip build/ areas +# Skip build artifacts build* +*.o # Skip LOC-generated files loc*h loc_filenames.c # Skip pytests cached files *__pycache__* + +# Skip samples +*-sample* \ No newline at end of file diff --git a/README.md b/README.md index cd88cda..7dba126 100644 --- a/README.md +++ b/README.md @@ -38,24 +38,24 @@ struct { pid_t tid; // User thread-ID int32_t loc; // (Optional) Line-of-Code ID const char *msg; // Diagnostic message literal - uint64_t arg1; // Argument value-1 - uint64_t arg2; // Argument value-2 + uint64_t argsN; // Number of arguments }; ``` +Where arguments (if present) are stored directly before their corresponding `struct`. + The array is backed by a memory-mapped file, filename given by `l3_init()`. The logging routines simply do an atomic fetch-and-increment of a global index to get a slot into the array, and then update that slot -with the calling thread ID, a pointer to a message string, and up to -two 64-bit arguments. (We store just the pointer to the string rather than -string itself because this is usually a lot faster and, of course, consumes -less storage space.) +with the calling thread ID, a pointer to a message string, and the accompanying +arguments. (We store just the pointer to the string rather thanstring itself because +this is usually a lot faster and, of course, consumes less storage space.) **The address must be a pointer to a string literal.** The `l3_dump.py` utility will map the pointer to find the string literal to which it points from the executable, to generate a human-readable -dump of the log. +dump of the log (with the most recent messages printed first). ------ diff --git a/l3.c b/l3.c index 58f1057..0d9c08d 100644 --- a/l3.c +++ b/l3.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -50,24 +51,25 @@ /** * L3 Log entry Structure definitions: */ -typedef struct l3_entry +typedef union l3_entry { - pid_t tid; -#ifdef L3_LOC_ENABLED - loc_t loc; -#else - uint32_t loc; -#endif // L3_LOC_ENABLED - const char *msg; - uint64_t arg1; - uint64_t arg2; -} L3_ENTRY; + struct l3_arguments + { + uint64_t args[3]; + } body; -/** - * The L3-dump script expects a specific layout and its parsing routines - * hard-code the log-entry size to be these many bytes. - */ -#define L3_LOG_ENTRY_SZ (4 * sizeof(uint64_t)) + struct l3_footer + { + pid_t tid; + #ifdef L3_LOC_ENABLED + loc_t loc; + #else + uint32_t loc; + #endif // L3_LOC_ENABLED + const char *msg; + uint64_t argN; + } foot; +} L3_ENTRY; /** * Cross-check LOC's data structures. We need this to be true to ensure @@ -104,29 +106,19 @@ enum loc_type_t */ typedef struct l3_log { - uint64_t idx; + uint64_t idx; // Offset into `slots` for where the next entry is to be written. Atomic. uint64_t fbase_addr; - uint32_t pad0; - uint16_t log_size; // # of log-entries == L3_MAX_SLOTS + uint32_t slot_count; // == L3_MAX_SLOTS. For determining the size of the ring buffer when decoding. + uint16_t pad2; uint8_t platform; uint8_t loc_type; uint64_t pad1; - L3_ENTRY slots[L3_MAX_SLOTS]; + L3_ENTRY slots[0]; } L3_LOG; L3_LOG *l3_log = NULL; // L3_LOG_MMAP: Also referenced in l3.S. -/** - * The L3-dump script expects a specific layout and its parsing routines - * hard-code the log-header size to be these many bytes. (The overlay of - * L3_LOG{} and L3_ENTRY{}'s size is somewhat of a convenience. They just - * happen to be of the same size, as of now, hence, this reuse.) - */ -L3_STATIC_ASSERT(offsetof(L3_LOG,slots) == sizeof(L3_ENTRY), - "Expected layout of L3_LOG{} is != 32 bytes."); - - #if __APPLE__ /** @@ -155,6 +147,7 @@ L3_THREAD_LOCAL pid_t l3_my_tid = 0; int l3_init(const char *path) { + uint64_t fsize = sizeof(L3_LOG) + L3_MAX_SLOTS * sizeof(L3_ENTRY); int fd = -1; if (path) { @@ -162,8 +155,7 @@ l3_init(const char *path) if (fd == -1) { return -1; } - - if (lseek(fd, sizeof(L3_LOG), SEEK_SET) < 0) { + if (lseek(fd, fsize, SEEK_SET) < 0) { return -1; } if (write(fd, &fd, 1) != 1) { @@ -171,7 +163,7 @@ l3_init(const char *path) } } - l3_log = (L3_LOG *) mmap(NULL, sizeof(*l3_log), PROT_READ|PROT_WRITE, + l3_log = (L3_LOG *) mmap(NULL, fsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (l3_log == MAP_FAILED) { return -1; @@ -180,6 +172,7 @@ l3_init(const char *path) // Technically, this is not needed as mmap() is guaranteed to return // zero-filled pages. We do this just to be clear where the idx begins. l3_log->idx = 0; + l3_log->slot_count = L3_MAX_SLOTS; #if __APPLE__ l3_log->fbase_addr = getBaseAddress(); @@ -193,7 +186,7 @@ l3_init(const char *path) return -1; } l3_log->fbase_addr = (intptr_t) info.dli_fbase; - l3_log->platform = L3_LOG_PLATFORM_LINUX; + l3_log->platform = L3_LOG_PLATFORM_LINUX; #endif // __APPLE__ // Note down, in the log-header, the type of LOC-encoding in effect. @@ -209,8 +202,6 @@ l3_init(const char *path) l3_log->loc_type = L3_LOG_LOC_ENCODING; #endif // L3_LOC_ELF_ENABLED - l3_log->log_size = L3_MAX_SLOTS; - // printf("fbase_addr=%" PRIu64 " (0x%llx)\n", l3_log->fbase_addr, l3_log->fbase_addr); // printf("sizeof(L3_LOG)=%ld, header=%lu bytes\n", // sizeof(L3_LOG), offsetof(L3_LOG,slots)); @@ -222,46 +213,58 @@ l3_init(const char *path) /** * l3_log_fn() - 'C' implementation of L3-logging. * - * As 'loc' is an argument synthesized by the caller-macro, under conditional - * compilation, keep it as the last argument. This makes it possible to define - * DEBUG version of the caller-macro using printf(), for argument v/s print- - * format specifiers in 'msg'. + * 'loc' is an argument synthesized by the caller-macro, under conditional. + * This makes it possible to define DEBUG version of the caller-macro using printf(), + * for argument v/s print-format specifiers in 'msg'. */ -void __attribute__((weak)) +void #ifdef L3_LOC_ENABLED -l3_log_fn(const char *msg, const uint64_t arg1, const uint64_t arg2, - loc_t loc) +l3_log_fn(const char *msg, loc_t loc, int nargs, ...) #else -l3_log_fn(const char *msg, const uint64_t arg1, const uint64_t arg2, - uint32_t loc) +l3_log_fn(const char *msg, uint32_t loc, int nargs, ...) #endif { + const uint64_t args_per_block = sizeof(L3_ENTRY) / sizeof(uint64_t); + uint64_t entryBlocks = 1 + (nargs + args_per_block - 1) / args_per_block; -#if __APPLE__ - int idx = __sync_fetch_and_add(&l3_log->idx, 1); -#else - int idx = __libc_single_threaded ? l3_log->idx++ - : __sync_fetch_and_add(&l3_log->idx, 1); + uint32_t ofs; +#ifndef __APPLE__ + if (__libc_single_threaded) + { + ofs = l3_log->idx; + l3_log->idx += entryBlocks; // the next log will write here + } else #endif // __APPLE__ + { + ofs = __sync_fetch_and_add(&l3_log->idx, entryBlocks); + } + if (!l3_my_tid) { l3_my_tid = L3_GET_TID(); } - idx %= L3_MAX_SLOTS; - l3_log->slots[idx].tid = l3_my_tid; -#ifdef L3_LOC_ENABLED - l3_log->slots[idx].loc = (loc_t) loc; -#else // L3_LOC_ENABLED - -#ifdef DEBUG - assert(l3_log->slots[idx].loc == 0); -#endif // DEBUG - -#endif // L3_LOC_ENABLED + ofs %= L3_MAX_SLOTS; + // printf("Logging %ld blocks from idx %ld.\n", entryBlocks, ofs); + + // Write the blocks. The one always fits, but the rest might need to wrap. + // Because 3 arguments fit in the span of the Entry's footer metadata, the byte layout may look like: + // [ofs+0] = Args 0-2, [ofs+1] = Args 3-5, [ofs+N] = Footer + va_list args; + va_start(args, nargs); + for (int i = nargs; i > 0; i -= args_per_block) { + for (int j = 0; j < L3_MIN(i, args_per_block); j++) { + l3_log->slots[ofs].body.args[j] = va_arg(args, uint64_t); + } + // wrapping increment + if(++ofs >= L3_MAX_SLOTS) ofs -= L3_MAX_SLOTS; + } + va_end(args); - l3_log->slots[idx].msg = msg; - l3_log->slots[idx].arg1 = arg1; - l3_log->slots[idx].arg2 = arg2; + // Write the footer block. + l3_log->slots[ofs].foot.tid = l3_my_tid; + l3_log->slots[ofs].foot.loc = loc; + l3_log->slots[ofs].foot.msg = msg; + l3_log->slots[ofs].foot.argN = nargs; } int @@ -279,6 +282,8 @@ l3_deinit(void) * * https://stackoverflow.com/questions/16552710/how-do-you-get-the-start-and-end-addresses-of-a-custom-elf-section * + * https://www.scs.stanford.edu/~dm/blog/va-opt.html + * * Some reference code to try out, some day: * #include diff --git a/l3.h b/l3.h index b8b48d5..5a8cf6f 100644 --- a/l3.h +++ b/l3.h @@ -24,8 +24,9 @@ /** * Size of circular ring-buffer to track log entries. + * A message occupies 1 slot, and 1 more for each 3 arguments. */ -#define L3_MAX_SLOTS (16384) +#define L3_MAX_SLOTS (32768) /** * Error codes returned by API / interfaces. @@ -72,6 +73,20 @@ int l3_deinit(void); } #endif +#define L3_PREFIX_CAST(x) ((uint64_t)(x)) + +#define L3_MACRO_NARGS_IMPL(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,N,...) N +#define L3_MACRO_NARGS(...) L3_MACRO_NARGS_IMPL(_, ##__VA_ARGS__, 9,8,7,6,5,4,3,2,1,0) // GCC/Clang extension +// Deeply nested macro expansion such that it appears recursive +#define L3_MACRO_EXPAND(...) L3_MACRO_EXPAND2(L3_MACRO_EXPAND2(L3_MACRO_EXPAND2(L3_MACRO_EXPAND2(__VA_ARGS__)))) +#define L3_MACRO_EXPAND2(...) L3_MACRO_EXPAND1(L3_MACRO_EXPAND1(L3_MACRO_EXPAND1(L3_MACRO_EXPAND1(__VA_ARGS__)))) +#define L3_MACRO_EXPAND1(...) __VA_ARGS__ +#define L3_MACRO_PARENS () +#define L3_MACRO_FOR_EACH(macro, ...) __VA_OPT__(L3_MACRO_EXPAND(L3_MACRO_FOR_EACH_LOOP(macro, __VA_ARGS__))) +#define L3_MACRO_FOR_EACH_LOOP(macro, a1, ...) macro(a1) __VA_OPT__(, L3_MACRO_FOR_EACH_LOOP2 L3_MACRO_PARENS (macro, __VA_ARGS__)) +#define L3_MACRO_FOR_EACH_LOOP2() L3_MACRO_FOR_EACH_LOOP +#define L3_PREFIX_MULTI(...) L3_MACRO_FOR_EACH(L3_PREFIX_CAST, __VA_ARGS__) + /** * \brief Caller-macro to invoke L3 logging. * @@ -86,22 +101,18 @@ int l3_deinit(void); #define l3_log(msg, arg1, arg2) \ if (1) { \ - l3_log_fn((msg), \ - (uint64_t) (arg1), (uint64_t) (arg2), \ - __LOC__); \ + l3_log_fn((msg), __LOC__, L3_MACRO_NARGS(__VA_ARGS__) __VA_OPT__(,) L3_PREFIX_MULTI(__VA_ARGS__));\ } else if (0) { \ - printf((msg), (arg1), (arg2)); \ + printf((msg), L3_PREFIX_MULTI(__VA_ARGS__)); \ } else #else // L3_LOC_ENABLED # define l3_log(msg, arg1, arg2) \ if (1) { \ - l3_log_fn((msg), \ - (uint64_t) (arg1), (uint64_t) (arg2), \ - L3_ARG_UNUSED); \ + l3_log_fn((msg), L3_ARG_UNUSED, L3_MACRO_NARGS(__VA_ARGS__) __VA_OPT__(,) L3_PREFIX_MULTI(__VA_ARGS__));\ } else if (0) { \ - printf((msg), (arg1), (arg2)); \ + printf((msg), L3_PREFIX_MULTI(__VA_ARGS__)); \ } else #endif // L3_LOC_ENABLED @@ -111,15 +122,11 @@ int l3_deinit(void); #ifdef L3_LOC_ENABLED #define l3_log(msg, arg1, arg2) \ - l3_log_fn((msg), \ - (uint64_t) (arg1), (uint64_t) (arg2), \ - __LOC__) + l3_log_fn((msg), __LOC__, L3_MACRO_NARGS(__VA_ARGS__) __VA_OPT__(,) L3_PREFIX_MULTI(__VA_ARGS__)) #else - #define l3_log(msg, arg1, arg2) \ - l3_log_fn((msg), \ - (uint64_t) (arg1), (uint64_t) (arg2), \ - L3_ARG_UNUSED) + #define l3_log(msg, ...) \ + l3_log_fn((msg), L3_ARG_UNUSED, L3_MACRO_NARGS(__VA_ARGS__) __VA_OPT__(,) L3_PREFIX_MULTI(__VA_ARGS__)) #endif // L3_LOC_ENABLED @@ -142,11 +149,9 @@ extern "C" { #endif #ifdef L3_LOC_ENABLED -void l3_log_fn(const char *msg, const uint64_t arg1, const uint64_t arg2, - const loc_t loc); +void l3_log_fn(const char *msg, loc_t loc, int nargs, ...); #else -void l3_log_fn(const char *msg, const uint64_t arg1, const uint64_t arg2, - const uint32_t loc); +void l3_log_fn(const char *msg, uint32_t loc, int nargs, ...); #endif // L3_LOC_ENABLED #ifdef __cplusplus diff --git a/l3_dump.py b/l3_dump.py index 1c90596..1283991 100755 --- a/l3_dump.py +++ b/l3_dump.py @@ -14,12 +14,15 @@ import re import shlex import argparse +import io # ############################################################################## # Constants that tie the unpacking logic to L3's core structure's layout # ############################################################################## L3_LOG_HEADER_SZ = 32 # bytes; offsetof(L3_LOG, slots) -L3_ENTRY_SZ = 32 # bytes; sizeof(L3_ENTRY) +L3_ENTRY_SZ = 24 # bytes; sizeof(L3_ENTRY) +L3_ARGS_PER_ENTRY = L3_ENTRY_SZ // 8 # sizeof(L3_ENTRY) / sizeof(u64) +L3_ARGS_DECODE_STR = "<" + "Q"*L3_ARGS_PER_ENTRY # ############################################################################# PROGRAM_BIN = 'unknown' @@ -232,18 +235,23 @@ def parse_string_offsets(input_str:str) -> (dict,int): # Return # of valid-lines-parsed count, so pytests can verify it. return (_strings, nlines) +def wrapping_sub(x: int, y: int, wrap: int) -> tuple[int, bool]: + wrapping = x < y + result = (x - y) % wrap + return (result, wrapping) + # ############################################################################# -def l3_unpack_loghdr(file_hdl) -> (int, int, int): +def l3_unpack_loghdr(file_hdl: io.BufferedReader) -> tuple[int, int, int, int, int]: """ Unpack the header struct of a L3-log file and identify key fields. We are unpacking a struct laid out like the following: typedef struct l3_log { - uint64_t idx; + uint64_t idx; // Contains the index 1-past-the-end of the final log entry uint64_t fbase_addr; - uint32_t pad0; - uint16_t log_size; // # of log-entries == L3_MAX_SLOTS + uint32_t slot_count; // Corresponds to file size + uint16_t pad2; uint8_t platform; uint8_t loc_type; uint64_t pad1; @@ -260,7 +268,11 @@ def l3_unpack_loghdr(file_hdl) -> (int, int, int): # '<' => byte-order of the header is little-endian # See: https://docs.python.org/3/library/struct.html # pylint: disable-next=unused-variable - (_, fibase, pad0, pad2, l3_platform, loc_type, pad1) = struct.unpack(' (int, int, int): decode_loc_id = L3_LOC_UNSET # DEBUG: print(f"{l3_platform=}, {decode_loc_id=}") - return (fibase, l3_platform, decode_loc_id) + return (fibase, l3_platform, decode_loc_id, idx_back, slot_count) # ############################################################################# @@ -375,7 +387,7 @@ def mac_get__cstring_offset(program_bin:str) -> int: return offset ############################################################################### -def do_c_print(msg_text:str, arg1:int, arg2:int) -> str: +def do_c_print(msg_text:str, args:list[int]) -> str: """ Parse an input C-style format-string to convert it to a form that is accepted by Python. Invoke C-style printing using provided arguments. @@ -386,9 +398,8 @@ def do_c_print(msg_text:str, arg1:int, arg2:int) -> str: """ format_string = fmtstr_replace(msg_text) # DEBUG: print(f"{format_string=}") - args = format_string.count("%") - if args == 1: return format_string % (arg1, arg2) - if args == 2: return format_string % (arg1, arg2) + percents = format_string.count("%") + if percents > 0: return format_string % tuple(args) return format_string ############################################################################### @@ -459,13 +470,12 @@ def do_main(args:list, return_logentry_lists:bool = False): tid_list = [] loc_list = [] msg_list = [] - arg1_list = [] - arg2_list = [] + args_list = [] with open(l3_logfile, 'rb') as file: # Unpack the 1st n-bytes as an L3_LOG{} struct to get a hold # of the fbase-address stashed by the l3_init() call. - (fibase, _, decode_loc_id) = l3_unpack_loghdr(file) + (fibase, _, decode_loc_id, entry_last, slot_count) = l3_unpack_loghdr(file) loc_decoder_bin = select_loc_decoder_bin(decode_loc_id, program_bin, @@ -473,36 +483,63 @@ def do_main(args:list, return_logentry_lists:bool = False): # pylint: disable=invalid-name nentries = 0 loc_prev = 0 - # Keep reading chunks of log-entries from file ... - while True: - row = file.read(L3_ENTRY_SZ) - len_row = len(row) - - # Deal with eof - if not row or len_row == 0 or len_row < L3_ENTRY_SZ: - break - - tid, loc, msgptr, arg1, arg2 = struct.unpack('