diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2300ad2..45d31ae 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,7 +28,7 @@ jobs: steps: - uses: actions/checkout@v3 - - run: sudo apt-get install -y libparted-dev libudev-dev + - run: sudo apt-get update && sudo apt-get install -y libparted-dev libudev-dev - run: make all extra - if: matrix.volume == 'relative-path' @@ -70,8 +70,10 @@ jobs: - name: Cleanup loop device if: always() run: | - sudo losetup -d ${{ steps.loop.outputs.dev }} - rm dummy.img + if [ -n "${{ steps.loop.outputs.dev }}" ]; then + sudo losetup -d ${{ steps.loop.outputs.dev }} + fi + rm -f dummy.img MacOS: strategy: diff --git a/Makefile b/Makefile index b62cd7f..f9a5301 100644 --- a/Makefile +++ b/Makefile @@ -54,8 +54,8 @@ f3write: libutils.o utils.o libflow.o f3write.o f3read: libutils.o utils.o libflow.o f3read.o $(CC) -o $@ $^ $(LDFLAGS) -lm -f3probe: libutils.o libdevs.o libprobe.o f3probe.o - $(CC) -o $@ $^ $(LDFLAGS) -ludev +f3probe: libutils.o libflow.o libdevs.o libprobe.o f3probe.o + $(CC) -o $@ $^ $(LDFLAGS) -lm -ludev f3brew: libutils.o libflow.o libdevs.o f3brew.o $(CC) -o $@ $^ $(LDFLAGS) -lm -ludev diff --git a/f3brew.c b/f3brew.c index b04180b..2ddc8da 100644 --- a/f3brew.c +++ b/f3brew.c @@ -1,3 +1,6 @@ +#define _POSIX_C_SOURCE 200112L +#define _XOPEN_SOURCE 600 + #include #include #include @@ -323,8 +326,8 @@ static void test_write_blocks(struct device *dev, first_block, last_block); fflush(stdout); - init_flow(&fw, block_size, total_size, max_write_rate, show_progress, - NULL); + init_flow(&fw, block_size, total_size, max_write_rate, + show_progress ? printf_flush_cb : dummy_cb, 0, NULL); assert(!gettimeofday(&t1, NULL)); write_blocks(dev, &fw, first_block, last_block); @@ -498,8 +501,8 @@ static void test_read_blocks(struct device *dev, printf("Reading blocks from 0x%" PRIx64 " to 0x%" PRIx64 ":\n", first_block, last_block); - init_flow(&fw, block_size, total_size, max_read_rate, show_progress, - NULL); + init_flow(&fw, block_size, total_size, max_read_rate, + show_progress ? printf_flush_cb : dummy_cb, 0, NULL); assert(!gettimeofday(&t1, NULL)); read_blocks(dev, &fw, first_block, last_block, &stats); diff --git a/f3fix.c b/f3fix.c index 061c3a4..4b79293 100644 --- a/f3fix.c +++ b/f3fix.c @@ -1,3 +1,6 @@ +#define _POSIX_C_SOURCE 200112L +#define _XOPEN_SOURCE 600 + #include #include #include diff --git a/f3probe.c b/f3probe.c index 4533313..5434901 100644 --- a/f3probe.c +++ b/f3probe.c @@ -1,14 +1,15 @@ -#define _POSIX_C_SOURCE 200809L +#define _POSIX_C_SOURCE 200112L +#define _XOPEN_SOURCE 600 #include #include #include -#include #include #include #include #include #include +#include #include "version.h" #include "libprobe.h" @@ -53,6 +54,8 @@ static struct argp_option options[] = { "Time reads, writes, and resets", 0}, {"verbose", 'v', NULL, 0, "Show detailed progress", 0}, + {"show-progress", 'p', "NUM", 0, + "Show progress if NUM is not zero", 0}, { 0 } }; @@ -69,6 +72,7 @@ struct args { bool min_mem; bool time_ops; bool verbose; + bool show_progress; /* Geometry. */ uint64_t real_size_byte; @@ -165,6 +169,10 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state) args->verbose = true; break; + case 'p': + args->show_progress = !!arg_to_ll_bytes(state, arg); + break; + case ARGP_KEY_INIT: args->filename = NULL; break; @@ -196,12 +204,6 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state) static struct argp argp = {options, parse_opt, adoc, doc, NULL, NULL, NULL}; -static void dummy_probe_progress(const char *format, ...) -{ - /* Do nothing */ - UNUSED(format); -} - struct unit_test_item { uint64_t real_size_byte; uint64_t fake_size_byte; @@ -277,8 +279,8 @@ static int unit_test(const char *filename) assert(dev); max_probe_blocks = probe_device_max_blocks(dev); assert(!probe_device(dev, &real_size_byte, &announced_size_byte, - &wrap, &cache_size_block, &block_order, - dummy_probe_progress)); + &wrap, &cache_size_block, &block_order, dummy_cb, + false)); free_device(dev); fake_type = dev_param_to_type(real_size_byte, announced_size_byte, wrap, block_order); @@ -329,18 +331,18 @@ static int unit_test(const char *filename) static inline void report_size(const char *prefix, uint64_t bytes, int block_order) { - report_probed_size(printf_cb, prefix, bytes, block_order); + report_probed_size(0, printf_cb, prefix, bytes, block_order); } static inline void report_order(const char *prefix, int order) { - report_probed_order(printf_cb, prefix, order); + report_probed_order(0, printf_cb, prefix, order); } static inline void report_cache(const char *prefix, uint64_t cache_size_block, int block_order) { - report_probed_cache(printf_cb, prefix, cache_size_block, block_order); + report_probed_cache(0, printf_cb, prefix, cache_size_block, block_order); } static void report_probe_time(const char *prefix, uint64_t usec) @@ -358,15 +360,6 @@ static void report_ops(const char *op, uint64_t count, uint64_t time_us) printf("%10s: %s / %" PRIu64 " = %s\n", op, str1, count, str2); } -static void print_probe_progress(const char *format, ...) -{ - va_list args; - va_start(args, format); - vprintf(format, args); - va_end(args); - fflush(stdout); -} - static int test_device(struct args *args) { struct timeval t1, t2; @@ -421,7 +414,8 @@ static int test_device(struct args *args) */ assert(!probe_device(dev, &real_size_byte, &announced_size_byte, &wrap, &cache_size_block, &block_order, - args->verbose ? print_probe_progress : dummy_probe_progress)); + args->verbose ? printf_flush_cb : dummy_cb, + args->show_progress)); assert(!gettimeofday(&t2, NULL)); if (args->verbose) { @@ -518,6 +512,8 @@ int main(int argc, char **argv) .min_mem = false, .time_ops = false, .verbose = false, + /* If stdout isn't a terminal, suppress progress. */ + .show_progress = isatty(STDOUT_FILENO), .real_size_byte = 1ULL << 31, .fake_size_byte = 1ULL << 34, .wrap = 31, diff --git a/f3read.c b/f3read.c index 3f3b28d..e60f801 100644 --- a/f3read.c +++ b/f3read.c @@ -327,7 +327,7 @@ static void iterate_files(const char *path, const long *files, UNUSED(end_at); init_flow(&fw, get_block_size(path), get_total_size(path, files), - max_read_rate, progress, NULL); + max_read_rate, progress ? printf_flush_cb : dummy_cb, 0, NULL); printf(" SECTORS " " ok/corrupted/changed/overwritten\n"); diff --git a/f3write.c b/f3write.c index 9458342..db2a57c 100644 --- a/f3write.c +++ b/f3write.c @@ -306,7 +306,7 @@ static int fill_fs(const char *path, long start_at, long end_at, } init_flow(&fw, get_block_size(path), free_space, max_write_rate, - progress, flush_chunk); + progress ? printf_flush_cb : dummy_cb, 0, flush_chunk); assert(!gettimeofday(&t1, NULL)); for (i = start_at; i <= end_at; i++) if (create_and_fill_file(path, i, GIGABYTES, diff --git a/libdevs.h b/libdevs.h index e93b6a9..03d4126 100644 --- a/libdevs.h +++ b/libdevs.h @@ -56,14 +56,6 @@ const char *dev_get_filename(struct device *dev); * Methods */ -/* One should use the following constant as the size of the buffer needed to - * batch writes or reads. - * - * It must be a power of 2 greater than, or equal to 2^20. - * The current value is 1MB. - */ -#define BIG_BLOCK_SIZE_BYTE (1 << 20) - int dev_read_blocks(struct device *dev, char *buf, uint64_t first_pos, uint64_t last_pos); int dev_write_blocks(struct device *dev, const char *buf, diff --git a/libflow.c b/libflow.c index f76844e..661bc05 100644 --- a/libflow.c +++ b/libflow.c @@ -1,6 +1,8 @@ #define _POSIX_C_SOURCE 200112L #define _XOPEN_SOURCE 600 +#include +#include #include #include #include @@ -17,29 +19,29 @@ #if (__APPLE__ && __MACH__) || defined(__OpenBSD__) #include -static void msleep(double wait_ms) +static inline void ussleep(double wait_us) { - assert(!usleep(wait_ms * 1000)); + assert(!usleep(wait_us)); } #else /* Everyone else */ #include /* For clock_gettime() and clock_nanosleep(). */ -static void msleep(double wait_ms) +static void ussleep(double wait_us) { struct timespec req; int ret; assert(!clock_gettime(CLOCK_MONOTONIC, &req)); - /* Add @wait_ms to @req. */ - if (wait_ms > 1000) { - time_t sec = wait_ms / 1000; - wait_ms -= sec * 1000; - assert(wait_ms > 0); + /* Add @wait_us to @req. */ + if (wait_us > 1000000) { + time_t sec = wait_us / 1000000; + wait_us -= sec * 1000000; + assert(wait_us > 0); req.tv_sec += sec; } - req.tv_nsec += wait_ms * 1000000; + req.tv_nsec += wait_us * 1000; /* Round @req up. */ if (req.tv_nsec >= 1000000000) { @@ -56,7 +58,7 @@ static void msleep(double wait_ms) assert(ret == 0); } -#endif /* msleep() */ +#endif /* ussleep() */ static inline void move_to_inc_at_start(struct flow *fw) { @@ -65,69 +67,98 @@ static inline void move_to_inc_at_start(struct flow *fw) } void init_flow(struct flow *fw, int block_size, uint64_t total_size, - long max_process_rate, int progress, + long max_process_rate, progress_cb cb, unsigned int indent, flow_func_flush_chunk_t func_flush_chunk) { fw->total_size = total_size; fw->total_processed = 0; - fw->progress = progress; + fw->cb = cb; + fw->indent = indent; fw->block_size = block_size; /* Bytes */ fw->blocks_per_delay = 1; /* block_size B/s */ - fw->delay_ms = 1000; /* 1s */ + fw->delay_ns = 1000000000ULL; /* 1s */ fw->max_process_rate = max_process_rate <= 0 ? DBL_MAX : max_process_rate * 1024.; fw->measured_blocks = 0; - fw->measured_time_ms = 0; + fw->measured_time_ns = 0; fw->erase = 0; fw->func_flush_chunk = func_flush_chunk; + fw->has_rem_chunk_size = false; + fw->rem_chunk_size = 0; + fw->rem_chunk_speed = 0; fw->processed_blocks = 0; - fw->acc_delay_us = 0; + fw->acc_delay_ns = 0; assert(fw->block_size > 0); assert(fw->block_size % SECTOR_SIZE == 0); move_to_inc_at_start(fw); } -static inline void repeat_ch(char ch, int count) +uint64_t get_rem_chunk_size(const struct flow *fw) { - while (count > 0) { - printf("%c", ch); - count--; - } + const int64_t rem_blocks = fw->blocks_per_delay - fw->processed_blocks; + const uint64_t rem_size = rem_blocks * fw->block_size; + assert(rem_blocks > 0); + return fw->has_rem_chunk_size && rem_size >= fw->rem_chunk_size + ? fw->rem_chunk_size + : rem_size; } -static void erase(int count) +static inline unsigned int repeat_ch(char *buf, char ch, int count) { - if (count <= 0) - return; - repeat_ch('\b', count); - repeat_ch(' ', count); - repeat_ch('\b', count); + int i; + + for (i = 0; i < count; i++) + buf[i] = ch; + return count; } void clear_progress(struct flow *fw) { - if (!fw->progress) - return; - erase(fw->erase); + char buf[512], *at_buf = buf; + + if (fw->erase <= 0) { + if (fw->indent > 0) { + /* Remove indented empty line. */ + fw->cb(fw->indent, "\b"); + } + goto out; + } + + assert((size_t)fw->erase * 3 + 1 <= sizeof(buf)); + at_buf += repeat_ch(at_buf, '\b', fw->erase); + at_buf += repeat_ch(at_buf, ' ', fw->erase); + at_buf += repeat_ch(at_buf, '\b', fw->erase); + at_buf[0] = '\0'; + + fw->cb(fw->indent, buf); +out: fw->erase = 0; - fflush(stdout); } -static int pr_time(double sec) +#define CHECK_AND_MOVE do { \ + assert(c > 0); \ + len += c; \ + assert((size_t)c < rem_size); \ + rem_size -= c; \ + at_buf += c; \ + } while (0) + +static int pr_time(char *buf, const size_t size, double sec) { - int has_h, has_m; - int c, tot; + char *at_buf = buf; + size_t rem_size = size; + bool has_h, has_m; + int c, len = 0; - tot = printf(" -- "); - assert(tot > 0); + c = snprintf(at_buf, rem_size, " -- "); + CHECK_AND_MOVE; has_h = sec >= 3600; if (has_h) { double h = floor(sec / 3600); - c = printf("%i:", (int)h); - assert(c > 0); - tot += c; + c = snprintf(at_buf, rem_size, "%i:", (int)h); + CHECK_AND_MOVE; sec -= h * 3600; } @@ -135,60 +166,78 @@ static int pr_time(double sec) if (has_m) { double m = floor(sec / 60); if (has_h) - c = printf("%02i:", (int)m); + c = snprintf(at_buf, rem_size, "%02i:", (int)m); else - c = printf("%i:", (int)m); - assert(c > 0); - tot += c; + c = snprintf(at_buf, rem_size, "%i:", (int)m); + CHECK_AND_MOVE; sec -= m * 60; } if (has_m) - c = printf("%02i", (int)round(sec)); + c = snprintf(at_buf, rem_size, "%02i", (int)round(sec)); else - c = printf("%is", (int)round(sec)); - assert(c > 0); - return tot + c; + c = snprintf(at_buf, rem_size, "%is", (int)round(sec)); + CHECK_AND_MOVE; + + return len; } static inline double get_avg_speed_given_time(const struct flow *fw, - uint64_t total_time_ms) + uint64_t total_time_ns) { - return (double)(fw->measured_blocks * fw->block_size * 1000) / - total_time_ms; + return ((double)(fw->measured_blocks * fw->block_size) * 1000000000.0) + / total_time_ns; } /* Average writing speed in byte/s. */ static inline double get_avg_speed(const struct flow *fw) { - return get_avg_speed_given_time(fw, fw->measured_time_ms); + return get_avg_speed_given_time(fw, fw->measured_time_ns); +} + +static inline bool has_enough_measurements(const struct flow *fw) +{ + return fw->measured_time_ns > fw->delay_ns; } static void report_progress(struct flow *fw, double inst_speed) { const char *unit = adjust_unit(&inst_speed); double percent; + char buf[256]; + int c, len = 0; + /* The following shouldn't be necessary, but sometimes * the initial free space isn't exactly reported * by the kernel; this issue has been seen on Macs. */ if (fw->total_size < fw->total_processed) fw->total_size = fw->total_processed; + + clear_progress(fw); + percent = (double)fw->total_processed * 100 / fw->total_size; - erase(fw->erase); - fw->erase = printf("%.2f%% -- %.2f %s/s", + c = snprintf(buf, sizeof(buf), "%.2f%% -- %.2f %s/s", percent, inst_speed, unit); - assert(fw->erase > 0); - if (has_enough_measurements(fw)) - fw->erase += pr_time( + assert(c > 0); + len += c; + + if (has_enough_measurements(fw)) { + c = pr_time(buf + len, sizeof(buf) - len, (fw->total_size - fw->total_processed) / get_avg_speed(fw)); - fflush(stdout); + assert(c > 0); + len += c; + } + + assert((size_t)len + 1 <= sizeof(buf)); + fw->erase = len; + fw->cb(fw->indent, "%s", buf); } static inline void __start_measurement(struct flow *fw) { - assert(!gettimeofday(&fw->t1, NULL)); + assert(!clock_gettime(CLOCK_MONOTONIC, &fw->t1)); } void start_measurement(struct flow *fw) @@ -197,9 +246,9 @@ void start_measurement(struct flow *fw) * The report below is especially useful when a single measurement spans * multiple files; this happens when a drive is faster than 1GB/s. */ - if (fw->progress) - report_progress(fw, fw->blocks_per_delay * fw->block_size * - 1000.0 / fw->delay_ms); + report_progress(fw, + fw->blocks_per_delay * fw->block_size * 1000000000.0 / + fw->delay_ns); __start_measurement(fw); } @@ -253,17 +302,17 @@ static inline void move_to_dec(struct flow *fw) } static inline int is_rate_above(const struct flow *fw, - uint64_t delay, double inst_speed) + uint64_t delay_ns, double inst_speed) { /* We use logical or here to enforce the lowest limit. */ - return delay > fw->delay_ms || inst_speed > fw->max_process_rate; + return delay_ns > fw->delay_ns || inst_speed > fw->max_process_rate; } static inline int is_rate_below(const struct flow *fw, - uint64_t delay, double inst_speed) + uint64_t delay_ns, double inst_speed) { /* We use logical and here to enforce both limits. */ - return delay <= fw->delay_ms && inst_speed < fw->max_process_rate; + return delay_ns <= fw->delay_ns && inst_speed < fw->max_process_rate; } static inline int flush_chunk(const struct flow *fw, int fd) @@ -273,12 +322,53 @@ static inline int flush_chunk(const struct flow *fw, int fd) return 0; } +static bool t1_gt_t2(const struct timespec *t1, const struct timespec *t2) +{ + if (t1->tv_sec > t2->tv_sec) + return true; + if (t1->tv_sec < t2->tv_sec) + return false; + return t1->tv_nsec > t2->tv_nsec; +} + +static void update_rem_chunk_size(struct flow *fw, const struct timespec *t2) +{ + const uint64_t rem_chunk_size = fw->blocks_per_delay * fw->block_size; + const struct timespec *t1; + double inst_speed; + + if (fw->rem_chunk_size == 0) { + /* This is the first time measure() is called. */ + t1 = &fw->t1; + } else if (t1_gt_t2(&fw->t1, &fw->prv_t2)) { + /* end_measurement() has already been called, and + * start_measurement() was called again. + */ + t1 = &fw->t1; + } else { + /* This is at least the second time measure() is called after + * start_measurement() was called and before end_measurement() + * was called. + */ + t1 = &fw->prv_t2; + } + inst_speed = (rem_chunk_size * 1000000000.0) / + diff_timespec_ns(t1, t2); + fw->prv_t2 = *t2; + + if (fw->rem_chunk_size != 0 && inst_speed < fw->rem_chunk_speed) + return; + + fw->rem_chunk_size = rem_chunk_size; + fw->rem_chunk_speed = inst_speed; +} + int measure(int fd, struct flow *fw, long processed) { ldiv_t result = ldiv(processed, fw->block_size); - struct timeval t2; - uint64_t delay; - double bytes_k, inst_speed; + struct timespec t2; + uint64_t delay_ns; + double bytes_g, inst_speed; assert(result.rem == 0); fw->processed_blocks += result.quot; @@ -291,62 +381,100 @@ int measure(int fd, struct flow *fw, long processed) if (flush_chunk(fw, fd) < 0) return -1; /* Caller can read errno(3). */ - assert(!gettimeofday(&t2, NULL)); - delay = (diff_timeval_us(&fw->t1, &t2) + fw->acc_delay_us) / 1000; + assert(!clock_gettime(CLOCK_MONOTONIC, &t2)); + if (!fw->has_rem_chunk_size) + update_rem_chunk_size(fw, &t2); + delay_ns = diff_timespec_ns(&fw->t1, &t2) + fw->acc_delay_ns; /* Instantaneous speed in bytes per second. */ - bytes_k = fw->blocks_per_delay * fw->block_size * 1000.0; - inst_speed = bytes_k / delay; - - if (delay < fw->delay_ms && inst_speed > fw->max_process_rate) { - /* Wait until inst_speed == fw->max_process_rate - * (if possible). + bytes_g = fw->blocks_per_delay * fw->block_size * 1000000000.0; + inst_speed = bytes_g / delay_ns; + + if (delay_ns < fw->delay_ns && inst_speed > fw->max_process_rate) { + /* delay_ns should be such that + * inst_speed <= fw->max_process_rate. + * To accomplish this, the code below adds a wait. + * + * inst_speed <= fw->max_process_rate [=>] + * bytes_g / (delay_ns + wait_ns) <= fw->max_process_rate [=>] + * bytes_g / fw->max_process_rate <= delay_ns + wait_ns [=>] + * wait_ns >= bytes_g / fw->max_process_rate - delay_ns + * + * The step below minimizes rounding errors. + * + * wait_ns >= (bytes_g - delay_ns * fw->max_process_rate) / + * fw->max_process_rate + * + * Round wait_ns, so it operates as an integer when used in + * nanoseconds. + */ + double wait_ns = round( + (bytes_g - delay_ns * fw->max_process_rate) / + fw->max_process_rate); + + /* From the if-test, + * inst_speed > fw->max_process_rate [=>] + * bytes_g / delay_ns > fw->max_process_rate [=>] + * bytes_g > delay_ns * fw->max_process_rate + * + * For wait_ns to be negative, + * wait_ns < 0 [=>] + * (bytes_g - delay_ns * fw->max_process_rate) / + * fw->max_process_rate < 0 [=>] + * bytes_g < delay_ns * fw->max_process_rate + * + * Therefore, wait_ns cannot be negative. */ - double wait_ms = round((bytes_k - delay * fw->max_process_rate) - / fw->max_process_rate); - - if (wait_ms < 0) { - /* Wait what is possible. */ - wait_ms = fw->delay_ms - delay; - } else if (delay + wait_ms < fw->delay_ms) { - /* wait_ms is not the largest possible value, so - * force the flow algorithm to keep increasing it. - * Otherwise, the delay to print progress may be - * too small. + assert(wait_ns >= 0); + + if (delay_ns + wait_ns < fw->delay_ns) { + /* In this case, There is a factor f > 1 that + * satisfies the following equation: + * + * (delay_ns + wait_ns) * f = fw->delay_ns + * + * This means that both delay_ns and wait_ns should be + * increased to make f = 1. To signal that to the flow + * algorithm below, wait to fw->delay_ns. */ - wait_ms++; + wait_ns = fw->delay_ns - delay_ns; } - if (wait_ms > 0) { + if (wait_ns > 0) { /* Slow down. */ - msleep(wait_ms); + ussleep(wait_ns / 1000.); /* Adjust measurements. */ - delay += wait_ms; - inst_speed = bytes_k / delay; + delay_ns += wait_ns; + inst_speed = bytes_g / delay_ns; } } /* Update mean. */ fw->measured_blocks += fw->processed_blocks; - fw->measured_time_ms += delay; + fw->measured_time_ns += delay_ns; switch (fw->state) { case FW_INC: - if (is_rate_above(fw, delay, inst_speed)) { + if (is_rate_above(fw, delay_ns, inst_speed)) { + if (!fw->has_rem_chunk_size) { + /* Recommend a chunk size to caller. */ + assert(fw->rem_chunk_size != 0); + fw->has_rem_chunk_size = true; + } move_to_search(fw, fw->blocks_per_delay - fw->step / 2, fw->blocks_per_delay); - } else if (is_rate_below(fw, delay, inst_speed)) { + } else if (is_rate_below(fw, delay_ns, inst_speed)) { inc_step(fw); } else move_to_steady(fw); break; case FW_DEC: - if (is_rate_above(fw, delay, inst_speed)) { + if (is_rate_above(fw, delay_ns, inst_speed)) { dec_step(fw); - } else if (is_rate_below(fw, delay, inst_speed)) { + } else if (is_rate_below(fw, delay_ns, inst_speed)) { move_to_search(fw, fw->blocks_per_delay, fw->blocks_per_delay + fw->step / 2); } else @@ -359,10 +487,10 @@ int measure(int fd, struct flow *fw, long processed) break; } - if (is_rate_above(fw, delay, inst_speed)) { + if (is_rate_above(fw, delay_ns, inst_speed)) { fw->bpd2 = fw->blocks_per_delay; fw->blocks_per_delay = (fw->bpd1 + fw->bpd2) / 2; - } else if (is_rate_below(fw, delay, inst_speed)) { + } else if (is_rate_below(fw, delay_ns, inst_speed)) { fw->bpd1 = fw->blocks_per_delay; fw->blocks_per_delay = (fw->bpd1 + fw->bpd2) / 2; } else @@ -370,7 +498,19 @@ int measure(int fd, struct flow *fw, long processed) break; case FW_STEADY: { - if (delay <= fw->delay_ms) { + if (!fw->has_rem_chunk_size) { + /* Recommend a chunk size to caller. + * Execution reaches here when fw->max_process_rate is + * throttling the flow. + */ + assert(fw->rem_chunk_size != 0); + fw->has_rem_chunk_size = true; + /* Since it's in steady state, go for another round + * before making any change. + */ + break; + } + if (delay_ns <= fw->delay_ns) { if (inst_speed < fw->max_process_rate) { move_to_inc(fw); } else if (inst_speed > fw->max_process_rate) { @@ -386,19 +526,18 @@ int measure(int fd, struct flow *fw, long processed) assert(0); } - if (fw->progress) - report_progress(fw, inst_speed); + report_progress(fw, inst_speed); /* Reset accumulators. */ fw->processed_blocks = 0; - fw->acc_delay_us = 0; + fw->acc_delay_ns = 0; __start_measurement(fw); return 0; } int end_measurement(int fd, struct flow *fw) { - struct timeval t2; + struct timespec t2; int saved_errno; int ret = 0; @@ -412,14 +551,12 @@ int end_measurement(int fd, struct flow *fw) } /* Save time in between closing ongoing file and creating a new file. */ - assert(!gettimeofday(&t2, NULL)); - fw->acc_delay_us += diff_timeval_us(&fw->t1, &t2); + assert(!clock_gettime(CLOCK_MONOTONIC, &t2)); + fw->acc_delay_ns += diff_timespec_ns(&fw->t1, &t2); out: /* Erase progress information. */ - erase(fw->erase); - fw->erase = 0; - fflush(stdout); + clear_progress(fw); if (ret < 0) { /* Propagate errno(3) to caller. */ @@ -453,7 +590,8 @@ void print_measured_speed(const struct flow *fw, const struct timeval *t1, int64_t total_time_ms = delay_ms(t1, t2); if (total_time_ms > 0) { pr_avg_speed(speed_type, - get_avg_speed_given_time(fw, total_time_ms)); + get_avg_speed_given_time(fw, total_time_ms * + 1000000ULL)); } else { assert(strlen(speed_type) > 0); printf("%c%s speed not available\n", diff --git a/libflow.h b/libflow.h index 3ee7b8d..8a51394 100644 --- a/libflow.h +++ b/libflow.h @@ -5,7 +5,9 @@ #include #include #include -#include +#include + +#include "libutils.h" struct flow; @@ -16,12 +18,14 @@ struct flow { uint64_t total_size; /* Total number of bytes already processed. */ uint64_t total_processed; - /* If true, show progress. */ - int progress; + /* Callback to show progress. */ + progress_cb cb; + /* Indentation level for callback. */ + unsigned int indent; /* Block size in bytes. */ int block_size; - /* Delay intended between measurements in milliseconds. */ - unsigned int delay_ms; + /* Delay intended between measurements in nanoseconds. */ + uint64_t delay_ns; /* Increment to apply to @blocks_per_delay. */ int64_t step; /* Blocks to process before measurement. */ @@ -31,7 +35,7 @@ struct flow { /* Number of measured blocks. */ uint64_t measured_blocks; /* Measured time. */ - uint64_t measured_time_ms; + uint64_t measured_time_ns; /* State. */ enum {FW_INC, FW_DEC, FW_SEARCH, FW_STEADY} state; /* Number of characters to erase before printing out progress. */ @@ -46,42 +50,52 @@ struct flow { * Initialized while measuring */ + /* Has a recommended chunk size? */ + bool has_rem_chunk_size; + /* Recommended chunk size. */ + uint64_t rem_chunk_size; + /* Speed of the recommended chunk size in bytes per second. */ + double rem_chunk_speed; + /* Only used while has_rem_chunk_size is false. */ + struct timespec prv_t2; + /* Number of blocks processed since last measurement. */ int64_t processed_blocks; /* * Accumulated delay before @processed_blocks reaches @blocks_per_delay - * in microseconds. + * in nanoseconds. */ - uint64_t acc_delay_us; + uint64_t acc_delay_ns; /* Range of blocks_per_delay while in FW_SEARCH state. */ int64_t bpd1, bpd2; /* Time measurements. */ - struct timeval t1; + struct timespec t1; }; /* If @max_process_rate <= 0, the maximum processing rate is infinity. * The unit of @max_process_rate is KB per second. */ void init_flow(struct flow *fw, int block_size, uint64_t total_size, - long max_process_rate, int progress, + long max_process_rate, progress_cb cb, unsigned int indent, flow_func_flush_chunk_t func_flush_chunk); -void start_measurement(struct flow *fw); -int measure(int fd, struct flow *fw, long processed); -void clear_progress(struct flow *fw); -int end_measurement(int fd, struct flow *fw); - -static inline int has_enough_measurements(const struct flow *fw) +static inline void inc_total_size(struct flow *fw, uint64_t size) { - return fw->measured_time_ms > fw->delay_ms; + fw->total_size = fw->total_processed + size; } -static inline uint64_t get_rem_chunk_size(const struct flow *fw) +static inline void fw_set_indent(struct flow *fw, unsigned int indent) { - assert(fw->blocks_per_delay > fw->processed_blocks); - return (fw->blocks_per_delay - fw->processed_blocks) * fw->block_size; + fw->indent = indent; } +uint64_t get_rem_chunk_size(const struct flow *fw); + +void start_measurement(struct flow *fw); +int measure(int fd, struct flow *fw, long processed); +void clear_progress(struct flow *fw); +int end_measurement(int fd, struct flow *fw); + void print_measured_speed(const struct flow *fw, const struct timeval *t1, const struct timeval *t2, const char *speed_type); diff --git a/libprobe.c b/libprobe.c index 91f25bc..cb52adc 100644 --- a/libprobe.c +++ b/libprobe.c @@ -1,4 +1,6 @@ -#include +#define _POSIX_C_SOURCE 200112L +#define _XOPEN_SOURCE 600 + #include #include #include @@ -8,103 +10,274 @@ #include #include "libutils.h" +#include "libflow.h" #include "libprobe.h" -static int _write_blocks(struct device *dev, char *buf, - uint64_t first_pos, uint64_t last_pos, probe_progress_cb cb) +static int _write_blocks(struct device *dev, const char *buf, + uint64_t first_pos, uint64_t last_pos, struct flow *fw, + progress_cb cb, unsigned int indent) { if (dev_write_blocks(dev, buf, first_pos, last_pos) && - dev_write_blocks(dev, buf, first_pos, last_pos)) { - cb("I/O ERROR: Write error at blocks [%" PRIu64 ", %" PRIu64 "]!\n", + dev_write_blocks(dev, buf, first_pos, last_pos)) { + clear_progress(fw); + cb(indent, "I/O ERROR: Write error at blocks [%" PRIu64 ", %" PRIu64 "]!\n", first_pos, last_pos); return true; } return false; } -static int write_blocks(struct device *dev, - uint64_t first_pos, uint64_t last_pos, uint64_t salt, - probe_progress_cb cb) +/* Some fake drives have a "tiny" (e.g. 8KB) cache for random accesses and + * a "large" (e.g. 4MB) cache for sequential accesses. So, for these + * fake drives, a random read may return a bad block, while a sequential + * read that includes that block returns it as a good block. + * This situation has been verified with the donated drive from + * issue #50 (https://github.com/AltraMayor/f3/issues/50). + * + * The example cache sizes come from the following + * discussion among Linux kernel developers: + * https://linux-arm-kernel.infradead.narkive.com/h3crV0D3/mmc-quirks-relating-to-performance-lifetime + * + * To circunvent this problem, the probe must only issue random reads. + */ +struct rdwr_info { + uint64_t cache_pos; + uint64_t cache_size_block; + uint64_t salt; + + struct dynamic_buffer seqw_dbuf; + struct flow seqw_fw; + struct flow randw_fw; + + struct flow randr_fw; +}; + +static int write_random_blocks(struct device *dev, const uint64_t pos[], + uint32_t n_pos, struct rdwr_info *rwi, progress_cb cb, + unsigned int indent) { const int block_order = dev_get_block_order(dev); const int block_size = dev_get_block_size(dev); /* Aligning these pointers is necessary to directly read and write - * the block device. - * For the file device, this is superfluous. + * the block device. For the file device, this is superfluous. */ - char stack[align_head(block_order) + BIG_BLOCK_SIZE_BYTE]; + char stack[align_head(block_order) + block_size]; char *buffer = align_mem(stack, block_order); - char *stamp_blk = buffer; - char *flush_blk = buffer + BIG_BLOCK_SIZE_BYTE; - uint64_t offset = first_pos << block_order; - uint64_t pos, write_pos = first_pos; - - for (pos = first_pos; pos <= last_pos; pos++) { - fill_buffer_with_block(stamp_blk, block_order, offset, salt); - stamp_blk += block_size; - offset += block_size; - - if (stamp_blk == flush_blk || pos == last_pos) { - if (_write_blocks(dev, buffer, write_pos, pos, cb)) - return true; - stamp_blk = buffer; - write_pos = pos + 1; - } + uint32_t i; + + if (n_pos == 0) + return false; + + inc_total_size(&rwi->randw_fw, n_pos << block_order); + fw_set_indent(&rwi->randw_fw, indent); + + start_measurement(&rwi->randw_fw); + for (i = 0; i < n_pos; i++) { + fill_buffer_with_block(buffer, block_order, + pos[i] << block_order, rwi->salt); + if (_write_blocks(dev, buffer, pos[i], pos[i], &rwi->randw_fw, + cb, indent)) + return true; + measure(0, &rwi->randw_fw, block_size); } + end_measurement(0, &rwi->randw_fw); + return false; +} + +static int write_blocks(struct device *dev, + uint64_t first_block, uint64_t last_block, + struct rdwr_info *rwi, progress_cb cb, unsigned int indent) +{ + const int block_order = dev_get_block_order(dev); + const int block_size = dev_get_block_size(dev); + uint64_t offset = first_block << block_order; + uint64_t first_pos = first_block; + + if (first_block > last_block) + return false; + inc_total_size(&rwi->seqw_fw, + (last_block - first_block + 1) << block_order); + fw_set_indent(&rwi->seqw_fw, indent); + + start_measurement(&rwi->seqw_fw); + while (first_pos <= last_block) { + const uint64_t chunk_bytes = get_rem_chunk_size(&rwi->seqw_fw); + const uint64_t needed_size = + align_head(block_order) + chunk_bytes; + const uint64_t max_blocks_to_write = + last_block - first_pos + 1; + uint64_t blocks_to_write; + int shift; + char *buffer, *stamp_blk; + size_t buf_len; + uint64_t pos, next_pos; + + buffer = align_mem2(dbuf_get_buf(&rwi->seqw_dbuf, needed_size), + block_order, &shift); + buf_len = dbuf_get_len(&rwi->seqw_dbuf); + + blocks_to_write = buf_len >= needed_size + ? chunk_bytes >> block_order + : (buf_len - shift) >> block_order; + if (blocks_to_write > max_blocks_to_write) + blocks_to_write = max_blocks_to_write; + + next_pos = first_pos + blocks_to_write - 1; + + stamp_blk = buffer; + for (pos = first_pos; pos <= next_pos; pos++) { + fill_buffer_with_block(stamp_blk, block_order, offset, + rwi->salt); + stamp_blk += block_size; + offset += block_size; + } + + if (_write_blocks(dev, buffer, first_pos, next_pos, + &rwi->seqw_fw, cb, indent)) + return true; + + /* Since parameter func_flush_chunk of init_flow() is NULL, + * the parameter fd of measure() is ignored. + */ + measure(0, &rwi->seqw_fw, blocks_to_write << block_order); + first_pos = next_pos + 1; + } + end_measurement(0, &rwi->seqw_fw); return false; } -static inline int high_level_reset(struct device *dev, uint64_t start_pos, - uint64_t cache_size_block, uint64_t salt, probe_progress_cb cb) +static int overwhelm_cache(struct device *dev, + struct rdwr_info *rwi, progress_cb cb, unsigned int indent) { - return write_blocks(dev, start_pos, start_pos + cache_size_block - 1, - salt, cb); + if (rwi->cache_size_block == 0) + return false; + cb(indent, "Overwhelming cache\n"); + return write_blocks(dev, rwi->cache_pos, + rwi->cache_pos + rwi->cache_size_block - 1, rwi, cb, indent); } -/* Some fake drives have a "tiny" (e.g. 8KB) cache for random accesses and - * a "large" (e.g. 4MB) cache for sequential accesses. So, for these - * fake drives, a random read may return a bad block, while a sequential - * read that includes that block returns it as a good block. - * This situation has been verified with the donated drive from - * issue #50 (https://github.com/AltraMayor/f3/issues/50). - * - * The example cache sizes come from the following - * discussion among Linux kernel developers: - * https://linux-arm-kernel.infradead.narkive.com/h3crV0D3/mmc-quirks-relating-to-performance-lifetime - * - * To circunvent this problem, the probe must only issue random reads. - */ -static int read_blocks(struct device *dev, char *buf, uint64_t pos, - probe_progress_cb cb) +static int read_block(struct device *dev, char *buf, uint64_t pos, + struct flow *fw, progress_cb cb, unsigned int indent) { if (dev_read_blocks(dev, buf, pos, pos) && dev_read_blocks(dev, buf, pos, pos)) { - cb("I/O ERROR: Read error at block %" PRIu64 "!\n", pos); + clear_progress(fw); + cb(indent, "I/O ERROR: Read error at block %" PRIu64 "!\n", + pos); return true; } return false; } -static int is_block_good(struct device *dev, uint64_t pos, int *pis_good, - uint64_t salt, probe_progress_cb cb) +static uint64_t bs_to_set(enum block_state bs) +{ + switch (bs) { + case bs_unknown: + case bs_good: + case bs_bad: + case bs_changed: + case bs_overwritten: + assert(bs < sizeof(uint64_t) * 8); + return 1ULL << bs; + + default: + assert(0); + } +} + +static uint64_t bss_to_set(const enum block_state bss[], uint32_t n_bs) +{ + uint64_t bs_set = 0; + uint32_t i; + + for (i = 0; i < n_bs; i++) + bs_set |= bs_to_set(bss[i]); + return bs_set; +} + +static inline bool in_bs_set(uint64_t bs_set, enum block_state bs) +{ + assert(bs < sizeof(bs_set) * 8); + return (bs_set >> bs) & 1; +} + +struct def_x_block { + uint64_t pos; + uint64_t expected_offset; +}; + +static int find_first_x_block(struct device *dev, + const struct def_x_block x_blocks[], uint32_t n_blocks, + uint64_t bs_set, uint32_t *pfirst_x_block_idx, + enum block_state *pstate, struct rdwr_info *rwi, + progress_cb cb, unsigned int indent) { - const int block_size = dev_get_block_size(dev); const int block_order = dev_get_block_order(dev); + const int block_size = dev_get_block_size(dev); char stack[align_head(block_order) + block_size]; char *probe_blk = align_mem(stack, block_order); - uint64_t found_offset; + uint32_t i; + + if (n_blocks == 0) + goto not_found; + + inc_total_size(&rwi->randr_fw, n_blocks << block_order); + fw_set_indent(&rwi->randr_fw, indent); + + start_measurement(&rwi->randr_fw); + for (i = 0; i < n_blocks; i++) { + uint64_t found_offset; + enum block_state bs; + + if (read_block(dev, probe_blk, x_blocks[i].pos, &rwi->randr_fw, + cb, indent)) + return true; + bs = validate_buffer_with_block(probe_blk, block_order, + x_blocks[i].expected_offset, &found_offset, rwi->salt); + measure(0, &rwi->randr_fw, block_size); + + if (in_bs_set(bs_set, bs)) { + /* Found the first x_block. */ + *pfirst_x_block_idx = i; + *pstate = bs; + end_measurement(0, &rwi->randr_fw); + return false; + } + } + end_measurement(0, &rwi->randr_fw); + +not_found: + *pfirst_x_block_idx = n_blocks; + return false; +} + +static int find_first_bad_block(struct device *dev, const uint64_t pos[], + uint32_t n_pos, bool *pany_bad, uint64_t *pbad_pos, + struct rdwr_info *rwi, progress_cb cb, unsigned int indent) +{ + const int block_order = dev_get_block_order(dev); + /* All but bs_good. */ + const enum block_state bss[] = {bs_unknown, bs_bad, bs_changed, + bs_overwritten}; + struct def_x_block x_blocks[n_pos]; enum block_state bs; + uint32_t i; - if (read_blocks(dev, probe_blk, pos, cb)) - return true; + for (i = 0; i < n_pos; i++) { + x_blocks[i].pos = pos[i]; + x_blocks[i].expected_offset = pos[i] << block_order; + } - bs = validate_buffer_with_block(probe_blk, block_order, - (pos << block_order), &found_offset, salt); - *pis_good = bs == bs_good; - if (!*pis_good) { - cb("INFO: Block %" PRIu64 " is %s!\n", - pos, block_state_to_str(bs)); + if (find_first_x_block(dev, x_blocks, n_pos, + bss_to_set(bss, DIM(bss)), + &i, &bs, rwi, cb, indent)) + return true; + *pany_bad = i < n_pos; + if (*pany_bad) { + *pbad_pos = x_blocks[i].pos; + cb(indent, "INFO: Block %" PRIu64 " is %s!\n", + *pbad_pos, block_state_to_str(bs)); } return false; } @@ -131,51 +304,106 @@ static uint64_t uint64_rand_range(uint64_t a, uint64_t b) return a + (r % (b - a + 1)); } -#define N_BLOCK_SAMPLES 64 +/* Since the list size is small, at most SAMPLING_MAX blocks, + * the O(n_samples^2) complexity is not a problem. + */ +static void fill_with_unique_samples(uint64_t *samples, uint32_t n_samples, + uint64_t first_pos, uint64_t last_pos) +{ + uint32_t i, j; + + assert(n_samples < last_pos - first_pos + 1); + for (i = 0; i < n_samples; ) { + uint64_t r = uint64_rand_range(first_pos, last_pos); + bool unique = true; + for (j = 0; j < i; j++) { + if (samples[j] == r) { + unique = false; + break; + } + } + if (unique) { + samples[i] = r; + i++; + } + } +} +static int uint64_cmp(const void *pa, const void *pb) +{ + const uint64_t *pia = pa; + const uint64_t *pib = pb; + return *pia - *pib; +} + +/* Fill @samples with @n_samples unique random positions in the range + * [@first_pos, @last_pos]. If @sorted is true, sort the entries of + * @samples. If @is_linear is true, the entries of @samples are linear + * (i.e. @first_pos, @first_pos + 1, ...). + */ +static void fill_samples(uint64_t *samples, uint32_t *pn_samples, + uint64_t first_pos, uint64_t last_pos, bool sorted, bool *pis_linear) +{ + const uint64_t gap = last_pos - first_pos + 1; + *pis_linear = gap <= *pn_samples; + if (*pis_linear) { + uint32_t i; + *pn_samples = gap; + for (i = 0; i < gap; i++) + samples[i] = first_pos + i; + + /* Treat single blocks as random reads instead of + * sequential ones. + */ + *pis_linear = gap > 1; + } else { + fill_with_unique_samples(samples, *pn_samples, first_pos, + last_pos); + if (sorted) { + qsort(samples, *pn_samples, sizeof(uint64_t), + uint64_cmp); + } + } +} + +/* Let g be the number of good blocks between + * @first_pos and @last_pos including them. + * Let b be the number of bad and overwritten blocks between + * @first_pos and @last_pos including them. + * + * The probability Pr_g of sampling a good block at random between + * @first_pos and @last_pos is Pr_g = g / (g + b), and + * the probability Pr_1b that among k block samples at least + * one block is bad is Pr_1b = 1 - Pr_g^k. + * + * Assuming Pr_g <= 95% and k = 64, Pr_1b >= 96.2%. + * That is, with high probability (i.e. Pr_1b), + * one can find at least a bad block with k samples + * when most blocks are good (Pr_g). + */ static int probabilistic_test(struct device *dev, uint64_t first_pos, uint64_t last_pos, int *pfound_a_bad_block, - uint64_t salt, probe_progress_cb cb) + struct rdwr_info *rwi, progress_cb cb, unsigned int indent) { - uint64_t gap; - int i, n, is_linear; + uint32_t n_samples = 64; + uint64_t samples[n_samples]; + bool is_linear, any_bad; + uint64_t bad_pos; if (first_pos > last_pos) goto not_found; - /* Let g be the number of good blocks between - * @first_pos and @last_pos including them. - * Let b be the number of bad and overwritten blocks between - * @first_pos and @last_pos including them. - * - * The probability Pr_g of sampling a good block at random between - * @first_pos and @last_pos is Pr_g = g / (g + b), and - * the probability Pr_1b that among k block samples at least - * one block is bad is Pr_1b = 1 - Pr_g^k. - * - * Assuming Pr_g <= 95% and k = 64, Pr_1b >= 96.2%. - * That is, with high probability (i.e. Pr_1b), - * one can find at least a bad block with k samples - * when most blocks are good (Pr_g). - */ - - /* Test @samples. */ - gap = last_pos - first_pos + 1; - is_linear = gap <= N_BLOCK_SAMPLES; - n = is_linear ? gap : N_BLOCK_SAMPLES; - for (i = 0; i < n; i++) { - uint64_t sample_pos = is_linear - ? first_pos + i - : uint64_rand_range(first_pos, last_pos); - int is_good; - - if (is_block_good(dev, sample_pos, &is_good, salt, cb)) - return true; - if (!is_good) { - /* Found a bad block. */ - *pfound_a_bad_block = true; - return false; - } + fill_samples(samples, &n_samples, first_pos, last_pos, false, + &is_linear); + cb(indent, "Sampling %" PRIu32 " blocks from blocks [%" PRIu64 ", %" PRIu64 "]\n", + n_samples, first_pos, last_pos); + if (find_first_bad_block(dev, samples, n_samples, &any_bad, &bad_pos, + rwi, cb, indent)) + return true; + if (any_bad) { + /* Found a bad block. */ + *pfound_a_bad_block = true; + return false; } not_found: @@ -183,93 +411,62 @@ static int probabilistic_test(struct device *dev, return false; } -static int uint64_cmp(const void *pa, const void *pb) -{ - const uint64_t *pia = pa; - const uint64_t *pib = pb; - return *pia - *pib; -} - +/* Find a bad block in the range (left_pos, right_pos) using up to + * n_samples random samples. + * + * If a bad block is found, set *pright_pos to the position of the + * leftmost bad block. + * + * The code relies on the same analytical result derived + * in probabilistic_test(). + */ static int find_a_bad_block(struct device *dev, uint32_t n_samples, uint64_t left_pos, uint64_t *pright_pos, int *found_a_bad_block, - uint64_t reset_pos, uint64_t cache_size_block, uint64_t salt, - probe_progress_cb cb) + struct rdwr_info *rwi, progress_cb cb, unsigned int indent) { - /* We need to list all sampled blocks because - * we need a sorted array; read the code to find the why. - * If the sorted array were not needed, one could save the seed - * of the random sequence and repeat the sequence to read the blocks - * after writing them. - */ uint64_t samples[n_samples]; - uint64_t gap, prv_sample; - uint32_t i; - - cb("\tSampling %" PRIu32 " blocks from blocks (%" PRIu64 ", %" PRIu64 ")\n", - n_samples, left_pos, *pright_pos); + bool is_linear, any_bad; + uint64_t bad_pos; if (n_samples == 0 || *pright_pos <= left_pos + 1) { /* Nothing to sample. */ goto not_found; } - /* The code below relies on the same analytical result derived - * in probabilistic_test(). + /* Sort entries of samples to minimize reads. + * As soon as one finds a bad block, one can ignore the remaining + * samples because the found bad block is the leftmost bad block. */ + fill_samples(samples, &n_samples, left_pos + 1, *pright_pos - 1, true, + &is_linear); + cb(indent, "## Sampling %" PRIu32 " blocks from blocks (%" PRIu64 ", %" PRIu64 ")\n", + n_samples, left_pos, *pright_pos); - /* Fill up @samples. */ - gap = *pright_pos - left_pos - 1; - if (gap <= n_samples) { - n_samples = gap; - for (i = 0; i < n_samples; i++) - samples[i] = left_pos + 1 + i; + cb(indent + 1, "Writing random blocks\n"); - /* Write @samples. */ - if (write_blocks(dev, left_pos + 1, *pright_pos - 1, salt, cb)) + if (is_linear) { + if (write_blocks(dev, left_pos + 1, *pright_pos - 1, rwi, + cb, indent + 1)) return true; } else { - for (i = 0; i < n_samples; i++) - samples[i] = uint64_rand_range(left_pos + 1, - *pright_pos - 1); - - /* Sort entries of @samples to minimize reads. - * As soon as one finds a bad block, one can stop and ignore - * the remaining blocks because the found bad block is - * the leftmost bad block. - */ - qsort(samples, n_samples, sizeof(uint64_t), uint64_cmp); - - /* Write @samples. */ - prv_sample = left_pos; - for (i = 0; i < n_samples; i++) { - if (samples[i] == prv_sample) - continue; - prv_sample = samples[i]; - if (write_blocks(dev, prv_sample, prv_sample, salt, cb)) - return true; - } + if (write_random_blocks(dev, samples, n_samples, rwi, + cb, indent + 1)) + return true; } - if (high_level_reset(dev, reset_pos, cache_size_block, salt, cb)) + if (overwhelm_cache(dev, rwi, cb, indent + 1)) return true; - /* Test @samples. */ - prv_sample = left_pos; - for (i = 0; i < n_samples; i++) { - int is_good; - - if (samples[i] == prv_sample) - continue; - - prv_sample = samples[i]; - if (is_block_good(dev, prv_sample, &is_good, salt, cb)) - return true; - if (!is_good) { - /* Found the leftmost bad block. */ - *pright_pos = prv_sample; - *found_a_bad_block = true; - return false; - } + /* Test samples. */ + cb(indent + 1, "Reading written blocks\n"); + if (find_first_bad_block(dev, samples, n_samples, &any_bad, &bad_pos, + rwi, cb, indent + 1)) + return true; + if (any_bad) { + /* Found the leftmost bad block. */ + *pright_pos = bad_pos; + *found_a_bad_block = true; + return false; } not_found: @@ -291,18 +488,18 @@ static int find_a_bad_block(struct device *dev, uint32_t n_samples, */ static int sampling_probe(struct device *dev, uint64_t left_pos, uint64_t *pright_pos, - uint64_t reset_pos, uint64_t cache_size_block, uint64_t salt, - probe_progress_cb cb) + struct rdwr_info *rwi, progress_cb cb, unsigned int indent) { uint32_t n_samples = SAMPLING_MIN; int found_a_bad_block; bool phase1 = true; assert(SAMPLING_MAX >= SAMPLING_MIN); + cb(indent, "# Sampling\n"); + while (*pright_pos > left_pos + n_samples + 1) { if (find_a_bad_block(dev, n_samples, left_pos, pright_pos, - &found_a_bad_block, reset_pos, - cache_size_block, salt, cb)) + &found_a_bad_block, rwi, cb, indent + 1)) return true; if (found_a_bad_block) continue; @@ -320,18 +517,17 @@ static int sampling_probe(struct device *dev, left_pos = (*pright_pos + left_pos) / 2; } if (find_a_bad_block(dev, n_samples, left_pos, pright_pos, - &found_a_bad_block, reset_pos, - cache_size_block, salt, cb)) + &found_a_bad_block, rwi, cb, indent + 1)) return true; return false; } -static void report_cache_size_test(probe_progress_cb cb, +static void report_cache_size_test(unsigned int indent, progress_cb cb, const struct device *dev, uint64_t first_pos, uint64_t last_pos) { double f_size = (last_pos - first_pos + 1) * dev_get_block_size(dev); const char *unit = adjust_unit(&f_size); - cb("\tTesting cache size: %.2f %s; Blocks [%" PRIu64 ", %" PRIu64 "]\n", + cb(indent, "## Testing cache size: %.2f %s; Blocks [%" PRIu64 ", %" PRIu64 "]\n", f_size, unit, first_pos, last_pos); } @@ -339,8 +535,8 @@ static void report_cache_size_test(probe_progress_cb cb, #define MAX_CACHE_SIZE_BYTE (1ULL << 30) static int find_cache_size(struct device *dev, const uint64_t left_pos, - uint64_t *pright_pos, uint64_t *pcache_size_block, const uint64_t salt, - probe_progress_cb cb) + uint64_t *pright_pos, struct rdwr_info *rwi, progress_cb cb, + unsigned int indent) { const int block_order = dev_get_block_order(dev); const uint64_t end_pos = *pright_pos - 1; @@ -348,7 +544,7 @@ static int find_cache_size(struct device *dev, const uint64_t left_pos, uint64_t final_write_target = MAX_CACHE_SIZE_BYTE >> block_order; uint64_t first_pos = *pright_pos; - cb("# Find cache size\n"); + cb(indent, "# Find cache size\n"); assert(write_target > 0); assert(write_target < final_write_target); @@ -373,19 +569,22 @@ static int find_cache_size(struct device *dev, const uint64_t left_pos, break; } + report_cache_size_test(indent + 1, cb, dev, first_pos, end_pos); + /* Write @write_target blocks before * the previously written blocks. */ - report_cache_size_test(cb, dev, first_pos, end_pos); - if (write_blocks(dev, first_pos, last_pos, salt, cb)) + cb(indent + 2, "Writing blocks [%" PRIu64 ", %" PRIu64 "]\n", + first_pos, last_pos); + if (write_blocks(dev, first_pos, last_pos, rwi, cb, indent + 2)) goto bad; if (probabilistic_test(dev, first_pos, end_pos, - &found_a_bad_block, salt, cb)) + &found_a_bad_block, rwi, cb, indent + 2)) goto bad; if (found_a_bad_block) { *pright_pos = first_pos; - *pcache_size_block = write_target == 1 + rwi->cache_size_block = write_target == 1 ? 0 /* There is no cache. */ : end_pos - first_pos + 1; return false; @@ -397,24 +596,48 @@ static int find_cache_size(struct device *dev, const uint64_t left_pos, /* Good drive. */ *pright_pos = end_pos + 1; - *pcache_size_block = 0; + rwi->cache_size_block = 0; return false; bad: /* *pright_pos does not change. */ - *pcache_size_block = 0; + rwi->cache_size_block = 0; return true; } static int find_wrap(struct device *dev, uint64_t left_pos, uint64_t *pright_pos, - uint64_t reset_pos, uint64_t cache_size_block, uint64_t salt, - probe_progress_cb cb) + struct rdwr_info *rwi, progress_cb cb, unsigned int indent) { - uint64_t offset, high_bit, pos = left_pos + 1; - int is_good, block_order; + const uint64_t good_block = left_pos + 1; + /* The smallest integer m such that 2^m > good_block. */ + const uint32_t m = ceiling_log2(good_block + 1); + /* Let k be the *smallest* integer such that + * 2^(m+k) + good_block >= *pright_pos + * + * Since this function has to test the blocks + * 2^m + good_block, 2^(m+1) + good_block, ..., 2^(m+k-1) + good_block, + * k corresponds to the number of samples to test. + * + * 2^(m+k) + good_block >= *pright_pos [=>] + * 2^(m+k) >= *pright_pos - good_block [=>] + * m + k >= log2(*pright_pos - good_block) [=>] + * k >= log2(*pright_pos - good_block) - m [=>] + * k = ceiling_log2(*pright_pos - good_block) - m + */ + const uint32_t aux = *pright_pos > good_block + ? ceiling_log2(*pright_pos - good_block) + : 0; + const uint32_t n_samples = aux > m ? aux - m : 0; + struct def_x_block x_blocks[n_samples]; + bool any_bad; + uint64_t bad_pos; + int block_order; + uint64_t expected_offset, high_bit; + uint32_t i; + enum block_state bs; - cb("# Find module\n"); + cb(indent, "# Find module\n"); /* * Basis @@ -424,45 +647,54 @@ static int find_wrap(struct device *dev, * of the drive. */ - if (pos >= *pright_pos) + if (good_block >= *pright_pos) return false; - if (write_blocks(dev, pos, pos, salt, cb) || - high_level_reset(dev, reset_pos, cache_size_block, - salt, cb) || - is_block_good(dev, pos, &is_good, salt, cb) || - !is_good) + cb(indent + 1, "Writing reference block %" PRIu64 "\n", good_block); + if (write_random_blocks(dev, &good_block, 1, rwi, cb, indent + 1) || + overwhelm_cache(dev, rwi, cb, indent + 1)) + return true; + + cb(indent + 1, "Reading reference block\n"); + if (find_first_bad_block(dev, &good_block, 1, &any_bad, &bad_pos, + rwi, cb, indent + 1) || any_bad) return true; /* * Inductive step */ - block_order = dev_get_block_order(dev); - offset = pos << block_order; - high_bit = clp2(pos); - if (high_bit <= pos) - high_bit <<= 1; - pos += high_bit; - - while (pos < *pright_pos) { - char stack[align_head(block_order) + (1 << block_order)]; - char *probe_blk = align_mem(stack, block_order); - uint64_t found_offset; + cb(indent + 1, "Probing module (reading %" PRIu32 " blocks)\n", + n_samples); - if (read_blocks(dev, probe_blk, pos, cb)) - return true; + block_order = dev_get_block_order(dev); + expected_offset = good_block << block_order; - if (validate_buffer_with_block(probe_blk, block_order, - offset, &found_offset, salt) == bs_good) { - *pright_pos = high_bit; - return false; - } + /* high_bit starts as the smallest power of 2 greater than + * good_block. + */ + high_bit = 1ULL << m; /* 2^m */ + assert(high_bit > good_block); + /* Fill x_blocks in. */ + for (i = 0; i < n_samples; i++) { + uint64_t pos = high_bit + good_block; + assert(pos < *pright_pos); + x_blocks[i].pos = pos; + x_blocks[i].expected_offset = expected_offset; high_bit <<= 1; - pos = high_bit + left_pos + 1; } + assert(high_bit + good_block >= *pright_pos); + if (find_first_x_block(dev, x_blocks, n_samples, bs_to_set(bs_good), + &i, &bs, rwi, cb, indent + 1)) + return true; + if (i < n_samples) { + assert(bs == bs_good); + *pright_pos = x_blocks[i].pos - good_block; /* = high_bit */ + cb(indent + 1, "INFO: Block %" PRIu64 " overwrites block %" PRIu64 "\n", + x_blocks[i].pos, good_block); + } return false; } @@ -482,52 +714,54 @@ uint64_t probe_device_max_blocks(const struct device *dev) n * SAMPLING_MIN; /* Upper bound for phase 2. */ } -void printf_cb(const char *format, ...) -{ - va_list args; - va_start(args, format); - vprintf(format, args); - va_end(args); -} - -void report_probed_size(probe_progress_cb cb, const char *prefix, - uint64_t bytes, int block_order) +void report_probed_size(unsigned int indent, progress_cb cb, + const char *prefix, uint64_t bytes, int block_order) { double f = bytes; const char *unit = adjust_unit(&f); - cb("%s %.2f %s (%" PRIu64 " blocks)\n", prefix, f, unit, - bytes >> block_order); + cb(indent, "%s %.2f %s (%" PRIu64 " blocks)\n", + prefix, f, unit, bytes >> block_order); } -void report_probed_order(probe_progress_cb cb, const char *prefix, int order) +void report_probed_order(unsigned int indent, progress_cb cb, + const char *prefix, int order) { double f = (1ULL << order); const char *unit = adjust_unit(&f); - cb("%s %.2f %s (2^%i Bytes)\n", prefix, f, unit, order); + cb(indent, "%s %.2f %s (2^%i Bytes)\n", prefix, f, unit, order); } -void report_probed_cache(probe_progress_cb cb, const char *prefix, - uint64_t cache_size_block, int block_order) - +void report_probed_cache(unsigned int indent, progress_cb cb, + const char *prefix, uint64_t cache_size_block, int block_order) { double f = (cache_size_block << block_order); const char *unit = adjust_unit(&f); - cb("%s %.2f %s (%" PRIu64 " blocks)\n", + cb(indent, "%s %.2f %s (%" PRIu64 " blocks)\n", prefix, f, unit, cache_size_block); } int probe_device(struct device *dev, uint64_t *preal_size_byte, uint64_t *pannounced_size_byte, int *pwrap, uint64_t *pcache_size_block, - int *pblock_order, probe_progress_cb cb) + int *pblock_order, progress_cb cb, int show_progress) { const uint64_t dev_size_byte = dev_get_size_byte(dev); const int block_order = dev_get_block_order(dev); - uint64_t salt, cache_size_block; - uint64_t left_pos, right_pos, mid_drive_pos, reset_pos; + const int block_size = dev_get_block_size(dev); + const progress_cb fw_cb = show_progress ? cb : dummy_cb; + uint64_t left_pos, right_pos, mid_drive_pos; + struct rdwr_info rwi; int wrap; assert(block_order <= 20); + dbuf_init(&rwi.seqw_dbuf); + /* We initialize total_size to 0 because write_blocks() updates it + * before writing. + */ + init_flow(&rwi.seqw_fw, block_size, 0, 0, fw_cb, 0, NULL); + init_flow(&rwi.randw_fw, block_size, 0, 0, fw_cb, 0, NULL); + init_flow(&rwi.randr_fw, block_size, 0, 0, fw_cb, 0, NULL); + /* @left_pos must point to a good block. * We just point to the last block of the first 1MB of the card * because this region is reserved for partition tables. @@ -546,7 +780,7 @@ int probe_device(struct device *dev, uint64_t *preal_size_byte, * @left_pos points to a good block, and @right_pos to a bad block. */ if (left_pos >= right_pos) { - cache_size_block = 0; + rwi.cache_size_block = 0; goto bad; } @@ -561,30 +795,26 @@ int probe_device(struct device *dev, uint64_t *preal_size_byte, /* This call is needed due to rand(). */ srand(time(NULL)); - salt = uint64_rand(); + rwi.salt = uint64_rand(); - cb("# Device geometry\n"); - report_probed_size(cb, "=> Announced size:", dev_size_byte, + cb(0, "# Device geometry\n"); + report_probed_size(0, cb, "=> Announced size:", dev_size_byte, block_order); - report_probed_order(cb, "=> Physical block size:", block_order); + report_probed_order(0, cb, "=> Physical block size:", block_order); - if (find_cache_size(dev, mid_drive_pos - 1, &right_pos, - &cache_size_block, salt, cb)) + if (find_cache_size(dev, mid_drive_pos - 1, &right_pos, &rwi, cb, 0)) goto bad; assert(mid_drive_pos <= right_pos); - reset_pos = right_pos; - report_probed_cache(cb, "=> Approximate cache size:", - cache_size_block, block_order); + rwi.cache_pos = right_pos; + report_probed_cache(0, cb, "=> Approximate cache size:", + rwi.cache_size_block, block_order); - if (find_wrap(dev, left_pos, &right_pos, - reset_pos, cache_size_block, salt, cb)) + if (find_wrap(dev, left_pos, &right_pos, &rwi, cb, 0)) goto bad; wrap = ceiling_log2(right_pos << block_order); - report_probed_order(cb, "=> Module:", wrap); + report_probed_order(0, cb, "=> Module:", wrap); - cb("# Sampling\n"); - if (sampling_probe(dev, left_pos, &right_pos, reset_pos, - cache_size_block, salt, cb)) + if (sampling_probe(dev, left_pos, &right_pos, &rwi, cb, 0)) goto bad; if (right_pos == left_pos + 1) { @@ -601,9 +831,11 @@ int probe_device(struct device *dev, uint64_t *preal_size_byte, *pwrap = ceiling_log2(dev_size_byte); out: - report_probed_size(cb, "=> Usable size:", *preal_size_byte, block_order); + dbuf_free(&rwi.seqw_dbuf); + report_probed_size(0, cb, "=> Usable size:", + *preal_size_byte, block_order); *pannounced_size_byte = dev_size_byte; - *pcache_size_block = cache_size_block; + *pcache_size_block = rwi.cache_size_block; *pblock_order = block_order; return false; } diff --git a/libprobe.h b/libprobe.h index 590058d..2fbf6a7 100644 --- a/libprobe.h +++ b/libprobe.h @@ -3,25 +3,23 @@ #include +#include "libutils.h" #include "libdevs.h" uint64_t probe_device_max_blocks(const struct device *dev); -typedef void (*probe_progress_cb)(const char *format, ...); +void report_probed_size(unsigned int indent, progress_cb cb, + const char *prefix, uint64_t bytes, int block_order); -void printf_cb(const char *format, ...); +void report_probed_order(unsigned int indent, progress_cb cb, + const char *prefix, int order); -void report_probed_size(probe_progress_cb cb, const char *prefix, - uint64_t bytes, int block_order); - -void report_probed_order(probe_progress_cb cb, const char *prefix, int order); - -void report_probed_cache(probe_progress_cb cb, const char *prefix, - uint64_t cache_size_block, int block_order); +void report_probed_cache(unsigned int indent, progress_cb cb, + const char *prefix, uint64_t cache_size_block, int block_order); int probe_device(struct device *dev, uint64_t *preal_size_byte, uint64_t *pannounced_size_byte, int *pwrap, uint64_t *pcache_size_block, int *pblock_order, - probe_progress_cb cb); + progress_cb cb, int show_progress); #endif /* HEADER_LIBPROBE_H */ diff --git a/libutils.c b/libutils.c index 05cb57d..2a00425 100644 --- a/libutils.c +++ b/libutils.c @@ -1,8 +1,12 @@ +#define _POSIX_C_SOURCE 200112L +#define _XOPEN_SOURCE 600 + #include /* For fprintf(). */ #include /* For strtoll(). */ #include #include #include +#include #include "libutils.h" #include "version.h" @@ -319,3 +323,50 @@ void print_stats(const struct block_stats *stats, int block_size, print_stat("\tSlightly changed:", stats->changed, block_size, unit_name); print_stat("\t Overwritten:", stats->overwritten, block_size, unit_name); } + +static void print_indent(unsigned int indent, const char *indent_str) +{ + unsigned int i; + for (i = 0; i < indent; i++) + printf("%s", indent_str); +} + +static void vprintf_cb(unsigned int indent, const char *format, va_list args) +{ + const char *indent_str = " "; + const char *erase_str = "\b\b\b\b\b\b\b\b"; + + assert(format != NULL); + if (format[0] != '\b') { + print_indent(indent, indent_str); + vprintf(format, args); + return; + } + + vprintf(format, args); + print_indent(indent, erase_str); +} + +void printf_cb(unsigned int indent, const char *format, ...) +{ + va_list args; + va_start(args, format); + vprintf_cb(indent, format, args); + va_end(args); +} + +void printf_flush_cb(unsigned int indent, const char *format, ...) +{ + va_list args; + va_start(args, format); + vprintf_cb(indent, format, args); + va_end(args); + fflush(stdout); +} + +void dummy_cb(unsigned int indent, const char *format, ...) +{ + /* Do nothing */ + UNUSED(indent); + UNUSED(format); +} diff --git a/libutils.h b/libutils.h index ab550fc..9cd145a 100644 --- a/libutils.h +++ b/libutils.h @@ -3,12 +3,20 @@ #include #include /* For struct argp_state. */ +#include /* For struct timespec. */ #include /* For struct timeval. */ #define SECTOR_SIZE (512) #define SECTOR_ORDER (9) #define UNUSED(x) ((void)x) +#define DIM(x) (sizeof(x) / sizeof((x)[0])) + +typedef void (*progress_cb)(unsigned int indent, const char *format, ...); + +void printf_cb(unsigned int indent, const char *format, ...); +void printf_flush_cb(unsigned int indent, const char *format, ...); +void dummy_cb(unsigned int indent, const char *format, ...); int ilog2(uint64_t x); @@ -92,6 +100,13 @@ static inline uint64_t diff_timeval_us(const struct timeval *t1, t2->tv_usec - t1->tv_usec; } +static inline uint64_t diff_timespec_ns(const struct timespec *t1, + const struct timespec *t2) +{ + return (t2->tv_sec - t1->tv_sec) * 1000000000ULL + + t2->tv_nsec - t1->tv_nsec; +} + void print_stats(const struct block_stats *stats, int block_size, const char *unit_name);