diff --git a/README.md b/README.md index f230ff1..3efd90c 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,19 @@ cmake --build build --config Release ./build/benchmarks/thoroughfloat64 ``` +## Fixed-precision evaluation + +By default, we compare algorithms that output shortest-significand representation +which can round-trip to the original floating-point value. We can also compare +algorithms that output fixed-precision representation of a given precision: + +``` +./build/benchmarks/benchmark -f data/canada.txt -F [precision] +``` + +Note that this only works when we are comparing speeds, not measuring properties +of the algorithms, i.e., we can't use both `-F/--fixed` and `-t/--test` at the same time. + ## Other existing benchmarks - [dtoa Benchmark](https://github.com/miloyip/dtoa-benchmark) diff --git a/benchmarks/algorithms.h b/benchmarks/algorithms.h index 4d8fd8e..074fc49 100644 --- a/benchmarks/algorithms.h +++ b/benchmarks/algorithms.h @@ -44,44 +44,31 @@ #define YY_DOUBLE_SUPPORTED 0 #endif -namespace Benchmarks { - -enum Algorithm { - DRAGON4 = 0, - ERROL3 = 1, - TO_STRING = 2, - FMT_FORMAT = 3, - NETLIB = 4, - SNPRINTF = 5, - GRISU2 = 6, - GRISU_EXACT = 7, - SCHUBFACH = 8, - DRAGONBOX = 9, - RYU = 10, - TEJU_JAGUA = 11, - DOUBLE_CONVERSION = 12, - ABSEIL = 13, - STD_TO_CHARS = 14, - GRISU3 = 15, - SWIFT_DTOA = 16, - YY_DOUBLE = 17, - COUNT // Keep last -}; - template struct BenchArgs { using Type = T; + using BenchFn = std::function&)>; - BenchArgs(const std::string& name = {}, int (*func)(T, std::span&) = {}, - bool used = true, size_t testRepeat = 100) + BenchArgs(const std::string& name = {}, BenchFn func = {}, bool used = true, size_t testRepeat = 100) : name(name), func(func), used(used), testRepeat(testRepeat) {} std::string name{}; - int (*func)(T, std::span&){}; + BenchFn func{}; bool used{}; size_t testRepeat{100}; + + static void initFixedSize(size_t size) { + fixedSize = size; + snprintf(formatStr, sizeof(formatStr), "%%.%zug", fixedSize); + formatStrStr = fmt::format("{{:.{}g}}", fixedSize); + } + static inline size_t fixedSize; + static inline char formatStr[10]; + static inline std::string formatStrStr; }; +namespace BenchmarkShortest { + template int dragon4(T d, std::span& buffer) { if constexpr (std::is_same_v) @@ -92,32 +79,6 @@ int dragon4(T d, std::span& buffer) { PrintFloatFormat_Positional, -1); } -// No errol3 implementation optimized for float instead of double ? -template -int errol3(T d, std::span& buffer) { -#if ERROL_SUPPORTED - errol3_dtoa(d, buffer.data()); // returns the exponent - return std::strlen(buffer.data()); -#else - std::cerr << "errol3 not supported" << std::endl; - std::abort(); -#endif -} - -template -int to_string(T d, std::span& buffer) { - const std::string s = std::to_string(d); - std::copy(s.begin(), s.end(), buffer.begin()); - return s.size(); -} - -template -int fmt_format(T d, std::span& buffer) { - const std::string s = fmt::format("{}", d); - std::copy(s.begin(), s.end(), buffer.begin()); - return s.size(); -} - // There's no "ftoa", only "dtoa", so not optimized for float. template int netlib(T d, std::span& buffer) { @@ -157,7 +118,7 @@ int netlib(T d, std::span& buffer) { } else { buffer[i++] = '0' + value; } - }; + }; // Fractional part (if any remaining digits) const int remaining_digits = rve - (result + std::max(0, decpt)); if (remaining_digits > 0) { @@ -187,12 +148,29 @@ int netlib(T d, std::span& buffer) { #endif } +// No errol3 implementation optimized for float instead of double ? +template +int errol3(T d, std::span& buffer) { +#if ERROL_SUPPORTED + errol3_dtoa(d, buffer.data()); // returns the exponent + return std::strlen(buffer.data()); +#else + std::cerr << "errol3 not supported" << std::endl; + std::abort(); +#endif +} + template -int snprintf(T d, std::span& buffer) { - if constexpr (std::is_same_v) - return std::snprintf(buffer.data(), buffer.size(), "%.9g", d); - else - return std::snprintf(buffer.data(), buffer.size(), "%.17g", d); +int to_string(T d, std::span& buffer) { + const std::string s = std::to_string(d); + std::copy(s.begin(), s.end(), buffer.begin()); + return s.size(); +} + +template +int fmt_format(T d, std::span& buffer) { + const auto it = fmt::format_to(buffer.data(), "{}", d); + return std::distance(buffer.data(), it); } // grisu2 is hardcoded for double. @@ -246,13 +224,15 @@ int teju_jagua(T d, std::span& buffer) { template int double_conversion(T d, std::span& buffer) { - const static double_conversion::DoubleToStringConverter converter( - double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "nan", 'e', - -4, 6, 0, 0); + using namespace double_conversion; + const static DoubleToStringConverter conv( + DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN | DoubleToStringConverter::UNIQUE_ZERO, + "inf", "nan", 'e', -4, 6, 0, 0); + double_conversion::StringBuilder builder(buffer.data(), buffer.size()); const bool valid = std::is_same_v - ? converter.ToShortestSingle(d, &builder) - : converter.ToShortest(d, &builder); + ? conv.ToShortestSingle(d, &builder) + : conv.ToShortest(d, &builder); if (!valid) { std::cerr << "problem with " << d << std::endl; std::abort(); @@ -284,6 +264,52 @@ int yy_double(T d, std::span& buffer) { #endif } +template +int std_to_chars(T d, std::span& buffer) { +#if TO_CHARS_SUPPORTED + const auto [p, ec] + = std::to_chars(buffer.data(), buffer.data() + buffer.size(), d); + if (ec != std::errc()) { + std::cerr << "problem with " << d << std::endl; + std::abort(); + } + return p - buffer.data(); +#else + std::cerr << "std::to_chars not supported" << std::endl; + std::abort(); +#endif +} + +} // namespace BenchmarksShortest + +namespace BenchmarkFixedSize { + +template +int dragon4(T d, std::span& buffer) { + if constexpr (std::is_same_v) + return PrintFloat32(buffer.data(), buffer.size(), d, + PrintFloatFormat_Positional, BenchArgs::fixedSize); + else + return PrintFloat64(buffer.data(), buffer.size(), d, + PrintFloatFormat_Positional, BenchArgs::fixedSize); +} + +template +int netlib(T d, std::span& buffer) { +#if NETLIB_SUPPORTED + char* res; + if constexpr (std::is_same_v) + res = g_ffmt(buffer.data(), &d, BenchArgs::fixedSize, buffer.size()); + else + res = g_dfmt(buffer.data(), &d, BenchArgs::fixedSize, buffer.size()); + *res = '\0'; + return res - buffer.data() + 1; +#else + std::cerr << "netlib not supported" << std::endl; + std::abort(); +#endif +} + template int abseil(T d, std::span& buffer) { // StrAppend is faster but only outputs 6 digits after the decimal point @@ -291,17 +317,48 @@ int abseil(T d, std::span& buffer) { // absl::StrAppend(&s, d); // std::copy(s.begin(), s.end(), buffer.begin()); // return size(s); - if constexpr (std::is_same_v) - return absl::SNPrintF(buffer.data(), buffer.size(), "%.9g", d); - else - return absl::SNPrintF(buffer.data(), buffer.size(), "%.17g", d); + return absl::SNPrintF(buffer.data(), buffer.size(), + BenchArgs::formatStr, d); +} + +template +int snprintf(T d, std::span& buffer) { + return std::snprintf(buffer.data(), buffer.size(), + BenchArgs::formatStr, d); +} + +template +int fmt_format(T d, std::span& buffer) { + const auto it = fmt::format_to(buffer.begin(), + fmt::runtime(BenchArgs::formatStrStr), d); + return std::distance(buffer.begin(), it); +} + +template +int ryu(T d, std::span& buffer) { + return d2fixed_buffered_n(d, BenchArgs::fixedSize, buffer.data()); +} + +template +int double_conversion(T d, std::span& buffer) { + const static double_conversion::DoubleToStringConverter conv( + double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "nan", 'e', + -6, 21, BenchArgs::fixedSize, BenchArgs::fixedSize); + + double_conversion::StringBuilder builder(buffer.data(), buffer.size()); + if (!conv.ToPrecision(d, BenchArgs::fixedSize, &builder)) { + std::cerr << "problem with " << d << std::endl; + std::abort(); + } + return strlen(builder.Finalize()); } template int std_to_chars(T d, std::span& buffer) { #if TO_CHARS_SUPPORTED const auto [p, ec] - = std::to_chars(buffer.data(), buffer.data() + buffer.size(), d); + = std::to_chars(buffer.data(), buffer.data() + buffer.size(), d, + std::chars_format::general, BenchArgs::fixedSize); if (ec != std::errc()) { std::cerr << "problem with " << d << std::endl; std::abort(); @@ -313,30 +370,59 @@ int std_to_chars(T d, std::span& buffer) { #endif } +} // namespace BenchmarksShortest + +template +auto wrap(int (*fn)(T, std::span&)) { + return [fn](T v, std::span& buf) -> int { + return fn(v, buf); + }; +} + template -std::array, Benchmarks::COUNT> initArgs(bool errol = false) { - std::array, Benchmarks::COUNT> args; - args[Benchmarks::DRAGON4] = { "dragon4" , Benchmarks::dragon4 , true , 10 }; - args[Benchmarks::ERROL3] = { "errol3" , Benchmarks::errol3 , errol }; - args[Benchmarks::TO_STRING] = { "std::to_string" , Benchmarks::to_string , ERROL_SUPPORTED }; - args[Benchmarks::FMT_FORMAT] = { "fmt::format" , Benchmarks::fmt_format , true }; - args[Benchmarks::NETLIB] = { "netlib" , Benchmarks::netlib , NETLIB_SUPPORTED && std::is_same_v, 10 }; - args[Benchmarks::SNPRINTF] = { "snprintf" , Benchmarks::snprintf , true }; - args[Benchmarks::GRISU2] = { "grisu2" , Benchmarks::grisu2 , std::is_same_v }; - args[Benchmarks::GRISU_EXACT] = { "grisu_exact" , Benchmarks::grisu_exact , true }; - args[Benchmarks::SCHUBFACH] = { "schubfach" , Benchmarks::schubfach , true }; - args[Benchmarks::DRAGONBOX] = { "dragonbox" , Benchmarks::dragonbox , true }; - args[Benchmarks::RYU] = { "ryu" , Benchmarks::ryu , true }; - args[Benchmarks::TEJU_JAGUA] = { "teju_jagua" , Benchmarks::teju_jagua , true }; - args[Benchmarks::DOUBLE_CONVERSION] = { "double_conversion" , Benchmarks::double_conversion , true }; - args[Benchmarks::ABSEIL] = { "abseil" , Benchmarks::abseil , ABSEIL_SUPPORTED }; - args[Benchmarks::STD_TO_CHARS] = { "std::to_chars" , Benchmarks::std_to_chars , TO_CHARS_SUPPORTED }; - args[Benchmarks::GRISU3] = { "grisu3" , Benchmarks::grisu3 , std::is_same_v }; - args[Benchmarks::SWIFT_DTOA] = { "SwiftDtoa" , Benchmarks::swiftDtoa , SWIFT_LIB_SUPPORTED }; - args[Benchmarks::YY_DOUBLE] = { "yy_double" , Benchmarks::yy_double , YY_DOUBLE_SUPPORTED && std::is_same_v }; +std::vector> initArgs(bool use_errol = false, size_t repeat = 0, size_t fixed_size = 0) { + std::vector> args; + if (fixed_size == 0) { // shortest-length representation + namespace s = BenchmarkShortest; + args.emplace_back("dragon4" , wrap(s::dragon4) , true , 10); + args.emplace_back("netlib" , wrap(s::netlib) , NETLIB_SUPPORTED && std::is_same_v , 10); + args.emplace_back("errol3" , wrap(s::errol3) , ERROL_SUPPORTED && use_errol); + args.emplace_back("fmt_format" , wrap(s::fmt_format) , true); + // args.emplace_back("grisu2" , wrap(s::grisu2) , std::is_same_v); + args.emplace_back("grisu3" , wrap(s::grisu3) , std::is_same_v); + args.emplace_back("grisu_exact" , wrap(s::grisu_exact) , true); + args.emplace_back("schubfach" , wrap(s::schubfach) , true); + args.emplace_back("dragonbox" , wrap(s::dragonbox) , true); + args.emplace_back("ryu" , wrap(s::ryu) , true); + args.emplace_back("teju_jagua" , wrap(s::teju_jagua) , true); + args.emplace_back("double_conversion" , wrap(s::double_conversion) , true); + args.emplace_back("swiftDtoa" , wrap(s::swiftDtoa) , SWIFT_LIB_SUPPORTED); + args.emplace_back("yy_double" , wrap(s::yy_double) , YY_DOUBLE_SUPPORTED && std::is_same_v); + args.emplace_back("std::to_chars" , wrap(s::std_to_chars) , TO_CHARS_SUPPORTED); + // to_string, snprintf and abseil do not support shortest-length representation + // grisu2 does not round-trip correctly + } else { // fixed-length representation + fmt::println("# testing fixed-size output to {} digits", fixed_size); + BenchArgs::initFixedSize(fixed_size); + + namespace f = BenchmarkFixedSize; + args.emplace_back("dragon4" , wrap(f::dragon4) , true , 10); + args.emplace_back("netlib" , wrap(f::netlib) , NETLIB_SUPPORTED , 10); + args.emplace_back("abseil" , wrap(f::abseil) , ABSEIL_SUPPORTED); + args.emplace_back("snprintf" , wrap(f::snprintf) , true); + args.emplace_back("fmt_format" , wrap(f::fmt_format) , true); + args.emplace_back("ryu" , wrap(f::ryu) , std::is_same_v); + args.emplace_back("double_conversion" , wrap(f::double_conversion) , true); + args.emplace_back("std::to_chars" , wrap(f::std_to_chars) , TO_CHARS_SUPPORTED); + } + + if (repeat > 0) { + fmt::println("# forcing repeat count to {}", repeat); + for (auto &arg : args) + arg.testRepeat = repeat; + } + return args; }; -} // namespace Benchmarks - #endif diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index b8e2937..da6a2d0 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -25,11 +25,9 @@ #include #include -using Benchmarks::BenchArgs; - template void evaluateProperties(const std::vector> &lines, - const std::array, Benchmarks::COUNT> &args, + const std::vector> &args, const std::vector &algo_filter) { evaluate_properties_helper(lines, algo_filter, args); } @@ -44,7 +42,7 @@ struct diy_float_t { template void process(const std::vector> &lines, - const std::array, Benchmarks::COUNT> &args, + const std::vector> &args, const std::vector &algo_filter) { // We have a special algorithm for the string generation: if (!algo_filtered_out("just_string", algo_filter)) { @@ -147,6 +145,8 @@ int main(int argc, char **argv) { options.add_options() ("f,file", "File name.", cxxopts::value()->default_value("")) + ("F,fixed", "Fixed-point representation.", + cxxopts::value()->default_value("0")) ("v,volume", "Volume (number of floats generated).", cxxopts::value()->default_value("100000")) ("m,model", "Random Model.", @@ -168,7 +168,6 @@ int main(int argc, char **argv) { fmt::print("{}\n", options.help()); return EXIT_SUCCESS; } - const size_t repeat = result["repeat"].as(); const bool single = result["single"].as(); const auto filter = result.count("algo-filter") ? result["algo-filter"].as>() @@ -195,21 +194,14 @@ int main(int argc, char **argv) { numbers = fileload(filename); } - std::variant, Benchmarks::COUNT>, - std::array, Benchmarks::COUNT>> algorithms; + std::variant>, std::vector>> algorithms; const bool errol = result["errol"].as(); + const size_t repeat = result["repeat"].as(); + const size_t fixed_size = result["fixed"].as(); if (single) - algorithms = Benchmarks::initArgs(errol); + algorithms = initArgs(errol, repeat, fixed_size); else - algorithms = Benchmarks::initArgs(errol); - - if(repeat > 0) { - fmt::println("# forcing repeat count to {}", repeat); - std::visit([repeat](auto &args) { - for (auto &arg : args) - arg.testRepeat = repeat; - }, algorithms); - } + algorithms = initArgs(errol, repeat, fixed_size); const bool test = result["test"].as(); std::visit([test, &filter](const auto &lines, const auto &args) { @@ -233,6 +225,7 @@ int main(int argc, char **argv) { fmt::println("\nEXAMPLES:"); fmt::println(" ./benchmark --single # Run benchmark with single precision (float)"); fmt::println(" ./benchmark --file=data/canada.txt # Run benchmark using numbers from a file"); + fmt::println(" ./benchmark --fixed=10 # Test fixed-point representation instead of shortest length"); fmt::println(" ./benchmark --test # Test correctness instead of performance"); fmt::println(" ./benchmark --volume=1000 --model=uniform # Generate 1000 uniform random numbers"); fmt::println(" ./benchmark --algo-filter=ryu,grisu # Only test algorithms containing 'ryu' or 'grisu'"); diff --git a/benchmarks/benchutil.h b/benchmarks/benchutil.h index c76e6aa..523c3c1 100644 --- a/benchmarks/benchutil.h +++ b/benchmarks/benchutil.h @@ -13,8 +13,6 @@ #include "algorithms.h" #include "counters/event_counter.h" -using Benchmarks::BenchArgs; - event_collector collector; bool algo_filtered_out(const std::string &algo_name, @@ -50,11 +48,11 @@ concept TestCaseRange template requires TestCaseRange void evaluate_properties_helper(Range&& cases, const std::vector &algo_filter, - std::variant, Benchmarks::COUNT>, bool> argsOpt) { + std::variant>, bool> argsOpt) { fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization"); const auto args = std::holds_alternative(argsOpt) - ? Benchmarks::initArgs(std::get(argsOpt)) - : std::get, Benchmarks::COUNT>>(argsOpt); + ? initArgs(std::get(argsOpt)) + : std::get>>(argsOpt); // Get number of cases for progress display uint64_t total = 0; @@ -69,7 +67,7 @@ void evaluate_properties_helper(Range&& cases, fmt::println("# skipping {}", algo.name); continue; } - if (algo.func == Benchmarks::dragonbox) { + if (algo.name == "dragonbox") { fmt::println("# skipping {} because it is the reference.", algo.name); continue; } @@ -102,7 +100,7 @@ void evaluate_properties_helper(Range&& cases, // the shortest representation, which is not necessarily the same as the // representation using the fewest significant digits. // So we use dragonbox, which serves as the reference implementation. - const size_t vRef = Benchmarks::dragonbox(d, bufRef); + const size_t vRef = BenchmarkShortest::dragonbox(d, bufRef); const size_t vAlgo = algo.func(d, bufAlgo); std::string_view svRef{bufRef.data(), vRef}, diff --git a/benchmarks/exhaustivefloat32.cpp b/benchmarks/exhaustivefloat32.cpp index d284c1d..85866f9 100644 --- a/benchmarks/exhaustivefloat32.cpp +++ b/benchmarks/exhaustivefloat32.cpp @@ -8,8 +8,6 @@ #include "floatutils.h" #include "benchutil.h" -using Benchmarks::BenchArgs; - void run_exhaustive32(bool errol, const std::vector& algo_filter = {}) { static_assert(sizeof(float) == sizeof(uint32_t)); auto floats_view diff --git a/benchmarks/random_generators.h b/benchmarks/random_generators.h index 17e21ac..b1406b9 100644 --- a/benchmarks/random_generators.h +++ b/benchmarks/random_generators.h @@ -33,8 +33,8 @@ template struct integer_uniform_generator : float_number_generator { std::random_device rd; std::mt19937_64 gen; - std::uniform_int_distribution dis; - explicit integer_uniform_generator(uint64_t a = 0, uint64_t b = 1) + std::uniform_int_distribution dis; + explicit integer_uniform_generator(long a = LONG_MIN, long b = LONG_MAX) : rd(), gen(rd()), dis(a, b) {} std::string describe() override { return std::string(