diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 64c3d3fc..e431cbcd 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -330,10 +330,17 @@ report_parse_error(UC const *p, parse_error error) { // Assuming that you use no more than 19 digits, this will // parse an ASCII string. +// +// store_spans is a *runtime* flag (not a template parameter, deliberately: a +// template would create a second instantiation of this whole function and the +// extra icache pressure wipes out the gain). When false, the integer/fraction +// spans (read only by the rare digit_comp slow path) are not materialized, +// which keeps the fat parsed_number_string_t off the hot path. The caller +// re-parses with store_spans=true if the slow path is actually reached. template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t -parse_number_string(UC const *p, UC const *pend, - parse_options_t options) noexcept { +parse_number_string(UC const *p, UC const *pend, parse_options_t options, + bool store_spans = true) noexcept { chars_format const fmt = detail::adjust_for_feature_macros(options.format); UC const decimal_point = options.decimal_point; @@ -402,7 +409,9 @@ parse_number_string(UC const *p, UC const *pend, } UC const *const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = span(start_digits, size_t(digit_count)); + if (store_spans) { + answer.integer = span(start_digits, size_t(digit_count)); + } FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { // at least 1 digit in integer part, without leading zeros if (digit_count == 0) { @@ -429,7 +438,9 @@ parse_number_string(UC const *p, UC const *pend, i = i * 10 + digit; // in rare cases, this will overflow, but that's ok } exponent = before - p; - answer.fraction = span(before, size_t(p - before)); + if (store_spans) { + answer.fraction = span(before, size_t(p - before)); + } digit_count -= 
exponent; } FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { @@ -514,29 +525,35 @@ parse_number_string(UC const *p, UC const *pend, if (digit_count > 19) { answer.too_many_digits = true; - // Let us start again, this time, avoiding overflows. - // We don't need to call if is_integer, since we use the - // pre-tokenized spans from above. - i = 0; - p = answer.integer.ptr; - UC const *int_end = p + answer.integer.len(); - uint64_t const minimal_nineteen_digit_integer{1000000000000000000}; - while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - UC('0')); - ++p; - } - if (i >= minimal_nineteen_digit_integer) { // We have a big integer - exponent = end_of_integer_part - p + exp_number; - } else { // We have a value with a fractional component. - p = answer.fraction.ptr; - UC const *frac_end = p + answer.fraction.len(); - while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + // The truncation recompute below reads the integer/fraction spans. When + // store_spans is false we didn't materialize them, so just flag + // too_many_digits; the caller re-parses with store_spans=true to obtain + // the corrected mantissa/exponent before taking the slow path. + if (store_spans) { + // Let us start again, this time, avoiding overflows. + // We don't need to check is_integer here, since we use the + // pre-tokenized spans from above. + i = 0; + p = answer.integer.ptr; + UC const *int_end = p + answer.integer.len(); + uint64_t const minimal_nineteen_digit_integer{1000000000000000000}; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { i = i * 10 + uint64_t(*p - UC('0')); ++p; } - exponent = answer.fraction.ptr - p + exp_number; + if (i >= minimal_nineteen_digit_integer) { // We have a big integer + exponent = end_of_integer_part - p + exp_number; + } else { // We have a value with a fractional component. 
+ p = answer.fraction.ptr; + UC const *frac_end = p + answer.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; + } + // We have now corrected both exponent and i, to a truncated value } - // We have now corrected both exponent and i, to a truncated value } } answer.exponent = exponent; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 3e91c57b..479febcb 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -197,6 +197,39 @@ using parse_options = parse_options_t; #define fastfloat_really_inline inline __attribute__((always_inline)) #endif +// Branch-probability hint marking the rare slow-path branches as cold, so the +// optimizer keeps the out-of-line slow-path re-parse off the hot path (and does +// not duplicate the force-inlined hot scanner into the caller, which bloated +// the hot frame and hurt ILP on some targets). Used at the call site as +// if fastfloat_unlikely(cond) { ... } +// (the macro supplies the parentheses). It expands to the standard [[unlikely]] +// attribute when supported, otherwise to __builtin_expect on GCC/Clang, or +// to a no-op elsewhere (e.g. pre-C++20 MSVC, which has no equivalent hint). +#ifdef __has_cpp_attribute +#if __has_cpp_attribute(unlikely) >= 201803L +#define FASTFLOAT_USE_UNLIKELY_ATTR 1 +#endif +#endif + +#ifdef FASTFLOAT_USE_UNLIKELY_ATTR +// We have to disable -Wc++20-extensions for the [[unlikely]] attribute +// See comment for @jwakely at +// https://github.com/fastfloat/fast_float/pull/387#discussion_r3366943539 +// This is unfortunate. 
+#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++20-extensions" +#endif +#define fastfloat_unlikely(x) (x) [[unlikely]] +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#elif defined(__GNUC__) || defined(__clang__) +#define fastfloat_unlikely(x) (__builtin_expect(!!(x), 0)) +#else +#define fastfloat_unlikely(x) (x) +#endif + #ifndef FASTFLOAT_ASSERT #define FASTFLOAT_ASSERT(x) \ { ((void)(x)); } diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index ff9c53d0..10715732 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -289,6 +289,23 @@ from_chars_advanced(parsed_number_string_t &pns, T &value) noexcept { return answer; } +// Slow path: re-parse materializing the integer/fraction spans the hot no-span +// parse skipped, then run the full algorithm. The two callers reach it only +// through a fastfloat_unlikely branch, so the optimizer keeps this re-parse off +// the hot path on its own (no function-level noinline needed). +// from_chars_advanced already handles both the too_many_digits disambiguation +// and the am.power2<0 digit_comp recompute, so both slow branches collapse to +// one helper call. +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +parse_number_slow_path(UC const *first, UC const *last, T &value, + parse_options_t options, bool bjf) noexcept { + parsed_number_string_t pns = + bjf ? 
parse_number_string(first, last, options, true) + : parse_number_string(first, last, options, true); + return from_chars_advanced(pns, value); +} + template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars_float_advanced(UC const *first, UC const *last, T &value, @@ -312,10 +329,15 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value, answer.ptr = first; return answer; } + bool const bjf = uint64_t(fmt & detail::basic_json_fmt) != 0; + + // Fast path: parse WITHOUT materializing the integer/fraction spans (read + // only by the rare slow paths). Skipping their stores keeps the fat + // parsed_number_string_t off the hot path. store_spans is a runtime argument, + // so this reuses the single parse_number_string instantiation. parsed_number_string_t pns = - uint64_t(fmt & detail::basic_json_fmt) - ? parse_number_string(first, last, options) - : parse_number_string(first, last, options); + bjf ? parse_number_string(first, last, options, false) + : parse_number_string(first, last, options, false); if (!pns.valid) { if (uint64_t(fmt & chars_format::no_infnan)) { answer.ec = std::errc::invalid_argument; @@ -326,8 +348,43 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value, } } - // call overload that takes parsed_number_string_t directly. - return from_chars_advanced(pns, value); + // Slow path A (rare): > 19 significant digits. The no-span parse left the + // mantissa un-truncated and skipped the span-based recompute; the cold helper + // re-parses with spans and runs the full algorithm. 
+#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++20-extensions" +#endif + if fastfloat_unlikely (pns.too_many_digits) { + return parse_number_slow_path(first, last, value, options, bjf); + } + + answer.ec = std::errc(); // be optimistic + answer.ptr = pns.lastmatch; + + if (clinger_fast_path_impl(pns.mantissa, pns.exponent, pns.negative, value)) { + return answer; + } + + adjusted_mantissa am = + compute_float>(pns.exponent, pns.mantissa); + // Slow path B (rare): Eisel-Lemire could not resolve; digit_comp needs the + // integer/fraction spans. Route to the cold helper: its Clinger fast-path + // check simply fails again, as it did here, and the cold re-parse plus + // digit_comp via from_chars_advanced reproduces this branch. + if fastfloat_unlikely (am.power2 < 0) { + return parse_number_slow_path(first, last, value, options, bjf); + } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + to_float(pns.negative, am, value); + // Test for over/underflow. + if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || + am.power2 == binary_format::infinite_power()) { + answer.ec = std::errc::result_out_of_range; + } + return answer; } template