From f47a10fbe15c8e1da4e77c9296deb9448a7ee143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Thu, 23 Apr 2026 22:26:20 +0000 Subject: [PATCH 01/12] grib1-to-grib2: Add --on-error and --default-ensemble-size options Batch conversion runs over large archives commonly hit a handful of per-message defects (e.g. producer-side metadata such as numberOfForecastsInEnsemble=0 while number!=0, or fields that no recipe can encode). Previously any single such message aborted the whole run. - Add --on-error {abort|log-and-skip|skip}, defaulting to log-and-skip, wrapping the per-message conversion body in a try/catch and reporting a skipped-message tally at the end. - Add --default-ensemble-size N: when numberOfForecastsInEnsemble is 0 but number!=0, substitute N instead of throwing. Default 0 preserves the previous strict behaviour. - Thread defaultEnsembleSize through mapGrib1ToGrib2 as a new trailing parameter (default 0) so existing call sites are unaffected. These flags make the tool usable in large parallel conversion pipelines while keeping the safe strict default available. --- src/multio/tools/grib1-to-grib2.cc | 77 ++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 0bd1c3ec7..7953db56a 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -245,7 +245,7 @@ void handleStepRange(metkit::codes::CodesHandle& h, dm::FullMarsRecord& mars, in // Perform grib1ToGrib2 mapping - for a few marskeys we have to rely on eccodes namespace iterator. // E.g. the key "number" may be defined and set, although it has no meaning. 
void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMarsRecord& mars, dm::MiscRecord& misc, - int verbosity) { + int verbosity, long defaultEnsembleSize = 0) { mars.stream = dm::parseEntry(dm::STREAM, h); mars.type = dm::parseEntry(dm::TYPE, h); mars.klass = dm::parseEntry(dm::CLASS, h); @@ -366,7 +366,16 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa // If both are 0 it is likely a control forecast or no ensemble... if (number != 0 && numForecasts == 0) { - throw std::runtime_error("The value for key numberOfForecastsInEnsemble must not be 0"); + if (defaultEnsembleSize > 0) { + std::cout << "Warning: numberOfForecastsInEnsemble is 0 but number=" << number + << "; applying --default-ensemble-size=" << defaultEnsembleSize << std::endl; + numForecasts = defaultEnsembleSize; + } + else { + throw std::runtime_error( + "The value for key numberOfForecastsInEnsemble must not be 0 (use --default-ensemble-size " + "to supply a fallback, or --on-error log-and-skip to skip such messages)"); + } } if (numForecasts != 0) { mars.number.set(number); @@ -795,6 +804,30 @@ Discipline192Handling parseDiscipline192Handling(const std::string& str) { } +enum class OnErrorHandling : std::size_t +{ + Abort, + LogAndSkip, + Skip, +}; + +const std::unordered_map& onErrorHandlingMap() { + static const std::unordered_map map{ + {"abort", OnErrorHandling::Abort}, + {"log-and-skip", OnErrorHandling::LogAndSkip}, + {"skip", OnErrorHandling::Skip}}; + return map; +} + +OnErrorHandling parseOnErrorHandling(const std::string& str) { + const auto& map = onErrorHandlingMap(); + if (auto search = map.find(str); search != map.end()) { + return search->second; + } + throw std::runtime_error(std::string("Unsupported --on-error value: ") + str); +} + + class Grib1ToGrib2 final : public multio::MultioTool { public: // methods Grib1ToGrib2(int argc, char** argv); @@ -835,6 +868,8 @@ class Grib1ToGrib2 final : public multio::MultioTool { 
std::optional> mappingRules_ = mars2mars::allRulesNoWMOMapping(); Discipline192Handling discipline192Handling_ = Discipline192Handling::LogAndIgnore; + OnErrorHandling onErrorHandling_ = OnErrorHandling::LogAndSkip; + long defaultEnsembleSize_ = 0; }; Grib1ToGrib2::Grib1ToGrib2(int argc, char** argv) : multio::MultioTool{argc, argv} { @@ -871,6 +906,13 @@ Grib1ToGrib2::Grib1ToGrib2(int argc, char** argv) : multio::MultioTool{argc, arg "discipline-192", "Options on handling fields with discipline 192 (field that are ill-formed). Values: \"log-and-ignore\" " "(default), \"ignore\", \"try-to-handle\"")); + options_.push_back(new eckit::option::SimpleOption( + "on-error", + "How to handle per-message conversion errors. Values: \"abort\" (stop on first error), \"log-and-skip\" " + "(default, log the error and continue), \"skip\" (silently skip failing messages)")); + options_.push_back(new eckit::option::SimpleOption( + "default-ensemble-size", + "Fallback value used when numberOfForecastsInEnsemble is 0 but number is non-zero. Default: 0 (throw)")); } void Grib1ToGrib2::init(const eckit::option::CmdArgs& args) { @@ -928,6 +970,14 @@ void Grib1ToGrib2::init(const eckit::option::CmdArgs& args) { if (!discipline192.empty()) { discipline192Handling_ = parseDiscipline192Handling(discipline192); } + + std::string onError; + args.get("on-error", onError); + if (!onError.empty()) { + onErrorHandling_ = parseOnErrorHandling(onError); + } + + args.get("default-ensemble-size", defaultEnsembleSize_); } void Grib1ToGrib2::finish(const eckit::option::CmdArgs&) {} @@ -975,7 +1025,11 @@ void Grib1ToGrib2::execute(const eckit::option::CmdArgs& args) { metkit::mars2grib::Mars2Grib encoder{}; eckit::message::Message msg; + std::size_t msgIndex = 0; + std::size_t skippedCount = 0; while ((msg = reader.next())) { + ++msgIndex; + try { // Extract message from datahandle... 
we expect it to be a memory handle // TODO pgeier: Alternative would be to explicitly create a eckit::MemoryHandle and write to it std::unique_ptr dh{msg.readHandle()}; @@ -1041,7 +1095,7 @@ void Grib1ToGrib2::execute(const eckit::option::CmdArgs& args) { std::cout << "Extracting metadata..." << std::endl; } - extract::mapGrib1ToGrib2(marsKeys, *inputHandle.get(), mars, misc, verbosity_); + extract::mapGrib1ToGrib2(marsKeys, *inputHandle.get(), mars, misc, verbosity_, defaultEnsembleSize_); if (overwritePacking_) { @@ -1143,6 +1197,23 @@ void Grib1ToGrib2::execute(const eckit::option::CmdArgs& args) { write(*preparedHandle.get(), *outputFileHandle); } } + } + catch (const std::exception& e) { + if (onErrorHandling_ == OnErrorHandling::Abort) { + throw; + } + ++skippedCount; + if (onErrorHandling_ == OnErrorHandling::LogAndSkip) { + std::cerr << "Error converting message #" << msgIndex << ": " << e.what() + << " -- skipping" << std::endl; + } + continue; + } + } + + if (skippedCount > 0) { + std::cerr << "grib1-to-grib2: skipped " << skippedCount << " message(s) due to conversion errors" + << std::endl; } if (outputFileHandle) { From 06bfae448abe84ace1fa0ea0075e6e87fccfd3b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Fri, 24 Apr 2026 16:25:00 +0100 Subject: [PATCH 02/12] grib1-to-grib2: Fall back to lengthOf4DvarWindow for time window If misc.lengthOfTimeWindow is not set from the standard keys, read the GRIB1 local-section key lengthOf4DvarWindow (hours) from the input handle and use it, skipping the 0xFFFF missing sentinel. Fixes cases where 4D-Var analysis fields produced GRIB2 with a missing lengthOf4DvarWindow. 
--- src/multio/tools/grib1-to-grib2.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 7953db56a..69c25f074 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -398,6 +398,22 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa misc.lengthOfTimeWindow = dm::parseEntry(dm::LengthOfTimeWindow.withKey("lengthOfTimeWindow"), h); misc.lengthOfTimeWindowInSeconds = dm::parseEntry(dm::LengthOfTimeWindowInSeconds.withKey("lengthOfTimeWindowInSeconds"), h); + + // Fallback: for classic 4D-Var analysis messages, the window length is + // carried in the GRIB1 local-section key `lengthOf4DvarWindow` (in hours) + // rather than in `lengthOfTimeWindow`. If the primary source did not + // populate `misc.lengthOfTimeWindow`, read the 4D-Var key directly. + // + // No derivation from `anoffset` is performed: we only mirror what the + // producer placed in the input handle. The ecCodes "missing" sentinel + // (0xFFFF for a 16-bit unsigned field) is explicitly ignored so that + // metkit's analysis deduction keeps writing GRIB "missing" downstream. 
+ if (!misc.lengthOfTimeWindow.isSet() && h.has("lengthOf4DvarWindow")) { + long lengthOf4DvarWindowHours = h.getLong("lengthOf4DvarWindow"); + if (lengthOf4DvarWindowHours != 0xFFFF) { + misc.lengthOfTimeWindow.set(lengthOf4DvarWindowHours); + } + } misc.bitsPerValue = dm::parseEntry(dm::BitsPerValue.withKey("bitsPerValue").withDefault(24), h); // TODO pgeier readd maybe From 1bc20e1a41a92d974cce8d80af94afa64c0803fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Fri, 24 Apr 2026 22:57:21 +0100 Subject: [PATCH 03/12] grib1-to-grib2: Forward numberOfComponents and modelErrorType for eme For mars type=eme (4D-var model errors, EDA long-window 4Dvar system) the matching GRIB2 local definition 39 requires numberOfComponents and modelErrorType in addition to the standard window keys. These two values have no MARS equivalent and must be forwarded from the input GRIB1 handle (which already carries local definition 39). Adds the corresponding misc EntryDefs and MiscRecord slots, and populates them from the input handle in mapGrib1ToGrib2 when mars.type == "eme". The metkit encoder now consumes them via the new AnalysisType::ModelErrors variant. 
--- src/multio/datamod/GribKeys.h | 12 ++++++++++++ src/multio/datamod/MarsMiscGeo.h | 5 ++++- src/multio/tools/grib1-to-grib2.cc | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/multio/datamod/GribKeys.h b/src/multio/datamod/GribKeys.h index f67e32869..2aa74fe59 100644 --- a/src/multio/datamod/GribKeys.h +++ b/src/multio/datamod/GribKeys.h @@ -62,6 +62,18 @@ constexpr auto TypeOfProcessedDataEntry = .tagOptional() .withAccessor([](auto&& v) { return &v.typeOfProcessedData; }); +// Section 2 - local definition 39 (4D-var model errors, type=eme) + +constexpr auto NumberOfComponents = + EntryDef{"misc-numberOfComponents"} + .tagOptional() + .withAccessor([](auto&& v) { return &v.numberOfComponents; }); + +constexpr auto ModelErrorType = + EntryDef{"misc-modelErrorType"} + .tagOptional() + .withAccessor([](auto&& v) { return &v.modelErrorType; }); + // Section3 (more to be moved here from MarsMiscGeo.h) diff --git a/src/multio/datamod/MarsMiscGeo.h b/src/multio/datamod/MarsMiscGeo.h index 99f7c195c..58626887f 100644 --- a/src/multio/datamod/MarsMiscGeo.h +++ b/src/multio/datamod/MarsMiscGeo.h @@ -334,15 +334,18 @@ struct MiscRecord { EntryType_t bitsPerValue; EntryType_t laplacianOperator; EntryType_t subCentre; + EntryType_t numberOfComponents; + EntryType_t modelErrorType; static constexpr std::string_view record_name_ = "misc"; + static constexpr auto record_entries_ = std::make_tuple( TablesVersion, GeneratingProcessIdentifier, TypeOfProcessedDataEntry, InitialStep, TimeIncrementInSeconds, LengthOfTimeWindow, LengthOfTimeWindowInSeconds, BitmapPresent, MissingValue, TypeOfEnsembleForecast, NumberOfForecastsInEnsemble, SatelliteSeries, ScaleFactorOfCentralWaveNumber, ScaledValueOfCentralWaveNumber, Pv, ScaleFactorOfWaveDirections, ScaleFactorOfWaveFrequencies, WaveDirections, WaveFrequencies, BitsPerValue, - LaplacianOperator, SubCentre); + LaplacianOperator, SubCentre, NumberOfComponents, ModelErrorType); }; diff --git 
a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 69c25f074..52b7c6608 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -351,6 +351,21 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa // TODO pgeier set it again ?? misc.laplacianOperator = dm::parseEntry(dm::LaplacianOperator.withKey("laplacianOperator"), h); + // For type=eme (4D-var model errors, EDA long-window system) the GRIB1 + // input carries local definition 39 with componentIndex (= mars.number), + // numberOfComponents and modelErrorType. The metkit encoder deduces + // componentIndex from mars.number, but numberOfComponents and + // modelErrorType have no MARS equivalent and must be forwarded from the + // input handle. + if (mars.type.get() == "eme") { + if (h.has("numberOfComponents")) { + misc.numberOfComponents.set(h.getLong("numberOfComponents")); + } + if (h.has("modelErrorType")) { + misc.modelErrorType.set(h.getLong("modelErrorType")); + } + } + // Can not rely on "number" from mars key iterator... for reference data (with hdate) number // can be 0 but is not emitted although numberOfForecastsInEnsemble has a valid value // if (auto searchNumber = marsKeys.find("number"); searchNumber != marsKeys.end()) From ac762902b5d8a94f654b63e85ef9efc1bd321729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Sat, 25 Apr 2026 00:17:43 +0100 Subject: [PATCH 04/12] grib1-to-grib2: Forward pvPresent=false when input has NV=0 Hybrid-level messages whose source GRIB carried no vertical-coordinate parameters (NV=0, e.g. lnsp on ml) previously round-tripped with a spurious PV137 (1002-double) array because the metkit encoder's resolve_PvArray_or_throw fallback fires on an empty par dict. Forward an explicit suppression signal: - Add PVPresent to MiscRecord so it round-trips through dumpUnscopedRecord into par["PVPresent"]. 
- In mapGrib1ToGrib2, when the input handle has no pv key and reports NV=0, set misc.pvPresent=false. The matching metkit change honours this flag in LevelOp StageAllocate and emits PVPresent=0 with no pv key. Verified end-to-end on an:ml:12 lnsp sh: output NV=0 preserved (was NV=1002). No regression on hybrid messages with valid pv (NV=276). NOTE (revisit): pairs with the metkit-side commit honouring par["PVPresent"]=false. Once both land we can revisit whether resolve_PvArray_or_throw should still default to PV137 on an empty par. --- src/multio/datamod/MarsMiscGeo.h | 3 ++- src/multio/tools/grib1-to-grib2.cc | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/multio/datamod/MarsMiscGeo.h b/src/multio/datamod/MarsMiscGeo.h index 58626887f..f92c75660 100644 --- a/src/multio/datamod/MarsMiscGeo.h +++ b/src/multio/datamod/MarsMiscGeo.h @@ -336,6 +336,7 @@ struct MiscRecord { EntryType_t subCentre; EntryType_t numberOfComponents; EntryType_t modelErrorType; + EntryType_t pvPresent; static constexpr std::string_view record_name_ = "misc"; @@ -345,7 +346,7 @@ struct MiscRecord { LengthOfTimeWindow, LengthOfTimeWindowInSeconds, BitmapPresent, MissingValue, TypeOfEnsembleForecast, NumberOfForecastsInEnsemble, SatelliteSeries, ScaleFactorOfCentralWaveNumber, ScaledValueOfCentralWaveNumber, Pv, ScaleFactorOfWaveDirections, ScaleFactorOfWaveFrequencies, WaveDirections, WaveFrequencies, BitsPerValue, - LaplacianOperator, SubCentre, NumberOfComponents, ModelErrorType); + LaplacianOperator, SubCentre, NumberOfComponents, ModelErrorType, PVPresent); }; diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 52b7c6608..62b36424d 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -344,6 +344,19 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa misc.initialStep = dm::parseEntry(dm::InitialStep.withKey("initialStep"), h); 
misc.timeIncrementInSeconds = dm::parseEntry(dm::TimeIncrementInSeconds.withKey("timeIncrement"), h); misc.pv = dm::parseEntry(dm::Pv.withKey("pv"), h); + // Signal explicit absence of vertical-coordinate parameters to the + // encoder. Without this, mars2grib's `resolve_PvArray_or_throw` falls back + // to its PV137 default and writes a 1002-double PV array into the output + // for hybrid-level fields whose source GRIB had NV=0 (e.g. lnsp on ml). + // Detect "no PV" via the input handle's NV key and forward as + // `misc.pvPresent=false`; metkit's LevelOp suppresses PV emission when + // it sees this flag. + if (!misc.pv.isSet()) { + long nv = h.has("NV") ? h.getLong("NV") : 0; + if (nv == 0) { + misc.pvPresent.set(false); + } + } misc.bitmapPresent = dm::parseEntry(dm::BitmapPresent.withKey("bitmapPresent"), h); handleMissingValue(h, misc); From dd17543d310efff82014e190ab99c06d6dff1218 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Sat, 25 Apr 2026 00:40:53 +0100 Subject: [PATCH 05/12] grib1-to-grib2: Refuse spectral_complex messages with IFS laplacianOperator=9.9 sentinel Some IFS-produced spectral_complex messages (notably SPP random fields, type=al, paramId 213131-213160) carry laplacianOperator=9.9 as an IFS sentinel without actually pre-laplacian-scaling the stored coefficients. Re-encoding such messages causes ecCodes' grib_ieee_to_long to assert because (T*(T+1))^9.9 multiplied by the raw coefficients overflows IEEE32 (FLT_MAX). The assertion is unrecoverable from inside ecCodes and bypasses the existing log-and-skip path, aborting the whole tool. Reject these messages up front with eckit::BadValue. 
The existing top-level catch in grib1-to-grib2.cc treats BadValue as recoverable: - --on-error=abort: terminates with "Bad value:" prefix - --on-error=log-and-skip: reports "skipped 1 message(s)", exits 0 SeriousBug would have been semantically wrong here: the precondition violation is both expected (legacy IFS encoding bug) and recoverable. Verified by encoding fc:al:3 reproducer (180 msgs): 90 real laplacian messages (paramIds 213101-213130) succeed; 90 sentinel messages are skipped cleanly under --on-error=log-and-skip. NOTE (revisit): upstream IFS encoding bug. Once IFS stops emitting sentinel laplacianOperator=9.9 (or sets it to 0 when no scaling was actually applied), this guard becomes dead code. --- src/multio/tools/grib1-to-grib2.cc | 60 ++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 62b36424d..1c0502cd7 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -10,7 +10,9 @@ #include #include +#include #include +#include #include #include #include @@ -209,6 +211,59 @@ void handleMissingValue(metkit::codes::CodesHandle& h, dm::MiscRecord& misc) { misc.missingValue.set(missingValue); } +// Spectral_complex packing in ecCodes pre-multiplies each spherical harmonic +// coefficient (m,n) by `(n*(n+1))^laplacianOperator` before quantising it. +// The packed referenceValue and the unpacked low-order subset are stored as +// IEEE 32-bit floats; any product that exceeds FLT_MAX (~3.4e38) trips an +// unrecoverable assertion inside `grib_ieee_to_long` and aborts the whole +// process - bypassing the per-message --on-error handling. +// +// IFS occasionally writes type=al SPP random fields with the sentinel value +// `laplacianOperator=9.9` even though the source data carries no +// pre-laplacian scaling. 
Combined with the truncation=399 of those fields, +// this yields (399*400)^9.9 ~= 1.6e51, easily overflowing FLT_MAX once +// multiplied by the actual coefficient values. Verified by `grib_set +// laplacianOperator=0` on such a message: re-encoding then succeeds and the +// codedValues match the input exactly, confirming the source data is not +// pre-laplacian-scaled. +// +// We can't compute a tight overflow bound generically from the values array +// (the largest coefficient lives at low n while the largest scaling lives at +// high n; without traversing the spectral (m,n) layout we'd get too many +// false positives). Instead, refuse to encode messages whose laplacianOperator +// is the known IFS sentinel `9.9` by throwing eckit::BadValue. The exception +// is recoverable: --on-error=log-and-skip will skip the offending message and +// continue, while --on-error=abort will propagate and stop the conversion. +// +// NOTE (revisit): the underlying issue is an upstream IFS encoding bug; once +// IFS stops emitting `laplacianOperator=9.9` for non-laplacian-scaled SPP +// random fields this guard becomes dead code and can be removed. +void validateSpectralComplexNoOverflow(const dm::FullMarsRecord& mars, const dm::MiscRecord& misc, + const std::vector& /*values*/) { + if (!mars.packing.isSet() || mars.packing.get() != "complex") { + return; + } + if (!misc.laplacianOperator.isSet()) { + return; + } + + constexpr double kIfsSentinel = 9.9; + constexpr double kEpsilon = 1.0e-3; + const double p = misc.laplacianOperator.get(); + if (std::fabs(p - kIfsSentinel) > kEpsilon) { + return; + } + + std::ostringstream oss; + oss << "spectral_complex message carries IFS sentinel laplacianOperator=" << p + << " (truncation=" << (mars.truncation.isSet() ? mars.truncation.get() : -1L) + << ", type=" << (mars.type.isSet() ? mars.type.get() : std::string{""}) + << ", paramId=" << (mars.param.isSet() ? mars.param.get() : -1L) << "). 
Re-encoding such messages would " + << "trigger an unrecoverable ecCodes IEEE32 overflow in `grib_ieee_to_long` because the " + << "stored data is not actually pre-laplacian-scaled. Refusing to encode."; + throw eckit::BadValue(oss.str(), Here()); +} + std::optional> parseRange(const std::string& s) { static const std::regex re(R"(^\s*([+-]?\d+)\s*-\s*([+-]?\d+)\s*$)"); std::smatch m; @@ -1219,6 +1274,11 @@ void Grib1ToGrib2::execute(const eckit::option::CmdArgs& args) { const auto marsConfig = dm::dumpRecord(mars); const auto miscConfig = dm::dumpUnscopedRecord(misc); + // Pre-encode validation: catch spectral_complex laplacian-scaling + // overflows before they reach ecCodes (where they would trip an + // unrecoverable assertion in `grib_ieee_to_long`). + extract::validateSpectralComplexNoOverflow(mars, misc, values); + // Call the GRIB2 encoder in metkit auto preparedHandle = encoder.encode(values, marsConfig, miscConfig); From 935fa623255c370466bd7f842ef7d199f87b17b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Sat, 25 Apr 2026 18:14:30 +0100 Subject: [PATCH 06/12] grib1-to-grib2: Forward mars.number for type=eme The existing ensemble branch requires numberOfForecastsInEnsemble (absent for eme) to set mars.number. The encoder's analysis ModelErrors path needs mars.number to deduce componentIndex; forward it unconditionally inside the eme block. 
--- src/multio/tools/grib1-to-grib2.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 1c0502cd7..5dc321b4b 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -432,6 +432,14 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa if (h.has("modelErrorType")) { misc.modelErrorType.set(h.getLong("modelErrorType")); } + // mars.number == componentIndex for eme; the encoder reads it via the + // analysis ModelErrors path (see metkit analysisEncoding.h). + // Forwarded here unconditionally because the standard ensemble branch + // below requires `numberOfForecastsInEnsemble` (absent for eme) and + // would otherwise leave `mars.number` unset. + if (h.has("number")) { + mars.number.set(h.getLong("number")); + } } // Can not rely on "number" from mars key iterator... for reference data (with hdate) number From e75bfa6f6db8d4d739817bf891414d8823eb6cb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Mon, 27 Apr 2026 18:47:53 +0100 Subject: [PATCH 07/12] grib1-to-grib2: Forward numberOfComponents, modelErrorType and number for type=me Extend the existing type=eme forwarding block to also fire for type=me (model errors). Both types use GRIB1 local definition 39 with componentIndex (= mars.number), numberOfComponents and modelErrorType. The metkit encoder's analysis ModelErrors path now handles both. --- src/multio/tools/grib1-to-grib2.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 5dc321b4b..4284fc99e 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -419,24 +419,24 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa // TODO pgeier set it again ?? 
misc.laplacianOperator = dm::parseEntry(dm::LaplacianOperator.withKey("laplacianOperator"), h); - // For type=eme (4D-var model errors, EDA long-window system) the GRIB1 - // input carries local definition 39 with componentIndex (= mars.number), - // numberOfComponents and modelErrorType. The metkit encoder deduces - // componentIndex from mars.number, but numberOfComponents and - // modelErrorType have no MARS equivalent and must be forwarded from the - // input handle. - if (mars.type.get() == "eme") { + // For type=eme (ensemble model errors) or type=me (model errors) the + // GRIB1 input carries local definition 39 with componentIndex + // (= mars.number), numberOfComponents and modelErrorType. The metkit + // encoder deduces componentIndex from mars.number, but + // numberOfComponents and modelErrorType have no MARS equivalent and + // must be forwarded from the input handle. + if (mars.type.get() == "eme" || mars.type.get() == "me") { if (h.has("numberOfComponents")) { misc.numberOfComponents.set(h.getLong("numberOfComponents")); } if (h.has("modelErrorType")) { misc.modelErrorType.set(h.getLong("modelErrorType")); } - // mars.number == componentIndex for eme; the encoder reads it via the - // analysis ModelErrors path (see metkit analysisEncoding.h). + // mars.number == componentIndex for eme/me; the encoder reads it via + // the analysis ModelErrors path (see metkit analysisEncoding.h). // Forwarded here unconditionally because the standard ensemble branch - // below requires `numberOfForecastsInEnsemble` (absent for eme) and - // would otherwise leave `mars.number` unset. + // below requires `numberOfForecastsInEnsemble` (absent for eme/me) + // and would otherwise leave `mars.number` unset. 
if (h.has("number")) { mars.number.set(h.getLong("number")); } From 5565ec5ec0e7fb9a75d9e207bd89a66bb9337b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Mon, 27 Apr 2026 18:52:10 +0100 Subject: [PATCH 08/12] grib1-to-grib2: Populate mars.iteration and forward totalNumberOfIterations for type=4i For type=4i (4D-var analysis iterations) the GRIB1 input carries local definition 38 with iterationNumber and totalNumberOfIterations. iterationNumber is now deduced by metkit's IterationConcept from mars.iteration (= ECMWF MARS keyword "iteration"), so this tool only needs to populate that mars key. totalNumberOfIterations has no MARS representation and is forwarded via the parameter dictionary. Concretely: * mars.iteration = dm::parseEntry(dm::ITERATION, h) is set after mars.anoffset. * The misc data model loses the IterationNumber EntryDef (GribKeys.h) and the iterationNumber field (MarsMiscGeo.h); multio no longer forwards iterationNumber explicitly. * The new type=4i block in grib1-to-grib2.cc forwards only totalNumberOfIterations. Pairs with metkit commit mars2grib: Refactor type=4i/eme/me handling into orthogonal Iteration, ModelErrors and Analysis concepts which adds the IterationConcept that consumes mars.iteration. 
--- src/multio/datamod/GribKeys.h | 15 ++++++++++++++- src/multio/datamod/MarsMiscGeo.h | 3 ++- src/multio/tools/grib1-to-grib2.cc | 12 ++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/multio/datamod/GribKeys.h b/src/multio/datamod/GribKeys.h index 2aa74fe59..f679cc99b 100644 --- a/src/multio/datamod/GribKeys.h +++ b/src/multio/datamod/GribKeys.h @@ -62,7 +62,7 @@ constexpr auto TypeOfProcessedDataEntry = .tagOptional() .withAccessor([](auto&& v) { return &v.typeOfProcessedData; }); -// Section 2 - local definition 39 (4D-var model errors, type=eme) +// Section 2 - local definition 39 (4D-var model errors, type=eme/me) constexpr auto NumberOfComponents = EntryDef{"misc-numberOfComponents"} @@ -74,6 +74,19 @@ constexpr auto ModelErrorType = .tagOptional() .withAccessor([](auto&& v) { return &v.modelErrorType; }); +// Section 2 - local definition 38 (4D-var analysis iterations, type=4i) +// +// `iterationNumber` is forwarded directly through MARS as +// `mars.iteration` (see `dm::ITERATION` in MarsKeys.h); the metkit +// encoder reads it via the iteration deduction. Only the +// `totalNumberOfIterations` key, which has no MARS equivalent, is +// forwarded through misc. 
+ +constexpr auto TotalNumberOfIterations = + EntryDef{"misc-totalNumberOfIterations"} + .tagOptional() + .withAccessor([](auto&& v) { return &v.totalNumberOfIterations; }); + // Section3 (more to be moved here from MarsMiscGeo.h) diff --git a/src/multio/datamod/MarsMiscGeo.h b/src/multio/datamod/MarsMiscGeo.h index f92c75660..fd67ee898 100644 --- a/src/multio/datamod/MarsMiscGeo.h +++ b/src/multio/datamod/MarsMiscGeo.h @@ -336,6 +336,7 @@ struct MiscRecord { EntryType_t subCentre; EntryType_t numberOfComponents; EntryType_t modelErrorType; + EntryType_t totalNumberOfIterations; EntryType_t pvPresent; @@ -346,7 +347,7 @@ struct MiscRecord { LengthOfTimeWindow, LengthOfTimeWindowInSeconds, BitmapPresent, MissingValue, TypeOfEnsembleForecast, NumberOfForecastsInEnsemble, SatelliteSeries, ScaleFactorOfCentralWaveNumber, ScaledValueOfCentralWaveNumber, Pv, ScaleFactorOfWaveDirections, ScaleFactorOfWaveFrequencies, WaveDirections, WaveFrequencies, BitsPerValue, - LaplacianOperator, SubCentre, NumberOfComponents, ModelErrorType, PVPresent); + LaplacianOperator, SubCentre, NumberOfComponents, ModelErrorType, TotalNumberOfIterations, PVPresent); }; diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 4284fc99e..5c30393b5 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -314,6 +314,7 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa } mars.anoffset = dm::parseEntry(dm::ANOFFSET, h); + mars.iteration = dm::parseEntry(dm::ITERATION, h); mars.ident = dm::parseEntry(dm::IDENT, h); mars.instrument = dm::parseEntry(dm::INSTRUMENT, h); mars.channel = dm::parseEntry(dm::CHANNEL, h); @@ -442,6 +443,17 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa } } + // For type=4i (4D-var analysis iterations) the GRIB1 input carries + // local definition 38 with iterationNumber and totalNumberOfIterations. 
+ // `iterationNumber` is forwarded directly through MARS as + // `mars.iteration` (set above); only `totalNumberOfIterations` has no + // MARS equivalent and must be forwarded through misc. + if (mars.type.get() == "4i") { + if (h.has("totalNumberOfIterations")) { + misc.totalNumberOfIterations.set(h.getLong("totalNumberOfIterations")); + } + } + // Can not rely on "number" from mars key iterator... for reference data (with hdate) number // can be 0 but is not emitted although numberOfForecastsInEnsemble has a valid value // if (auto searchNumber = marsKeys.find("number"); searchNumber != marsKeys.end()) From 47df3f784706cc0daedb8a59a13fcf6773ef52c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Thu, 14 May 2026 00:37:16 +0100 Subject: [PATCH 09/12] grib1-to-grib2: Extract numberOfFrequencies for satellite btmp encoding Add NumberOfFrequencies to MiscRecord data model and extract it from the input GRIB handle. Required for section 2 local definition 37 (brightness temperature) encoding via the metkit encoder. 
--- src/multio/datamod/GribKeys.h | 4 ++++ src/multio/datamod/MarsMiscGeo.h | 8 +++++--- src/multio/tools/grib1-to-grib2.cc | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/multio/datamod/GribKeys.h b/src/multio/datamod/GribKeys.h index f679cc99b..2ccef26eb 100644 --- a/src/multio/datamod/GribKeys.h +++ b/src/multio/datamod/GribKeys.h @@ -216,6 +216,10 @@ constexpr auto SatelliteSeries = EntryDef{"misc-satelliteSeries"} .tagOptional() .withAccessor([](auto&& v) { return &v.satelliteSeries; }); +constexpr auto NumberOfFrequencies = + EntryDef{"misc-numberOfFrequencies"} + .tagOptional() + .withAccessor([](auto&& v) { return &v.numberOfFrequencies; }); // Horizontal Keys constexpr auto PressureUnits = EntryDef{"misc-pressureUnits"} diff --git a/src/multio/datamod/MarsMiscGeo.h b/src/multio/datamod/MarsMiscGeo.h index fd67ee898..d6ddc65d6 100644 --- a/src/multio/datamod/MarsMiscGeo.h +++ b/src/multio/datamod/MarsMiscGeo.h @@ -324,6 +324,7 @@ struct MiscRecord { EntryType_t typeOfEnsembleForecast; EntryType_t numberOfForecastsInEnsemble; EntryType_t satelliteSeries; + EntryType_t numberOfFrequencies; EntryType_t scaleFactorOfCentralWaveNumber; EntryType_t scaledValueOfCentralWaveNumber; EntryType_t pv; @@ -345,9 +346,10 @@ struct MiscRecord { static constexpr auto record_entries_ = std::make_tuple( TablesVersion, GeneratingProcessIdentifier, TypeOfProcessedDataEntry, InitialStep, TimeIncrementInSeconds, LengthOfTimeWindow, LengthOfTimeWindowInSeconds, BitmapPresent, MissingValue, TypeOfEnsembleForecast, - NumberOfForecastsInEnsemble, SatelliteSeries, ScaleFactorOfCentralWaveNumber, ScaledValueOfCentralWaveNumber, - Pv, ScaleFactorOfWaveDirections, ScaleFactorOfWaveFrequencies, WaveDirections, WaveFrequencies, BitsPerValue, - LaplacianOperator, SubCentre, NumberOfComponents, ModelErrorType, TotalNumberOfIterations, PVPresent); + NumberOfForecastsInEnsemble, SatelliteSeries, NumberOfFrequencies, ScaleFactorOfCentralWaveNumber, + 
ScaledValueOfCentralWaveNumber, Pv, ScaleFactorOfWaveDirections, ScaleFactorOfWaveFrequencies, WaveDirections, + WaveFrequencies, BitsPerValue, LaplacianOperator, SubCentre, NumberOfComponents, ModelErrorType, + TotalNumberOfIterations, PVPresent); }; diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 5c30393b5..35dc1e874 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -547,6 +547,7 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa } misc.satelliteSeries = dm::parseEntry(dm::SatelliteSeries.withKey("satelliteSeries"), h); + misc.numberOfFrequencies = dm::parseEntry(dm::NumberOfFrequencies.withKey("numberOfFrequencies"), h); misc.scaleFactorOfCentralWaveNumber = dm::parseEntry(dm::ScaleFactorOfCentralWaveNumber.withKey("scaleFactorOfCentralWaveNumber"), h); misc.scaledValueOfCentralWaveNumber From 09b435057e8e523b43b55889e4bba930c6c2d0d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Thu, 14 May 2026 08:27:06 +0100 Subject: [PATCH 10/12] grib1-to-grib2: Add --except option to copy matching GRIB2 messages verbatim When used with --all, messages matching --except are copied without re-encoding (same syntax as --exclude). Only applies to GRIB2 messages; matching a GRIB1 message is an error. Without --all, emits a warning since GRIB2 messages are already copied by default. 
--- src/multio/tools/grib1-to-grib2.cc | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index 35dc1e874..f958c52d7 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -979,6 +979,7 @@ class Grib1ToGrib2 final : public multio::MultioTool { long ncycle_ = 0; std::optional excludeMap_ = {}; + std::optional exceptMap_ = {}; std::optional filterMap_ = {}; std::optional overwritePacking_ = {}; std::optional setModel_ = {}; @@ -1013,6 +1014,11 @@ Grib1ToGrib2::Grib1ToGrib2(int argc, char** argv) : multio::MultioTool{argc, arg "exclude", "Keys and values to be excluded. Multiple values are separated by ','. Multiple key-values pairs are separated " "by ';'. Example --exclude paramId=130,131,133;levtype=pl,sfc")); + options_.push_back(new eckit::option::SimpleOption( + "except", + "Keys and values to be copied verbatim (without re-encoding) when --all is active. Same syntax as --exclude. " + "Only applies to GRIB2 messages; matching a GRIB1 message is an error. " + "Example --except paramId=213131")); options_.push_back(new eckit::option::SimpleOption( "filter", "Keys and values to be included. Multiple values are separated by ','. 
Multiple key-values pairs are separated " @@ -1079,6 +1085,16 @@ void Grib1ToGrib2::init(const eckit::option::CmdArgs& args) { excludeMap_ = parseFieldValueMap(std::move(excludeStr), verbosity_); } + std::string exceptStr = ""; + args.get("except", exceptStr); + if (!exceptStr.empty()) { + if (copyGrib2Messages_) { + std::cerr << "Warning: --except has no effect without --all (GRIB2 messages are already copied verbatim)" + << std::endl; + } + exceptMap_ = parseFieldValueMap(std::move(exceptStr), verbosity_); + } + std::string filterStr = ""; args.get("filter", filterStr); if (!filterStr.empty()) { @@ -1195,6 +1211,24 @@ void Grib1ToGrib2::execute(const eckit::option::CmdArgs& args) { } } + // --except: copy matching GRIB2 messages verbatim instead of re-encoding. + // Only meaningful with --all; GRIB1 matches are an error. + if (exceptMap_ && matches(msg, *exceptMap_, verbosity_)) { + if (edition == "1") { + throw eckit::BadValue(std::string("--except matched a GRIB1 message (paramId=") + + std::to_string(inputHandle->getLong("paramId")) + + "). 
--except may only match GRIB2 messages.", + Here()); + } + if (verbosity_ >= 1) { + std::cout << "except map matched — copying GRIB2 message verbatim" << std::endl; + } + if (outputFileHandle) { + write(*inputHandle.get(), *outputFileHandle); + } + continue; + } + if (edition == "2" && copyGrib2Messages_) { // Write the message directly if (verbosity_ > 2) { From 8595db4497df2ec2f630d3a1d1fe7804876b6f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Thu, 14 May 2026 10:04:39 +0100 Subject: [PATCH 11/12] grib1-to-grib2: Extract numberOfForecastsInEnsemble for type=ses --- src/multio/tools/grib1-to-grib2.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index f958c52d7..f407e3be7 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -459,7 +459,7 @@ void mapGrib1ToGrib2(KeySet& marsKeys, metkit::codes::CodesHandle& h, dm::FullMa // if (auto searchNumber = marsKeys.find("number"); searchNumber != marsKeys.end()) // Check for derivedEnsembleForecast - if (mars.type.get() == "es" || mars.type.get() == "em") { + if (mars.type.get() == "es" || mars.type.get() == "em" || mars.type.get() == "ses") { long numForecasts = h.getLong("numberOfForecastsInEnsemble"); misc.numberOfForecastsInEnsemble.set(numForecasts); } From 727611ab69ed335255f54ff4c56d145be55646c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Domokos=20S=C3=A1rm=C3=A1ny?= Date: Thu, 14 May 2026 18:02:02 +0100 Subject: [PATCH 12/12] grib1-to-grib2: Warn when --except matches a message skipped by discipline-192 policy --- src/multio/tools/grib1-to-grib2.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/multio/tools/grib1-to-grib2.cc b/src/multio/tools/grib1-to-grib2.cc index f407e3be7..6c3302815 100644 --- a/src/multio/tools/grib1-to-grib2.cc +++ b/src/multio/tools/grib1-to-grib2.cc @@ -1204,6 +1204,11 @@ void Grib1ToGrib2::execute(const 
eckit::option::CmdArgs& args) { bool isDiscipline192 = (edition == "1") ? isDiscipline192Param(paramId) : (inputHandle->getLong("discipline") == 192); if (isDiscipline192) { + if (exceptMap_ && matches(msg, *exceptMap_, verbosity_)) { + std::cerr << "Warning: --except matched a discipline-192 message (paramId=" << paramId + << ") but it was skipped by --discipline-192 policy. " + << "Use --discipline-192 try-to-handle to allow --except to take effect." << std::endl; + } if (discipline192Handling_ == Discipline192Handling::LogAndIgnore) { std::cout << "Excluding message with discipline 192 (paramId: " << paramId << ")" << std::endl; }