diff --git a/src/earthkit/data/core/field.py b/src/earthkit/data/core/field.py index 9369b6dc..33b35746 100644 --- a/src/earthkit/data/core/field.py +++ b/src/earthkit/data/core/field.py @@ -1551,9 +1551,13 @@ def _default_encoder(self): else: return None - def _encode(self, encoder, hints=None, **kwargs): + def _encode(self, encoder, hints=None, field_metadata=None, **kwargs): """Double dispatch to the encoder.""" - return encoder._encode_field(self, **kwargs) + if field_metadata: + field = self.set(**field_metadata) + else: + field = self + return encoder._encode_field(field, **kwargs) def to_field(self, array=True): """Return the field itself.""" diff --git a/src/earthkit/data/encoders/grib.py b/src/earthkit/data/encoders/grib.py index 00f1d1bf..4cf4e958 100644 --- a/src/earthkit/data/encoders/grib.py +++ b/src/earthkit/data/encoders/grib.py @@ -140,44 +140,57 @@ def __init__(self, template=None): self.template = self.handle_from_template(template, clone=False) self._bbox = {} - def make(self, values=None, metadata=None, template=None): + def make(self, values=None, metadata=None, template=None, field_metadata=None): """Create a new GribCodesHandle from a template, field or metadata. May modify existing metadata Parameters ---------- - values: numpy.ndarray + values: numpy.ndarray, optional The values to encode - metadata: dict + metadata: dict, optional Metadata to encode - template: GribCoder + template: GribCoder, optional A template to use for encoding + field_metadata: dict, optional + Metadata to be set on the field before encoding. 
""" if template is None: template = self.template - handle = self.handle_from_template(template, clone=True) + handle = self.handle_from_template(template, field_metadata=field_metadata, clone=True) if handle is not None: self.update_metadata_from_template(metadata, template, handle) if handle is None: if values is None: raise ValueError("No values to encode") + if field_metadata: + raise ValueError("Cannot provide field_metadata without a template or handle") handle = self.handle_from_metadata(values, metadata, _COMPULSORY) return handle @staticmethod - def handle_from_template(template, clone=True): + def handle_from_template(template, field_metadata=None, clone=True): handle = None if template is not None: from earthkit.data.core.field import Field def _result(handle): + if field_metadata: + from earthkit.data.field.grib.create import create_grib_field + + field = create_grib_field(handle) + field = field.set(**field_metadata) + # it clones the handle internally, so we don't need to clone it again here + return GribHandleMaker.handle_from_field(field) return handle.clone() if clone else handle if isinstance(template, Field): + if field_metadata: + template = template.set(**field_metadata) return GribHandleMaker.handle_from_field(template) # GribMetadata or GribHandle elif hasattr(template, "handle"): @@ -214,6 +227,7 @@ def _result(handle): @staticmethod def handle_from_field(field): r = {} + field = field.sync() field._get_grib_context(r) handle = r.pop("handle", None) @@ -442,13 +456,24 @@ def __init__(self, template=None, metadata=None, **kwargs): def _normalise_kwargs_names(self, **kwargs): return kwargs - def _normalise_metadata_key_names(self, md): + def _normalise_metadata_key_names(self, metadata): def _convert(name): if name.startswith("metadata."): return name[9:] return name - return {_convert(k): v for k, v in md.items()} + return {_convert(k): v for k, v in metadata.items()} + + def _separate_metadata(self, metadata): + field = {} + grib = {} + for 
k, v in metadata.items(): + if "." in k: + field[k] = v + else: + grib[k] = v + + return field, grib def _get_handle(self, **kwargs): return GribHandleMaker(template=self.template).make(**kwargs) @@ -484,7 +509,10 @@ def encode( check_nans: bool Check for NaNs in the values and replace them with ``missing_value``. metadata: dict - Metadata to encode. The keys must be ecCodes GRIB keys, optionally prefixed with "metadata.". + Metadata to encode. The keys can be ecCodes GRIB keys, optionally prefixed with "metadata.". + The format-independent keys from :py:class:`~earthkit.data.core.field.Field` metadata are also + accepted. If format-independent keys are provided, they are applied first to create a new handle, + then if ecCodes GRIB keys are provided too, they are applied on top of the handle. template: Field, GribCodesHandle, bytes, str, int, None A template to use for encoding. It can be a :py:class:`~earthkit.data.core.field.Field`, a :py:class:`~earthkit.data.reader.grib.GribCodesHandle`, a GRIB message as @@ -538,9 +566,12 @@ def encode( md = self._normalise_kwargs_names(**self.metadata) md.update(self._normalise_kwargs_names(**metadata)) md.update(self._normalise_kwargs_names(**kwargs)) - md = self._normalise_metadata_key_names(md) + # separate the metadata into format-independent field metadata, + # and ecCodes GRIB metadata + field_metadata, md = self._separate_metadata(md) + # when the input date a datetime object time can be inferred from it can_infer_time = ( "date" in md @@ -557,6 +588,8 @@ def encode( kwargs["missing_value"] = missing_value kwargs["can_infer_time"] = can_infer_time + # detect if the data can be written straight to a file without going through the full + # encoding process, which can be very expensive for large data. 
path_allowed = ( target is not None and target._name == "file" @@ -564,17 +597,21 @@ def encode( and values is not None and template is not None and missing_value == 9999 + and not field_metadata ) - hints = {"path_allowed": path_allowed} if data is not None: from earthkit.data.data.wrappers import from_object data = from_object(data) - return data._encode(self, hints=hints, target=target, template=template, **kwargs) + return data._encode( + self, hints=hints, target=target, template=template, field_metadata=field_metadata, **kwargs + ) else: - handle = self._get_handle(template=template, values=values, metadata=metadata) + handle = self._get_handle( + template=template, values=values, metadata=metadata, field_metadata=field_metadata + ) return self._make_message(handle, **kwargs) def _has_standard_date_input(self, d): diff --git a/tests/encoders/test_grib_encoder.py b/tests/encoders/test_grib_encoder.py index 8837260e..be8caf14 100644 --- a/tests/encoders/test_grib_encoder.py +++ b/tests/encoders/test_grib_encoder.py @@ -179,7 +179,7 @@ def test_grib_encoder_field_data_and_values_and_template(): @pytest.mark.parametrize("init_encoder", [None, ["template", "metadata"], ["template"], ["metadata"]]) -def test_grib_encoder_field_metadata_1(init_encoder): +def test_grib_encoder_field_grib_metadata_1(init_encoder): fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() f = fl[0] @@ -202,7 +202,7 @@ def test_grib_encoder_field_metadata_1(init_encoder): assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 12) -def test_grib_encoder_field_metadata_2(): +def test_grib_encoder_field_grib_metadata_2(): fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() f = fl[0] @@ -218,7 +218,7 @@ def test_grib_encoder_field_metadata_2(): assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) -def test_grib_encoder_field_metadata_3(): +def test_grib_encoder_field_grib_metadata_3(): fl = from_source("file", 
earthkit_examples_file("test.grib")).to_fieldlist() f = fl[0] @@ -231,5 +231,162 @@ def test_grib_encoder_field_metadata_3(): assert f is not f_r assert f.message() != f_r.message() assert np.allclose(f.values + 1.0, f_r.values) + + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.valid_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.step") == datetime.timedelta(hours=0) + assert f.get("metadata.dataDate") == 20200513 + assert f.get("metadata.dataTime") == 1200 + assert f.get("metadata.step") == 0 + assert f.get("metadata.validityDate") == 20200513 + assert f.get("metadata.validityTime") == 1200 + + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) + assert f_r.get("time.valid_datetime") == datetime.datetime(1998, 5, 2, 0) + assert f_r.get("time.step") == datetime.timedelta(hours=0) + assert f_r.get("metadata.dataDate") == 19980502 + assert f_r.get("metadata.dataTime") == 0 + assert f_r.get("metadata.step") == 0 + assert f_r.get("metadata.validityDate") == 19980502 + assert f_r.get("metadata.validityTime") == 0 + + +@pytest.mark.parametrize("init_encoder", [None, ["template", "metadata"], ["template"], ["metadata"]]) +@pytest.mark.parametrize( + "new_base_datetime_metadata", [19980502, datetime.datetime(1998, 5, 2, 0, 0), "1998-05-02T00:00:00"] +) +def test_grib_encoder_field_hl_metadata_1(init_encoder, new_base_datetime_metadata): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + + encoder_kwargs = {} + encode_kwargs = {"template": f, "metadata": {"time.base_datetime": new_base_datetime_metadata}} + if init_encoder is not None: + for key in init_encoder: + if key in encode_kwargs: + encoder_kwargs[key] = encode_kwargs.pop(key) + + encoder = create_encoder("grib", **encoder_kwargs) + r = encoder.encode(data=f, **encode_kwargs) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert 
np.allclose(f.values, f_r.values) + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.valid_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.step") == datetime.timedelta(hours=0) + assert f.get("metadata.dataDate") == 20200513 + assert f.get("metadata.dataTime") == 1200 + assert f.get("metadata.step") == 0 + assert f.get("metadata.validityDate") == 20200513 + assert f.get("metadata.validityTime") == 1200 + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) + assert f_r.get("time.valid_datetime") == datetime.datetime(1998, 5, 2, 0) + assert f_r.get("time.step") == datetime.timedelta(hours=0) + assert f_r.get("metadata.dataDate") == 19980502 + assert f_r.get("metadata.dataTime") == 0 + assert f_r.get("metadata.step") == 0 + assert f_r.get("metadata.validityDate") == 19980502 + assert f_r.get("metadata.validityTime") == 0 + + +def test_grib_encoder_field_hl_metadata_2(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + + encoder = create_encoder("grib", metadata={"time.step": 6}) + r = encoder.encode(data=f, template=f, metadata={"time.base_datetime": datetime.datetime(1998, 5, 2, 0, 0)}) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values, f_r.values) + + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.valid_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.step") == datetime.timedelta(hours=0) + assert f.get("metadata.dataDate") == 20200513 + assert f.get("metadata.dataTime") == 1200 + assert f.get("metadata.step") == 0 + assert f.get("metadata.validityDate") == 20200513 + assert f.get("metadata.validityTime") == 1200 + + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) + assert f_r.get("time.valid_datetime") == datetime.datetime(1998, 5, 2, 6) + assert f_r.get("time.step") 
== datetime.timedelta(hours=6) + assert f_r.get("metadata.dataDate") == 19980502 + assert f_r.get("metadata.dataTime") == 0 + assert f_r.get("metadata.step") == 6 + assert f_r.get("metadata.validityDate") == 19980502 + assert f_r.get("metadata.validityTime") == 600 + + +def test_grib_encoder_field_hl_metadata_3(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + vals = f.values + 1.0 + + encoder = create_encoder("grib", metadata={"time.step": 6}) + r = encoder.encode(values=vals, template=f, metadata={"time.base_datetime": datetime.datetime(1998, 5, 2, 0, 0)}) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values + 1.0, f_r.values) + + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.valid_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.step") == datetime.timedelta(hours=0) + assert f.get("metadata.dataDate") == 20200513 + assert f.get("metadata.dataTime") == 1200 + assert f.get("metadata.step") == 0 + assert f.get("metadata.validityDate") == 20200513 + assert f.get("metadata.validityTime") == 1200 + + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 0) + assert f_r.get("time.valid_datetime") == datetime.datetime(1998, 5, 2, 6) + assert f_r.get("time.step") == datetime.timedelta(hours=6) + assert f_r.get("metadata.dataDate") == 19980502 + assert f_r.get("metadata.dataTime") == 0 + assert f_r.get("metadata.step") == 6 + assert f_r.get("metadata.validityDate") == 19980502 + assert f_r.get("metadata.validityTime") == 600 + + +def test_grib_encoder_field_mixed_metadata(): + fl = from_source("file", earthkit_examples_file("test.grib")).to_fieldlist() + + f = fl[0] + vals = f.values + 1.0 + + encoder = create_encoder("grib", metadata={"metadata.step": 5, "metadata.time": 600}) + r = encoder.encode(values=vals, template=f, metadata={"time.base_datetime": 
datetime.datetime(1998, 5, 2, 0, 0)}) + + f_r = r.to_field() + assert f is not f_r + assert f.message() != f_r.message() + assert np.allclose(f.values + 1.0, f_r.values) + + assert f.get("time.base_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.valid_datetime") == datetime.datetime(2020, 5, 13, 12, 0) + assert f.get("time.step") == datetime.timedelta(hours=0) + assert f.get("metadata.dataDate") == 20200513 + assert f.get("metadata.dataTime") == 1200 + assert f.get("metadata.step") == 0 + assert f.get("metadata.validityDate") == 20200513 + assert f.get("metadata.validityTime") == 1200 + + assert f_r.get("time.base_datetime") == datetime.datetime(1998, 5, 2, 6) + assert f_r.get("time.valid_datetime") == datetime.datetime(1998, 5, 2, 11) + assert f_r.get("time.step") == datetime.timedelta(hours=5) + assert f_r.get("metadata.dataDate") == 19980502 + assert f_r.get("metadata.dataTime") == 600 + assert f_r.get("metadata.step") == 5 + assert f_r.get("metadata.validityDate") == 19980502 + assert f_r.get("metadata.validityTime") == 1100