@@ -142,7 +142,9 @@ class StrictMetricsVisitor : public BoundVisitor<bool> {
142142 return kRowsMightNotMatch ;
143143 }
144144
145- if (CanContainNulls (id) || CanContainNaNs (id)) {
145+ ICEBERG_ASSIGN_OR_RAISE (auto has_nulls, CanContainNulls (id));
146+ ICEBERG_ASSIGN_OR_RAISE (auto has_nans, CanContainNaNs (id));
147+ if (has_nulls || has_nans) {
146148 return kRowsMightNotMatch ;
147149 }
148150
@@ -168,7 +170,9 @@ class StrictMetricsVisitor : public BoundVisitor<bool> {
168170 return kRowsMightNotMatch ;
169171 }
170172
171- if (CanContainNulls (id) || CanContainNaNs (id)) {
173+ ICEBERG_ASSIGN_OR_RAISE (auto has_nulls, CanContainNulls (id));
174+ ICEBERG_ASSIGN_OR_RAISE (auto has_nans, CanContainNaNs (id));
175+ if (has_nulls || has_nans) {
172176 return kRowsMightNotMatch ;
173177 }
174178
@@ -194,7 +198,9 @@ class StrictMetricsVisitor : public BoundVisitor<bool> {
194198 return kRowsMightNotMatch ;
195199 }
196200
197- if (CanContainNulls (id) || CanContainNaNs (id)) {
201+ ICEBERG_ASSIGN_OR_RAISE (auto has_nulls, CanContainNulls (id));
202+ ICEBERG_ASSIGN_OR_RAISE (auto has_nans, CanContainNaNs (id));
203+ if (has_nulls || has_nans) {
198204 return kRowsMightNotMatch ;
199205 }
200206
@@ -226,7 +232,9 @@ class StrictMetricsVisitor : public BoundVisitor<bool> {
226232 return kRowsMightNotMatch ;
227233 }
228234
229- if (CanContainNulls (id) || CanContainNaNs (id)) {
235+ ICEBERG_ASSIGN_OR_RAISE (auto has_nulls, CanContainNulls (id));
236+ ICEBERG_ASSIGN_OR_RAISE (auto has_nans, CanContainNaNs (id));
237+ if (has_nulls || has_nans) {
230238 return kRowsMightNotMatch ;
231239 }
232240
@@ -258,7 +266,9 @@ class StrictMetricsVisitor : public BoundVisitor<bool> {
258266 return kRowsMightNotMatch ;
259267 }
260268
261- if (CanContainNulls (id) || CanContainNaNs (id)) {
269+ ICEBERG_ASSIGN_OR_RAISE (auto has_nulls, CanContainNulls (id));
270+ ICEBERG_ASSIGN_OR_RAISE (auto has_nans, CanContainNaNs (id));
271+ if (has_nulls || has_nans) {
262272 return kRowsMightNotMatch ;
263273 }
264274 auto lower_it = data_file_.lower_bounds .find (id);
@@ -330,7 +340,9 @@ class StrictMetricsVisitor : public BoundVisitor<bool> {
330340 return kRowsMightNotMatch ;
331341 }
332342
333- if (CanContainNulls (id) || CanContainNaNs (id)) {
343+ ICEBERG_ASSIGN_OR_RAISE (auto has_nulls, CanContainNulls (id));
344+ ICEBERG_ASSIGN_OR_RAISE (auto has_nans, CanContainNaNs (id));
345+ if (has_nulls || has_nans) {
334346 return kRowsMightNotMatch ;
335347 }
336348 auto lower_it = data_file_.lower_bounds .find (id);
@@ -435,19 +447,41 @@ class StrictMetricsVisitor : public BoundVisitor<bool> {
435447 return Literal::Deserialize (stats, primitive_type);
436448 }
437449
438- bool CanContainNulls (int32_t id) {
450+ Result<bool > CanContainNulls (int32_t id) {
451+ ICEBERG_ASSIGN_OR_RAISE (auto field_opt, schema_.FindFieldById (id));
452+ if (field_opt.has_value () && !field_opt->get ().optional ()) {
453+ return false ;
454+ }
455+
456+ // When null_value_counts is not empty, the evaluator expects all fields to be
457+ // present. A missing entry indicates that the field's null count is unknown.
439458 if (data_file_.null_value_counts .empty ()) {
440459 return true ;
441460 }
442461 auto it = data_file_.null_value_counts .find (id);
443- return it != data_file_.null_value_counts .cend () && it->second > 0 ;
462+ if (it == data_file_.null_value_counts .cend ()) {
463+ return true ;
464+ }
465+ return it->second > 0 ;
444466 }
445467
446- bool CanContainNaNs (int32_t id) {
447- // nan counts might be null for early version writers when nan counters are not
448- // populated.
468+ Result<bool > CanContainNaNs (int32_t id) {
469+ ICEBERG_ASSIGN_OR_RAISE (auto field_opt, schema_.FindFieldById (id));
470+ if (field_opt.has_value ()) {
471+ auto type_id = field_opt->get ().type ()->type_id ();
472+ if (type_id != TypeId::kFloat && type_id != TypeId::kDouble ) {
473+ return false ;
474+ }
475+ }
476+
477+ if (data_file_.nan_value_counts .empty ()) {
478+ return true ;
479+ }
449480 auto it = data_file_.nan_value_counts .find (id);
450- return it != data_file_.nan_value_counts .cend () && it->second > 0 ;
481+ if (it == data_file_.nan_value_counts .cend ()) {
482+ return true ;
483+ }
484+ return it->second > 0 ;
451485 }
452486
453487 bool ContainsNullsOnly (int32_t id) {
0 commit comments