From 34348e3276442f60dd793bdaf4b790675b8c733d Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 3 Jan 2023 13:28:08 +1100 Subject: [PATCH 01/20] implement nth, rewrite first/last as FExpr --- docs/api/dt/nth.rst | 89 ++++++++++++++++++++ docs/api/fexpr/cummax copy.rst | 7 ++ docs/api/index-api.rst | 3 + docs/releases/v1.1.0.rst | 2 + src/core/column/nth.h | 85 +++++++++++++++++++ src/core/documentation.h | 2 + src/core/expr/fexpr.cc | 14 ++++ src/core/expr/fexpr.h | 1 + src/core/expr/fexpr_first_last.cc | 130 +++++++++++++++++++++++++++++ src/core/expr/fexpr_nth.cc | 124 +++++++++++++++++++++++++++ src/core/expr/head_reduce_unary.cc | 4 +- src/core/expr/op.h | 2 - src/datatable/__init__.py | 3 + src/datatable/expr/expr.py | 2 - src/datatable/expr/reduce.py | 10 +-- tests/dt/test-nth.py | 120 ++++++++++++++++++++++++++ tests/test-f.py | 10 +++ 17 files changed, 596 insertions(+), 12 deletions(-) create mode 100644 docs/api/dt/nth.rst create mode 100644 docs/api/fexpr/cummax copy.rst create mode 100644 src/core/column/nth.h create mode 100644 src/core/expr/fexpr_first_last.cc create mode 100644 src/core/expr/fexpr_nth.cc create mode 100644 tests/dt/test-nth.py diff --git a/docs/api/dt/nth.rst b/docs/api/dt/nth.rst new file mode 100644 index 0000000000..40d2ef84a5 --- /dev/null +++ b/docs/api/dt/nth.rst @@ -0,0 +1,89 @@ + +.. xfunction:: datatable.nth + :src: src/core/expr/fexpr_nth.cc pyfn_nth + :cvar: doc_dt_nth + :tests: tests/test-reduce.py + :signature: nth(cols, n) + + Return the ``nth`` row for an ``Expr``. + + Parameters + ---------- + cols: FExpr | iterable + Input columns or an iterable. + + return: Expr | ... + One-row f-expression that has the same names, stypes and + number of columns as `cols`. + + Examples + -------- + .. code-block:: python + + >>> from datatable import dt, f, by + >>> + >>> df = dt.Frame({'A': [1, 1, 2, 1, 2], + ... 'B': [None, 2, 3, 4, 5], + ... 'C': [1, 2, 1, 1, 2]}) + >>> df + | A B C + | int32 int32 int32 + -- + ----- ----- ----- + 0 | 1 NA 1 + 1 | 1 2 2 + 2 | 2 3 1 + 3 | 1 4 1 + 4 | 2 5 2 + [5 rows x 3 columns] + + Get the third row of column A:: + + >>> df[:, dt.nth(f.A, n=2)] + | A + | int32 + -- + ----- + 0 | 2 + [1 row x 1 column] + + Get the third row for multiple columns:: + + >>> df[:, dt.nth(f[:], n=2)] + | A B C + | int32 int32 int32 + -- + ----- ----- ----- + 0 | 2 3 1 + + Replicate :func:`first()`:: + + >>> df[:, dt.nth(f.A, n=0)] + | A + | int32 + -- + ----- + 0 | 1 + [1 row x 1 column] + + Replicate :func:`last()`:: + + >>> df[:, dt.nth(f.A, n=-1)] + | A + | int32 + -- + ----- + 0 | 2 + [1 row x 1 column] + +In the presence of :func:`by()`, it returns the nth row of the specified columns per group:: + + >>> df[:, [dt.nth(f.A, n = 2), dt.nth(f.B, n = -1)], by(f.C)] + | C A B + | int32 int32 int32 + -- + ----- ----- ----- + 0 | 1 1 4 + 1 | 2 NA 5 + [2 rows x 3 columns] + + + + See Also + -------- + - :func:`first()` -- function that returns the first row. + - :func:`last()` -- function that returns the last row. diff --git a/docs/api/fexpr/cummax copy.rst b/docs/api/fexpr/cummax copy.rst new file mode 100644 index 0000000000..64183476e4 --- /dev/null +++ b/docs/api/fexpr/cummax copy.rst @@ -0,0 +1,7 @@ + +.. xmethod:: datatable.FExpr.nth + :src: src/core/expr/fexpr.cc PyFExpr::nth + :cvar: doc_FExpr_nth + :signature: nth(n) + + Equivalent to :func:`dt.nth(cols, n)`. diff --git a/docs/api/index-api.rst b/docs/api/index-api.rst index b9cffbfa03..3fab27d32c 100644 --- a/docs/api/index-api.rst +++ b/docs/api/index-api.rst @@ -189,6 +189,8 @@ Functions - Find the smallest element per column * - :func:`ngroup()` - Number each group + * - :func:`nth()` + - Return the nth row. * - :func:`nunique()` - Count the number of unique values per column * - :func:`prod()` @@ -273,6 +275,7 @@ Other median()
min()
ngroup()
+ nth()
nunique()
prod()
qcut()
diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 68c47ca3be..95cd544754 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -114,6 +114,8 @@ -[new] Added reducer functions :func:`dt.countna()` and :func:`dt.nunique()`. [#2999] + -[new] Added function :func:`dt.nth()` to retrieve specified row. [#3128] + -[new] Class :class:`dt.FExpr` now has method :meth:`.nunique()`, which behaves exactly as the equivalent base level function :func:`dt.nunique()`. diff --git a/src/core/column/nth.h b/src/core/column/nth.h new file mode 100644 index 0000000000..220a3af0f1 --- /dev/null +++ b/src/core/column/nth.h @@ -0,0 +1,85 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#ifndef dt_NTH_h +#define dt_NTH_h +#include "column/virtual.h" +#include "stype.h" +namespace dt { + +template +class Nth_ColumnImpl : public Virtual_ColumnImpl { + private: + Column col_; + Groupby gby_; + int32_t n_; + size_t : 32; + + public: + Nth_ColumnImpl(Column&& col, const Groupby& gby, int32_t n) + : Virtual_ColumnImpl(gby.size(), col.stype()), + col_(std::move(col)), + gby_(gby), + n_(n) + { + xassert(col_.can_be_read_as()); + } + + + ColumnImpl* clone() const override { + return new Nth_ColumnImpl(Column(col_), gby_, n_); + } + + + size_t n_children() const noexcept override { + return 1; + } + + + const Column& child(size_t i) const override { + xassert(i == 0); (void)i; + return col_; + } + + bool get_element(size_t i, T* out) const override { + xassert(i < gby_.size()); + size_t i0, i1; + gby_.get_group(i, &i0, &i1); + + // Note, when `n_` is negative it is cast to `size_t`, that is + // an unsigned type. Then, when adding `i1`, we rely on `size_t` + // wrap-around. + size_t ni = (n_ >= 0)? static_cast(n_) + i0 + : static_cast(n_) + i1; + + bool isvalid = false; + if (ni >= i0 && ni < i1){ + isvalid = col_.get_element(ni, out); + } + return isvalid; + } +}; + + +} // namespace dt +#endif + + diff --git a/src/core/documentation.h b/src/core/documentation.h index 2cc4f743ad..3477932556 100644 --- a/src/core/documentation.h +++ b/src/core/documentation.h @@ -52,6 +52,7 @@ extern const char* doc_dt_mean; extern const char* doc_dt_median; extern const char* doc_dt_min; extern const char* doc_dt_ngroup; +extern const char* doc_dt_nth; extern const char* doc_dt_nunique; extern const char* doc_dt_qcut; extern const char* doc_dt_rbind; @@ -302,6 +303,7 @@ extern const char* doc_FExpr_max; extern const char* doc_FExpr_mean; extern const char* doc_FExpr_median; extern const char* doc_FExpr_min; +extern const char* doc_FExpr_nth; extern const char* doc_FExpr_nunique; extern const char* doc_FExpr_prod; extern const char* doc_FExpr_remove; diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index dfd59391cc..e897577c47 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -531,6 +531,20 @@ DECLARE_METHOD(&PyFExpr::min) ->name("min") ->docs(dt::doc_FExpr_min); +oobj PyFExpr::nth(const XArgs& args) { + auto nthFn = oobj::import("datatable", "nth"); + oobj n = args[0].to_oobj() ? args[0].to_oobj() + : py::oint(0); + return nthFn.call({this, n}); +} + +DECLARE_METHOD(&PyFExpr::nth) + ->name("nth") + ->arg_names({"n"}) + ->n_positional_or_keyword_args(1) + ->n_required_args(1) + ->docs(dt::doc_FExpr_nth); + oobj PyFExpr::nunique(const XArgs&) { auto nuniqueFn = oobj::import("datatable", "nunique"); diff --git a/src/core/expr/fexpr.h b/src/core/expr/fexpr.h index b292bbb1d8..8a64df836c 100644 --- a/src/core/expr/fexpr.h +++ b/src/core/expr/fexpr.h @@ -197,6 +197,7 @@ class PyFExpr : public py::XObject { py::oobj mean(const py::XArgs&); py::oobj median(const py::XArgs&); py::oobj min(const py::XArgs&); + py::oobj nth(const py::XArgs&); py::oobj nunique(const py::XArgs&); py::oobj prod(const py::XArgs&); py::oobj remove(const py::XArgs&); diff --git a/src/core/expr/fexpr_first_last.cc b/src/core/expr/fexpr_first_last.cc new file mode 100644 index 0000000000..01833dd256 --- /dev/null +++ b/src/core/expr/fexpr_first_last.cc @@ -0,0 +1,130 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#include "column/const.h" +#include "column/nth.h" +#include "documentation.h" +#include "expr/fexpr_func.h" +#include "expr/eval_context.h" +#include "python/xargs.h" +namespace dt { +namespace expr { + +template +class FExpr_FirstLast : public FExpr_Func { + private: + ptrExpr arg_; + size_t : 32; + + public: + FExpr_FirstLast(ptrExpr&& arg) + : arg_(std::move(arg)) + {} + + + std::string repr() const override { + std::string out = FIRST?"first":"last"; + out += '('; + out += arg_->repr(); + out += ')'; + return out; + } + + + Workframe evaluate_n(EvalContext &ctx) const override { + Workframe inputs = arg_->evaluate_n(ctx); + Workframe outputs(ctx); + Groupby gby = ctx.get_groupby(); + + if (!gby) gby = Groupby::single_group(ctx.nrows()); + + int32_t n = FIRST?0:-1; + + for (size_t i = 0; i < inputs.ncols(); ++i) { + auto coli = inputs.retrieve_column(i); + outputs.add_column( + evaluate1(std::move(coli), gby, n), + inputs.retrieve_name(i), + Grouping::GtoONE + ); + } + return outputs; + } + + + Column evaluate1(Column&& col, const Groupby& gby, const int32_t n) const { + SType stype = col.stype(); + switch (stype) { + case SType::VOID: return Column(new ConstNa_ColumnImpl(gby.size())); + case SType::BOOL: + case SType::INT8: return make(std::move(col), gby, n); + case SType::INT16: return make(std::move(col), gby, n); + case SType::DATE32: + case SType::INT32: return make(std::move(col), gby, n); + case SType::TIME64: + case SType::INT64: return make(std::move(col), gby, n); + case SType::FLOAT32: return make(std::move(col), gby, n); + case SType::FLOAT64: return make(std::move(col), gby, n); + case SType::STR32: return make(std::move(col), gby, n); + case SType::STR64: return make(std::move(col), gby, n); + default: + throw TypeError() + << "Invalid column of type `" << stype << "` in " << repr(); + } + } + + + template + Column make(Column&& col, const Groupby& gby, int32_t n) const { + return Column(new Nth_ColumnImpl(std::move(col), gby, n)); + } + +}; + + +static py::oobj pyfn_first(const py::XArgs& args) { + auto cols = args[0].to_oobj(); + return PyFExpr::make(new FExpr_FirstLast(as_fexpr(cols))); +} + + +DECLARE_PYFN(&pyfn_first) + ->name("first") + ->docs(doc_dt_first) + ->arg_names({"cols"}) + ->n_positional_args(1); + + +static py::oobj pyfn_last(const py::XArgs& args) { + auto cols = args[0].to_oobj(); + return PyFExpr::make(new FExpr_FirstLast(as_fexpr(cols))); +} + + +DECLARE_PYFN(&pyfn_last) + ->name("last") + ->docs(doc_dt_last) + ->arg_names({"cols"}) + ->n_positional_args(1); + + + +}} // dt::expr diff --git a/src/core/expr/fexpr_nth.cc b/src/core/expr/fexpr_nth.cc new file mode 100644 index 0000000000..db0587e02a --- /dev/null +++ b/src/core/expr/fexpr_nth.cc @@ -0,0 +1,124 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#include "column/const.h" +#include "column/nth.h" +#include "documentation.h" +#include "expr/fexpr_func.h" +#include "expr/eval_context.h" +#include "python/xargs.h" +namespace dt { +namespace expr { + + +class FExpr_Nth : public FExpr_Func { + private: + ptrExpr arg_; + int32_t n_; + size_t : 32; + + public: + FExpr_Nth(ptrExpr&& arg, int32_t n) + : arg_(std::move(arg)), + n_(n) + {} + + + std::string repr() const override { + std::string out = "nth"; + out += '('; + out += arg_->repr(); + out += ", n="; + out += std::to_string(n_); + out += ')'; + return out; + } + + + Workframe evaluate_n(EvalContext &ctx) const override { + Workframe inputs = arg_->evaluate_n(ctx); + Workframe outputs(ctx); + Groupby gby = ctx.get_groupby(); + + if (!gby) gby = Groupby::single_group(ctx.nrows()); + + for (size_t i = 0; i < inputs.ncols(); ++i) { + auto coli = inputs.retrieve_column(i); + outputs.add_column( + evaluate1(std::move(coli), gby, n_), + inputs.retrieve_name(i), + Grouping::GtoONE + ); + } + return outputs; + } + + + Column evaluate1(Column&& col, const Groupby& gby, const int32_t n) const { + SType stype = col.stype(); + switch (stype) { + case SType::VOID: return Column(new ConstNa_ColumnImpl(gby.size())); + case SType::BOOL: + case SType::INT8: return make(std::move(col), gby, n); + case SType::INT16: return make(std::move(col), gby, n); + case SType::DATE32: + case SType::INT32: return make(std::move(col), gby, n); + case SType::TIME64: + case SType::INT64: return make(std::move(col), gby, n); + case SType::FLOAT32: return make(std::move(col), gby, n); + case SType::FLOAT64: return make(std::move(col), gby, n); + case SType::STR32: return make(std::move(col), gby, n); + case SType::STR64: return make(std::move(col), gby, n); + default: + throw TypeError() + << "Invalid column of type `" << stype << "` in " << repr(); + } + } + + + template + Column make(Column&& col, const Groupby& gby, int32_t n) const { + return Column(new Nth_ColumnImpl(std::move(col), gby, n)); + } + +}; + + +static py::oobj pyfn_nth(const py::XArgs& args) { + auto cols = args[0].to_oobj(); + auto n = args[1].to(0); + + return PyFExpr::make(new FExpr_Nth(as_fexpr(cols), n)); + + +} + + +DECLARE_PYFN(&pyfn_nth) + ->name("nth") + ->docs(doc_dt_nth) + ->arg_names({"cols", "n"}) + ->n_positional_args(1) + ->n_positional_or_keyword_args(1) + ->n_required_args(1); + + +}} // dt::expr diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index 48cbb6200f..41b84174be 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -711,8 +711,6 @@ Workframe Head_Reduce_Unary::evaluate_n( if (inputs.get_grouping_mode() == Grouping::GtoALL) { switch (op) { case Op::STDEV: fn = compute_sd; break; - case Op::FIRST: fn = compute_firstlast; break; - case Op::LAST: fn = compute_firstlast; break; case Op::COUNT: fn = compute_count; break; case Op::COUNTNA:fn = compute_countna; break; case Op::MEDIAN: fn = compute_median; break; @@ -725,6 +723,8 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::STDEV: fn = compute_gsd; break; case Op::FIRST: case Op::LAST: fn = compute_gfirstlast; break; + case Op::MIN: + case Op::MAX: case Op::COUNT: fn = compute_gcount; break; case Op::COUNTNA:fn = compute_gcount; break; case Op::MEDIAN: fn = compute_gmedian; break; diff --git a/src/core/expr/op.h b/src/core/expr/op.h index 6ca51b8f29..e99503dfbc 100644 --- a/src/core/expr/op.h +++ b/src/core/expr/op.h @@ -67,8 +67,6 @@ enum class Op : size_t { MIN, // head_reduce_unary.cc MAX, // head_reduce_unary.cc STDEV, // head_reduce_unary.cc - FIRST, // head_reduce_unary.cc - LAST, // head_reduce_unary.cc SUM, // head_reduce_unary.cc COUNT, // head_reduce_unary.cc COUNT0, // head_reduce_nullary.cc diff --git a/src/datatable/__init__.py b/src/datatable/__init__.py index e3ad1437e7..7cddfbfb16 100644 --- a/src/datatable/__init__.py +++ b/src/datatable/__init__.py @@ -37,6 +37,7 @@ cut, FExpr, fillna, + first, fread, ifelse, init_styles, @@ -46,6 +47,7 @@ mean, Namespace, ngroup, + nth, prod, qcut, rbind, @@ -131,6 +133,7 @@ "mean", "median", "ngroup", + "nth", "obj64", "options", "prod", diff --git a/src/datatable/expr/expr.py b/src/datatable/expr/expr.py index a91782d4b7..cbc4aa5f71 100644 --- a/src/datatable/expr/expr.py +++ b/src/datatable/expr/expr.py @@ -52,8 +52,6 @@ class OpCodes(enum.Enum): # Reducers STDEV = 404 - FIRST = 405 - LAST = 406 COUNT = 408 COUNT0 = 409 MEDIAN = 410 diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index a8ee39bfe9..a53cd86fac 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -30,8 +30,6 @@ "corr", "count", "cov", - "first", - "last", "max", "median", "min", @@ -61,16 +59,16 @@ def countna(iterable=None): def first(iterable): - if isinstance(iterable, (Expr, core.FExpr)): - return Expr(OpCodes.FIRST, (iterable,)) + if isinstance(iterable, core.FExpr): + return core.first(iterable) else: for x in iterable: return x def last(iterable): - if isinstance(iterable, (Expr, core.FExpr)): - return Expr(OpCodes.LAST, (iterable,)) + if isinstance(iterable, core.FExpr): + return core.last(iterable) else: try: for x in reversed(iterable): diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py new file mode 100644 index 0000000000..48a32405a4 --- /dev/null +++ b/tests/dt/test-nth.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------- +# Copyright 2022 H2O.ai +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +#------------------------------------------------------------------------------- +import pytest +from datatable import dt, f, nth, FExpr, by +from tests import assert_equals + +#------------------------------------------------------------------------------- +# Errors +#------------------------------------------------------------------------------- + +def test_nth_parameter_not_int(): + msg = r"Argument n in function datatable.nth\(\) should be an integer, " \ + "instead got " + DT = dt.Frame([1, 2, None, 4, 5]) + with pytest.raises(TypeError, match = msg): + DT[:, nth(f[0], '1')] + + +def test_nth_no_argument(): + msg = r"Function datatable.nth\(\) requires at least 1 positional " \ + "argument, but none were given" + with pytest.raises(TypeError, match = msg): + nth() + + +#------------------------------------------------------------------------------- +# Normal +#------------------------------------------------------------------------------- + +def test_nth_str(): + assert str(nth(f.A, n=1)) == "FExpr<" + nth.__name__ + "(f.A, n=1)>" + assert str(nth(f.A, n=1) + 1) == "FExpr<" + nth.__name__ + "(f.A, n=1) + 1>" + assert str(nth(f.A + f.B, n=1)) == "FExpr<" + nth.__name__ + "(f.A + f.B, n=1)>" + assert str(nth(f.B, 1)) == "FExpr<" + nth.__name__ + "(f.B, n=1)>" + assert str(nth(f[:2], 1)) == "FExpr<"+ nth.__name__ + "(f[:2], n=1)>" + + +def test_nth_empty_frame(): + DT = dt.Frame() + expr_nth = nth(DT, 1) + assert isinstance(expr_nth, FExpr) + assert_equals(DT[:, nth(f[:], 1)], DT) + + +def test_nth_empty_frame_skipna(): + DT = dt.Frame() + expr_nth = nth(DT, 1) + assert isinstance(expr_nth, FExpr) + assert_equals(DT[:, nth(f[:], 1)], DT) + + +def test_nth_void(): + DT = dt.Frame([None, None, None]) + DT_nth = DT[:, nth(f[:], 0)] + assert_equals(DT_nth, DT[0, :]) + + +def test_nth_trivial(): + DT = dt.Frame([0]/dt.int64) + nth_fexpr = nth(f[:], n=-1) + DT_nth = DT[:, nth_fexpr] + assert isinstance(nth_fexpr, FExpr) + assert_equals(DT, DT_nth) + + +def test_nth_bool(): + DT = dt.Frame([None, False, None, True, False, True]) + DT_nth = DT[:, [nth(f[:], n=1), + nth(f[:], n=-1), + nth(f[:], n=24)]] + DT_ref = dt.Frame([[False], [True], [None]/dt.bool8]) + assert_equals(DT_nth, DT_ref) + + +def test_nth_small(): + DT = dt.Frame([None, 3, None, 4]) + DT_nth = DT[:, [nth(f[:], n=1), + nth(f[:], n=-5)]] + DT_ref = dt.Frame([[3]/dt.int32, [None]/dt.int32]) + assert_equals(DT_nth, DT_ref) + + +def test_nth_string(): + DT = dt.Frame(['d', 'a', 'z', 'b']) + DT_nth = DT[:, [nth(f[:], 0), nth(f[:], n=-1)]] + DT_ref = dt.Frame([['d'], ['b'] ]) + assert_equals(DT_nth, DT_ref) + + +def test_nth_grouped(): + DT = dt.Frame([[15, None, 136, 93, 743, None, None, 91], + ['a','a','a','b','b','c','c','c']]) + DT_nth = DT[:, [nth(f[:], n=0), nth(f[:], n=2)], by(f[-1])] + DT_ref = dt.Frame({ + 'C1':['a','b','c',], + 'C0':[15, 93, None], + 'C2':[136, None, 91 ], + }) + assert_equals(DT_nth, DT_ref) diff --git a/tests/test-f.py b/tests/test-f.py index 492e108fac..b407026215 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -499,3 +499,13 @@ def test_codes(): type = dt.Type.cat8(dt.Type.str32)) assert_equals(DT[:, f.A.codes()], DT[:, dt.codes(f.A)]) + +def test_nth(): + assert str(dt.nth(f.A, n=0)) == str(f.A.nth(n=0)) + assert str(dt.nth(f.A, n=1)) == str(f.A.nth(n=1)) + assert str(dt.nth(f[:], -1)) == str(f[:].nth(-1)) + DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) + assert_equals(DT[:, f.A.nth(n=1)], DT[:, dt.nth(f.A, 1)]) + assert_equals(DT[:, f.A.nth(n=0)], DT[:, dt.nth(f.A, 0)]) + assert_equals(DT[:, f.A.nth(n=0)], DT[:, dt.nth(f.A, 0)]) + From 5b3a99f5cfab1f66b6c3451abaa2f736915ac76d Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 3 Jan 2023 13:35:55 +1100 Subject: [PATCH 02/20] rename file --- docs/api/fexpr/{cummax copy.rst => nth.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/api/fexpr/{cummax copy.rst => nth.rst} (100%) diff --git a/docs/api/fexpr/cummax copy.rst b/docs/api/fexpr/nth.rst similarity index 100% rename from docs/api/fexpr/cummax copy.rst rename to docs/api/fexpr/nth.rst From 339b159890fc040de19160b54b9fa7237d353bba Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 4 Jan 2023 07:18:43 +1100 Subject: [PATCH 03/20] revert commits; restore skipna argument; focus only on nth function --- src/core/expr/fexpr.cc | 7 +- src/core/expr/fexpr_first_last.cc | 130 ----------------------------- src/core/expr/fexpr_nth.cc | 39 +++++---- src/core/expr/head_reduce_unary.cc | 4 + src/core/expr/op.h | 2 + src/datatable/__init__.py | 1 - src/datatable/expr/expr.py | 2 + src/datatable/expr/reduce.py | 10 ++- tests/dt/test-nth.py | 47 +++++++++-- tests/test-f.py | 14 ++-- 10 files changed, 91 insertions(+), 165 deletions(-) delete mode 100644 src/core/expr/fexpr_first_last.cc diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index e897577c47..8d5f95f467 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -535,13 +535,14 @@ oobj PyFExpr::nth(const XArgs& args) { auto nthFn = oobj::import("datatable", "nth"); oobj n = args[0].to_oobj() ? args[0].to_oobj() : py::oint(0); - return nthFn.call({this, n}); + oobj skipna = args[1].to_oobj_or_none(); + return nthFn.call({this, n, skipna}); } DECLARE_METHOD(&PyFExpr::nth) ->name("nth") - ->arg_names({"n"}) - ->n_positional_or_keyword_args(1) + ->arg_names({"n", "skipna"}) + ->n_positional_or_keyword_args(2) ->n_required_args(1) ->docs(dt::doc_FExpr_nth); diff --git a/src/core/expr/fexpr_first_last.cc b/src/core/expr/fexpr_first_last.cc deleted file mode 100644 index 01833dd256..0000000000 --- a/src/core/expr/fexpr_first_last.cc +++ /dev/null @@ -1,130 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2022 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#include "column/const.h" -#include "column/nth.h" -#include "documentation.h" -#include "expr/fexpr_func.h" -#include "expr/eval_context.h" -#include "python/xargs.h" -namespace dt { -namespace expr { - -template -class FExpr_FirstLast : public FExpr_Func { - private: - ptrExpr arg_; - size_t : 32; - - public: - FExpr_FirstLast(ptrExpr&& arg) - : arg_(std::move(arg)) - {} - - - std::string repr() const override { - std::string out = FIRST?"first":"last"; - out += '('; - out += arg_->repr(); - out += ')'; - return out; - } - - - Workframe evaluate_n(EvalContext &ctx) const override { - Workframe inputs = arg_->evaluate_n(ctx); - Workframe outputs(ctx); - Groupby gby = ctx.get_groupby(); - - if (!gby) gby = Groupby::single_group(ctx.nrows()); - - int32_t n = FIRST?0:-1; - - for (size_t i = 0; i < inputs.ncols(); ++i) { - auto coli = inputs.retrieve_column(i); - outputs.add_column( - evaluate1(std::move(coli), gby, n), - inputs.retrieve_name(i), - Grouping::GtoONE - ); - } - return outputs; - } - - - Column evaluate1(Column&& col, const Groupby& gby, const int32_t n) const { - SType stype = col.stype(); - switch (stype) { - case SType::VOID: return Column(new ConstNa_ColumnImpl(gby.size())); - case SType::BOOL: - case SType::INT8: return make(std::move(col), gby, n); - case SType::INT16: return make(std::move(col), gby, n); - case SType::DATE32: - case SType::INT32: return make(std::move(col), gby, n); - case SType::TIME64: - case SType::INT64: return make(std::move(col), gby, n); - case SType::FLOAT32: return make(std::move(col), gby, n); - case SType::FLOAT64: return make(std::move(col), gby, n); - case SType::STR32: return make(std::move(col), gby, n); - case SType::STR64: return make(std::move(col), gby, n); - default: - throw TypeError() - << "Invalid column of type `" << stype << "` in " << repr(); - } - } - - - template - Column make(Column&& col, const Groupby& gby, int32_t n) const { - return Column(new Nth_ColumnImpl(std::move(col), gby, n)); - } - -}; - - -static py::oobj pyfn_first(const py::XArgs& args) { - auto cols = args[0].to_oobj(); - return PyFExpr::make(new FExpr_FirstLast(as_fexpr(cols))); -} - - -DECLARE_PYFN(&pyfn_first) - ->name("first") - ->docs(doc_dt_first) - ->arg_names({"cols"}) - ->n_positional_args(1); - - -static py::oobj pyfn_last(const py::XArgs& args) { - auto cols = args[0].to_oobj(); - return PyFExpr::make(new FExpr_FirstLast(as_fexpr(cols))); -} - - -DECLARE_PYFN(&pyfn_last) - ->name("last") - ->docs(doc_dt_last) - ->arg_names({"cols"}) - ->n_positional_args(1); - - - -}} // dt::expr diff --git a/src/core/expr/fexpr_nth.cc b/src/core/expr/fexpr_nth.cc index db0587e02a..d4c88a6d2b 100644 --- a/src/core/expr/fexpr_nth.cc +++ b/src/core/expr/fexpr_nth.cc @@ -29,25 +29,25 @@ namespace dt { namespace expr { +template class FExpr_Nth : public FExpr_Func { private: ptrExpr arg_; int32_t n_; - size_t : 32; public: - FExpr_Nth(ptrExpr&& arg, int32_t n) - : arg_(std::move(arg)), - n_(n) - {} - + FExpr_Nth(ptrExpr&& arg, py::oobj n) + : arg_(std::move(arg)) + {n_ = n.to_int32_strict();} std::string repr() const override { std::string out = "nth"; out += '('; out += arg_->repr(); - out += ", n="; + out += ", nth="; out += std::to_string(n_); + out += ", skipna="; + out += SKIPNA? "True" : "False"; out += ')'; return out; } @@ -103,22 +103,29 @@ class FExpr_Nth : public FExpr_Func { static py::oobj pyfn_nth(const py::XArgs& args) { - auto cols = args[0].to_oobj(); - auto n = args[1].to(0); - - return PyFExpr::make(new FExpr_Nth(as_fexpr(cols), n)); - - + auto arg = args[0].to_oobj(); + auto n = args[1].to_oobj(); + auto skipna = args[2].to(false); + if (!n.is_int()) { + throw TypeError() << "The argument for the `nth` parameter " + <<"in function datatable.nth() should be an integer, " + <<"instead got "<(as_fexpr(arg), n)); + } else { + return PyFExpr::make(new FExpr_Nth(as_fexpr(arg), n)); + } } DECLARE_PYFN(&pyfn_nth) ->name("nth") ->docs(doc_dt_nth) - ->arg_names({"cols", "n"}) + ->arg_names({"cols", "nth", "skipna"}) ->n_positional_args(1) - ->n_positional_or_keyword_args(1) - ->n_required_args(1); + ->n_positional_or_keyword_args(2) + ->n_required_args(2); }} // dt::expr diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index 41b84174be..ac3f1b6c13 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -711,6 +711,8 @@ Workframe Head_Reduce_Unary::evaluate_n( if (inputs.get_grouping_mode() == Grouping::GtoALL) { switch (op) { case Op::STDEV: fn = compute_sd; break; + case Op::FIRST: fn = compute_firstlast; break; + case Op::LAST: fn = compute_firstlast; break; case Op::COUNT: fn = compute_count; break; case Op::COUNTNA:fn = compute_countna; break; case Op::MEDIAN: fn = compute_median; break; @@ -725,6 +727,8 @@ Workframe Head_Reduce_Unary::evaluate_n( case Op::LAST: fn = compute_gfirstlast; break; case Op::MIN: case Op::MAX: + case Op::FIRST: + case Op::LAST: fn = compute_gfirstlast; break; case Op::COUNT: fn = compute_gcount; break; case Op::COUNTNA:fn = compute_gcount; break; case Op::MEDIAN: fn = compute_gmedian; break; diff --git a/src/core/expr/op.h b/src/core/expr/op.h index e99503dfbc..6ca51b8f29 100644 --- a/src/core/expr/op.h +++ b/src/core/expr/op.h @@ -67,6 +67,8 @@ enum class Op : size_t { MIN, // head_reduce_unary.cc MAX, // head_reduce_unary.cc STDEV, // head_reduce_unary.cc + FIRST, // head_reduce_unary.cc + LAST, // head_reduce_unary.cc SUM, // head_reduce_unary.cc COUNT, // head_reduce_unary.cc COUNT0, // head_reduce_nullary.cc diff --git a/src/datatable/__init__.py b/src/datatable/__init__.py index 7cddfbfb16..afe1e89a51 100644 --- a/src/datatable/__init__.py +++ b/src/datatable/__init__.py @@ -37,7 +37,6 @@ cut, FExpr, fillna, - first, fread, ifelse, init_styles, diff --git a/src/datatable/expr/expr.py b/src/datatable/expr/expr.py index cbc4aa5f71..a91782d4b7 100644 --- a/src/datatable/expr/expr.py +++ b/src/datatable/expr/expr.py @@ -52,6 +52,8 @@ class OpCodes(enum.Enum): # Reducers STDEV = 404 + FIRST = 405 + LAST = 406 COUNT = 408 COUNT0 = 409 MEDIAN = 410 diff --git a/src/datatable/expr/reduce.py b/src/datatable/expr/reduce.py index a53cd86fac..a8ee39bfe9 100644 --- a/src/datatable/expr/reduce.py +++ b/src/datatable/expr/reduce.py @@ -30,6 +30,8 @@ "corr", "count", "cov", + "first", + "last", "max", "median", "min", @@ -59,16 +61,16 @@ def countna(iterable=None): def first(iterable): - if isinstance(iterable, core.FExpr): - return core.first(iterable) + if isinstance(iterable, (Expr, core.FExpr)): + return Expr(OpCodes.FIRST, (iterable,)) else: for x in iterable: return x def last(iterable): - if isinstance(iterable, core.FExpr): - return core.last(iterable) + if isinstance(iterable, (Expr, core.FExpr)): + return Expr(OpCodes.LAST, (iterable,)) else: try: for x in reversed(iterable): diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py index 48a32405a4..1c6fb7dd91 100644 --- a/tests/dt/test-nth.py +++ b/tests/dt/test-nth.py @@ -49,11 +49,11 @@ def test_nth_no_argument(): #------------------------------------------------------------------------------- def test_nth_str(): - assert str(nth(f.A, n=1)) == "FExpr<" + nth.__name__ + "(f.A, n=1)>" - assert str(nth(f.A, n=1) + 1) == "FExpr<" + nth.__name__ + "(f.A, n=1) + 1>" - assert str(nth(f.A + f.B, n=1)) == "FExpr<" + nth.__name__ + "(f.A + f.B, n=1)>" - assert str(nth(f.B, 1)) == "FExpr<" + nth.__name__ + "(f.B, n=1)>" - assert str(nth(f[:2], 1)) == "FExpr<"+ nth.__name__ + "(f[:2], n=1)>" + assert str(nth(f.A, nth=1)) == "FExpr<" + nth.__name__ + "(f.A, nth=1, skipna=False)>" + assert str(nth(f.A, nth=1, skipna=True) + 1) == "FExpr<" + nth.__name__ + "(f.A, nth=1, skipna=True) + 1>" + assert str(nth(f.A + f.B, nth = 1)) == "FExpr<" + nth.__name__ + "(f.A + f.B, nth=1, skipna=False)>" + assert str(nth(f.B, 1, True)) == "FExpr<" + nth.__name__ + "(f.B, nth=1, skipna=True)>" + assert str(nth(f[:2], 1)) == "FExpr<"+ nth.__name__ + "(f[:2], nth=1, skipna=False)>" def test_nth_empty_frame(): @@ -74,6 +74,11 @@ def test_nth_void(): DT = dt.Frame([None, None, None]) DT_nth = DT[:, nth(f[:], 0)] assert_equals(DT_nth, DT[0, :]) + +def test_nth_void_skipna(): + DT = dt.Frame([None, None, None]) + DT_nth = DT[:, nth(f[:], 0, True)] + assert_equals(DT_nth, DT[0, :]) def test_nth_trivial(): @@ -84,6 +89,14 @@ def test_nth_trivial(): assert_equals(DT, DT_nth) +def test_nth_trivial_skipna(): + DT = dt.Frame([0]/dt.int64) + nth_fexpr = nth(f[:], nth = -1, skipna=True) + DT_nth = DT[:, nth_fexpr] + assert isinstance(nth_fexpr, FExpr) + assert_equals(DT, DT_nth) + + def test_nth_bool(): DT = dt.Frame([None, False, None, True, False, True]) DT_nth = DT[:, [nth(f[:], n=1), @@ -93,6 +106,17 @@ def test_nth_bool(): assert_equals(DT_nth, DT_ref) +def test_nth_bool_skipna(): + DT = dt.Frame([None, False, None, True, False, True]) + DT_nth = DT[:, [nth(f[:], nth = 0, skipna=True), + nth(f[:], nth = -1, skipna=True), + nth(f[:], nth = 2,skipna=True)]] + DT_ref = dt.Frame([[False], [True], [True]]) + + assert_equals(DT_nth, DT_ref) + + + def test_nth_small(): DT = dt.Frame([None, 3, None, 4]) DT_nth = DT[:, [nth(f[:], n=1), @@ -118,3 +142,16 @@ def test_nth_grouped(): 'C2':[136, None, 91 ], }) assert_equals(DT_nth, DT_ref) + + +def test_nth_grouped_skipna(): + DT = dt.Frame([[15, None, 136, 93, 743, None, None, 91], + ['a','a','a','b','b','c','c','c']]) + DT_nth = DT[:, [nth(f[:], nth = 0, skipna=True), nth(f[:], nth = 2,skipna=True)], by(f[-1])] + DT_ref = dt.Frame({ + 'C1':['a','b','c',], + 'C2':[15,93, 91], + 'C3':[136, None,91 ], + }) + assert_equals(DT_nth, DT_ref) + diff --git a/tests/test-f.py b/tests/test-f.py index b407026215..6a66bbf45d 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -501,11 +501,13 @@ def test_codes(): def test_nth(): - assert str(dt.nth(f.A, n=0)) == str(f.A.nth(n=0)) - assert str(dt.nth(f.A, n=1)) == str(f.A.nth(n=1)) - assert str(dt.nth(f[:], -1)) == str(f[:].nth(-1)) + assert str(dt.nth(f.A, nth=0)) == str(f.A.nth(nth=0, skipna=False)) + assert str(dt.nth(f.A, nth=1, skipna=True)) == str(f.A.nth(nth=1, skipna=True)) + assert str(dt.nth(f[:], -1, skipna=False)) == str(f[:].nth(-1, False)) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) - assert_equals(DT[:, f.A.nth(n=1)], DT[:, dt.nth(f.A, 1)]) - assert_equals(DT[:, f.A.nth(n=0)], DT[:, dt.nth(f.A, 0)]) - assert_equals(DT[:, f.A.nth(n=0)], DT[:, dt.nth(f.A, 0)]) + assert_equals(DT[:, f.A.nth(nth=1, skipna=False)], DT[:, dt.nth(f.A, 1, False)]) + assert_equals(DT[:, f.A.nth(nth=0, skipna=True)], DT[:, dt.nth(f.A, 0, True)]) + assert_equals(DT[:, f.A.nth(nth=0, skipna=True)], DT[:, dt.nth(f.A, 0, skipna=True)]) + + From d89578b1f804b50376c939d355274e7c3d2de1fb Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 4 Jan 2023 07:29:53 +1100 Subject: [PATCH 04/20] whitespace --- tests/test-f.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test-f.py b/tests/test-f.py index 6a66bbf45d..7479535b00 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -509,5 +509,3 @@ def test_nth(): assert_equals(DT[:, f.A.nth(nth=0, skipna=True)], DT[:, dt.nth(f.A, 0, True)]) assert_equals(DT[:, f.A.nth(nth=0, skipna=True)], DT[:, dt.nth(f.A, 0, skipna=True)]) - - From 77ddd4b332b8a159c73fc72e8c66efa7dd0b25f2 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Wed, 4 Jan 2023 10:02:24 +1100 Subject: [PATCH 05/20] Update docs/releases/v1.1.0.rst Co-authored-by: oleksiyskononenko <35204136+oleksiyskononenko@users.noreply.github.com> --- docs/releases/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 95cd544754..5528904df1 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -114,7 +114,7 @@ -[new] Added reducer functions :func:`dt.countna()` and :func:`dt.nunique()`. [#2999] - -[new] Added function :func:`dt.nth()` to retrieve specified row. [#3128] + -[new] Added function :func:`dt.nth()` to retrieve the n-th row. [#3128] -[new] Class :class:`dt.FExpr` now has method :meth:`.nunique()`, which behaves exactly as the equivalent base level function :func:`dt.nunique()`. From 2f645b51117a5fea92a38a1b5ac63ae998a64b48 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 4 Jan 2023 11:07:37 +1100 Subject: [PATCH 06/20] fix test failures --- docs/api/dt/nth.rst | 26 ++++++++------------------ docs/api/fexpr/nth.rst | 4 ++-- src/core/expr/fexpr_nth.cc | 4 ++-- tests/dt/test-nth.py | 27 ++++++++++++++------------- 4 files changed, 26 insertions(+), 35 deletions(-) diff --git a/docs/api/dt/nth.rst b/docs/api/dt/nth.rst index 40d2ef84a5..7110a40053 100644 --- a/docs/api/dt/nth.rst +++ b/docs/api/dt/nth.rst @@ -3,7 +3,7 @@ :src: src/core/expr/fexpr_nth.cc pyfn_nth :cvar: doc_dt_nth :tests: tests/test-reduce.py - :signature: nth(cols, n) + :signature: nth(cols, n=0, skipna=None) Return the ``nth`` row for an ``Expr``. @@ -12,6 +12,13 @@ cols: FExpr | iterable Input columns or an iterable. + n: int + The number of the row to be returned. + + skipna: str + Drop the nulls before counting which row is the nth row. + Needs to be ``None``, ``any``, or ``all``. + return: Expr | ... One-row f-expression that has the same names, stypes and number of columns as `cols`. @@ -53,23 +60,6 @@ -- + ----- ----- ----- 0 | 2 3 1 - Replicate :func:`first()`:: - - >>> df[:, dt.nth(f.A, n=0)] - | A - | int32 - -- + ----- - 0 | 1 - [1 row x 1 column] - - Replicate :func:`last()`:: - - >>> df[:, dt.nth(f.A, n=-1)] - | A - | int32 - -- + ----- - 0 | 2 - [1 row x 1 column] In the presence of :func:`by()`, it returns the nth row of the specified columns per group:: diff --git a/docs/api/fexpr/nth.rst b/docs/api/fexpr/nth.rst index 64183476e4..6b824f54f3 100644 --- a/docs/api/fexpr/nth.rst +++ b/docs/api/fexpr/nth.rst @@ -2,6 +2,6 @@ .. xmethod:: datatable.FExpr.nth :src: src/core/expr/fexpr.cc PyFExpr::nth :cvar: doc_FExpr_nth - :signature: nth(n) + :signature: nth(n, skipna) - Equivalent to :func:`dt.nth(cols, n)`. + Equivalent to :func:`dt.nth(cols, n, skipna)`. diff --git a/src/core/expr/fexpr_nth.cc b/src/core/expr/fexpr_nth.cc index d4c88a6d2b..3016a62552 100644 --- a/src/core/expr/fexpr_nth.cc +++ b/src/core/expr/fexpr_nth.cc @@ -44,7 +44,7 @@ class FExpr_Nth : public FExpr_Func { std::string out = "nth"; out += '('; out += arg_->repr(); - out += ", nth="; + out += ", n="; out += std::to_string(n_); out += ", skipna="; out += SKIPNA? "True" : "False"; @@ -122,7 +122,7 @@ static py::oobj pyfn_nth(const py::XArgs& args) { DECLARE_PYFN(&pyfn_nth) ->name("nth") ->docs(doc_dt_nth) - ->arg_names({"cols", "nth", "skipna"}) + ->arg_names({"cols", "n", "skipna"}) ->n_positional_args(1) ->n_positional_or_keyword_args(2) ->n_required_args(2); diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py index 1c6fb7dd91..526d4c69ef 100644 --- a/tests/dt/test-nth.py +++ b/tests/dt/test-nth.py @@ -22,6 +22,7 @@ # IN THE SOFTWARE. #------------------------------------------------------------------------------- import pytest +import re from datatable import dt, f, nth, FExpr, by from tests import assert_equals @@ -30,10 +31,10 @@ #------------------------------------------------------------------------------- def test_nth_parameter_not_int(): - msg = r"Argument n in function datatable.nth\(\) should be an integer, " \ - "instead got " + msg = "The argument for the nth parameter in function datatable.nth() " \ + "should be an integer, instead got " DT = dt.Frame([1, 2, None, 4, 5]) - with pytest.raises(TypeError, match = msg): + with pytest.raises(TypeError, match = re.escape(msg)): DT[:, nth(f[0], '1')] @@ -49,11 +50,11 @@ def test_nth_no_argument(): #------------------------------------------------------------------------------- def test_nth_str(): - assert str(nth(f.A, nth=1)) == "FExpr<" + nth.__name__ + "(f.A, nth=1, skipna=False)>" - assert str(nth(f.A, nth=1, skipna=True) + 1) == "FExpr<" + nth.__name__ + "(f.A, nth=1, skipna=True) + 1>" - assert str(nth(f.A + f.B, nth = 1)) == "FExpr<" + nth.__name__ + "(f.A + f.B, nth=1, skipna=False)>" - assert str(nth(f.B, 1, True)) == "FExpr<" + nth.__name__ + "(f.B, nth=1, skipna=True)>" - assert str(nth(f[:2], 1)) == "FExpr<"+ nth.__name__ + "(f[:2], nth=1, skipna=False)>" + assert str(nth(f.A, n=1)) == "FExpr<" + nth.__name__ + "(f.A, n=1, skipna=False)>" + assert str(nth(f.A, n=1, skipna=True) + 1) == "FExpr<" + nth.__name__ + "(f.A, n=1, skipna=True) + 1>" + assert str(nth(f.A + f.B, n = 1)) == "FExpr<" + nth.__name__ + "(f.A + f.B, n=1, skipna=False)>" + assert str(nth(f.B, 1, True)) == "FExpr<" + nth.__name__ + "(f.B, n=1, skipna=True)>" + assert str(nth(f[:2], 1)) == "FExpr<"+ nth.__name__ + "(f[:2], n=1, skipna=False)>" def test_nth_empty_frame(): @@ -91,7 +92,7 @@ def test_nth_trivial(): def test_nth_trivial_skipna(): DT = dt.Frame([0]/dt.int64) - nth_fexpr = nth(f[:], nth = -1, skipna=True) + nth_fexpr = nth(f[:], n = -1, skipna=True) DT_nth = DT[:, nth_fexpr] assert isinstance(nth_fexpr, FExpr) assert_equals(DT, DT_nth) @@ -108,9 +109,9 @@ def test_nth_bool(): def test_nth_bool_skipna(): DT = dt.Frame([None, False, None, True, False, True]) - DT_nth = DT[:, [nth(f[:], nth = 0, skipna=True), - nth(f[:], nth = -1, skipna=True), - nth(f[:], nth = 2,skipna=True)]] + DT_nth = DT[:, [nth(f[:], n = 0, skipna=True), + nth(f[:], n = -1, skipna=True), + nth(f[:], n = 2,skipna=True)]] DT_ref = dt.Frame([[False], [True], [True]]) assert_equals(DT_nth, DT_ref) @@ -147,7 +148,7 @@ def test_nth_grouped(): def test_nth_grouped_skipna(): DT = dt.Frame([[15, None, 136, 93, 743, None, None, 91], ['a','a','a','b','b','c','c','c']]) - DT_nth = DT[:, [nth(f[:], nth = 0, skipna=True), nth(f[:], nth = 2,skipna=True)], by(f[-1])] + DT_nth = DT[:, [nth(f[:], n = 0, skipna=True), nth(f[:], n = 2,skipna=True)], by(f[-1])] DT_ref = dt.Frame({ 'C1':['a','b','c',], 'C2':[15,93, 91], From 691f0187bf5a4cb07c5ce9e1370957b68d524cfb Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 4 Jan 2023 11:09:15 +1100 Subject: [PATCH 07/20] fix test failures for test-f.py --- tests/test-f.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test-f.py b/tests/test-f.py index 7479535b00..c4913140b9 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -501,11 +501,11 @@ def test_codes(): def test_nth(): - assert str(dt.nth(f.A, nth=0)) == str(f.A.nth(nth=0, skipna=False)) - assert str(dt.nth(f.A, nth=1, skipna=True)) == str(f.A.nth(nth=1, skipna=True)) + assert str(dt.nth(f.A, n=0)) == str(f.A.nth(n=0, skipna=False)) + assert str(dt.nth(f.A, n=1, skipna=True)) == str(f.A.nth(n=1, skipna=True)) assert str(dt.nth(f[:], -1, skipna=False)) == str(f[:].nth(-1, False)) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) - assert_equals(DT[:, f.A.nth(nth=1, skipna=False)], DT[:, dt.nth(f.A, 1, False)]) - assert_equals(DT[:, f.A.nth(nth=0, skipna=True)], DT[:, dt.nth(f.A, 0, True)]) - assert_equals(DT[:, f.A.nth(nth=0, skipna=True)], DT[:, dt.nth(f.A, 0, skipna=True)]) + assert_equals(DT[:, f.A.nth(n=1, skipna=False)], DT[:, dt.nth(f.A, 1, False)]) + assert_equals(DT[:, f.A.nth(n=0, skipna=True)], DT[:, dt.nth(f.A, 0, True)]) + assert_equals(DT[:, f.A.nth(n=0, skipna=True)], DT[:, dt.nth(f.A, 0, skipna=True)]) From 140fa82bdb8b45e23beb882090429a752d404205 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 4 Jan 2023 11:10:56 +1100 Subject: [PATCH 08/20] fix test failures for test-f.py --- tests/dt/test-nth.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py index 526d4c69ef..78ff156aa0 100644 --- a/tests/dt/test-nth.py +++ b/tests/dt/test-nth.py @@ -114,8 +114,7 @@ def test_nth_bool_skipna(): nth(f[:], n = 2,skipna=True)]] DT_ref = dt.Frame([[False], [True], [True]]) - assert_equals(DT_nth, DT_ref) - + assert_equals(DT_nth, DT_ref) def test_nth_small(): From 738668c8a611c31e796ef377747e5c58be9ab535 Mon Sep 17 00:00:00 2001 From: sam Date: Thu, 5 Jan 2023 09:30:58 +1100 Subject: [PATCH 09/20] mark pytest funcs --- tests/dt/test-nth.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py index 78ff156aa0..3dcbcd14b8 100644 --- a/tests/dt/test-nth.py +++ b/tests/dt/test-nth.py @@ -106,7 +106,7 @@ def test_nth_bool(): DT_ref = dt.Frame([[False], [True], [None]/dt.bool8]) assert_equals(DT_nth, DT_ref) - +@pytest.mark.xfail(reason='skipna not implemented yet') def test_nth_bool_skipna(): DT = dt.Frame([None, False, None, True, False, True]) DT_nth = DT[:, [nth(f[:], n = 0, skipna=True), @@ -143,15 +143,15 @@ def test_nth_grouped(): }) assert_equals(DT_nth, DT_ref) - +@pytest.mark.xfail(reason='skipna not implemented yet') def test_nth_grouped_skipna(): DT = dt.Frame([[15, None, 136, 93, 743, None, None, 91], ['a','a','a','b','b','c','c','c']]) DT_nth = DT[:, [nth(f[:], n = 0, skipna=True), nth(f[:], n = 2,skipna=True)], by(f[-1])] DT_ref = dt.Frame({ 'C1':['a','b','c',], - 'C2':[15,93, 91], - 'C3':[136, None,91 ], + 'C0':[15,93, 91], + 'C2':[136, None,91 ], }) assert_equals(DT_nth, DT_ref) From 3371dfe8208d384373075a3638016a1c762c7455 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Fri, 6 Jan 2023 10:48:50 +1100 Subject: [PATCH 10/20] Update docs/api/dt/nth.rst Co-authored-by: oleksiyskononenko <35204136+oleksiyskononenko@users.noreply.github.com> --- docs/api/dt/nth.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/nth.rst b/docs/api/dt/nth.rst index 7110a40053..51ce105d7c 100644 --- a/docs/api/dt/nth.rst +++ b/docs/api/dt/nth.rst @@ -15,7 +15,7 @@ n: int The number of the row to be returned. - skipna: str + skipna: None | "any" | "all" Drop the nulls before counting which row is the nth row. Needs to be ``None``, ``any``, or ``all``. From d8fc26f7f26c6501f717a4cd9cf2e7c36119157d Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 11 Jan 2023 08:49:15 +1100 Subject: [PATCH 11/20] fix for grouped column --- src/core/column/nth.h | 7 ++++-- src/core/expr/fexpr_nth.cc | 44 ++++++++++++++++++++++---------------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/core/column/nth.h b/src/core/column/nth.h index 220a3af0f1..17fba9dde1 100644 --- a/src/core/column/nth.h +++ b/src/core/column/nth.h @@ -30,14 +30,16 @@ class Nth_ColumnImpl : public Virtual_ColumnImpl { private: Column col_; Groupby gby_; + bool is_grouped_; int32_t n_; size_t : 32; public: - Nth_ColumnImpl(Column&& col, const Groupby& gby, int32_t n) + Nth_ColumnImpl(Column&& col, const Groupby& gby, bool is_grouped, int32_t n) : Virtual_ColumnImpl(gby.size(), col.stype()), col_(std::move(col)), gby_(gby), + is_grouped_(is_grouped), n_(n) { xassert(col_.can_be_read_as()); @@ -45,7 +47,7 @@ class Nth_ColumnImpl : public Virtual_ColumnImpl { ColumnImpl* clone() const override { - return new Nth_ColumnImpl(Column(col_), gby_, n_); + return new Nth_ColumnImpl(Column(col_), gby_, is_grouped_, n_); } @@ -72,6 +74,7 @@ class Nth_ColumnImpl : public Virtual_ColumnImpl { bool isvalid = false; if (ni >= i0 && ni < i1){ + ni = is_grouped_?i0:ni; isvalid = col_.get_element(ni, out); } return isvalid; diff --git a/src/core/expr/fexpr_nth.cc b/src/core/expr/fexpr_nth.cc index 3016a62552..aecde185d9 100644 --- a/src/core/expr/fexpr_nth.cc +++ b/src/core/expr/fexpr_nth.cc @@ -54,39 +54,45 @@ class FExpr_Nth : public FExpr_Func { Workframe evaluate_n(EvalContext &ctx) const override { - Workframe inputs = arg_->evaluate_n(ctx); + Workframe wf = arg_->evaluate_n(ctx); Workframe outputs(ctx); Groupby gby = ctx.get_groupby(); if (!gby) gby = Groupby::single_group(ctx.nrows()); - for (size_t i = 0; i < inputs.ncols(); ++i) { - auto coli = inputs.retrieve_column(i); - outputs.add_column( - evaluate1(std::move(coli), gby, n_), - inputs.retrieve_name(i), - Grouping::GtoONE - ); + for (size_t i = 0; i < wf.ncols(); ++i) { + bool is_grouped = ctx.has_group_column( + wf.get_frame_id(i), + wf.get_column_id(i) + ); + Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped, n_); + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + // auto coli = inputs.retrieve_column(i); + // outputs.add_column( + // evaluate1(std::move(coli), gby, n_), + // inputs.retrieve_name(i), + // Grouping::GtoONE + // ); } return outputs; } - Column evaluate1(Column&& col, const Groupby& gby, const int32_t n) const { + Column evaluate1(Column&& col, const Groupby& gby, bool is_grouped, const int32_t n) const { SType stype = col.stype(); switch (stype) { case SType::VOID: return Column(new ConstNa_ColumnImpl(gby.size())); case SType::BOOL: - case SType::INT8: return make(std::move(col), gby, n); - case SType::INT16: return make(std::move(col), gby, n); + case SType::INT8: return make(std::move(col), gby, is_grouped, n); + case SType::INT16: return make(std::move(col), gby, is_grouped, n); case SType::DATE32: - case SType::INT32: return make(std::move(col), gby, n); + case SType::INT32: return make(std::move(col), gby, is_grouped, n); case SType::TIME64: - case SType::INT64: return make(std::move(col), gby, n); - case SType::FLOAT32: return make(std::move(col), gby, n); - case SType::FLOAT64: return make(std::move(col), gby, n); - case SType::STR32: return make(std::move(col), gby, n); - case SType::STR64: return make(std::move(col), gby, n); + case SType::INT64: return make(std::move(col), gby, is_grouped, n); + case SType::FLOAT32: return make(std::move(col), gby, is_grouped, n); + case SType::FLOAT64: return make(std::move(col), gby, is_grouped, n); + case SType::STR32: return make(std::move(col), gby, is_grouped, n); + case SType::STR64: return make(std::move(col), gby, is_grouped,n); default: throw TypeError() << "Invalid column of type `" << stype << "` in " << repr(); @@ -95,8 +101,8 @@ class FExpr_Nth : public FExpr_Func { template - Column make(Column&& col, const Groupby& gby, int32_t n) const { - return Column(new Nth_ColumnImpl(std::move(col), gby, n)); + Column make(Column&& col, const Groupby& gby, bool is_grouped, int32_t n) const { + return Column(new Nth_ColumnImpl(std::move(col), gby, is_grouped, n)); } }; From 7f6b10fa4d2a34cefc149ae6ea1df54dbc7172ac Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 11 Jan 2023 08:54:11 +1100 Subject: [PATCH 12/20] add test for grouped column --- tests/dt/test-nth.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py index 3dcbcd14b8..c1d53671b5 100644 --- a/tests/dt/test-nth.py +++ b/tests/dt/test-nth.py @@ -143,6 +143,13 @@ def test_nth_grouped(): }) assert_equals(DT_nth, DT_ref) + +def test_nth_grouped_by(): + """Test nth when called on the `by` column""" + DT = dt.Frame([0, 1, 0]) + DT_nth = DT[:, dt.nth(f.C0, 0), by(f.C0)] + DT_ref = dt.Frame({'C0':[0,1], 'C1':[0,1]}) + @pytest.mark.xfail(reason='skipna not implemented yet') def test_nth_grouped_skipna(): DT = dt.Frame([[15, None, 136, 93, 743, None, None, 91], From 6f0c35cffecbaa9ce0c614784dd69581a8ce1973 Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 11 Jan 2023 08:54:29 +1100 Subject: [PATCH 13/20] add test for grouped column --- tests/dt/test-nth.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py index c1d53671b5..c96360ce1c 100644 --- a/tests/dt/test-nth.py +++ b/tests/dt/test-nth.py @@ -144,8 +144,7 @@ def test_nth_grouped(): assert_equals(DT_nth, DT_ref) -def test_nth_grouped_by(): - """Test nth when called on the `by` column""" +def test_nth_grouped_column(): DT = dt.Frame([0, 1, 0]) DT_nth = DT[:, dt.nth(f.C0, 0), by(f.C0)] DT_ref = dt.Frame({'C0':[0,1], 'C1':[0,1]}) From 2c01928f0e84a13787bb32a6f8ef3bdbd7f4da2e Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 11 Jan 2023 20:26:02 +1100 Subject: [PATCH 14/20] add fexpr.rst --- docs/api/fexpr.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/api/fexpr.rst b/docs/api/fexpr.rst index da6fe0448a..e1063a90d1 100644 --- a/docs/api/fexpr.rst +++ b/docs/api/fexpr.rst @@ -327,6 +327,7 @@ .median() .min() .nunique() + .nth() .prod() .re_match() .remove() From 82c1876c102e98e68775c32f826c7c1f84d6a60f Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 15:21:42 +1100 Subject: [PATCH 15/20] default 0 for nth --- src/core/expr/fexpr.cc | 3 +-- src/core/expr/fexpr_nth.cc | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index 8d5f95f467..8b6458c9ba 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -533,8 +533,7 @@ DECLARE_METHOD(&PyFExpr::min) oobj PyFExpr::nth(const XArgs& args) { auto nthFn = oobj::import("datatable", "nth"); - oobj n = args[0].to_oobj() ? args[0].to_oobj() - : py::oint(0); + auto n = args[1].to(py::oint(0)); oobj skipna = args[1].to_oobj_or_none(); return nthFn.call({this, n, skipna}); } diff --git a/src/core/expr/fexpr_nth.cc b/src/core/expr/fexpr_nth.cc index aecde185d9..7f98fac34a 100644 --- a/src/core/expr/fexpr_nth.cc +++ b/src/core/expr/fexpr_nth.cc @@ -110,7 +110,7 @@ class FExpr_Nth : public FExpr_Func { static py::oobj pyfn_nth(const py::XArgs& args) { auto arg = args[0].to_oobj(); - auto n = args[1].to_oobj(); + auto n = args[1].to(py::oint(0)); auto skipna = args[2].to(false); if (!n.is_int()) { throw TypeError() << "The argument for the `nth` parameter " From 8e1d633385c090be9468bde11a42d26e476f5496 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 12 Mar 2023 15:24:06 +1100 Subject: [PATCH 16/20] fix fexpr.cc --- src/core/expr/fexpr.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index 8b6458c9ba..8d5f95f467 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -533,7 +533,8 @@ DECLARE_METHOD(&PyFExpr::min) oobj PyFExpr::nth(const XArgs& args) { auto nthFn = oobj::import("datatable", "nth"); - auto n = args[1].to(py::oint(0)); + oobj n = args[0].to_oobj() ? args[0].to_oobj() + : py::oint(0); oobj skipna = args[1].to_oobj_or_none(); return nthFn.call({this, n, skipna}); } From 0bc78d96d881bd9e44e2fdebd61cfe759204a2f2 Mon Sep 17 00:00:00 2001 From: samukweku Date: Tue, 18 Apr 2023 08:58:54 +1000 Subject: [PATCH 17/20] early work for skipna --- src/core/expr/fexpr.cc | 1 - src/core/expr/fexpr_nth.cc | 97 +++++++++++++++++++++++++----- src/core/expr/head_reduce_unary.cc | 2 - 3 files changed, 81 insertions(+), 19 deletions(-) diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index 8d5f95f467..1c23ea139e 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -543,7 +543,6 @@ DECLARE_METHOD(&PyFExpr::nth) ->name("nth") ->arg_names({"n", "skipna"}) ->n_positional_or_keyword_args(2) - ->n_required_args(1) ->docs(dt::doc_FExpr_nth); diff --git a/src/core/expr/fexpr_nth.cc b/src/core/expr/fexpr_nth.cc index 7f98fac34a..63b4690204 100644 --- a/src/core/expr/fexpr_nth.cc +++ b/src/core/expr/fexpr_nth.cc @@ -20,16 +20,19 @@ // IN THE SOFTWARE. //------------------------------------------------------------------------------ #include "column/const.h" +#include "column/func_binary.h" +#include "column/isna.h" #include "column/nth.h" #include "documentation.h" #include "expr/fexpr_func.h" #include "expr/eval_context.h" #include "python/xargs.h" +#include namespace dt { namespace expr { -template +template class FExpr_Nth : public FExpr_Func { private: ptrExpr arg_; @@ -46,11 +49,57 @@ class FExpr_Nth : public FExpr_Func { out += arg_->repr(); out += ", n="; out += std::to_string(n_); - out += ", skipna="; - out += SKIPNA? "True" : "False"; + if (SKIPNA == 0) { + out += ", skipna=None"; + } else if (SKIPNA == 1) { + out += ", skipna=any"; + } else if (SKIPNA == 2) { + out += ", skipna=all"; + } out += ')'; return out; } + + static Column make_isna_col(Column&& col) { + switch (col.stype()) { + case SType::VOID: return Const_ColumnImpl::make_bool_column(col.nrows(), true); + case SType::BOOL: + case SType::INT8: return Column(new Isna_ColumnImpl(std::move(col))); + case SType::INT16: return Column(new Isna_ColumnImpl(std::move(col))); + case SType::DATE32: + case SType::INT32: return Column(new Isna_ColumnImpl(std::move(col))); + case SType::TIME64: + case SType::INT64: return Column(new Isna_ColumnImpl(std::move(col))); + case SType::FLOAT32: return Column(new Isna_ColumnImpl(std::move(col))); + case SType::FLOAT64: return Column(new Isna_ColumnImpl(std::move(col))); + case SType::STR32: + case SType::STR64: return Column(new Isna_ColumnImpl(std::move(col))); + default: throw RuntimeError(); + } + } + + template + static Column make_bool_col(Column&& a, Column&& b, SType BOOL) { + xassert(compatible_type(stype)); + size_t nrows = a.nrows(); + a.cast_inplace(SType::BOOL); + b.cast_inplace(SType::BOOL); + if (SKIPNA == 1) { + return Column(new FuncBinary1_ColumnImpl( + std::move(a), std::move(b), + [](T x, T y){ return x | y; }, + nrows, SType::BOOL + )); + } + if (SKIPNA == 2) { + return Column(new FuncBinary1_ColumnImpl( + std::move(a), std::move(b), + [](T x, T y){ return x & y; }, + nrows, SType::BOOL + )); + } + + } Workframe evaluate_n(EvalContext &ctx) const override { @@ -67,16 +116,12 @@ class FExpr_Nth : public FExpr_Func { ); Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped, n_); outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); - // auto coli = inputs.retrieve_column(i); - // outputs.add_column( - // evaluate1(std::move(coli), gby, n_), - // inputs.retrieve_name(i), - // Grouping::GtoONE - // ); } return outputs; } + + Column evaluate1(Column&& col, const Groupby& gby, bool is_grouped, const int32_t n) const { SType stype = col.stype(); @@ -111,17 +156,37 @@ class FExpr_Nth : public FExpr_Func { static py::oobj pyfn_nth(const py::XArgs& args) { auto arg = args[0].to_oobj(); auto n = args[1].to(py::oint(0)); - auto skipna = args[2].to(false); + auto skipna = args[2].to_oobj_or_none(); + if (!skipna.is_none()) { + if (!skipna.is_string()) { + throw TypeError() << "The argument for the `skipna` parameter " + <<"in function datatable.nth() should either be None, " + <<"or a string, instead got "<(as_fexpr(arg), n)); - } else { - return PyFExpr::make(new FExpr_Nth(as_fexpr(arg), n)); - } + if (!skipna.is_none()) { + std::string skip_na = skipna.to_string(); + if (skip_na == "any") { + return PyFExpr::make(new FExpr_Nth<1>(as_fexpr(arg), n)); + } + if (skip_na == "all") { + return PyFExpr::make(new FExpr_Nth<2>(as_fexpr(arg), n)); + } + + } + return PyFExpr::make(new FExpr_Nth<0>(as_fexpr(arg), n)); } @@ -131,7 +196,7 @@ DECLARE_PYFN(&pyfn_nth) ->arg_names({"cols", "n", "skipna"}) ->n_positional_args(1) ->n_positional_or_keyword_args(2) - ->n_required_args(2); + ->n_required_args(1); }} // dt::expr diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index ac3f1b6c13..99829c7d85 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -723,8 +723,6 @@ Workframe Head_Reduce_Unary::evaluate_n( } else { switch (op) { case Op::STDEV: fn = compute_gsd; break; - case Op::FIRST: - case Op::LAST: fn = compute_gfirstlast; break; case Op::MIN: case Op::MAX: case Op::FIRST: From a1f8575ede8f2a083fadf16b519806344a3d520b Mon Sep 17 00:00:00 2001 From: samukweku Date: Mon, 24 Apr 2023 11:48:07 +1000 Subject: [PATCH 18/20] implement skipna --- src/core/column/nth.h | 6 +- src/core/expr/fexpr_nth.cc | 165 ++++++++++++++++++++------ tests/dt/test-nth.py | 229 +++++++++++++++++++++++++++---------- tests/test-f.py | 12 +- 4 files changed, 310 insertions(+), 102 deletions(-) diff --git a/src/core/column/nth.h b/src/core/column/nth.h index 17fba9dde1..a2c13868ed 100644 --- a/src/core/column/nth.h +++ b/src/core/column/nth.h @@ -71,16 +71,14 @@ class Nth_ColumnImpl : public Virtual_ColumnImpl { // wrap-around. size_t ni = (n_ >= 0)? static_cast(n_) + i0 : static_cast(n_) + i1; - bool isvalid = false; if (ni >= i0 && ni < i1){ - ni = is_grouped_?i0:ni; + ni = is_grouped_?i:ni; isvalid = col_.get_element(ni, out); } return isvalid; } -}; - +}; } // namespace dt #endif diff --git a/src/core/expr/fexpr_nth.cc b/src/core/expr/fexpr_nth.cc index 63b4690204..ec86546aee 100644 --- a/src/core/expr/fexpr_nth.cc +++ b/src/core/expr/fexpr_nth.cc @@ -20,14 +20,16 @@ // IN THE SOFTWARE. //------------------------------------------------------------------------------ #include "column/const.h" -#include "column/func_binary.h" +#include "column/func_nary.h" +#include "column/latent.h" #include "column/isna.h" #include "column/nth.h" #include "documentation.h" +#include "expr/fnary/fnary.h" #include "expr/fexpr_func.h" #include "expr/eval_context.h" +#include "parallel/api.h" #include "python/xargs.h" -#include namespace dt { namespace expr { @@ -78,49 +80,148 @@ class FExpr_Nth : public FExpr_Func { } } - template - static Column make_bool_col(Column&& a, Column&& b, SType BOOL) { - xassert(compatible_type(stype)); - size_t nrows = a.nrows(); - a.cast_inplace(SType::BOOL); - b.cast_inplace(SType::BOOL); + static bool op_rowany(size_t i, int8_t* out, const colvec& columns) { + for (const auto& col : columns) { + int8_t x; + bool xvalid = col.get_element(i, &x); + if (xvalid && x) { + *out = 1; + return true; + } + } + *out = 0; + return true; + } + + static bool op_rowall(size_t i, int8_t* out, const colvec& columns) { + for (const auto& col : columns) { + int8_t x; + bool xvalid = col.get_element(i, &x); + if (!xvalid || x == 0) { + *out = 0; + return true; + } + } + *out = 1; + return true; + } + + static Column make_bool_column(colvec&& columns, const size_t nrows, const size_t ncols) { if (SKIPNA == 1) { - return Column(new FuncBinary1_ColumnImpl( - std::move(a), std::move(b), - [](T x, T y){ return x | y; }, - nrows, SType::BOOL - )); - } - if (SKIPNA == 2) { - return Column(new FuncBinary1_ColumnImpl( - std::move(a), std::move(b), - [](T x, T y){ return x & y; }, - nrows, SType::BOOL - )); + return Column(new FuncNary_ColumnImpl( + std::move(columns), op_rowany, nrows, SType::BOOL)); } + return Column(new FuncNary_ColumnImpl( + std::move(columns), op_rowall, nrows, SType::BOOL)); + } + + template + static RowIndex rowindex_nth(Column& col, const Groupby& gby) { + Buffer buf = Buffer::mem(col.nrows() * sizeof(int32_t)); + auto indices = static_cast(buf.xptr()); + Latent_ColumnImpl::vivify(col); + + dt::parallel_for_dynamic( + gby.size(), + [&](size_t gi) { + size_t i1, i2; + gby.get_group(gi, &i1, &i2); + size_t n = POSITIVE? i1: i2 - 1; + int8_t value; + bool is_valid; + + if (POSITIVE) { + for (size_t i = i1; i < i2; ++i) { + is_valid = col.get_element(i, &value); + if (value==0 && is_valid) { + indices[n] = static_cast(i); + n += 1; + } + } + for (size_t j = n; j < i2; ++j){ + indices[j] = RowIndex::NA; + } + } else { + for (size_t i = i2; i-- > i1;) { + is_valid = col.get_element(i, &value); + if (value==0 && is_valid) { + indices[n] = static_cast(i); + n -= 1; + } + } + for (size_t j = n+1; j-- > i1;){ + indices[j] = RowIndex::NA; + } + } + } + ); + + return RowIndex(std::move(buf), RowIndex::ARR32|RowIndex::SORTED); } - Workframe evaluate_n(EvalContext &ctx) const override { Workframe wf = arg_->evaluate_n(ctx); Workframe outputs(ctx); Groupby gby = ctx.get_groupby(); - if (!gby) gby = Groupby::single_group(ctx.nrows()); + // Check if the input frame is grouped as `GtoONE` + bool is_wf_grouped = (wf.get_grouping_mode() == Grouping::GtoONE); - for (size_t i = 0; i < wf.ncols(); ++i) { - bool is_grouped = ctx.has_group_column( - wf.get_frame_id(i), - wf.get_column_id(i) - ); - Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped, n_); - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + if (is_wf_grouped) { + // Check if the input frame columns are grouped + bool are_cols_grouped = ctx.has_group_column( + wf.get_frame_id(0), + wf.get_column_id(0) + ); + + if (!are_cols_grouped) { + // When the input frame is `GtoONE`, but columns are not grouped, + // it means we are dealing with the output of another reducer. + // In such a case we create a new groupby, that has one element + // per a group. This may not be optimal performance-wise, + // but chained reducers is a very rare scenario. + xassert(gby.size() == wf.nrows()); + gby = Groupby::nrows_groups(gby.size()); + } } - return outputs; - } - + if (SKIPNA == 0) { + for (size_t i = 0; i < wf.ncols(); ++i) { + bool is_grouped = ctx.has_group_column( + wf.get_frame_id(i), + wf.get_column_id(i) + ); + Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped, n_); + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + } + } else { + Workframe wf_skipna = arg_->evaluate_n(ctx); + colvec columns; + size_t ncols = wf_skipna.ncols(); + size_t nrows = wf_skipna.nrows(); + columns.reserve(ncols); + for (size_t i = 0; i < ncols; ++i) { + Column coli = make_isna_col(wf_skipna.retrieve_column(i)); + columns.push_back(std::move(coli)); + } + Column bool_column = make_bool_column(std::move(columns), nrows, ncols); + RowIndex ri = n_ < 0 ? rowindex_nth(bool_column, gby) + : rowindex_nth(bool_column, gby); + for (size_t i = 0; i < wf.ncols(); ++i) { + bool is_grouped = ctx.has_group_column( + wf.get_frame_id(i), + wf.get_column_id(i) + ); + Column coli = wf.retrieve_column(i); + coli.apply_rowindex(ri); + coli = evaluate1(std::move(coli), gby, is_grouped, n_); + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + } + } + + return outputs; + } Column evaluate1(Column&& col, const Groupby& gby, bool is_grouped, const int32_t n) const { diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py index c96360ce1c..21df824a8d 100644 --- a/tests/dt/test-nth.py +++ b/tests/dt/test-nth.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- # Copyright 2022 H2O.ai # # Permission is hereby granted, free of charge, to any person obtaining a @@ -20,41 +20,55 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- import pytest import re from datatable import dt, f, nth, FExpr, by from tests import assert_equals -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- # Errors -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- + def test_nth_parameter_not_int(): - msg = "The argument for the nth parameter in function datatable.nth() " \ - "should be an integer, instead got " + msg = ( + "The argument for the nth parameter in function datatable.nth() " + "should be an integer, instead got " + ) DT = dt.Frame([1, 2, None, 4, 5]) - with pytest.raises(TypeError, match = re.escape(msg)): - DT[:, nth(f[0], '1')] + with pytest.raises(TypeError, match=re.escape(msg)): + DT[:, nth(f[0], "1")] def test_nth_no_argument(): - msg = r"Function datatable.nth\(\) requires at least 1 positional " \ - "argument, but none were given" - with pytest.raises(TypeError, match = msg): + msg = ( + r"Function datatable.nth\(\) requires at least 1 positional " + "argument, but none were given" + ) + with pytest.raises(TypeError, match=msg): nth() -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- # Normal -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- + def test_nth_str(): - assert str(nth(f.A, n=1)) == "FExpr<" + nth.__name__ + "(f.A, n=1, skipna=False)>" - assert str(nth(f.A, n=1, skipna=True) + 1) == "FExpr<" + nth.__name__ + "(f.A, n=1, skipna=True) + 1>" - assert str(nth(f.A + f.B, n = 1)) == "FExpr<" + nth.__name__ + "(f.A + f.B, n=1, skipna=False)>" - assert str(nth(f.B, 1, True)) == "FExpr<" + nth.__name__ + "(f.B, n=1, skipna=True)>" - assert str(nth(f[:2], 1)) == "FExpr<"+ nth.__name__ + "(f[:2], n=1, skipna=False)>" + assert str(nth(f.A, n=1)) == "FExpr<" + nth.__name__ + "(f.A, n=1, skipna=None)>" + assert ( + str(nth(f.A, n=1, skipna="all") + 1) + == "FExpr<" + nth.__name__ + "(f.A, n=1, skipna=all) + 1>" + ) + assert ( + str(nth(f.A + f.B, n=1)) + == "FExpr<" + nth.__name__ + "(f.A + f.B, n=1, skipna=None)>" + ) + assert ( + str(nth(f.B, 1, "any")) == "FExpr<" + nth.__name__ + "(f.B, n=1, skipna=any)>" + ) + assert str(nth(f[:2], 1)) == "FExpr<" + nth.__name__ + "(f[:2], n=1, skipna=None)>" def test_nth_empty_frame(): @@ -75,15 +89,16 @@ def test_nth_void(): DT = dt.Frame([None, None, None]) DT_nth = DT[:, nth(f[:], 0)] assert_equals(DT_nth, DT[0, :]) - + + def test_nth_void_skipna(): DT = dt.Frame([None, None, None]) - DT_nth = DT[:, nth(f[:], 0, True)] + DT_nth = DT[:, nth(f[:], 0, None)] assert_equals(DT_nth, DT[0, :]) def test_nth_trivial(): - DT = dt.Frame([0]/dt.int64) + DT = dt.Frame([0] / dt.int64) nth_fexpr = nth(f[:], n=-1) DT_nth = DT[:, nth_fexpr] assert isinstance(nth_fexpr, FExpr) @@ -91,73 +106,167 @@ def test_nth_trivial(): def test_nth_trivial_skipna(): - DT = dt.Frame([0]/dt.int64) - nth_fexpr = nth(f[:], n = -1, skipna=True) + DT = dt.Frame([0] / dt.int64) + nth_fexpr = nth(f[:], n=-1, skipna=None) DT_nth = DT[:, nth_fexpr] assert isinstance(nth_fexpr, FExpr) assert_equals(DT, DT_nth) - + def test_nth_bool(): DT = dt.Frame([None, False, None, True, False, True]) - DT_nth = DT[:, [nth(f[:], n=1), - nth(f[:], n=-1), - nth(f[:], n=24)]] - DT_ref = dt.Frame([[False], [True], [None]/dt.bool8]) + DT_nth = DT[:, [nth(f[:], n=1), nth(f[:], n=-1), nth(f[:], n=24)]] + DT_ref = dt.Frame([[False], [True], [None] / dt.bool8]) assert_equals(DT_nth, DT_ref) -@pytest.mark.xfail(reason='skipna not implemented yet') + def test_nth_bool_skipna(): DT = dt.Frame([None, False, None, True, False, True]) - DT_nth = DT[:, [nth(f[:], n = 0, skipna=True), - nth(f[:], n = -1, skipna=True), - nth(f[:], n = 2,skipna=True)]] - DT_ref = dt.Frame([[False], [True], [True]]) + DT_nth = DT[ + :, + [ + nth(f[:], n=0, skipna="all"), + nth(f[:], n=-1, skipna="any"), + nth(f[:], n=2, skipna="any"), + ], + ] + DT_ref = dt.Frame([[False], [True], [False]]) + + assert_equals(DT_nth, DT_ref) - assert_equals(DT_nth, DT_ref) - def test_nth_small(): DT = dt.Frame([None, 3, None, 4]) - DT_nth = DT[:, [nth(f[:], n=1), - nth(f[:], n=-5)]] - DT_ref = dt.Frame([[3]/dt.int32, [None]/dt.int32]) + DT_nth = DT[:, [nth(f[:], n=1), nth(f[:], n=-5)]] + DT_ref = dt.Frame([[3] / dt.int32, [None] / dt.int32]) assert_equals(DT_nth, DT_ref) def test_nth_string(): - DT = dt.Frame(['d', 'a', 'z', 'b']) + DT = dt.Frame(["d", "a", "z", "b"]) DT_nth = DT[:, [nth(f[:], 0), nth(f[:], n=-1)]] - DT_ref = dt.Frame([['d'], ['b'] ]) + DT_ref = dt.Frame([["d"], ["b"]]) assert_equals(DT_nth, DT_ref) def test_nth_grouped(): - DT = dt.Frame([[15, None, 136, 93, 743, None, None, 91], - ['a','a','a','b','b','c','c','c']]) + DT = dt.Frame( + [ + [15, None, 136, 93, 743, None, None, 91], + ["a", "a", "a", "b", "b", "c", "c", "c"], + ] + ) DT_nth = DT[:, [nth(f[:], n=0), nth(f[:], n=2)], by(f[-1])] - DT_ref = dt.Frame({ - 'C1':['a','b','c',], - 'C0':[15, 93, None], - 'C2':[136, None, 91 ], - }) + DT_ref = dt.Frame( + { + "C1": [ + "a", + "b", + "c", + ], + "C0": [15, 93, None], + "C2": [136, None, 91], + } + ) + assert_equals(DT_nth, DT_ref) + + +def test_positive_nth_grouped_skipna(): + DT = dt.Frame( + [ + [15, None, 136, 93, 743, None, None, 91], + ["a", "a", "a", "b", "b", "c", "c", "c"], + ] + ) + DT_nth = DT[ + :, [nth(f[:], n=0, skipna="all"), nth(f[:], n=1, skipna="any")], by(f[-1]) + ] + DT_ref = dt.Frame( + { + "C1": [ + "a", + "b", + "c", + ], + "C0": [15, 93, 91], + "C2": [136, 743, None], + } + ) + assert_equals(DT_nth, DT_ref) + + +def test_negative_nth_grouped_skipna(): + DT = dt.Frame( + [ + [15, None, 136, 93, 743, None, None, 91], + ["a", "a", "a", "b", "b", "c", "c", "c"], + ] + ) + DT_nth = DT[ + :, [nth(f[:], n=-1, skipna="all"), nth(f[:], n=-2, skipna="any")], by(f[-1]) + ] + DT_ref = dt.Frame( + { + "C1": [ + "a", + "b", + "c", + ], + "C0": [136, 743, 91], + "C2": [15, 93, None], + } + ) assert_equals(DT_nth, DT_ref) def test_nth_grouped_column(): DT = dt.Frame([0, 1, 0]) DT_nth = DT[:, dt.nth(f.C0, 0), by(f.C0)] - DT_ref = dt.Frame({'C0':[0,1], 'C1':[0,1]}) - -@pytest.mark.xfail(reason='skipna not implemented yet') -def test_nth_grouped_skipna(): - DT = dt.Frame([[15, None, 136, 93, 743, None, None, 91], - ['a','a','a','b','b','c','c','c']]) - DT_nth = DT[:, [nth(f[:], n = 0, skipna=True), nth(f[:], n = 2,skipna=True)], by(f[-1])] - DT_ref = dt.Frame({ - 'C1':['a','b','c',], - 'C0':[15,93, 91], - 'C2':[136, None,91 ], - }) + DT_ref = dt.Frame({"C0": [0, 1], "C1": [0, 1]}) + assert_equals(DT_nth, DT_ref) + + +def test_nth_multiple_columns_skipna_any(): + DT = dt.Frame( + { + "building": ["a", "a", "b", "b", "a", "a", "b", "b"], + "var1": [1.5, None, 2.1, 2.2, 1.2, 1.3, 2.4, None], + "var2": [100, 110, 105, None, 102, None, 103, 107], + "var3": [10, 11, None, None, None, None, None, None], + "var4": [1, 2, 3, 4, 5, 6, 7, 8], + } + ) + DT_nth = DT[:, dt.nth(f[:], skipna="any"), by(f.building)] + DT_ref = dt.Frame( + { + "building": ["a", "b"], + "var1": [1.5, None]/dt.float64, + "var2": [100.0, None]/dt.int32, + "var3": [10.0, None]/dt.int32, + "var4": [1.0, None]/dt.int32 + } + ) + assert_equals(DT_nth, DT_ref) + + +def test_nth_multiple_columns_skipna_all(): + DT = dt.Frame( + { + "building": ["a", "a", "b", "b", "a", "a", "b", "b"], + "var1": [1.5, None, 2.1, 2.2, 1.2, 1.3, 2.4, None], + "var2": [100, 110, 105, None, 102, None, 103, 107], + "var3": [10, 11, None, None, None, None, None, None], + "var4": [1, 2, 3, 4, 5, 6, 7, 8], + } + ) + DT_nth = DT[:, dt.nth(f[:], skipna="all"), by(f.building)] + DT_ref = dt.Frame( + { + "building": ["a", "b"], + "var1": [1.5, 2.1], + "var2": [100, 105]/dt.int32, + "var3": [10.0, None]/dt.int32, + "var4": [1, 3], + } + ) assert_equals(DT_nth, DT_ref) - diff --git a/tests/test-f.py b/tests/test-f.py index c4913140b9..5a70ef0540 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -501,11 +501,11 @@ def test_codes(): def test_nth(): - assert str(dt.nth(f.A, n=0)) == str(f.A.nth(n=0, skipna=False)) - assert str(dt.nth(f.A, n=1, skipna=True)) == str(f.A.nth(n=1, skipna=True)) - assert str(dt.nth(f[:], -1, skipna=False)) == str(f[:].nth(-1, False)) + assert str(dt.nth(f.A, n=0)) == str(f.A.nth(n=0, skipna=None)) + assert str(dt.nth(f.A, n=1, skipna="any")) == str(f.A.nth(n=1, skipna="any")) + assert str(dt.nth(f[:], -1, skipna=None)) == str(f[:].nth(-1, None)) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) - assert_equals(DT[:, f.A.nth(n=1, skipna=False)], DT[:, dt.nth(f.A, 1, False)]) - assert_equals(DT[:, f.A.nth(n=0, skipna=True)], DT[:, dt.nth(f.A, 0, True)]) - assert_equals(DT[:, f.A.nth(n=0, skipna=True)], DT[:, dt.nth(f.A, 0, skipna=True)]) + assert_equals(DT[:, f.A.nth(n=1, skipna=None)], DT[:, dt.nth(f.A, 1, None)]) + assert_equals(DT[:, f.A.nth(n=0, skipna="any")], DT[:, dt.nth(f.A, 0, "any")]) + assert_equals(DT[:, f.A.nth(n=0, skipna="all")], DT[:, dt.nth(f.A, 0, skipna="all")]) From eb2fb7bf11654305c628258bf11d66b224cc655c Mon Sep 17 00:00:00 2001 From: samukweku Date: Mon, 24 Apr 2023 12:09:54 +1000 Subject: [PATCH 19/20] refactor --- src/core/expr/fexpr_nth.cc | 63 +++++++++++++++++++------------------- tests/dt/test-nth.py | 4 +++ 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/core/expr/fexpr_nth.cc b/src/core/expr/fexpr_nth.cc index ec86546aee..177c1b744c 100644 --- a/src/core/expr/fexpr_nth.cc +++ b/src/core/expr/fexpr_nth.cc @@ -106,7 +106,7 @@ class FExpr_Nth : public FExpr_Func { return true; } - static Column make_bool_column(colvec&& columns, const size_t nrows, const size_t ncols) { + static Column make_boolean_column(colvec&& columns, const size_t nrows, const size_t ncols) { if (SKIPNA == 1) { return Column(new FuncNary_ColumnImpl( std::move(columns), op_rowany, nrows, SType::BOOL)); @@ -186,40 +186,39 @@ class FExpr_Nth : public FExpr_Func { } } - if (SKIPNA == 0) { + if (wf.nrows() == 0) { for (size_t i = 0; i < wf.ncols(); ++i) { - bool is_grouped = ctx.has_group_column( - wf.get_frame_id(i), - wf.get_column_id(i) - ); - Column coli = evaluate1(wf.retrieve_column(i), gby, is_grouped, n_); - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + Column coli = Column::new_na_column(1, wf.retrieve_column(i).stype()); + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); } - } else { - Workframe wf_skipna = arg_->evaluate_n(ctx); - colvec columns; - size_t ncols = wf_skipna.ncols(); - size_t nrows = wf_skipna.nrows(); - columns.reserve(ncols); - for (size_t i = 0; i < ncols; ++i) { - Column coli = make_isna_col(wf_skipna.retrieve_column(i)); - columns.push_back(std::move(coli)); - } - Column bool_column = make_bool_column(std::move(columns), nrows, ncols); - RowIndex ri = n_ < 0 ? rowindex_nth(bool_column, gby) - : rowindex_nth(bool_column, gby); - for (size_t i = 0; i < wf.ncols(); ++i) { - bool is_grouped = ctx.has_group_column( - wf.get_frame_id(i), - wf.get_column_id(i) - ); - Column coli = wf.retrieve_column(i); - coli.apply_rowindex(ri); - coli = evaluate1(std::move(coli), gby, is_grouped, n_); - outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); - } + return outputs; + } + + RowIndex ri; + if (SKIPNA > 0) { + Workframe wf_skipna = arg_->evaluate_n(ctx); + colvec columns; + size_t ncols = wf_skipna.ncols(); + size_t nrows = wf_skipna.nrows(); + columns.reserve(ncols); + for (size_t i = 0; i < ncols; ++i) { + Column coli = make_isna_col(wf_skipna.retrieve_column(i)); + columns.push_back(std::move(coli)); } - + Column bool_column = make_boolean_column(std::move(columns), nrows, ncols); + ri = n_ < 0 ? rowindex_nth(bool_column, gby) + : rowindex_nth(bool_column, gby); + } + for (size_t i = 0; i < wf.ncols(); ++i) { + bool is_grouped = ctx.has_group_column( + wf.get_frame_id(i), + wf.get_column_id(i) + ); + Column coli = wf.retrieve_column(i); + if (SKIPNA > 0) coli.apply_rowindex(ri); + coli = evaluate1(std::move(coli), gby, is_grouped, n_); + outputs.add_column(std::move(coli), wf.retrieve_name(i), Grouping::GtoONE); + } return outputs; } diff --git a/tests/dt/test-nth.py b/tests/dt/test-nth.py index 21df824a8d..ddb5124ab0 100644 --- a/tests/dt/test-nth.py +++ b/tests/dt/test-nth.py @@ -270,3 +270,7 @@ def test_nth_multiple_columns_skipna_all(): } ) assert_equals(DT_nth, DT_ref) + +def test_nth_zero_rows(): + DT = dt.Frame() + assert_equals(DT[:, nth(f[:])], DT) \ No newline at end of file From b8b5c1e3eb39359f665cd420529426aaf9c0be0a Mon Sep 17 00:00:00 2001 From: samukweku Date: Wed, 17 May 2023 20:27:33 +1000 Subject: [PATCH 20/20] fix test fails --- src/core/expr/head_reduce_unary.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index 134877da05..f7bceaf661 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -552,8 +552,6 @@ Workframe Head_Reduce_Unary::evaluate_n( } else { switch (op) { case Op::STDEV: fn = compute_gsd; break; - case Op::MIN: - case Op::MAX: case Op::FIRST: case Op::LAST: fn = compute_gfirstlast; break; case Op::MEDIAN: fn = compute_gmedian; break;