@@ -230,3 +230,125 @@ def test_series_describe_temporal(scalars_dfs):
230230 check_dtype = False ,
231231 check_index_type = False ,
232232 )
233+
234+
def test_df_groupby_describe(scalars_dfs):
    """Check DataFrameGroupBy.describe(include="all") against pandas.

    For each supported column, the exact statistics (count/mean/std/min/max
    for numeric columns, count/nunique for the string column) are compared
    with the pandas result. Quartiles are excluded from the exact comparison
    and only checked for a plausible ordering.
    """
    # TODO: supply a reason why this isn't compatible with pandas 1.x
    pytest.importorskip("pandas", minversion="2.0.0")
    scalars_df, scalars_pandas_df = scalars_dfs

    numeric_columns = [
        "int64_col",
        "float64_col",
    ]
    non_numeric_columns = ["string_col"]
    supported_columns = numeric_columns + non_numeric_columns

    bf_full_result = (
        scalars_df.groupby("bool_col")[supported_columns]
        .describe(include="all")
        .to_pandas()
    )

    pd_full_result = scalars_pandas_df.groupby("bool_col")[supported_columns].describe(
        include="all"
    )

    for col in supported_columns:
        # Selecting the top-level column label yields that column's stats.
        pd_result = pd_full_result[col]
        bf_result = bf_full_result[col]

        if col in numeric_columns:
            # Drop quartiles, as they are approximate
            bf_min = bf_result["min"]
            bf_p25 = bf_result["25%"]
            bf_p50 = bf_result["50%"]
            bf_p75 = bf_result["75%"]
            bf_max = bf_result["max"]

            # Reindex results with the specified keys and their order, because
            # the relative order is not important.
            bf_result = bf_result.reindex(
                columns=["count", "mean", "std", "min", "max"]
            )
            pd_result = pd_result.reindex(
                columns=["count", "mean", "std", "min", "max"]
            )

            # Double-check that quantiles are at least plausible, i.e. that
            # min <= 25% <= 50% <= 75% <= max holds for every group.
            assert (
                (bf_min <= bf_p25)
                & (bf_p25 <= bf_p50)
                & (bf_p50 <= bf_p75)
                & (bf_p75 <= bf_max)
            ).all()
        else:
            # Reindex results with the specified keys and their order, because
            # the relative order is not important.
            bf_result = bf_result.reindex(columns=["count", "nunique"])
            pd_result = pd_result.reindex(columns=["count", "unique"])

        # Compare the exact statistics for both numeric and non-numeric columns.
        pandas.testing.assert_frame_equal(
            # BF counter part of "unique" is called "nunique"
            pd_result.astype("Float64").rename(columns={"unique": "nunique"}),
            bf_result,
            check_dtype=False,
            check_index_type=False,
        )
297+
298+
def test_series_groupby_describe(scalars_dfs):
    """Check SeriesGroupBy.describe(include="all") against pandas.

    Each supported column is selected from the grouped frame and described
    on its own. The exact statistics (count/mean/std/min/max for numeric
    columns, count/nunique for the string column) are compared with the
    pandas result. Quartiles are excluded from the exact comparison and only
    checked for a plausible ordering.
    """
    # TODO: supply a reason why this isn't compatible with pandas 1.x
    pytest.importorskip("pandas", minversion="2.0.0")
    scalars_df, scalars_pandas_df = scalars_dfs

    numeric_columns = [
        "int64_col",
        "float64_col",
    ]
    non_numeric_columns = ["string_col"]
    supported_columns = numeric_columns + non_numeric_columns

    bf_df = scalars_df.groupby("bool_col")

    pd_df = scalars_pandas_df.groupby("bool_col")

    for col in supported_columns:
        pd_result = pd_df[col].describe(include="all")
        bf_result = bf_df[col].describe(include="all").to_pandas()

        if col in numeric_columns:
            # Drop quartiles, as they are approximate
            bf_min = bf_result["min"]
            bf_p25 = bf_result["25%"]
            bf_p50 = bf_result["50%"]
            bf_p75 = bf_result["75%"]
            bf_max = bf_result["max"]

            # Reindex results with the specified keys and their order, because
            # the relative order is not important.
            bf_result = bf_result.reindex(
                columns=["count", "mean", "std", "min", "max"]
            )
            pd_result = pd_result.reindex(
                columns=["count", "mean", "std", "min", "max"]
            )

            # Double-check that quantiles are at least plausible, i.e. that
            # min <= 25% <= 50% <= 75% <= max holds for every group.
            assert (
                (bf_min <= bf_p25)
                & (bf_p25 <= bf_p50)
                & (bf_p50 <= bf_p75)
                & (bf_p75 <= bf_max)
            ).all()
        else:
            # Reindex results with the specified keys and their order, because
            # the relative order is not important.
            bf_result = bf_result.reindex(columns=["count", "nunique"])
            pd_result = pd_result.reindex(columns=["count", "unique"])

        # Compare the exact statistics for both numeric and non-numeric columns.
        pandas.testing.assert_frame_equal(
            # BF counter part of "unique" is called "nunique"
            pd_result.astype("Float64").rename(columns={"unique": "nunique"}),
            bf_result,
            check_dtype=False,
            check_index_type=False,
        )
0 commit comments