@@ -424,6 +424,58 @@ def test_cut_default_labels(scalars_dfs):
424424 )
425425
426426
@pytest.mark.parametrize(
    ("breaks",),
    [
        ([0, 5, 10, 15, 20, 100, 1000],),  # ints
        ([0.5, 10.5, 15.5, 20.5, 100.5, 1000.5],),  # floats
        ([0, 5, 10.5, 15.5, 20, 100, 1000.5],),  # mixed
    ],
)
def test_cut_numeric_breaks(scalars_dfs, breaks):
    """Check that ``bpd.cut`` with explicit numeric breaks agrees with ``pd.cut``.

    The pandas result is rebuilt as one dict per row, keyed by
    ``left_exclusive`` / ``right_inclusive``, so that it can be compared
    against the materialized bigframes result.
    """
    bf_df, pandas_df = scalars_dfs

    pd_cut = pd.cut(pandas_df["float64_col"], breaks)
    actual = bpd.cut(bf_df["float64_col"], breaks).to_pandas()

    # Look up the interval for every row by its category code. A missing row
    # carries code -1, which picks the last category here; that placeholder is
    # dropped below by the notna check.
    row_intervals = pd_cut.cat.categories[pd_cut.cat.codes]

    # Convert to match data format
    expected_rows = []
    for value, interval in zip(pd_cut, row_intervals):
        if pd.notna(value):
            expected_rows.append(
                {"left_exclusive": interval.left, "right_inclusive": interval.right}
            )
        else:
            expected_rows.append(pd.NA)
    expected = pd.Series(expected_rows, name=pd_cut.name)

    pd.testing.assert_series_equal(
        actual, expected, check_index=False, check_dtype=False
    )
457+
458+
@pytest.mark.parametrize(
    ("bins",),
    [
        (-1,),  # negative integer bins argument
        ([],),  # empty iterable of bins
        (["notabreak"],),  # iterable of wrong type
        # A single numeric break: pandas supports this, but the bigquery
        # operation does not, and no bigframes workaround is available yet.
        # Once supported it should return a column of structs with all
        # NaN values.
        ([1],),
    ],
)
def test_cut_errors(scalars_dfs, bins):
    """Check that ``bpd.cut`` raises ``ValueError`` for each rejected ``bins``."""
    bf_df, _pandas_df = scalars_dfs

    with pytest.raises(ValueError):
        bpd.cut(bf_df["float64_col"], bins)
477+
478+
427479@pytest .mark .parametrize (
428480 ("bins" ,),
429481 [
0 commit comments