Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ classifiers = [
dependencies = [
"numpy>=1.21.0",
"scipy>=1.9.0",
"pandas>=1.4.0",
"pandas>=1.4.0,<3",
"statsmodels>=0.13.5",
"chartify>=5.0.0",
"ipywidgets>=8.0.0",
Expand Down
17 changes: 12 additions & 5 deletions spotify_confidence/analysis/confidence_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ def remove_group_columns(categorical_columns: Iterable, additional_column: Optio
return list(od)


def validate_categorical_columns(categorical_group_columns: Union[str, Iterable]) -> None:
def validate_categorical_columns(
categorical_group_columns: Union[str, Iterable],
) -> None:
if isinstance(categorical_group_columns, str):
pass
elif isinstance(categorical_group_columns, Iterable):
Expand Down Expand Up @@ -113,15 +115,15 @@ def validate_and_rename_columns(df: DataFrame, columns: Iterable[str]) -> DataFr
if (df[column + SFX1].isna() == df[column + SFX2].isna()).all() and (
df[column + SFX1][df[column + SFX1].notna()] == df[column + SFX2][df[column + SFX2].notna()]
).all():
df = df.rename(columns={column + SFX1: column}).drop(columns=[column + SFX2]) # type: ignore[union-attr,unused-ignore]
df = df.rename(columns={column + SFX1: column}).drop(columns=[column + SFX2])
else:
raise ValueError(f"Values of {column} do not agree across levels: {df[[column + SFX1, column + SFX2]]}")
return df


def drop_and_rename_columns(df: DataFrame, columns: Iterable[str]) -> DataFrame:
columns_dict = {col + SFX1: col for col in columns}
return df.rename(columns=columns_dict).drop(columns=[col + SFX2 for col in columns]) # type: ignore[union-attr,unused-ignore]
return df.rename(columns=columns_dict).drop(columns=[col + SFX2 for col in columns])


def level2str(level: Union[str, Tuple]) -> str:
Expand All @@ -132,7 +134,10 @@ def level2str(level: Union[str, Tuple]) -> str:


def validate_data(
df: DataFrame, columns_that_must_exist, group_columns: Iterable, ordinal_group_column: Optional[str]
df: DataFrame,
columns_that_must_exist,
group_columns: Iterable,
ordinal_group_column: Optional[str],
):
"""Integrity check input dataframe."""
for col in columns_that_must_exist:
Expand Down Expand Up @@ -201,7 +206,9 @@ def axis_format_precision(numbers: Series, absolute: bool, extra_zeros: int = 0)


def to_finite(s: Series, lower_limit: float, upper_limit: float) -> Series:
return s.clip(-100 * abs(lower_limit), 100 * abs(upper_limit))
result = s.clip(-100 * abs(lower_limit), 100 * abs(upper_limit))
assert result is not None
return result


def add_color_column(df: DataFrame, cols: Iterable) -> DataFrame:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,43 +212,45 @@ def _sufficient_statistics(self) -> DataFrame:
self._sufficient = (
self._df.groupby(groupby, sort=False, group_keys=True)
.apply(
lambda df: df.assign(
**{
POINT_ESTIMATE: lambda df: confidence_computers[
df[self._method_column].values[0]
].point_estimate(df, **kwargs)
}
)
.assign(
**{
ORIGINAL_POINT_ESTIMATE: lambda df: (
confidence_computers[ZTEST].point_estimate(df, **kwargs)
if df[self._method_column].values[0] == ZTESTLINREG
else confidence_computers[df[self._method_column].values[0]].point_estimate(
lambda df: (
df.assign(
**{
POINT_ESTIMATE: lambda df: confidence_computers[
df[self._method_column].values[0]
].point_estimate(df, **kwargs)
}
)
.assign(
**{
ORIGINAL_POINT_ESTIMATE: lambda df: (
confidence_computers[ZTEST].point_estimate(df, **kwargs)
if df[self._method_column].values[0] == ZTESTLINREG
else confidence_computers[df[self._method_column].values[0]].point_estimate(
df, **kwargs
)
)
}
)
.assign(
**{
VARIANCE: lambda df: confidence_computers[df[self._method_column].values[0]].variance(
df, **kwargs
)
)
}
)
.assign(
**{
VARIANCE: lambda df: confidence_computers[df[self._method_column].values[0]].variance(
}
)
.assign(
**{
ORIGINAL_VARIANCE: lambda df: (
confidence_computers[ZTEST].variance(df, **kwargs)
if df[self._method_column].values[0] == ZTESTLINREG
else confidence_computers[df[self._method_column].values[0]].variance(df, **kwargs)
)
}
)
.pipe(
lambda df: confidence_computers[df[self._method_column].values[0]].add_point_estimate_ci(
df, **kwargs
)
}
)
.assign(
**{
ORIGINAL_VARIANCE: lambda df: (
confidence_computers[ZTEST].variance(df, **kwargs)
if df[self._method_column].values[0] == ZTESTLINREG
else confidence_computers[df[self._method_column].values[0]].variance(df, **kwargs)
)
}
)
.pipe(
lambda df: confidence_computers[df[self._method_column].values[0]].add_point_estimate_ci(
df, **kwargs
)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,12 @@ def _sufficient_statistics(self) -> DataFrame:
self._sufficient = (
self._df.groupby(groupby, sort=False, group_keys=True)
.apply(
lambda df: df.assign(**{POINT_ESTIMATE: lambda df: df[self._point_estimate_column]})
.assign(**{ORIGINAL_POINT_ESTIMATE: lambda df: df[self._point_estimate_column]})
.assign(**{VARIANCE: lambda df: df[self._var_column]})
.assign(**{ORIGINAL_VARIANCE: lambda df: df[self._var_column]})
lambda df: (
df.assign(**{POINT_ESTIMATE: lambda df: df[self._point_estimate_column]})
.assign(**{ORIGINAL_POINT_ESTIMATE: lambda df: df[self._point_estimate_column]})
.assign(**{VARIANCE: lambda df: df[self._var_column]})
.assign(**{ORIGINAL_VARIANCE: lambda df: df[self._var_column]})
)
)
.pipe(reset_named_indices)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ def adjusted_alphas_for_group(grp: DataFrame) -> Series:
data=(
df.assign(**{comparison_total_column: df[denominator + SFX1] + df[denominator + SFX2]})
.assign(
max_sample_size=lambda df: df[[comparison_total_column, final_expected_sample_size_column]]
.max(axis=1)
.max()
max_sample_size=lambda df: (
df[[comparison_total_column, final_expected_sample_size_column]].max(axis=1).max()
)
)
.assign(sample_size_proportions=lambda df: df[comparison_total_column] / df["max_sample_size"])
.pipe(adjusted_alphas_for_group)[ADJUSTED_ALPHA]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def last_fcab(self):

def __eq__(self, other):
if isinstance(other, ComputationState):
return self._df.equals(other._df) and np.array_equal(self._last_fcab, other._last_fcab) # type: ignore[arg-type,unused-ignore]
return self._df.equals(other._df) and np.array_equal(self._last_fcab, other._last_fcab)
return False


Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ deps =
commands =
ruff check
ruff format --check
ty check
# Don't run `ty check` here - the type stubs for 3.9 are not good enough.
pytest -n auto --no-cov --basetemp={envtmpdir} {posargs}