Correct reshape timebin#78
Conversation
mathieudpnt
left a comment
There was a problem hiding this comment.
did some ruff/syntax changes but overall lokks good to me !
changed a test so it's a bit more complete
let me know
|
|
||
| def get_timezone(df: DataFrame) -> tzoffset | list[tzoffset]: | ||
| """Return timezone(s) from DataFrame.""" | ||
| """Return timezone(s) from DataFrame. |
There was a problem hiding this comment.
Return timezone(s) from APLOSE DataFrame.
|
|
||
|
|
||
| def check_timestamp(df: DataFrame, timestamp_audio: list[Timestamp]) -> None: | ||
| """Check if the variable timestamp_wav exists and is correctly formated. |
There was a problem hiding this comment.
"""Check if provided timestamp_audio list is correctly formated.
Parameters
----------
df: DataFrame
APLOSE results Dataframe.
timestamp_audio: list[Timestamp]
list of start timestamps of corresponding audio file for each detection.
"""There was a problem hiding this comment.
I prefer your following suggestion
"""
timestamp_audio: list[Timestamp]
A list of timestamps. Each timestamp is the start datetime of the
corresponding audio file for each detection in df.
"""
| ---------- | ||
| df: DataFrame | ||
| An APLOSE result DataFrame. | ||
| timestamp_audio: list[Timestamp] |
There was a problem hiding this comment.
"""
timestamp_audio: list[Timestamp]
A list of timestamps. Each timestamp is the start datetime of the
corresponding audio file for each detection in df.
"""| if isinstance(get_timezone(df), list): | ||
| df["start_datetime"] = [to_datetime(elem, utc=True) for elem in df["start_datetime"]] | ||
| df["end_datetime"] = [to_datetime(elem, utc=True) for elem in df["end_datetime"]] | ||
| df["start_datetime"] = [to_datetime(elem, utc=True) |
There was a problem hiding this comment.
if isinstance(get_timezone(df), list):
df["start_datetime"] = [
to_datetime(elem, utc=True) for elem in df["start_datetime"]
]
df["end_datetime"] = [
to_datetime(elem, utc=True) for elem in df["end_datetime"]
]| ) | ||
|
|
||
| return concat(results).sort_values(by=["start_datetime", "end_datetime", "annotator", "annotation"]).reset_index(drop=True) | ||
| return concat(results).sort_values(by=["start_datetime", "end_datetime", |
There was a problem hiding this comment.
return (
concat(results)
.sort_values(by=["start_datetime", "end_datetime", "annotator", "annotation"])
.reset_index(drop=True)
)
tests/test_filtering_utils.py
Outdated
|
|
||
| def test_get_filename_timestamp(sample_df: DataFrame, sample_yaml: Path) -> None: | ||
| tz = get_timezone(sample_df) | ||
| with open(sample_yaml, "r") as f: |
There was a problem hiding this comment.
ruff recommandation:
with sample_yaml.open(encoding="utf-8") as f:
data_yaml = yaml.safe_load(f)
tests/test_filtering_utils.py
Outdated
| tz = get_timezone(sample_df) | ||
| with open(sample_yaml, "r") as f: | ||
| data_yaml = yaml.safe_load(f) | ||
| sample_key = list(data_yaml.keys())[0] |
There was a problem hiding this comment.
ruff
sample_key = next(iter(data_yaml.keys()))
tests/test_filtering_utils.py
Outdated
| check_names=False) | ||
|
|
||
|
|
||
| def test_check_timestamp_none(sample_df: DataFrame) -> None: |
There was a problem hiding this comment.
@pytest.mark.parametrize("timestamp", [None, []])
def test_check_timestamp_empty(sample_df: DataFrame, timestamp: list | None) -> None:
with pytest.raises(ValueError, match="`timestamp_wav` is empty"):
check_timestamp(sample_df, timestamp)this will test for None and empty list. I had to change change_timestamp because it would not treat timestamp_audio = [] as an empty list
def check_timestamp(df: DataFrame, timestamp_audio: list[Timestamp]) -> None:
"""Check if provided `timestamp_audio` list is correctly formated.
Parameters
----------
df: DataFrame
APLOSE results Dataframe.
timestamp_audio: list[Timestamp]
list of start timestamps of corresponding audio file for each detection.
"""
if timestamp_audio in [None, []]:
msg = "`timestamp_wav` is empty"
raise ValueError(msg)
if len(timestamp_audio) != len(df):
msg = "`timestamp_wav` is not the same length as `df`"
raise ValueError(msg)PS: must add from __future__ import annotations to test_filtering_utils
There was a problem hiding this comment.
I don't understand, you want me to replace the test test_check_timestamp_none by your test_check_timestamp_empty?
tests/test_filtering_utils.py
Outdated
| def test_check_timestamp_wrong_length(sample_df: DataFrame) -> None: | ||
| len_sample_df = len(sample_df)+1 | ||
| timestamps = [Timestamp("2025-01-01") + Timedelta(days=i) for i in range(len_sample_df)] # shorter list | ||
|
|
||
| with pytest.raises(ValueError, match="`timestamp_wav` is not the same length as `df`"): | ||
| check_timestamp(sample_df, timestamps) |
There was a problem hiding this comment.
ruff
def test_check_timestamp_wrong_length(sample_df: DataFrame) -> None:
len_sample_df = len(sample_df) + 1
timestamps = [
Timestamp("2025-01-01") + Timedelta(days=i)
for i in range(len_sample_df)
] # shorter list
with pytest.raises(ValueError, match="is not the same length as `df`"):
check_timestamp(sample_df, timestamps)| assert all(df_out["end_time"] == 86400.0) | ||
| assert df_out["start_datetime"].min() >= sample_df["start_datetime"].min().floor("D") | ||
| assert df_out["end_datetime"].max() <= sample_df["end_datetime"].max().ceil("D") | ||
|
|
ac8c353 to
7fa5767
Compare
7fa5767 to
40c861d
Compare
Correction of function reshape_timebin + associated tests