Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
e864f0e
cosmetic changes
scymtym Mar 6, 2025
a66c316
fix: in TemporalCount, assert that unimplemented cases are not executed
scymtym Mar 13, 2025
790f57f
refactor: use just one intervals_to_events function
scymtym Mar 12, 2025
84d0f61
feat: add function find_rectangles in process module
scymtym Mar 12, 2025
9fecc67
feat: allow saving additional interval attributes in result_interval …
scymtym Mar 6, 2025
e71ea22
feat: maybe unnecessary change in mask_intervals
scymtym Mar 11, 2025
593e624
feat: in CappedMinCount, produce intervals with "ratio" information
scymtym Mar 13, 2025
d811f87
feat: in NoDataPreservingAnd, preserve type and attributes of data in…
scymtym Mar 12, 2025
b659b09
feat: in LeftDependentToggle, preserve type and attributes of data in…
scymtym Mar 12, 2025
91e37c5
fix: change behavior of TemporalCount for partially applicable indica…
scymtym Mar 13, 2025
4b4ae70
refactor: avoid intervals_to_events in find_rectangles
scymtym Mar 18, 2025
3644973
refactor: micro-optimizations in find_rectangles
scymtym Mar 18, 2025
405f3de
refactor: avoid copy operations in find_rectangles
scymtym Mar 18, 2025
b4e60f8
feat: add is_same_result parameter to find_rectangles
scymtym Mar 18, 2025
fcf98d0
refactor: maybe faster result interval construction in find_rectangles
scymtym Mar 18, 2025
102cd54
refactor: optimize implementation of TemporalCount
scymtym Mar 18, 2025
c321690
refactor: do not copy in interval_like
scymtym Mar 18, 2025
e2bdf24
Merge branch 'remove-criterion-combination' into interval-ratio-remov…
glichtner Mar 21, 2025
60a386a
Merge branch 'main' into interval-ratio-removed-criterion-combination
glichtner Mar 21, 2025
fb6b635
tests: adapt test to new ee structure
glichtner Mar 22, 2025
b400698
refactor: improvements in task.py
scymtym Mar 24, 2025
a6d8796
refactor: simplify Task.insert_negative_intervals
scymtym Mar 24, 2025
297bbcb
feat: compute "interval ratio" in logical count operators
scymtym Mar 24, 2025
920801d
fix: temporal operator handling
glichtner Mar 24, 2025
c4968ad
feat: add sum(count) in logic.Or handling
glichtner Mar 24, 2025
e9ce71e
feat: IntervalType.union_order changed (NEGATIVE > N/A)
glichtner Mar 24, 2025
a9d17b8
fix: threshold reduction in CappedMinCount
glichtner Mar 25, 2025
89efdbd
refactor: fix window_types problem without reset callback
scymtym Mar 25, 2025
08eeecf
refactor: remove obsolete tests
scymtym Mar 24, 2025
e8f1ab2
refactor: improvements for logic.Count implementation in Task
scymtym Mar 24, 2025
6f69a39
refactor: rewrite implementation of logic.Or in Task
scymtym Mar 25, 2025
a962449
fix: improve logic.Count for count_min = 0
scymtym Mar 25, 2025
0659747
feat: allow custom counting functions in logic.Or
glichtner Mar 25, 2025
b23b1c2
refactor: simplify a few type annotations
scymtym Mar 26, 2025
160a47b
fix: in find_rectangles, fix event sorting and adjacent interval merging
scymtym Mar 26, 2025
ddde099
refactor: omit empty results in rectangle.find_rectangles
scymtym Mar 26, 2025
1d30db7
feat: add TemporalCount.result_for_not_applicable
scymtym Mar 26, 2025
701c2b3
feat: make Presence use result_for_not_applicable=NEGATIVE
glichtner Mar 26, 2025
29da72c
fix: re-set base node category
glichtner Mar 27, 2025
b93e49e
fix: temporal count with interval criterion
glichtner Mar 27, 2025
63f8d5f
revert: remove result_for_not_applicable flag
glichtner Mar 28, 2025
aaa89c3
refactor: using int instead of float time in intervals
glichtner Mar 28, 2025
f54c5d2
fix(datetime): truncate to second precision to avoid rounding issues …
glichtner Mar 28, 2025
9ae95b0
fix: disappearing 0sec intervals in LeftDependentToggle
glichtner Mar 28, 2025
65e815c
feat: add custom count handling for logic.And
glichtner Mar 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions execution_engine/execution_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,23 @@ def execute_recommendation(
assert start_datetime.tzinfo, "start_datetime must be timezone-aware"
assert end_datetime.tzinfo, "end_datetime must be timezone-aware"

# We cut off microseconds to ensure second-level precision.
# In Python, timestamps are usually floored when cast to int,
# but in PostgreSQL they might be rounded. To avoid subtle bugs
# where a 1-second difference appears due to different rounding
# strategies, we explicitly strip microseconds and assert that
# the timestamp is exactly an integer.
start_datetime = start_datetime.replace(microsecond=0)
end_datetime = end_datetime.replace(microsecond=0)

assert start_datetime.timestamp() == int(
start_datetime.timestamp()
), f"start_datetime still contains sub-second precision: {start_datetime}"

assert end_datetime.timestamp() == int(
end_datetime.timestamp()
), f"end_datetime still contains sub-second precision: {end_datetime}"

date_format = "%Y-%m-%d %H:%M:%S %z"

logging.info(
Expand Down
4 changes: 4 additions & 0 deletions execution_engine/execution_graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ def traverse(

traverse(expr, category=category)

# make sure base node has still the correct category - it might have changed duing traversal if the base node
# is used in an interval_criterion of a TemporalCount operator
graph.nodes[base_node].update({"category": CohortCategory.BASE})

if hash(expr) != expr_hash:
raise ValueError("Expression has been modified during traversal")

Expand Down
3 changes: 2 additions & 1 deletion execution_engine/omop/criterion/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from execution_engine.util.interval import IntervalType
from execution_engine.util.serializable import SerializableDataClassABC
from execution_engine.util.sql import SelectInto, select_into
from execution_engine.util.types import PersonIntervals, TimeRange
from execution_engine.util.types import PersonIntervals
from execution_engine.util.types.timerange import TimeRange

__all__ = [
"Criterion",
Expand Down
3 changes: 2 additions & 1 deletion execution_engine/omop/criterion/point_in_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from execution_engine.omop.criterion.concept import ConceptCriterion
from execution_engine.task.process import get_processing_module
from execution_engine.util.interval import IntervalType
from execution_engine.util.types import PersonIntervals, TimeRange, Timing
from execution_engine.util.types import PersonIntervals, Timing
from execution_engine.util.types.timerange import TimeRange
from execution_engine.util.value import Value

process = get_processing_module()
Expand Down
9 changes: 5 additions & 4 deletions execution_engine/omop/db/celida/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
from execution_engine.util.interval import IntervalType

# Use the "public" schema so that tables in different schemas can
# these enums easily without introducing depedencies between the
# respective schemas. Note that replicate the enum definitions in each
# use these enums easily without introducing dependencies between the
# respective schemas. Note that replicating the enum definitions in each
# schema would not work when data must be exchanged between the
# schemas because enum definitions in separate schemas, even if
# identical in terms of enum values, are considered distinct and
Expand All @@ -37,7 +37,6 @@
CohortCategoryEnum = Enum(CohortCategory, name="cohort_category", schema="public")



class Recommendation(Base): # noqa: D101
__tablename__ = "recommendation"
__table_args__ = {"schema": SCHEMA_NAME}
Expand Down Expand Up @@ -170,7 +169,9 @@ class ResultInterval(Base): # noqa: D101
interval_start: Mapped[datetime]
interval_end: Mapped[datetime]
interval_type = mapped_column(IntervalTypeEnum)

interval_ratio: Mapped[float] = mapped_column(
nullable=True
)
execution_run: Mapped["ExecutionRun"] = relationship(
primaryjoin="ResultInterval.run_id == ExecutionRun.run_id",
)
Expand Down
1 change: 1 addition & 0 deletions execution_engine/omop/db/celida/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def interval_result_view() -> Select:
rri.c.interval_type,
rri.c.interval_start,
rri.c.interval_end,
rri.c.interval_ratio,
)
.select_from(rri)
.outerjoin(pip, (rri.c.pi_pair_id == pip.c.pi_pair_id))
Expand Down
29 changes: 29 additions & 0 deletions execution_engine/omop/sqlclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,35 @@ def _enable_database_triggers(
cursor.close()


def datetime_cols_to_epoch(stmt: sqlalchemy.Select) -> sqlalchemy.Select:
"""
Given a SQLAlchemy 2.0 Select that has columns labeled 'interval_start'
or 'interval_end', replace those column expressions with
EXTRACT(EPOCH FROM <expr>)::BIGINT so they become integer timestamps.

Returns a new Select object with the replaced columns.
"""
new_columns = []

for col in stmt.selected_columns:
label = getattr(col, "name")

if label in ("interval_start", "interval_end"):
# We'll wrap col in EXTRACT(EPOCH FROM col)::BIGINT,
new_col = (
sqlalchemy.func.extract("epoch", col)
.cast(sqlalchemy.BigInteger)
.label(label)
)
new_columns.append(new_col)
else:
new_columns.append(col)

new_stmt = stmt.with_only_columns(*new_columns, maintain_column_froms=True)

return new_stmt


class OMOPSQLClient:
"""A client for the OMOP SQL database.

Expand Down
39 changes: 36 additions & 3 deletions execution_engine/task/process/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import sys
import types
from collections import namedtuple
from typing import TypeVar

from execution_engine.util.interval import IntervalType
from execution_engine.util.types.timerange import TimeRange


def get_processing_module(
Expand Down Expand Up @@ -39,6 +43,35 @@ def get_processing_module(

Interval = namedtuple("Interval", ["lower", "upper", "type"])
IntervalWithCount = namedtuple("IntervalWithCount", ["lower", "upper", "type", "count"])
IntervalWithTypeCounts = namedtuple(
"IntervalWithTypeCounts", ["lower", "upper", "counts"]
)

AnyInterval = Interval | IntervalWithCount
GeneralizedInterval = None | AnyInterval

TInterval = TypeVar("TInterval", bound=AnyInterval)


def interval_like(interval: TInterval, start: int, end: int) -> TInterval:
"""
Return a copy of the given interval with its lower and upper bounds replaced.

Args:
interval (I): The interval to copy. Must be one of Interval or IntervalWithCount.
start (datetime): The new lower bound.
end (datetime): The new upper bound.

Returns:
I: A copy of the interval with updated lower and upper bounds.
"""

return interval._replace(lower=start, upper=end) # type: ignore[return-value]


def timerange_to_interval(tr: TimeRange, type_: IntervalType) -> Interval:
"""
Converts a timerange to an interval with the supplied type.
"""
return Interval(
lower=int(tr.start.timestamp()),
upper=int(tr.end.timestamp()),
type=type_,
)
Loading