-- Review notes for this patch to drivers/pg/query/sql/schema_up.sql (PostgreSQL SQL and PL/pgSQL).
-- These notes sit ahead of the "diff --git" header; nothing below them has been altered.
--
-- What the hunks change:
--   * edges_to_path: selects the matching edge rows once in a CTE named e and builds both the
--     node array and the edge array from it; volatility goes from immutable to stable.
--   * Temp-table setup: removes the btree indexes on satisfied and is_cycle for forward_front,
--     next_front and backward_front (the next_id indexes are kept) and removes
--     "primary key (path)" from the paths and backward_front tables.
--   * swap_forward_front / swap_backward_front: return type goes from void to int4; each now
--     returns count(*) of its frontier table after pruning. The pruning delete no longer matches
--     is_cycle rows or rows where satisfied is null, and the edge column probed by NOT EXISTS is
--     swapped (forward: end_id -> start_id, backward: start_id -> end_id).
--   * Traversal loop (hunk at old line 540): the single "delete ... using visited" is split into
--     two deletes; next_front is deduplicated once with DISTINCT ON (next_id) ordered by
--     satisfied desc; rows with satisfied go to paths and rows with not satisfied go to
--     forward_front. The raise debug message no longer embeds count(*) subqueries.
--   * unidirectional_asp_harness / bidirectional_asp_harness: add the asp_visited,
--     forward_asp_visited and backward_asp_visited temp tables, delete next_front rows whose
--     next_id is already in them, and replace the EXISTS pre-checks with RETURN QUERY followed by
--     GET DIAGNOSTICS row_count = ROW_COUNT. The bidirectional harness chooses which frontier to
--     expand from forward_front_size / backward_front_size, which are assigned from the swap
--     functions' return values.
--
-- NOTE(review): confirm before merging
--   * PostgreSQL does not allow CREATE OR REPLACE FUNCTION to change an existing function's
--     return type. If swap_forward_front() / swap_backward_front() already exist as
--     "returns void" in a target database, these statements will presumably fail unless the
--     functions are dropped first (DROP FUNCTION IF EXISTS ...) - verify how this script is
--     applied to existing databases.
--   * In the traversal loop, forward_front used to receive every deduplicated row, satisfied or
--     not; it now receives only rows where "not satisfied". Confirm that stopping expansion at
--     satisfied nodes is intended.
--   * The swap functions no longer remove is_cycle or NULL-satisfied rows themselves. The
--     harness hunks visible here delete those rows from next_front before swapping; any caller
--     outside this patch should be checked - TODO confirm.
--   * The unidirectional harness still calls "perform swap_forward_front()", discarding the new
--     return value; that is valid, only noted for consistency with the bidirectional harness.
--   * This copy of the patch has its line breaks collapsed; hunk line counts could not be
--     re-validated from it.
--
diff --git a/drivers/pg/query/sql/schema_up.sql b/drivers/pg/query/sql/schema_up.sql index 4e0196f..8b0c465 100644 --- a/drivers/pg/query/sql/schema_up.sql +++ b/drivers/pg/query/sql/schema_up.sql @@ -343,21 +343,26 @@ $$ create or replace function public.edges_to_path(path variadic int8[]) returns pathComposite as $$ -select row ( +-- Read the edge table once via a CTE, then derive both the node set and edge set from it. +-- This replaces three separate scans of the edge table (two for start_id/end_id, one for +-- edge composites) with a single scan, reused by both subqueries below. +with e as ( + select id, start_id, end_id, kind_id, properties + from edge + where id = any(path) +) +select row( (select array_agg(distinct (n.id, n.kind_ids, n.properties)::nodeComposite) from node n - where n.id in ( - select start_id from edge where id = any(path) - union - select end_id from edge where id = any(path) - )), - (select array_agg(distinct (r.id, r.start_id, r.end_id, r.kind_id, r.properties)::edgeComposite) - from edge r - where r.id = any(path)) + where n.id in (select start_id from e union all select end_id from e)), + (select array_agg(distinct (e.id, e.start_id, e.end_id, e.kind_id, e.properties)::edgeComposite) + from e) )::pathComposite; $$ language sql - immutable + -- stable rather than immutable: this function reads from the edge and node tables, + -- which can change between transactions. + stable parallel safe strict; @@ -387,13 +392,10 @@ begin primary key (path) ) on commit drop; + -- next_id index supports join conditions in recursive expansion queries and the + -- anti-join used in the swap helpers' dead-end pruning. 
create index forward_front_next_id_index on forward_front using btree (next_id); - create index forward_front_satisfied_index on forward_front using btree (satisfied); - create index forward_front_is_cycle_index on forward_front using btree (is_cycle); - create index next_front_next_id_index on next_front using btree (next_id); - create index next_front_satisfied_index on next_front using btree (satisfied); - create index next_front_is_cycle_index on next_front using btree (is_cycle); end; $$ language plpgsql @@ -411,6 +413,9 @@ begin primary key (id) ) on commit drop; + -- No primary key on path: maintaining a B-tree index on a variable-length int8[] array + -- costs O(depth) per insert and grows with traversal depth. Deduplication is guaranteed + -- upstream by the DISTINCT ON (next_id) applied before inserting into this table. create temporary table paths ( root_id int8 not null, @@ -418,8 +423,7 @@ begin depth int4 not null, satisfied bool, is_cycle bool not null, - path int8[] not null, - primary key (path) + path int8[] not null ) on commit drop; perform create_unidirectional_pathspace_tables(); @@ -435,6 +439,7 @@ $$ begin perform create_unidirectional_pathspace_tables(); + -- No primary key on path: same rationale as the paths table above. 
create temporary table backward_front ( root_id int8 not null, @@ -442,13 +447,10 @@ begin depth int4 not null, satisfied bool, is_cycle bool not null, - path int8[] not null, - primary key (path) + path int8[] not null ) on commit drop; create index backward_front_next_id_index on backward_front using btree (next_id); - create index backward_front_satisfied_index on backward_front using btree (satisfied); - create index backward_front_is_cycle_index on backward_front using btree (is_cycle); end; $$ language plpgsql @@ -456,8 +458,10 @@ $$ strict; create or replace function public.swap_forward_front() - returns void as + returns int4 as $$ +declare + remaining int4; begin alter table forward_front rename to forward_front_old; @@ -468,13 +472,17 @@ begin truncate table next_front; - delete - from forward_front r - where r.is_cycle - or r.satisfied is null - or not r.satisfied and not exists(select 1 from edge e where e.end_id = r.next_id); - - return; + -- Remove dead-ends: unsatisfied frontier nodes that have no outgoing edges in the graph + -- and therefore can never extend a forward path. The forward expansion step joins on + -- e.start_id = frontier.next_id, so nodes without any outgoing edge (start_id) are + -- guaranteed dead-ends. Uses the edge_start_id_kind_id_id_end_id_index covering index. + delete from forward_front r + where not r.satisfied + and not exists (select 1 from edge e where e.start_id = r.next_id); + + -- Return the surviving frontier size so callers can cache it without a separate COUNT(*). 
+ select count(*) into remaining from forward_front; + return remaining; end; $$ language plpgsql @@ -482,8 +490,10 @@ $$ strict; create or replace function public.swap_backward_front() - returns void as + returns int4 as $$ +declare + remaining int4; begin alter table backward_front rename to backward_front_old; @@ -494,13 +504,17 @@ begin truncate table next_front; - delete - from backward_front r - where r.is_cycle - or r.satisfied is null - or not r.satisfied and not exists(select 1 from edge e where e.start_id = r.next_id); - - return; + -- Remove dead-ends: unsatisfied frontier nodes that have no incoming edges in the graph + -- and therefore can never extend a backward path. The backward expansion step joins on + -- e.end_id = frontier.next_id, so nodes without any incoming edge (end_id) are + -- guaranteed dead-ends. Uses the edge_end_id_kind_id_id_start_id_index covering index. + delete from backward_front r + where not r.satisfied + and not exists (select 1 from edge e where e.end_id = r.next_id); + + -- Return the surviving frontier size so callers can cache it without a separate COUNT(*). + select count(*) into remaining from backward_front; + return remaining; end; $$ language plpgsql @@ -540,29 +554,39 @@ begin forward_front_depth = forward_front_depth + 1; - -- Swap the next_front table into the forward_front - -- Remove cycles and non-conformant satisfaction checks - delete from next_front f using visited v where f.is_cycle or f.satisfied is null or f.next_id = v.id; + -- Remove cycles, non-conformant satisfaction checks, and already-visited nodes. 
+ delete from next_front f where f.is_cycle or f.satisfied is null; + delete from next_front f using visited v where f.next_id = v.id; - raise debug 'Expansion step % - Available Root Paths % - Num satisfied: %', forward_front_depth, (select count(*) from next_front), (select count(*) from next_front p where p.satisfied); + -- Avoid unconditional COUNT(*) subqueries in raise debug: they are always evaluated + -- by PostgreSQL regardless of the effective log level, costing two full table scans + -- per iteration on every production query. + raise debug 'Expansion step %', forward_front_depth; - -- Insert new newly visited nodes into the visited table - insert into visited (id) select distinct on (f.next_id) f.next_id from next_front f on conflict (id) do nothing; + -- Mark all frontier nodes as visited. Use SELECT DISTINCT rather than DISTINCT ON + -- since we only need the id column here. + insert into visited (id) select distinct f.next_id from next_front f on conflict (id) do nothing; - -- Copy pathspace over into the next front + -- Single-pass split: materialize the DISTINCT ON result once via a writable CTE, + -- then route satisfied rows to paths and unsatisfied rows to forward_front. This + -- replaces two separate sequential scans of next_front with one. When a next_id has + -- both satisfied and unsatisfied rows (same node, different paths) the satisfied row + -- is preferred so the solution is recorded and expansion stops for that node. 
truncate table forward_front; - insert into forward_front - select distinct on (f.next_id) f.root_id, f.next_id, f.depth, f.satisfied, f.is_cycle, f.path - from next_front f; - - -- Copy newly satisfied paths into the path table - insert into paths - select f.root_id, f.next_id, f.depth, f.satisfied, f.is_cycle, f.path - from forward_front f - where f.satisfied; - - -- Empty the next front last to capture the next expansion + with deduped as ( + select distinct on (next_front.next_id) + next_front.root_id, next_front.next_id, next_front.depth, + next_front.satisfied, next_front.is_cycle, next_front.path + from next_front + order by next_front.next_id, next_front.satisfied desc + ), + ins_paths as ( + insert into paths select * from deduped where deduped.satisfied + ) + insert into forward_front select * from deduped where not deduped.satisfied; + + -- Empty the next front to prepare for the next expansion step. truncate table next_front; end loop; @@ -591,12 +615,23 @@ as $$ declare forward_front_depth int4 := 0; + row_count int4; begin raise debug 'unidirectional_asp_harness start'; -- Defines two tables to represent pathspace of the recursive expansion perform create_unidirectional_pathspace_tables(); + -- Visited table tracks nodes already reached at a shallower depth so that longer + -- (non-shortest) paths are pruned from the frontier before expansion. Without this, + -- the same node can be re-expanded at every depth level, causing the frontier to + -- grow exponentially in dense graphs. 
+ create temporary table asp_visited + ( + id int8 not null, + primary key (id) + ) on commit drop; + while forward_front_depth < max_depth and (forward_front_depth = 0 or exists(select 1 from forward_front)) loop -- If this is the first expansion of this frontier, perform the primer query - otherwise perform the @@ -609,15 +644,26 @@ begin forward_front_depth = forward_front_depth + 1; - raise debug 'Expansion step % - Available Root Paths % - Num satisfied: %', forward_front_depth, (select count(*) from next_front), (select count(*) from next_front p where p.satisfied); + raise debug 'Expansion step %', forward_front_depth; - -- Check to see if the root front is satisfied - if exists(select 1 from next_front r where r.satisfied) then - -- Return all satisfied paths from the next front - return query select * from next_front r where r.satisfied; + -- Prune cycles, null satisfaction, and nodes already reached at a shallower depth + -- in a single pass. The visited-set check prevents exponential frontier growth by + -- ensuring each node is only expanded at the first (shortest) depth it is discovered. + delete from next_front f + where f.is_cycle or f.satisfied is null + or f.next_id in (select id from asp_visited); + + -- Return all satisfied paths from the next front. If any rows were returned, + -- we are done — exit the loop. + return query select * from next_front r where r.satisfied; + GET DIAGNOSTICS row_count = ROW_COUNT; + if row_count > 0 then exit; end if; + -- Mark all current frontier nodes as visited before swapping. 
+ insert into asp_visited (id) select distinct f.next_id from next_front f on conflict (id) do nothing; + -- Swap the next_front table into the forward_front perform swap_forward_front(); end loop; @@ -647,18 +693,40 @@ $$ declare forward_front_depth int4 := 0; backward_front_depth int4 := 0; + -- Cached row counts for each frontier, now returned directly by the swap functions + -- instead of requiring a separate COUNT(*) full table scan after each swap. Both + -- start at 0 so the forward frontier is always expanded first (0 <= 0), which + -- matches the original preference when frontiers are equal in size. + forward_front_size int4 := 0; + backward_front_size int4 := 0; + row_count int4; begin raise debug 'bidirectional_asp_harness start'; -- Defines three tables to represent pathspace of the recursive expansion perform create_bidirectional_pathspace_tables(); + -- Per-direction visited tables prevent re-expansion of nodes already reached at a + -- shallower depth, avoiding exponential frontier growth in dense graphs. + create temporary table forward_asp_visited + ( + id int8 not null, + primary key (id) + ) on commit drop; + + create temporary table backward_asp_visited + ( + id int8 not null, + primary key (id) + ) on commit drop; + while forward_front_depth + backward_front_depth < max_depth and (forward_front_depth = 0 or exists(select 1 from forward_front)) and (backward_front_depth = 0 or exists(select 1 from backward_front)) loop - -- Check to expand the smaller of the two frontiers, or if both are the same size prefer the forward frontier - if (select count(*) from forward_front) <= (select count(*) from backward_front) then + -- Expand the smaller frontier; prefer forward when sizes are equal. Sizes are + -- maintained by the counters returned from the swap functions. 
+ if forward_front_size <= backward_front_size then -- If this is the first expansion of this frontier, perform the primer query - otherwise perform the -- recursive expansion query if forward_front_depth = 0 then @@ -669,16 +737,27 @@ begin forward_front_depth = forward_front_depth + 1; - raise debug 'Forward expansion as step % - Available Root Paths % - Num satisfied: %', forward_front_depth + backward_front_depth, (select count(*) from next_front), (select count(*) from next_front p where p.satisfied); + raise debug 'Forward expansion step %', forward_front_depth + backward_front_depth; + + -- Prune cycles, null satisfaction, and nodes already visited in the forward direction + -- in a single pass. + delete from next_front f + where f.is_cycle or f.satisfied is null + or f.next_id in (select id from forward_asp_visited); - -- Check to see if the next frontier is satisfied - if exists(select 1 from next_front r where r.satisfied) then - return query select * from next_front r where r.satisfied; + -- Return all satisfied paths. If any rows were returned, we are done. + return query select * from next_front r where r.satisfied; + GET DIAGNOSTICS row_count = ROW_COUNT; + if row_count > 0 then exit; end if; - -- Swap the next_front table into the forward_front - perform swap_forward_front(); + -- Mark forward frontier nodes as visited before swapping. + insert into forward_asp_visited (id) select distinct f.next_id from next_front f on conflict (id) do nothing; + + -- Swap the next_front table into the forward_front. The swap function returns + -- the new frontier size, eliminating a separate COUNT(*) scan. 
+ select swap_forward_front() into forward_front_size; else -- If this is the first expansion of this frontier, perform the primer query - otherwise perform the -- recursive expansion query @@ -689,23 +768,34 @@ begin end if; backward_front_depth = backward_front_depth + 1; - raise debug 'Backward expansion as step % - Available Terminal Paths % - Num satisfied: %', forward_front_depth + backward_front_depth, (select count(*) from next_front), (select count(*) from next_front p where p.satisfied); + raise debug 'Backward expansion step %', forward_front_depth + backward_front_depth; + + -- Prune cycles, null satisfaction, and nodes already visited in the backward direction + -- in a single pass. + delete from next_front f + where f.is_cycle or f.satisfied is null + or f.next_id in (select id from backward_asp_visited); - -- Check to see if the next frontier is satisfied - if exists(select 1 from next_front r where r.satisfied) then - return query select * from next_front r where r.satisfied; + -- Return all satisfied paths. If any rows were returned, we are done. + return query select * from next_front r where r.satisfied; + GET DIAGNOSTICS row_count = ROW_COUNT; + if row_count > 0 then exit; end if; - -- Swap the next_front table into the backward_front - perform swap_backward_front(); + -- Mark backward frontier nodes as visited before swapping. + insert into backward_asp_visited (id) select distinct f.next_id from next_front f on conflict (id) do nothing; + + -- Swap the next_front table into the backward_front. The swap function returns + -- the new frontier size, eliminating a separate COUNT(*) scan. 
+ select swap_backward_front() into backward_front_size; end if; - -- Check to see if the two frontiers meet somewhere in the middle - if exists(select 1 - from forward_front f - join backward_front b on f.next_id = b.next_id) then - -- Zip the path arrays together treating the matches as satisfied + -- Check to see if the two frontiers meet somewhere in the middle. + -- Skip when one frontier has not expanded yet since the join would find nothing. + if forward_front_depth > 0 and backward_front_depth > 0 then + -- Zip the path arrays together treating the matches as satisfied. + -- The RETURN QUERY + GET DIAGNOSTICS pattern avoids a separate EXISTS scan. return query select f.root_id, b.root_id, f.depth + b.depth, @@ -714,7 +804,10 @@ begin f.path || b.path from forward_front f join backward_front b on f.next_id = b.next_id; - exit; + GET DIAGNOSTICS row_count = ROW_COUNT; + if row_count > 0 then + exit; + end if; end if; end loop;