Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions pydough/exploration/_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""
Shared helpers for explain and explain_llm. Extracts term lists, conditions,
source collection, and safe qualification so both modules stay consistent.
"""

# Public names exported by this module; each entry is a helper function
# defined further down in this file.
__all__ = [
"extract_conditions",
"extract_terms",
"find_source_collection",
"qualify_safely",
]

import pydough.pydough_operators as pydop
from pydough.configs import PyDoughSession
from pydough.qdag import (
ExpressionFunctionCall,
PyDoughCollectionQDAG,
PyDoughExpressionQDAG,
PyDoughQDAG,
TableCollection,
)
from pydough.unqualified import UnqualifiedNode, qualify_node


def extract_terms(
    node: PyDoughCollectionQDAG,
) -> tuple[list[str], list[str]]:
    """
    Splits the names of the node's terms into expressions and collections.

    Every name in `node.all_terms` is resolved with `node.get_term`. Names
    whose term is a `PyDoughExpressionQDAG` go into the first list; all
    remaining names go into the second list.

    Args:
        `node`: the collection whose terms are being classified.

    Returns:
        A pair `(expression_names, collection_names)`, each sorted.
    """
    expressions: list[str] = []
    collections: list[str] = []
    for term_name in node.all_terms:
        resolved: PyDoughQDAG = node.get_term(term_name)
        # Choose the destination list first, then append once.
        bucket = (
            expressions
            if isinstance(resolved, PyDoughExpressionQDAG)
            else collections
        )
        bucket.append(term_name)
    return sorted(expressions), sorted(collections)


def extract_conditions(
    condition: PyDoughExpressionQDAG,
) -> list[PyDoughExpressionQDAG]:
    """
    Breaks a filter condition into its top-level conjuncts.

    Args:
        `condition`: the expression to break apart.

    Returns:
        If `condition` is an `ExpressionFunctionCall` whose operator is
        `pydop.BAN` (AND), the arguments of that call that are themselves
        `PyDoughExpressionQDAG` instances, in their original order; any
        other arguments are left out. Only the outermost call is unpacked.
        Otherwise, a single-element list containing `condition` itself.
    """
    # Anything that is not a BAN call is treated as one indivisible condition.
    if not (
        isinstance(condition, ExpressionFunctionCall)
        and condition.operator == pydop.BAN
    ):
        return [condition]

    conjuncts: list[PyDoughExpressionQDAG] = []
    for operand in condition.args:
        if isinstance(operand, PyDoughExpressionQDAG):
            conjuncts.append(operand)
    return conjuncts


# Reserved for use in explain_llm.py (not yet implemented).
def find_source_collection(node: PyDoughCollectionQDAG) -> str | None:
    """
    Finds the name of the nearest table collection at or above `node`.

    Starting with `node` itself, follows `preceding_context` links until a
    `TableCollection` is reached.

    Args:
        `node`: the collection node where the search starts.

    Returns:
        The `collection.name` of the first `TableCollection` encountered, or
        None if the chain of preceding contexts ends without one.
    """
    cursor: PyDoughCollectionQDAG | None = node
    while True:
        # Ran off the top of the chain without finding a table collection.
        if cursor is None:
            return None
        if isinstance(cursor, TableCollection):
            return cursor.collection.name
        cursor = cursor.preceding_context


def qualify_safely(
    node: UnqualifiedNode, session: PyDoughSession
) -> tuple[PyDoughQDAG | None, Exception | None]:
    """
    Runs `qualify_node` on `node` and reports failure as a value instead of
    raising.

    Args:
        `node`: the unqualified node to qualify.
        `session`: the session passed through to `qualify_node`.

    Returns:
        `(qualified_node, None)` if qualification succeeds, or `(None, error)`
        if it raises an `Exception`. Exceptions that do not derive from
        `Exception` (such as KeyboardInterrupt or SystemExit) are not caught
        and propagate to the caller. Interpreting and formatting the caught
        error is left to the caller.
    """
    try:
        qualified: PyDoughQDAG = qualify_node(node, session)
    except Exception as failure:
        return None, failure
    else:
        return qualified, None
147 changes: 93 additions & 54 deletions pydough/exploration/explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
__all__ = ["explain"]

import pydough
import pydough.pydough_operators as pydop
from pydough.configs import PyDoughSession
from pydough.errors import PyDoughQDAGException
from pydough.metadata.abstract_metadata import AbstractMetadata
Expand All @@ -26,27 +25,33 @@
BackReferenceExpression,
Calculate,
ChildOperator,
ExpressionFunctionCall,
GlobalContext,
OrderBy,
PartitionBy,
PartitionChild,
PyDoughCollectionQDAG,
PyDoughExpressionQDAG,
PyDoughQDAG,
Reference,
Singular,
SubCollection,
TableCollection,
TopK,
Where,
)
from pydough.qdag.collections.user_collection_qdag import (
PyDoughUserGeneratedCollectionQDag,
)
from pydough.unqualified import (
UnqualifiedCross,
UnqualifiedNode,
UnqualifiedRoot,
display_raw,
qualify_node,
)

from ._common import (
extract_conditions,
extract_terms,
qualify_safely,
)
from .term import find_unqualified_root


Expand Down Expand Up @@ -273,33 +278,30 @@ def explain_unqualified(
An explanation of `node`.
"""
lines: list[str] = []
qualified_node: PyDoughQDAG | None = None
session = pydough.active_session if session is None else session
# Attempt to qualify the node, dumping an appropriate message if it could
# not be qualified
try:
root: UnqualifiedRoot | None = find_unqualified_root(node)
if root is not None:
qualified_node = qualify_node(node, session)
else:
# If the root is None, it means that the node was an expression
# without information about its context.
qualified_node, error = qualify_safely(node, session)
if error is not None:
# No root + failure: generic message.
root = find_unqualified_root(node)
if root is None:
lines.append(
f"Cannot call pydough.explain on {display_raw(node)}.\n"
"Did you mean to use pydough.explain_term?"
)
except PyDoughQDAGException as e:
# If the qualification failed, dump an appropriate message indicating
# why pydough_explain did not work on it.
if "Unrecognized term" in str(e):
# Root + Unrecognized term, display error message with additional
# context about what might have gone wrong and how to fix it.
elif isinstance(error, PyDoughQDAGException) and "Unrecognized term" in str(
error
):
lines.append(
f"{str(e)}\n"
f"{str(error)}\n"
"This could mean you accessed a property using a name that does not exist, or\n"
"that you need to place your PyDough code into a context for it to make sense.\n"
"Did you mean to use pydough.explain_term?"
)
else:
raise e
raise error
return "\n".join(lines)

# If the qualification succeeded, dump info about the qualified node.
if isinstance(qualified_node, PyDoughExpressionQDAG):
Expand All @@ -313,11 +315,8 @@ def explain_unqualified(
if verbose:
# Dump the structure of the collection
lines.append("PyDough collection representing the following logic:")
if verbose:
for line in qualified_node.to_tree_string().splitlines():
lines.append(f" {line}")
else:
lines.append(f" {qualified_node.to_string()}")
for line in qualified_node.to_tree_string().splitlines():
lines.append(f" {line}")
lines.append("")

# Explain what the specific node does
Expand All @@ -332,21 +331,77 @@ def explain_unqualified(
"This node is a reference to the global context for the entire graph. An operation must be done onto this node (e.g. a CALCULATE or accessing a collection) before it can be executed."
)
case TableCollection():
collection_name = qualified_node.collection.name
lines.append(
f"This node, specifically, accesses the collection {collection_name}.\n"
f"Call pydough.explain(graph['{collection_name}']) to learn more about this collection."
)
# UnqualifiedCross qualifies to TableCollection (identity is
# lost after qualification), so detect it via the original
# unqualified node.
if isinstance(node, UnqualifiedCross):
left_desc = display_raw(node._parcel[0])
right_desc = display_raw(node._parcel[1])
lines.append(
"This node is a CROSS join: every row of the left "
"collection is paired with every row of the right "
"collection."
)
lines.append(f"Left: {left_desc}")
lines.append(f"Right: {right_desc}")
else:
collection_name = qualified_node.collection.name
lines.append(
f"This node, specifically, accesses the collection {collection_name}.\n"
f"Call pydough.explain(graph['{collection_name}']) to learn more about this collection."
)
case SubCollection():
collection_name = qualified_node.subcollection_property.collection.name
property_name = qualified_node.subcollection_property.name
lines.append(
f"This node, specifically, accesses the subcollection {collection_name}.{property_name}. Call pydough.explain(graph['{collection_name}']['{property_name}']) to learn more about this subcollection property."
)
prop = qualified_node.subcollection_property
if isinstance(prop, CartesianProductMetadata):
child_name = prop.child_collection.name
# SubCollection.preceding_context is always None; the
# parent context is identified by collection_name.
left_desc = collection_name
lines.append(
"This node is a CROSS join: every row of the left "
"collection is paired with every row of the right "
"collection."
)
lines.append(f"Left (parent): {left_desc}")
lines.append(f"Right (child): {child_name}")
lines.append(
f"Metadata: {collection_name}.{property_name} -> {child_name}. "
f"Call pydough.explain(graph['{collection_name}']['{property_name}']) "
"to learn more."
)
else:
lines.append(
f"This node, specifically, accesses the subcollection "
f"{collection_name}.{property_name}. Call "
f"pydough.explain(graph['{collection_name}']"
f"['{property_name}']) to learn more about this "
"subcollection property."
)
case PartitionChild():
lines.append(
f"This node, specifically, accesses the unpartitioned data of a partitioning (child name: {qualified_node.partition_child_name})."
)
case Singular():
lines.append(
"This node applies the SINGULAR operator, asserting that the "
"preceding collection is singular (1-to-1) with respect to the "
"parent context."
)
lines.append(
f"Collection made singular: {qualified_node.preceding_context.to_string()}"
)
case PyDoughUserGeneratedCollectionQDag():
collection_name = qualified_node.name
columns = sorted(qualified_node.calc_terms)
lines.append(
f"This node accesses user-generated collection '{collection_name}'.\n"
f"Columns: {', '.join(columns)}"
)
lines.append(
f"Unique columns: {', '.join(sorted(qualified_node.unique_terms))}"
)
case ChildOperator():
if len(qualified_node.children):
lines.append(
Expand Down Expand Up @@ -389,16 +444,9 @@ def explain_unqualified(
lines.append(
"The main task of this node is to filter on the following conditions:"
)
conditions: list[PyDoughExpressionQDAG] = []
if (
isinstance(qualified_node.condition, ExpressionFunctionCall)
and qualified_node.condition.operator == pydop.BAN
):
for arg in qualified_node.condition.args:
assert isinstance(arg, PyDoughExpressionQDAG)
conditions.append(arg)
else:
conditions.append(qualified_node.condition)
conditions: list[PyDoughExpressionQDAG] = extract_conditions(
qualified_node.condition
)
for condition in conditions:
tree_string = condition.to_string(True)
regular_string = condition.to_string(False)
Expand Down Expand Up @@ -458,16 +506,7 @@ def explain_unqualified(
)

# Dump the collection & expression terms of the collection
expr_names: list[str] = []
collection_names: list[str] = []
for name in qualified_node.all_terms:
term: PyDoughQDAG = qualified_node.get_term(name)
if isinstance(term, PyDoughExpressionQDAG):
expr_names.append(name)
else:
collection_names.append(name)
expr_names.sort()
collection_names.sort()
expr_names, collection_names = extract_terms(qualified_node)

if len(expr_names) > 0:
lines.append(
Expand Down
4 changes: 4 additions & 0 deletions pydough/exploration/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
from pydough.unqualified import (
UnqualifiedAccess,
UnqualifiedCalculate,
UnqualifiedCross,
UnqualifiedNode,
UnqualifiedOrderBy,
UnqualifiedPartition,
UnqualifiedRoot,
UnqualifiedSingular,
UnqualifiedTopK,
UnqualifiedWhere,
display_raw,
Expand Down Expand Up @@ -57,6 +59,8 @@ def find_unqualified_root(node: UnqualifiedNode) -> UnqualifiedRoot | None:
| UnqualifiedOrderBy()
| UnqualifiedTopK()
| UnqualifiedPartition()
| UnqualifiedCross()
| UnqualifiedSingular()
):
predecessor: UnqualifiedNode = node._parcel[0]
return find_unqualified_root(predecessor)
Expand Down
4 changes: 4 additions & 0 deletions pydough/unqualified/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
"UnqualifiedAccess",
"UnqualifiedBinaryOperation",
"UnqualifiedCalculate",
"UnqualifiedCross",
"UnqualifiedLiteral",
"UnqualifiedNode",
"UnqualifiedOperation",
"UnqualifiedOperator",
"UnqualifiedOrderBy",
"UnqualifiedPartition",
"UnqualifiedRoot",
"UnqualifiedSingular",
"UnqualifiedTopK",
"UnqualifiedWhere",
"UnqualifiedWindow",
Expand All @@ -32,12 +34,14 @@
UnqualifiedAccess,
UnqualifiedBinaryOperation,
UnqualifiedCalculate,
UnqualifiedCross,
UnqualifiedLiteral,
UnqualifiedNode,
UnqualifiedOperation,
UnqualifiedOrderBy,
UnqualifiedPartition,
UnqualifiedRoot,
UnqualifiedSingular,
UnqualifiedTopK,
UnqualifiedWhere,
UnqualifiedWindow,
Expand Down
Loading
Loading