Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 87 additions & 10 deletions pineforge_codegen/codegen/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,79 @@ def __init__(self, ctx: AnalyzerContext) -> None:
self._register_global_aggregate_member_types()
self._uses_matrix = self._detect_matrix_usage()

# max_bars_back: the per-variable history depth the engine's Series<T>
# ring buffer should retain. Pine exposes this two ways — the
# ``strategy(..., max_bars_back=N)`` kwarg (global) and the
# ``max_bars_back(var, N)`` function (per-var). The engine's
# ``Series<T>(int max_len)`` ctor (default 500, include/pineforge/
# series.hpp) is the wiring point: reads past the retained depth return
# na, so honoring the directive means constructing each Series with a
# capacity >= the requested depth. We take the MAX requested N and apply
# it (via ``_series_decl_suffix`` -> ``{N}``) to the directly-declared
# ``Series<T>`` members — a safe superset of Pine's per-var semantics
# (it never retains LESS than Pine, so any history access that succeeds
# in Pine succeeds here). ``None`` => no directive => keep the engine
# default 500 (emit a bare ``Series<T>`` with no ctor arg, so
# directive-free output is byte-identical to before).
#
# KNOWN LIMITATION: the lazily-constructed security-helper map series
# (``_security_helper_series_``, the ``std::unordered_map<std::string,
# Series<double>>`` ~line 971) do NOT pick up the cap. Their entries are
# default-constructed on first ``operator[]`` access, so they always use
# the engine default 500 regardless of the requested ``N``. A
# max_bars_back directive larger than 500 is therefore not honored for
# history reads off security-helper series.
self._max_bars_back_cap: int | None = self._compute_max_bars_back_cap()

@staticmethod
def _int_literal_value(node: ASTNode | None) -> int | None:
    """Extract the integer denoted by a numeric literal expression.

    Handles a plain ``NumberLiteral`` as well as one wrapped in any number
    of unary-minus operators. Returns ``None`` when ``node`` is anything
    else (including ``None``) or when the literal is a non-integral float.
    """
    # Unary minus: resolve the operand first, then flip the sign.
    if isinstance(node, UnaryOp) and node.op == "-":
        magnitude = CodeGen._int_literal_value(node.operand)
        if magnitude is None:
            return None
        return -magnitude

    if not isinstance(node, NumberLiteral):
        return None

    raw = node.value
    if isinstance(raw, int):
        return raw
    # Pine accepts ``max_bars_back=5e2`` style, so integral floats count.
    if isinstance(raw, float) and raw.is_integer():
        return int(raw)
    return None

def _compute_max_bars_back_cap(self) -> int | None:
"""Scan the AST for max_bars_back directives (strategy() kwarg AND the
bare function call) and return the largest positive integer requested,
or None if none is present / none is a usable literal."""
ast = getattr(self.ctx, "ast", None)
if ast is None:
return None
caps: list[int] = []
for node in self._walk_ast(ast):
if isinstance(node, StrategyDecl):
val = self._int_literal_value(node.kwargs.get("max_bars_back"))
if val is not None and val > 0:
caps.append(val)
elif (
isinstance(node, FuncCall)
and isinstance(node.callee, Identifier)
and node.callee.name == "max_bars_back"
):
# max_bars_back(var, num) — second positional arg, or the
# ``num=`` kwarg, is the depth.
num_node = None
if len(node.args) >= 2:
num_node = node.args[1]
elif "num" in node.kwargs:
num_node = node.kwargs["num"]
val = self._int_literal_value(num_node)
if val is not None and val > 0:
caps.append(val)
return max(caps) if caps else None

def _series_decl_suffix(self) -> str:
"""C++ constructor-arg suffix for Series<T> member declarations. Empty
(engine default 500) unless a max_bars_back directive raised the cap."""
return f"{{{self._max_bars_back_cap}}}" if self._max_bars_back_cap else ""

def _register_global_aggregate_member_types(self) -> None:
"""Infer matrix/array/map class members for global non-var declarations from RHS AST.

Expand Down Expand Up @@ -802,6 +875,10 @@ def generate(self) -> str:

lines: list[str] = []

# Series<T> ctor-arg suffix from any max_bars_back directive (empty when
# absent, so directive-free output is byte-identical to before).
_mbb = self._series_decl_suffix()

# 1. Includes
self._emit_includes(lines)

Expand Down Expand Up @@ -875,7 +952,7 @@ def generate(self) -> str:
self._security_ohlc_hist_fields_by_sec.get(sec_id, ())
):
lines.append(
f" Series<double> {self._security_ohlc_hist_series_cpp(sec_id, field)};"
f" Series<double> {self._security_ohlc_hist_series_cpp(sec_id, field)}{_mbb};"
)
continue
if returns_tuple and tuple_size and tuple_size > 0 and isinstance(expr_node, TupleLiteral):
Expand All @@ -896,7 +973,7 @@ def generate(self) -> str:
lines.append(f" double _req_sec_{sec_id} = na<double>();")
for field in sorted(self._security_ohlc_hist_fields_by_sec.get(sec_id, ())):
lines.append(
f" Series<double> {self._security_ohlc_hist_series_cpp(sec_id, field)};"
f" Series<double> {self._security_ohlc_hist_series_cpp(sec_id, field)}{_mbb};"
)

if self._security_calls:
Expand All @@ -911,7 +988,7 @@ def generate(self) -> str:
state_name = self._security_state_name(info["sec_id"], name)
cpp_type = self._security_cpp_type_for_mutable(name, ginfo)
if getattr(ginfo, "is_series", False):
lines.append(f" Series<{cpp_type}> {state_name};")
lines.append(f" Series<{cpp_type}> {state_name}{_mbb};")
else:
default = self._default_for_type(cpp_type)
lines.append(f" {cpp_type} {state_name} = {default};")
Expand All @@ -938,7 +1015,7 @@ def generate(self) -> str:

# 4. Series members for bar field history
for field_name in sorted(self.ctx.series_bar_fields):
lines.append(f" Series<double> _s_{field_name};")
lines.append(f" Series<double> _s_{field_name}{_mbb};")

# 5. var/varip members (deduplicate by name)
seen_var_members: set[str] = set()
Expand Down Expand Up @@ -987,7 +1064,7 @@ def generate(self) -> str:
if cpp_type == "int" and self._is_int64_builtin_init(name):
cpp_type = "int64_t"
if name in self.ctx.series_vars:
lines.append(f" Series<{cpp_type}> {safe};")
lines.append(f" Series<{cpp_type}> {safe}{_mbb};")
else:
lines.append(f" {cpp_type} {safe};")

Expand All @@ -996,7 +1073,7 @@ def generate(self) -> str:
if name not in self._var_names:
safe = self._safe_name(name)
cpp_type = self._series_type_for(name)
lines.append(f" Series<{cpp_type}> {safe};")
lines.append(f" Series<{cpp_type}> {safe}{_mbb};")

# 7. Fixnan members
for site in self.ctx.fixnan_sites:
Expand All @@ -1009,9 +1086,9 @@ def generate(self) -> str:
# Determine type: int for count vars, double for float vars
if member in ("closedtrades", "opentrades", "wintrades", "losstrades",
"eventrades"):
lines.append(f" Series<int> {svar};")
lines.append(f" Series<int> {svar}{_mbb};")
else:
lines.append(f" Series<double> {svar};")
lines.append(f" Series<double> {svar}{_mbb};")

# 8b. Global-scope non-var declarations as class members
# (so user-defined functions can reference them)
Expand Down Expand Up @@ -1063,7 +1140,7 @@ def generate(self) -> str:
if self._safe_name(vname) == orig_safe:
cpp_type = PINE_TYPE_TO_CPP.get(ptype, "double")
if vname in self.ctx.series_vars:
lines.append(f" Series<{cpp_type}> {cloned_safe};")
lines.append(f" Series<{cpp_type}> {cloned_safe}{_mbb};")
elif vname in self._matrix_specs:
lines.append(f" {self._type_spec_to_cpp(self._matrix_specs[vname])} {cloned_safe};")
elif vname in self._array_vars:
Expand All @@ -1078,7 +1155,7 @@ def generate(self) -> str:
# Non-var series var
if orig_safe in [self._safe_name(n) for n in self.ctx.series_vars]:
cpp_type = self._series_type_for(orig_safe)
lines.append(f" Series<{cpp_type}> {cloned_safe};")
lines.append(f" Series<{cpp_type}> {cloned_safe}{_mbb};")
else:
lines.append(f" double {cloned_safe} = 0.0;")

Expand Down
5 changes: 5 additions & 0 deletions pineforge_codegen/codegen/visit_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,11 @@ def _visit_func_call(self, node: FuncCall) -> str:
return "0"
if func_name in SKIP_FUNC_NAMES and namespace is None:
return "0"
# max_bars_back(var, num): a history-depth DIRECTIVE, not a value.
# Its effect is captured in CodeGen._compute_max_bars_back_cap (which
# sizes the directly-declared Series<T> ring buffers), so the call itself
# contributes only the placeholder expression "0".
if func_name == "max_bars_back" and namespace is None:
return "0"

# request.* calls
if namespace == "request":
Expand Down
81 changes: 62 additions & 19 deletions pineforge_codegen/support_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,14 @@
* HARD_REJECT_FUNC / HARD_REJECT_NAMESPACE - calls that have no PineForge
semantics at all (e.g. ``request.financial``, ``ticker.*``).
* DIVERGENT_VARS - built-in variables whose PineForge value diverges from
TradingView (e.g. ``bar_index`` depends on data window, ``last_bar_index``
is wrongly aliased in codegen). Reported as WARNING — these often appear
in visual or logging code that does not affect trade outcomes.
TradingView. Most are reported as WARNING (e.g. ``bar_index`` depends on the
data window, ``timenow`` is not wall-clock) — these often appear in visual or
logging code that does not affect trade outcomes. A subset
(DIVERGENT_VARS_ERROR: ``last_bar_index`` aliased to the *current* bar index,
``time_close`` aliased to the bar *open* timestamp) are silent MIS-ALIASES:
they produce a plausible-looking but wrong value that flows straight into
trade logic, so a backtest would be silently wrong. Those are escalated to
ERROR (rejected) rather than merely warned.
* NOT_YET - calls the runtime could support but the transpiler does not yet
emit (e.g. ``timeframe.from_seconds``, bare ``barssince``).
* request.security - only ``symbol`` / ``timeframe`` / ``expression`` allowed,
Expand Down Expand Up @@ -131,16 +136,25 @@
}

# Built-in variables whose PineForge value diverges from TradingView semantics.
# Demoted to WARNING — many real strategies use bar_index / time_close in
# logging or visual logic that does not affect trade outcomes. The checker
# still flags divergence so users see the risk.
# Most are reported as WARNING — many real strategies use bar_index / timenow in
# logging or visual logic that does not affect trade outcomes. The checker still
# flags divergence so users see the risk.
#
# DIVERGENT_VARS_ERROR is a SUBSET that is escalated to ERROR (rejected): these
# are silent MIS-ALIASES, not merely data-window divergences. They return a
# plausible value that is the WRONG quantity (last_bar_index -> current bar
# index; time_close -> bar OPEN timestamp) and that value flows directly into
# trade logic, so the backtest would be silently wrong. A WARNING is not enough.
DIVERGENT_VARS: dict[str, str] = {
"bar_index": "bar_index depends on the data window; PineForge and TradingView produce different values for the same script.",
"last_bar_index": "last_bar_index is incorrectly aliased to the current bar index in PineForge codegen.",
"last_bar_index": "last_bar_index is aliased to the CURRENT bar index in PineForge codegen (not the index of the last bar); backtest would be silently wrong — rejected.",
"timenow": "timenow is aliased to the current bar timestamp in PineForge; it is not real wall-clock time.",
"time_close": "time_close is aliased to the bar open timestamp in PineForge; it does not represent the bar close time.",
"time_close": "time_close is aliased to the bar OPEN timestamp in PineForge; it does not represent the bar close time; backtest would be silently wrong — rejected.",
}

# Subset of DIVERGENT_VARS escalated from WARNING to ERROR (see comment above).
DIVERGENT_VARS_ERROR: frozenset[str] = frozenset({"last_bar_index", "time_close"})

BARSTATE_APPROX_VARS: dict[str, str] = {
"barstate.islast": "barstate.islast is always false in PineForge batch backtests.",
"barstate.ishistory": "barstate.ishistory is always true in PineForge batch backtests.",
Expand Down Expand Up @@ -191,8 +205,12 @@
})

# Implementable but currently silent in codegen -> reject loudly.
#
# max_bars_back was here ("silently dropped") but is now WIRED: codegen sizes
# the directly-declared Series<T> ring buffers to retain at least the requested
# depth via the engine's ``Series<T>(int max_len)`` ctor
# (include/pineforge/series.hpp). The lazily-constructed security-helper map
# series still use the engine default. It is no longer rejected — see
# CodeGen._compute_max_bars_back_cap.
NOT_YET_FUNC: dict[str, str] = {
"max_bars_back": "max_bars_back is silently dropped by the codegen.",
"timeframe.from_seconds": "timeframe.from_seconds is not yet implemented; codegen would emit 'false' and silently produce wrong TF strings.",
}

Expand Down Expand Up @@ -386,6 +404,12 @@ def __init__(self, ast: Program, filename: str = "<input>") -> None:
# request.security (barmerge.* gaps/lookahead values). While > 0 the
# UNSUPPORTED_CONST_NAMESPACES rejection is suppressed.
self._const_arg_ctx_depth: int = 0
# id()s of Identifier/MemberAccess nodes that are the *callee* of a
# FuncCall. A divergent built-in NAME used as a call target (e.g. the
# session-aware ``time_close("D")`` function, which is distinct from the
# bare ``time_close`` variable) must NOT be flagged as a divergent
# variable read. Populated as _visit_FuncCall descends into children.
self._callee_node_ids: set[int] = set()

# -- Public API --

Expand Down Expand Up @@ -629,6 +653,12 @@ def _visit_TupleAssign(self, node: TupleAssign) -> None:
def _visit_FuncCall(self, node: FuncCall) -> None:
ns, name = _qualified_name(node.callee)

# Mark the callee so the generic child-walk does not treat a divergent
# built-in *function* name (e.g. ``time_close("D")``) as a divergent
# *variable* read. The call's own semantics are validated here.
if node.callee is not None:
self._callee_node_ids.add(id(node.callee))

if ns is None and name is None:
self._visit_children(node)
return
Expand Down Expand Up @@ -918,8 +948,9 @@ def _visit_Identifier(self, node: Identifier) -> None:
"code into the strategy script).",
)
return
if node.name in DIVERGENT_VARS:
self._warn(
if node.name in DIVERGENT_VARS and id(node) not in self._callee_node_ids:
emit = self._err if node.name in DIVERGENT_VARS_ERROR else self._warn
emit(
node,
f"{node.name} diverges from TradingView semantics in PineForge.",
hint=DIVERGENT_VARS[node.name],
Expand All @@ -945,8 +976,13 @@ def _visit_Ternary(self, node: Ternary) -> None:

def _visit_MemberAccess(self, node: MemberAccess) -> None:
chain = _resolve_member_chain(node)
if chain is not None and chain in DIVERGENT_VARS:
self._warn(
if (
chain is not None
and chain in DIVERGENT_VARS
and id(node) not in self._callee_node_ids
):
emit = self._err if chain in DIVERGENT_VARS_ERROR else self._warn
emit(
node,
f"{chain} diverges from TradingView semantics in PineForge.",
hint=DIVERGENT_VARS[chain],
Expand Down Expand Up @@ -1004,14 +1040,21 @@ def _visit_MemberAccess(self, node: MemberAccess) -> None:
if isinstance(node.object, Identifier) and node.object.name == "syminfo":
if node.member not in SUPPORTED_SYMINFO:
self._err(node, f"syminfo.{node.member} is not implemented in PineForge runtime.")
elif (
self._in_conditional_depth > 0
and node.member in self._SYMINFO_SILENT_GAP_FIELDS
):
elif node.member in self._SYMINFO_SILENT_GAP_FIELDS:
# These fields silently return na in current PineForge. Warn on
# EVERY read — not just inside an if/ternary condition — because
# a field used directly in a plain expression (e.g. ``x =
# syminfo.pricescale * 2``) slips out as na with no signal too.
# The conditional phrasing is kept where it applies.
extra = (
" condition will always be false."
if self._in_conditional_depth > 0
else " any expression using it will be na."
)
self._warn(
node,
f"syminfo.{node.member} returns na in current PineForge; "
"condition will always be false. "
f"syminfo.{node.member} returns na in current PineForge;"
f"{extra} "
"Will be backfilled by pineforge-data product.",
)
self._visit_children(node)
Expand Down
21 changes: 21 additions & 0 deletions scripts/worker-template.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ const GLUE = `__GLUE__`;
const post = (m) => self.postMessage(m);
let transpileJson = null;

// Compute the SHA-256 digest of an ArrayBuffer with the worker's WebCrypto
// and return it as a lowercase hex string (two characters per byte).
async function sha256Hex(buf) {
  const bytes = new Uint8Array(await crypto.subtle.digest("SHA-256", buf));
  let hex = "";
  for (const byte of bytes) {
    // padStart keeps the leading zero for bytes below 0x10.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}

async function init() {
try {
const pyodide = await loadPyodide({ indexURL: "/pyodide/" });
Expand All @@ -21,6 +29,19 @@ async function init() {
const archiveRes = await fetch(`/pyodide/${manifest.archive}`);
if (!archiveRes.ok) throw new Error(`fetch /pyodide/${manifest.archive}: ${archiveRes.status}`);
const buf = await archiveRes.arrayBuffer();
// Defensive integrity check: verify the archive bytes against the manifest's
// sha256 BEFORE unpacking/running. Verify-if-present — older manifests that
// predate the sha256 field are accepted unchanged (forward/backward compat).
if (manifest.sha256) {
const actual = await sha256Hex(buf);
if (actual !== manifest.sha256) {
post({
type: "init-error",
error: `codegen archive sha256 mismatch — expected ${manifest.sha256} got ${actual}`,
});
return;
}
}
pyodide.unpackArchive(buf, "gztar", { extractDir: "/codegen" });
pyodide.runPython(GLUE);
const fn = pyodide.globals.get("transpile_json");
Expand Down
7 changes: 7 additions & 0 deletions tests/gate-corpus/err/divergent_last_bar_index.pine
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//@version=6
strategy("T")
// last_bar_index is aliased to the CURRENT bar index in PineForge codegen, so a
// backtest reading it would be silently wrong -> hard reject (ERROR).
isLast = bar_index == last_bar_index
if isLast
strategy.close_all()
Loading
Loading