Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "funkyoz/json-stream",
"description": "High-performance PHP library for streaming JSON parsing with constant memory usage.",
"version": "1.0.0",
"version": "1.1.0",
"keywords": ["php", "json", "stream", "json-stream"],
"license": "MIT",
"authors": [
Expand Down
191 changes: 191 additions & 0 deletions src/Internal/JsonPath/PathEvaluator.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,73 @@ public function matches(): bool
return $this->matchSegments($segments, 1, 0);
}

/**
 * Decide whether the value at the current position has to be parsed so that
 * trailing path segments can be pulled out of it.
 *
 * Example: for $.users[*].name, standing on an element selected by the
 * wildcard yields true, because .name still has to be read from that element.
 *
 * @return bool True when the path stack lines up with the leading segments of
 *              the expression and getRemainingSegments() is non-empty
 */
public function shouldExtractFromValue(): bool
{
    $allSegments = $this->expression->getSegments();
    $stackDepth = count($this->pathStack);

    // The expression must be strictly longer than the stack, and every stack
    // entry must match its segment (segment 0 is skipped: matching starts at 1).
    $prefixMatches = $stackDepth < count($allSegments)
        && $this->matchSegmentsPartial($allSegments, 1, 0, $stackDepth);

    if (! $prefixMatches) {
        return false;
    }

    // Only worth extracting when something walkable follows the current segment
    return count($this->getRemainingSegments()) > 0;
}

/**
 * Check that the path stack agrees with the expression, pairing one segment
 * with one stack entry, until $maxDepth stack entries have been verified.
 *
 * @param PathSegment[] $segments Path segments to match
 * @param int $segmentIndex Index of the first segment to compare
 * @param int $stackIndex Index of the first stack entry to compare
 * @param int $maxDepth Stack index at which matching stops successfully
 * @return bool True if every compared pair matched before reaching $maxDepth;
 *              false if segments or stack entries ran out first, or a pair mismatched
 */
private function matchSegmentsPartial(array $segments, int $segmentIndex, int $stackIndex, int $maxDepth): bool
{
    $segmentCount = count($segments);

    // Advance segment and stack positions in lockstep
    while ($stackIndex < $maxDepth) {
        // Ran out of segments before reaching the requested depth
        if ($segmentIndex >= $segmentCount) {
            return false;
        }

        // Ran out of stack entries before reaching the requested depth
        if ($stackIndex >= count($this->pathStack)) {
            return false;
        }

        $matched = $segments[$segmentIndex]->matches(
            $this->pathStack[$stackIndex],
            $this->valueStack[$stackIndex],
            $stackIndex
        );

        if (! $matched) {
            return false;
        }

        $segmentIndex++;
        $stackIndex++;
    }

    // Every position up to $maxDepth was accepted
    return true;
}

/**
* Check if current path structure matches without evaluating filters
*
Expand Down Expand Up @@ -337,4 +404,128 @@ public function getExpression(): PathExpression
{
return $this->expression;
}

/**
 * Get the walkable segments that come directly after the current match point
 *
 * When streaming an array like $.users[*].name, after matching at
 * $.users[*], we need to know that .name remains to be extracted.
 *
 * Only a contiguous run of PropertySegment and ArrayIndexSegment instances is
 * returned, because those are the segments walkValue() can follow step by
 * step. Collection stops at the first segment of any other kind (wildcard,
 * filter, slice, or anything else): skipping over such a segment and still
 * collecting what follows would describe a different path than the
 * expression does.
 *
 * The segment at the current position (the one just matched) is never included.
 *
 * @return PathSegment[] Remaining segments to extract from the matched value
 */
public function getRemainingSegments(): array
{
    $segments = $this->expression->getSegments();
    $segmentCount = count($segments);

    // For $.users[*].name while positioned on an array element:
    // - Segments: [RootSegment, PropertySegment(users), WildcardSegment, PropertySegment(name)]
    // - Path stack: [users, <index>] => depth 2
    // - Segment just matched: index 2 (WildcardSegment), i.e. index === depth
    // - Remaining starts at index 3 (PropertySegment(name))
    $firstRemainingIndex = count($this->pathStack) + 1;

    $remaining = [];
    for ($i = $firstRemainingIndex; $i < $segmentCount; $i++) {
        $segment = $segments[$i];

        $isWalkable = $segment instanceof PropertySegment
            || $segment instanceof ArrayIndexSegment;

        if (! $isWalkable) {
            // Wildcards, filters and slices need nested streaming; any other
            // segment kind cannot be walked either, so stop here rather than
            // silently dropping it from the path.
            break;
        }

        $remaining[] = $segment;
    }

    return $remaining;
}

/**
 * Follow a list of segments through an already-parsed value
 *
 * Used for patterns like $.users[*].name: once an element of the streamed
 * array has been parsed, this descends into it to fetch "name".
 *
 * With an empty segment list the value is handed back unchanged.
 *
 * @param mixed $value The parsed value to descend into
 * @param PathSegment[] $segments Segments to apply, in order
 * @return mixed The value reached, or null when a step cannot be applied
 *               (missing key/index, wrong container type, unsupported segment)
 */
public function walkValue(mixed $value, array $segments): mixed
{
    $node = $value;

    foreach ($segments as $step) {
        if ($step instanceof PropertySegment) {
            // Property lookup: the node must be an array holding that key
            if (! is_array($node) || ! array_key_exists($step->getPropertyName(), $node)) {
                return null;
            }

            $node = $node[$step->getPropertyName()];
        } elseif ($step instanceof ArrayIndexSegment) {
            // Index lookup: only sequential (list) arrays qualify
            if (! (is_array($node) && array_is_list($node))) {
                return null;
            }

            $offset = $step->getIndex();
            if ($offset < 0) {
                // Negative indices count back from the end of the list
                $offset += count($node);
            }

            if (! array_key_exists($offset, $node)) {
                return null;
            }

            $node = $node[$offset];
        } elseif ($step instanceof WildcardSegment) {
            // Nested wildcard such as $.users[*].posts[*]: the whole array is
            // returned as-is and any later segments are left to the caller.
            return is_array($node) ? $node : null;
        } else {
            // Any other segment type is not supported by the walker
            return null;
        }
    }

    return $node;
}
}
53 changes: 30 additions & 23 deletions src/Internal/JsonPath/PathExpression.php
Original file line number Diff line number Diff line change
Expand Up @@ -177,18 +177,20 @@ public function getTerminationIndex(): ?int
/**
* Check if path can use simple streaming optimization
*
* Returns true for simple patterns that can be streamed efficiently:
* Returns true for patterns that can be streamed efficiently:
* - $.array[*] - root array wildcard
* - $.prop[*] - property then array wildcard
* - $.prop.nested[*] - nested property navigation then wildcard
* - $.array[0] or $.array[0:10] - specific index/slice access
* - $.Ads[*] - the main use case!
* - $.users[*].name - wildcard with property extraction (NEW)
* - $.users[*].profile.email - wildcard with deep property extraction (NEW)
* - $.users[?(@.age > 18)] - filter expressions (NEW)
* - $.users[?(@.age > 18)].name - filter with property extraction (NEW)
*
* Returns false for complex patterns that need full tree walking:
* - $..prop - recursive descent
* - $.array[*].prop - wildcard followed by property access (needs walkValue)
* - $.array[*].prop[*] - multiple wildcards
* - Complex filter expressions
* - $.users[*].posts[*] - multiple wildcards (requires nested streaming)
*
* @return bool True if simple streaming can be used
*/
Expand All @@ -205,40 +207,45 @@ public function canUseSimpleStreaming(): bool
}

$wildcardCount = 0;
$hasArrayOpFollowedByProperty = false;
$filterCount = 0;
$hasMultipleArrayOps = false;

// Skip root segment (index 0)
for ($i = 1; $i < count($this->segments); $i++) {
$segment = $this->segments[$i];
$nextSegment = $this->segments[$i + 1] ?? null;

// Count wildcards
// Count wildcards and filters
if ($segment instanceof WildcardSegment) {
$wildcardCount++;

// Check if wildcard is followed by property access
if ($nextSegment !== null && $nextSegment instanceof PropertySegment) {
$hasArrayOpFollowedByProperty = true;
}
} elseif ($segment instanceof ArrayIndexSegment || $segment instanceof ArraySliceSegment) {
// Check if array operation is followed by property access
if ($nextSegment !== null && $nextSegment instanceof PropertySegment) {
$hasArrayOpFollowedByProperty = true;
}
} elseif ($segment instanceof FilterSegment) {
// Filters are complex, not simple streaming
return false;
$filterCount++;
} elseif ($segment instanceof ArrayIndexSegment || $segment instanceof ArraySliceSegment) {
// Array index/slice operations are fine
} elseif ($segment instanceof PropertySegment) {
// Property segments are fine - they can come after wildcards
}
}

// Don't stream if:
// - Multiple wildcards
// - Array operation followed by property (like [*].name)
if ($wildcardCount > 1 || $hasArrayOpFollowedByProperty) {
// - Multiple wildcards (nested wildcard streaming not yet implemented)
// - Multiple filters
// - Wildcard + filter combination
if ($wildcardCount > 1) {
return false;
}

if ($filterCount > 1) {
return false;
}

if ($wildcardCount > 0 && $filterCount > 0) {
return false;
}

// Simple patterns: $.array[*], $.prop.array[0], etc. (ending with array op)
// We can stream:
// - Single wildcard with any number of properties after it: $.users[*].name
// - Single filter with any number of properties after it: $.users[?(@.age > 18)].email
// - Simple array access: $.users[0], $.users[0:10]
return true;
}
}
8 changes: 8 additions & 0 deletions src/Internal/JsonPath/PropertySegment.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,12 @@ public function getProperty(): string
{
return $this->property;
}

/**
 * Get the property name (alias for getProperty())
 *
 * Reads the same $property field that getProperty() returns, under the name
 * used by callers that walk parsed values.
 *
 * @return string The property name stored on this segment
 */
public function getPropertyName(): string
{
return $this->property;
}
}
32 changes: 27 additions & 5 deletions src/Internal/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ private function streamFromArray(): \Generator
return;
}

$index = 0;
$index = 0; // Array index for path evaluation
$resultIndex = 0; // Sequential index for yielded results

while (true) {
// Enter array index
Expand All @@ -219,18 +220,39 @@ private function streamFromArray(): \Generator
$this->pathEvaluator->enterLevel($index, $value);

if ($this->pathEvaluator->matches()) {
yield $value;
// Check if there are remaining segments to extract
$remainingSegments = $this->pathEvaluator->getRemainingSegments();
if (! empty($remainingSegments)) {
// Walk into value to extract remaining path
$extracted = $this->pathEvaluator->walkValue($value, $remainingSegments);
if ($extracted !== null) {
yield $resultIndex++ => $extracted;
}
} else {
yield $resultIndex++ => $value;
}
}

$this->pathEvaluator->exitLevel();
} else {
// No filter - check if current position matches structurally
$matchesCurrent = $this->pathEvaluator->matches();
$shouldExtract = $this->pathEvaluator->shouldExtractFromValue();

if ($matchesCurrent) {
// This element matches - parse and yield it
if ($shouldExtract) {
// We're at an array element that partially matches, and there are
// remaining segments to extract (e.g., $.users[*].name)
$value = $this->parseValue();
yield $value;
$remainingSegments = $this->pathEvaluator->getRemainingSegments();
$extracted = $this->pathEvaluator->walkValue($value, $remainingSegments);
if ($extracted !== null) {
yield $resultIndex++ => $extracted;
}
$this->pathEvaluator->exitLevel();
} elseif ($matchesCurrent) {
// This element fully matches - parse and yield it
$value = $this->parseValue();
yield $resultIndex++ => $value;
$this->pathEvaluator->exitLevel();
} else {
// Check if we need to go deeper
Expand Down
Loading