Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
a961288
Add tests reproducing DNR translation and compilation issues with rea…
chrmod Apr 10, 2026
cba3ff2
Add validate-dnr-rules tool for checking DNR rulesets against WebKit'…
chrmod Apr 10, 2026
66f54b7
Fix spurious "duplicate rule id 0" errors for invalid DNR rules
chrmod Apr 10, 2026
e1a26a4
Fix validate-dnr-rules to use locally-built WebKit at runtime
chrmod Apr 10, 2026
ef095de
Fix misleading error limit message in DNR rule translator
chrmod Apr 10, 2026
0170c00
Support bounded quantifiers {n}, {n,m}, {n,} in content extension URL…
chrmod Apr 10, 2026
b79db5d
Support \d, \w, \s character class shorthands in content extension UR…
chrmod Apr 10, 2026
cce6230
Support alternation (|) inside groups in content extension URL filters
chrmod Apr 10, 2026
b95de5b
Map DNR "object" resource type to WebKit's "other" content blocker type
chrmod Apr 10, 2026
8fd1255
Support word boundaries (\b, \B) in content extension URL filters
chrmod Apr 10, 2026
49a15c8
Support \b word boundaries, character class shorthands in [...], and …
chrmod Apr 10, 2026
fc60bb0
Support {0} quantifier and top-level alternation in URL filters
chrmod Apr 10, 2026
18b0883
Add standalone validate-dnr-rules tool and distribution plans
chrmod Apr 10, 2026
a3861ec
Remove planning docs — plans are now implemented
chrmod Apr 10, 2026
a172390
Add CMake build and GitHub Actions CI for cross-platform validate-dnr…
chrmod Apr 10, 2026
58393ff
Fix CMake module path and split CI configure per platform
chrmod Apr 10, 2026
d3be5fd
Build validate-dnr-rules via WebKit's root CMake with shallow checkout
chrmod Apr 10, 2026
ebd31f5
Install full GTK build deps for Linux CI
chrmod Apr 10, 2026
beb1f3c
Use WebKit's own install-dependencies script for Linux CI
chrmod Apr 10, 2026
6d2adcc
Fix Linux deps and add CMake build cache
chrmod Apr 10, 2026
1333d4e
Remove unavailable WPE packages from Linux CI
chrmod Apr 10, 2026
50de9b0
Add missing ATSPI and gi-docgen deps for GTK configure
chrmod Apr 10, 2026
7f8a888
Disable optional GTK features to minimize CI dependencies
chrmod Apr 10, 2026
50276e8
Use JSCOnly port for CI — only needs ICU, no GTK/platform deps
chrmod Apr 10, 2026
bf168d4
Fix WTF include paths for CMake build
chrmod Apr 10, 2026
6950e27
Fix JavaScriptCore include bridge to point to yarr/ subdirectory
chrmod Apr 10, 2026
f226fad
Linux build working: JSCOnly port + system malloc + Gigacage stub
chrmod Apr 11, 2026
7161c47
Add macOS arm64 build to CI, fix bmalloc stubs for cross-platform
chrmod Apr 11, 2026
78c3345
Use macos-15 runner for newer Clang with asm constraint support
chrmod Apr 11, 2026
ca61383
Create GitHub Release with binaries on merge to ghostery branch
chrmod Apr 11, 2026
8df9c11
Also trigger on ghostery/* branches for testing
chrmod Apr 11, 2026
151dbe4
Use 'validator' as the release branch name
chrmod Apr 11, 2026
473b77c
Ad-hoc codesign macOS binary for Apple Silicon Gatekeeper
chrmod Apr 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions .github/workflows/validate-dnr-rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
name: Build validate-dnr-rules

on:
push:
branches: [validator, ghostery/*]
workflow_dispatch:

jobs:
# Builds the validate-dnr-rules target once per matrix entry, smoke-tests the
# resulting binary, and uploads it as a per-platform artifact.
build:
strategy:
# Keep the other platform running when one matrix entry fails.
fail-fast: false
matrix:
include:
# `name` becomes the artifact/binary suffix; `deps` is the package list
# handed to apt-get or brew by the install steps below.
- os: ubuntu-24.04
name: linux-x64
deps: cmake ninja-build pkg-config ruby unifdef libicu-dev g++ perl python3
- os: macos-15
name: macos-arm64
deps: cmake ninja icu4c pkg-config
runs-on: ${{ matrix.os }}

steps:
# Shallow checkout: only the tip commit is fetched.
- uses: actions/checkout@v4
with:
fetch-depth: 1

- name: Install dependencies (Linux)
if: runner.os == 'Linux'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends ${{ matrix.deps }}

- name: Install dependencies (macOS)
if: runner.os == 'macOS'
run: brew install ${{ matrix.deps }}

# Reuse the `build` directory between runs. The key hashes the source trees
# listed in hashFiles(); restore-keys falls back to the newest cache for the
# same platform when there is no exact match.
- name: Cache CMake build
uses: actions/cache@v4
with:
path: build
key: cmake-${{ matrix.name }}-${{ hashFiles('Source/WTF/**', 'Source/WebCore/contentextensions/**', 'ghostery/validate-dnr-rules/**') }}
restore-keys: |
cmake-${{ matrix.name }}-

# Configure from the repository root with the JSCOnly port and the system
# allocator. On macOS, CMAKE_PREFIX_PATH points at Homebrew's icu4c prefix;
# on other runners it is left empty.
- name: Configure
run: |
cmake -B build -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DPORT=JSCOnly \
-DUSE_SYSTEM_MALLOC=ON \
.
env:
CMAKE_PREFIX_PATH: ${{ runner.os == 'macOS' && '/opt/homebrew/opt/icu4c' || '' }}

# Only the validator target is built, not the whole port.
- name: Build
run: cmake --build build --target validate-dnr-rules

# Smoke test: four block rules whose regexFilter uses a bounded quantifier,
# an alternation group, \d and \b respectively, fed to the freshly built tool.
# NOTE(review): presumably the tool exits non-zero on an unsupported rule,
# which is what would fail this step -- confirm against the tool's source.
- name: Test
run: |
cat > /tmp/test-rules.json << 'RULES'
[
{"id":1,"priority":1,"action":{"type":"block"},"condition":{"regexFilter":"ad[0-9]{2}\\.js"}},
{"id":2,"priority":1,"action":{"type":"block"},"condition":{"regexFilter":"(?:ads|tracking)\\.com"}},
{"id":3,"priority":1,"action":{"type":"block"},"condition":{"regexFilter":"tracker\\d+\\.js"}},
{"id":4,"priority":1,"action":{"type":"block"},"condition":{"regexFilter":"pixel\\b"}}
]
RULES
./build/bin/validate-dnr-rules /tmp/test-rules.json

# Ad-hoc signature (`--sign -`), replacing any existing one (`--force`).
- name: Sign binary (macOS)
if: runner.os == 'macOS'
run: codesign --sign - --force build/bin/validate-dnr-rules

# Copy the binary to a platform-suffixed name so both platforms' artifacts
# can be attached to one release without colliding.
- name: Prepare artifact
run: |
cp build/bin/validate-dnr-rules validate-dnr-rules-${{ matrix.name }}
chmod +x validate-dnr-rules-${{ matrix.name }}

- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: validate-dnr-rules-${{ matrix.name }}
path: validate-dnr-rules-${{ matrix.name }}

# Publishes the binaries produced by the `build` job as a GitHub Release whose
# tag is derived from the build date and the short commit SHA.
release:
  needs: build
  runs-on: ubuntu-latest
  permissions:
    # Required to create/delete releases and tags with the workflow token.
    contents: write

  steps:
    # Without a `name`, every artifact is downloaded into its own
    # sub-directory of `artifacts/`, named after the artifact.
    - name: Download all artifacts
      uses: actions/download-artifact@v4
      with:
        path: artifacts

    - name: Create release
      env:
        GH_TOKEN: ${{ github.token }}
        # This job has no checkout, so gh is told explicitly which repo to use.
        GH_REPO: ${{ github.repository }}
      run: |
        TAG="validate-dnr-rules-$(date +%Y%m%d)-${GITHUB_SHA::8}"

        # Delete existing release with same tag if re-running.
        # --cleanup-tag also removes the git tag, so it is recreated below
        # at the right commit instead of a stale tag being reused.
        gh release delete "$TAG" --yes --cleanup-tag 2>/dev/null || true

        # --target pins the new tag to the commit that was actually built.
        # Without it gh creates the tag from the default branch's HEAD, which
        # is not the commit pushed to validator / ghostery/*.
        gh release create "$TAG" \
          --target "$GITHUB_SHA" \
          --title "validate-dnr-rules $(date +%Y-%m-%d)" \
          --notes "Automated build from commit ${GITHUB_SHA::8}.

        ## Downloads
        - **Linux x64**: \`validate-dnr-rules-linux-x64\`
        - **macOS arm64**: \`validate-dnr-rules-macos-arm64\` (Intel Macs: run via Rosetta)

        ## Usage
        \`\`\`
        chmod +x validate-dnr-rules-*
        ./validate-dnr-rules-linux-x64 path/to/dnr-rules.json
        \`\`\`" \
          artifacts/validate-dnr-rules-linux-x64/validate-dnr-rules-linux-x64 \
          artifacts/validate-dnr-rules-macos-arm64/validate-dnr-rules-macos-arm64
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ if (DEVELOPER_MODE)
add_subdirectory(PerformanceTests)
endif ()

# -----------------------------------------------------------------------------
# Ghostery tools
# -----------------------------------------------------------------------------
# The validator is added only when its CMakeLists.txt is present;
# add_subdirectory() on a missing directory would abort the configure step.
# NOTE(review): presumably this keeps trees without ghostery/ configurable -- confirm.
if (EXISTS "${CMAKE_SOURCE_DIR}/ghostery/validate-dnr-rules/CMakeLists.txt")
add_subdirectory(ghostery/validate-dnr-rules)
endif ()

# -----------------------------------------------------------------------------
# Print the features list last, for maximum visibility.
# -----------------------------------------------------------------------------
Expand Down
100 changes: 64 additions & 36 deletions Source/WebCore/contentextensions/Term.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class Term {

// Group terms only.
void extendGroupSubpattern(const Term&);
void startNewAlternative();

void quantify(const AtomQuantifier&);

Expand Down Expand Up @@ -169,7 +170,10 @@ class Term {
friend void add(Hasher&, const Term::CharacterSet&);

struct Group {
Vector<Term> terms;
Vector<Vector<Term>> alternatives;

Vector<Term>& terms() { return alternatives.last(); }
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes no sense (overloading on const and returning references to different vectors).

This is almost certainly a bug.

// Read-only view of the alternative currently being built. This must agree with
// the non-const terms() overload, which returns alternatives.last(); returning a
// different element here would make the result depend on the constness of the
// Group as soon as it holds more than one alternative.
// NOTE(review): assumes alternatives is never empty (the group constructor seeds
// one empty alternative) -- confirm no other path clears it.
const Vector<Term>& terms() const { return alternatives.last(); }

friend bool operator==(const Group&, const Group&) = default;
};
Expand Down Expand Up @@ -197,7 +201,7 @@ inline void add(Hasher& hasher, const Term::CharacterSet& characterSet)

inline void add(Hasher& hasher, const Term::Group& group)
{
add(hasher, group.terms);
add(hasher, group.alternatives);
}

inline void add(Hasher& hasher, const Term& term)
Expand Down Expand Up @@ -253,8 +257,12 @@ inline String Term::toString() const
case TermType::Group: {
StringBuilder builder;
builder.append('(');
for (const Term& term : m_atomData.group.terms)
builder.append(term.toString());
for (unsigned a = 0; a < m_atomData.group.alternatives.size(); ++a) {
if (a)
builder.append('|');
for (const Term& term : m_atomData.group.alternatives[a])
builder.append(term.toString());
}
builder.append(')');
builder.append(quantifierToString(m_quantifier));
return builder.toString();
Expand Down Expand Up @@ -294,6 +302,7 @@ inline Term::Term(GroupTermTag)
: m_termType(TermType::Group)
{
new (NotNull, &m_atomData.group) Group();
m_atomData.group.alternatives.append(Vector<Term>());
}

inline Term::Term(EndOfLineAssertionTermTag)
Expand Down Expand Up @@ -371,7 +380,15 @@ inline void Term::extendGroupSubpattern(const Term& term)
ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::Group);
if (m_termType != TermType::Group)
return;
m_atomData.group.terms.append(term);
m_atomData.group.alternatives.last().append(term);
}

inline void Term::startNewAlternative()
{
    // Opens a fresh, empty alternative at the end of this group's alternative list.
    // Only meaningful for group terms: the assertion flags misuse, and any other
    // term type is left untouched.
    ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::Group);
    if (m_termType == TermType::Group)
        m_atomData.group.alternatives.append(Vector<Term>());
}

inline void Term::quantify(const AtomQuantifier& quantifier)
Expand Down Expand Up @@ -443,9 +460,11 @@ inline bool Term::matchesAtLeastOneCharacter() const
return false;

if (m_termType == TermType::Group) {
for (const Term& term : m_atomData.group.terms) {
if (term.matchesAtLeastOneCharacter())
return true;
for (const auto& alternative : m_atomData.group.alternatives) {
for (const Term& term : alternative) {
if (term.matchesAtLeastOneCharacter())
return true;
}
}
return false;
}
Expand All @@ -465,25 +484,22 @@ inline bool Term::isKnownToMatchAnyString() const
return isUniversalTransition() && m_quantifier == AtomQuantifier::ZeroOrMore;
break;
case TermType::Group: {
// There are infinitely many ways to match anything with groups, we just handle simple cases
if (m_atomData.group.terms.size() != 1)
if (m_atomData.group.alternatives.size() != 1)
return false;

const Term& firstTermInGroup = m_atomData.group.terms.first();
// -(.*) with any quantifier.
const auto& terms = m_atomData.group.alternatives.first();
if (terms.size() != 1)
return false;

const Term& firstTermInGroup = terms.first();
if (firstTermInGroup.isKnownToMatchAnyString())
return true;

if (firstTermInGroup.isUniversalTransition()) {
// -(.)*, (.+)*, (.?)* etc.
if (m_quantifier == AtomQuantifier::ZeroOrMore)
return true;

// -(.+)?.
if (m_quantifier == AtomQuantifier::ZeroOrOne && firstTermInGroup.m_quantifier == AtomQuantifier::OneOrMore)
return true;

// -(.?)+.
if (m_quantifier == AtomQuantifier::OneOrMore && firstTermInGroup.m_quantifier == AtomQuantifier::ZeroOrOne)
return true;
}
Expand All @@ -506,7 +522,9 @@ inline bool Term::hasFixedLength() const
case TermType::Group: {
if (m_quantifier != AtomQuantifier::One)
return false;
for (const Term& term : m_atomData.group.terms) {
if (m_atomData.group.alternatives.size() != 1)
return false;
for (const Term& term : m_atomData.group.alternatives.first()) {
if (!term.hasFixedLength())
return false;
}
Expand Down Expand Up @@ -564,7 +582,7 @@ inline bool Term::isUniversalTransition() const
return (m_atomData.characterSet.inverted() && !m_atomData.characterSet.bitCount())
|| (!m_atomData.characterSet.inverted() && m_atomData.characterSet.bitCount() == 127 && !m_atomData.characterSet.get(0));
case TermType::Group:
return m_atomData.group.terms.size() == 1 && m_atomData.group.terms.first().isUniversalTransition();
return m_atomData.group.alternatives.size() == 1 && m_atomData.group.alternatives.first().size() == 1 && m_atomData.group.alternatives.first().first().isUniversalTransition();
}
return false;
}
Expand Down Expand Up @@ -614,25 +632,33 @@ inline void Term::generateSubgraphForAtom(NFA& nfa, ImmutableCharNFANodeBuilder&
break;
}
case TermType::Group: {
if (m_atomData.group.terms.isEmpty()) {
// FIXME: any kind of empty term could be avoided in the parser. This case should turned into an assertion.
source.addEpsilonTransition(destination);
return;
}
auto generateSequence = [&](const Vector<Term>& terms, ImmutableCharNFANodeBuilder& seqSource, uint32_t seqDestination) {
if (terms.isEmpty()) {
seqSource.addEpsilonTransition(seqDestination);
return;
}
if (terms.size() == 1) {
terms.first().generateGraph(nfa, seqSource, seqDestination);
return;
}
ImmutableCharNFANodeBuilder lastTarget = terms.first().generateGraph(nfa, seqSource, ActionList());
for (unsigned i = 1; i < terms.size() - 1; ++i) {
ImmutableCharNFANodeBuilder newNode = terms[i].generateGraph(nfa, lastTarget, ActionList());
lastTarget = WTF::move(newNode);
}
terms.last().generateGraph(nfa, lastTarget, seqDestination);
};

if (m_atomData.group.terms.size() == 1) {
m_atomData.group.terms.first().generateGraph(nfa, source, destination);
return;
if (m_atomData.group.alternatives.size() == 1) {
generateSequence(m_atomData.group.alternatives.first(), source, destination);
break;
}

ImmutableCharNFANodeBuilder lastTarget = m_atomData.group.terms.first().generateGraph(nfa, source, ActionList());
for (unsigned i = 1; i < m_atomData.group.terms.size() - 1; ++i) {
const Term& currentTerm = m_atomData.group.terms[i];
ImmutableCharNFANodeBuilder newNode = currentTerm.generateGraph(nfa, lastTarget, ActionList());
lastTarget = WTF::move(newNode);
for (const auto& alternative : m_atomData.group.alternatives) {
ImmutableCharNFANodeBuilder branchStart(nfa);
source.addEpsilonTransition(branchStart);
generateSequence(alternative, branchStart, destination);
}
const Term& lastTerm = m_atomData.group.terms.last();
lastTerm.generateGraph(nfa, lastTarget, destination);
break;
}
}
Expand All @@ -658,8 +684,10 @@ inline size_t Term::memoryUsed() const
{
size_t extraMemory = 0;
if (m_termType == TermType::Group) {
for (const Term& term : m_atomData.group.terms)
extraMemory += term.memoryUsed();
for (const auto& alternative : m_atomData.group.alternatives) {
for (const Term& term : alternative)
extraMemory += term.memoryUsed();
}
}
return sizeof(Term) + extraMemory;
}
Expand Down
Loading
Loading