77class Validator
88{
99 /**
10- * @param array<string, array<int, string>> $publicSuffixList
10+ * @param array<'icann'|'private'|string,array<string,true|array<string,true|array<string,
11+ * true|array<string,true|array>>>>> $publicSuffixList
12+ * @phpstan-ignore-next-line
1113 */
1214 public function __construct (
1315 protected array $ publicSuffixList ,
@@ -18,7 +20,10 @@ public function validate(string $host): Host
1820 {
1921 $ host = new Host ($ host );
2022
21- $ tld = $ this ->getTld (explode ('. ' , strval ($ host ->toString ())), 'icann ' );
23+ $ tld = $ this ->findTldInHierarchy (
24+ explode ('. ' , (string ) $ host ->toString ()),
25+ $ this ->publicSuffixList ['icann ' ]
26+ );
2227
2328 if ($ tld !== null ) {
2429 $ host ->isPrivate (
@@ -32,47 +37,81 @@ public function validate(string $host): Host
3237 }
3338
3439 /**
35- * @param array<string> $parts
40+ * Find TLD in hierarchical structure using iterative lookup.
41+ * Traverses the hierarchy from right to left, checking each suffix.
42+ * Returns the longest matching domain suffix.
43+ *
44+ * @param array<string> $parts Domain parts (e.g., ['www', 'adro', 'com', 'mx'])
45+ * @param array<string, true|array<string, true|array<string, true|array<string, true|array>>>> $section
46+ * The hierarchical section to search; a node carrying an '__end__' key marks a complete suffix
3647 */
37- protected function getTld (array $ parts , string $ section, bool $ partialFound = false , ? string $ tld = null ): ?string
48+ protected function findTldInHierarchy (array $ parts , array $ section ): ?string
3849 {
39- $ current = end ($ parts ) . ($ tld ? ". {$ tld }" : '' );
40- unset($ parts [count ($ parts ) - 1 ]);
41-
42- foreach ($ this ->publicSuffixList [$ section ] as $ item ) {
43- if ($ current === $ item ) {
44- return $ this ->getTld (
45- parts: $ parts ,
46- section: $ section ,
47- partialFound: true ,
48- tld: $ current ,
49- );
50- }
50+ if (empty ($ parts )) {
51+ return null ;
5152 }
5253
53- if ($ partialFound === false ) {
54- $ current = end ($ parts ) . '. ' . $ current ;
55- foreach ($ this ->publicSuffixList [$ section ] as $ item ) {
56- if ($ current === $ item ) {
57- return $ this ->getTld (
58- parts: $ parts ,
59- section: $ section ,
60- partialFound: true ,
61- tld: $ current ,
62- );
63- }
54+ $ longestMatch = null ;
55+ $ reversed = array_reverse ($ parts );
56+ $ current = &$ section ;
57+ $ depth = 0 ;
58+
59+ // Traverse from rightmost (top-level domain) leftward
60+ foreach ($ reversed as $ index => $ part ) {
61+ if (!isset ($ current [$ part ])) {
62+ // No match at this level, stop traversing
63+ break ;
64+ }
65+
66+ $ current = &$ current [$ part ];
67+ $ depth ++;
68+
69+ // If this level is marked as a complete domain, record it
70+ if (isset ($ current ['__end__ ' ])) {
71+ // Build the matched suffix by taking the rightmost 'depth' parts in original order
72+ $ suffix_parts = array_slice ($ reversed , 0 , $ depth );
73+ $ longestMatch = implode ('. ' , array_reverse ($ suffix_parts ));
6474 }
6575 }
6676
67- return $ partialFound ? strval ( $ tld ) : null ;
77+ return $ longestMatch ;
6878 }
6979
80+ /**
81+ * Check if host is in private domains list.
82+ * Uses a hierarchical lookup similar to findTldInHierarchy(), with added wildcard ('*') support.
83+ */
7084 protected function checkIfIsPrivate (string $ host ): bool
7185 {
72- foreach ($ this ->publicSuffixList ['private ' ] as $ item ) {
73- if (strpos ($ host , trim ($ item , '* ' )) !== false ) {
74- return true ;
86+ $ parts = explode ('. ' , $ host );
87+ $ reversed = array_reverse ($ parts );
88+ /** @var array<string, true|array<string, true|array<string, true|array<string, true|array>>>> $section */
89+ $ section = $ this ->publicSuffixList ['private ' ];
90+
91+ $ current = &$ section ;
92+
93+ // Traverse hierarchy from rightmost part
94+ foreach ($ reversed as $ part ) {
95+ // Check for exact match
96+ if (isset ($ current [$ part ])) {
97+ if (isset ($ current [$ part ]['__end__ ' ])) {
98+ return true ;
99+ }
100+ $ current = &$ current [$ part ];
101+ continue ;
75102 }
103+
104+ // Check for wildcard match
105+ if (isset ($ current ['* ' ])) {
106+ if (isset ($ current ['* ' ]['__end__ ' ])) {
107+ return true ;
108+ }
109+ $ current = &$ current ['* ' ];
110+ continue ;
111+ }
112+
113+ // No match found at this level
114+ return false ;
76115 }
77116
78117 return false ;
0 commit comments