|
21 | 21 | package com.bitplan.simplegraph.html; |
22 | 22 |
|
23 | 23 | import static org.junit.Assert.assertEquals; |
| 24 | +import static org.junit.Assert.assertTrue; |
24 | 25 |
|
25 | 26 | import java.util.logging.Logger; |
| 27 | +import java.util.regex.Pattern; |
26 | 28 |
|
27 | 29 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; |
28 | 30 | import org.apache.tinkerpop.gremlin.structure.Vertex; |
|
38 | 40 | * |
39 | 41 | */ |
40 | 42 | public class TestHtmlSystem { |
41 | | - public static boolean debug = false; |
42 | | - protected static Logger LOGGER = Logger |
43 | | - .getLogger("com.bitplan.simplegraph.html"); |
| 43 | + public static boolean debug = false; |
| 44 | + protected static Logger LOGGER = Logger.getLogger("com.bitplan.simplegraph.html"); |
44 | 45 |
|
45 | | - @Test |
46 | | - public void testRootNodeAttributes() throws Exception { |
47 | | - HtmlSystem hs = HtmlSystem.forUrl("http://agilemanifesto.org/"); |
48 | | - HtmlNode htmlNode = (HtmlNode) hs.getStartNode(); |
49 | | - assertEquals("html", htmlNode.getRootNode().getName()); |
50 | | - // debug = true; |
51 | | - if (debug) { |
52 | | - htmlNode.forAll(SimpleNode.printDebug); |
53 | | - } |
54 | | - GraphTraversal<Vertex, Vertex> links = hs.g().V().hasLabel("a"); |
55 | | - assertEquals(72, links.count().next().longValue()); |
56 | | - links = hs.g().V().hasLabel("a"); |
57 | | - if (debug) |
58 | | - links.forEachRemaining( |
59 | | - link -> System.out.println(link.property("href").value())); |
60 | | - // links.forEach(link -> System.out.println(link)); |
61 | | - // links.entrySet().forEach(entry->System.out.println(entry.getKey()+"="+entry.getValue())); |
62 | | - } |
| 46 | + @Test |
| 47 | + public void testRootNodeAttributes() throws Exception { |
| 48 | + HtmlSystem hs = HtmlSystem.forUrl("http://agilemanifesto.org/"); |
| 49 | + HtmlNode htmlNode = (HtmlNode) hs.getStartNode(); |
| 50 | + assertEquals("html", htmlNode.getRootNode().getName()); |
| 51 | + // debug = true; |
| 52 | + if (debug) { |
| 53 | + htmlNode.forAll(SimpleNode.printDebug); |
| 54 | + } |
| 55 | + GraphTraversal<Vertex, Vertex> links = hs.g().V().hasLabel("a"); |
| 56 | + assertEquals(72, links.count().next().longValue()); |
| 57 | + links = hs.g().V().hasLabel("a"); |
| 58 | + if (debug) |
| 59 | + links.forEachRemaining(link -> System.out.println(link.property("href").value())); |
| 60 | + // links.forEach(link -> System.out.println(link)); |
| 61 | + // links.entrySet().forEach(entry->System.out.println(entry.getKey()+"="+entry.getValue())); |
| 62 | + } |
63 | 63 |
|
64 | | - @Test |
65 | | - public void testPDFLinks() throws Exception { |
66 | | - HtmlSystem hs = HtmlSystem.forUrl( |
67 | | - "https://filesamples.com/formats/pdf"); |
68 | | - int expectedCount=3; |
69 | | - HtmlNode htmlNode = (HtmlNode) hs.getStartNode(); |
70 | | - assertEquals("html", htmlNode.getRootNode().getName()); |
71 | | - // debug = true; |
72 | | - if (debug) { |
73 | | - htmlNode.forAll(SimpleNode.printDebug); |
74 | | - } |
75 | | - GraphTraversal<Vertex, Vertex> links = hs.g().V().hasLabel("a").has("href", |
76 | | - RegexPredicate.regex(".*pdf")); |
77 | | - assertEquals(expectedCount, links.count().next().longValue()); |
78 | | - links = hs.g().V().hasLabel("a").has("href", RegexPredicate.regex(".*pdf")); |
79 | | - links.forEachRemaining( |
80 | | - link -> System.out.println(link.property("href").value())); |
81 | | - } |
| 64 | + @Test |
| 65 | + public void testPDFLinks() throws Exception { |
| 66 | + HtmlSystem hs = HtmlSystem.forUrl("https://www.princexml.com/samples/"); |
| 67 | + int expectedCount = 23; |
| 68 | + HtmlNode htmlNode = (HtmlNode) hs.getStartNode(); |
| 69 | + assertEquals("html", htmlNode.getRootNode().getName()); |
| 70 | + // debug = true; |
| 71 | + if (debug) { |
| 72 | + htmlNode.forAll(SimpleNode.printDebug); |
| 73 | + } |
| 74 | + GraphTraversal<Vertex, Vertex> links = hs.g().V().hasLabel("a").has("href", RegexPredicate.regex(".*pdf")); |
| 75 | + assertEquals(expectedCount, links.count().next().longValue()); |
| 76 | + links = hs.g().V().hasLabel("a").has("href", RegexPredicate.regex(".*pdf")); |
| 77 | + |
| 78 | + Pattern pdfPattern = Pattern.compile("^/samples/.*\\.pdf$"); |
| 79 | + |
| 80 | + links.forEachRemaining(link -> { |
| 81 | + String hrefValue = link.property("href").value().toString(); |
| 82 | + boolean isValid = pdfPattern.matcher(hrefValue).matches(); |
| 83 | + if (debug) { |
| 84 | + System.out.println(hrefValue + (isValid ? " ✓" : " ✗ INVALID")); |
| 85 | + } |
| 86 | + |
| 87 | + assertTrue("Invalid PDF href pattern: " + hrefValue, isValid); |
| 88 | + }); |
| 89 | + } |
82 | 90 |
|
83 | 91 | } |
0 commit comments