Skip to content

Commit a15bbed

Browse files
committed
TEDEFO-4992 Add NoticeDocument and SafeDocumentBuilder from eforms-notice-viewer
1 parent 658368c commit a15bbed

File tree

3 files changed

+274
-0
lines changed

3 files changed

+274
-0
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ This library provides a set of classes that can be used to solve some common "pr
77
* Automatically discovering and downloading new versions of the eForms SDK.
88
* Maintaining and instantiating at runtime the correct application component versions for different major versions of the SDK.
99
* Basic parsing and processing of XPath expressions.
10+
* Parsing eForms notice XML documents and extracting metadata (SDK version, subtype, languages).
11+
* Secure XML document building with XXE prevention (OWASP guidelines).
1012

1113
## Using the eForms Core Library
1214

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
/*
2+
* Copyright 2022 European Union
3+
*
4+
* Licensed under the EUPL, Version 1.2 or – as soon they will be approved by the European
5+
* Commission – subsequent versions of the EUPL (the "Licence"); You may not use this work except in
6+
* compliance with the Licence. You may obtain a copy of the Licence at:
7+
* https://joinup.ec.europa.eu/software/page/eupl
8+
*
9+
* Unless required by applicable law or agreed to in writing, software distributed under the Licence
10+
* is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11+
* or implied. See the Licence for the specific language governing permissions and limitations under
12+
* the Licence.
13+
*/
14+
package eu.europa.ted.eforms;
15+
16+
import java.io.FileNotFoundException;
17+
import java.io.IOException;
18+
import java.io.InputStream;
19+
import java.nio.charset.StandardCharsets;
20+
import java.nio.file.Files;
21+
import java.nio.file.Path;
22+
import java.util.ArrayList;
23+
import java.util.List;
24+
import java.util.Optional;
25+
import javax.xml.parsers.ParserConfigurationException;
26+
import javax.xml.xpath.XPath;
27+
import javax.xml.xpath.XPathExpressionException;
28+
import javax.xml.xpath.XPathFactory;
29+
import javax.xml.xpath.XPathNodes;
30+
import org.apache.commons.lang3.StringUtils;
31+
import org.apache.commons.lang3.Validate;
32+
import org.w3c.dom.Element;
33+
import org.w3c.dom.Node;
34+
import org.w3c.dom.NodeList;
35+
import org.xml.sax.SAXException;
36+
import eu.europa.ted.util.SafeDocumentBuilder;
37+
38+
/**
39+
* A class representing a Notice document with accessor methods for its XML contents and metadata.
40+
*/
41+
public class NoticeDocument {
42+
43+
private static final String TAG_PRIMARY_LANGUAGE = "cbc:NoticeLanguageCode";
44+
private static final String TAG_SDK_VERSION = "cbc:CustomizationID";
45+
private static final String TAG_SUBTYPE_CODE = "cbc:SubTypeCode";
46+
private static final String XPATH_ADDITIONAL_LANGUAGE =
47+
"/*/AdditionalNoticeLanguage/ID/text()";
48+
49+
private static final XPath xpath = XPathFactory.newInstance().newXPath();
50+
51+
private final Element root;
52+
private final String xmlContents;
53+
54+
public NoticeDocument(final Path noticeXmlPath)
55+
throws ParserConfigurationException, SAXException, IOException {
56+
Validate.notNull(noticeXmlPath, "Undefined Notice XML file path");
57+
58+
if (!Files.isRegularFile(noticeXmlPath)) {
59+
throw new FileNotFoundException(noticeXmlPath.toString());
60+
}
61+
62+
this.xmlContents = Files.readString(noticeXmlPath, StandardCharsets.UTF_8);
63+
this.root = parseXmlRoot(this.xmlContents);
64+
}
65+
66+
public NoticeDocument(final InputStream noticeXmlInput)
67+
throws ParserConfigurationException, SAXException, IOException {
68+
Validate.notNull(noticeXmlInput, "Undefined Notice XML input");
69+
70+
this.xmlContents = new String(noticeXmlInput.readAllBytes(), StandardCharsets.UTF_8);
71+
this.root = parseXmlRoot(this.xmlContents);
72+
}
73+
74+
public NoticeDocument(final String noticeXmlContents)
75+
throws ParserConfigurationException, SAXException, IOException {
76+
Validate.notBlank(noticeXmlContents, "Invalid Notice XML contents");
77+
78+
this.xmlContents = noticeXmlContents;
79+
this.root = parseXmlRoot(this.xmlContents);
80+
}
81+
82+
private static Element parseXmlRoot(final String xmlContents)
83+
throws ParserConfigurationException, SAXException, IOException {
84+
try (InputStream input =
85+
new java.io.ByteArrayInputStream(xmlContents.getBytes(StandardCharsets.UTF_8))) {
86+
final Element root =
87+
SafeDocumentBuilder.buildSafeDocumentBuilderAllowDoctype().parse(input)
88+
.getDocumentElement();
89+
Validate.notNull(root, "No XML root found");
90+
return root;
91+
}
92+
}
93+
94+
/**
95+
* Gets the notice sub type from the notice XML.
96+
*
97+
* @return The notice sub type as found in the notice XML
98+
*/
99+
public String getNoticeSubType() {
100+
return Optional.ofNullable(this.root.getElementsByTagName(TAG_SUBTYPE_CODE))
101+
.map((final NodeList subTypeCodes) -> {
102+
Optional<String> result = Optional.empty();
103+
for (int i = 0; i < subTypeCodes.getLength(); i++) {
104+
result = Optional.ofNullable(subTypeCodes.item(i))
105+
.filter((final Node node) -> node.getAttributes() != null)
106+
.map(Node::getTextContent)
107+
.map(StringUtils::strip);
108+
}
109+
return result.orElse(null);
110+
})
111+
.filter(StringUtils::isNotBlank)
112+
.orElseThrow(() -> new RuntimeException("SubTypeCode not found in notice XML"));
113+
}
114+
115+
/**
116+
* Gets the eForms SDK version from the notice XML.
117+
*
118+
* @return The eForms SDK version as found in the notice XML
119+
*/
120+
public String getEformsSdkVersion() {
121+
return Optional.ofNullable(this.root.getElementsByTagName(TAG_SDK_VERSION))
122+
.filter((final NodeList nodes) -> nodes.getLength() == 1)
123+
.map((final NodeList nodes) -> Optional.ofNullable(nodes.item(0))
124+
.map(Node::getTextContent)
125+
.map(StringUtils::strip)
126+
.map((final String str) -> str.startsWith("eforms-sdk-")
127+
? str.substring("eforms-sdk-".length()) : str)
128+
.orElse(null))
129+
.filter(StringUtils::isNotBlank)
130+
.orElseThrow(() -> new RuntimeException("eForms SDK version not found in notice XML"));
131+
}
132+
133+
/**
134+
* Gets the primary language from the notice XML.
135+
*
136+
* @return The primary language
137+
*/
138+
public String getPrimaryLanguage() {
139+
return Optional
140+
.ofNullable(this.root.getElementsByTagName(TAG_PRIMARY_LANGUAGE))
141+
.map((final NodeList nodes) -> nodes.item(0))
142+
.map(Node::getTextContent)
143+
.orElse(null);
144+
}
145+
146+
/**
147+
* Gets the list of other languages from the notice XML.
148+
*
149+
* @return A list of other languages
150+
* @throws XPathExpressionException If an error occurs evaluating the XPath expression
151+
*/
152+
public List<String> getOtherLanguages() throws XPathExpressionException {
153+
return Optional
154+
.ofNullable(xpath.evaluateExpression(XPATH_ADDITIONAL_LANGUAGE,
155+
this.root.getOwnerDocument(), XPathNodes.class))
156+
.map((final XPathNodes nodes) -> {
157+
final List<String> languages = new ArrayList<>();
158+
nodes.forEach((final Node node) -> {
159+
if (StringUtils.isNotBlank(node.getTextContent())) {
160+
languages.add(node.getTextContent());
161+
}
162+
});
163+
return languages;
164+
})
165+
.orElseGet(ArrayList::new);
166+
}
167+
168+
/**
169+
* Gets the notice XML contents.
170+
*
171+
* @return The notice XML
172+
*/
173+
public String getXmlContents() {
174+
return this.xmlContents;
175+
}
176+
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright 2022 European Union
3+
*
4+
* Licensed under the EUPL, Version 1.2 or – as soon they will be approved by the European
5+
* Commission – subsequent versions of the EUPL (the "Licence"); You may not use this work except in
6+
* compliance with the Licence. You may obtain a copy of the Licence at:
7+
* https://joinup.ec.europa.eu/software/page/eupl
8+
*
9+
* Unless required by applicable law or agreed to in writing, software distributed under the Licence
10+
* is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11+
* or implied. See the Licence for the specific language governing permissions and limitations under
12+
* the Licence.
13+
*/
14+
package eu.europa.ted.util;
15+
16+
import javax.xml.XMLConstants;
17+
import javax.xml.parsers.DocumentBuilder;
18+
import javax.xml.parsers.DocumentBuilderFactory;
19+
import javax.xml.parsers.ParserConfigurationException;
20+
import org.slf4j.Logger;
21+
import org.slf4j.LoggerFactory;
22+
23+
/**
24+
* Utility class for the creation of {@link DocumentBuilder} instances for XML parsing, using XXE
25+
* prevention techniques as recommended by OWASP.
26+
*
27+
* @see <a href=
28+
* "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#java">OWASP
29+
* XXE Prevention Cheat Sheet</a>
30+
*/
31+
public class SafeDocumentBuilder {
32+
33+
private static final Logger logger = LoggerFactory.getLogger(SafeDocumentBuilder.class);
34+
35+
private SafeDocumentBuilder() {
36+
throw new AssertionError("Utility class.");
37+
}
38+
39+
/**
40+
* Creates a {@link DocumentBuilder} using XXE prevention techniques. Allows DOCTYPE declarations.
41+
*
42+
* @return A {@link DocumentBuilder} instance
43+
* @throws ParserConfigurationException when the builder is configured with a feature that is
44+
* unsupported by the XML processor
45+
*/
46+
public static DocumentBuilder buildSafeDocumentBuilderAllowDoctype()
47+
throws ParserConfigurationException {
48+
return buildSafeDocumentBuilder(false);
49+
}
50+
51+
/**
52+
* Creates a {@link DocumentBuilder} using XXE prevention techniques. Raises a fatal error when a
53+
* DOCTYPE declaration is found.
54+
*
55+
* @return A {@link DocumentBuilder} instance
56+
* @throws ParserConfigurationException when the builder is configured with a feature that is
57+
* unsupported by the XML processor
58+
*/
59+
public static DocumentBuilder buildSafeDocumentBuilderStrict()
60+
throws ParserConfigurationException {
61+
return buildSafeDocumentBuilder(true);
62+
}
63+
64+
private static DocumentBuilder buildSafeDocumentBuilder(final boolean disallowDoctypeDecl)
65+
throws ParserConfigurationException {
66+
final DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance();
67+
String feature = null;
68+
try {
69+
feature = "http://apache.org/xml/features/disallow-doctype-decl";
70+
dbf.setFeature(feature, disallowDoctypeDecl);
71+
72+
feature = "http://xml.org/sax/features/external-general-entities";
73+
dbf.setFeature(feature, false);
74+
75+
feature = "http://xml.org/sax/features/external-parameter-entities";
76+
dbf.setFeature(feature, false);
77+
78+
feature = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
79+
dbf.setFeature(feature, false);
80+
81+
dbf.setXIncludeAware(false);
82+
dbf.setExpandEntityReferences(false);
83+
dbf.setValidating(false);
84+
dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
85+
dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
86+
dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
87+
88+
return dbf.newDocumentBuilder();
89+
} catch (final ParserConfigurationException e) {
90+
logger.info("Error: The feature '{}' is probably not supported by your XML processor.",
91+
feature);
92+
logger.debug("ParserConfigurationException was thrown:", e);
93+
throw e;
94+
}
95+
}
96+
}

0 commit comments

Comments
 (0)