diff --git a/.circleci/config.yml b/.circleci/config.yml
deleted file mode 100644
index e821260..0000000
--- a/.circleci/config.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-version: 2
-jobs:
- build:
- docker:
- - image: circleci/node:8-browsers
- steps:
- - checkout
- - run:
- name: update-npm
- command: 'sudo npm install -g npm@5'
- - restore_cache:
- key: dependency-cache-{{ checksum "package.json" }}
- - run:
- name: install-npm-wee
- command: npm install
- - save_cache:
- key: dependency-cache-{{ checksum "package.json" }}
- paths:
- - ./node_modules
- - run:
- name: test
- command: npm test
diff --git a/.eslintignore b/.eslintignore
index 76add87..3c3629e 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -1,2 +1 @@
node_modules
-dist
\ No newline at end of file
diff --git a/.eslintrc b/.eslintrc
index faf286d..cd04aae 100644
--- a/.eslintrc
+++ b/.eslintrc
@@ -1,6 +1,13 @@
{
"extends": "apostrophe",
"rules": {
- "no-useless-escape": 1
+ "no-console": [
+ "error",
+ {
+ "allow": [
+ "warn"
+ ]
+ }
+ ]
}
}
diff --git a/.gitignore b/.gitignore
index 42da1d1..d8dd031 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,6 @@ package-lock.json
npm-debug.log
*.DS_Store
node_modules
-dist
+.idea
# We do not commit CSS, only LESS
public/css/*.css
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1336315..3477688 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,61 +1,253 @@
-## Changelog
+# Changelog
-1.27.4 (2020-08-26):
+## UNRELEASED
+
+- Fix unclosed tags (e.g., `
-
-`sanitize-html` provides a simple HTML sanitizer with a clear API.
-
-`sanitize-html` is tolerant. It is well suited for cleaning up HTML fragments such as those created by ckeditor and other rich text editors. It is especially handy for removing unwanted CSS when copying and pasting from Word.
-
-`sanitize-html` allows you to specify the tags you want to permit, and the permitted attributes for each of those tags.
-
-If a tag is not permitted, the contents of the tag are not discarded. There are
-some exceptions to this, discussed below in the "Discarding the entire contents
-of a disallowed tag" section.
-
-The syntax of poorly closed `p` and `img` elements is cleaned up.
-
-`href` attributes are validated to ensure they only contain `http`, `https`, `ftp` and `mailto` URLs. Relative URLs are also allowed. Ditto for `src` attributes.
-
-Allowing particular urls as a `src` to an iframe tag by filtering hostnames is also supported.
-
-HTML comments are not preserved.
-
-## Requirements
-
-`sanitize-html` is intended for use with Node. That's pretty much it. All of its npm dependencies are pure JavaScript. `sanitize-html` is built on the excellent `htmlparser2` module.
-
-## How to use
-
-### Browser
-
-*Think first: why do you want to use it in the browser?* Remember, *servers must never trust browsers.* You can't sanitize HTML for saving on the server anywhere else but on the server.
-
-But, perhaps you'd like to display sanitized HTML immediately in the browser for preview. Or ask the browser to do the sanitization work on every page load. You can if you want to!
-
-* Clone repository
-* Run npm install and build / minify:
-
-```bash
-npm install
-npm run minify
-```
-
-You'll find the minified and unminified versions of sanitize-html (with all its dependencies included) in the dist/ directory.
-
-Use it in the browser:
-
-```html
-
-
some text...
-``` - -We can do that with the following filter: - -```js -sanitizeHtml( - 'some text...
', - { - textFilter: function(text, tagName) { - if (['a'].indexOf(tagName) > -1) return //Skip anchor tags - - return text.replace(/\.\.\./, '…'); - } - } -); -``` - -Note that the text passed to the `textFilter` method is already escaped for safe display as HTML. You may add markup and use entity escape sequences in your `textFilter`. - -### Iframe Filters - -If you would like to allow iframe tags but want to control the domains that are allowed through you can provide an array of hostnames and(or) array of domains that you would like to allow as iframe sources. This hostname is a property in the options object passed as an argument to the `sanitize-html` function. - -These arrays will be checked against the html that is passed to the function and return only `src` urls that include the allowed hostnames or domains in the object. The url in the html that is passed must be formatted correctly (valid hostname) as an embedded iframe otherwise the module will strip out the src from the iframe. - -Make sure to pass a valid hostname along with the domain you wish to allow, i.e.: - -```js -allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'], -allowedIframeDomains: ['zoom.us'] -``` - -You may also specify whether or not to allow relative URLs as iframe sources. - -```js -allowIframeRelativeUrls: true -``` - -Note that if unspecified, relative URLs will be allowed by default if no hostname or domain filter is provided but removed by default if a hostname or domain filter is provided. - -**Remember that the `iframe` tag must be allowed as well as the `src` attribute.** - -For example: - -```js -clean = sanitizeHtml('', { - allowedTags: [ 'p', 'em', 'strong', 'iframe' ], - allowedClasses: { - 'p': [ 'fancy', 'simple' ], - }, - allowedAttributes: { - 'iframe': ['src'] - }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'] -}); -``` - -will pass through as safe whereas: - -```js -clean = sanitizeHtml('
', { - allowedTags: [ 'p', 'em', 'strong', 'iframe' ], - allowedClasses: { - 'p': [ 'fancy', 'simple' ], - }, - allowedAttributes: { - 'iframe': ['src'] - }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'] -}); -``` - -or - -```js -clean = sanitizeHtml('
', { - allowedTags: [ 'p', 'em', 'strong', 'iframe' ], - allowedClasses: { - 'p': [ 'fancy', 'simple' ], - }, - allowedAttributes: { - 'iframe': ['src'] - }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'] -}); -``` - -will return an empty iframe tag. - -If you want to allow any subdomain of any level you can provide the domain in `allowedIframeDomains` - -```js -clean = sanitizeHtml('
', {
- allowedTags: [ 'p', 'em', 'strong', 'iframe' ],
- allowedClasses: {
- 'p': [ 'fancy', 'simple' ],
- },
- allowedAttributes: {
- 'iframe': ['src']
- },
- allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'],
- allowedIframeDomains: ['zoom.us']
-});
-```
-
-will pass through as safe.
-
-### Allowed CSS Classes
-
-If you wish to allow specific CSS classes on a particular element, you can do so with the `allowedClasses` option. Any other CSS classes are discarded.
-
-This implies that the `class` attribute is allowed on that element.
-
-```js
-// Allow only a restricted set of CSS classes and only on the p tag
-clean = sanitizeHtml(dirty, {
- allowedTags: [ 'p', 'em', 'strong' ],
- allowedClasses: {
- 'p': [ 'fancy', 'simple' ]
- }
-});
-```
-
-### Allowed CSS Styles
-
-If you wish to allow specific CSS _styles_ on a particular element, you can do that with the `allowedStyles` option. Simply declare your desired attributes as regular expression options within an array for the given attribute. Specific elements will inherit whitelisted attributes from the global (\*) attribute. Any other CSS classes are discarded.
-
-**You must also use `allowedAttributes`** to activate the `style` attribute for the relevant elements. Otherwise this feature will never come into play.
-
-**When constructing regular expressions, don't forget `^` and `$`.** It's not enough to say "the string should contain this." It must also say "and only this."
-
-**URLs in inline styles are NOT filtered by any mechanism other than your regular expression.**
-
-```js
-clean = sanitizeHtml(dirty, {
- allowedTags: ['p'],
- allowedAttributes: {
- 'p': ["style"],
- },
- allowedStyles: {
- '*': {
- // Match HEX and RGB
- 'color': [/^#(0x)?[0-9a-f]+$/i, /^rgb\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*\)$/],
- 'text-align': [/^left$/, /^right$/, /^center$/],
- // Match any number with px, em, or %
- 'font-size': [/^\d+(?:px|em|%)$/]
- },
- 'p': {
- 'font-size': [/^\d+rem$/]
- }
- }
-});
-```
-
-### Allowed URL schemes
-
-By default we allow the following URL schemes in cases where `href`, `src`, etc. are allowed:
-
-```js
-[ 'http', 'https', 'ftp', 'mailto' ]
-```
-
-You can override this if you want to:
-
-```js
-sanitizeHtml(
- // teeny-tiny valid transparent GIF in a data URL
- '',
- {
- allowedTags: [ 'img', 'p' ],
- allowedSchemes: [ 'data', 'http' ]
- }
-);
-```
-
-You can also allow a scheme for a particular tag only:
-
-```js
-allowedSchemes: [ 'http', 'https' ],
-allowedSchemesByTag: {
- img: [ 'data' ]
-}
-```
-
-And you can forbid the use of protocol-relative URLs (starting with `//`) to access another site using the current protocol, which is allowed by default:
-
-```js
-allowProtocolRelative: false
-```
-
-### Discarding the entire contents of a disallowed tag
-
-Normally, with a few exceptions, if a tag is not allowed, all of the text within it is preserved, and so are any allowed tags within it.
-
-The exceptions are:
-
-`style`, `script`, `textarea`, `option`
-
-If you wish to replace this list, for instance to discard whatever is found
-inside a `noscript` tag, use the `nonTextTags` option:
-
-```js
-nonTextTags: [ 'style', 'script', 'textarea', 'option', 'noscript' ]
-```
-
-Note that if you use this option you are responsible for stating the entire list. This gives you the power to retain the content of `textarea`, if you want to.
-
-The content still gets escaped properly, with the exception of the `script` and
-`style` tags. *Allowing either `script` or `style` leaves you open to XSS
-attacks. Don't do that* unless you have good reason to trust their origin.
-sanitize-html will log a warning if these tags are allowed, which can be
-disabled with the `allowVulnerableTags: true` option.
-
-### Choose what to do with disallowed tags
-
-Instead of discarding, or keeping text only, you may enable escaping of the entire content:
-
-```js
-disallowedTagsMode: 'escape'
-```
-
-This will transform `
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..e79679e
--- /dev/null
+++ b/index.js
@@ -0,0 +1,971 @@
+const htmlparser = require('htmlparser2');
+const escapeStringRegexp = require('escape-string-regexp');
+const { isPlainObject } = require('is-plain-object');
+const deepmerge = require('deepmerge');
+const parseSrcset = require('parse-srcset');
+const { parse: postcssParse } = require('postcss');
+// Tags that can conceivably represent stand-alone media.
+const mediaTags = [
+ 'img', 'audio', 'video', 'picture', 'svg',
+ 'object', 'map', 'iframe', 'embed'
+];
+// Tags that are inherently vulnerable to being used in XSS attacks.
+const vulnerableTags = [ 'script', 'style' ];
+
+// Calls `cb(value, key)` once for every own enumerable key of `obj`.
+// A falsy `obj` (null, undefined, ...) is ignored.
+function each(obj, cb) {
+  if (!obj) {
+    return;
+  }
+  for (const key of Object.keys(obj)) {
+    cb(obj[key], key);
+  }
+}
+
+// Own-property test that cannot be fooled by inherited members such as
+// .__proto__ or by an object that shadows .hasOwnProperty itself.
+function has(obj, key) {
+  const ownCheck = {}.hasOwnProperty;
+  return ownCheck.call(obj, key);
+}
+
+// Returns a new array holding each own value `v` of `a` for which `cb(v)`
+// is truthy. A falsy `a` yields an empty array.
+function filter(a, cb) {
+  const kept = [];
+  if (a) {
+    for (const key of Object.keys(a)) {
+      const item = a[key];
+      if (cb(item)) {
+        kept.push(item);
+      }
+    }
+  }
+  return kept;
+}
+
+// True when `obj` has no own enumerable properties; inherited properties
+// are not counted.
+function isEmptyObject(obj) {
+  let empty = true;
+  for (const prop in obj) {
+    if (({}).hasOwnProperty.call(obj, prop)) {
+      empty = false;
+      break;
+    }
+  }
+  return empty;
+}
+
+// Serializes an array of parsed srcset candidates ({ url, w, h, d }) back
+// into a srcset attribute value. Throws if any candidate has no URL.
+function stringifySrcset(parsedSrcset) {
+  const candidates = parsedSrcset.map((part) => {
+    if (!part.url) {
+      throw new Error('URL missing');
+    }
+
+    const width = part.w ? ` ${part.w}w` : '';
+    const height = part.h ? ` ${part.h}h` : '';
+    const density = part.d ? ` ${part.d}x` : '';
+    return part.url + width + height + density;
+  });
+  return candidates.join(', ');
+}
+
+module.exports = sanitizeHtml;
+
+// A valid attribute name.
+// We use a tolerant definition based on the set of strings defined by
+// html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+// and html.spec.whatwg.org/multipage/parsing.html#attribute-name-state .
+// The characters accepted are ones which can be appended to the attribute
+// name buffer without triggering a parse error:
+// * unexpected-equals-sign-before-attribute-name
+// * unexpected-null-character
+// * unexpected-character-in-attribute-name
+// We exclude the empty string because it's impossible to get to the after
+// attribute name state with an empty attribute name buffer.
+const VALID_HTML_ATTRIBUTE_NAME = /^[^\0\t\n\f\r /<=>]+$/;
+
+// Ignore the _recursing flag; it's there for recursive
+// invocation as a guard against this exploit:
+// https://github.com/fb55/htmlparser2/issues/105
+
+function sanitizeHtml(html, options, _recursing) {
+ if (html == null) {
+ return '';
+ }
+ if (typeof html === 'number') {
+ html = html.toString();
+ }
+
+ let result = '';
+ // Used for hot swapping the result variable with an empty string in order to "capture" the text written to it.
+ let tempResult = '';
+
+ // Bookkeeping record created for each opened tag and kept on `stack`.
+ function Frame(tag, attribs) {
+   const that = this;
+   this.tag = tag;
+   this.attribs = attribs || {};
+   // Length of `result` at the moment the frame is created
+   this.tagPosition = result.length;
+   this.text = ''; // Node inner text
+   this.openingTagLength = 0;
+   this.mediaChildren = [];
+
+   // Appends this frame's text to the frame currently on top of the stack.
+   this.updateParentNodeText = function() {
+     if (!stack.length) {
+       return;
+     }
+     stack[stack.length - 1].text += that.text;
+   };
+
+   // When this frame's tag is a media tag, records it on the frame
+   // currently on top of the stack.
+   this.updateParentNodeMediaChildren = function() {
+     if (!stack.length || !mediaTags.includes(this.tag)) {
+       return;
+     }
+     stack[stack.length - 1].mediaChildren.push(this.tag);
+   };
+ }
+
+ options = Object.assign({}, sanitizeHtml.defaults, options);
+ options.parser = Object.assign({}, htmlParserDefaults, options.parser);
+
+ const tagAllowed = function (name) {
+ return options.allowedTags === false || (options.allowedTags || []).indexOf(name) > -1;
+ };
+
+ // vulnerableTags
+ vulnerableTags.forEach(function (tag) {
+ if (tagAllowed(tag) && !options.allowVulnerableTags) {
+ console.warn(`\n\n⚠️ Your \`allowedTags\` option includes, \`${tag}\`, which is inherently\nvulnerable to XSS attacks. Please remove it from \`allowedTags\`.\nOr, to disable this warning, add the \`allowVulnerableTags\` option\nand ensure you are accounting for this risk.\n\n`);
+ }
+ });
+
+ // Tags that contain something other than HTML, or where discarding
+ // the text when the tag is disallowed makes sense for other reasons.
+ // If we are not allowing these tags, we should drop their content too.
+ // For other tags you would drop the tag but keep its content.
+ const nonTextTagsArray = options.nonTextTags || [
+ 'script',
+ 'style',
+ 'textarea',
+ 'option'
+ ];
+ let allowedAttributesMap;
+ let allowedAttributesGlobMap;
+ if (options.allowedAttributes) {
+ allowedAttributesMap = {};
+ allowedAttributesGlobMap = {};
+ each(options.allowedAttributes, function(attributes, tag) {
+ allowedAttributesMap[tag] = [];
+ const globRegex = [];
+ attributes.forEach(function(obj) {
+ if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
+ globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
+ } else {
+ allowedAttributesMap[tag].push(obj);
+ }
+ });
+ if (globRegex.length) {
+ allowedAttributesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
+ }
+ });
+ }
+ const allowedClassesMap = {};
+ const allowedClassesGlobMap = {};
+ const allowedClassesRegexMap = {};
+ each(options.allowedClasses, function(classes, tag) {
+ // Implicitly allows the class attribute
+ if (allowedAttributesMap) {
+ if (!has(allowedAttributesMap, tag)) {
+ allowedAttributesMap[tag] = [];
+ }
+ allowedAttributesMap[tag].push('class');
+ }
+
+ allowedClassesMap[tag] = classes;
+
+ if (Array.isArray(classes)) {
+ const globRegex = [];
+ allowedClassesMap[tag] = [];
+ allowedClassesRegexMap[tag] = [];
+ classes.forEach(function(obj) {
+ if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
+ globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
+ } else if (obj instanceof RegExp) {
+ allowedClassesRegexMap[tag].push(obj);
+ } else {
+ allowedClassesMap[tag].push(obj);
+ }
+ });
+ if (globRegex.length) {
+ allowedClassesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
+ }
+ }
+ });
+
+ const transformTagsMap = {};
+ let transformTagsAll;
+ each(options.transformTags, function(transform, tag) {
+ let transFun;
+ if (typeof transform === 'function') {
+ transFun = transform;
+ } else if (typeof transform === 'string') {
+ transFun = sanitizeHtml.simpleTransform(transform);
+ }
+ if (tag === '*') {
+ transformTagsAll = transFun;
+ } else {
+ transformTagsMap[tag] = transFun;
+ }
+ });
+
+ let depth;
+ let stack;
+ let skipMap;
+ let transformMap;
+ let skipText;
+ let skipTextDepth;
+ let addedText = false;
+
+ initializeState();
+
+ const parser = new htmlparser.Parser({
+ // Handler for an opening tag. Applies tag transforms, decides whether the
+ // tag is kept, discarded or escaped, filters its attributes, and appends
+ // the resulting opening tag to `result`.
+ onopentag: function(name, attribs) {
+ if (options.onOpenTag) {
+ options.onOpenTag(name, attribs);
+ }
+
+ // If `enforceHtmlBoundary` is `true` and this has found the opening
+ // `html` tag, reset the state.
+ if (options.enforceHtmlBoundary && name === 'html') {
+ initializeState();
+ }
+
+ // Inside a discarded non-text tag: only track nesting so the matching
+ // close tag can end the skip.
+ if (skipText) {
+ skipTextDepth++;
+ return;
+ }
+ const frame = new Frame(name, attribs);
+ stack.push(frame);
+
+ let skip = false;
+ const hasText = !!frame.text;
+ let transformedTag;
+ // Tag-specific transform first, then the '*' transform (if any).
+ if (has(transformTagsMap, name)) {
+ transformedTag = transformTagsMap[name](name, attribs);
+
+ frame.attribs = attribs = transformedTag.attribs;
+
+ if (transformedTag.text !== undefined) {
+ frame.innerText = transformedTag.text;
+ }
+
+ // NOTE(review): this sets `frame.name`, while the Frame constructor
+ // defines `tag` and onclosetag compares `frame.tag` — confirm intended.
+ if (name !== transformedTag.tagName) {
+ frame.name = name = transformedTag.tagName;
+ transformMap[depth] = transformedTag.tagName;
+ }
+ }
+ if (transformTagsAll) {
+ transformedTag = transformTagsAll(name, attribs);
+
+ frame.attribs = attribs = transformedTag.attribs;
+ if (name !== transformedTag.tagName) {
+ frame.name = name = transformedTag.tagName;
+ transformMap[depth] = transformedTag.tagName;
+ }
+ }
+
+ // Skip the tag when it is not allowed, when a recursive escape is already
+ // in progress, or when the nesting limit has been reached.
+ if (!tagAllowed(name) || (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap)) || (options.nestingLimit != null && depth >= options.nestingLimit)) {
+ skip = true;
+ skipMap[depth] = true;
+ if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') {
+ if (nonTextTagsArray.indexOf(name) !== -1) {
+ skipText = true;
+ skipTextDepth = 1;
+ }
+ }
+ }
+ depth++;
+ if (skip) {
+ if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') {
+ // We want the contents but not this tag
+ if (frame.innerText && !hasText) {
+ const escaped = escapeHtml(frame.innerText);
+ if (options.textFilter) {
+ result += options.textFilter(escaped, name);
+ } else {
+ result += escaped;
+ }
+ addedText = true;
+ }
+ return;
+ }
+ // Escape modes: stash the output so far and build the tag in an empty
+ // `result`; it is escaped and re-joined at the end of this handler.
+ tempResult = result;
+ result = '';
+ }
+ result += '<' + name;
+
+ if (name === 'script') {
+ if (options.allowedScriptHostnames || options.allowedScriptDomains) {
+ frame.innerText = '';
+ }
+ }
+
+ const isBeingEscaped = skip && (options.disallowedTagsMode === 'escape' || options.disallowedTagsMode === 'recursiveEscape');
+ const shouldPreserveEscapedAttributes = isBeingEscaped && options.preserveEscapedAttributes;
+
+ if (shouldPreserveEscapedAttributes) {
+ each(attribs, function(value, a) {
+ result += ' ' + a + '="' + escapeHtml((value || ''), true) + '"';
+ });
+ } else if (!allowedAttributesMap || has(allowedAttributesMap, name) || allowedAttributesMap['*']) {
+ each(attribs, function(value, a) {
+ if (!VALID_HTML_ATTRIBUTE_NAME.test(a)) {
+ // This prevents part of an attribute name in the output from being
+ // interpreted as the end of an attribute, or end of a tag.
+ delete frame.attribs[a];
+ return;
+ }
+ // If the value is empty, check if the attribute is in the allowedEmptyAttributes array.
+ // If it is not in the allowedEmptyAttributes array, and it is a known non-boolean attribute, delete it
+ // List taken from https://html.spec.whatwg.org/multipage/indices.html#attributes-3
+ if (value === '' && (!options.allowedEmptyAttributes.includes(a)) &&
+ (options.nonBooleanAttributes.includes(a) || options.nonBooleanAttributes.includes('*'))) {
+ delete frame.attribs[a];
+ return;
+ }
+ // check allowedAttributesMap for the element and attribute and modify the value
+ // as necessary if there are specific values defined.
+ let passedAllowedAttributesMapCheck = false;
+ if (!allowedAttributesMap ||
+ (has(allowedAttributesMap, name) && allowedAttributesMap[name].indexOf(a) !== -1) ||
+ (allowedAttributesMap['*'] && allowedAttributesMap['*'].indexOf(a) !== -1) ||
+ (has(allowedAttributesGlobMap, name) && allowedAttributesGlobMap[name].test(a)) ||
+ (allowedAttributesGlobMap['*'] && allowedAttributesGlobMap['*'].test(a))) {
+ passedAllowedAttributesMapCheck = true;
+ } else if (allowedAttributesMap && allowedAttributesMap[name]) {
+ // Object entries ({ name, multiple, values }) restrict the attribute
+ // to an explicit list of values.
+ for (const o of allowedAttributesMap[name]) {
+ if (isPlainObject(o) && o.name && (o.name === a)) {
+ passedAllowedAttributesMapCheck = true;
+ let newValue = '';
+ if (o.multiple === true) {
+ // verify the values that are allowed
+ const splitStrArray = value.split(' ');
+ for (const s of splitStrArray) {
+ if (o.values.indexOf(s) !== -1) {
+ if (newValue === '') {
+ newValue = s;
+ } else {
+ newValue += ' ' + s;
+ }
+ }
+ }
+ } else if (o.values.indexOf(value) >= 0) {
+ // verified an allowed value matches the entire attribute value
+ newValue = value;
+ }
+ value = newValue;
+ }
+ }
+ }
+ if (passedAllowedAttributesMapCheck) {
+ // URL-bearing attributes must use an allowed scheme.
+ if (options.allowedSchemesAppliedToAttributes.indexOf(a) !== -1) {
+ if (naughtyHref(name, value)) {
+ delete frame.attribs[a];
+ return;
+ }
+ }
+
+ // script src: optionally restricted to allowed hostnames/domains.
+ if (name === 'script' && a === 'src') {
+
+ let allowed = true;
+
+ try {
+ const parsed = parseUrl(value);
+
+ if (options.allowedScriptHostnames || options.allowedScriptDomains) {
+ const allowedHostname = (options.allowedScriptHostnames || []).find(function (hostname) {
+ return hostname === parsed.url.hostname;
+ });
+ const allowedDomain = (options.allowedScriptDomains || []).find(function(domain) {
+ return parsed.url.hostname === domain || parsed.url.hostname.endsWith(`.${domain}`);
+ });
+ allowed = allowedHostname || allowedDomain;
+ }
+ } catch (e) {
+ allowed = false;
+ }
+
+ if (!allowed) {
+ delete frame.attribs[a];
+ return;
+ }
+ }
+
+ // iframe src: relative URLs and hostnames/domains are checked separately.
+ if (name === 'iframe' && a === 'src') {
+ let allowed = true;
+ try {
+ const parsed = parseUrl(value);
+
+ if (parsed.isRelativeUrl) {
+ // default value of allowIframeRelativeUrls is true
+ // unless allowedIframeHostnames or allowedIframeDomains specified
+ allowed = has(options, 'allowIframeRelativeUrls')
+ ? options.allowIframeRelativeUrls
+ : (!options.allowedIframeHostnames && !options.allowedIframeDomains);
+ } else if (options.allowedIframeHostnames || options.allowedIframeDomains) {
+ const allowedHostname = (options.allowedIframeHostnames || []).find(function (hostname) {
+ return hostname === parsed.url.hostname;
+ });
+ const allowedDomain = (options.allowedIframeDomains || []).find(function(domain) {
+ return parsed.url.hostname === domain || parsed.url.hostname.endsWith(`.${domain}`);
+ });
+ allowed = allowedHostname || allowedDomain;
+ }
+ } catch (e) {
+ // Unparseable iframe src
+ allowed = false;
+ }
+ if (!allowed) {
+ delete frame.attribs[a];
+ return;
+ }
+ }
+ // srcset: drop candidates whose URL fails the scheme check.
+ if (a === 'srcset') {
+ try {
+ let parsed = parseSrcset(value);
+ parsed.forEach(function(value) {
+ if (naughtyHref('srcset', value.url)) {
+ value.evil = true;
+ }
+ });
+ parsed = filter(parsed, function(v) {
+ return !v.evil;
+ });
+ if (!parsed.length) {
+ delete frame.attribs[a];
+ return;
+ } else {
+ // NOTE(review): `parsed` was already filtered above, so this
+ // second filter looks redundant — confirm before removing.
+ value = stringifySrcset(filter(parsed, function(v) {
+ return !v.evil;
+ }));
+ frame.attribs[a] = value;
+ }
+ } catch (e) {
+ // Unparseable srcset
+ delete frame.attribs[a];
+ return;
+ }
+ }
+ // class: keep only classes permitted by name, glob or regex.
+ if (a === 'class') {
+ const allowedSpecificClasses = allowedClassesMap[name];
+ const allowedWildcardClasses = allowedClassesMap['*'];
+ const allowedSpecificClassesGlob = allowedClassesGlobMap[name];
+ const allowedSpecificClassesRegex = allowedClassesRegexMap[name];
+ const allowedWildcardClassesRegex = allowedClassesRegexMap['*'];
+ const allowedWildcardClassesGlob = allowedClassesGlobMap['*'];
+ const allowedClassesGlobs = [
+ allowedSpecificClassesGlob,
+ allowedWildcardClassesGlob
+ ]
+ .concat(allowedSpecificClassesRegex, allowedWildcardClassesRegex)
+ .filter(function (t) {
+ return t;
+ });
+ if (allowedSpecificClasses && allowedWildcardClasses) {
+ value = filterClasses(value, deepmerge(allowedSpecificClasses, allowedWildcardClasses), allowedClassesGlobs);
+ } else {
+ value = filterClasses(value, allowedSpecificClasses || allowedWildcardClasses, allowedClassesGlobs);
+ }
+ if (!value.length) {
+ delete frame.attribs[a];
+ return;
+ }
+ }
+ // style: parse the declarations and keep only allowed ones.
+ if (a === 'style') {
+ if (options.parseStyleAttributes) {
+ try {
+ const abstractSyntaxTree = postcssParse(name + ' {' + value + '}', { map: false });
+ const filteredAST = filterCss(abstractSyntaxTree, options.allowedStyles);
+
+ value = stringifyStyleAttributes(filteredAST);
+
+ if (value.length === 0) {
+ delete frame.attribs[a];
+ return;
+ }
+ } catch (e) {
+ if (typeof window !== 'undefined') {
+ console.warn('Failed to parse "' + name + ' {' + value + '}' + '", If you\'re running this in a browser, we recommend to disable style parsing: options.parseStyleAttributes: false, since this only works in a node environment due to a postcss dependency, More info: https://github.com/apostrophecms/sanitize-html/issues/547');
+ }
+ delete frame.attribs[a];
+ return;
+ }
+ } else if (options.allowedStyles) {
+ throw new Error('allowedStyles option cannot be used together with parseStyleAttributes: false.');
+ }
+ }
+ // The attribute survived every check: write it out.
+ result += ' ' + a;
+ if (value && value.length) {
+ result += '="' + escapeHtml(value, true) + '"';
+ } else if (options.allowedEmptyAttributes.includes(a)) {
+ result += '=""';
+ }
+ } else {
+ delete frame.attribs[a];
+ }
+ });
+ }
+ if (options.selfClosing.indexOf(name) !== -1) {
+ result += ' />';
+ } else {
+ result += '>';
+ if (frame.innerText && !hasText && !options.textFilter) {
+ result += escapeHtml(frame.innerText);
+ addedText = true;
+ }
+ }
+ // Escape modes: escape the captured opening tag and re-attach the
+ // output that was stashed in `tempResult`.
+ if (skip) {
+ result = tempResult + escapeHtml(result);
+ tempResult = '';
+ }
+ frame.openingTagLength = result.length - frame.tagPosition;
+ },
+ // Handler for a text node. Appends the text to `result` (escaped, raw or
+ // dropped, depending on the enclosing tag and the disallowed-tags mode)
+ // and accumulates it on the frame at the top of the stack.
+ ontext: function(text) {
+   if (skipText) {
+     return;
+   }
+   const current = stack[stack.length - 1];
+   let tag;
+
+   if (current) {
+     tag = current.tag;
+     // If inner text was set by transform function then let's use it
+     if (current.innerText !== undefined) {
+       text = current.innerText;
+     }
+   }
+
+   const mode = options.disallowedTagsMode;
+   const discarding = mode === 'discard' || mode === 'completelyDiscard';
+
+   if (mode === 'completelyDiscard' && !tagAllowed(tag)) {
+     text = '';
+   } else if (discarding && (tag === 'script' || tag === 'style' || nonTextTagsArray.indexOf(tag) !== -1)) {
+     // htmlparser2 gives us script/style content as-is. Escaping it ruins the
+     // content. Allowing script tags is, by definition, game over for XSS
+     // protection, so if that's your concern, don't allow them. The same is
+     // essentially true for style tags which have their own XSS vectors.
+     //
+     // htmlparser2 also does not decode entities inside raw text elements
+     // like textarea and option. That text is already properly encoded, so
+     // it is passed through without additional escaping to avoid
+     // double-encoding.
+     result += text;
+   } else if (!addedText) {
+     const escaped = escapeHtml(text, false);
+     if (options.textFilter) {
+       result += options.textFilter(escaped, tag);
+     } else {
+       result += escaped;
+     }
+   }
+   if (stack.length) {
+     stack[stack.length - 1].text += text;
+   }
+ },
+ // Handler for a closing tag (explicit or implied). Pops the matching frame,
+ // applies `exclusiveFilter`, and appends the closing tag to `result`,
+ // escaping it when the tag itself is being escaped.
+ onclosetag: function(name, isImplied) {
+   if (options.onCloseTag) {
+     options.onCloseTag(name, isImplied);
+   }
+
+   // Inside a discarded non-text tag: unwind the nesting counter and only
+   // continue once the tag that started the skip is closed.
+   if (skipText) {
+     skipTextDepth--;
+     if (!skipTextDepth) {
+       skipText = false;
+     } else {
+       return;
+     }
+   }
+
+   const frame = stack.pop();
+   if (!frame) {
+     // Do not crash on bad markup
+     return;
+   }
+
+   if (frame.tag !== name) {
+     // Another case of bad markup.
+     // Push to stack, so that it will be used in future closing tags.
+     stack.push(frame);
+     return;
+   }
+
+   skipText = options.enforceHtmlBoundary ? name === 'html' : false;
+   depth--;
+   const skip = skipMap[depth];
+   if (skip) {
+     delete skipMap[depth];
+     if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') {
+       frame.updateParentNodeText();
+       return;
+     }
+     // Escape modes: stash the output so far and build the closing tag in
+     // an empty `result` so that only the tag itself gets escaped.
+     tempResult = result;
+     result = '';
+   }
+
+   // Use the transformed tag name if the opening tag was renamed.
+   if (transformMap[depth]) {
+     name = transformMap[depth];
+     delete transformMap[depth];
+   }
+
+   if (options.exclusiveFilter) {
+     const filterResult = options.exclusiveFilter(frame);
+     if (filterResult === 'excludeTag') {
+       if (skip) {
+         // no longer escaping the tag since it's not added at all
+         result = tempResult;
+         tempResult = '';
+       }
+       // remove the opening tag from the result
+       result = result.substring(0, frame.tagPosition) + result.substring(frame.tagPosition + frame.openingTagLength);
+       return;
+     } else if (filterResult) {
+       // Drop the element entirely, including everything written since it opened
+       result = result.substring(0, frame.tagPosition);
+       return;
+     }
+   }
+
+   frame.updateParentNodeMediaChildren();
+   frame.updateParentNodeText();
+
+   if (
+     // Already output />
+     options.selfClosing.indexOf(name) !== -1 ||
+     // Escaped tag, closing tag is implied
+     (isImplied && !tagAllowed(name) && [ 'escape', 'recursiveEscape' ].indexOf(options.disallowedTagsMode) >= 0)
+   ) {
+     if (skip) {
+       result = tempResult;
+       tempResult = '';
+     }
+     return;
+   }
+
+   // Emit the closing tag. The leading "</" is required: without it the
+   // output would contain the bare tag name followed by ">".
+   result += '</' + name + '>';
+   if (skip) {
+     result = tempResult + escapeHtml(result);
+     tempResult = '';
+   }
+   addedText = false;
+ }
+ }, options.parser);
+ parser.write(html);
+ parser.end();
+
+ if (options.disallowedTagsMode === 'escape' || options.disallowedTagsMode === 'recursiveEscape') {
+ const lastParsedIndex = parser.endIndex;
+ if (lastParsedIndex != null && lastParsedIndex >= 0 && lastParsedIndex < html.length) {
+ const unparsed = html.substring(lastParsedIndex);
+ result += escapeHtml(unparsed);
+ } else if ((lastParsedIndex == null || lastParsedIndex < 0) && html.length > 0 && result === '') {
+ result = escapeHtml(html);
+ }
+ }
+
+ return result;
+
+ // Resets the output buffer and all parser bookkeeping to their initial
+ // values. Called once before parsing and again from onopentag when
+ // `enforceHtmlBoundary` is set and an `html` tag opens.
+ function initializeState() {
+   // Output accumulated so far
+   result = '';
+
+   // Open-element tracking
+   stack = [];
+   depth = 0;
+
+   // Per-depth markers for skipped and renamed tags
+   skipMap = {};
+   transformMap = {};
+
+   // Text suppression inside discarded non-text tags
+   skipTextDepth = 0;
+   skipText = false;
+ }
+
+ // Escapes `s` for output as HTML text, or as a double-quoted attribute
+ // value when `quote` is truthy. Non-string input is coerced to a string.
+ // Reads `options.parser.decodeEntities` to decide whether every ampersand
+ // must be escaped or whether existing entities may be left alone.
+ function escapeHtml(s, quote) {
+   if (typeof (s) !== 'string') {
+     s = s + '';
+   }
+   if (options.parser.decodeEntities) {
+     // The parser already decoded entities, so every special character
+     // in `s` is literal and must be encoded.
+     s = s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+     if (quote) {
+       s = s.replace(/"/g, '&quot;');
+     }
+   }
+   // TODO: this is inadequate because it will pass `&0;`. This approach
+   // will not work, each & must be considered with regard to whether it
+   // is followed by a 100% syntactically valid entity or not, and escaped
+   // if it is not. If this bothers you, don't set parser.decodeEntities
+   // to false. (The default is true.)
+   s = s.replace(/&(?![a-zA-Z0-9#]{1,20};)/g, '&amp;') // Match ampersands not part of existing HTML entity
+     .replace(/</g, '&lt;')
+     .replace(/>/g, '&gt;');
+   if (quote) {
+     s = s.replace(/"/g, '&quot;');
+   }
+   return s;
+ }
+
+ // Returns true when `href` must be rejected for tag `name`: its scheme is
+ // not in the allowed list, or it is protocol-relative and
+ // `options.allowProtocolRelative` is off. URLs with no scheme are accepted.
+ function naughtyHref(name, href) {
+   // Browsers ignore character codes of 32 (space) and below in a surprising
+   // number of situations. Start reading here:
+   // https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_tab
+   // eslint-disable-next-line no-control-regex
+   href = href.replace(/[\x00-\x20]+/g, '');
+   // Clobber any comments in URLs, which the browser might
+   // interpret inside an XML data island, allowing
+   // a javascript: URL to be snuck through
+   while (true) {
+     const firstIndex = href.indexOf('<!--');
+     if (firstIndex === -1) {
+       break;
+     }
+     // Search past the 4-character opener for the matching terminator
+     const lastIndex = href.indexOf('-->', firstIndex + 4);
+     if (lastIndex === -1) {
+       break;
+     }
+     href = href.substring(0, firstIndex) + href.substring(lastIndex + 3);
+   }
+   // Case insensitive so we don't get faked out by JAVASCRIPT #1
+   // Allow more characters after the first so we don't get faked
+   // out by certain schemes browsers accept
+   const matches = href.match(/^([a-zA-Z][a-zA-Z0-9.\-+]*):/);
+   if (!matches) {
+     // Protocol-relative URL starting with any combination of '/' and '\'
+     if (href.match(/^[/\\]{2}/)) {
+       return !options.allowProtocolRelative;
+     }
+
+     // No scheme
+     return false;
+   }
+   const scheme = matches[1].toLowerCase();
+
+   // A per-tag scheme list, when present, replaces the global list
+   if (has(options.allowedSchemesByTag, name)) {
+     return options.allowedSchemesByTag[name].indexOf(scheme) === -1;
+   }
+
+   return !options.allowedSchemes || options.allowedSchemes.indexOf(scheme) === -1;
+ }
+
+  /**
+   * Parse a URL with the WHATWG `URL` class, resolving it against a
+   * placeholder base so that relative URLs parse too, and report whether
+   * the input was relative.
+   *
+   * Whether protocol-relative URLs are acceptable is naughtyHref's call;
+   * this helper only provides what is needed for hostname checks and for
+   * deciding whether relative URLs are allowed.
+   *
+   * @param {string} value - The URL text to parse.
+   * @return {{isRelativeUrl: boolean, url: URL}} - The parsed URL and the
+   *   relative-URL flag.
+   * @throws {Error} - If the value starts with the placeholder `relative:`
+   *   scheme, or if `URL` cannot parse it.
+   */
+  function parseUrl(value) {
+    // Collapse "scheme: \ /"-style prefixes into a canonical "scheme//"
+    const normalized = value.replace(/^(\w+:)?\s*[\\/]\s*[\\/]/, '$1//');
+    if (normalized.startsWith('relative:')) {
+      // Somebody is trying to abuse the placeholder scheme used below to
+      // work around the WHATWG parser's mandatory base URL; refuse it
+      throw new Error('relative: exploit attempt');
+    }
+
+    // Placeholder base with a 100-segment path, deep enough for any
+    // reasonable relative URL to resolve against
+    let base = 'relative://relative-site';
+    let segment = 0;
+    while (segment < 100) {
+      base += `/${segment}`;
+      segment += 1;
+    }
+
+    const url = new URL(normalized, base);
+
+    // Still on the placeholder host and scheme means the input was relative
+    const onPlaceholderHost = url && url.hostname === 'relative-site';
+    const isRelativeUrl = onPlaceholderHost && url.protocol === 'relative:';
+    return {
+      isRelativeUrl,
+      url
+    };
+  }
+  /**
+   * Restrict the declarations of a parsed inline style to those permitted
+   * by the allowlist. The tree is modified in place and also returned.
+   *
+   * @param {object} abstractSyntaxTree - Parsed CSS. `nodes[0]` is the rule
+   *   whose `selector` selects the allowlist entry and whose `nodes` are the
+   *   declarations, each containing `prop` and `value`, e.g.
+   *   { prop: 'color', value: 'red' }.
+   * @param {object} allowedStyles - Keyed by selector (or '*' for all). Each
+   *   entry maps a CSS property (e.g. color) to a list of permitted regular
+   *   expressions (e.g. /green/i).
+   * @return {object} - The same tree, filtered.
+   */
+  function filterCss(abstractSyntaxTree, allowedStyles) {
+    if (allowedStyles) {
+      const rule = abstractSyntaxTree.nodes[0];
+      const tagStyles = allowedStyles[rule.selector];
+      const globalStyles = allowedStyles['*'];
+
+      // When both a tag-specific and a global entry exist they are merged;
+      // otherwise whichever one exists (if any) is used on its own.
+      const selectedRule = (tagStyles && globalStyles)
+        ? deepmerge(tagStyles, globalStyles)
+        : (tagStyles || globalStyles);
+
+      if (selectedRule) {
+        rule.nodes = rule.nodes.reduce(filterDeclarations(selectedRule), []);
+      }
+    }
+
+    return abstractSyntaxTree;
+  }
+
+  /**
+   * Serialize the declarations of the first rule of a filtered tree into
+   * the text of an inline `style` attribute.
+   *
+   * @param {AbstractSyntaxTree} filteredAST
+   * @return {string} - Declarations joined with ';' and no trailing
+   *   separator, e.g.
+   *   "color:yellow;text-align:center !important;font-family:helvetica"
+   */
+  function stringifyStyleAttributes(filteredAST) {
+    const declarations = [];
+    filteredAST.nodes[0].nodes.forEach(function(declaration) {
+      const importance = declaration.important ? ' !important' : '';
+      declarations.push(`${declaration.prop}:${declaration.value}${importance}`);
+    });
+    return declarations.join(';');
+  }
+
+  /**
+   * Build a reducer callback that keeps only the CSS declarations permitted
+   * by `selectedRule`.
+   *
+   * @param {object} selectedRule - Maps a CSS property to a list of regular
+   *   expressions, e.g. { color: [ /^red$/ ] }.
+   * @return {function} - A callback for Array.reduce taking
+   *   (allowedDeclarationsList, attributeObject), where `attributeObject`
+   *   is a declaration with `prop` (e.g. 'color') and `value` (e.g. 'red').
+   *   The declaration is appended to the list when its property is a key of
+   *   `selectedRule` and its value matches at least one of that key's
+   *   expressions; the list is returned either way.
+   */
+  function filterDeclarations(selectedRule) {
+    return function (allowedDeclarationsList, attributeObject) {
+      // Properties that are not allowlisted are dropped outright
+      if (!has(selectedRule, attributeObject.prop)) {
+        return allowedDeclarationsList;
+      }
+
+      const patterns = selectedRule[attributeObject.prop];
+      const isPermitted = patterns.some(function(regularExpression) {
+        return regularExpression.test(attributeObject.value);
+      });
+      if (isPermitted) {
+        allowedDeclarationsList.push(attributeObject);
+      }
+      return allowedDeclarationsList;
+    };
+  }
+
+  /**
+   * Reduce a `class` attribute value to the class names that are permitted.
+   *
+   * @param {string} classes - Whitespace-separated class names.
+   * @param {array} [allowed] - Exact class names to keep. When falsy, no
+   *   filtering is done and `classes` is returned untouched.
+   * @param {array} allowedGlobs - Regular expressions; a class name matching
+   *   any of them is kept as well.
+   * @return {string} - The surviving class names joined with single spaces.
+   */
+  function filterClasses(classes, allowed, allowedGlobs) {
+    if (!allowed) {
+      // The class attribute is allowed without filtering on this tag
+      return classes;
+    }
+    const kept = [];
+    classes.split(/\s+/).forEach(function(clss) {
+      const isListed = allowed.indexOf(clss) !== -1;
+      const isPermitted = isListed || allowedGlobs.some(function(glob) {
+        return glob.test(clss);
+      });
+      if (isPermitted) {
+        kept.push(clss);
+      }
+    });
+    return kept.join(' ');
+  }
+}
+
+// Defaults are accessible to you so that you can use them as a starting point
+// programmatically if you wish
+
+// Default options for the HTML parser.
+// NOTE(review): presumably merged into `options.parser`, whose
+// `decodeEntities` flag escapeHtml() reads — confirm in sanitizeHtml's
+// option handling.
+const htmlParserDefaults = {
+ decodeEntities: true
+};
+// Default option values, exposed on the sanitizeHtml function itself.
+sanitizeHtml.defaults = {
+ allowedTags: [
+ // Sections derived from MDN element categories and limited to the more
+ // benign categories.
+ // https://developer.mozilla.org/en-US/docs/Web/HTML/Element
+ // Content sectioning
+ 'address', 'article', 'aside', 'footer', 'header',
+ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hgroup',
+ 'main', 'nav', 'section',
+ // Text content
+ 'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure',
+ 'hr', 'li', 'menu', 'ol', 'p', 'pre', 'ul',
+ // Inline text semantics
+ 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn',
+ 'em', 'i', 'kbd', 'mark', 'q',
+ 'rb', 'rp', 'rt', 'rtc', 'ruby',
+ 's', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr',
+ // Table content
+ 'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot', 'th',
+ 'thead', 'tr'
+ ],
+ // Attributes that cannot be boolean (the entries are attribute names,
+ // not tag names)
+ nonBooleanAttributes: [
+ 'abbr', 'accept', 'accept-charset', 'accesskey', 'action',
+ 'allow', 'alt', 'as', 'autocapitalize', 'autocomplete',
+ 'blocking', 'charset', 'cite', 'class', 'color', 'cols',
+ 'colspan', 'content', 'contenteditable', 'coords', 'crossorigin',
+ 'data', 'datetime', 'decoding', 'dir', 'dirname', 'download',
+ 'draggable', 'enctype', 'enterkeyhint', 'fetchpriority', 'for',
+ 'form', 'formaction', 'formenctype', 'formmethod', 'formtarget',
+ 'headers', 'height', 'hidden', 'high', 'href', 'hreflang',
+ 'http-equiv', 'id', 'imagesizes', 'imagesrcset', 'inputmode',
+ 'integrity', 'is', 'itemid', 'itemprop', 'itemref', 'itemtype',
+ 'kind', 'label', 'lang', 'list', 'loading', 'low', 'max',
+ 'maxlength', 'media', 'method', 'min', 'minlength', 'name',
+ 'nonce', 'optimum', 'pattern', 'ping', 'placeholder', 'popover',
+ 'popovertarget', 'popovertargetaction', 'poster', 'preload',
+ 'referrerpolicy', 'rel', 'rows', 'rowspan', 'sandbox', 'scope',
+ 'shape', 'size', 'sizes', 'slot', 'span', 'spellcheck', 'src',
+ 'srcdoc', 'srclang', 'srcset', 'start', 'step', 'style',
+ 'tabindex', 'target', 'title', 'translate', 'type', 'usemap',
+ 'value', 'width', 'wrap',
+ // Event handlers
+ 'onauxclick', 'onafterprint', 'onbeforematch', 'onbeforeprint',
+ 'onbeforeunload', 'onbeforetoggle', 'onblur', 'oncancel',
+ 'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose',
+ 'oncontextlost', 'oncontextmenu', 'oncontextrestored', 'oncopy',
+ 'oncuechange', 'oncut', 'ondblclick', 'ondrag', 'ondragend',
+ 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart',
+ 'ondrop', 'ondurationchange', 'onemptied', 'onended',
+ 'onerror', 'onfocus', 'onformdata', 'onhashchange', 'oninput',
+ 'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup',
+ 'onlanguagechange', 'onload', 'onloadeddata', 'onloadedmetadata',
+ 'onloadstart', 'onmessage', 'onmessageerror', 'onmousedown',
+ 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout',
+ 'onmouseover', 'onmouseup', 'onoffline', 'ononline', 'onpagehide',
+ 'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying',
+ 'onpopstate', 'onprogress', 'onratechange', 'onreset', 'onresize',
+ 'onrejectionhandled', 'onscroll', 'onscrollend',
+ 'onsecuritypolicyviolation', 'onseeked', 'onseeking', 'onselect',
+ 'onslotchange', 'onstalled', 'onstorage', 'onsubmit', 'onsuspend',
+ 'ontimeupdate', 'ontoggle', 'onunhandledrejection', 'onunload',
+ 'onvolumechange', 'onwaiting', 'onwheel'
+ ],
+ disallowedTagsMode: 'discard',
+ allowedAttributes: {
+ a: [ 'href', 'name', 'target' ],
+ // We don't currently allow img itself by default, but
+ // these attributes would make sense if we did.
+ img: [ 'src', 'srcset', 'alt', 'title', 'width', 'height', 'loading' ]
+ },
+ allowedEmptyAttributes: [
+ 'alt'
+ ],
+ // Lots of these won't come up by default because we don't allow them
+ selfClosing: [ 'img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta' ],
+ // URL schemes we permit
+ allowedSchemes: [ 'http', 'https', 'ftp', 'mailto', 'tel' ],
+ allowedSchemesByTag: {},
+ allowedSchemesAppliedToAttributes: [ 'href', 'src', 'cite' ],
+ allowProtocolRelative: true,
+ enforceHtmlBoundary: false,
+ parseStyleAttributes: true,
+ preserveEscapedAttributes: false
+};
+
+/**
+ * Build a tag-transform function that renames a tag and, optionally,
+ * adds or replaces attributes.
+ *
+ * @param {string} newTagName - Tag name reported by the transform.
+ * @param {object} [newAttribs] - Attributes to apply; defaults to {}.
+ * @param {boolean} [merge] - Defaults to true when undefined. When truthy
+ *   the new attributes are copied onto the incoming attribute object
+ *   (which is mutated); otherwise `newAttribs` replaces it outright.
+ * @return {function} - (tagName, attribs) => { tagName, attribs }
+ */
+sanitizeHtml.simpleTransform = function(newTagName, newAttribs, merge) {
+  const shouldMerge = (merge === undefined) ? true : merge;
+  const extraAttribs = newAttribs || {};
+
+  return function(tagName, attribs) {
+    if (!shouldMerge) {
+      // Replace the incoming attributes wholesale
+      return {
+        tagName: newTagName,
+        attribs: extraAttribs
+      };
+    }
+
+    // Overlay the new attributes on top of the incoming ones
+    for (const key in extraAttribs) {
+      attribs[key] = extraAttribs[key];
+    }
+    return {
+      tagName: newTagName,
+      attribs: attribs
+    };
+  };
+};
diff --git a/package.json b/package.json
index 7f9294a..23d98c6 100644
--- a/package.json
+++ b/package.json
@@ -1,17 +1,14 @@
{
"name": "sanitize-html",
- "version": "1.27.4",
- "description": "Clean up user-submitted HTML, preserving whitelisted elements and whitelisted attributes on a per-element basis",
+ "version": "2.17.0",
+ "description": "Clean up user-submitted HTML, preserving allowlisted elements and allowlisted attributes on a per-element basis",
"sideEffects": false,
- "main": "dist/sanitize-html.js",
+ "main": "index.js",
"files": [
- "dist/"
+ "index.js"
],
"scripts": {
- "build": "mkdir -p dist && browserify src/index.js > dist/sanitize-html-es2015.js --standalone 'sanitizeHtml' && babel dist/sanitize-html-es2015.js --out-file dist/sanitize-html.js --presets=@babel/preset-env",
- "minify": "npm run build && uglifyjs dist/sanitize-html.js > dist/sanitize-html.min.js",
- "prepublishOnly": "npm run minify",
- "test": "npx eslint . && npm run prepublishOnly && mocha test/test.js"
+ "test": "npx eslint . && mocha test/test.js"
},
"repository": {
"type": "git",
@@ -21,31 +18,27 @@
"html",
"parser",
"sanitizer",
- "apostrophecms"
+ "sanitize"
],
"author": "Apostrophe Technologies, Inc.",
"license": "MIT",
"dependencies": {
- "htmlparser2": "^4.1.0",
- "lodash": "^4.17.15",
+ "deepmerge": "^4.2.2",
+ "escape-string-regexp": "^4.0.0",
+ "htmlparser2": "^10.1.0",
+ "is-plain-object": "^5.0.0",
"parse-srcset": "^1.0.2",
- "postcss": "^7.0.27"
+ "postcss": "^8.3.11"
},
"devDependencies": {
- "@babel/cli": "^7.8.4",
- "@babel/core": "^7.8.4",
- "@babel/preset-env": "^7.8.4",
- "babelify": "^10.0.0",
- "browserify": "^16.2.3",
- "eslint": "^4.0.0",
- "eslint-config-apostrophe": "^3.1.0",
- "eslint-config-standard": "^11.0.0",
- "eslint-plugin-import": "^2.13.0",
- "eslint-plugin-node": "^6.0.1",
- "eslint-plugin-promise": "^3.8.0",
- "eslint-plugin-standard": "^3.1.0",
- "mocha": "^5.2.0",
- "sinon": "^9.0.2",
- "uglify-js": "^3.8.0"
+ "eslint": "^7.3.1",
+ "eslint-config-apostrophe": "^3.4.0",
+ "eslint-config-standard": "^14.1.1",
+ "eslint-plugin-import": "^2.25.2",
+ "eslint-plugin-node": "^11.1.0",
+ "eslint-plugin-promise": "^4.2.1",
+ "eslint-plugin-standard": "^4.0.1",
+ "mocha": "^10.2.0",
+ "sinon": "^9.0.2"
}
-}
\ No newline at end of file
+}
diff --git a/src/index.js b/src/index.js
deleted file mode 100644
index c7ec84e..0000000
--- a/src/index.js
+++ /dev/null
@@ -1,722 +0,0 @@
-/* eslint-disable no-useless-escape */
-var htmlparser = require('htmlparser2');
-var quoteRegexp = require('lodash/escapeRegExp');
-var cloneDeep = require('lodash/cloneDeep');
-var mergeWith = require('lodash/mergeWith');
-var isString = require('lodash/isString');
-var isPlainObject = require('lodash/isPlainObject');
-var parseSrcset = require('parse-srcset');
-var postcss = require('postcss');
-var url = require('url');
-// Tags that can conceivably represent stand-alone media.
-var mediaTags = [
- 'img', 'audio', 'video', 'picture', 'svg',
- 'object', 'map', 'iframe', 'embed'
-];
-// Tags that are inherently vulnerable to being used in XSS attacks.
-var vulnerableTags = ['script', 'style'];
-
-function each(obj, cb) {
- if (obj) {
- Object.keys(obj).forEach(function (key) {
- cb(obj[key], key);
- });
- }
-}
-
-// Avoid false positives with .__proto__, .hasOwnProperty, etc.
-function has(obj, key) {
- return ({}).hasOwnProperty.call(obj, key);
-}
-
-// Returns those elements of `a` for which `cb(a)` returns truthy
-function filter(a, cb) {
- var n = [];
- each(a, function(v) {
- if (cb(v)) {
- n.push(v);
- }
- });
- return n;
-}
-
-function isEmptyObject(obj) {
- for (var key in obj) {
- if (has(obj, key)) {
- return false;
- }
- }
- return true;
-}
-
-function stringifySrcset(parsedSrcset) {
- return parsedSrcset.map(function(part) {
- if (!part.url) {
- throw new Error('URL missing');
- }
-
- return (
- part.url +
- (part.w ? ` ${part.w}w` : '') +
- (part.h ? ` ${part.h}h` : '') +
- (part.d ? ` ${part.d}x` : '')
- );
- }).join(', ');
-}
-
-module.exports = sanitizeHtml;
-
-// A valid attribute name.
-// We use a tolerant definition based on the set of strings defined by
-// html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
-// and html.spec.whatwg.org/multipage/parsing.html#attribute-name-state .
-// The characters accepted are ones which can be appended to the attribute
-// name buffer without triggering a parse error:
-// * unexpected-equals-sign-before-attribute-name
-// * unexpected-null-character
-// * unexpected-character-in-attribute-name
-// We exclude the empty string because it's impossible to get to the after
-// attribute name state with an empty attribute name buffer.
-const VALID_HTML_ATTRIBUTE_NAME = /^[^\0\t\n\f\r /<=>]+$/;
-
-// Ignore the _recursing flag; it's there for recursive
-// invocation as a guard against this exploit:
-// https://github.com/fb55/htmlparser2/issues/105
-
-function sanitizeHtml(html, options, _recursing) {
- var result = '';
- // Used for hot swapping the result variable with an empty string in order to "capture" the text written to it.
- var tempResult = '';
-
- function Frame(tag, attribs) {
- var that = this;
- this.tag = tag;
- this.attribs = attribs || {};
- this.tagPosition = result.length;
- this.text = ''; // Node inner text
- this.mediaChildren = [];
-
- this.updateParentNodeText = function() {
- if (stack.length) {
- var parentFrame = stack[stack.length - 1];
- parentFrame.text += that.text;
- }
- };
-
- this.updateParentNodeMediaChildren = function() {
- if (stack.length && mediaTags.indexOf(this.tag) > -1) {
- var parentFrame = stack[stack.length - 1];
- parentFrame.mediaChildren.push(this.tag);
- }
- };
- }
-
- if (!options) {
- options = sanitizeHtml.defaults;
- options.parser = htmlParserDefaults;
- } else {
- options = Object.assign({}, sanitizeHtml.defaults, options);
- if (options.parser) {
- options.parser = Object.assign({}, htmlParserDefaults, options.parser);
- } else {
- options.parser = htmlParserDefaults;
- }
- }
- // vulnerableTags
- vulnerableTags.forEach(function (tag) {
- if (
- options.allowedTags && options.allowedTags.includes(tag) &&
- !options.allowVulnerableTags
- ) {
- // eslint-disable-next-line no-console
- console.warn(`\n\n⚠️ Your \`allowedTags\` option includes, \`${tag}\`, which is inherently\nvulnerable to XSS attacks. Please remove it from \`allowedTags\`.\nOr, to disable this warning, add the \`allowVulnerableTags\` option\nand ensure you are accounting for this risk.\n\n`);
- }
- });
-
- // Tags that contain something other than HTML, or where discarding
- // the text when the tag is disallowed makes sense for other reasons.
- // If we are not allowing these tags, we should drop their content too.
- // For other tags you would drop the tag but keep its content.
- var nonTextTagsArray = options.nonTextTags || [
- 'script',
- 'style',
- 'textarea',
- 'option'
- ];
- var allowedAttributesMap;
- var allowedAttributesGlobMap;
- if (options.allowedAttributes) {
- allowedAttributesMap = {};
- allowedAttributesGlobMap = {};
- each(options.allowedAttributes, function(attributes, tag) {
- allowedAttributesMap[tag] = [];
- var globRegex = [];
- attributes.forEach(function(obj) {
- if (isString(obj) && obj.indexOf('*') >= 0) {
- globRegex.push(quoteRegexp(obj).replace(/\\\*/g, '.*'));
- } else {
- allowedAttributesMap[tag].push(obj);
- }
- });
- allowedAttributesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
- });
- }
- var allowedClassesMap = {};
- each(options.allowedClasses, function(classes, tag) {
- // Implicitly allows the class attribute
- if (allowedAttributesMap) {
- if (!has(allowedAttributesMap, tag)) {
- allowedAttributesMap[tag] = [];
- }
- allowedAttributesMap[tag].push('class');
- }
-
- allowedClassesMap[tag] = classes;
- });
-
- var transformTagsMap = {};
- var transformTagsAll;
- each(options.transformTags, function(transform, tag) {
- var transFun;
- if (typeof transform === 'function') {
- transFun = transform;
- } else if (typeof transform === 'string') {
- transFun = sanitizeHtml.simpleTransform(transform);
- }
- if (tag === '*') {
- transformTagsAll = transFun;
- } else {
- transformTagsMap[tag] = transFun;
- }
- });
-
- var depth;
- var stack;
- var skipMap;
- var transformMap;
- var skipText;
- var skipTextDepth;
- var addedText = false;
-
- initializeState();
-
- var parser = new htmlparser.Parser({
- onopentag: function(name, attribs) {
- // If `enforceHtmlBoundary` is `true` and this has found the opening
- // `html` tag, reset the state.
- if (options.enforceHtmlBoundary && name === 'html') {
- initializeState();
- }
-
- if (skipText) {
- skipTextDepth++;
- return;
- }
- var frame = new Frame(name, attribs);
- stack.push(frame);
-
- var skip = false;
- var hasText = !!frame.text;
- var transformedTag;
- if (has(transformTagsMap, name)) {
- transformedTag = transformTagsMap[name](name, attribs);
-
- frame.attribs = attribs = transformedTag.attribs;
-
- if (transformedTag.text !== undefined) {
- frame.innerText = transformedTag.text;
- }
-
- if (name !== transformedTag.tagName) {
- frame.name = name = transformedTag.tagName;
- transformMap[depth] = transformedTag.tagName;
- }
- }
- if (transformTagsAll) {
- transformedTag = transformTagsAll(name, attribs);
-
- frame.attribs = attribs = transformedTag.attribs;
- if (name !== transformedTag.tagName) {
- frame.name = name = transformedTag.tagName;
- transformMap[depth] = transformedTag.tagName;
- }
- }
-
- if ((options.allowedTags && options.allowedTags.indexOf(name) === -1) || (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap))) {
- skip = true;
- skipMap[depth] = true;
- if (options.disallowedTagsMode === 'discard') {
- if (nonTextTagsArray.indexOf(name) !== -1) {
- skipText = true;
- skipTextDepth = 1;
- }
- }
- skipMap[depth] = true;
- }
- depth++;
- if (skip) {
- if (options.disallowedTagsMode === 'discard') {
- // We want the contents but not this tag
- return;
- }
- tempResult = result;
- result = '';
- }
- result += '<' + name;
- if (!allowedAttributesMap || has(allowedAttributesMap, name) || allowedAttributesMap['*']) {
- each(attribs, function(value, a) {
- if (!VALID_HTML_ATTRIBUTE_NAME.test(a)) {
- // This prevents part of an attribute name in the output from being
- // interpreted as the end of an attribute, or end of a tag.
- delete frame.attribs[a];
- return;
- }
- var parsed;
- // check allowedAttributesMap for the element and attribute and modify the value
- // as necessary if there are specific values defined.
- var passedAllowedAttributesMapCheck = false;
- if (!allowedAttributesMap ||
- (has(allowedAttributesMap, name) && allowedAttributesMap[name].indexOf(a) !== -1) ||
- (allowedAttributesMap['*'] && allowedAttributesMap['*'].indexOf(a) !== -1) ||
- (has(allowedAttributesGlobMap, name) && allowedAttributesGlobMap[name].test(a)) ||
- (allowedAttributesGlobMap['*'] && allowedAttributesGlobMap['*'].test(a))) {
- passedAllowedAttributesMapCheck = true;
- } else if (allowedAttributesMap && allowedAttributesMap[name]) {
- for (const o of allowedAttributesMap[name]) {
- if (isPlainObject(o) && o.name && (o.name === a)) {
- passedAllowedAttributesMapCheck = true;
- var newValue = '';
- if (o.multiple === true) {
- // verify the values that are allowed
- const splitStrArray = value.split(' ');
- for (const s of splitStrArray) {
- if (o.values.indexOf(s) !== -1) {
- if (newValue === '') {
- newValue = s;
- } else {
- newValue += ' ' + s;
- }
- }
- }
- } else if (o.values.indexOf(value) >= 0) {
- // verified an allowed value matches the entire attribute value
- newValue = value;
- }
- value = newValue;
- }
- }
- }
- if (passedAllowedAttributesMapCheck) {
- if (options.allowedSchemesAppliedToAttributes.indexOf(a) !== -1) {
- if (naughtyHref(name, value)) {
- delete frame.attribs[a];
- return;
- }
- }
- if (name === 'iframe' && a === 'src') {
- var allowed = true;
- try {
- // naughtyHref is in charge of whether protocol relative URLs
- // are cool. We should just accept them
- parsed = url.parse(value, false, true);
- var isRelativeUrl = parsed && parsed.host === null && parsed.protocol === null;
- if (isRelativeUrl) {
- // default value of allowIframeRelativeUrls is true
- // unless allowedIframeHostnames or allowedIframeDomains specified
- allowed = has(options, 'allowIframeRelativeUrls')
- ? options.allowIframeRelativeUrls
- : (!options.allowedIframeHostnames && !options.allowedIframeDomains);
- } else if (options.allowedIframeHostnames || options.allowedIframeDomains) {
- var allowedHostname = (options.allowedIframeHostnames || []).find(function (hostname) {
- return hostname === parsed.hostname;
- });
- var allowedDomain = (options.allowedIframeDomains || []).find(function(domain) {
- return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`);
- });
- allowed = allowedHostname || allowedDomain;
- }
- } catch (e) {
- // Unparseable iframe src
- allowed = false;
- }
- if (!allowed) {
- delete frame.attribs[a];
- return;
- }
- }
- if (a === 'srcset') {
- try {
- parsed = parseSrcset(value);
- each(parsed, function(value) {
- if (naughtyHref('srcset', value.url)) {
- value.evil = true;
- }
- });
- parsed = filter(parsed, function(v) {
- return !v.evil;
- });
- if (!parsed.length) {
- delete frame.attribs[a];
- return;
- } else {
- value = stringifySrcset(filter(parsed, function(v) {
- return !v.evil;
- }));
- frame.attribs[a] = value;
- }
- } catch (e) {
- // Unparseable srcset
- delete frame.attribs[a];
- return;
- }
- }
- if (a === 'class') {
- value = filterClasses(value, allowedClassesMap[name]);
- if (!value.length) {
- delete frame.attribs[a];
- return;
- }
- }
- if (a === 'style') {
- try {
- var abstractSyntaxTree = postcss.parse(name + ' {' + value + '}');
- var filteredAST = filterCss(abstractSyntaxTree, options.allowedStyles);
-
- value = stringifyStyleAttributes(filteredAST);
-
- if (value.length === 0) {
- delete frame.attribs[a];
- return;
- }
- } catch (e) {
- delete frame.attribs[a];
- return;
- }
- }
- result += ' ' + a;
- if (value && value.length) {
- result += '="' + escapeHtml(value, true) + '"';
- }
- } else {
- delete frame.attribs[a];
- }
- });
- }
- if (options.selfClosing.indexOf(name) !== -1) {
- result += ' />';
- } else {
- result += '>';
- if (frame.innerText && !hasText && !options.textFilter) {
- result += frame.innerText;
- addedText = true;
- }
- }
- if (skip) {
- result = tempResult + escapeHtml(result);
- tempResult = '';
- }
- },
- ontext: function(text) {
- if (skipText) {
- return;
- }
- var lastFrame = stack[stack.length - 1];
- var tag;
-
- if (lastFrame) {
- tag = lastFrame.tag;
- // If inner text was set by transform function then let's use it
- text = lastFrame.innerText !== undefined ? lastFrame.innerText : text;
- }
-
- if (options.disallowedTagsMode === 'discard' && ((tag === 'script') || (tag === 'style'))) {
- // htmlparser2 gives us these as-is. Escaping them ruins the content. Allowing
- // script tags is, by definition, game over for XSS protection, so if that's
- // your concern, don't allow them. The same is essentially true for style tags
- // which have their own collection of XSS vectors.
- result += text;
- } else {
- var escaped = escapeHtml(text, false);
- if (options.textFilter && !addedText) {
- result += options.textFilter(escaped, tag);
- } else if (!addedText) {
- result += escaped;
- }
- }
- if (stack.length) {
- var frame = stack[stack.length - 1];
- frame.text += text;
- }
- },
- onclosetag: function(name) {
-
- if (skipText) {
- skipTextDepth--;
- if (!skipTextDepth) {
- skipText = false;
- } else {
- return;
- }
- }
-
- var frame = stack.pop();
- if (!frame) {
- // Do not crash on bad markup
- return;
- }
- skipText = options.enforceHtmlBoundary ? name === 'html' : false;
- depth--;
- var skip = skipMap[depth];
- if (skip) {
- delete skipMap[depth];
- if (options.disallowedTagsMode === 'discard') {
- frame.updateParentNodeText();
- return;
- }
- tempResult = result;
- result = '';
- }
-
- if (transformMap[depth]) {
- name = transformMap[depth];
- delete transformMap[depth];
- }
-
- if (options.exclusiveFilter && options.exclusiveFilter(frame)) {
- result = result.substr(0, frame.tagPosition);
- return;
- }
-
- frame.updateParentNodeMediaChildren();
- frame.updateParentNodeText();
-
- if (options.selfClosing.indexOf(name) !== -1) {
- // Already output />
- if (skip) {
- result = tempResult;
- tempResult = '';
- }
- return;
- }
-
- result += '' + name + '>';
- if (skip) {
- result = tempResult + escapeHtml(result);
- tempResult = '';
- }
- }
- }, options.parser);
- parser.write(html);
- parser.end();
-
- return result;
-
- function initializeState() {
- result = '';
- depth = 0;
- stack = [];
- skipMap = {};
- transformMap = {};
- skipText = false;
- skipTextDepth = 0;
- }
-
- function escapeHtml(s, quote) {
- if (typeof (s) !== 'string') {
- s = s + '';
- }
- if (options.parser.decodeEntities) {
- s = s.replace(/&/g, '&').replace(//g, '>');
- if (quote) {
- s = s.replace(/\"/g, '"');
- }
- }
- // TODO: this is inadequate because it will pass `&0;`. This approach
- // will not work, each & must be considered with regard to whether it
- // is followed by a 100% syntactically valid entity or not, and escaped
- // if it is not. If this bothers you, don't set parser.decodeEntities
- // to false. (The default is true.)
- s = s.replace(/&(?![a-zA-Z0-9#]{1,20};)/g, '&') // Match ampersands not part of existing HTML entity
- .replace(//g, '>');
- if (quote) {
- s = s.replace(/\"/g, '"');
- }
- return s;
- }
-
- function naughtyHref(name, href) {
- // Browsers ignore character codes of 32 (space) and below in a surprising
- // number of situations. Start reading here:
- // https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_tab
- // eslint-disable-next-line no-control-regex
- href = href.replace(/[\x00-\x20]+/g, '');
- // Clobber any comments in URLs, which the browser might
- // interpret inside an XML data island, allowing
- // a javascript: URL to be snuck through
- href = href.replace(/<\!\-\-.*?\-\-\>/g, '');
- // Case insensitive so we don't get faked out by JAVASCRIPT #1
- var matches = href.match(/^([a-zA-Z]+)\:/);
- if (!matches) {
- // Protocol-relative URL starting with any combination of '/' and '\'
- if (href.match(/^[\/\\]{2}/)) {
- return !options.allowProtocolRelative;
- }
-
- // No scheme
- return false;
- }
- var scheme = matches[1].toLowerCase();
-
- if (has(options.allowedSchemesByTag, name)) {
- return options.allowedSchemesByTag[name].indexOf(scheme) === -1;
- }
-
- return !options.allowedSchemes || options.allowedSchemes.indexOf(scheme) === -1;
- }
-
- /**
- * Filters user input css properties by whitelisted regex attributes.
- *
- * @param {object} abstractSyntaxTree - Object representation of CSS attributes.
- * @property {array[Declaration]} abstractSyntaxTree.nodes[0] - Each object cointains prop and value key, i.e { prop: 'color', value: 'red' }.
- * @param {object} allowedStyles - Keys are properties (i.e color), value is list of permitted regex rules (i.e /green/i).
- * @return {object} - Abstract Syntax Tree with filtered style attributes.
- */
- function filterCss(abstractSyntaxTree, allowedStyles) {
- if (!allowedStyles) {
- return abstractSyntaxTree;
- }
-
- var filteredAST = cloneDeep(abstractSyntaxTree);
- var astRules = abstractSyntaxTree.nodes[0];
- var selectedRule;
-
- // Merge global and tag-specific styles into new AST.
- if (allowedStyles[astRules.selector] && allowedStyles['*']) {
- selectedRule = mergeWith(
- cloneDeep(allowedStyles[astRules.selector]),
- allowedStyles['*'],
- function(objValue, srcValue) {
- if (Array.isArray(objValue)) {
- return objValue.concat(srcValue);
- }
- }
- );
- } else {
- selectedRule = allowedStyles[astRules.selector] || allowedStyles['*'];
- }
-
- if (selectedRule) {
- filteredAST.nodes[0].nodes = astRules.nodes.reduce(filterDeclarations(selectedRule), []);
- }
-
- return filteredAST;
- }
-
- /**
- * Extracts the style attribues from an AbstractSyntaxTree and formats those
- * values in the inline style attribute format.
- *
- * @param {AbstractSyntaxTree} filteredAST
- * @return {string} - Example: "color:yellow;text-align:center;font-family:helvetica;"
- */
- function stringifyStyleAttributes(filteredAST) {
- return filteredAST.nodes[0].nodes
- .reduce(function(extractedAttributes, attributeObject) {
- extractedAttributes.push(
- attributeObject.prop + ':' + attributeObject.value
- );
- return extractedAttributes;
- }, [])
- .join(';');
- }
-
- /**
- * Filters the existing attributes for the given property. Discards any attributes
- * which don't match the whitelist.
- *
- * @param {object} selectedRule - Example: { color: red, font-family: helvetica }
- * @param {array} allowedDeclarationsList - List of declarations which pass whitelisting.
- * @param {object} attributeObject - Object representing the current css property.
- * @property {string} attributeObject.type - Typically 'declaration'.
- * @property {string} attributeObject.prop - The CSS property, i.e 'color'.
- * @property {string} attributeObject.value - The corresponding value to the css property, i.e 'red'.
- * @return {function} - When used in Array.reduce, will return an array of Declaration objects
- */
- function filterDeclarations(selectedRule) {
- return function (allowedDeclarationsList, attributeObject) {
- // If this property is whitelisted...
- if (selectedRule.hasOwnProperty(attributeObject.prop)) {
- var matchesRegex = selectedRule[attributeObject.prop].some(function(regularExpression) {
- return regularExpression.test(attributeObject.value);
- });
-
- if (matchesRegex) {
- allowedDeclarationsList.push(attributeObject);
- }
- }
- return allowedDeclarationsList;
- };
- }
-
- function filterClasses(classes, allowed) {
- if (!allowed) {
- // The class attribute is allowed without filtering on this tag
- return classes;
- }
- classes = classes.split(/\s+/);
- return classes.filter(function(clss) {
- return allowed.indexOf(clss) !== -1;
- }).join(' ');
- }
-}
-
-// Defaults are accessible to you so that you can use them as a starting point
-// programmatically if you wish
-
-var htmlParserDefaults = {
- decodeEntities: true
-};
-sanitizeHtml.defaults = {
- allowedTags: ['h3', 'h4', 'h5', 'h6', 'blockquote', 'p', 'a', 'ul', 'ol',
- 'nl', 'li', 'b', 'i', 'strong', 'em', 'strike', 'abbr', 'code', 'hr', 'br', 'div',
- 'table', 'thead', 'caption', 'tbody', 'tr', 'th', 'td', 'pre', 'iframe'],
- disallowedTagsMode: 'discard',
- allowedAttributes: {
- a: ['href', 'name', 'target'],
- // We don't currently allow img itself by default, but this
- // would make sense if we did. You could add srcset here,
- // and if you do the URL is checked for safety
- img: ['src']
- },
- // Lots of these won't come up by default because we don't allow them
- selfClosing: ['img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta'],
- // URL schemes we permit
- allowedSchemes: ['http', 'https', 'ftp', 'mailto'],
- allowedSchemesByTag: {},
- allowedSchemesAppliedToAttributes: ['href', 'src', 'cite'],
- allowProtocolRelative: true,
- enforceHtmlBoundary: false
-};
-
-sanitizeHtml.simpleTransform = function(newTagName, newAttribs, merge) {
- merge = (merge === undefined) ? true : merge;
- newAttribs = newAttribs || {};
-
- return function(tagName, attribs) {
- var attrib;
- if (merge) {
- for (attrib in newAttribs) {
- attribs[attrib] = newAttribs[attrib];
- }
- } else {
- attribs = newAttribs;
- }
-
- return {
- tagName: newTagName,
- attribs: attribs
- };
- };
-};
diff --git a/test/test.js b/test/test.js
index b142249..f0988f0 100644
--- a/test/test.js
+++ b/test/test.js
@@ -1,11 +1,10 @@
-/* eslint-disable no-useless-escape */
-var assert = require('assert');
+const assert = require('assert');
const sinon = require('sinon');
describe('sanitizeHtml', function() {
- var sanitizeHtml;
+ let sanitizeHtml;
it('should be successfully initialized', function() {
- sanitizeHtml = require('../dist/sanitize-html.js');
+ sanitizeHtml = require('../index.js');
});
it('should escape self closing tags', () => {
assert.equal(sanitizeHtml('before
after', {
@@ -14,7 +13,14 @@ describe('sanitizeHtml', function() {
allowedAttributes: false
}), 'before <img src="test.png" /> after');
});
- it('should pass through simple well-formed whitelisted markup', function() {
+ it('should handle numbers as strings', () => {
+ assert.equal(sanitizeHtml(5, {
+ allowedTags: [ 'b', 'em', 'i', 's', 'small', 'strong', 'sub', 'sup', 'time', 'u' ],
+ allowedAttributes: {},
+ disallowedTagsMode: 'recursiveEscape'
+ }), '5');
+ });
+ it('should pass through simple, well-formed markup', function() {
assert.equal(sanitizeHtml('
Hello there
Hello there
Whee!
'), 'Blah blah blahWhee!
'); }); - it('should reject markup not whitelisted without destroying its text', function() { + it('should return an empty string when input is explicit "undefined"', function() { + assert.equal(sanitizeHtml(undefined), ''); + }); + it('should return an empty string when input is explicit "null"', function() { + assert.equal(sanitizeHtml(null), ''); + }); + it('should return an empty string when input is not provided', function() { + assert.equal(sanitizeHtml(), ''); + }); + it('should return an empty string when input is an empty string', function() { + assert.equal(sanitizeHtml(''), ''); + }); + it('should reject markup not allowlisted without destroying its text', function() { assert.equal(sanitizeHtml('
Whee
Again
Wowcool', {
- allowedTags: sanitizeHtml.defaults.allowedTags.concat(['img'])
+ allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'img' ])
}), '
Whee
Again
Wowcool
'); }); it('should reject hrefs that are not relative, ftp, http, https or mailto', function() { @@ -89,22 +127,39 @@ describe('sanitizeHtml', function() { assert.equal(sanitizeHtml('Paragraph
'), 'NiftyParagraph
'); }); it('should discard the content of fibble elements if specified for nonTextTags', function() { - assert.equal(sanitizeHtml('Paragraph
', { nonTextTags: ['fibble'] }), 'Paragraph
'); + assert.equal(sanitizeHtml('Paragraph
', { nonTextTags: [ 'fibble' ] }), 'Paragraph
'); }); it('should retain allowed tags within a fibble element if fibble is not specified for nonTextTags', function() { assert.equal(sanitizeHtml('Paragraph
', {}), 'NiftyParagraph
'); }); it('should discard allowed tags within a fibble element if fibble is specified for nonTextTags', function() { - assert.equal(sanitizeHtml('Paragraph
', { nonTextTags: ['fibble'] }), 'Paragraph
'); + assert.equal(sanitizeHtml('Paragraph
', { nonTextTags: [ 'fibble' ] }), 'Paragraph
'); }); it('should preserve textarea content if textareas are allowed', function() { assert.equal(sanitizeHtml('Paragraph
', { - allowedTags: ['textarea', 'p'] + allowedTags: [ 'textarea', 'p' ] }), 'Paragraph
'); }); it('should preserve entities as such', function() { assert.equal(sanitizeHtml('<Kapow!>'), '<Kapow!>'); }); + it('should dump closing tags which do not have any opening tags.', function() { + assert.equal(sanitizeHtml('inner text 1
inner text 2
inner text 3
inner text 1
inner text 2
inner text 3
inner text 1
inner text 2
inner text 3
Whee
'), 'Whee
'); }); @@ -124,11 +179,14 @@ describe('sanitizeHtml', function() { assert.equal(sanitizeHtml('Hax'), 'Hax'); }); it('should dump character codes 1-32 even when escaped with padding rather than trailing ;', function() { + // htmlparser2 10.x correctly decodes zero-padded numeric entities. + // decodes to U+0001, which is stripped as a control char, + // revealing the javascript: scheme assert.equal(sanitizeHtml('Hax'), 'Hax'); - // This one is weird, but the browser does not interpret it - // as a scheme, so we're OK. That character is 65535, not null. I - // think it's a limitation of the entities module - assert.equal(sanitizeHtml('Hax'), 'Hax'); + // decodes to U+FFFD (replacement character per HTML spec), + // which is not a control char, so the URL is preserved safely since + // browsers don't interpret java�script: as javascript: + assert.equal(sanitizeHtml('Hax'), 'Hax'); }); it('should still like nice schemes', function() { assert.equal(sanitizeHtml('Hi'), 'Hi'); @@ -142,19 +200,19 @@ describe('sanitizeHtml', function() { it('should replace ol to ul and add class attribute with foo value', function() { assert.equal(sanitizeHtml('text before
text after
text before text after
'); + }); + it('should add new text when not initially set and replace attributes when they are changed by transforming function', function () { assert.equal(sanitizeHtml('', { transformTags: { @@ -257,7 +328,7 @@ describe('sanitizeHtml', function() { it('Should expose a node\'s inner text and inner HTML to the filter', function() { assert.strictEqual( - sanitizeHtml('12
3
4
12
3
These links hack more hack have disallowed href protocols
', { + exclusiveFilter: function (frame) { + return frame.tag === 'a' && !frame.attribs.src ? 'excludeTag' : false; + } + }), + 'These links hack more hack have disallowed href protocols
' + ); + }); + + it('should keep inner tags when exclusiveFilter returns "excludeTag"', function() { + assert.strictEqual( + sanitizeHtml('This div is badshould be kept as-is
should be kept as-is
.' + ); + }); + + it('should work with escaped tags when exclusiveFilter returns "excludeTag"', function () { + assert.strictEqual( + sanitizeHtml('hellowhee
', { - allowedTags: ['p'], + allowedTags: [ 'p' ], allowedClasses: { - p: ['nifty'] + p: [ 'nifty' ] } } ), 'whee
' ); }); + it('should allow specific classes when allowlisted with allowedClasses for all tags', function() { + assert.equal( + sanitizeHtml( + 'whee
', + { + allowedTags: [ 'p', 'div' ], + allowedClasses: { + '*': [ 'nifty' ] + } + } + ), + 'whee
' + ); + }); + it('should allow all classes that are allowlisted for a single tag or all tags', function() { + assert.equal( + sanitizeHtml( + 'whee
', + { + allowedTags: [ 'p', 'div' ], + allowedClasses: { + '*': [ 'simple' ], + p: [ 'nifty' ], + div: [ 'dippy' ] + } + } + ), + 'whee
' + ); + }); + it('should allow classes that match wildcards for a single tag or all tags', function() { + assert.equal( + sanitizeHtml( + 'whee
', + { + allowedTags: [ 'p' ], + allowedClasses: { + '*': [ 'dippy-*-simple' ], + p: [ 'nifty-*' ] + } + } + ), + 'whee
' + ); + }); + it('should allow all classes if `allowedClasses` contains a single `*`', function() { + assert.equal( + sanitizeHtml( + 'whee
', + { + allowedTags: [ 'p' ], + allowedClasses: { + '*': [ '*' ] + } + } + ), + 'whee
' + ); + }); + it('should allow all classes for a single tag if `allowedClasses` for the tag is false', function() { + assert.equal( + sanitizeHtml( + 'whee
', + { + allowedTags: [ 'p' ], + allowedClasses: { + p: false + } + } + ), + 'whee
' + ); + }); + it('should allow only classes that matches `allowedClasses` regex', function() { + assert.equal( + sanitizeHtml( + 'whee
', + { + allowedTags: [ 'p' ], + allowedClasses: { + p: [ /^nifty\d{2}$/, /^d\w{4}$/ ] + } + } + ), + 'whee
' + ); + }); + it('should allow classes that match `allowedClasses` regex for all tags', function() { + assert.equal( + sanitizeHtml( + 'whee
', + { + allowedClasses: { + '*': [ /^nifty\d{2}$/, /^d\w{4}$/ ] + } + } + ), + 'whee
' + ); + }); it('should allow defining schemes on a per-tag basis', function() { assert.equal( sanitizeHtml( // teeny-tiny valid transparent GIF in a data URL 'whee
', { - allowedTags: ['p'], + allowedTags: [ 'p' ], allowedClasses: { - p: ['nifty'] + p: [ 'nifty' ] } } ), @@ -414,9 +632,9 @@ describe('sanitizeHtml', function() { sanitizeHtml( 'Text
', { - allowedTags: ['p'], - allowedAttributes: { p: ['style'] }, + allowedTags: [ 'p' ], + allowedAttributes: { p: [ 'style' ] }, transformTags: { '*': function (tagName, attribs) { return { @@ -568,34 +786,34 @@ describe('sanitizeHtml', function() { it('should allow attributes to be specified as globs', function() { assert.equal( sanitizeHtml('click me', { - allowedTags: ['a'], - allowedAttributes: { a: ['data-*'] } + allowedTags: [ 'a' ], + allowedAttributes: { a: [ 'data-*' ] } }), 'click me' ); assert.equal( sanitizeHtml('click me', { - allowedTags: ['a'], - allowedAttributes: { a: ['data-*-foo'] } + allowedTags: [ 'a' ], + allowedAttributes: { a: [ 'data-*-foo' ] } }), 'click me' ); }); it('should quote regex chars in attributes specified as globs', function() { assert.equal( sanitizeHtml('click me', { - allowedTags: ['a'], - allowedAttributes: { a: ['data-b.*'] } + allowedTags: [ 'a' ], + allowedAttributes: { a: [ 'data-b.*' ] } }), 'click me' ); }); it('should not escape inner content of script and style tags (when allowed)', function() { assert.equal( sanitizeHtml('
', {
- allowedTags: sanitizeHtml.defaults.allowedTags.concat(['img'])
+ allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'img' ])
}),
'
'
);
@@ -702,17 +927,24 @@ describe('sanitizeHtml', function() {
it('should accept srcset if allowed', function() {
assert.equal(
sanitizeHtml('
', {
- allowedTags: sanitizeHtml.defaults.allowedTags.concat(['img']),
- allowedAttributes: { img: ['src', 'srcset'] }
+ allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'img' ]),
+ allowedAttributes: { img: [ 'src', 'srcset' ] }
}),
'
'
);
+ assert.equal(
+ sanitizeHtml('
', {
+ allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'img' ]),
+ allowedAttributes: { img: [ 'src', 'srcset' ] }
+ }),
+ '
'
+ );
});
it('should drop bogus srcset', function() {
assert.equal(
sanitizeHtml('
', {
- allowedTags: sanitizeHtml.defaults.allowedTags.concat(['img']),
- allowedAttributes: { img: ['src', 'srcset'] }
+ allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'img' ]),
+ allowedAttributes: { img: [ 'src', 'srcset' ] }
}),
'
'
);
@@ -720,39 +952,64 @@ describe('sanitizeHtml', function() {
it('should accept srcset with urls containing commas', function() {
assert.equal(
sanitizeHtml('
', {
- allowedTags: sanitizeHtml.defaults.allowedTags.concat(['img']),
- allowedAttributes: { img: ['src', 'srcset'] }
+ allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'img' ]),
+ allowedAttributes: { img: [ 'src', 'srcset' ] }
}),
'
'
);
});
+
+ it('text from transformTags should not specify tags', function() {
+ const input = '';
+ const want = '<script>alert(1)</script>';
+ // Runs the sanitizer with a policy that turns an attribute into
+ // text. A policy like this might be used to turn inputs into
+ // inline elements that look like the original but which do not
+ // affect form submissions.
+ const got = sanitizeHtml(
+ input,
+ {
+ allowedTags: [ 'u' ],
+ allowedAttributes: { '*': [ 'class' ] },
+ transformTags: {
+ input: function (tagName, attribs) {
+ return {
+ tagName: 'u',
+ attribs: { class: 'inlined-input' },
+ text: attribs.value
+ };
+ }
+ }
+ });
+ assert.equal(got, want);
+ });
it('drop attribute names with meta-characters', function() {
assert.equal(
sanitizeHtml('alert(1)//>', {
- allowedTags: ['span'],
- allowedAttributes: { span: ['data-*'] }
+ allowedTags: [ 'span' ],
+ allowedAttributes: { span: [ 'data-*' ] }
}),
'alert(1)//>'
);
});
it('should sanitize styles correctly', function() {
- var sanitizeString = 'bestetestestestestestsettestestest
'; - var expected = '
- test
- test
- test
- test
test
TESTETESTESTES
bestetestestestestestsettestestest
'; + const sanitizeString = '
- test
- test
- test
- test
test
TESTETESTESTES
bestetestestestestestsettestestest
'; + const expected = '
- test
- test
- test
- test
test
TESTETESTESTES
bestetestestestestestsettestestest
'; assert.equal( sanitizeHtml(sanitizeString, { allowedTags: false, allowedAttributes: { - '*': ['dir'], - p: ['dir', 'style'], - li: ['style'], - span: ['style'] + '*': [ 'dir' ], + p: [ 'dir', 'style' ], + li: [ 'style' ], + span: [ 'style' ] }, allowedStyles: { '*': { // Matches hex - color: [/\#(0x)?[0-9a-f]+/i], - 'text-align': [/left/, /right/, /center/, /justify/, /initial/, /inherit/], - 'font-size': [/36px/] + color: [ /#(0x)?[0-9a-f]+/i ], + 'text-align': [ /left/, /right/, /center/, /justify/, /initial/, /inherit/ ], + 'font-size': [ /36px/ ] } } }).replace(/ /g, ''), expected.replace(/ /g, '') @@ -767,184 +1024,298 @@ describe('sanitizeHtml', function() { '' ); }); - it('Should remote invalid styles', function() { + it('Should remove invalid styles', function() { assert.equal( sanitizeHtml('', { allowedTags: false, allowedAttributes: { - span: ['style'] + span: [ 'style' ] }, allowedStyles: { span: { - color: [/blue/], - 'text-align': [/left/] + color: [ /blue/ ], + 'text-align': [ /left/ ] } } }), '' ); }); + it('Should ignore styles when options.parseStyleAttributes is false', function() { + assert.equal( + sanitizeHtml('', { + allowedTags: false, + allowedAttributes: { + span: [ 'style' ] + }, + parseStyleAttributes: false + }), '' + ); + }); + it('Should throw an error if both allowedStyles is set and && parseStyleAttributes is set to false', function() { + try { + sanitizeHtml('', { + allowedTags: false, + allowedAttributes: { + span: [ 'style' ] + }, + allowedStyles: { + p: { + 'text-align': [ /^justify$/ ] + } + }, + parseStyleAttributes: false + }); + assert(false); + } catch (err) { + assert.equal(err.message, 'allowedStyles option cannot be used together with parseStyleAttributes: false.'); + } + }); + it('Should support !important styles', function() { + assert.equal( + sanitizeHtml('', { + allowedTags: false, + allowedAttributes: { + span: [ 'style' ] + }, + allowedStyles: { + span: { + color: [ /blue/ ] + } + } + }), '' + ); + }); 
it('Should allow a specific style from global', function() { assert.equal( sanitizeHtml('', { allowedTags: false, allowedAttributes: { - span: ['style'] + span: [ 'style' ] }, allowedStyles: { '*': { - color: [/yellow/], - 'text-align': [/center/] + color: [ /yellow/ ], + 'text-align': [ /center/ ] }, span: { - color: [/green/], - 'font-family': [/helvetica/] + color: [ /green/ ], + 'font-family': [ /helvetica/ ] } } }), '' ); }); - it('Should allow hostnames in an iframe that are whitelisted', function() { + it('should delete the script tag', function() { + assert.equal(sanitizeHtml('', { + allowedTags: [ 'script' ], + allowVulnerableTags: true, + allowedAttributes: { + script: [ 'src' ] + }, + allowedScriptHostnames: [ 'www.authorized.com' ] + }), ''); + }); + it('should delete the script tag since src is not a valid URL', function() { + assert.equal(sanitizeHtml('', { + allowedTags: [ 'script' ], + allowVulnerableTags: true, + allowedAttributes: { + script: [ 'src' ] + }, + allowedScriptHostnames: [ 'www.unauthorized.com' ] + }), ''); + }); + it('Should allow domains in a script that are in allowedScriptDomains', function() { + assert.equal( + sanitizeHtml('', { + allowedTags: [ 'script' ], + allowedAttributes: { + script: [ 'src' ] + }, + allowedScriptDomains: [ 'authorized.com' ] + }), '' + ); + }); + it('should delete the script tag content', function() { + assert.equal(sanitizeHtml('', { + allowedTags: [ 'script' ], + allowVulnerableTags: true, + allowedAttributes: { + script: [ 'src' ] + }, + allowedScriptHostnames: [ 'www.authorized.com' ] + }), ''); + }); + it('should delete the script tag content from script tags with no src when allowedScriptHostnames is present', function() { + assert.equal(sanitizeHtml('', { + allowedTags: [ 'script' ], + allowVulnerableTags: true, + allowedAttributes: { + script: [ 'src' ] + }, + allowedScriptHostnames: [ 'www.authorized.com' ] + }), ''); + }); + it('should delete the script tag content from script tags with no src 
when allowedScriptDomains is present', function() { + assert.equal(sanitizeHtml('', { + allowedTags: [ 'script' ], + allowVulnerableTags: true, + allowedAttributes: { + script: [ 'src' ] + }, + allowedScriptDomains: [ 'www.authorized.com' ] + }), ''); + }); + it('Should allow hostnames in a script that are in allowedScriptHostnames', function() { + assert.equal(sanitizeHtml('', { + allowedTags: [ 'script' ], + allowVulnerableTags: true, + allowedAttributes: { + script: [ 'src' ] + }, + allowedScriptHostnames: [ 'www.authorized.com' ] + }), ''); + }); + it('Should allow hostnames in an iframe that are in allowedIframeHostnames', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'] + allowedIframeHostnames: [ 'www.youtube.com', 'player.vimeo.com' ] }), '' ); }); - it('Should remove iframe src urls that are not included in whitelisted hostnames', function() { + it('Should remove iframe src urls that are not included in allowedIframeHostnames', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'] + allowedIframeHostnames: [ 'www.youtube.com', 'player.vimeo.com' ] }), '' ); }); it('Should not allow iframe urls that do not have proper hostname', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 
'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'] + allowedIframeHostnames: [ 'www.youtube.com', 'player.vimeo.com' ] }), '' ); }); it('Should allow iframe through if no hostname option is set', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] } }), '' ); }); - it('Should allow domains in an iframe that are whitelisted', function() { + it('Should allow domains in an iframe that are in allowedIframeDomains', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeDomains: ['zoom.us'] + allowedIframeDomains: [ 'zoom.us' ] }), '' ); }); - it('Should allow second-level domains in an iframe that are whitelisted', function() { + it('Should allow second-level domains in an iframe that are in allowedIframeDomains', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeDomains: ['zoom.us'] + allowedIframeDomains: [ 'zoom.us' ] }), '' ); }); - it('Should remove iframe src urls that are not included in whitelisted domains', function() { + it('Should remove iframe src urls that are not included in allowedIframeDomains', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 
'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeDomains: ['vimeo.com'] + allowedIframeDomains: [ 'vimeo.com' ] }), '' ); }); - it('Should remove iframe src urls with host that ends as whitelisted domains ' + - ' but not preceeded with a dot', function() { + it('Should remove iframe src urls with host that ends as allowed domains but not preceded with a dot', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeDomains: ['zoom.us'] + allowedIframeDomains: [ 'zoom.us' ] }), '' ); }); - it('Should allow hostnames in an iframe that are whitelisted in allowedIframeHostnames ' + - 'and are not whitelisted in allowedIframeDomains', function() { + it('Should allow hostnames in an iframe that are in allowedIframeHostnames and are not in allowedIframeDomains', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'], - allowedIframeDomains: ['zoom.us'] + allowedIframeHostnames: [ 'www.youtube.com', 'player.vimeo.com' ], + allowedIframeDomains: [ 'zoom.us' ] }), '' ); }); - it('Should allow hostnames in an iframe that are not whitelisted in allowedIframeHostnames ' + - 'and are whitelisted in allowedIframeDomains', function() { + it('Should allow hostnames in an iframe that are not in allowedIframeHostnames ' + + 'and are 
allowlisted in allowedIframeDomains', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'], - allowedIframeDomains: ['zoom.us'] + allowedIframeHostnames: [ 'www.youtube.com', 'player.vimeo.com' ], + allowedIframeDomains: [ 'zoom.us' ] }), '' ); }); it('Should allow relative URLs for iframes by default', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] } }), '' ); @@ -952,11 +1323,11 @@ describe('sanitizeHtml', function() { it('Should allow relative URLs for iframes', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, allowIframeRelativeUrls: true }), '' @@ -965,124 +1336,124 @@ describe('sanitizeHtml', function() { it('Should remove relative URLs for iframes', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, allowIframeRelativeUrls: false }), '' ); }); - it('Should remove relative URLs for iframes when whitelisted hostnames specified', function() { + it('Should remove relative URLs for iframes when other 
hostnames are specified in allowedIframeHostnames', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeHostnames: ['www.youtube.com'] + allowedIframeHostnames: [ 'www.youtube.com' ] }), '' ); }); - it('Should allow relative and whitelisted hostname URLs for iframes', function() { + it('Should allow relative and allowlisted hostname URLs for iframes', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, allowIframeRelativeUrls: true, - allowedIframeHostnames: ['www.youtube.com'] + allowedIframeHostnames: [ 'www.youtube.com' ] }), '' ); }); it('Should allow protocol-relative URLs for the right domain for iframes', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'] + allowedIframeHostnames: [ 'www.youtube.com', 'player.vimeo.com' ] }), '' ); }); it('Should not allow protocol-relative iframe urls that do not have proper hostname', function() { assert.equal( sanitizeHtml('', { - allowedTags: ['p', 'iframe', 'a', 'img', 'i'], + allowedTags: [ 'p', 'iframe', 'a', 'img', 'i' ], allowedAttributes: { - iframe: ['src', 'href'], - a: ['src', 'href'], - img: ['src'] + iframe: [ 'src', 'href' ], + a: [ 'src', 'href' ], + img: [ 'src' ] }, - 
allowedIframeHostnames: ['www.youtube.com', 'player.vimeo.com'] + allowedIframeHostnames: [ 'www.youtube.com', 'player.vimeo.com' ] }), '' ); }); it('Should only allow attributes to have any combination of specific values', function() { assert.equal( - sanitizeHtml('', { - allowedTags: sanitizeHtml.defaults.allowedTags.concat(['iframe']), + sanitizeHtml('', { + allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'iframe' ]), allowedAttributes: { iframe: [ { name: 'sandbox', multiple: true, - values: ['allow-popups', 'allow-same-origin', 'allow-scripts'] + values: [ 'allow-popups', 'allow-same-origin', 'allow-scripts' ] }, 'allowfullscreen' ] } - }), ''); + }), ''); }); it('Should only allow attributes that match a specific value', function() { assert.equal( - sanitizeHtml('', { - allowedTags: sanitizeHtml.defaults.allowedTags.concat(['iframe']), + sanitizeHtml('', { + allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'iframe' ]), allowedAttributes: { iframe: [ { name: 'sandbox', multiple: false, - values: ['allow-popups', 'allow-same-origin', 'allow-scripts'] + values: [ 'allow-popups', 'allow-same-origin', 'allow-scripts' ] } ] } - }), ''); + }), ''); } ); it('Should not allow cite urls that do not have an allowed scheme', function() { assert.equal( - sanitizeHtml('
- test
- test
- test
- test
test
TESTETESTESTES
HTTP
HTTPS
MAILTO
TEL
FTP
DATA
LDAP
ACROBAT
VBSCRIPT
FILE
RLOGIN
WEBCAL
JAVASCRIPT
MMS', { - allowedTags: sanitizeHtml.defaults.allowedTags.concat(['q']), - allowedAttributes: { q: ['cite'] }, - allowedSchemes: sanitizeHtml.defaults.allowedSchemes.concat(['tel']) - }), '
HTTP
HTTPS
MAILTO
TEL
FTP
DATA
LDAP
ACROBAT
VBSCRIPT
FILE
RLOGIN
WEBCAL
JAVASCRIPT
MMS'); + sanitizeHtml('
HTTP
HTTPS
MAILTO
TEL
ms-calculator
FTP
DATA
LDAP
ACROBAT
VBSCRIPT
FILE
RLOGIN
WEBCAL
JAVASCRIPT
MMS', { + allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'q' ]), + allowedAttributes: { q: [ 'cite' ] }, + allowedSchemes: sanitizeHtml.defaults.allowedSchemes.concat([ 'tel' ]) + }), '
HTTP
HTTPS
MAILTO
TEL
ms-calculator
FTP
DATA
LDAP
ACROBAT
VBSCRIPT
FILE
RLOGIN
WEBCAL
JAVASCRIPT
MMS'); }); it('Should encode &, <, > and where necessary, "', function() { assert.equal(sanitizeHtml('"< & >" cool', { - allowedTags: ['span'], + allowedTags: [ 'span' ], allowedAttributes: { - span: ['class'] + span: [ 'class' ] } }), '"< & >" cool'); }); it('Should not pass through &0; unescaped if decodeEntities is true (the default)', function() { - assert.equal(sanitizeHtml('
World
World
World
</wiggly>World
World
World
<tiggly>JS</tiggly></wiggly>