Fastest method to escape HTML tags as HTML entitie

2018-12-31 14:53发布

I'm writing a Chrome extension that involves doing a lot of the following job: sanitizing strings that might contain HTML tags, by converting <, > and & to &lt;, &gt; and &amp;, respectively.

(In other words, the same as PHP's htmlspecialchars(str, ENT_NOQUOTES) – I don't think there's any real need to convert double-quote characters.)

This is the fastest function I have found so far:

function safe_tags(str) {
    return str.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;') ;
}

But there's still a big lag when I have to run a few thousand strings through it in one go.

Can anyone improve on this? It's mostly for strings between 10 and 150 characters, if that makes a difference.

(One idea I had was not to bother encoding the greater-than sign – would there be any real danger with that?)

10条回答
不再属于我。
2楼-- · 2018-12-31 15:22

Martijn's method as a prototype function:

String.prototype.escape = function() {
    var tagsToReplace = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;'
    };
    return this.replace(/[&<>]/g, function(tag) {
        return tagsToReplace[tag] || tag;
    });
};

var a = "<abc>";
var b = a.escape(); // "&lt;abc&gt;"
查看更多
梦该遗忘
3楼-- · 2018-12-31 15:27

You could try passing a callback function to perform the replacement:

var tagsToReplace = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;'
};

function replaceTag(tag) {
    return tagsToReplace[tag] || tag;
}

function safe_tags_replace(str) {
    return str.replace(/[&<>]/g, replaceTag);
}

Here is a performance test: http://jsperf.com/encode-html-entities to compare with calling the replace function repeatedly, and using the DOM method proposed by Dmitrij.

Your way seems to be faster...

Why do you need it, though?

查看更多
谁念西风独自凉
4楼-- · 2018-12-31 15:28

Martijn's method as single function with handling " mark (using in javascript) :

function escapeHTML(html) {
    var fn=function(tag) {
        var charsToReplace = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&#34;'
        };
        return charsToReplace[tag] || tag;
    }
    return html.replace(/[&<>"]/g, fn);
}
查看更多
怪性笑人.
5楼-- · 2018-12-31 15:29

The AngularJS source code also has a version inside of angular-sanitize.js.

var SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
    // Match everything outside of normal chars and " (quote character)
    NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
/**
 * Escapes all potentially dangerous characters, so that the
 * resulting string can be safely inserted into attribute or
 * element text.
 * @param value
 * @returns {string} escaped text
 */
function encodeEntities(value) {
  return value.
    replace(/&/g, '&amp;').
    replace(SURROGATE_PAIR_REGEXP, function(value) {
      var hi = value.charCodeAt(0);
      var low = value.charCodeAt(1);
      return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
    }).
    replace(NON_ALPHANUMERIC_REGEXP, function(value) {
      return '&#' + value.charCodeAt(0) + ';';
    }).
    replace(/</g, '&lt;').
    replace(/>/g, '&gt;');
}
查看更多
余生请多指教
6楼-- · 2018-12-31 15:31

All-in-one script:

// HTML entities Encode/Decode

function htmlspecialchars(str) {
    var map = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\"": "&quot;",
        "'": "&#39;" // ' -> &apos; for XML only
    };
    return str.replace(/[&<>"']/g, function(m) { return map[m]; });
}
function htmlspecialchars_decode(str) {
    var map = {
        "&amp;": "&",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": "\"",
        "&#39;": "'"
    };
    return str.replace(/(&amp;|&lt;|&gt;|&quot;|&#39;)/g, function(m) { return map[m]; });
}
function htmlentities(str) {
    var textarea = document.createElement("textarea");
    textarea.innerHTML = str;
    return textarea.innerHTML;
}
function htmlentities_decode(str) {
    var textarea = document.createElement("textarea");
    textarea.innerHTML = str;
    return textarea.value;
}

http://pastebin.com/JGCVs0Ts

查看更多
呛了眼睛熬了心
7楼-- · 2018-12-31 15:33

I'm not entirely sure about speed, but if you are looking for simplicity I would suggest using the lodash/underscore escape function.

查看更多
登录 后发表回答