Javascript word-count for any given DOM element

2020-02-17 10:08发布

I'm wondering if there's a way to count the words inside a div for example. Say we have a div like so:

<div id="content">
hello how are you?
</div>

Then have the JS function return an integer of 4.

Is this possible? I have done this with form elements but can't seem to do it for non-form ones.

Any ideas?

g

标签: javascript
8条回答
Bombasti
2楼-- · 2020-02-17 10:14
// match() returns null when the string has no non-whitespace runs, so fall back to an empty array to get 0.
(string_var.match(/[^\s]+/g) || []).length

seems like it's a better method than

// Splits on whitespace runs; leading or trailing whitespace yields empty-string entries that are counted too.
string_var.split(/\s+/).length

At least it won't count "word " as 2 words -- ['word'] rather than ['word', '']. And it doesn't really require any funny add-on logic.

查看更多
一纸荒年 Trace。
3楼-- · 2020-02-17 10:14

Or you can do this:

/**
 * Counts the words in a field's value and optionally reports the result
 * through alert().
 *
 * A word is a maximal run of ASCII letters and digits; every other
 * character acts as a separator.
 *
 * @param {{value: string}} this_field - Object whose `value` string is counted.
 * @param {boolean} [show_word_count=true] - Whether to alert the word count.
 * @param {boolean} [show_char_count=false] - Whether to alert the character count.
 * @returns {number} The number of words found in `this_field.value`.
 */
function CountWords (this_field, show_word_count, show_char_count) {
    // `== null` matches both null and undefined, so omitted arguments get defaults.
    var showWords = show_word_count == null ? true : show_word_count;
    var showChars = show_char_count == null ? false : show_char_count;

    var text = this_field.value;
    var char_count = text.length;

    // match() returns null when there are no alphanumeric runs at all.
    var words = text.match(/[A-Za-z0-9]+/g);
    var word_count = words ? words.length : 0;

    var wordOrWords = word_count === 1 ? " word" : " words";
    var charOrChars = char_count === 1 ? " character" : " characters";

    // Logical && (not bitwise &) so any pair of truthy flags shows both counts.
    if (showWords && showChars) {
        alert ("Word Count:\n" + "    " + word_count + wordOrWords + "\n" + "    " + char_count + charOrChars);
    } else if (showWords) {
        alert ("Word Count:  " + word_count + wordOrWords);
    } else if (showChars) {
        alert ("Character Count:  " + char_count + charOrChars);
    }
    return word_count;
}
查看更多
女痞
4楼-- · 2020-02-17 10:17

The get_text function in Paolo Bergantino's answer didn't work properly for me when two child nodes have no space between them. For example, <h1>heading</h1><p>paragraph</p> would be returned as headingparagraph (notice the lack of space between the words). Prepending a space to the nodeValue fixes this. That introduces a space at the front of the text, but I found a word count function that trims it off (plus it uses several regexps to ensure it counts words only). The word count and edited get_text functions are below:

/**
 * Recursively collects the text beneath a node, prefixing every
 * non-element node's value with a space so that text from adjacent
 * elements does not run together.
 *
 * @param {{childNodes: ArrayLike<object>}} el - Node whose descendants are read.
 * @returns {string} The concatenated node values, each preceded by a space.
 */
function get_text(el) {
    // Declared locally: an undeclared `ret` would be a global shared by every
    // recursive call and is a ReferenceError in strict mode.
    var ret = "";
    var length = el.childNodes.length;
    for (var i = 0; i < length; i++) {
        var node = el.childNodes[i];
        // nodeType 8 is a comment node; its content is skipped.
        if (node.nodeType !== 8) {
            // nodeType 1 is an element: recurse into it; otherwise take the node's value.
            ret += node.nodeType !== 1 ? ' ' + node.nodeValue : get_text(node);
        }
    }
    return ret;
}

/**
 * Counts the words in a string.
 *
 * All whitespace (spaces, tabs, line breaks) separates words. Every other
 * character that is not an ASCII letter or digit is removed before
 * counting, so "don't" counts as one word and numbers count as words.
 *
 * @param {string} fullStr - The text to count.
 * @returns {number} The number of words; 0 when nothing countable remains.
 */
function wordCount(fullStr) {
    var cleaned = fullStr
        // Normalise every whitespace run (including tabs) to one space so
        // the punctuation strip below cannot merge neighbouring words.
        .replace(/\s+/g, " ")
        .replace(/[^A-Za-z0-9 ]+/g, "")
        .replace(/^ +| +$/g, "");

    // "".split(...) yields [""], which would otherwise be counted as one word.
    if (cleaned.length === 0) {
        return 0;
    }
    // Stripping punctuation can leave several spaces in a row ("a - b").
    return cleaned.split(/ +/).length;
}

EDIT

kennebec's word counter is really good. But the one I've found includes a number as a word which is what I needed. Still, that's easy to add to kennebec's. But kennebec's text retrieval function will have the same problem.

查看更多
爱情/是我丢掉的垃圾
5楼-- · 2020-02-17 10:21

Or just use Countable.js to do the hard job ;)

查看更多
劫难
6楼-- · 2020-02-17 10:26

If you know that the DIV is only going to have text in it, you can KISS:

// Counts the pieces of the element's innerHTML separated by single spaces;
// consecutive spaces produce empty pieces that are counted as well.
var count = document.getElementById('content').innerHTML.split(' ').length;

If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:

/**
 * Recursively concatenates the values of all non-element, non-comment
 * nodes beneath a node.
 *
 * @param {{childNodes: ArrayLike<object>}} el - Node whose descendants are read.
 * @returns {string} The concatenated node values, in document order.
 */
function get_text(el) {
    // Declared locally: an undeclared `ret` would be a global shared by every
    // recursive call and is a ReferenceError in strict mode.
    var ret = "";
    var length = el.childNodes.length;
    for (var i = 0; i < length; i++) {
        var node = el.childNodes[i];
        // nodeType 8 is a comment node; its content is skipped.
        if (node.nodeType !== 8) {
            // nodeType 1 is an element: recurse into it; otherwise take the node's value.
            ret += node.nodeType !== 1 ? node.nodeValue : get_text(node);
        }
    }
    return ret;
}
// Gather the text of the #content element, then count the pieces between single spaces.
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;

This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.

EDIT:

As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:

// Split on runs of whitespace so consecutive spaces do not produce extra entries.
var count = words.split(/\s+/).length;

The only difference being on what we're passing to the split function.

查看更多
The star\"
7楼-- · 2020-02-17 10:33
/**
 * Collects the data of every text node beneath a node, in document order.
 *
 * @param {Node} hoo - Root node to walk; a falsy value yields an empty array.
 * @returns {string[]} One entry per text node found.
 */
document.deepText = function deepText(hoo) {
    var texts = [];
    if (hoo) {
        for (var node = hoo.firstChild; node != null; node = node.nextSibling) {
            // nodeType 3 is a text node.
            if (node.nodeType === 3) {
                texts.push(node.data);
            } else {
                // Recurse through the function's own name; arguments.callee
                // throws a TypeError in strict mode.
                texts = texts.concat(deepText(node));
            }
        }
    }
    return texts;
};

I'd be fairly strict about what a word is-

/**
 * Counts the words in the text beneath a node.
 *
 * A word is a run of ASCII letters, apostrophes and hyphens; digits and
 * other characters are not part of a word.
 *
 * @param {Node} hoo - Root node passed to document.deepText.
 * @returns {number} The number of words; 0 when the text contains none.
 */
function countwords(hoo){
    var text = document.deepText(hoo).join(' ');
    // match() returns null, not an empty array, when nothing matches.
    var found = text.match(/[A-Za-z\'\-]+/g);
    return found ? found.length : 0;
}
// Show the word count for the whole page body.
alert(countwords(document.body))
查看更多
登录 后发表回答