Decode UTF-8 with Javascript

2019-01-05 00:45发布

I have Javascript in an XHTML web page that is passing UTF-8 encoded strings. It needs to continue to pass the UTF-8 version, as well as decode it. How is it possible to decode a UTF-8 string for display?

<script type="text/javascript">
// <![CDATA[
function updateUser(usernameSent){
    var usernameReceived = usernameSent; // Current value: Größe
    var usernameDecoded = usernameReceived;  // Decode to: Größe
    var html2id = '';
    html2id += 'Encoded: ' + usernameReceived + '<br />Decoded: ' + usernameDecoded;
    document.getElementById('userId').innerHTML = html2id;
}
// ]]>
</script>

9条回答
戒情不戒烟
2楼-- · 2019-01-05 00:48

I searched for a simple solution and this works well for me:

//input data
view = new Uint8Array(data);

//output string
serialString = ua2text(view);

//convert UTF8 to string
function ua2text(ua) {
    s = "";
    for (var i = 0; i < ua.length; i++) {
        s += String.fromCharCode(ua[i]);
    }
    return s;               
}

Only issue I have is sometimes I get one character at a time. This might be by design with my source of the arraybuffer. I'm using https://github.com/xseignard/cordovarduino to read serial data on an android device.

查看更多
Animai°情兽
3楼-- · 2019-01-05 00:50

Here is a solution handling all Unicode code points include upper (4 byte) values and supported by all modern browsers (IE and others > 5.5). It uses decodeURIComponent(), but NOT the deprecated escape/unescape functions:

function utf8_to_str(a) {
    for(var i=0, s=''; i<a.length; i++) {
        var h = a[i].toString(16)
        if(h.length < 2) h = '0' + h
        s += '%' + h
    }
    return decodeURIComponent(s)
}

Tested and available on GitHub

To create UTF-8 from a string:

function utf8_from_str(s) {
    for(var i=0, enc = encodeURIComponent(s), a = []; i < enc.length;) {
        if(enc[i] === '%') {
            a.push(parseInt(enc.substr(i+1, 2), 16))
            i += 3
        } else {
            a.push(enc.charCodeAt(i++))
        }
    }
    return a
}

Tested and available on GitHub

查看更多
神经病院院长
4楼-- · 2019-01-05 00:53

Perhaps using the textDecoder will be sufficient.

Not supported in all browsers though. But it might be sufficient if you use crosswalk or any other use case where you know what browser is used.

var decoder = new TextDecoder('utf-8'),
    decodedMessage;

decodedMessage = decoder.decode(message.data);
查看更多
冷血范
5楼-- · 2019-01-05 00:55

Update @Albert's answer adding condition for emoji.

function Utf8ArrayToStr(array) {
    var out, i, len, c;
    var char2, char3, char4;

    out = "";
    len = array.length;
    i = 0;
    while(i < len) {
    c = array[i++];
    switch(c >> 4)
    { 
      case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
        // 0xxxxxxx
        out += String.fromCharCode(c);
        break;
      case 12: case 13:
        // 110x xxxx   10xx xxxx
        char2 = array[i++];
        out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
        break;
      case 14:
        // 1110 xxxx  10xx xxxx  10xx xxxx
        char2 = array[i++];
        char3 = array[i++];
        out += String.fromCharCode(((c & 0x0F) << 12) |
                       ((char2 & 0x3F) << 6) |
                       ((char3 & 0x3F) << 0));
        break;
     case 15:
        // 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
        char2 = array[i++];
        char3 = array[i++];
        char4 = array[i++];
        out += String.fromCodePoint(((c & 0x07) << 18) | ((char2 & 0x3F) << 12) | ((char3 & 0x3F) << 6) | (char4 & 0x3F));

        break;
    }

    return out;
}
查看更多
虎瘦雄心在
6楼-- · 2019-01-05 00:58

I reckon the easiest way would be to use a built-in js functions decodeURI() / encodeURI().

function (usernameSent) {
  var usernameEncoded = usernameSent; // Current value: utf8
  var usernameDecoded = decodeURI(usernameReceived);  // Decoded
  // do stuff
}
查看更多
Anthone
7楼-- · 2019-01-05 00:59

// String to Utf8 ByteBuffer

function strToUTF8(str){
  return Uint8Array.from(encodeURIComponent(str).replace(/%(..)/g,(m,v)=>{return String.fromCodePoint(parseInt(v,16))}), c=>c.codePointAt(0))
}

// Utf8 ByteArray to string

function UTF8toStr(ba){
  return decodeURIComponent(ba.reduce((p,c)=>{return p+'%'+c.toString(16),''}))
}
查看更多
登录 后发表回答