base64 encoding in javascript with bit shifting

2019-09-10 11:11发布

问题:

I have the following decode/encode routine. However, the encoding is not working properly (it should print "CMlaKA", not "ClaKA", to the console log). I think the problem is with the bit shifting, but I can't tell where.

Here is a jsfiddle to explain

https://jsfiddle.net/4yfrLv9y/16/

Here is the code (routine is run at the bottom)

// Markup used for every line written to the on-page console.
var consoleLine = "<p class=\"console-line\"></p>";

// Replace the global console with an object whose only method, log(),
// appends the message (inserted as HTML) to the #console-log element.
// Relies on jQuery being available as $.
console = {
    log: function (text) {
        var target = $("#console-log");
        var entry = $(consoleLine);
        entry.html(text);
        target.append(entry);
    }
};

// Base64-style codec that uses a custom key string: '.' sits at index 0 and
// '=' at index 64, so its output is not interchangeable with standard Base64.
var Base64 = {
        // A character's index in this string is its 6-bit value. Index 64
        // ('=') is what encode emits as padding and what decode tests for.
        _keyStr: ".ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=",

    /**
     * Encodes a sequence of numbers as _keyStr characters, consuming three
     * elements and emitting four characters per loop iteration.
     *
     * @param {Array} input Indexable sequence read as input[i]; presumably
     *     byte values 0-255 (TODO confirm with callers).
     * @returns {string} The concatenated four-character groups.
     */
    encode : function (input) {
        var output = [],
            chr1, chr2, chr3, enc1, enc2, enc3, enc4,
            i = 0;
        while (i < input.length) {
            // Reads past the end of input yield undefined.
            chr1 = input[i++];
            chr2 = input[i++];
            chr3 = input[i++];

            // Low 6 bits of chr1.
            enc1 = chr1 & 0x3f;
            // NOTE(review): decode rebuilds the low 4 bits of chr2 from
            // (enc2 >> 2), so the mirror image here would be
            // (chr2 & 0x0f) << 2. With 0x3c the value can exceed 64
            // (chr1 = 67, chr2 = 99 gives 129); charAt() then returns "" and
            // the second character of the group is silently dropped.
            enc2 = (chr1 >> 6) | ((chr2 & 0x3c) << 2);
            // High 4 bits of chr2 plus low 2 bits of chr3.
            enc3 = (chr2 >> 4) | ((chr3 & 0x3) << 4);
            // High 6 bits of chr3.
            enc4 = chr3 >> 2;

            // A missing chr2/chr3 is undefined, for which isNaN() is true;
            // index 64 selects '=' from _keyStr.
            if (isNaN(chr2)) {
                enc3 = enc4 = 64;
            } else if (isNaN(chr3)) {
                enc4 = 64;
            }

            output.push([this._keyStr.charAt(enc1),
                         this._keyStr.charAt(enc2),
                         this._keyStr.charAt(enc3),
                         this._keyStr.charAt(enc4)].join(''));
        }

        return output.join('');
    },

    /**
     * Decodes b with decode() and returns the character code of every
     * character of the decoded string.
     *
     * @param {string} b Encoded text, passed straight to decode().
     * @returns {Array} One number per decoded character.
     */
    decodeAsArray: function (b) {
        var d = this.decode(b),
            a = [],
            c;
                //alert("decoded base64:" + d);
        for (c = 0; c < d.length; c++) {
            a[c] = d.charCodeAt(c)
        }
                //alert("returning a");
        return a
    },

    /**
     * Decodes text produced with _keyStr into a string, consuming four
     * characters and emitting up to three characters per loop iteration.
     *
     * @param {string} input Encoded text (read with charAt).
     * @returns {string} Decoded characters, built with String.fromCharCode.
     */
    decode: function( input ) {
        var output = "";
        var chr1, chr2, chr3 = "";
        var enc1, enc2, enc3, enc4 = "";
        var i = 0;

        // do/while: the body runs once even when input is empty.
        do {
            // charAt() past the end returns "" and indexOf("") is 0, so
            // characters missing from an unpadded final group decode as the
            // value 0. Characters that are not in _keyStr give -1.
            enc1 = this._keyStr.indexOf(input.charAt(i++)) ;
            enc2 = this._keyStr.indexOf(input.charAt(i++)) ;
            enc3 = this._keyStr.indexOf(input.charAt(i++)) ;
            enc4 = this._keyStr.indexOf(input.charAt(i++)) ;

            // Least-significant bits first: enc1 supplies bits 0-5 of chr1
            // and the low 2 bits of enc2 supply bits 6-7, and so on.
            chr1 = (enc1 | ((enc2 & 3) << 6));
            chr2 = (enc2 >> 2) | ((enc3 & 0x0F) << 4);
            chr3 = (enc3 >> 4) | (enc4 << 2);

            output = output + String.fromCharCode(chr1);
            // 64 is the index of '=': skip characters that were only padding.
            if (enc3 != 64) {
                output = output + String.fromCharCode(chr2);
                        }
            if (enc4 != 64) {
                output = output + String.fromCharCode(chr3);
            }
            chr1 = chr2 = chr3 = "";
            enc1 = enc2 = enc3 = enc4 = "";
        } while (i < input.length);

        return (output);
    }

};

basedecode();

/**
 * Demo: decodes the sample text "CMlaKA" with Base64.decode and logs the
 * result. According to the asker, the result should be "CcnK" followed by
 * two NUL characters (character codes 67 99 110 75 0 0).
 */
function basedecode() {
    var sample = "CMlaKA";
    var text = Base64.decode(sample);
    console.log(text);
}

baseencode();

/**
 * Demo: encodes the character codes of "CcnK" with Base64.encode and logs
 * the result followed by the asker's note about the expected output.
 */
function baseencode() {
    var bytes = [67, 99, 110, 75]; // character codes of "CcnK"
    var result = Base64.encode(bytes);
    console.log(result + ' ---- Should be CMlaKA not ClaKA== - why is it different?');
}

回答1:

I couldn't find the algorithm your implementation is based on, but I found this one on Wikipedia and corrected your code to match it.

// Template for a single output line of the on-page console.
var consoleLine = "<p class=\"console-line\"></p>";

// Swap the global console for a stub that renders each logged message, as
// HTML, into the #console-log element. Requires jQuery ($).
console = {
    log: function (text) {
        var container = $("#console-log");
        var line = $(consoleLine);
        line.html(text);
        container.append(line);
    }
};

/**
 * Standard Base64 codec (alphabet A-Z a-z 0-9 + /, '=' padding).
 *
 * encode() and decode() are exact inverses of each other: three bytes are
 * packed most-significant-bit first into four 6-bit symbols.
 */
var Base64 = {
    // The 64 symbols of the alphabet; a symbol's index is its 6-bit value.
    _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",

    // Padding marker. Deliberately not part of _keyStr so that it can never
    // be mistaken for a value while decoding.
    _padChar: "=",

    /**
     * Encodes bytes as Base64 text.
     *
     * @param {string|Array|Uint8Array} input Either a "binary string" whose
     *     character codes are the bytes, or an array-like of byte values.
     * @returns {string} Base64 text, padded with '=' to a multiple of four
     *     characters; "" for empty input.
     * @throws {RangeError} If any value is not an integer in 0..255 (such a
     *     value cannot be represented and would corrupt neighbouring bytes).
     */
    encode: function (input) {
        var isText = typeof input === "string",
            total = input.length,
            groups = [],
            start, offset, count, value, bits;

        for (start = 0; start < total; start += 3) {
            // The final group may hold only one or two real bytes.
            count = Math.min(3, total - start);
            bits = 0;
            for (offset = 0; offset < 3; offset++) {
                value = 0; // absent bytes contribute zero bits
                if (offset < count) {
                    value = isText ? input.charCodeAt(start + offset) : input[start + offset];
                    if (typeof value !== "number" || value !== (value & 0xff)) {
                        throw new RangeError("Base64.encode: value at index " +
                            (start + offset) + " is not a byte (0-255)");
                    }
                }
                bits = (bits << 8) | value;
            }

            // Symbols that would carry no real data are replaced by padding.
            groups.push(
                this._keyStr.charAt((bits >> 18) & 63) +
                this._keyStr.charAt((bits >> 12) & 63) +
                (count > 1 ? this._keyStr.charAt((bits >> 6) & 63) : this._padChar) +
                (count > 2 ? this._keyStr.charAt(bits & 63) : this._padChar)
            );
        }

        return groups.join("");
    },

    /**
     * Decodes Base64 text into an array of byte values.
     *
     * @param {string} b Base64 text, see decode().
     * @returns {number[]} The decoded bytes.
     * @throws {Error} Whatever decode() throws for malformed input.
     */
    decodeAsArray: function (b) {
        var decoded = this.decode(b),
            bytes = [],
            k;
        for (k = 0; k < decoded.length; k++) {
            bytes[k] = decoded.charCodeAt(k);
        }
        return bytes;
    },

    /**
     * Decodes Base64 text into a "binary string" (one character per byte).
     *
     * Whitespace is ignored and trailing '=' padding is optional, so both
     * "Zg==" and "Zg" decode to "f".
     *
     * @param {string} input Base64 text.
     * @returns {string} Decoded bytes as characters with codes 0..255; ""
     *     for empty input.
     * @throws {Error} If the text contains a character outside the alphabet
     *     or has a length no byte sequence could have produced.
     */
    decode: function (input) {
        var text = String(input).replace(/\s+/g, "").replace(/={1,2}$/, ""),
            output = "",
            start, offset, count, value, bits;

        // A lone trailing symbol carries only 6 bits - less than one byte.
        if (text.length % 4 === 1) {
            throw new Error("Base64.decode: truncated input");
        }

        for (start = 0; start < text.length; start += 4) {
            count = Math.min(4, text.length - start);
            bits = 0;
            for (offset = 0; offset < 4; offset++) {
                value = 0; // absent symbols contribute zero bits
                if (offset < count) {
                    value = this._keyStr.indexOf(text.charAt(start + offset));
                    if (value < 0) {
                        throw new Error("Base64.decode: invalid character '" +
                            text.charAt(start + offset) + "'");
                    }
                }
                bits = (bits << 6) | value;
            }

            // 2 symbols -> 1 byte, 3 symbols -> 2 bytes, 4 symbols -> 3 bytes.
            output += String.fromCharCode((bits >> 16) & 0xff);
            if (count > 2) {
                output += String.fromCharCode((bits >> 8) & 0xff);
            }
            if (count > 3) {
                output += String.fromCharCode(bits & 0xff);
            }
        }

        return output;
    }

};

// basedecode();

/**
 * Demo: decodes the sample text "CMlaKA" with Base64.decode and logs the
 * result.
 *
 * NOTE(review): the asker's stated expectation ("CcnK", character codes
 * 67 99 110 75 0 0) was written for the question's custom key string -
 * verify it against whichever Base64 object is in scope here.
 */
function basedecode() {
    var sample = "CMlaKA";
    var text = Base64.decode(sample);
    console.log(text);
}

// baseencode();

/**
 * Demo: passes the character codes of "CcnK" to Base64.encode and logs the
 * result followed by the asker's note about the expected output.
 */
function baseencode() {
    var bytes = [67, 99, 110, 75]; // character codes of "CcnK"
    var result = Base64.encode(bytes);
    console.log(result + ' ---- Should be CMlaKA not +la+A== - where do the + and = signs come from?');
}

/**
 * Encodes string_to_hash with Base64.encode and reports whether the output
 * matches the expected text.
 *
 * The comparison is strict (===) so that a type-coerced match such as
 * "0" versus 0 is reported as a failure rather than a pass.
 *
 * @param {string} string_to_hash Input handed to Base64.encode.
 * @param {string} result Expected encoder output.
 * @returns {string} "<true|false>, expected: <result>; output: <actual>".
 */
function hashAndAssert(string_to_hash, result) {
    var actual = Base64.encode(string_to_hash);
    var matches = actual === result;
    return '' + matches + ', expected: ' + result + '; output: ' + actual;
}

/**
 * Runs the encoder checks and logs one line per case in the form
 * "#<n> Passed <result of hashAndAssert>".
 *
 * The case number is derived from the position in the table so that labels
 * cannot get out of step with the cases.
 */
function unitTest() {
    // [input, expected encoder output]
    var cases = [
        ['', ''],
        ['foo', 'Zm9v'],
        ['foobar', 'Zm9vYmFy']
    ];
    var k;
    for (k = 0; k < cases.length; k++) {
        console.log('#' + (k + 1) + ' Passed ' + hashAndAssert(cases[k][0], cases[k][1]));
    }
}

unitTest();


回答2:

There are a couple of mistakes in your code and they involve not only the encode method, but the decode one as well.

First of all, you are using a bad key string. According to Wikipedia (Base64), the value 0 is encoded as 'A', not '.' as in your example.

This will prevent you from checking your code against public websites for validity.

This is the "standard" key string:

// Indexes 0-63 are the alphabet (A-Z, a-z, 0-9, '+', '/'); index 64 is '='.
_keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

The '=' in the end is for padding and should not be used directly during conversion

Then, you have a problem with your bit-shifting code: you assemble the base64 values in the wrong direction. Look at the Wikipedia article to see which bits of the input bytes make up each base64 value.

Here is a fixed snippet for the decoder:

// Rebuild three bytes from four 6-bit values, most significant bits first.
// enc2 only has bits 0-5, so its top two bits are selected with 0x30
// (a 0xc0 mask would always yield 0 and lose the low two bits of chr1).
chr1 = (enc1 << 2) | ((enc2 & 0x30) >> 4);
chr2 = ((enc2 & 0x0f) << 4) | ((enc3 & 0x3c) >> 2);
chr3 = ((enc3 & 0x03) << 6) | enc4;

Here is a fixed snippet for the encoder:

// Split three bytes into four 6-bit values, most significant bits first:
//   enc1 = top 6 bits of chr1
//   enc2 = low 2 bits of chr1, then top 4 bits of chr2
//   enc3 = low 4 bits of chr2, then top 2 bits of chr3
//   enc4 = low 6 bits of chr3
enc1 =  (chr1 & 0xfc) >> 2;
enc2 = ((chr1 & 0x03) << 4) | ((chr2 & 0xf0) >> 4);
enc3 = ((chr2 & 0x0f) << 2) | ((chr3 & 0xc0) >> 6);
enc4 =   chr3 & 0x3f;

Moreover, you must trim the output values (in the encoder) because they are in unlimited containers, so:

// Clamp each value to 6 bits (0-63) so it is always a valid alphabet index.
// NOTE(review): when the values come from expressions that already mask
// their operands to 6 bits, these lines are purely defensive - confirm
// against the encoder they are combined with.
enc1 = enc1 & 0x3f;
enc2 = enc2 & 0x3f;
enc3 = enc3 & 0x3f;

Assuming you made all these changes, if the input to the decoder is "CMlaKA", the output is [12,217,155,44,16] and then the encoder will return the correct answer.