Javascript FileReader reads file incorrectly

2019-07-17 01:03发布

问题:

I have a short JavaScript function that takes an uploaded file and displays its hex equivalent. Comparing the original file with the output in a hex editor shows that they match in some places but differ in others.

String.prototype.hexEncode = function(){
  var hex, i;

  var result = "";
  for (i = 0; i < this.length; i++) {
    hex = this.charCodeAt(i).toString(16);
    result += ("" + hex).slice(-4);
  }

  return result
}

function upload() {
  var file = document.getElementById("fileToUpload").files[0];
  var reader = new FileReader();
  reader.readAsText(file, "windows-1252");
  reader.onload = function (evt) {
    var program = evt.target.result.hexEncode();
    program = program;
    console.log(program);
  }
}

Here are the original file and the output next to each other:

2A 2A 54 49 38 33 46 2A 1A 0A 0A 43 72 65 61 74
2A 2A 54 49 38 33 46 2A 1A AA 43 72 65 61 74 65

What is causing the difference in outputs? Any help would be greatly appreciated.

回答1:

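To generate a hex dump in JavaScript, don't use the readAsText method: it decodes the bytes into a JavaScript (UTF-16) string using the given encoding, so the character codes you read back are not necessarily the original byte values. (In addition, hexEncode does not zero-pad each value, which is why the bytes 0A 0A show up as AA in your output.) Instead, read the binary data directly with the readAsArrayBuffer method and work from there: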

function hexDump(file) {
  return new Promise((res, rej) => {
    if (!(file instanceof Blob)) rej('wrong input');
    const reader = new FileReader();
    reader.onload = e => {
      res(hex(reader.result));
    };
    reader.onerror = e => rej('error while reading');
    reader.readAsArrayBuffer(file);
  });
  // gotten from https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest#Example
  function hex(buffer) {
    const hexCodes = [];
    const view = new DataView(buffer);
    for (let i = 0; i < view.byteLength; i += 4) {
      // Using getUint32 reduces the number of iterations needed (we process 4 bytes each time)
      let value = view.getUint32(i)
      // toString(16) will give the hex representation of the number without padding
      let stringValue = value.toString(16)
      // We use concatenation and slice for padding
      let padding = '00000000'
      let paddedValue = (padding + stringValue).slice(-padding.length).toUpperCase();
      hexCodes.push( // simple prettyfying
        paddedValue.slice(0,2),
        paddedValue.slice(2,4),
        paddedValue.slice(4,6),
        paddedValue.slice(6,8)
        );
    }
    return hexCodes.join(' ');
  }
}

// How to use it
inp.onchange = e => hexDump(inp.files[0].slice(0, 100)) // for demo I slice the file
    .then(hex => console.log(hex))
    .catch(e => console.error(e));
<input type="file" id="inp">