Nodejs: Set highWaterMark of socket object

2019-01-26 21:49发布

问题:

Is it possible to set the highWaterMark of a socket object after it has been created?

var http = require('http');

// Bare HTTP server; in this snippet it only listens for 'upgrade' requests.
var server = http.createServer();

// On an 'upgrade' request, read raw 'data' events from the socket and try to
// interpret every chunk as a WebSocket frame.
server.on('upgrade', function(req, socket, head) {
    socket.on('data', function(chunk) {
        var frame = new WebSocketFrame(chunk);

        // skip invalid frames
        if (!frame.isValid()) return;

        // if the length in the head is unequal to the chunk
        // node has maybe split it
        // NOTE(review): getLength() is called on the WebSocketFrame class rather
        // than on `frame`, exactly as in the original snippet - confirm which
        // one is intended.
        if (chunk.length != WebSocketFrame.getLength()) {
            // A 'data' listener only receives the chunk, so the pending frame is
            // handed to listenOnMissingChunks explicitly through a closure.
            socket.once('data', function(nextChunk) {
                listenOnMissingChunks(nextChunk, frame);
            });
        }
    });
});

/**
 * Adds a follow-up chunk to the payload of a frame that is still being
 * assembled, then checks whether more data is needed.
 *
 * @param {*} chunk - data passed on to frame.addChunkToPayload().
 * @param {*} frame - object exposing addChunkToPayload(); presumably a
 *     WebSocketFrame instance - confirm against the caller.
 */
function listenOnMissingChunks(chunk, frame) {
    frame.addChunkToPayload(chunk);

    // NOTE(review): a truthy result is treated as "frame still not usable",
    // following the original comments - the meaning of getLength() is not
    // visible here.
    var needsMore = WebSocketFrame.getLength();
    if (!needsMore) {
        // nothing is missing any more: proceed (placeholder)
        return;
    }

    // still corrupted: listen once more (placeholder)
}

The above code example does not work. But how do I do it instead?

Further explanation: When I receive big WebSocket frames, they get split into multiple data events. This makes it hard to parse the frames because I do not know whether a chunk belongs to a split frame or to a corrupted one.

回答1:

I think you misunderstand the nature of a TCP socket. Despite the fact that TCP sends its data over IP packets, TCP is not a packet protocol. A TCP socket is simply a stream of data. Thus, it is incorrect to view the data event as a logical message. In other words, one socket.write on one end does not equate to a single data event on the other.

There are many reasons that a single write to a socket does not map 1:1 to a single data event:

  • The sender's network stack may combine multiple small writes into a single IP packet. (The Nagle algorithm)
  • An IP packet may be fragmented (split into multiple packets) along its journey if its size exceeds any one hop's MTU.
  • The receiver's network stack may combine multiple packets into a single data event (as seen by your application).

Because of this, a single data event might contain multiple messages, a single message, or only part of a message.

In order to correctly handle messages sent over a stream, you must buffer incoming data until you have a complete message.

var net = require('net');


// Shared state for the buffering client. Each variable gets its own statement:
// in a comma-chained declaration a stray `;` in front of a trailing comment
// ends the chain and turns the following lines into a syntax error.

// 1 MB, the maximum amount of data that we will buffer
// (prevents a bad server from crashing us by filling up RAM)
var max = 1024 * 1024;

// how much memory to allocate at once, 4 kB
// (there's no point in wasting 1 MB of RAM to buffer a few bytes)
var allocate = 4096;

// receive buffer, 4 kB to start; Buffer.alloc() replaces the deprecated
// `new Buffer(size)` and returns zero-filled memory
var buffer = Buffer.alloc(allocate);

// how many bytes we've buffered so far
var nread = 0;

// how many bytes in the buffer we've processed
// (to avoid looping over the entire buffer every time data is received)
var nproc = 0;

// connect to the server
var client = net.connect({host:'example.com', port: 8124});

// Appends every received chunk to the shared receive buffer, growing the
// buffer when it is too small, and then calls pump() to look for complete
// messages.
//
// Throws Error('Buffer overflow') when growing would take the buffered data
// past `max`.
client.on('data', function(chunk) {
    if (nread + chunk.length > buffer.length) { // if the buffer is too small to hold the data
        // Grow by at least `allocate` bytes, and always by enough to fit the
        // whole chunk. (Math.min here would size the new buffer too small for
        // any chunk larger than `allocate`, and chunk.copy() below would then
        // silently drop the bytes that do not fit.)
        var need = Math.max(chunk.length, allocate);
        if (nread + need > max) throw new Error('Buffer overflow'); // uh-oh, we're all full - TODO you'll want to handle this more gracefully

        // Buffers can't be resized, so allocate a bigger one and move the data
        // over. Buffer.alloc() replaces the deprecated `new Buffer(size)`.
        var newbuf = Buffer.alloc(buffer.length + need);
        buffer.copy(newbuf); // copy the old buffer's data to the new one
        buffer = newbuf; // the old, small buffer will be garbage collected
    }

    chunk.copy(buffer, nread); // copy the received chunk of data into the buffer
    nread += chunk.length; // add this chunk's length to the total number of bytes buffered

    pump(); // look at the buffer to see if we've received enough data to act
});

// Listener for the socket's 'end' event. The body is a placeholder: nothing
// happens here yet.
client.on('end', function() {
    // handle disconnect
});


// Listener for the socket's 'error' event. The body is a placeholder, so the
// received `err` is currently ignored.
client.on('error', function(err) {
    // handle errors
});


/**
 * Looks for a specific byte in the not-yet-processed part of the buffer,
 * i.e. the index range [nproc, nread).
 *
 * @param {number} byte - byte value to search for.
 * @returns {number} index of the first match, or -1 when there is none
 *     (an explicit -1 instead of an implicit `undefined`).
 */
function find(byte) {
    for (var i = nproc; i < nread; i++) { // start from where we left off last time
        // Plain index access reads one unsigned byte; the old `noAssert`
        // argument of readUInt8() no longer exists in current Node.js.
        if (buffer[i] === byte) { // we've found one
            return i;
        }
    }
    return -1;
}
/**
 * Drops `bytes` bytes from the front of the shared buffer.
 *
 * The buffer variable is re-pointed at a view of the same memory that starts
 * `bytes` further in; nothing is copied. The buffered-byte counter shrinks by
 * the same amount and the processed-byte counter starts over at zero.
 *
 * @param {number} bytes - number of leading bytes to discard.
 */
function slice(bytes) {
    nproc = 0; // scanning starts over at the new front of the buffer
    nread = nread - bytes; // that many buffered bytes are gone
    buffer = buffer.subarray(bytes); // view without the leading bytes
}

/**
 * Extracts every complete, NULL-terminated message currently in the buffer
 * and hands each one to process().
 *
 * Whatever remains afterwards contains no NULL byte, so it is marked as
 * already scanned; the next call then only looks at newly received bytes.
 */
function pump() {
    var pos; // position of a NULL character

    while ((pos = find(0x00)) >= 0) { // keep going while there's a NULL (0x00) somewhere in the buffer
        if (pos == 0) { // if there's more than one NULL in a row, the buffer will now start with a NULL
            slice(1); // discard it
            continue; // so that the next iteration will start with data
        }
        process(buffer.subarray(0, pos)); // hand off the message (a view, not a copy)
        slice(pos + 1); // and slice the processed data off the buffer
    }

    // find() came up empty for everything up to nread. Recording that keeps
    // the next call from rescanning the same bytes; slice() resets nproc to 0
    // whenever data is removed from the front.
    nproc = nread;
}

/**
 * Hook that receives one complete message taken out of the buffer.
 * Currently a placeholder: empty messages are skipped and nothing is done
 * with non-empty ones.
 *
 * NOTE(review): when declared at module top level this name shadows Node's
 * global `process` object inside the module.
 *
 * @param {*} msg - the message; only its `length` is looked at here.
 */
function process(msg) {
    if (!(msg.length > 0)) return; // ignore empty messages

    // here's where you have to decide what to do with the data you've received
    // experiment with the protocol
}


回答2:

You don't need to. Incoming data will almost certainly be split across two or more reads: this is the nature of TCP and there is nothing you can do about it. Fiddling with obscure socket parameters certainly won't change it. And the data will be split but certainly not corrupted. Just treat the socket as what it is: a byte stream.