is it possible to set the highWaterMark of a socket object after it was created:
var http = require('http');
var server = http.createServer();
server.on('upgrade', function(req, socket, head) {
socket.on('data', function(chunk) {
var frame = new WebSocketFrame(chunk);
// skip invalid frames
if (!frame.isValid()) return;
// if the length in the head is unequal to the chunk
// node has maybe split it
if (chunk.length != WebSocketFrame.getLength()) {
socket.once('data', listenOnMissingChunks);
});
});
});
function listenOnMissingChunks(chunk, frame) {
frame.addChunkToPayload(chunk);
if (WebSocketFrame.getLength()) {
// if still corrupted listen once more
} else {
// else proceed
}
}
The above code example does not work. But how do I do it instead?
Further explaination:
When I receive big WebSocket frames they get split into multiple data events. This makes it hard to parse the frames because I do not know if this is a splitted or corrupted frame.
I think you misunderstand the nature of a TCP socket. Despite the fact that TCP sends its data over IP packets, TCP is not a packet protocol. A TCP socket is simply a stream of data. Thus, it is incorrect to view the data
event as a logical message. In other words, one socket.write
on one end does not equate to a single data
event on the other.
There are many reasons that a single write to a socket does not map 1:1 to a single data
event:
- The sender's network stack may combine multiple small writes into a single IP packet. (The Nagle algorithm)
- An IP packet may be fragmented (split into multiple packets) along its journey if its size exceeds any one hop's MTU.
- The receiver's network stack may combine multiple packets into a single
data
event (as seen by your application).
Because of this, a single data
event might contain multiple messages, a single message, or only part of a message.
In order to correctly handle messages sent over a stream, you must buffer incoming data until you have a complete message.
var net = require('net');
var max = 1024 * 1024 // 1 MB, the maximum amount of data that we will buffer (prevent a bad server from crashing us by filling up RAM)
, allocate = 4096; // how much memory to allocate at once, 4 kB (there's no point in wasting 1 MB of RAM to buffer a few bytes)
, buffer=new Buffer(allocate) // create a new buffer that allocates 4 kB to start
, nread=0 // how many bytes we've buffered so far
, nproc=0 // how many bytes in the buffer we've processed (to avoid looping over the entire buffer every time data is received)
, client = net.connect({host:'example.com', port: 8124}); // connect to the server
client.on('data', function(chunk) {
if (nread + chunk.length > buffer.length) { // if the buffer is too small to hold the data
var need = Math.min(chunk.length, allocate); // allocate at least 4kB
if (nread + need > max) throw new Error('Buffer overflow'); // uh-oh, we're all full - TODO you'll want to handle this more gracefully
var newbuf = new Buffer(buffer.length + need); // because Buffers can't be resized, we must allocate a new one
buffer.copy(newbuf); // and copy the old one's data to the new one
buffer = newbuf; // the old, small buffer will be garbage collected
}
chunk.copy(buffer, nread); // copy the received chunk of data into the buffer
nread += chunk.length; // add this chunk's length to the total number of bytes buffered
pump(); // look at the buffer to see if we've received enough data to act
});
client.on('end', function() {
// handle disconnect
});
client.on('error', function(err) {
// handle errors
});
function find(byte) { // look for a specific byte in the buffer
for (var i = nproc; i < nread; i++) { // look through the buffer, starting from where we left off last time
if (buffer.readUInt8(i, true) == byte) { // we've found one
return i;
}
}
}
function slice(bytes) { // discard bytes from the beginning of a buffer
buffer = buffer.slice(bytes); // slice off the bytes
nread -= bytes; // note that we've removed bytes
nproc = 0; // and reset the processed bytes counter
}
function pump() {
var pos; // position of a NULL character
while ((pos = find(0x00)) >= 0) { // keep going while there's a NULL (0x00) somewhere in the buffer
if (pos == 0) { // if there's more than one NULL in a row, the buffer will now start with a NULL
slice(1); // discard it
continue; // so that the next iteration will start with data
}
process(buffer.slice(0,pos)); // hand off the message
slice(pos+1); // and slice the processed data off the buffer
}
}
function process(msg) { // here's where we do something with a message
if (msg.length > 0) { // ignore empty messages
// here's where you have to decide what to do with the data you've received
// experiment with the protocol
}
}
You don't need to. Incoming data will almost certainly be split across two or more reads: this is the nature of TCP and there is nothing you can do about it. Fiddling with obscure socket parameters certainly won't change it. And the data will be lit but certainly not corrupted. Just treat the socket as what it is: a byte stream.