Does TCP keepalive (with small timeouts) prevent the client from hanging on recv after the server is dead?
The scenario:
Server and client running on separate machines:
- Client connects to a server over TCP with the KEEPALIVE option enabled
- Client sends "Hello server" and waits for a response
- Server receives "Hello server" and responds "Hello client"
- Client receives response, sleeps 10s and steps 2-4 are repeated (step 1 is now skipped - connection is preserved)
While the client is sleeping, the server is unplugged. Then:
- Client wakes up
- Sends "Hello server" and waits for a response
- After 20 minutes recv gives up, whereas I was expecting KEEPALIVE to abort the recv call after 45 seconds.
Setting KEEPALIVE options:
/**
 * Enables TCP keepalive on the socket `sock` and tunes the probe timing.
 *
 * Configured values: first probe after 15 s of idle time (TCP_KEEPIDLE),
 * 15 s between probes (TCP_KEEPINTVL), 2 unanswered probes before the
 * connection is dropped (TCP_KEEPCNT), i.e. roughly 15 + 2 * 15 = 45 s
 * for a dead peer to be detected on an IDLE connection.
 *
 * Keepalive probes are only sent while the connection is idle. Once data
 * has been sent and is waiting for an ACK, the retransmission timer takes
 * over and a blocked recv() is released only after the retransmission
 * limit (tcp_retries2) is exhausted.
 * NOTE(review): on Linux, TCP_USER_TIMEOUT or SO_RCVTIMEO may be what is
 * actually wanted to bound that case -- confirm against tcp(7)/socket(7).
 *
 * @throws std::string naming the failed call ("getsockopt" or "setsockopt").
 */
void TCPclient::setkeepalive()
{
    int optval;
    socklen_t optlen = sizeof(optval);

    /* Check the status for the keepalive option */
    if (getsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen) < 0) {
        throw std::string("getsockopt");
    }

    /* Turn keepalive on. Failure is reported like every other failure in
     * this function (by throwing) instead of terminating the process. */
    optval = 1;
    optlen = sizeof(optval);
    if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &optval, optlen) < 0) {
        throw std::string("setsockopt");
    }

    /* Number of unanswered probes before the connection is dropped. */
    optval = 2;
    if (setsockopt(sock, SOL_TCP, TCP_KEEPCNT, &optval, optlen) < 0) {
        throw std::string("setsockopt");
    }

    /* Idle time before the first probe is sent. */
    optval = 15;
    if (setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &optval, optlen) < 0) {
        throw std::string("setsockopt");
    }

    /* Interval between successive probes. */
    optval = 15;
    if (setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, &optval, optlen) < 0) {
        throw std::string("setsockopt");
    }
}
linux 3.2.0-84-generic
@MMA's response is correct.
I wrote a similar client that waits 20 seconds before writing. Once the client wakes up and sends the message, the keepalive probes (ACK segments) are no longer sent, because the connection is no longer idle.
After 15 retransmissions of the TCP segment (the limit is configured with tcp_retries2 in /proc/sys/net/ipv4), during which the timeout increases exponentially until it reaches ~2 minutes (in my case), the connection is put into an error state and any pending read or recv returns with ETIMEDOUT (errno 110). In my case this took around 15 minutes; the exact time depends on the RTO. See the tcpdump capture below: there are two ACKs after the three-way handshake (I don't know the reason for the first of these two ACKs), followed by 15 segments carrying data with the push flag set.
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on p2p1, link-type EN10MB (Ethernet), capture size 65535 bytes
01:16:45.296179 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [S], seq 515423022, win 14600, options [mss 1460,sackOK,TS val 19212623 ecr 0,nop,wscale 7], length 0
E..<.a@.@......d4.....'...........9............
.%)O........
01:16:45.477983 IP ec2-52-7-150-140.compute-1.amazonaws.com.10221 > 192.168.2.100.60895: Flags [S.], seq 3672727778, ack 515423023, win 26847, options [mss 1436,sackOK,TS val 114765522 ecr 19212623,nop,wscale 7], length 0
E..<..@.-...4......d'.....`..../..h............
.....%)O....
01:16:45.478046 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [.], ack 1, win 115, options [nop,nop,TS val 19212805 ecr 114765522], length 0
E..4.b@.@......d4.....'..../..`....s.......
.%*.....
01:17:00.512812 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [.], ack 1, win 115, options [nop,nop,TS val 19227840 ecr 114765522], length 0
E..4.c@.@......d4.....'.......`....s.......
.%d.....
01:17:00.731160 IP ec2-52-7-150-140.compute-1.amazonaws.com.10221 > 192.168.2.100.60895: Flags [.], ack 1, win 210, options [nop,nop,TS val 114769336 ecr 19212805], length 0
E..4N.@.-.r.4......d'.....`..../....M......
..=..%*.
01:17:05.478933 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19232806 ecr 114769336], length 14
E..B.d@.@......d4.....'..../..`....s.......
.%x&..=.Hello Word :).
01:17:06.027768 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19233354 ecr 114769336], length 14
E..B.e@.@......d4.....'..../..`....s.......
.%zJ..=.Hello Word :).
01:17:07.120879 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19234448 ecr 114769336], length 14
E..B.f@.@......d4.....'..../..`....s.......
.%~...=.Hello Word :).
01:17:09.312833 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19236640 ecr 114769336], length 14
E..B.g@.@......d4.....'..../..`....s.......
.%. ..=.Hello Word :).
01:17:13.697663 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19241024 ecr 114769336], length 14
E..B.h@.@......d4.....'..../..`....s.......
.%.@..=.Hello Word :).
01:17:22.466187 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19249793 ecr 114769336], length 14
E..B.i@.@......d4.....'..../..`....s.......
.%....=.Hello Word :).
01:17:40.001653 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19267328 ecr 114769336], length 14
E..B.j@.@......d4.....'..../..`....s.......
.%....=.Hello Word :).
01:18:15.074493 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19302401 ecr 114769336], length 14
E..B.k@.@......d4.....'..../..`....s.......
.&....=.Hello Word :).
01:19:25.217799 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19372545 ecr 114769336], length 14
E..B.l@.@......d4.....'..../..`....s.......
.'....=.Hello Word :).
01:21:25.537775 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 19492864 ecr 114769336], length 14
E..B.m@.@......d4.....'..../..`....s.......
.)p...=.Hello Word :).
01:23:25.856854 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, 69336], length 14
E..B.n@.@......d4.....'..../..`....s.......
.+F...=.Hello Word :).
01:25:26.176894 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, 69336], length 14
E..B.o@.@......d4.....'..../..`....s.......
.-....=.Hello Word :).
01:27:26.497691 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, 69336], length 14
E..B.p@.@......d4.....'..../..`....s.......
......=.Hello Word :).
01:29:26.816905 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, 69336], length 14
E..B.q@.@......d4.....'..../..`....s.......
.0....=.Hello Word :).
01:31:27.137013 IP 192.168.2.100.60895 > ec2-52-7-150-140.compute-1.amazonaws.com.10221: Flags [P.], seq 1:15, ack 1, win 115, options [nop,nop,TS val 20094464 ecr 114769336], length 14
E..B.r@.@......d4.....'..../..`....s.......
.2....=.Hello Word :).
The client code I used:
#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <netinet/in.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <sys/socket.h>
#include <stdlib.h>
#include <netinet/tcp.h>
/* TCP port of the server that the client connects to. */
#define DEST_PORT 10221
/* Maximum number of characters of the IPv4 address string taken from argv[1]. */
#define ADDRLEN INET_ADDRSTRLEN
/*
 * Test client: connects to <argv[1]>:DEST_PORT with TCP keepalive enabled
 * (idle 15 s, interval 15 s, 2 probes), sleeps 20 seconds, writes one
 * message and then blocks in read() waiting for the reply.
 *
 * Usage: <program> <IPv4 address>
 *
 * Exit codes: 0 on success, 10 wrong argument count, 2 bad address,
 * 1 socket() failed, 19-23 socket option failures, 3 connect() failed,
 * 4 write() or read() failed.
 */
int main(int argc, char** argv)
{
    int sock;
    struct sockaddr_in their_addr;
    char buffer[] = "Hello Word :)";
    char addrstr[ADDRLEN + 1];
    int optval;
    socklen_t optlen = sizeof(optval);

    if (argc != 2)
    {
        printf("ERROR - Number of args\n");
        return 10;
    }

    /* strncpy does not terminate the destination when the source is
     * ADDRLEN characters or longer, so terminate it explicitly. */
    strncpy(addrstr, argv[1], ADDRLEN);
    addrstr[ADDRLEN] = '\0';

    memset(&their_addr, 0, sizeof(their_addr));
    their_addr.sin_family = AF_INET;
    their_addr.sin_port = htons(DEST_PORT);
    if (inet_pton(AF_INET, addrstr, (void *)&their_addr.sin_addr) != 1)
    {
        printf("ERROR - Converting Address: %d\n", errno);
        return 2;
    }

    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) == -1)
    {
        printf("ERROR - Socket could not be open: %d\n", errno);
        return 1;
    }

    /* From here on every error path closes the socket before returning. */

    /* Check the status for the keepalive option */
    if (getsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen) < 0) {
        printf("ERROR - SOL_SOCKET: %d\n", errno);
        close(sock);
        return 19;
    }

    /* Enable keepalive. */
    optval = 1;
    optlen = sizeof(optval);
    if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &optval, optlen) < 0) {
        printf("ERROR - SOL_SOCKET-2: %d\n", errno);
        close(sock);
        return 20;
    }

    /* Unanswered probes before the connection is dropped. */
    optval = 2;
    if (setsockopt(sock, SOL_TCP, TCP_KEEPCNT, &optval, optlen) < 0) {
        printf("ERROR - SOL_TCP: %d\n", errno);
        close(sock);
        return 21;
    }

    /* Idle time before the first probe. */
    optval = 15;
    if (setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &optval, optlen) < 0) {
        printf("ERROR - SOL_TCP-2: %d\n", errno);
        close(sock);
        return 22;
    }

    /* Interval between probes. */
    optval = 15;
    if (setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, &optval, optlen) < 0) {
        printf("ERROR - SOL_TCP-3: %d\n", errno);
        close(sock);
        return 23;
    }

    if (connect(sock, (const struct sockaddr *)&their_addr,
                (socklen_t)sizeof(their_addr)) == -1)
    {
        printf("ERROR - Could not connect to destination: %d\n", errno);
        close(sock);
        return 3;
    }

    /* Sleep 20 seconds: longer than the 15 s keepalive idle time, so a
     * keepalive probe goes out before any data is written. */
    sleep(20);

    printf("About to write\n");
    /* sizeof(buffer) includes the terminating NUL, so 14 bytes are sent. */
    if (write(sock, (const void *)buffer, sizeof(buffer)) == -1)
    {
        printf("ERROR - Sending message: %d\n", errno);
        close(sock);
        return 4;
    }
    printf("Message Sent to Address %s, Port: %d\n\n", addrstr, DEST_PORT);

    /* Blocks until the peer answers or the TCP stack gives up on the
     * connection; the errno printed on failure identifies the reason. */
    if (read(sock, buffer, sizeof(buffer)) == -1)
    {
        printf("ERROR - Receiving message: %d\n", errno);
        close(sock);
        return 4; /* same exit code as before; only the message was corrected */
    }

    close(sock);
    return 0;
}
I ran this test with a server hosted in AWS. To simulate the server disappearing without the client noticing, I had a public (Elastic) IP associated with the server and, immediately after the three-way handshake, I disassociated the Elastic IP from the server. I cannot paste the server code, but it is not relevant here.
Please note that in this example keepalive probing stops because a message has been sent (the connection is no longer idle).