Linux TUN/TAP: Unable to read data back from TAP devices

2019-04-11 11:34发布

The question is about the proper configuration of a Linux host that would like to make use of the Tun/Tap module.

My Goal:

Making use of existing routing software (APP1 and APP2 in the following), but intercepting and modifying all messages sent and received by it (done by the Mediator).

My Scenario:

              Ubuntu 10.04 Machine
+---------------------------------------------+
|                                             |
|APP1 --- tap1 --- Mediator --- tap2 --- APP2 |
|                                             |
+---------------------------------------------+
  • tap1 and tap2: tap devices set up with the IFF_TAP flag and the IPs 10.0.0.1/24 and 10.0.0.2/24 respectively. The code to create the devices is the following:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/resource.h>
    #include <sys/socket.h>
    #include <linux/if.h>
    #include <linux/if_tun.h>
    
    void createTun(char *, char *, short);
    
    /*
     * Creates the two TAP interfaces used by the test setup and reports each
     * one on stdout once createTun() has returned for it.
     */
    int main(void)
    {
        /* Interface name / address pairs, listed in creation order. */
        static const struct {
            char *name;
            char *addr;
        } devices[] = {
            { "tap1\0", "10.0.0.1/24\0" },
            { "tap2\0", "10.0.0.2/24\0" },
        };
        const short tapFlags = IFF_TAP;
        size_t i;

        for(i = 0; i < sizeof devices / sizeof devices[0]; i++)
        {
            createTun(devices[i].name, devices[i].addr, tapFlags);
            printf("Created %s with IP %s\n", devices[i].name, devices[i].addr);
        }

        return 0;
    }
    
    /*
     * Runs a shell command through popen() and waits for it to terminate.
     *
     * Returns 0 when the command could be started and -1 when popen() itself
     * failed. A non-zero status from pclose() is reported on stderr but is not
     * treated as fatal, so a command that fails (presumably e.g. an address
     * that is already assigned on a re-run) does not abort the program.
     */
    static int runCommand(const char *cmd)
    {
        FILE *fp = popen(cmd, "r");
        int status;

        if(fp == NULL)
            return -1;

        status = pclose(fp);
        if(status != 0)
            fprintf(stderr, "Command \"%s\" did not succeed (status %d)\n", cmd, status);

        return 0;
    }

    /*
     * Creates a persistent TUN/TAP device, brings it up and assigns an address.
     *
     * tunName: requested interface name; must be shorter than IFNAMSIZ.
     * tunIP:   address in the form accepted by "ip addr add" (e.g. "10.0.0.1/24").
     * FLAGS:   value stored in ifr_flags for TUNSETIFF (e.g. IFF_TAP).
     *
     * The device is owned by the effective uid/gid of the caller. Any failure of
     * open()/ioctl()/popen() prints a diagnostic and terminates the process with
     * EXIT_FAILURE. The descriptor opened here is closed before returning; the
     * device was marked persistent beforehand.
     *
     * NOTE: tunName and tunIP are interpolated into a shell command line, so they
     * must come from a trusted source.
     */
    void createTun(char *tunName, char *tunIP, short FLAGS)
    {
        const char *cloneDev = "/dev/net/tun";
        const char *cmdIPLinkUpTemplate = "ip link set %s up";
        const char *cmdIPAddrAddTemplate = "ip addr add %s dev %s";
        char cmd[256];
        int fd, n, owner, group;
        struct ifreq ifr;

        // Refuse names that do not fit into ifr_name including the terminator,
        // instead of writing past the end of the field.
        if(strlen(tunName) >= IFNAMSIZ)
        {
            fprintf(stderr, "Interface name \"%s\" is too long.\n", tunName);
            exit(EXIT_FAILURE);
        }

        owner = geteuid();
        group = getegid();

        // open the clone device
        if((fd = open(cloneDev, O_RDWR)) < 0)
        {
            perror("OPEN CLONEDEV failed.");
            exit(EXIT_FAILURE);
        }

        // ifr is zeroed first, so the copied name is always NUL-terminated.
        memset(&ifr, 0, sizeof(struct ifreq));
        ifr.ifr_flags = FLAGS;
        strncpy(ifr.ifr_name, tunName, IFNAMSIZ - 1);

        // create the device
        if(ioctl(fd, TUNSETIFF, (void *) &ifr) < 0)
        {
            perror("IOCTL SETIFF denied.");
            close(fd);
            exit(EXIT_FAILURE);
        }

        // set dev owner
        if(owner != -1)
        {
            if(ioctl(fd, TUNSETOWNER, owner) < 0)
            {
                perror("IOCTL SETOWNER denied.");
                close(fd);
                exit(EXIT_FAILURE);
            }
        }

        // set dev group
        if(group != -1)
        {
            if(ioctl(fd, TUNSETGROUP, group) < 0)
            {
                perror("IOCTL SETGROUP denied.");
                close(fd);
                exit(EXIT_FAILURE);
            }
        }

        // set dev persistent
        if(ioctl(fd, TUNSETPERSIST, 1) < 0)
        {
            perror("IOCTL SETPERSIST denied.");
            close(fd);
            exit(EXIT_FAILURE);
        }

        // Set dev up. The name is taken from ifr (as handed to TUNSETIFF) and
        // the command is formatted with a bounded snprintf.
        n = snprintf(cmd, sizeof cmd, cmdIPLinkUpTemplate, ifr.ifr_name);
        if(n < 0 || (size_t) n >= sizeof cmd)
        {
            fprintf(stderr, "Command line for link up is too long.\n");
            close(fd);
            exit(EXIT_FAILURE);
        }
        if(runCommand(cmd) < 0)
        {
            perror("POPEN failed.");
            close(fd);
            exit(EXIT_FAILURE);
        }

        // Assign IP
        n = snprintf(cmd, sizeof cmd, cmdIPAddrAddTemplate, tunIP, ifr.ifr_name);
        if(n < 0 || (size_t) n >= sizeof cmd)
        {
            fprintf(stderr, "Command line for address assignment is too long.\n");
            close(fd);
            exit(EXIT_FAILURE);
        }
        if(runCommand(cmd) < 0)
        {
            perror("POPEN failed.");
            close(fd);
            exit(EXIT_FAILURE);
        }

        // The device was marked persistent above; release our descriptor.
        close(fd);

        return;
    }
    
  • Mediator: Small self-written program that simply relays data between tap1 and tap2. The basic structure is the following:

    #include <unistd.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <netinet/ip.h>
    #include <sys/ioctl.h>
    #include <sys/resource.h>
    #include <sys/epoll.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <stdlib.h>
    #include <string.h>
    #include <linux/if.h>
    #include <linux/if_tun.h>
    
    /*
     * Mediator: attaches to tap1 and tap2 through /dev/net/tun and relays every
     * frame read from one descriptor to the other, dumping the data on stdout.
     *
     * The loop only ends on an error, in which case all descriptors opened so
     * far are closed and the process exits with EXIT_FAILURE.
     *
     * NOTE(review): data written to a tap descriptor shows up on that interface
     * as incoming traffic, and data read from it is what the kernel sent out on
     * the interface. Verify that this matches the intended topology.
     */
    int main(int argc, char *argv[])
    {
        enum { NOF_FD = 2, MAX_EVENTS = 1, MAX_BUF = 2000 };
        const char *TUN1 = "tap1";
        const char *TUN2 = "tap2";
        const char *CLONEDEV = "/dev/net/tun";
        const short FLAGS = IFF_TAP;
        int fd_tun1 = -1, fd_tun2 = -1, fd_epoll = -1;
        struct ifreq ifr_tun1, ifr_tun2;
        struct epoll_event ev;
        int ready, s, t;
        char buf[MAX_BUF];

        (void) argc;
        (void) argv;

        // Open tap1
        if((fd_tun1 = open(CLONEDEV, O_RDWR)) < 0)
        {
            perror("OPEN CLONEDEV for tun1 failed");
            goto fail;
        }

        memset(&ifr_tun1, 0, sizeof(struct ifreq));
        ifr_tun1.ifr_flags = FLAGS;
        strcpy(ifr_tun1.ifr_name, TUN1);
        if(ioctl(fd_tun1, TUNSETIFF, (void *) &ifr_tun1) < 0)
        {
            perror("IOCTL SETIFF for tap1 failed");
            goto fail;
        }

        // Open tap2
        if((fd_tun2 = open(CLONEDEV, O_RDWR)) < 0)
        {
            perror("OPEN CLONEDEV for tap2 failed");
            goto fail;
        }

        memset(&ifr_tun2, 0, sizeof(struct ifreq));
        ifr_tun2.ifr_flags = FLAGS;
        strcpy(ifr_tun2.ifr_name, TUN2);
        if(ioctl(fd_tun2, TUNSETIFF, (void *) &ifr_tun2) < 0)
        {
            perror("IOCTL SETIFF for tun2 failed");
            goto fail;
        }

        // Prepare EPOLL
        if((fd_epoll = epoll_create(NOF_FD)) < 0)
        {
            perror("EPOLL CREATE failed");
            goto fail;
        }

        memset(&ev, 0, sizeof(ev));
        ev.events = EPOLLIN;
        ev.data.fd = fd_tun1;
        if(epoll_ctl(fd_epoll, EPOLL_CTL_ADD, fd_tun1, &ev) < 0)
        {
            perror("EPOLL CTL ADD fd_tun1 failed");
            goto fail;
        }

        memset(&ev, 0, sizeof(ev));
        ev.events = EPOLLIN;
        ev.data.fd = fd_tun2;
        if(epoll_ctl(fd_epoll, EPOLL_CTL_ADD, fd_tun2, &ev) < 0)
        {
            perror("EPOLL CTL ADD fd_tun2 failed");
            goto fail;
        }

        // Do relay
        while(1)
        {
            if((ready = epoll_wait(fd_epoll, &ev, MAX_EVENTS, -1)) < 0)
            {
                if(errno == EINTR)
                    continue;

                perror("EPOLL WAIT failed");
                goto fail;
            }

            // Nothing reported: ev holds no fresh event, so wait again.
            if(ready == 0)
                continue;

            // An event without EPOLLIN (error/hang-up) would otherwise be
            // reported again on every iteration and make this loop spin.
            if(!(ev.events & EPOLLIN))
            {
                fprintf(stderr, "EPOLL reported events 0x%x without input on %s\n",
                        (unsigned int) ev.events,
                        (ev.data.fd == fd_tun1 ? "tun1" : "tun2"));
                goto fail;
            }

            if((s = read(ev.data.fd, buf, MAX_BUF)) < 0)
            {
                perror("READ failed");
                goto fail;
            }

            printf("Read from %s. Bytes: %d\nData:\n", (ev.data.fd == fd_tun1 ? "tun1" : "tun2"), s);
            for(int k = 0; k < s; k++)
            {
                printf("%c", buf[k]);
            }
            printf("\n");

            // Forward to the descriptor the data did not come from.
            t = (ev.data.fd == fd_tun1) ? fd_tun2 : fd_tun1;

            if((s = write(t, buf, s)) < 0)
            {
                perror("WRITE failed");
                goto fail;
            }

            printf("Written to %s. Bytes: %d\n", (t == fd_tun1 ? "tun1" : "tun2"), s);

            // Both descriptors stay registered (level-triggered EPOLLIN), so
            // no re-registration is needed before the next epoll_wait().

            printf("\n\n");
        }

    fail:
        // Single cleanup path: close whatever has been opened so far.
        if(fd_epoll >= 0)
            close(fd_epoll);
        if(fd_tun2 >= 0)
            close(fd_tun2);
        if(fd_tun1 >= 0)
            close(fd_tun1);
        exit(EXIT_FAILURE);
    }
    
  • APP1 and APP2: OSPF routing daemons communicating via tap1 and tap2 respectively. An strace of the daemons shows that basically the following system calls are involved:

    socket(PF_INET, SOCK_RAW, 0X59 /*IPPROTO_??? */) = 8 // Opening a socket for OSPF and tap1
    fcntl64(8, F_SETFL, O_RDONLY | O_NONBLOCK) = 0
    setsockopt(8, SOL_IP, IP_TOS, [192], 4) = 0
    setsockopt(8, SOL_SOCKET, SO_PRIORITY, [7], 4) = 0
    setsockopt(8, SOL_IP, IP_PKTINFO, [1], 4) = 0
    setsockopt(8, SOL_IP, IP_MTU_DISCOVER, [0], 4) = 0
    setsockopt(8, SOL_IP, IP_MULTICAST_LOOP, [0], 4) = 0
    setsockopt(8, SOL_IP, IP_MULTICAST_TTL, [1], 4) = 0
    setsockopt(8, SOL_IP, IP_MULTICAST_IF, "\0\0\0\0\n\0\0\1\223\0\0\0", 12) = 0
    setsockopt(8, SOL_SOCKET, SO_BINDTODEVICE, "tap1\0\0\0\0\0\0\0\0\0\0\0\0\0\315\375\307\250\352\t\t8\207\t\10\0\0\0\0", 32) = 0
    setsockopt(8, SOL_IP, IP_ADD_MEMBERSHIP, "\340\0\0\5\n\0\0\1\223\0\0\0", 12) = 0
    
    // Then it gets in a cycle like:
    select(9, [3, 7, 8], [], NULL, {1, 0}) = 0 (Timeout)
    clock_gettime(CLOCK_MONOTONIC, {120893, 360452769}) = 0
    time(NULL)
    clock_gettime(CLOCK_MONOTONIC, {120893, 360504525}) = 0
    select(9, [3, 7, 8], [], NULL, {1, 0}) = 0 (Timeout)
    clock_gettime(CLOCK_MONOTONIC, {120894, 363022746}) = 0
    time(NULL)
    ...
    

My Usage:

  • Attach wireshark to tap1. (no traffic seen yet).
  • Start APP1. (wireshark sees IGMP and OSPF messages with source 10.0.0.1 (tap1))
  • Start APP2. (wireshark still sees only IGMP and OSPF messages with source 10.0.0.1 (tap1) since Mediator not running yet)
  • Start Mediator. (wireshark now sees IGMP and OSPF messages with sources of both tap1 and tap2).

My Issue:

Even though wireshark - attached to tap1 - sees messages from both tap1 and tap2, APP2 does not receive the messages sent by APP1, and neither does APP1 receive the messages sent by APP2. In the strace extract shown above, the select() call never returns file descriptor 8, which is the socket bound to tap1.

My Questions:

Why does APP1 not receive the messages sent by APP2, even though those messages are sent by APP2, relayed by the Mediator and seen by wireshark that is attached to tap1?

Do I have to add any type/kind of additional routes on my Linux host?

Did I make a mistake in setting up the tun/tap devices?

Does my Mediator code not work properly?

1条回答
冷血范
2楼-- · 2019-04-11 12:13

I've not tried your code (it's a bit strange that you were able to open TAP device twice from userspace not using a multiqueue flag, but let's assume that is correct), but you have a conceptual error in the way you handle TAP devices.

TUN/TAP is essentially just a pipe: one side of this pipe is in the kernel (the tapX interface) and the other is in some userspace application. Whatever this application writes to the pipe arrives at the kernel interface as incoming traffic (and you see it with wireshark). Whatever the kernel sends to that pipe (outgoing traffic on tapX) ends up coming into the application (the data you can read in the application).

What your code is currently doing is opening another userspace end of the same pipe, and that's not what you want. You want to get the traffic on the other side of the pipe. Technically, what you're currently doing could be done by a simple bridge interface with both taps added to it as ports. Of course, if you want not just to bridge but to modify the traffic in some way, things get a bit more complicated.

One way to solve this problem is to add another pair of TAP interfaces. You bridge (as in kernel bridge) your tap1 with tap3 and tap2 with tap4, now you open tap3 and tap4 in your 'mediator' and proxy frames between them. This is horribly inefficient, but may be a solution for your problem.

查看更多
登录 后发表回答