1 Reply Latest reply on May 18, 2017 7:40 PM by dariusd

    macOS 10.12 guest socket timeout with TCP window reduced to 0

    pkcxbr Lurker

      I've been trying for a while to diagnose some software problems with a macOS 10.12 guest running under VMware Fusion 8.5, using various network adapters.

       

      Basically, when a TCP connection to a remote host is open and the TCP window hits 0, the window is never updated to a larger size again. This directly results in a socket timeout or hang whenever the network is fast enough to exhaust the TCP receive buffer.

       

      Here is a fully reproducible set of programs to demonstrate the issue. Please pardon the code, I definitely wasn't writing this to be clean--just to demonstrate an issue. They can be compiled with `cc client.c -o client` and `cc server.c -o server`. The programs were directly copied from Linux Howtos: C/C++ -> Sockets Tutorial with modification to keep the connection open forever and to add a timeout.

       

      client.c:

       

      #include <stdio.h>

      #include <stdlib.h>

      #include <unistd.h>

      #include <string.h>

      #include <sys/types.h>

      #include <sys/socket.h>

      #include <netinet/in.h>

      #include <netdb.h>

       

       

      /*
       * Report a failed call and terminate the client.
       *
       * msg is handed to perror(), which writes it to stderr followed by the
       * description of the current errno value. This function does not return.
       */
      void error(const char *msg)
      {
          perror(msg);
          /* Exit non-zero so that shells and scripts can see the run failed. */
          exit(EXIT_FAILURE);
      }

       

       

      /*
       * TCP client that drains a connection slowly enough to be observed.
       *
       * Usage: client hostname port
       *
       * Connects to hostname:port, sleeps for one second, and then reads from the
       * socket in chunks of up to 256 bytes, printing the size of each read and
       * the running total. The loop ends when no data arrives within one second
       * ("timeout") or when the peer closes the connection.
       *
       * Returns 0 after a timeout or an orderly close; exits through error() or
       * with EXIT_FAILURE on bad arguments, lookup failure, or a failed call.
       */
      int main(int argc, char *argv[])
      {
          char buffer[256];
          struct sockaddr_in serv_addr;
          struct hostent *server;
          long long total = 0;   /* wide enough not to overflow on long transfers */
          int sockfd, portno;

          if (argc < 3) {
              fprintf(stderr, "usage %s hostname port\n", argv[0]);
              exit(EXIT_FAILURE);
          }
          portno = atoi(argv[2]);

          sockfd = socket(AF_INET, SOCK_STREAM, 0);
          if (sockfd < 0) {
              error("ERROR opening socket");
          }

          server = gethostbyname(argv[1]);
          if (server == NULL) {
              fprintf(stderr, "ERROR, no such host\n");
              exit(EXIT_FAILURE);
          }

          memset(&serv_addr, 0, sizeof(serv_addr));
          serv_addr.sin_family = AF_INET;
          /* Only copy the address if it really is an IPv4 address of the right size. */
          if (server->h_addrtype != AF_INET ||
              (size_t)server->h_length != sizeof(serv_addr.sin_addr.s_addr)) {
              fprintf(stderr, "ERROR, no IPv4 address for host\n");
              exit(EXIT_FAILURE);
          }
          memcpy(&serv_addr.sin_addr.s_addr, server->h_addr_list[0],
                 sizeof(serv_addr.sin_addr.s_addr));
          serv_addr.sin_port = htons((unsigned short)portno);

          if (connect(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
              error("ERROR connecting");
          }

          sleep(1);

          for (;;) {
              fd_set readable;
              struct timeval timeout;
              ssize_t n;
              int rv;

              /*
               * select() may overwrite both the descriptor set and the timeout,
               * so both are rebuilt before every call.
               */
              FD_ZERO(&readable);
              FD_SET(sockfd, &readable);
              timeout.tv_sec = 1;
              timeout.tv_usec = 0;

              rv = select(sockfd + 1, &readable, NULL, NULL, &timeout);
              if (rv == -1) {
                  /* Treat a failed select() as fatal instead of retrying forever. */
                  error("select");
              }
              if (rv == 0) {
                  printf("timeout\n");
                  break;
              }

              n = read(sockfd, buffer, sizeof(buffer));
              if (n < 0) {
                  error("ERROR reading from socket");
              }
              if (n == 0) {
                  /* End of stream: the socket would stay readable forever. */
                  printf("connection closed by peer\n");
                  break;
              }
              total += n;
              printf("read %zd / %lld\n", n, total);
          }

          close(sockfd);
          return 0;
      }

       

      server.c:

       

      /* A simple server in the internet domain using TCP

         The port number is passed as an argument */

      #include <stdio.h>

      #include <stdlib.h>

      #include <string.h>

      #include <unistd.h>

      #include <sys/types.h>

      #include <sys/socket.h>

      #include <netinet/in.h>

       

       

      /*
       * Print msg through perror(), which appends the description of the current
       * errno value, and then end the process with exit status 1.
       * This function does not return.
       */
      void error(const char *msg)
      {
          const int failure_status = 1;

          perror(msg);
          exit(failure_status);
      }

       

       

      /*
       * TCP server that floods the first client with data.
       *
       * Usage: server port
       *
       * Binds to the given port on all local addresses, accepts one connection,
       * and then writes a 1024-byte buffer filled with the byte 0xAB to it in an
       * endless loop, printing the size of each write and the running total.
       *
       * The send loop only ends when write() fails, so the process always
       * finishes with a non-zero exit status.
       */
      int main(int argc, char *argv[])
      {
          int sockfd, newsockfd, portno;
          socklen_t clilen;
          char buffer[1024];
          struct sockaddr_in serv_addr, cli_addr;
          long long total = 0;   /* wide enough not to overflow while sending forever */

          if (argc < 2) {
              fprintf(stderr, "ERROR, no port provided\n");
              exit(1);
          }

          sockfd = socket(AF_INET, SOCK_STREAM, 0);
          if (sockfd < 0) {
              error("ERROR opening socket");
          }

          memset(&serv_addr, 0, sizeof(serv_addr));
          portno = atoi(argv[1]);
          serv_addr.sin_family = AF_INET;
          serv_addr.sin_addr.s_addr = INADDR_ANY;
          serv_addr.sin_port = htons((unsigned short)portno);

          if (bind(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
              error("ERROR on binding");
          }
          if (listen(sockfd, 5) < 0) {
              error("ERROR on listen");
          }

          clilen = sizeof(cli_addr);
          newsockfd = accept(sockfd, (struct sockaddr *)&cli_addr, &clilen);
          if (newsockfd < 0) {
              error("ERROR on accept");
          }

          /*
           * Fill with the byte value 0xAB. The quoted form '0xAB' is a
           * multi-character constant with an implementation-defined value,
           * not this byte.
           */
          memset(buffer, 0xAB, sizeof(buffer));

          for (;;) {
              ssize_t n = write(newsockfd, buffer, sizeof(buffer));

              if (n < 0) {
                  perror("ERROR writing to socket");
                  break;
              }
              total += n;
              printf("wrote %zd / %lld\n", n, total);
          }

          /* Reached only after a failed write: release both sockets and report failure. */
          close(newsockfd);
          close(sockfd);
          return 1;
      }

       

      Usage

       

      Run the server on your host with `./server $port_number` and the client in your vm with `./client $server_ip_address $port_number`. You'll notice a socket timeout pretty quickly.

       

      Diagnosis

       

      If you happen to run Wireshark at the same time, you'll see the TCP window reduce to 0 and zero-window probes being sent repeatedly by the server. The client never recovers.

       

      Screen Shot 2017-02-04 at 8.02.35 PM.png

       

      I believe the issue is related to the network drivers in the VM, as the symptom isn't present in either a host-to-host transfer or a guest-to-guest transfer. The problem also doesn't occur between hosts on different network nodes.

       

      The problem presented here will cause a myriad of problems, including git clones failing inside guests (as reported by another user) and large file downloads failing (as reported by a user on Stack Overflow 6 months ago).

       

      Version Info

       

      From host:

      $ '/Applications/VMware Fusion.app/Contents/Library/vmware-vmx' -v    

       

      VMware Fusion Information:

      VMware Fusion 8.5.3 build-4696910 Release

       

      From guest:

      $ /Library/Application\ Support/VMware\ Tools/vmware-tools-cli --version

      10.0.10.3275 (build-4301679)


      Questions

       

      1) Is this a known bug and does it have an ETA on resolution?

      2) Is there a workaround?

      3) Does anyone know of a version of VMware Fusion that doesn't have this bug?