Bruno Haible | 4 Apr 14:50 2010

Re: nanosleep on mingw

The current implementation of nanosleep is broken on mingw for several reasons:

1) It requires linking with -lws2_32, as evidenced by these error messages:

   gcc-3 -mno-cygwin  -g -O2  -L/usr/local/mingw/lib -o test-nanosleep.exe test-nanosleep.o ../gllib/libgnu.a
   ../gllib/libgnu.a(select.o): In function `rpl_select':
   /home/bruno/testdir2/gllib/select.c:93: undefined reference to `_WSAEnumNetworkEvents@12'
   /home/bruno/testdir2/gllib/select.c:360: undefined reference to `_WSAEventSelect@12'
   /home/bruno/testdir2/gllib/select.c:381: undefined reference to `_select@20'
   /home/bruno/testdir2/gllib/select.c:434: undefined reference to `_WSAEventSelect@12'
   /home/bruno/testdir2/gllib/select.c:435: undefined reference to `___WSAFDIsSet@8'
   /home/bruno/testdir2/gllib/select.c:437: undefined reference to `___WSAFDIsSet@8'
   /home/bruno/testdir2/gllib/select.c:439: undefined reference to `___WSAFDIsSet@8'
   /home/bruno/testdir2/gllib/select.c:414: undefined reference to `_select@20'
   collect2: ld returned 1 exit status
   make[4]: *** [test-nanosleep.exe] Error 1

   But there is no good reason to link with a sockets library just to sleep a
   certain amount of time.

2) Use of the 'select' function from ws2_32 (Winsock 2) requires a prior
   invocation of WSAStartup() (contained in the 'sockets' module).

   Witness: This program
===============================================================================
#include <winsock2.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <windows.h>

int main (int argc, char *argv[])
{
  int usecs = atoi (argv[1]);
  LARGE_INTEGER pcfreq;
  LARGE_INTEGER before, after;
  struct timeval timeout;
  int ret;

  if (!QueryPerformanceFrequency (&pcfreq))
    printf ("QueryPerformanceFrequency failed\n");

  if (!QueryPerformanceCounter (&before))
    printf ("QueryPerformanceCounter failed\n");

  timeout.tv_sec = usecs / 1000000;
  timeout.tv_usec = usecs % 1000000;
  ret = select (0, NULL, NULL, NULL, &timeout);
  if (ret < 0) printf ("select failed, error = %d\n", WSAGetLastError());

  if (!QueryPerformanceCounter (&after))
    printf ("QueryPerformanceCounter failed\n");

  printf ("time slept: %g s\n", (double) (after.QuadPart - before.QuadPart) / (double) pcfreq.QuadPart);
  return 0;
}
===============================================================================
   produces the output

   $ ./usleep.exe 6000
   select failed, error = 10093
   time slept: 6.87238e-005 s

   Error 10093 is WSANOTINITIALISED.

3) Use of select without a socket descriptor, just a timeout, fails.

   Witness: This program
===============================================================================
#include <winsock2.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <windows.h>

int main (int argc, char *argv[])
{
  int usecs = atoi (argv[1]);
  LARGE_INTEGER pcfreq;
  WSADATA data;
  LARGE_INTEGER before, after;
  struct timeval timeout;
  int ret;

  if (!QueryPerformanceFrequency (&pcfreq))
    printf ("QueryPerformanceFrequency failed\n");

  WSAStartup (0x201, &data);

  if (!QueryPerformanceCounter (&before))
    printf ("QueryPerformanceCounter failed\n");

  timeout.tv_sec = usecs / 1000000;
  timeout.tv_usec = usecs % 1000000;
  ret = select (0, NULL, NULL, NULL, &timeout);
  if (ret < 0) printf ("select failed, error = %d\n", WSAGetLastError());

  if (!QueryPerformanceCounter (&after))
    printf ("QueryPerformanceCounter failed\n");

  printf ("time slept: %g s\n", (double) (after.QuadPart - before.QuadPart) / (double) pcfreq.QuadPart);
  return 0;
}
===============================================================================
   produces the output

   $ ./usleep.exe 6000
   select failed, error = 10022
   time slept: 0.000258971 s

   Error 10022 is WSAEINVAL.

4) select sleeps for at least 6 ms.

   Witness: This program
===============================================================================
#include <winsock2.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <windows.h>

int main (int argc, char *argv[])
{
  int usecs = atoi (argv[1]);
  LARGE_INTEGER pcfreq;
  WSADATA data;
  SOCKET s;
  fd_set dummy;
  LARGE_INTEGER before, after;
  struct timeval timeout;
  int ret;

  if (!QueryPerformanceFrequency (&pcfreq))
    printf ("QueryPerformanceFrequency failed\n");

  WSAStartup (0x201, &data);

  s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
  FD_ZERO(&dummy);
  FD_SET(s, &dummy);

  if (!QueryPerformanceCounter (&before))
    printf ("QueryPerformanceCounter failed\n");

  timeout.tv_sec = usecs / 1000000;
  timeout.tv_usec = usecs % 1000000;
  ret = select (0, NULL, NULL, &dummy, &timeout);
  if (ret < 0) printf ("select failed, error = %d\n", WSAGetLastError());

  if (!QueryPerformanceCounter (&after))
    printf ("QueryPerformanceCounter failed\n");

  printf ("time slept: %g s\n", (double) (after.QuadPart - before.QuadPart) / (double) pcfreq.QuadPart);
  return 0;
}
===============================================================================
   when run on Windows XP, produces output like this:

$ ./usleep.exe 1000 
time slept: 0.00615274 s

$ ./usleep.exe 2000
time slept: 0.00684696 s

$ ./usleep.exe 3000
time slept: 0.0065919 s

$ ./usleep.exe 4000
time slept: 0.00648378 s

$ ./usleep.exe 50000
time slept: 0.0564223 s

$ ./usleep.exe 51000
time slept: 0.0532749 s

$ ./usleep.exe 52000
time slept: 0.0534506 s

$ ./usleep.exe 53000
time slept: 0.0531825 s

$ ./usleep.exe 54000
time slept: 0.0532327 s

5) The Win32 Sleep function sleeps for at least 5 ms. (But at least it does not
   need prior initialization and a socket for doing this :-)).

   This program
===============================================================================
#include <winsock2.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <windows.h>

/* Witness for problem 5: call the Win32 Sleep() function with the requested
   delay rounded down to whole milliseconds, and print how long the call
   took.
   argv[1] is the requested delay in microseconds.
   Returns 0 once the measurement has been printed, EXIT_FAILURE if the
   argument is missing or the performance counter cannot be read.  */
int main (int argc, char *argv[])
{
  int usecs;
  LARGE_INTEGER pcfreq;
  LARGE_INTEGER before, after;

  /* atoi (argv[1]) would be handed a null pointer when no argument is
     given.  */
  if (argc < 2)
    {
      fprintf (stderr, "usage: usleep MICROSECONDS\n");
      return EXIT_FAILURE;
    }
  usecs = atoi (argv[1]);

  /* Without the counter frequency the elapsed time cannot be computed, so
     stop here instead of dividing by an uninitialized value later.  */
  if (!QueryPerformanceFrequency (&pcfreq))
    {
      printf ("QueryPerformanceFrequency failed\n");
      return EXIT_FAILURE;
    }

  if (!QueryPerformanceCounter (&before))
    {
      printf ("QueryPerformanceCounter failed\n");
      return EXIT_FAILURE;
    }

  /* Sleep takes milliseconds; the sub-millisecond part of the request is
     dropped by the integer division.  */
  Sleep (usecs / 1000);

  if (!QueryPerformanceCounter (&after))
    {
      printf ("QueryPerformanceCounter failed\n");
      return EXIT_FAILURE;
    }

  /* Elapsed ticks divided by ticks per second gives seconds.  */
  printf ("time slept: %g s\n",
          (double) (after.QuadPart - before.QuadPart)
          / (double) pcfreq.QuadPart);
  return 0;
}
===============================================================================
   outputs:

$ ./usleep.exe 1000
time slept: 0.0116696 s

$ ./usleep.exe 2000
time slept: 0.00430949 s

$ ./usleep.exe 1000
time slept: 0.00483553 s

$ ./usleep.exe 2000
time slept: 0.00500846 s

$ ./usleep.exe 3000
time slept: 0.0048143 s

$ ./usleep.exe 4000
time slept: 0.0047696 s

$ ./usleep.exe 5000
time slept: 0.0050146 s

$ ./usleep.exe 6000
time slept: 0.00530682 s

$ ./usleep.exe 7000
time slept: 0.0048752 s

And it also has only about a 15 ms resolution:

$ ./usleep.exe 60000
time slept: 0.0512395 s

$ ./usleep.exe 61000
time slept: 0.0520256 s

$ ./usleep.exe 62000
time slept: 0.0518923 s

$ ./usleep.exe 63000
time slept: 0.0674114 s

$ ./usleep.exe 64000
time slept: 0.0694077 s

A combination of Sleep and busy-looping appears to give good results.

===============================================================================
#include <winsock2.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <windows.h>

/* A time interval: whole seconds plus a nanosecond remainder.  */
struct timespec
{
  time_t tv_sec;   /* seconds */
  long tv_nsec;    /* nanoseconds */
};

static void
nanosleep (const struct timespec *requested_delay)
{
  if (requested_delay->tv_sec > 0)
    /* At least one second. Millisecond resolution is sufficient. */
    Sleep (requested_delay->tv_sec * 1000 + requested_delay->tv_nsec / 1000000);
  else
    {
      /* Use Sleep for the largest part, and busy-loop for the rest. */
      static double frequency;
      if (frequency == 0)
        {
          LARGE_INTEGER freq;
          if (!QueryPerformanceFrequency (&freq))
            {
              /* Cannot use QueryPerformanceCounter. */
              Sleep (requested_delay->tv_nsec / 1000000);
              return;
            }
          frequency = (double) freq.QuadPart / 1000000000.0;
        }
      long long expected_counter_difference = requested_delay->tv_nsec * frequency;
      int sleep_part = (int) requested_delay->tv_nsec / 1000000 - 10;
      LARGE_INTEGER before;
      QueryPerformanceCounter (&before);
      long long expected_counter = before.QuadPart + expected_counter_difference;
      if (sleep_part > 0)
        Sleep (sleep_part);
      for (;;)
        {
          LARGE_INTEGER after;
          QueryPerformanceCounter (&after);
          if (after.QuadPart >= expected_counter)
            break;
        }
    }
}

/* Driver for the combined Sleep/busy-loop nanosleep above: convert the
   requested delay into a struct timespec, call nanosleep, and print how
   long the call took.
   argv[1] is the requested delay in microseconds.
   Returns 0 once the measurement has been printed, EXIT_FAILURE if the
   argument is missing or the performance counter cannot be read.  */
int main (int argc, char *argv[])
{
  int usecs;
  LARGE_INTEGER pcfreq;
  LARGE_INTEGER before, after;
  struct timespec t;

  /* atoi (argv[1]) would be handed a null pointer when no argument is
     given.  */
  if (argc < 2)
    {
      fprintf (stderr, "usage: usleep MICROSECONDS\n");
      return EXIT_FAILURE;
    }
  usecs = atoi (argv[1]);

  /* Without the counter frequency the elapsed time cannot be computed, so
     stop here instead of dividing by an uninitialized value later.  */
  if (!QueryPerformanceFrequency (&pcfreq))
    {
      printf ("QueryPerformanceFrequency failed\n");
      return EXIT_FAILURE;
    }

  if (!QueryPerformanceCounter (&before))
    {
      printf ("QueryPerformanceCounter failed\n");
      return EXIT_FAILURE;
    }

  /* Split the microsecond count into seconds and nanoseconds.  The
     conversion stays between the two counter readings, as in the original
     measurement.  */
  t.tv_sec = usecs / 1000000;
  t.tv_nsec = (usecs % 1000000) * 1000;
  nanosleep (&t);

  if (!QueryPerformanceCounter (&after))
    {
      printf ("QueryPerformanceCounter failed\n");
      return EXIT_FAILURE;
    }

  /* Elapsed ticks divided by ticks per second gives seconds.  */
  printf ("time slept: %g s\n",
          (double) (after.QuadPart - before.QuadPart)
          / (double) pcfreq.QuadPart);
  return 0;
}
===============================================================================

$ ./usleep.exe 699000
time slept: 0.699015 s

$ ./usleep.exe 699000
time slept: 0.699013 s

$ ./usleep.exe 700000
time slept: 0.700014 s

$ ./usleep.exe 701000
time slept: 0.701013 s

$ ./usleep.exe 4100  
time slept: 0.00411449 s

$ ./usleep.exe 1300
time slept: 0.00131469 s

$ ./usleep.exe 1200
time slept: 0.001213 s

$ ./usleep.exe 100
time slept: 0.000112864 s

$ ./usleep.exe 90
time slept: 0.000104203 s

$ ./usleep.exe 10
time slept: 2.45841e-005 s

(Apparently there is an offset of 13 to 14 microseconds, but this is likely
CPU speed dependent.)

So here is a proposed patch for implementing a Woe32 nanosleep with a
resolution of ca. 10 microseconds or higher.

2010-04-04  Bruno Haible  <bruno <at> clisp.org>

	Implement nanosleep for native Windows.
	* lib/nanosleep.c (nanosleep): New implementation for native Windows.

--- lib/nanosleep.c.orig	Sun Apr  4 14:45:40 2010
+++ lib/nanosleep.c	Sun Apr  4 14:43:28 2010
@@ -15,7 +15,8 @@
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

-/* written by Jim Meyering */
+/* written by Jim Meyering
+   and Bruno Haible for the Woe32 part */

 #include <config.h>

@@ -83,7 +84,102 @@
   return nanosleep (&intermediate, remaining_delay);
 }

+#elif (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
+/* Windows platforms.  */
+
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+
+/* The Win32 function Sleep() has a resolution of about 15 ms and takes
+   at least 5 ms to execute.  We use this function for longer time periods.
+   Additionally, we use busy-looping over short time periods, to get a
+   resolution of about 0.01 ms.  In order to measure such short timespans,
+   we use the QueryPerformanceCounter() function.  */
+
+int
+nanosleep (const struct timespec *requested_delay,
+           struct timespec *remaining_delay)
+{
+  static bool initialized;
+  /* Number of performance counter increments per nanosecond,
+     or zero if it could not be determined.  */
+  static double ticks_per_nanosecond;
+
+  if (requested_delay->tv_nsec < 0 || BILLION <= requested_delay->tv_nsec)
+    {
+      errno = EINVAL;
+      return -1;
+    }
+
+  /* For requested delays of one second or more, 15ms resolution is
+     sufficient.  */
+  if (requested_delay->tv_sec == 0)
+    {
+      if (!initialized)
+        {
+          /* Initialize ticks_per_nanosecond.  */
+          LARGE_INTEGER ticks_per_second;
+
+          if (QueryPerformanceFrequency (&ticks_per_second))
+            ticks_per_nanosecond =
+              (double) ticks_per_second.QuadPart / 1000000000.0;
+
+          initialized = true;
+        }
+      if (ticks_per_nanosecond)
+        {
+          /* QueryPerformanceFrequency worked.  We can use
+             QueryPerformanceCounter.  Use a combination of Sleep and
+             busy-looping.  */
+          /* Number of milliseconds to pass to the Sleep function.
+             Since Sleep can take up to 8 ms less or 8 ms more than requested
+             (or maybe more if the system is loaded), we subtract 10 ms.  */
+          int sleep_millis = (int) requested_delay->tv_nsec / 1000000 - 10;
+          /* Determine how many ticks to delay.  */
+          LONGLONG wait_ticks = requested_delay->tv_nsec * ticks_per_nanosecond;
+          /* Start.  */
+          LARGE_INTEGER counter_before;
+          if (QueryPerformanceCounter (&counter_before))
+            {
+              /* Wait until the performance counter has reached this value.
+                 We don't need to worry about overflow, because the performance
+                 counter is reset at reboot, and with a frequency of 3.6E6
+                 ticks per second 63 bits suffice for over 80000 years.  */
+              LONGLONG wait_until = counter_before.QuadPart + wait_ticks;
+              /* Use Sleep for the longest part.  */
+              if (sleep_millis > 0)
+                Sleep (sleep_millis);
+              /* Busy-loop for the rest.  */
+              for (;;)
+                {
+                  LARGE_INTEGER counter_after;
+                  if (!QueryPerformanceCounter (&counter_after))
+                    /* QueryPerformanceCounter failed, but succeeded earlier.
+                       Should not happen.  */
+                    break;
+                  if (counter_after.QuadPart >= wait_until)
+                    /* The requested time has elapsed.  */
+                    break;
+                }
+              goto done;
+            }
+        }
+    }
+  /* Implementation for long delays and as fallback.  */
+  Sleep (requested_delay->tv_sec * 1000 + requested_delay->tv_nsec / 1000000);
+
+ done:
+  /* Sleep is not interruptible.  So there is no remaining delay.  */
+  if (remaining_delay != NULL)
+    {
+      remaining_delay->tv_sec = 0;
+      remaining_delay->tv_nsec = 0;
+    }
+  return 0;
+}
+
 #else
+/* Unix platforms lacking nanosleep. */

 /* Some systems (MSDOS) don't have SIGCONT.
    Using SIGTERM here turns the signal-handling code below


Gmane