Events: available bytes calculation via ioctl(FIONREAD). This makes it possible to avoid looping for a long time while working with a fast enough peer when data are added to the socket buffer faster than we are able to read and process them (ticket #1431). This is basically what we already do on FreeBSD with kqueue, where information about the number of bytes in the socket buffer is returned by the kevent() call. With other event methods, rev->available is now set to -1 when the socket is ready for reading. Later, in ngx_recv() and ngx_recv_chain(), if a full buffer is received, the real number of bytes in the socket buffer is retrieved using ioctl(FIONREAD). Reading more than this number of bytes ensures that even with edge-triggered event methods the event will be triggered again, so it is safe to stop processing the socket and switch to other connections. Using ioctl(FIONREAD) only after reading a full buffer is an optimization. With this approach, we only call ioctl(FIONREAD) when there are at least two recv()/readv() calls.
diff --git a/auto/unix b/auto/unix index 43d3b25..ff9697a 100644 --- a/auto/unix +++ b/auto/unix
@@ -943,6 +943,18 @@ . auto/feature +ngx_feature="ioctl(FIONREAD)" +ngx_feature_name="NGX_HAVE_FIONREAD" +ngx_feature_run=no +ngx_feature_incs="#include <sys/ioctl.h> + #include <stdio.h> + $NGX_INCLUDE_SYS_FILIO_H" +ngx_feature_path= +ngx_feature_libs= +ngx_feature_test="int i = FIONREAD; printf(\"%d\", i)" +. auto/feature + + ngx_feature="struct tm.tm_gmtoff" ngx_feature_name="NGX_HAVE_GMTOFF" ngx_feature_run=no
diff --git a/src/event/modules/ngx_devpoll_module.c b/src/event/modules/ngx_devpoll_module.c index ee9f854..590eb28 100644 --- a/src/event/modules/ngx_devpoll_module.c +++ b/src/event/modules/ngx_devpoll_module.c
@@ -495,6 +495,7 @@ if ((revents & POLLIN) && rev->active) { rev->ready = 1; + rev->available = -1; if (flags & NGX_POST_EVENTS) { queue = rev->accept ? &ngx_posted_accept_events
diff --git a/src/event/modules/ngx_epoll_module.c b/src/event/modules/ngx_epoll_module.c index 76aee08..98e3ce7 100644 --- a/src/event/modules/ngx_epoll_module.c +++ b/src/event/modules/ngx_epoll_module.c
@@ -886,11 +886,10 @@ if (revents & EPOLLRDHUP) { rev->pending_eof = 1; } - - rev->available = 1; #endif rev->ready = 1; + rev->available = -1; if (flags & NGX_POST_EVENTS) { queue = rev->accept ? &ngx_posted_accept_events
diff --git a/src/event/modules/ngx_eventport_module.c b/src/event/modules/ngx_eventport_module.c index 11ad093..f67c704 100644 --- a/src/event/modules/ngx_eventport_module.c +++ b/src/event/modules/ngx_eventport_module.c
@@ -559,6 +559,7 @@ if (revents & POLLIN) { rev->ready = 1; + rev->available = -1; if (flags & NGX_POST_EVENTS) { queue = rev->accept ? &ngx_posted_accept_events
diff --git a/src/event/modules/ngx_poll_module.c b/src/event/modules/ngx_poll_module.c index b46ab53..c16f024 100644 --- a/src/event/modules/ngx_poll_module.c +++ b/src/event/modules/ngx_poll_module.c
@@ -370,6 +370,7 @@ ev = c->read; ev->ready = 1; + ev->available = -1; queue = ev->accept ? &ngx_posted_accept_events : &ngx_posted_events;
diff --git a/src/event/modules/ngx_select_module.c b/src/event/modules/ngx_select_module.c index 0644621..b9fceb3 100644 --- a/src/event/modules/ngx_select_module.c +++ b/src/event/modules/ngx_select_module.c
@@ -330,6 +330,7 @@ if (found) { ev->ready = 1; + ev->available = -1; queue = ev->accept ? &ngx_posted_accept_events : &ngx_posted_events;
diff --git a/src/event/modules/ngx_win32_poll_module.c b/src/event/modules/ngx_win32_poll_module.c index 9fe867f..2fbc1b3 100644 --- a/src/event/modules/ngx_win32_poll_module.c +++ b/src/event/modules/ngx_win32_poll_module.c
@@ -380,6 +380,7 @@ ev = c->read; ev->ready = 1; + ev->available = -1; queue = ev->accept ? &ngx_posted_accept_events : &ngx_posted_events;
diff --git a/src/event/modules/ngx_win32_select_module.c b/src/event/modules/ngx_win32_select_module.c index 8093a60..962514a 100644 --- a/src/event/modules/ngx_win32_select_module.c +++ b/src/event/modules/ngx_win32_select_module.c
@@ -330,6 +330,7 @@ if (found) { ev->ready = 1; + ev->available = -1; queue = ev->accept ? &ngx_posted_accept_events : &ngx_posted_events;
diff --git a/src/event/ngx_event.h b/src/event/ngx_event.h index bb77c4a..97f9673 100644 --- a/src/event/ngx_event.h +++ b/src/event/ngx_event.h
@@ -91,21 +91,14 @@ * write: available space in buffer when event is ready * or lowat when event is set with NGX_LOWAT_EVENT flag * - * epoll with EPOLLRDHUP: - * accept: 1 if accept many, 0 otherwise - * read: 1 if there can be data to read, 0 otherwise - * * iocp: TODO * * otherwise: * accept: 1 if accept many, 0 otherwise + * read: bytes to read when event is ready, -1 if not known */ -#if (NGX_HAVE_KQUEUE) || (NGX_HAVE_IOCP) int available; -#else - unsigned available:1; -#endif ngx_event_handler_pt handler;
diff --git a/src/os/unix/ngx_readv_chain.c b/src/os/unix/ngx_readv_chain.c index 454cfdc..a3577ce 100644 --- a/src/os/unix/ngx_readv_chain.c +++ b/src/os/unix/ngx_readv_chain.c
@@ -60,7 +60,7 @@ "readv: eof:%d, avail:%d", rev->pending_eof, rev->available); - if (!rev->available && !rev->pending_eof) { + if (rev->available == 0 && !rev->pending_eof) { return NGX_AGAIN; } } @@ -165,6 +165,40 @@ #endif +#if (NGX_HAVE_FIONREAD) + + if (rev->available >= 0) { + rev->available -= n; + + /* + * negative rev->available means some additional bytes + * were received between kernel notification and readv(), + * and therefore ev->ready can be safely reset even for + * edge-triggered event methods + */ + + if (rev->available < 0) { + rev->available = 0; + rev->ready = 0; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "readv: avail:%d", rev->available); + + } else if (n == size) { + + if (ngx_socket_nread(c->fd, &rev->available) == -1) { + n = ngx_connection_error(c, ngx_socket_errno, + ngx_socket_nread_n " failed"); + break; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "readv: avail:%d", rev->available); + } + +#endif + #if (NGX_HAVE_EPOLLRDHUP) if ((ngx_event_flags & NGX_USE_EPOLL_EVENT)
diff --git a/src/os/unix/ngx_recv.c b/src/os/unix/ngx_recv.c index c85fd45..ddfae4d 100644 --- a/src/os/unix/ngx_recv.c +++ b/src/os/unix/ngx_recv.c
@@ -57,7 +57,7 @@ "recv: eof:%d, avail:%d", rev->pending_eof, rev->available); - if (!rev->available && !rev->pending_eof) { + if (rev->available == 0 && !rev->pending_eof) { rev->ready = 0; return NGX_AGAIN; } @@ -116,6 +116,40 @@ #endif +#if (NGX_HAVE_FIONREAD) + + if (rev->available >= 0) { + rev->available -= n; + + /* + * negative rev->available means some additional bytes + * were received between kernel notification and recv(), + * and therefore ev->ready can be safely reset even for + * edge-triggered event methods + */ + + if (rev->available < 0) { + rev->available = 0; + rev->ready = 0; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "recv: avail:%d", rev->available); + + } else if ((size_t) n == size) { + + if (ngx_socket_nread(c->fd, &rev->available) == -1) { + n = ngx_connection_error(c, ngx_socket_errno, + ngx_socket_nread_n " failed"); + break; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "recv: avail:%d", rev->available); + } + +#endif + #if (NGX_HAVE_EPOLLRDHUP) if ((ngx_event_flags & NGX_USE_EPOLL_EVENT)
diff --git a/src/os/unix/ngx_socket.h b/src/os/unix/ngx_socket.h index fcc5153..ec66a6f 100644 --- a/src/os/unix/ngx_socket.h +++ b/src/os/unix/ngx_socket.h
@@ -38,6 +38,13 @@ #endif +#if (NGX_HAVE_FIONREAD) + +#define ngx_socket_nread(s, n) ioctl(s, FIONREAD, n) +#define ngx_socket_nread_n "ioctl(FIONREAD)" + +#endif + int ngx_tcp_nopush(ngx_socket_t s); int ngx_tcp_push(ngx_socket_t s);
diff --git a/src/os/win32/ngx_socket.c b/src/os/win32/ngx_socket.c index 05a39f4..b1b4afb 100644 --- a/src/os/win32/ngx_socket.c +++ b/src/os/win32/ngx_socket.c
@@ -28,6 +28,21 @@ int +ngx_socket_nread(ngx_socket_t s, int *n) +{ + unsigned long nread; + + if (ioctlsocket(s, FIONREAD, &nread) == -1) { + return -1; + } + + *n = nread; + + return 0; +} + + +int ngx_tcp_push(ngx_socket_t s) { return 0;
diff --git a/src/os/win32/ngx_socket.h b/src/os/win32/ngx_socket.h index f8a453d..ab56bc8 100644 --- a/src/os/win32/ngx_socket.h +++ b/src/os/win32/ngx_socket.h
@@ -31,6 +31,9 @@ #define ngx_nonblocking_n "ioctlsocket(FIONBIO)" #define ngx_blocking_n "ioctlsocket(!FIONBIO)" +int ngx_socket_nread(ngx_socket_t s, int *n); +#define ngx_socket_nread_n "ioctlsocket(FIONREAD)" + #define ngx_shutdown_socket shutdown #define ngx_shutdown_socket_n "shutdown()"
diff --git a/src/os/win32/ngx_win32_config.h b/src/os/win32/ngx_win32_config.h index 4824d05..9615687 100644 --- a/src/os/win32/ngx_win32_config.h +++ b/src/os/win32/ngx_win32_config.h
@@ -273,6 +273,10 @@ #define NGX_HAVE_SO_SNDLOWAT 0 #endif +#ifndef NGX_HAVE_FIONREAD +#define NGX_HAVE_FIONREAD 1 +#endif + #define NGX_HAVE_GETADDRINFO 1 #define ngx_random rand
diff --git a/src/os/win32/ngx_wsarecv.c b/src/os/win32/ngx_wsarecv.c index 1925f0b..ac88310 100644 --- a/src/os/win32/ngx_wsarecv.c +++ b/src/os/win32/ngx_wsarecv.c
@@ -51,6 +51,45 @@ return n; } +#if (NGX_HAVE_FIONREAD) + + if (rev->available >= 0 && bytes > 0) { + rev->available -= bytes; + + /* + * negative rev->available means some additional bytes + * were received between kernel notification and WSARecv(), + * and therefore ev->ready can be safely reset even for + * edge-triggered event methods + */ + + if (rev->available < 0) { + rev->available = 0; + rev->ready = 0; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "WSARecv: avail:%d", rev->available); + + } else if (bytes == size) { + + if (ngx_socket_nread(c->fd, &rev->available) == -1) { + n = ngx_connection_error(c, ngx_socket_errno, + ngx_socket_nread_n " failed"); + + if (n == NGX_ERROR) { + rev->error = 1; + } + + return n; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "WSARecv: avail:%d", rev->available); + } + +#endif + if (bytes < size) { rev->ready = 0; }
diff --git a/src/os/win32/ngx_wsarecv_chain.c b/src/os/win32/ngx_wsarecv_chain.c index 2598e09..87f0239 100644 --- a/src/os/win32/ngx_wsarecv_chain.c +++ b/src/os/win32/ngx_wsarecv_chain.c
@@ -94,6 +94,41 @@ return NGX_ERROR; } +#if (NGX_HAVE_FIONREAD) + + if (rev->available >= 0 && bytes > 0) { + rev->available -= bytes; + + /* + * negative rev->available means some additional bytes + * were received between kernel notification and WSARecv(), + * and therefore ev->ready can be safely reset even for + * edge-triggered event methods + */ + + if (rev->available < 0) { + rev->available = 0; + rev->ready = 0; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "WSARecv: avail:%d", rev->available); + + } else if (bytes == size) { + + if (ngx_socket_nread(c->fd, &rev->available) == -1) { + rev->error = 1; + ngx_connection_error(c, ngx_socket_errno, + ngx_socket_nread_n " failed"); + return NGX_ERROR; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "WSARecv: avail:%d", rev->available); + } + +#endif + if (bytes < size) { rev->ready = 0; }