The "reuseport" option of the "listen" directive.

When configured, an individual listen socket on a given address is
created for each worker process.  This allows to reduce in-kernel lock
contention on configurations with high accept rates, resulting in better
performance.  As of now it works on Linux and DragonFly BSD.

Note that on Linux incoming connection requests are currently tied up
to a specific listen socket, and if some sockets are closed, connection
requests will be reset, see https://lwn.net/Articles/542629/.  With
nginx, this may happen if the number of worker processes is reduced.
There is no such problem on DragonFly BSD.

Based on previous work by Sepherosa Ziehau and Yingqi Lu.
diff --git a/auto/unix b/auto/unix
index b90ba3c..595f905 100755
--- a/auto/unix
+++ b/auto/unix
@@ -308,6 +308,16 @@
 . auto/feature
 
 
+ngx_feature="SO_REUSEPORT"
+ngx_feature_name="NGX_HAVE_REUSEPORT"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="setsockopt(0, SOL_SOCKET, SO_REUSEPORT, NULL, 0)"
+. auto/feature
+
+
 ngx_feature="SO_ACCEPTFILTER"
 ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
 ngx_feature_run=no
diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
index 52b97ce..04a365a 100644
--- a/src/core/ngx_connection.c
+++ b/src/core/ngx_connection.c
@@ -91,6 +91,43 @@
 
 
 ngx_int_t
+ngx_clone_listening(ngx_conf_t *cf, ngx_listening_t *ls)
+{
+#if (NGX_HAVE_REUSEPORT)
+
+    ngx_int_t         n;
+    ngx_core_conf_t  *ccf;
+    ngx_listening_t   ols;
+
+    if (!ls->reuseport) {
+        return NGX_OK;
+    }
+
+    ols = *ls;
+
+    ccf = (ngx_core_conf_t *) ngx_get_conf(cf->cycle->conf_ctx,
+                                           ngx_core_module);
+
+    for (n = 1; n < ccf->worker_processes; n++) {
+
+        /* create a socket for each worker process */
+
+        ls = ngx_array_push(&cf->cycle->listening);
+        if (ls == NULL) {
+            return NGX_ERROR;
+        }
+
+        *ls = ols;
+        ls->worker = n;
+    }
+
+#endif
+
+    return NGX_OK;
+}
+
+
+ngx_int_t
 ngx_set_inherited_sockets(ngx_cycle_t *cycle)
 {
     size_t                     len;
@@ -106,6 +143,9 @@
 #if (NGX_HAVE_DEFERRED_ACCEPT && defined TCP_DEFER_ACCEPT)
     int                        timeout;
 #endif
+#if (NGX_HAVE_REUSEPORT)
+    int                        reuseport;
+#endif
 
     ls = cycle->listening.elts;
     for (i = 0; i < cycle->listening.nelts; i++) {
@@ -215,6 +255,25 @@
 #endif
 #endif
 
+#if (NGX_HAVE_REUSEPORT)
+
+        reuseport = 0;
+        olen = sizeof(int);
+
+        if (getsockopt(ls[i].fd, SOL_SOCKET, SO_REUSEPORT,
+                       (void *) &reuseport, &olen)
+            == -1)
+        {
+            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                          "getsockopt(SO_REUSEPORT) %V failed, ignored",
+                          &ls[i].addr_text);
+
+        } else {
+            ls[i].reuseport = reuseport ? 1 : 0;
+        }
+
+#endif
+
 #if (NGX_HAVE_TCP_FASTOPEN)
 
         olen = sizeof(int);
@@ -332,6 +391,31 @@
                 continue;
             }
 
+#if (NGX_HAVE_REUSEPORT)
+
+            if (ls[i].add_reuseport) {
+
+                /*
+                 * to allow transition from a socket without SO_REUSEPORT
+                 * to multiple sockets with SO_REUSEPORT, we have to set
+                 * SO_REUSEPORT on the old socket before opening new ones
+                 */
+
+                int  reuseport = 1;
+
+                if (setsockopt(ls[i].fd, SOL_SOCKET, SO_REUSEPORT,
+                               (const void *) &reuseport, sizeof(int))
+                    == -1)
+                {
+                    ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                                  "setsockopt(SO_REUSEPORT) %V failed, ignored",
+                                  &ls[i].addr_text);
+                }
+
+                ls[i].add_reuseport = 0;
+            }
+#endif
+
             if (ls[i].fd != (ngx_socket_t) -1) {
                 continue;
             }
@@ -370,6 +454,32 @@
                 return NGX_ERROR;
             }
 
+#if (NGX_HAVE_REUSEPORT)
+
+            if (ls[i].reuseport) {
+                int  reuseport;
+
+                reuseport = 1;
+
+                if (setsockopt(s, SOL_SOCKET, SO_REUSEPORT,
+                               (const void *) &reuseport, sizeof(int))
+                    == -1)
+                {
+                    ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
+                                  "setsockopt(SO_REUSEPORT) %V failed, ignored",
+                                  &ls[i].addr_text);
+
+                    if (ngx_close_socket(s) == -1) {
+                        ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
+                                      ngx_close_socket_n " %V failed",
+                                      &ls[i].addr_text);
+                    }
+
+                    return NGX_ERROR;
+                }
+            }
+#endif
+
 #if (NGX_HAVE_INET6 && defined IPV6_V6ONLY)
 
             if (ls[i].sockaddr->sa_family == AF_INET6) {
diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
index 27bb8a9..a49aa95 100644
--- a/src/core/ngx_connection.h
+++ b/src/core/ngx_connection.h
@@ -51,6 +51,8 @@
     ngx_listening_t    *previous;
     ngx_connection_t   *connection;
 
+    ngx_uint_t          worker;
+
     unsigned            open:1;
     unsigned            remain:1;
     unsigned            ignore:1;
@@ -66,6 +68,10 @@
 #if (NGX_HAVE_INET6 && defined IPV6_V6ONLY)
     unsigned            ipv6only:1;
 #endif
+#if (NGX_HAVE_REUSEPORT)
+    unsigned            reuseport:1;
+    unsigned            add_reuseport:1;
+#endif
     unsigned            keepalive:2;
 
 #if (NGX_HAVE_DEFERRED_ACCEPT)
@@ -203,6 +209,7 @@
 
 ngx_listening_t *ngx_create_listening(ngx_conf_t *cf, void *sockaddr,
     socklen_t socklen);
+ngx_int_t ngx_clone_listening(ngx_conf_t *cf, ngx_listening_t *ls);
 ngx_int_t ngx_set_inherited_sockets(ngx_cycle_t *cycle);
 ngx_int_t ngx_open_listening_sockets(ngx_cycle_t *cycle);
 void ngx_configure_listening_sockets(ngx_cycle_t *cycle);
diff --git a/src/core/ngx_cycle.c b/src/core/ngx_cycle.c
index 4852e3b..b358f3d 100644
--- a/src/core/ngx_cycle.c
+++ b/src/core/ngx_cycle.c
@@ -493,6 +493,10 @@
                     continue;
                 }
 
+                if (ls[i].remain) {
+                    continue;
+                }
+
                 if (ngx_cmp_sockaddr(nls[n].sockaddr, nls[n].socklen,
                                      ls[i].sockaddr, ls[i].socklen, 1)
                     == NGX_OK)
@@ -540,6 +544,13 @@
                         nls[n].add_deferred = 1;
                     }
 #endif
+
+#if (NGX_HAVE_REUSEPORT)
+                    if (nls[n].reuseport && !ls[i].reuseport) {
+                        nls[n].add_reuseport = 1;
+                    }
+#endif
+
                     break;
                 }
             }
diff --git a/src/event/ngx_event.c b/src/event/ngx_event.c
index 4610acb..878372c 100644
--- a/src/event/ngx_event.c
+++ b/src/event/ngx_event.c
@@ -725,6 +725,12 @@
     ls = cycle->listening.elts;
     for (i = 0; i < cycle->listening.nelts; i++) {
 
+#if (NGX_HAVE_REUSEPORT)
+        if (ls[i].reuseport && ls[i].worker != ngx_worker) {
+            continue;
+        }
+#endif
+
         c = ngx_get_connection(ls[i].fd, cycle->log);
 
         if (c == NULL) {
diff --git a/src/event/ngx_event_accept.c b/src/event/ngx_event_accept.c
index 3f1c0b1..8888f5a 100644
--- a/src/event/ngx_event_accept.c
+++ b/src/event/ngx_event_accept.c
@@ -11,7 +11,7 @@
 
 
 static ngx_int_t ngx_enable_accept_events(ngx_cycle_t *cycle);
-static ngx_int_t ngx_disable_accept_events(ngx_cycle_t *cycle);
+static ngx_int_t ngx_disable_accept_events(ngx_cycle_t *cycle, ngx_uint_t all);
 static void ngx_close_accepted_connection(ngx_connection_t *c);
 
 
@@ -109,7 +109,7 @@
             }
 
             if (err == NGX_EMFILE || err == NGX_ENFILE) {
-                if (ngx_disable_accept_events((ngx_cycle_t *) ngx_cycle)
+                if (ngx_disable_accept_events((ngx_cycle_t *) ngx_cycle, 1)
                     != NGX_OK)
                 {
                     return;
@@ -390,7 +390,7 @@
                    "accept mutex lock failed: %ui", ngx_accept_mutex_held);
 
     if (ngx_accept_mutex_held) {
-        if (ngx_disable_accept_events(cycle) == NGX_ERROR) {
+        if (ngx_disable_accept_events(cycle, 0) == NGX_ERROR) {
             return NGX_ERROR;
         }
 
@@ -413,7 +413,7 @@
 
         c = ls[i].connection;
 
-        if (c->read->active) {
+        if (c == NULL || c->read->active) {
             continue;
         }
 
@@ -427,7 +427,7 @@
 
 
 static ngx_int_t
-ngx_disable_accept_events(ngx_cycle_t *cycle)
+ngx_disable_accept_events(ngx_cycle_t *cycle, ngx_uint_t all)
 {
     ngx_uint_t         i;
     ngx_listening_t   *ls;
@@ -438,10 +438,23 @@
 
         c = ls[i].connection;
 
-        if (!c->read->active) {
+        if (c == NULL || !c->read->active) {
             continue;
         }
 
+#if (NGX_HAVE_REUSEPORT)
+
+        /*
+         * do not disable accept on worker's own sockets
+         * when disabling accept events due to accept mutex
+         */
+
+        if (ls[i].reuseport && !all) {
+            continue;
+        }
+
+#endif
+
         if (ngx_del_event(c->read, NGX_READ_EVENT, NGX_DISABLE_EVENT)
             == NGX_ERROR)
         {
diff --git a/src/http/ngx_http.c b/src/http/ngx_http.c
index adb4b6f..4642559 100644
--- a/src/http/ngx_http.c
+++ b/src/http/ngx_http.c
@@ -1737,6 +1737,10 @@
             break;
         }
 
+        if (ngx_clone_listening(cf, ls) != NGX_OK) {
+            return NGX_ERROR;
+        }
+
         addr++;
         last--;
     }
@@ -1815,6 +1819,10 @@
     ls->fastopen = addr->opt.fastopen;
 #endif
 
+#if (NGX_HAVE_REUSEPORT)
+    ls->reuseport = addr->opt.reuseport;
+#endif
+
     return ls;
 }
 
diff --git a/src/http/ngx_http_core_module.c b/src/http/ngx_http_core_module.c
index 1f03425..f525526 100644
--- a/src/http/ngx_http_core_module.c
+++ b/src/http/ngx_http_core_module.c
@@ -4166,6 +4166,19 @@
 #endif
         }
 
+        if (ngx_strcmp(value[n].data, "reuseport") == 0) {
+#if (NGX_HAVE_REUSEPORT)
+            lsopt.reuseport = 1;
+            lsopt.set = 1;
+            lsopt.bind = 1;
+#else
+            ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
+                               "reuseport is not supported "
+                               "on this platform, ignored");
+#endif
+            continue;
+        }
+
         if (ngx_strcmp(value[n].data, "ssl") == 0) {
 #if (NGX_HTTP_SSL)
             lsopt.ssl = 1;
diff --git a/src/http/ngx_http_core_module.h b/src/http/ngx_http_core_module.h
index e0ca2ce..e6be5ac 100644
--- a/src/http/ngx_http_core_module.h
+++ b/src/http/ngx_http_core_module.h
@@ -85,6 +85,9 @@
 #if (NGX_HAVE_INET6 && defined IPV6_V6ONLY)
     unsigned                   ipv6only:1;
 #endif
+#if (NGX_HAVE_REUSEPORT)
+    unsigned                   reuseport:1;
+#endif
     unsigned                   so_keepalive:2;
     unsigned                   proxy_protocol:1;
 
diff --git a/src/stream/ngx_stream.c b/src/stream/ngx_stream.c
index 18aea28..1c5e7a8 100644
--- a/src/stream/ngx_stream.c
+++ b/src/stream/ngx_stream.c
@@ -410,6 +410,10 @@
                 break;
             }
 
+            if (ngx_clone_listening(cf, ls) != NGX_OK) {
+                return NGX_CONF_ERROR;
+            }
+
             addr++;
             last--;
         }
diff --git a/src/stream/ngx_stream.h b/src/stream/ngx_stream.h
index 83a43a4..a10f68f 100644
--- a/src/stream/ngx_stream.h
+++ b/src/stream/ngx_stream.h
@@ -45,6 +45,9 @@
 #if (NGX_HAVE_INET6 && defined IPV6_V6ONLY)
     unsigned                ipv6only:1;
 #endif
+#if (NGX_HAVE_REUSEPORT)
+    unsigned                reuseport:1;
+#endif
     unsigned                so_keepalive:2;
 #if (NGX_HAVE_KEEPALIVE_TUNABLE)
     int                     tcp_keepidle;
diff --git a/src/stream/ngx_stream_core_module.c b/src/stream/ngx_stream_core_module.c
index c0df412..c8d8e66 100644
--- a/src/stream/ngx_stream_core_module.c
+++ b/src/stream/ngx_stream_core_module.c
@@ -384,6 +384,18 @@
 #endif
         }
 
+        if (ngx_strcmp(value[i].data, "reuseport") == 0) {
+#if (NGX_HAVE_REUSEPORT)
+            ls->reuseport = 1;
+            ls->bind = 1;
+#else
+            ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
+                               "reuseport is not supported "
+                               "on this platform, ignored");
+#endif
+            continue;
+        }
+
         if (ngx_strcmp(value[i].data, "ssl") == 0) {
 #if (NGX_STREAM_SSL)
             ls->ssl = 1;