Gzip: support for a zlib variant from Intel.

A zlib variant from Intel, available from https://github.com/jtkukunas/zlib,
uses a 64K hash instead of scaling it from the specified memory level,
adds 16-byte padding to one of the window-sized memory buffers, and can
force window bits to 13 if the compression level is set to 1 and appropriate
compile options are used.  As a result, nginx complained with "gzip filter
failed to use preallocated memory" alerts.

This change improves deflate_state allocation detection by testing that
items is 1 (deflate_state is the only allocation where items is 1).
Additionally, on the first failure to use preallocated memory we now assume
that we are working with Intel's modified zlib, and switch to using the
appropriate preallocations.  If this does not help, we complain with the
usual alerts.

A previous version of this patch was published at
http://mailman.nginx.org/pipermail/nginx/2014-July/044568.html.
The zlib variant in question is used by default in ClearLinux from Intel,
see http://mailman.nginx.org/pipermail/nginx-ru/2017-October/060421.html,
http://mailman.nginx.org/pipermail/nginx-ru/2017-November/060544.html.
diff --git a/src/http/modules/ngx_http_gzip_filter_module.c b/src/http/modules/ngx_http_gzip_filter_module.c
index 73b6d89..e4c343c 100644
--- a/src/http/modules/ngx_http_gzip_filter_module.c
+++ b/src/http/modules/ngx_http_gzip_filter_module.c
@@ -57,6 +57,7 @@
     unsigned             nomem:1;
     unsigned             gzheader:1;
     unsigned             buffering:1;
+    unsigned             intel:1;
 
     size_t               zin;
     size_t               zout;
@@ -233,6 +234,8 @@
 static ngx_http_output_header_filter_pt  ngx_http_next_header_filter;
 static ngx_http_output_body_filter_pt    ngx_http_next_body_filter;
 
+static ngx_uint_t  ngx_http_gzip_assume_intel;
+
 
 static ngx_int_t
 ngx_http_gzip_header_filter(ngx_http_request_t *r)
@@ -527,7 +530,27 @@
      *  *) 5920 bytes on amd64 and sparc64
      */
 
-    ctx->allocated = 8192 + (1 << (wbits + 2)) + (1 << (memlevel + 9));
+    if (!ngx_http_gzip_assume_intel) {
+        ctx->allocated = 8192 + (1 << (wbits + 2)) + (1 << (memlevel + 9));
+
+    } else {
+        /*
+         * A zlib variant from Intel, https://github.com/jtkukunas/zlib.
+         * It can force window bits to 13 for fast compression level,
+         * on processors with SSE 4.2 it uses 64K hash instead of scaling
+         * it from the specified memory level, and also introduces
+         * 16-byte padding in one out of the two window-sized buffers.
+         */
+
+        if (conf->level == 1) {
+            wbits = ngx_max(wbits, 13);
+        }
+
+        ctx->allocated = 8192 + 16 + (1 << (wbits + 2))
+                         + (1 << (ngx_max(memlevel, 8) + 8))
+                         + (1 << (memlevel + 8));
+        ctx->intel = 1;
+    }
 }
 
 
@@ -1003,7 +1026,7 @@
 
     alloc = items * size;
 
-    if (alloc % 512 != 0 && alloc < 8192) {
+    if (items == 1 && alloc % 512 != 0 && alloc < 8192) {
 
         /*
          * The zlib deflate_state allocation, it takes about 6K,
@@ -1025,9 +1048,14 @@
         return p;
     }
 
-    ngx_log_error(NGX_LOG_ALERT, ctx->request->connection->log, 0,
-                  "gzip filter failed to use preallocated memory: %ud of %ui",
-                  items * size, ctx->allocated);
+    if (ctx->intel) {
+        ngx_log_error(NGX_LOG_ALERT, ctx->request->connection->log, 0,
+                      "gzip filter failed to use preallocated memory: "
+                      "%ud of %ui", items * size, ctx->allocated);
+
+    } else {
+        ngx_http_gzip_assume_intel = 1;
+    }
 
     p = ngx_palloc(ctx->request->pool, items * size);