
/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) Nginx, Inc.
 */


#if (NGX_SMP)
#define NGX_SMP_LOCK  "lock;"
#else
#define NGX_SMP_LOCK
#endif


/*
 * "cmpxchgl  r, [m]":
 *
 *     if (eax == [m]) {
 *         zf = 1;
 *         [m] = r;
 *     } else {
 *         zf = 0;
 *         eax = [m];
 *     }
 *
 *
 * The "r" means any general register.
 * The "=a" and "a" mean the %eax register.
 * Although we could return the result in any register, we use "a" because
 * it is used by cmpxchgl anyway.  The result is actually in %al, not in
 * the whole %eax; however, since the code is inlined, gcc can test %al as
 * well as %eax, and icc adds "movzbl %al, %eax" by itself.
 *
 * The "cc" clobber means that the flags were changed.
 */

static ngx_inline ngx_atomic_uint_t
ngx_atomic_cmp_set(ngx_atomic_t *lock, ngx_atomic_uint_t old,
    ngx_atomic_uint_t set)
{
    u_char  res;

    __asm__ volatile (

         NGX_SMP_LOCK
    "    cmpxchgl  %3, %1;   "
    "    sete      %0;       "

    : "=a" (res) : "m" (*lock), "a" (old), "r" (set) : "cc", "memory");

    return res;
}
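

/*
 * Illustrative sketch (not part of the original source; the helper name
 * ngx_example_trylock is hypothetical): a non-blocking try-lock built on
 * ngx_atomic_cmp_set().  The acquire succeeds only if the lock word is
 * observed as 0 and is atomically replaced with "id".
 */

static ngx_inline ngx_uint_t
ngx_example_trylock(ngx_atomic_t *lock, ngx_atomic_uint_t id)
{
    /* the cheap plain read filters out contended locks before cmpxchgl */
    return (*lock == 0 && ngx_atomic_cmp_set(lock, 0, id));
}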


/*
 * "xaddl  r, [m]":
 *
 *     temp = [m];
 *     [m] += r;
 *     r = temp;
 *
 *
 * The "+r" means any general register, both read and written.
 * The "cc" clobber means that the flags were changed.
 */


#if !(( __GNUC__ == 2 && __GNUC_MINOR__ <= 7 ) || ( __INTEL_COMPILER >= 800 ))

/*
 * icc 8.1 and 9.0 compile broken code with the -march=pentium4 option:
 * ngx_atomic_fetch_add() always returns the input "add" value,
 * so we use the gcc 2.7 version for them.
 *
 * icc 8.1 and 9.0 with the -march=pentiumpro option, as well as icc 7.1,
 * compile correct code.
 */

static ngx_inline ngx_atomic_int_t
ngx_atomic_fetch_add(ngx_atomic_t *value, ngx_atomic_int_t add)
{
    __asm__ volatile (

         NGX_SMP_LOCK
    "    xaddl  %0, %1;   "

    : "+r" (add) : "m" (*value) : "cc", "memory");

    return add;
}


#else

/*
 * gcc 2.7 does not support "+r", so we have to use the fixed %eax
 * ("=a" and "a"), which adds two superfluous instructions at the end
 * of the code, something like "mov %eax, %edx / mov %edx, %eax".
 */

static ngx_inline ngx_atomic_int_t
ngx_atomic_fetch_add(ngx_atomic_t *value, ngx_atomic_int_t add)
{
    ngx_atomic_uint_t  old;

    __asm__ volatile (

         NGX_SMP_LOCK
    "    xaddl  %2, %1;   "

    : "=a" (old) : "m" (*value), "a" (add) : "cc", "memory");

    return old;
}

#endif
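

/*
 * Illustrative sketch (hypothetical, not in the original source): using
 * ngx_atomic_fetch_add() as a shared counter.  xaddl returns the value
 * the counter held before the addition, so the caller gets
 * post-increment semantics for free.
 */

static ngx_inline ngx_atomic_int_t
ngx_example_counter_inc(ngx_atomic_t *counter)
{
    /* the stored value becomes old + 1; the old value is returned */
    return ngx_atomic_fetch_add(counter, 1);
}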


/*
 * on x86 the write operations are performed in program order, so we only
 * need to disable the gcc reordering optimizations
 */

#define ngx_memory_barrier()    __asm__ volatile ("" ::: "memory")
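
/*
 * Illustrative sketch (hypothetical): publishing data through shared
 * memory.  The compiler barrier keeps gcc from moving the flag store
 * ahead of the data store; x86's in-order writes then guarantee the
 * same ordering in hardware.
 */

static ngx_inline void
ngx_example_publish(ngx_atomic_t *data, ngx_atomic_t *flag,
    ngx_atomic_uint_t v)
{
    *data = v;              /* write the payload first                  */
    ngx_memory_barrier();   /* no compiler reordering across this point */
    *flag = 1;              /* then raise the flag for readers          */
}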

/* old "as" does not support the "pause" opcode, so we emit its raw bytes */
#define ngx_cpu_pause()         __asm__ (".byte 0xf3, 0x90")
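

/*
 * Illustrative sketch (hypothetical, simplified from the spirit of
 * ngx_spinlock()): a busy-wait lock acquire.  ngx_cpu_pause() tells the
 * CPU this is a spin-wait loop, which saves power and avoids memory
 * order mis-speculation penalties on hyper-threaded cores.
 */

static ngx_inline void
ngx_example_spinlock_acquire(ngx_atomic_t *lock, ngx_atomic_uint_t id)
{
    for ( ;; ) {
        if (*lock == 0 && ngx_atomic_cmp_set(lock, 0, id)) {
            return;
        }

        ngx_cpu_pause();
    }
}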