| |
| /* |
| * Copyright (C) Igor Sysoev |
| * Copyright (C) NGINX, Inc. |
| */ |
| |
| |
| #include <njs_main.h> |
| |
| |
| #define NJS_UTF8_START_TEST 0xC2 |
| |
| |
| static u_char invalid[] = { |
| |
| /* Invalid first byte less than 0xC2. */ |
| 1, 0x80, 0x00, 0x00, 0x00, |
| 1, 0xC0, 0x00, 0x00, 0x00, |
| 2, 0xC0, 0x00, 0x00, 0x00, |
| 3, 0xC0, 0x00, 0x00, 0x00, |
| 4, 0xC0, 0x00, 0x00, 0x00, |
| |
| /* Invalid 0x0x110000 value. */ |
| 4, 0xF4, 0x90, 0x80, 0x80, |
| |
| /* Incomplete length. */ |
| 2, 0xE0, 0xAF, 0xB5, 0x00, |
| |
| /* Overlong values. */ |
| 2, 0xC0, 0x80, 0x00, 0x00, |
| 2, 0xC1, 0xB3, 0x00, 0x00, |
| 3, 0xE0, 0x80, 0x80, 0x00, |
| 3, 0xE0, 0x81, 0xB3, 0x00, |
| 3, 0xE0, 0x90, 0x9A, 0x00, |
| 4, 0xF0, 0x80, 0x8A, 0x80, |
| 4, 0xF0, 0x80, 0x81, 0xB3, |
| 4, 0xF0, 0x80, 0xAF, 0xB5, |
| }; |
| |
| |
| static njs_int_t |
| utf8_overlong(u_char *overlong, size_t len) |
| { |
| u_char *p, utf8[4]; |
| size_t size; |
| uint32_t u, d; |
| njs_uint_t i; |
| const u_char *pp; |
| njs_unicode_decode_t ctx; |
| |
| njs_utf8_decode_init(&ctx); |
| |
| pp = overlong; |
| |
| d = njs_utf8_decode(&ctx, &pp, overlong + len); |
| |
| len = pp - overlong; |
| |
| if (d != 0xFFFFFFFF) { |
| p = njs_utf8_encode(utf8, d); |
| |
| size = (p != NULL) ? p - utf8 : 0; |
| |
| if (len != size || memcmp(overlong, utf8, size) != 0) { |
| |
| u = 0; |
| for (i = 0; i < len; i++) { |
| u = (u << 8) + overlong[i]; |
| } |
| |
| njs_printf("njs_utf8_decode(%05uXD, %uz) failed: %05uXD, %uz\n", |
| u, len, d, size); |
| |
| return NJS_ERROR; |
| } |
| } |
| |
| return NJS_OK; |
| } |
| |
| |
| static njs_int_t |
| utf8_unit_test(njs_uint_t start) |
| { |
| u_char *p, utf8[4]; |
| size_t len; |
| int32_t n; |
| uint32_t u, d; |
| njs_uint_t i, k, l, m; |
| const u_char *pp; |
| njs_unicode_decode_t ctx; |
| |
| njs_printf("utf8 test started\n"); |
| |
| /* Test valid UTF-8. */ |
| |
| for (u = 0; u <= NJS_UNICODE_MAX_CODEPOINT; u++) { |
| |
| p = njs_utf8_encode(utf8, u); |
| |
| if (p == NULL) { |
| njs_printf("njs_utf8_encode(%05uXD) failed\n", u); |
| return NJS_ERROR; |
| } |
| |
| pp = utf8; |
| |
| njs_utf8_decode_init(&ctx); |
| |
| d = njs_utf8_decode(&ctx, &pp, p); |
| |
| /* In UTF-8 not allowed UTF-16 surrogate pair sequences. */ |
| |
| if (u >= 0xD800 && u <= 0xDFFF) { |
| if (d != NJS_UNICODE_ERROR) { |
| njs_printf("njs_utf8_decode(%05uXD) failed for " |
| "surrogate pair: %05uxD\n", u, d); |
| |
| return NJS_ERROR; |
| } |
| |
| continue; |
| } |
| |
| if (u != d) { |
| njs_printf("njs_utf8_decode(%05uXD) failed: %05uxD\n", u, d); |
| return NJS_ERROR; |
| } |
| } |
| |
| /* Test some invalid UTF-8. */ |
| |
| for (i = 0; i < sizeof(invalid); i += 5) { |
| |
| len = invalid[i]; |
| utf8[0] = invalid[i + 1]; |
| utf8[1] = invalid[i + 2]; |
| utf8[2] = invalid[i + 3]; |
| utf8[3] = invalid[i + 4]; |
| |
| pp = utf8; |
| |
| njs_utf8_decode_init(&ctx); |
| |
| d = njs_utf8_decode(&ctx, &pp, utf8 + len); |
| |
| if (d <= NJS_UNICODE_MAX_CODEPOINT) { |
| |
| u = 0; |
| for (i = 0; i < len; i++) { |
| u = (u << 8) + utf8[i]; |
| } |
| |
| njs_printf("njs_utf8_decode(%05uXD, %uz) failed: %05uXD\n", |
| u, len, d); |
| return NJS_ERROR; |
| } |
| } |
| |
| /* Test all overlong UTF-8. */ |
| |
| for (i = start; i < 256; i++) { |
| utf8[0] = i; |
| |
| if (utf8_overlong(utf8, 1) != NJS_OK) { |
| return NJS_ERROR; |
| } |
| |
| for (k = 0; k < 256; k++) { |
| utf8[1] = k; |
| |
| if (utf8_overlong(utf8, 2) != NJS_OK) { |
| return NJS_ERROR; |
| } |
| |
| for (l = 0; l < 256; l++) { |
| utf8[2] = l; |
| |
| if (utf8_overlong(utf8, 3) != NJS_OK) { |
| return NJS_ERROR; |
| } |
| |
| for (m = 0; m < 256; m++) { |
| utf8[3] = m; |
| |
| if (utf8_overlong(utf8, 4) != NJS_OK) { |
| return NJS_ERROR; |
| } |
| } |
| } |
| } |
| } |
| |
| n = njs_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ", |
| (u_char *) "abc абв αβγ", |
| njs_length("ABC АБВ ΑΒΓ"), |
| njs_length("abc абв αβγ")); |
| |
| if (n != 0) { |
| njs_printf("njs_utf8_casecmp() failed\n"); |
| return NJS_ERROR; |
| } |
| |
| njs_printf("utf8 test passed\n"); |
| return NJS_OK; |
| } |
| |
| |
| static njs_int_t |
| utf16_unit_test() |
| { |
| int8_t length, length_to; |
| u_char *start, *end, *end_to; |
| uint32_t cp, i; |
| njs_unicode_decode_t ctx; |
| u_char buf[8], to[4]; |
| |
| njs_printf("utf16 test started\n"); |
| |
| end = buf + sizeof(buf); |
| end_to = to + sizeof(to); |
| |
| for (i = 0; i <= NJS_UNICODE_MAX_CODEPOINT; i++) { |
| |
| /* Skip surrogate pair. */ |
| |
| if (i >= 0xD800 && i <= 0xDFFF) { |
| continue; |
| } |
| |
| start = buf; |
| |
| length = njs_utf16_encode(i, &start, end); |
| if (length < NJS_OK) { |
| njs_printf("utf16 test encode failed\n"); |
| return NJS_ERROR; |
| } |
| |
| njs_utf16_decode_init(&ctx); |
| |
| start = buf; |
| |
| cp = njs_utf16_decode(&ctx, (const u_char **) &start, start + length); |
| if (cp > NJS_UNICODE_MAX_CODEPOINT) { |
| njs_printf("utf16 test decode failed\n"); |
| return NJS_ERROR; |
| } |
| |
| if (cp != i) { |
| njs_printf("utf16 test decode code point does not match\n"); |
| return NJS_ERROR; |
| } |
| |
| start = to; |
| |
| length_to = njs_utf16_encode(cp, &start, end_to); |
| if (length_to < NJS_OK) { |
| njs_printf("utf16 test encode failed\n"); |
| return NJS_ERROR; |
| } |
| |
| if (length_to != length || njs_strncmp(buf, to, length) != 0) { |
| njs_printf("utf16 test decode-encode failed\n"); |
| return NJS_ERROR; |
| } |
| } |
| |
| /* Surrogate pair. */ |
| |
| for (i = 0xD800; i <= 0xDFFF; i++) { |
| start = buf; |
| |
| length = njs_utf16_encode(i, &start, end); |
| if (length < NJS_OK) { |
| njs_printf("utf16 test surrogate pair encode lead failed\n"); |
| return NJS_ERROR; |
| } |
| |
| length_to = njs_utf16_encode(i - 0xD800 + 0xDC00, &start, end); |
| if (length_to < NJS_OK) { |
| njs_printf("utf16 test surrogate pair encode failed\n"); |
| return NJS_ERROR; |
| } |
| |
| njs_utf16_decode_init(&ctx); |
| |
| start = buf; |
| |
| cp = njs_utf16_decode(&ctx, (const u_char **) &start, |
| start + length + length_to); |
| if (cp > NJS_UNICODE_MAX_CODEPOINT) { |
| if (i < 0xDC00) { |
| njs_printf("utf16 test surrogate pair decode failed\n"); |
| return NJS_ERROR; |
| } |
| } |
| } |
| |
| njs_printf("utf16 test passed\n"); |
| |
| return NJS_OK; |
| } |
| |
| |
| int |
| main(int argc, char **argv) |
| { |
| njs_int_t ret; |
| njs_uint_t start; |
| |
| njs_printf("unicode unit test started\n"); |
| |
| if (argc > 1 && argv[1][0] == 'a') { |
| start = NJS_UTF8_START_TEST; |
| |
| } else { |
| start = 256; |
| } |
| |
| ret = utf8_unit_test(start); |
| if (ret != NJS_OK) { |
| return ret; |
| } |
| |
| ret = utf16_unit_test(); |
| if (ret != NJS_OK) { |
| return ret; |
| } |
| |
| njs_printf("unicode unit test passed\n"); |
| |
| return 0; |
| } |