| |
| /* |
| * Copyright (C) Igor Sysoev |
| * Copyright (C) Dmitry Volyntsev |
| * Copyright (C) NGINX, Inc. |
| */ |
| |
| |
| #include <njs_main.h> |
| |
/*
 * Backend selection.  When NJS_HAVE_PCRE2 is defined the file is built on
 * top of PCRE2 (PCRE2_CODE_UNIT_WIDTH is defined as 8 ahead of the pcre2.h
 * include); otherwise it is built on top of the original PCRE library.
 */
| #ifdef NJS_HAVE_PCRE2 |
| |
| #define PCRE2_CODE_UNIT_WIDTH 8 |
| #include <pcre2.h> |
| |
| |
/* Formats a PCRE2 error code into the caller-supplied 128-byte buffer. */
| static const u_char* njs_regex_pcre2_error(int errcode, u_char buffer[128]); |
| |
| #else |
| |
| #include <pcre.h> |
| |
| |
/*
 * Allocator trampolines that njs_regex_compile() installs into the global
 * pcre_malloc/pcre_free hooks; they forward to the callbacks stored in
 * regex_context.
 */
| static void *njs_pcre_malloc(size_t size); |
| static void njs_pcre_free(void *p); |
| |
| |
/*
 * Context used by the trampolines above.  njs_regex_compile() points it at
 * the caller's context before compiling and resets it to NULL afterwards.
 */
| static njs_regex_generic_ctx_t *regex_context; |
| |
| #endif |
| |
| |
| njs_regex_generic_ctx_t * |
| njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc, |
| njs_pcre_free_t private_free, void *memory_data) |
| { |
| #ifdef NJS_HAVE_PCRE2 |
| |
| return pcre2_general_context_create(private_malloc, private_free, |
| memory_data); |
| #else |
| |
| njs_regex_generic_ctx_t *ctx; |
| |
| ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data); |
| |
| if (njs_fast_path(ctx != NULL)) { |
| ctx->private_malloc = private_malloc; |
| ctx->private_free = private_free; |
| ctx->memory_data = memory_data; |
| } |
| |
| return ctx; |
| |
| #endif |
| } |
| |
| |
| njs_regex_compile_ctx_t * |
| njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx) |
| { |
| #ifdef NJS_HAVE_PCRE2 |
| pcre2_compile_context *cc; |
| |
| cc = pcre2_compile_context_create(ctx); |
| |
| #ifdef PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES |
| if (njs_fast_path(cc != NULL)) { |
| /* Workaround for surrogate pairs in regular expressions |
| * |
| * This option is needed because njs, unlike the standard ECMAScript, |
| * stores and processes strings in UTF-8 encoding. |
| * PCRE2 does not support surrogate pairs by default when it |
| * is compiled for UTF-8 only strings. But many polyfills |
| * and transpilers use such surrogate pairs expressions. |
| */ |
| pcre2_set_compile_extra_options(cc, |
| PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES); |
| } |
| #endif |
| |
| return cc; |
| |
| #else |
| |
| return ctx; |
| |
| #endif |
| } |
| |
| |
| |
| njs_int_t |
| njs_regex_escape(njs_mp_t *mp, njs_str_t *text) |
| { |
| #ifdef NJS_HAVE_PCRE2 |
| |
| return NJS_OK; |
| |
| #else |
| |
| /* |
| * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with |
| * lone closing square brackets as invalid. Whereas according |
| * to ES6: 11.8.5 it is a valid regexp expression. |
| * |
| * 2) escaping zero byte characters as "\u0000". |
| * |
| * Escaping it here as a workaround. |
| */ |
| |
| size_t brackets, zeros; |
| u_char *p, *dst, *start, *end; |
| njs_bool_t in; |
| |
| start = text->start; |
| end = text->start + text->length; |
| |
| in = 0; |
| zeros = 0; |
| brackets = 0; |
| |
| for (p = start; p < end; p++) { |
| |
| switch (*p) { |
| case '[': |
| in = 1; |
| break; |
| |
| case ']': |
| if (!in) { |
| brackets++; |
| } |
| |
| in = 0; |
| break; |
| |
| case '\\': |
| p++; |
| |
| if (p == end || *p != '\0') { |
| break; |
| } |
| |
| /* Fall through. */ |
| |
| case '\0': |
| zeros++; |
| break; |
| } |
| } |
| |
| if (!brackets && !zeros) { |
| return NJS_OK; |
| } |
| |
| text->length = text->length + brackets + zeros * njs_length("\\u0000"); |
| |
| text->start = njs_mp_alloc(mp, text->length); |
| if (njs_slow_path(text->start == NULL)) { |
| return NJS_ERROR; |
| } |
| |
| in = 0; |
| dst = text->start; |
| |
| for (p = start; p < end; p++) { |
| |
| switch (*p) { |
| case '[': |
| in = 1; |
| break; |
| |
| case ']': |
| if (!in) { |
| *dst++ = '\\'; |
| } |
| |
| in = 0; |
| break; |
| |
| case '\\': |
| *dst++ = *p++; |
| |
| if (p == end) { |
| goto done; |
| } |
| |
| if (*p != '\0') { |
| break; |
| } |
| |
| /* Fall through. */ |
| |
| case '\0': |
| dst = njs_cpymem(dst, "\\u0000", 6); |
| continue; |
| } |
| |
| *dst++ = *p; |
| } |
| |
| done: |
| |
| text->length = dst - text->start; |
| |
| return NJS_OK; |
| |
| #endif |
| } |
| |
| |
| njs_int_t |
| njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, |
| njs_regex_flags_t flags, njs_regex_compile_ctx_t *cctx, njs_trace_t *trace) |
| { |
| #ifdef NJS_HAVE_PCRE2 |
| |
| int ret; |
| u_char *error; |
| size_t erroff; |
| njs_uint_t options; |
| u_char errstr[128]; |
| |
| options = PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF; |
| |
| if ((flags & NJS_REGEX_IGNORE_CASE)) { |
| options |= PCRE2_CASELESS; |
| } |
| |
| if ((flags & NJS_REGEX_MULTILINE)) { |
| options |= PCRE2_MULTILINE; |
| } |
| |
| if ((flags & NJS_REGEX_STICKY)) { |
| options |= PCRE2_ANCHORED; |
| } |
| |
| if ((flags & NJS_REGEX_UTF8)) { |
| options |= PCRE2_UTF; |
| } |
| |
| regex->code = pcre2_compile(source, len, options, &ret, &erroff, cctx); |
| |
| if (njs_slow_path(regex->code == NULL)) { |
| error = &source[erroff]; |
| |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre_compile2(\"%s\") failed: %s at \"%s\"", |
| source, njs_regex_pcre2_error(ret, errstr), error); |
| |
| return NJS_DECLINED; |
| } |
| |
| ret = pcre2_pattern_info(regex->code, PCRE2_INFO_CAPTURECOUNT, |
| ®ex->ncaptures); |
| |
| if (njs_slow_path(ret < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre2_pattern_info(\"%s\", PCRE2_INFO_CAPTURECOUNT) failed: %s", |
| source, njs_regex_pcre2_error(ret, errstr)); |
| |
| return NJS_ERROR; |
| } |
| |
| ret = pcre2_pattern_info(regex->code, PCRE2_INFO_BACKREFMAX, |
| ®ex->backrefmax); |
| |
| if (njs_slow_path(ret < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre2_pattern_info(\"%s\", PCRE2_INFO_BACKREFMAX) failed: %s", |
| source, njs_regex_pcre2_error(ret, errstr)); |
| |
| return NJS_ERROR; |
| } |
| |
| /* Reserve additional elements for the first "$0" capture. */ |
| regex->ncaptures++; |
| |
| if (regex->ncaptures > 1) { |
| ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMECOUNT, |
| ®ex->nentries); |
| |
| if (njs_slow_path(ret < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMECOUNT) failed: %s", |
| source, njs_regex_pcre2_error(ret, errstr)); |
| |
| return NJS_ERROR; |
| } |
| |
| if (regex->nentries != 0) { |
| ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMEENTRYSIZE, |
| ®ex->entry_size); |
| |
| if (njs_slow_path(ret < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMEENTRYSIZE)" |
| " failed: %s", source, |
| njs_regex_pcre2_error(ret, errstr)); |
| |
| return NJS_ERROR; |
| } |
| |
| ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMETABLE, |
| ®ex->entries); |
| |
| if (njs_slow_path(ret < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMETABLE) " |
| "failed: %s", source, |
| njs_regex_pcre2_error(ret, errstr)); |
| |
| return NJS_ERROR; |
| } |
| } |
| } |
| |
| return NJS_OK; |
| |
| #else |
| |
| int ret, err, erroff; |
| char *pattern, *error; |
| void *(*saved_malloc)(size_t size); |
| void (*saved_free)(void *p); |
| njs_uint_t options; |
| const char *errstr; |
| njs_regex_generic_ctx_t *ctx; |
| |
| ctx = cctx; |
| |
| ret = NJS_ERROR; |
| |
| saved_malloc = pcre_malloc; |
| pcre_malloc = njs_pcre_malloc; |
| saved_free = pcre_free; |
| pcre_free = njs_pcre_free; |
| regex_context = ctx; |
| |
| #ifdef PCRE_JAVASCRIPT_COMPAT |
| /* JavaScript compatibility has been introduced in PCRE-7.7. */ |
| options = PCRE_JAVASCRIPT_COMPAT; |
| #else |
| options = 0; |
| #endif |
| |
| if ((flags & NJS_REGEX_IGNORE_CASE)) { |
| options |= PCRE_CASELESS; |
| } |
| |
| if ((flags & NJS_REGEX_MULTILINE)) { |
| options |= PCRE_MULTILINE; |
| } |
| |
| if ((flags & NJS_REGEX_STICKY)) { |
| options |= PCRE_ANCHORED; |
| } |
| |
| if ((flags & NJS_REGEX_UTF8)) { |
| options |= PCRE_UTF8; |
| } |
| |
| pattern = (char *) source; |
| |
| regex->code = pcre_compile(pattern, options, &errstr, &erroff, NULL); |
| |
| if (njs_slow_path(regex->code == NULL)) { |
| error = pattern + erroff; |
| |
| if (*error != '\0') { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre_compile(\"%s\") failed: %s at \"%s\"", |
| pattern, errstr, error); |
| |
| } else { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre_compile(\"%s\") failed: %s", pattern, errstr); |
| } |
| |
| ret = NJS_DECLINED; |
| |
| goto done; |
| } |
| |
| regex->extra = pcre_study(regex->code, 0, &errstr); |
| |
| if (njs_slow_path(errstr != NULL)) { |
| njs_alert(trace, NJS_LEVEL_WARN, |
| "pcre_study(\"%s\") failed: %s", pattern, errstr); |
| } |
| |
| err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_CAPTURECOUNT, |
| ®ex->ncaptures); |
| |
| if (njs_slow_path(err < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre_fullinfo(\"%s\", PCRE_INFO_CAPTURECOUNT) failed: %d", |
| pattern, err); |
| |
| goto done; |
| } |
| |
| err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_BACKREFMAX, |
| ®ex->backrefmax); |
| |
| if (njs_slow_path(err < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre_fullinfo(\"%s\", PCRE_INFO_BACKREFMAX) failed: %d", |
| pattern, err); |
| |
| goto done; |
| } |
| |
| /* Reserve additional elements for the first "$0" capture. */ |
| regex->ncaptures++; |
| |
| if (regex->ncaptures > 1) { |
| err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMECOUNT, |
| ®ex->nentries); |
| |
| if (njs_slow_path(err < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, |
| "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d", |
| pattern, err); |
| |
| goto done; |
| } |
| |
| if (regex->nentries != 0) { |
| err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMEENTRYSIZE, |
| ®ex->entry_size); |
| |
| if (njs_slow_path(err < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", " |
| "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err); |
| |
| goto done; |
| } |
| |
| err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMETABLE, |
| ®ex->entries); |
| |
| if (njs_slow_path(err < 0)) { |
| njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", " |
| "PCRE_INFO_NAMETABLE) failed: %d", pattern, err); |
| |
| goto done; |
| } |
| } |
| } |
| |
| ret = NJS_OK; |
| |
| done: |
| |
| pcre_malloc = saved_malloc; |
| pcre_free = saved_free; |
| regex_context = NULL; |
| |
| return ret; |
| |
| #endif |
| } |
| |
| |
| njs_bool_t |
| njs_regex_is_valid(njs_regex_t *regex) |
| { |
| return (regex->code != NULL); |
| } |
| |
| |
| njs_int_t |
| njs_regex_named_captures(njs_regex_t *regex, njs_str_t *name, int n) |
| { |
| char *entry; |
| |
| if (name == NULL) { |
| return regex->nentries; |
| } |
| |
| if (n >= regex->nentries) { |
| return NJS_ERROR; |
| } |
| |
| entry = regex->entries + regex->entry_size * n; |
| |
| name->start = (u_char *) entry + 2; |
| name->length = njs_strlen(name->start); |
| |
| return (entry[0] << 8) + entry[1]; |
| } |
| |
| |
| njs_regex_match_data_t * |
| njs_regex_match_data(njs_regex_t *regex, njs_regex_generic_ctx_t *ctx) |
| { |
| #ifdef NJS_HAVE_PCRE2 |
| |
| if (regex != NULL) { |
| return pcre2_match_data_create_from_pattern(regex->code, ctx); |
| } |
| |
| return pcre2_match_data_create(0, ctx); |
| |
| #else |
| |
| size_t size; |
| njs_uint_t ncaptures; |
| njs_regex_match_data_t *match_data; |
| |
| if (regex != NULL) { |
| ncaptures = regex->ncaptures - 1; |
| |
| } else { |
| ncaptures = 0; |
| } |
| |
| /* Each capture is stored in 3 "int" vector elements. */ |
| ncaptures *= 3; |
| size = sizeof(njs_regex_match_data_t) + ncaptures * sizeof(int); |
| |
| match_data = ctx->private_malloc(size, ctx->memory_data); |
| |
| if (njs_fast_path(match_data != NULL)) { |
| match_data->ncaptures = ncaptures + 3; |
| } |
| |
| return match_data; |
| |
| #endif |
| } |
| |
| |
| void |
| njs_regex_match_data_free(njs_regex_match_data_t *match_data, |
| njs_regex_generic_ctx_t *ctx) |
| { |
| #ifdef NJS_HAVE_PCRE2 |
| |
| pcre2_match_data_free(match_data); |
| |
| #else |
| |
| ctx->private_free(match_data, ctx->memory_data); |
| |
| #endif |
| } |
| |
| |
| njs_int_t |
| njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off, |
| size_t len, njs_regex_match_data_t *match_data, njs_trace_t *trace) |
| { |
| #ifdef NJS_HAVE_PCRE2 |
| |
| int ret; |
| u_char errstr[128]; |
| |
| ret = pcre2_match(regex->code, subject, len, off, 0, match_data, NULL); |
| |
| if (ret < 0) { |
| if (ret == PCRE2_ERROR_NOMATCH) { |
| return NJS_DECLINED; |
| } |
| |
| njs_alert(trace, NJS_LEVEL_ERROR, "pcre2_match() failed: %s", |
| njs_regex_pcre2_error(ret, errstr)); |
| return NJS_ERROR; |
| } |
| |
| return ret; |
| |
| #else |
| |
| int ret; |
| |
| ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len, |
| off, 0, match_data->captures, match_data->ncaptures); |
| |
| if (ret <= PCRE_ERROR_NOMATCH) { |
| if (ret == PCRE_ERROR_NOMATCH) { |
| return NJS_DECLINED; |
| } |
| |
| njs_alert(trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret); |
| return NJS_ERROR; |
| } |
| |
| return ret; |
| |
| #endif |
| } |
| |
| |
| size_t |
| njs_regex_capture(njs_regex_match_data_t *match_data, njs_uint_t n) |
| { |
| #ifdef NJS_HAVE_PCRE2 |
| |
| size_t c; |
| |
| c = pcre2_get_ovector_pointer(match_data)[n]; |
| |
| if (c == PCRE2_UNSET) { |
| return NJS_REGEX_UNSET; |
| } |
| |
| return c; |
| |
| #else |
| |
| return match_data->captures[n]; |
| |
| #endif |
| } |
| |
| #ifdef NJS_HAVE_PCRE2 |
| |
| static const u_char * |
| njs_regex_pcre2_error(int errcode, u_char buffer[128]) |
| { |
| pcre2_get_error_message(errcode, buffer, 128); |
| |
| return buffer; |
| } |
| |
| #else |
| |
| static void * |
| njs_pcre_malloc(size_t size) |
| { |
| return regex_context->private_malloc(size, regex_context->memory_data); |
| } |
| |
| |
| static void |
| njs_pcre_free(void *p) |
| { |
| regex_context->private_free(p, regex_context->memory_data); |
| } |
| |
| #endif |
| |
| |