| |
| /* |
| * Copyright (C) Igor Sysoev |
| * Copyright (C) NGINX, Inc. |
| */ |
| |
| |
| #include <njs_main.h> |
| |
| |
| struct njs_regexp_group_s { |
| njs_str_t name; |
| uint32_t hash; |
| uint32_t capture; |
| }; |
| |
| |
| static void *njs_regexp_malloc(size_t size, void *memory_data); |
| static void njs_regexp_free(void *p, void *memory_data); |
| static njs_int_t njs_regexp_prototype_source(njs_vm_t *vm, |
| njs_object_prop_t *prop, njs_value_t *value, njs_value_t *setval, |
| njs_value_t *retval); |
| static int njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex, |
| u_char *source, int options); |
| static u_char *njs_regexp_compile_trace_handler(njs_trace_t *trace, |
| njs_trace_data_t *td, u_char *start); |
| static u_char *njs_regexp_match_trace_handler(njs_trace_t *trace, |
| njs_trace_data_t *td, u_char *start); |
| static njs_array_t *njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, |
| njs_regexp_utf8_t type, njs_string_prop_t *string, |
| njs_regex_match_data_t *data); |
| static njs_int_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value, |
| u_char *start, uint32_t size, int32_t length); |
| |
| |
| njs_int_t |
| njs_regexp_init(njs_vm_t *vm) |
| { |
| vm->regex_context = njs_regex_context_create(njs_regexp_malloc, |
| njs_regexp_free, vm->mem_pool); |
| if (njs_slow_path(vm->regex_context == NULL)) { |
| njs_memory_error(vm); |
| return NJS_ERROR; |
| } |
| |
| vm->single_match_data = njs_regex_match_data(NULL, vm->regex_context); |
| if (njs_slow_path(vm->single_match_data == NULL)) { |
| njs_memory_error(vm); |
| return NJS_ERROR; |
| } |
| |
| vm->regex_context->trace = &vm->trace; |
| |
| return NJS_OK; |
| } |
| |
| |
| static void * |
| njs_regexp_malloc(size_t size, void *memory_data) |
| { |
| return njs_mp_alloc(memory_data, size); |
| } |
| |
| |
| static void |
| njs_regexp_free(void *p, void *memory_data) |
| { |
| njs_mp_free(memory_data, p); |
| } |
| |
| |
| static njs_regexp_flags_t |
| njs_regexp_value_flags(njs_vm_t *vm, const njs_value_t *regexp) |
| { |
| njs_regexp_flags_t flags; |
| njs_regexp_pattern_t *pattern; |
| |
| flags = 0; |
| |
| pattern = njs_regexp_pattern(regexp); |
| |
| if (pattern->global) { |
| flags |= NJS_REGEXP_GLOBAL; |
| } |
| |
| if (pattern->ignore_case) { |
| flags |= NJS_REGEXP_IGNORE_CASE; |
| } |
| |
| if (pattern->multiline) { |
| flags |= NJS_REGEXP_MULTILINE; |
| } |
| |
| return flags; |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_constructor(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, |
| njs_index_t unused) |
| { |
| u_char *start; |
| njs_int_t ret; |
| njs_str_t string; |
| njs_value_t source, *pattern, *flags; |
| njs_regexp_flags_t re_flags; |
| |
| pattern = njs_arg(args, nargs, 1); |
| |
| if (njs_is_regexp(pattern)) { |
| ret = njs_regexp_prototype_source(vm, NULL, pattern, NULL, &source); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return ret; |
| } |
| |
| re_flags = njs_regexp_value_flags(vm, pattern); |
| |
| pattern = &source; |
| |
| } else { |
| if (njs_is_defined(pattern)) { |
| ret = njs_value_to_string(vm, pattern, pattern); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return ret; |
| } |
| |
| } else { |
| pattern = njs_value_arg(&njs_string_empty); |
| } |
| |
| re_flags = 0; |
| } |
| |
| flags = njs_arg(args, nargs, 2); |
| |
| if (njs_is_defined(flags)) { |
| ret = njs_value_to_string(vm, flags, flags); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return ret; |
| } |
| |
| njs_string_get(flags, &string); |
| |
| start = string.start; |
| |
| re_flags = njs_regexp_flags(&start, start + string.length); |
| if (njs_slow_path(re_flags < 0 |
| || (size_t) (start - string.start) != string.length)) |
| { |
| njs_syntax_error(vm, "Invalid RegExp flags \"%V\"", &string); |
| return NJS_ERROR; |
| } |
| } |
| |
| njs_string_get(pattern, &string); |
| |
| return njs_regexp_create(vm, &vm->retval, string.start, string.length, |
| re_flags); |
| } |
| |
| |
| njs_int_t |
| njs_regexp_create(njs_vm_t *vm, njs_value_t *value, u_char *start, |
| size_t length, njs_regexp_flags_t flags) |
| { |
| njs_regexp_t *regexp; |
| njs_regexp_pattern_t *pattern; |
| |
| if (length != 0 || flags != 0) { |
| if (length == 0) { |
| start = (u_char *) "(?:)"; |
| length = njs_length("(?:)"); |
| } |
| |
| pattern = njs_regexp_pattern_create(vm, start, length, flags); |
| if (njs_slow_path(pattern == NULL)) { |
| return NJS_ERROR; |
| } |
| |
| } else { |
| pattern = vm->shared->empty_regexp_pattern; |
| } |
| |
| regexp = njs_regexp_alloc(vm, pattern); |
| |
| if (njs_fast_path(regexp != NULL)) { |
| njs_set_regexp(value, regexp); |
| |
| return NJS_OK; |
| } |
| |
| return NJS_ERROR; |
| } |
| |
| |
| /* |
| * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with |
| * lone closing square brackets as invalid. Whereas according |
| * to ES6: 11.8.5 it is a valid regexp expression. |
| * |
| * 2) escaping zero byte characters as "\u0000". |
| * |
| * Escaping it here as a workaround. |
| */ |
| |
| njs_inline njs_int_t |
| njs_regexp_escape(njs_vm_t *vm, njs_str_t *text) |
| { |
| size_t brackets, zeros; |
| u_char *p, *dst, *start, *end; |
| njs_bool_t in; |
| |
| start = text->start; |
| end = text->start + text->length; |
| |
| in = 0; |
| zeros = 0; |
| brackets = 0; |
| |
| for (p = start; p < end; p++) { |
| |
| switch (*p) { |
| case '[': |
| in = 1; |
| break; |
| |
| case ']': |
| if (!in) { |
| brackets++; |
| } |
| |
| in = 0; |
| break; |
| |
| case '\\': |
| p++; |
| |
| if (p == end || *p != '\0') { |
| break; |
| } |
| |
| /* Fall through. */ |
| |
| case '\0': |
| zeros++; |
| break; |
| } |
| } |
| |
| if (!brackets && !zeros) { |
| return NJS_OK; |
| } |
| |
| text->length = text->length + brackets + zeros * njs_length("\\u0000"); |
| |
| text->start = njs_mp_alloc(vm->mem_pool, text->length); |
| if (njs_slow_path(text->start == NULL)) { |
| njs_memory_error(vm); |
| return NJS_ERROR; |
| } |
| |
| in = 0; |
| dst = text->start; |
| |
| for (p = start; p < end; p++) { |
| |
| switch (*p) { |
| case '[': |
| in = 1; |
| break; |
| |
| case ']': |
| if (!in) { |
| *dst++ = '\\'; |
| } |
| |
| in = 0; |
| break; |
| |
| case '\\': |
| *dst++ = *p++; |
| |
| if (p == end) { |
| goto done; |
| } |
| |
| if (*p != '\0') { |
| break; |
| } |
| |
| /* Fall through. */ |
| |
| case '\0': |
| dst = njs_cpymem(dst, "\\u0000", 6); |
| continue; |
| } |
| |
| *dst++ = *p; |
| } |
| |
| done: |
| |
| text->length = dst - text->start; |
| |
| return NJS_OK; |
| } |
| |
| |
| njs_regexp_flags_t |
| njs_regexp_flags(u_char **start, u_char *end) |
| { |
| u_char *p; |
| njs_regexp_flags_t flags, flag; |
| |
| flags = NJS_REGEXP_NO_FLAGS; |
| |
| for (p = *start; p < end; p++) { |
| switch (*p) { |
| case 'g': |
| flag = NJS_REGEXP_GLOBAL; |
| break; |
| |
| case 'i': |
| flag = NJS_REGEXP_IGNORE_CASE; |
| break; |
| |
| case 'm': |
| flag = NJS_REGEXP_MULTILINE; |
| break; |
| |
| default: |
| if (*p >= 'a' && *p <= 'z') { |
| goto invalid; |
| } |
| |
| goto done; |
| } |
| |
| if (njs_slow_path((flags & flag) != 0)) { |
| goto invalid; |
| } |
| |
| flags |= flag; |
| } |
| |
| done: |
| |
| *start = p; |
| |
| return flags; |
| |
| invalid: |
| |
| *start = p + 1; |
| |
| return NJS_REGEXP_INVALID_FLAG; |
| } |
| |
| |
| njs_regexp_pattern_t * |
| njs_regexp_pattern_create(njs_vm_t *vm, u_char *start, size_t length, |
| njs_regexp_flags_t flags) |
| { |
| int options, ret; |
| u_char *p, *end; |
| size_t size; |
| njs_str_t text; |
| njs_uint_t n; |
| njs_regex_t *regex; |
| njs_regexp_group_t *group; |
| njs_regexp_pattern_t *pattern; |
| |
| size = 1; /* A trailing "/". */ |
| size += ((flags & NJS_REGEXP_GLOBAL) != 0); |
| size += ((flags & NJS_REGEXP_IGNORE_CASE) != 0); |
| size += ((flags & NJS_REGEXP_MULTILINE) != 0); |
| |
| text.start = start; |
| text.length = length; |
| |
| ret = njs_regexp_escape(vm, &text); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return NULL; |
| } |
| |
| pattern = njs_mp_zalloc(vm->mem_pool, sizeof(njs_regexp_pattern_t) + 1 |
| + text.length + size + 1); |
| if (njs_slow_path(pattern == NULL)) { |
| njs_memory_error(vm); |
| return NULL; |
| } |
| |
| pattern->flags = size; |
| |
| p = (u_char *) pattern + sizeof(njs_regexp_pattern_t); |
| pattern->source = p; |
| |
| *p++ = '/'; |
| p = memcpy(p, text.start, text.length); |
| p += text.length; |
| end = p; |
| *p++ = '\0'; |
| |
| pattern->global = ((flags & NJS_REGEXP_GLOBAL) != 0); |
| if (pattern->global) { |
| *p++ = 'g'; |
| } |
| |
| #ifdef PCRE_JAVASCRIPT_COMPAT |
| /* JavaScript compatibility has been introduced in PCRE-7.7. */ |
| options = PCRE_JAVASCRIPT_COMPAT; |
| #else |
| options = 0; |
| #endif |
| |
| pattern->ignore_case = ((flags & NJS_REGEXP_IGNORE_CASE) != 0); |
| if (pattern->ignore_case) { |
| *p++ = 'i'; |
| options |= PCRE_CASELESS; |
| } |
| |
| pattern->multiline = ((flags & NJS_REGEXP_MULTILINE) != 0); |
| if (pattern->multiline) { |
| *p++ = 'm'; |
| options |= PCRE_MULTILINE; |
| } |
| |
| *p++ = '\0'; |
| |
| ret = njs_regexp_pattern_compile(vm, &pattern->regex[0], |
| &pattern->source[1], options); |
| |
| if (njs_fast_path(ret >= 0)) { |
| pattern->ncaptures = ret; |
| |
| } else if (ret < 0 && ret != NJS_DECLINED) { |
| goto fail; |
| } |
| |
| ret = njs_regexp_pattern_compile(vm, &pattern->regex[1], |
| &pattern->source[1], options | PCRE_UTF8); |
| if (njs_fast_path(ret >= 0)) { |
| |
| if (njs_slow_path(njs_regex_is_valid(&pattern->regex[0]) |
| && (u_int) ret != pattern->ncaptures)) |
| { |
| njs_internal_error(vm, "regexp pattern compile failed"); |
| goto fail; |
| } |
| |
| pattern->ncaptures = ret; |
| |
| } else if (ret != NJS_DECLINED) { |
| goto fail; |
| } |
| |
| if (njs_regex_is_valid(&pattern->regex[0])) { |
| regex = &pattern->regex[0]; |
| |
| } else if (njs_regex_is_valid(&pattern->regex[1])) { |
| regex = &pattern->regex[1]; |
| |
| } else { |
| goto fail; |
| } |
| |
| *end = '/'; |
| |
| pattern->ngroups = njs_regex_named_captures(regex, NULL, 0); |
| |
| if (pattern->ngroups != 0) { |
| size = sizeof(njs_regexp_group_t) * pattern->ngroups; |
| |
| pattern->groups = njs_mp_alloc(vm->mem_pool, size); |
| if (njs_slow_path(pattern->groups == NULL)) { |
| njs_memory_error(vm); |
| return NULL; |
| } |
| |
| n = 0; |
| |
| do { |
| group = &pattern->groups[n]; |
| |
| group->capture = njs_regex_named_captures(regex, &group->name, n); |
| group->hash = njs_djb_hash(group->name.start, group->name.length); |
| |
| n++; |
| |
| } while (n != pattern->ngroups); |
| } |
| |
| njs_set_undefined(&vm->retval); |
| |
| return pattern; |
| |
| fail: |
| |
| njs_mp_free(vm->mem_pool, pattern); |
| return NULL; |
| } |
| |
| |
| static int |
| njs_regexp_pattern_compile(njs_vm_t *vm, njs_regex_t *regex, u_char *source, |
| int options) |
| { |
| njs_int_t ret; |
| njs_trace_handler_t handler; |
| |
| handler = vm->trace.handler; |
| vm->trace.handler = njs_regexp_compile_trace_handler; |
| |
| /* Zero length means a zero-terminated string. */ |
| ret = njs_regex_compile(regex, source, 0, options, vm->regex_context); |
| |
| vm->trace.handler = handler; |
| |
| if (njs_fast_path(ret == NJS_OK)) { |
| return regex->ncaptures; |
| } |
| |
| return ret; |
| } |
| |
| |
| static u_char * |
| njs_regexp_compile_trace_handler(njs_trace_t *trace, njs_trace_data_t *td, |
| u_char *start) |
| { |
| u_char *p; |
| njs_vm_t *vm; |
| |
| vm = trace->data; |
| |
| trace = trace->next; |
| p = trace->handler(trace, td, start); |
| |
| if (vm->parser != NULL && vm->parser->lexer != NULL) { |
| njs_syntax_error(vm, "%*s in %uD", p - start, start, |
| vm->parser->lexer->line); |
| |
| } else { |
| njs_syntax_error(vm, "%*s", p - start, start); |
| } |
| |
| return p; |
| } |
| |
| |
| njs_int_t |
| njs_regexp_match(njs_vm_t *vm, njs_regex_t *regex, const u_char *subject, |
| size_t off, size_t len, njs_regex_match_data_t *match_data) |
| { |
| njs_int_t ret; |
| njs_trace_handler_t handler; |
| |
| handler = vm->trace.handler; |
| vm->trace.handler = njs_regexp_match_trace_handler; |
| |
| ret = njs_regex_match(regex, subject, off, len, match_data, |
| vm->regex_context); |
| |
| vm->trace.handler = handler; |
| |
| return ret; |
| } |
| |
| |
| static u_char * |
| njs_regexp_match_trace_handler(njs_trace_t *trace, njs_trace_data_t *td, |
| u_char *start) |
| { |
| u_char *p; |
| njs_vm_t *vm; |
| |
| vm = trace->data; |
| |
| trace = trace->next; |
| p = trace->handler(trace, td, start); |
| |
| njs_internal_error(vm, (const char *) start); |
| |
| return p; |
| } |
| |
| |
| njs_regexp_t * |
| njs_regexp_alloc(njs_vm_t *vm, njs_regexp_pattern_t *pattern) |
| { |
| njs_regexp_t *regexp; |
| |
| regexp = njs_mp_alloc(vm->mem_pool, sizeof(njs_regexp_t)); |
| |
| if (njs_fast_path(regexp != NULL)) { |
| njs_lvlhsh_init(®exp->object.hash); |
| regexp->object.shared_hash = vm->shared->regexp_instance_hash; |
| regexp->object.__proto__ = &vm->prototypes[NJS_OBJ_TYPE_REGEXP].object; |
| regexp->object.slots = NULL; |
| regexp->object.type = NJS_REGEXP; |
| regexp->object.shared = 0; |
| regexp->object.extensible = 1; |
| regexp->object.fast_array = 0; |
| regexp->object.error_data = 0; |
| njs_set_number(®exp->last_index, 0); |
| regexp->pattern = pattern; |
| njs_string_short_set(®exp->string, 0, 0); |
| return regexp; |
| } |
| |
| njs_memory_error(vm); |
| |
| return NULL; |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_prototype_last_index(njs_vm_t *vm, njs_object_prop_t *unused, |
| njs_value_t *value, njs_value_t *setval, njs_value_t *retval) |
| { |
| njs_regexp_t *regexp; |
| |
| regexp = njs_object_proto_lookup(njs_object(value), NJS_REGEXP, |
| njs_regexp_t); |
| if (njs_slow_path(regexp == NULL)) { |
| njs_set_undefined(retval); |
| return NJS_DECLINED; |
| } |
| |
| if (setval != NULL) { |
| regexp->last_index = *setval; |
| *retval = *setval; |
| |
| return NJS_OK; |
| } |
| |
| *retval = regexp->last_index; |
| return NJS_OK; |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_prototype_global(njs_vm_t *vm, njs_object_prop_t *prop, |
| njs_value_t *value, njs_value_t *setval, njs_value_t *retval) |
| { |
| njs_regexp_pattern_t *pattern; |
| |
| pattern = njs_regexp_pattern(value); |
| *retval = pattern->global ? njs_value_true : njs_value_false; |
| njs_release(vm, value); |
| |
| return NJS_OK; |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_prototype_ignore_case(njs_vm_t *vm, njs_object_prop_t *prop, |
| njs_value_t *value, njs_value_t *setval, njs_value_t *retval) |
| { |
| njs_regexp_pattern_t *pattern; |
| |
| pattern = njs_regexp_pattern(value); |
| *retval = pattern->ignore_case ? njs_value_true : njs_value_false; |
| njs_release(vm, value); |
| |
| return NJS_OK; |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_prototype_multiline(njs_vm_t *vm, njs_object_prop_t *prop, |
| njs_value_t *value, njs_value_t *setval, njs_value_t *retval) |
| { |
| njs_regexp_pattern_t *pattern; |
| |
| pattern = njs_regexp_pattern(value); |
| *retval = pattern->multiline ? njs_value_true : njs_value_false; |
| njs_release(vm, value); |
| |
| return NJS_OK; |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_prototype_source(njs_vm_t *vm, njs_object_prop_t *prop, |
| njs_value_t *value, njs_value_t *setval, njs_value_t *retval) |
| { |
| u_char *source; |
| int32_t length; |
| uint32_t size; |
| njs_regexp_pattern_t *pattern; |
| |
| pattern = njs_regexp_pattern(value); |
| /* Skip starting "/". */ |
| source = pattern->source + 1; |
| |
| size = njs_strlen(source) - pattern->flags; |
| length = njs_utf8_length(source, size); |
| |
| return njs_regexp_string_create(vm, retval, source, size, length); |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_prototype_to_string(njs_vm_t *vm, njs_value_t *args, |
| njs_uint_t nargs, njs_index_t unused) |
| { |
| if (njs_is_regexp(njs_arg(args, nargs, 0))) { |
| return njs_regexp_to_string(vm, &vm->retval, &args[0]); |
| } |
| |
| njs_type_error(vm, "\"this\" argument is not a regexp"); |
| |
| return NJS_ERROR; |
| } |
| |
| |
| njs_int_t |
| njs_regexp_to_string(njs_vm_t *vm, njs_value_t *retval, |
| const njs_value_t *value) |
| { |
| u_char *source; |
| int32_t length; |
| uint32_t size; |
| njs_regexp_pattern_t *pattern; |
| |
| pattern = njs_regexp_pattern(value); |
| source = pattern->source; |
| |
| size = njs_strlen(source); |
| length = njs_utf8_length(source, size); |
| |
| return njs_regexp_string_create(vm, retval, source, size, length); |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_prototype_test(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, |
| njs_index_t unused) |
| { |
| int *captures; |
| int64_t last_index; |
| njs_int_t ret, match; |
| njs_uint_t n; |
| njs_regex_t *regex; |
| njs_regexp_t *regexp; |
| njs_value_t *value, lvalue; |
| const njs_value_t *retval; |
| njs_string_prop_t string; |
| njs_regexp_pattern_t *pattern; |
| njs_regex_match_data_t *match_data; |
| |
| if (!njs_is_regexp(njs_arg(args, nargs, 0))) { |
| njs_type_error(vm, "\"this\" argument is not a regexp"); |
| return NJS_ERROR; |
| } |
| |
| retval = &njs_value_false; |
| |
| value = njs_lvalue_arg(&lvalue, args, nargs, 1); |
| |
| if (!njs_is_string(value)) { |
| ret = njs_value_to_string(vm, value, value); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return ret; |
| } |
| } |
| |
| (void) njs_string_prop(&string, value); |
| |
| n = (string.length != 0); |
| |
| regexp = njs_regexp(njs_argument(args, 0)); |
| pattern = njs_regexp_pattern(&args[0]); |
| |
| regex = &pattern->regex[n]; |
| match_data = vm->single_match_data; |
| |
| if (njs_regex_is_valid(regex)) { |
| if (njs_regex_backrefs(regex) != 0) { |
| match_data = njs_regex_match_data(regex, vm->regex_context); |
| if (njs_slow_path(match_data == NULL)) { |
| njs_memory_error(vm); |
| return NJS_ERROR; |
| } |
| } |
| |
| match = njs_regexp_match(vm, regex, string.start, 0, string.size, |
| match_data); |
| if (match >= 0) { |
| retval = &njs_value_true; |
| |
| } else if (match != NJS_REGEX_NOMATCH) { |
| ret = NJS_ERROR; |
| goto done; |
| } |
| |
| if (pattern->global) { |
| ret = njs_value_to_length(vm, ®exp->last_index, &last_index); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return NJS_ERROR; |
| } |
| |
| if (match >= 0) { |
| captures = njs_regex_captures(match_data); |
| last_index += captures[1]; |
| |
| } else { |
| last_index = 0; |
| } |
| |
| njs_set_number(®exp->last_index, last_index); |
| } |
| } |
| |
| ret = NJS_OK; |
| |
| vm->retval = *retval; |
| |
| done: |
| |
| if (match_data != vm->single_match_data) { |
| njs_regex_match_data_free(match_data, vm->regex_context); |
| } |
| |
| return ret; |
| } |
| |
| |
| /** |
| * TODO: sticky, unicode flags. |
| */ |
| static njs_int_t |
| njs_regexp_builtin_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, |
| njs_value_t *retval) |
| { |
| size_t length, offset; |
| int64_t last_index; |
| njs_int_t ret; |
| njs_array_t *result; |
| njs_regexp_t *regexp; |
| njs_string_prop_t string; |
| njs_regexp_utf8_t type; |
| njs_regexp_pattern_t *pattern; |
| njs_regex_match_data_t *match_data; |
| |
| regexp = njs_regexp(r); |
| regexp->string = *s; |
| pattern = regexp->pattern; |
| |
| ret = njs_value_to_length(vm, ®exp->last_index, &last_index); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return NJS_ERROR; |
| } |
| |
| if (!pattern->global) { |
| last_index = 0; |
| } |
| |
| length = njs_string_prop(&string, s); |
| |
| if (njs_slow_path((size_t) last_index > length)) { |
| goto not_found; |
| } |
| |
| type = NJS_REGEXP_BYTE; |
| |
| if (length != string.size) { |
| /* UTF-8 string. */ |
| type = NJS_REGEXP_UTF8; |
| } |
| |
| pattern = regexp->pattern; |
| |
| if (njs_slow_path(!njs_regex_is_valid(&pattern->regex[type]))) { |
| goto not_found; |
| } |
| |
| match_data = njs_regex_match_data(&pattern->regex[type], vm->regex_context); |
| if (njs_slow_path(match_data == NULL)) { |
| njs_memory_error(vm); |
| return NJS_ERROR; |
| } |
| |
| if (type != NJS_REGEXP_UTF8) { |
| offset = last_index; |
| |
| } else { |
| /* UTF-8 string. */ |
| offset = njs_string_offset(string.start, string.start + string.size, |
| last_index) - string.start; |
| } |
| |
| ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset, |
| string.size, match_data); |
| if (ret >= 0) { |
| result = njs_regexp_exec_result(vm, regexp, type, &string, match_data); |
| if (njs_slow_path(result == NULL)) { |
| return NJS_ERROR; |
| } |
| |
| njs_set_array(retval, result); |
| return NJS_OK; |
| } |
| |
| if (njs_slow_path(ret != NJS_REGEX_NOMATCH)) { |
| njs_regex_match_data_free(match_data, vm->regex_context); |
| |
| return NJS_ERROR; |
| } |
| |
| not_found: |
| |
| if (pattern->global) { |
| njs_set_number(®exp->last_index, 0); |
| } |
| |
| njs_set_null(retval); |
| |
| return NJS_OK; |
| } |
| |
| |
| static njs_array_t * |
| njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, |
| njs_regexp_utf8_t type, njs_string_prop_t *string, |
| njs_regex_match_data_t *match_data) |
| { |
| int *captures; |
| u_char *start; |
| int32_t size, length; |
| uint32_t index; |
| njs_int_t ret; |
| njs_uint_t i, n; |
| njs_array_t *array; |
| njs_value_t name; |
| njs_object_t *groups; |
| njs_object_prop_t *prop; |
| njs_regexp_group_t *group; |
| njs_lvlhsh_query_t lhq; |
| njs_regexp_pattern_t *pattern; |
| |
| static const njs_value_t string_index = njs_string("index"); |
| static const njs_value_t string_input = njs_string("input"); |
| static const njs_value_t string_groups = njs_string("groups"); |
| |
| pattern = regexp->pattern; |
| array = njs_array_alloc(vm, 0, pattern->ncaptures, 0); |
| if (njs_slow_path(array == NULL)) { |
| goto fail; |
| } |
| |
| captures = njs_regex_captures(match_data); |
| |
| for (i = 0; i < pattern->ncaptures; i++) { |
| n = 2 * i; |
| |
| if (captures[n] != -1) { |
| start = &string->start[captures[n]]; |
| size = captures[n + 1] - captures[n]; |
| |
| if (type == NJS_REGEXP_UTF8) { |
| length = njs_max(njs_utf8_length(start, size), 0); |
| |
| } else { |
| length = size; |
| } |
| |
| ret = njs_regexp_string_create(vm, &array->start[i], start, size, |
| length); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto fail; |
| } |
| |
| } else { |
| njs_set_undefined(&array->start[i]); |
| } |
| } |
| |
| /* FIXME: implement fast CreateDataPropertyOrThrow(). */ |
| prop = njs_object_prop_alloc(vm, &string_index, &njs_value_undefined, 1); |
| if (njs_slow_path(prop == NULL)) { |
| goto fail; |
| } |
| |
| if (type == NJS_REGEXP_UTF8) { |
| index = njs_string_index(string, captures[0]); |
| |
| } else { |
| index = captures[0]; |
| } |
| |
| njs_set_number(&prop->value, index); |
| |
| if (pattern->global) { |
| if (type == NJS_REGEXP_UTF8) { |
| index = njs_string_index(string, captures[1]); |
| |
| } else { |
| index = captures[1]; |
| } |
| |
| njs_set_number(®exp->last_index, index); |
| } |
| |
| lhq.key_hash = NJS_INDEX_HASH; |
| lhq.key = njs_str_value("index"); |
| lhq.replace = 0; |
| lhq.value = prop; |
| lhq.pool = vm->mem_pool; |
| lhq.proto = &njs_object_hash_proto; |
| |
| ret = njs_lvlhsh_insert(&array->object.hash, &lhq); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto insert_fail; |
| } |
| |
| prop = njs_object_prop_alloc(vm, &string_input, ®exp->string, 1); |
| if (njs_slow_path(prop == NULL)) { |
| goto fail; |
| } |
| |
| lhq.key_hash = NJS_INPUT_HASH; |
| lhq.key = njs_str_value("input"); |
| lhq.value = prop; |
| |
| ret = njs_lvlhsh_insert(&array->object.hash, &lhq); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto insert_fail; |
| } |
| |
| prop = njs_object_prop_alloc(vm, &string_groups, &njs_value_undefined, 1); |
| if (njs_slow_path(prop == NULL)) { |
| goto fail; |
| } |
| |
| lhq.key_hash = NJS_GROUPS_HASH; |
| lhq.key = njs_str_value("groups"); |
| lhq.value = prop; |
| |
| ret = njs_lvlhsh_insert(&array->object.hash, &lhq); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto insert_fail; |
| } |
| |
| if (pattern->ngroups != 0) { |
| groups = njs_object_alloc(vm); |
| if (njs_slow_path(groups == NULL)) { |
| goto fail; |
| } |
| |
| njs_set_object(&prop->value, groups); |
| |
| i = 0; |
| |
| do { |
| group = &pattern->groups[i]; |
| |
| ret = njs_string_set(vm, &name, group->name.start, |
| group->name.length); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto fail; |
| } |
| |
| prop = njs_object_prop_alloc(vm, &name, |
| &array->start[group->capture], 1); |
| if (njs_slow_path(prop == NULL)) { |
| goto fail; |
| } |
| |
| lhq.key_hash = group->hash; |
| lhq.key = group->name; |
| lhq.value = prop; |
| |
| ret = njs_lvlhsh_insert(&groups->hash, &lhq); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto insert_fail; |
| } |
| |
| i++; |
| |
| } while (i < pattern->ngroups); |
| } |
| |
| ret = NJS_OK; |
| goto done; |
| |
| insert_fail: |
| |
| njs_internal_error(vm, "lvlhsh insert failed"); |
| |
| fail: |
| |
| ret = NJS_ERROR; |
| |
| done: |
| |
| njs_regex_match_data_free(match_data, vm->regex_context); |
| |
| return (ret == NJS_OK) ? array : NULL; |
| } |
| |
| |
| njs_int_t |
| njs_regexp_prototype_exec(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, |
| njs_index_t unused) |
| { |
| njs_int_t ret; |
| njs_value_t *r, *s; |
| njs_value_t string_lvalue; |
| |
| r = njs_argument(args, 0); |
| |
| if (njs_slow_path(!njs_is_regexp(r))) { |
| njs_type_error(vm, "\"this\" argument is not a regexp"); |
| return NJS_ERROR; |
| } |
| |
| s = njs_lvalue_arg(&string_lvalue, args, nargs, 1); |
| |
| ret = njs_value_to_string(vm, s, s); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return ret; |
| } |
| |
| return njs_regexp_builtin_exec(vm, r, s, &vm->retval); |
| } |
| |
| |
| njs_int_t |
| njs_regexp_exec(njs_vm_t *vm, njs_value_t *r, njs_value_t *s, |
| njs_value_t *retval) |
| { |
| njs_int_t ret; |
| njs_value_t exec; |
| |
| static const njs_value_t string_exec = njs_string("exec"); |
| |
| ret = njs_value_property(vm, r, njs_value_arg(&string_exec), &exec); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| return NJS_ERROR; |
| } |
| |
| if (njs_is_function(&exec)) { |
| ret = njs_function_call(vm, njs_function(&exec), r, s, 1, retval); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| return NJS_ERROR; |
| } |
| |
| if (njs_slow_path(!njs_is_object(retval) && !njs_is_null(retval))) { |
| njs_type_error(vm, "unexpected \"%s\" retval in njs_regexp_exec()", |
| njs_type_string(retval->type)); |
| return NJS_ERROR; |
| } |
| |
| return NJS_OK; |
| } |
| |
| if (njs_slow_path(!njs_is_regexp(r))) { |
| njs_type_error(vm, "receiver argument is not a regexp"); |
| return NJS_ERROR; |
| } |
| |
| return njs_regexp_builtin_exec(vm, r, s, retval); |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start, |
| uint32_t size, int32_t length) |
| { |
| length = (length >= 0) ? length : 0; |
| |
| return njs_string_new(vm, value, start, size, length); |
| } |
| |
| |
| static njs_int_t |
| njs_regexp_prototype_symbol_replace(njs_vm_t *vm, njs_value_t *args, |
| njs_uint_t nargs, njs_index_t unused) |
| { |
| u_char *p; |
| int64_t n, last_index, ncaptures, pos, next_pos, size, length; |
| njs_str_t rep, m; |
| njs_int_t ret; |
| njs_arr_t results; |
| njs_chb_t chain; |
| njs_uint_t i; |
| njs_bool_t global; |
| njs_array_t *array; |
| njs_value_t *arguments, *r, *rx, *string, *replace; |
| njs_value_t s_lvalue, r_lvalue, value, matched, groups, retval; |
| njs_function_t *func_replace; |
| njs_string_prop_t s; |
| |
| static const njs_value_t string_global = njs_string("global"); |
| static const njs_value_t string_groups = njs_string("groups"); |
| static const njs_value_t string_index = njs_string("index"); |
| static const njs_value_t string_lindex = njs_string("lastIndex"); |
| |
| rx = njs_argument(args, 0); |
| |
| if (njs_slow_path(!njs_is_object(rx))) { |
| njs_type_error(vm, "\"this\" is not object"); |
| return NJS_ERROR; |
| } |
| |
| string = njs_lvalue_arg(&s_lvalue, args, nargs, 1); |
| |
| ret = njs_value_to_string(vm, string, string); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return ret; |
| } |
| |
| length = njs_string_prop(&s, string); |
| |
| rep.start = NULL; |
| rep.length = 0; |
| |
| replace = njs_lvalue_arg(&r_lvalue, args, nargs, 2); |
| func_replace = njs_is_function(replace) ? njs_function(replace) : NULL; |
| |
| if (!func_replace) { |
| ret = njs_value_to_string(vm, replace, replace); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return ret; |
| } |
| } |
| |
| ret = njs_value_property(vm, rx, njs_value_arg(&string_global), &value); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| return NJS_ERROR; |
| } |
| |
| global = njs_bool(&value); |
| |
| if (global) { |
| njs_set_number(&value, 0); |
| ret = njs_value_property_set(vm, rx, njs_value_arg(&string_lindex), |
| &value); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return NJS_ERROR; |
| } |
| } |
| |
| njs_chb_init(&chain, vm->mem_pool); |
| |
| results.separate = 0; |
| results.pointer = 0; |
| |
| r = njs_arr_init(vm->mem_pool, &results, NULL, 4, sizeof(njs_value_t)); |
| if (njs_slow_path(r == NULL)) { |
| return NJS_ERROR; |
| } |
| |
| for ( ;; ) { |
| r = njs_arr_add(&results); |
| if (njs_slow_path(r == NULL)) { |
| ret = NJS_ERROR; |
| goto exception; |
| } |
| |
| ret = njs_regexp_exec(vm, rx, string, r); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| |
| if (njs_is_null(r) || !global) { |
| break; |
| } |
| |
| if (njs_fast_path(njs_is_fast_array(r) && njs_array_len(r) != 0)) { |
| value = njs_array_start(r)[0]; |
| |
| } else { |
| ret = njs_value_property_i64(vm, r, 0, &value); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| goto exception; |
| } |
| } |
| |
| ret = njs_value_to_string(vm, &value, &value); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| |
| if (njs_string_length(&value) != 0) { |
| continue; |
| } |
| |
| ret = njs_value_property(vm, rx, njs_value_arg(&string_lindex), &value); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| goto exception; |
| } |
| |
| ret = njs_value_to_length(vm, &value, &last_index); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| |
| njs_set_number(&value, last_index + 1); |
| ret = njs_value_property_set(vm, rx, njs_value_arg(&string_lindex), |
| &value); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| } |
| |
| i = 0; |
| next_pos = 0; |
| |
| while (i < results.items) { |
| r = njs_arr_item(&results, i++); |
| |
| if (njs_slow_path(njs_is_null(r))) { |
| break; |
| } |
| |
| ret = njs_value_property_i64(vm, r, 0, &matched); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| goto exception; |
| } |
| |
| ret = njs_value_to_string(vm, &matched, &matched); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| |
| ret = njs_value_property(vm, r, njs_value_arg(&string_index), &value); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| goto exception; |
| } |
| |
| ret = njs_value_to_integer(vm, &value, &pos); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| |
| if ((size_t) length != s.size) { |
| /* UTF-8 string. */ |
| pos = njs_string_offset(s.start, s.start + s.size, pos) - s.start; |
| } |
| |
| pos = njs_max(njs_min(pos, (int64_t) s.size), 0); |
| |
| if (njs_fast_path(njs_is_fast_array(r) && njs_array_len(r) != 0)) { |
| array = njs_array(r); |
| |
| arguments = array->start; |
| arguments[0] = matched; |
| ncaptures = njs_max((int64_t) array->length - 1, 0); |
| |
| for (n = 1; n <= ncaptures; n++) { |
| if (njs_is_undefined(&arguments[n])) { |
| continue; |
| } |
| |
| ret = njs_value_to_string(vm, &arguments[n], &arguments[n]); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| goto exception; |
| } |
| } |
| |
| } else { |
| ret = njs_object_length(vm, r, &ncaptures); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| |
| ncaptures = njs_max(ncaptures - 1, 0); |
| |
| array = njs_array_alloc(vm, 0, ncaptures + 1, 0); |
| if (njs_slow_path(array == NULL)) { |
| goto exception; |
| } |
| |
| arguments = array->start; |
| arguments[0] = matched; |
| |
| for (n = 1; n <= ncaptures; n++) { |
| ret = njs_value_property_i64(vm, r, n, &arguments[n]); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| goto exception; |
| } |
| |
| if (njs_is_undefined(&arguments[n])) { |
| continue; |
| } |
| |
| ret = njs_value_to_string(vm, &arguments[n], &arguments[n]); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| goto exception; |
| } |
| } |
| } |
| |
| ret = njs_value_property(vm, r, njs_value_arg(&string_groups), &groups); |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| goto exception; |
| } |
| |
| if (!func_replace) { |
| if (njs_is_defined(&groups)) { |
| ret = njs_value_to_object(vm, &groups); |
| if (njs_slow_path(ret != NJS_OK)) { |
| return ret; |
| } |
| } |
| |
| ret = njs_string_get_substitution(vm, &matched, string, pos, |
| arguments, ncaptures, &groups, |
| replace, &retval); |
| |
| } else { |
| ret = njs_array_expand(vm, array, 0, |
| njs_is_defined(&groups) ? 3 : 2); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| |
| arguments = array->start; |
| njs_set_number(&arguments[n++], pos); |
| arguments[n++] = *string; |
| |
| if (njs_is_defined(&groups)) { |
| arguments[n++] = groups; |
| } |
| |
| ret = njs_function_call(vm, func_replace, |
| njs_value_arg(&njs_value_undefined), |
| arguments, n, &retval); |
| } |
| |
| if (njs_slow_path(ret == NJS_ERROR)) { |
| return NJS_ERROR; |
| } |
| |
| ret = njs_value_to_string(vm, &retval, &retval); |
| if (njs_slow_path(ret != NJS_OK)) { |
| goto exception; |
| } |
| |
| if (pos >= next_pos) { |
| njs_chb_append(&chain, &s.start[next_pos], pos - next_pos); |
| |
| njs_string_get(&retval, &rep); |
| njs_chb_append_str(&chain, &rep); |
| |
| njs_string_get(&matched, &m); |
| |
| next_pos = pos + (int64_t) m.length; |
| } |
| } |
| |
| if (next_pos < (int64_t) s.size) { |
| njs_chb_append(&chain, &s.start[next_pos], s.size - next_pos); |
| } |
| |
| size = njs_chb_size(&chain); |
| if (njs_slow_path(size < 0)) { |
| njs_memory_error(vm); |
| ret = NJS_ERROR; |
| goto exception; |
| } |
| |
| length = njs_chb_utf8_length(&chain); |
| |
| p = njs_string_alloc(vm, &vm->retval, size, length); |
| if (njs_slow_path(p == NULL)) { |
| ret = NJS_ERROR; |
| goto exception; |
| } |
| |
| njs_chb_join_to(&chain, p); |
| |
| ret = NJS_OK; |
| |
| exception: |
| |
| njs_chb_destroy(&chain); |
| njs_arr_destroy(&results); |
| |
| return ret; |
| } |
| |
| |
| |
| |
| static const njs_object_prop_t njs_regexp_constructor_properties[] = |
| { |
| { |
| .type = NJS_PROPERTY, |
| .name = njs_string("name"), |
| .value = njs_string("RegExp"), |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY, |
| .name = njs_string("length"), |
| .value = njs_value(NJS_NUMBER, 1, 2.0), |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY_HANDLER, |
| .name = njs_string("prototype"), |
| .value = njs_prop_handler(njs_object_prototype_create), |
| }, |
| }; |
| |
| |
| const njs_object_init_t njs_regexp_constructor_init = { |
| njs_regexp_constructor_properties, |
| njs_nitems(njs_regexp_constructor_properties), |
| }; |
| |
| |
| static const njs_object_prop_t njs_regexp_prototype_properties[] = |
| { |
| { |
| .type = NJS_PROPERTY_HANDLER, |
| .name = njs_string("constructor"), |
| .value = njs_prop_handler(njs_object_prototype_create_constructor), |
| .writable = 1, |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY_HANDLER, |
| .name = njs_string("global"), |
| .value = njs_prop_handler(njs_regexp_prototype_global), |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY_HANDLER, |
| .name = njs_string("ignoreCase"), |
| .value = njs_prop_handler(njs_regexp_prototype_ignore_case), |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY_HANDLER, |
| .name = njs_string("multiline"), |
| .value = njs_prop_handler(njs_regexp_prototype_multiline), |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY_HANDLER, |
| .name = njs_string("source"), |
| .value = njs_prop_handler(njs_regexp_prototype_source), |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY, |
| .name = njs_string("toString"), |
| .value = njs_native_function(njs_regexp_prototype_to_string, 0), |
| .writable = 1, |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY, |
| .name = njs_string("test"), |
| .value = njs_native_function(njs_regexp_prototype_test, 1), |
| .writable = 1, |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY, |
| .name = njs_string("exec"), |
| .value = njs_native_function(njs_regexp_prototype_exec, 1), |
| .writable = 1, |
| .configurable = 1, |
| }, |
| |
| { |
| .type = NJS_PROPERTY, |
| .name = njs_wellknown_symbol(NJS_SYMBOL_REPLACE), |
| .value = njs_native_function(njs_regexp_prototype_symbol_replace, 2), |
| .writable = 1, |
| .configurable = 1, |
| }, |
| }; |
| |
| |
| const njs_object_prop_t njs_regexp_instance_properties[] = |
| { |
| { |
| .type = NJS_PROPERTY_HANDLER, |
| .name = njs_string("lastIndex"), |
| .value = njs_prop_handler(njs_regexp_prototype_last_index), |
| .writable = 1, |
| }, |
| }; |
| |
| |
| const njs_object_init_t njs_regexp_instance_init = { |
| njs_regexp_instance_properties, |
| njs_nitems(njs_regexp_instance_properties), |
| }; |
| |
| |
| const njs_object_init_t njs_regexp_prototype_init = { |
| njs_regexp_prototype_properties, |
| njs_nitems(njs_regexp_prototype_properties), |
| }; |
| |
| |
| const njs_object_type_init_t njs_regexp_type_init = { |
| .constructor = njs_native_ctor(njs_regexp_constructor, 2, 0), |
| .constructor_props = &njs_regexp_constructor_init, |
| .prototype_props = &njs_regexp_prototype_init, |
| .prototype_value = { .object = { .type = NJS_REGEXP } }, |
| }; |