blob: 4a04022631d807b27654c2ef85c1fab7d77966f6 [file] [log] [blame]
/*
* Copyright (C) Igor Sysoev
* Copyright (C) NGINX, Inc.
*/
#include <njs_main.h>
typedef struct njs_lexer_multi_s njs_lexer_multi_t;
struct njs_lexer_multi_s {
uint8_t symbol;
uint8_t token;
uint8_t count;
const njs_lexer_multi_t *next;
};
static njs_int_t njs_lexer_hash_test(njs_lvlhsh_query_t *lhq, void *data);
static njs_int_t njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *token);
static void njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *token,
u_char quote);
static void njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *token);
static void njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *token,
const njs_lexer_multi_t *multi, size_t length);
static void njs_lexer_division(njs_lexer_t *lexer, njs_lexer_token_t *token);
const njs_lvlhsh_proto_t njs_lexer_hash_proto
njs_aligned(64) =
{
NJS_LVLHSH_DEFAULT,
njs_lexer_hash_test,
njs_lvlhsh_alloc,
njs_lvlhsh_free,
};
static const uint8_t njs_tokens[256] njs_aligned(64) = {
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* \t */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_SPACE,
/* \n */ NJS_TOKEN_LINE_END, NJS_TOKEN_ILLEGAL,
/* \r */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_SPACE,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* 0x10 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* ! */ NJS_TOKEN_SPACE, NJS_TOKEN_LOGICAL_NOT,
/* " # */ NJS_TOKEN_DOUBLE_QUOTE, NJS_TOKEN_ILLEGAL,
/* $ % */ NJS_TOKEN_LETTER, NJS_TOKEN_REMAINDER,
/* & ' */ NJS_TOKEN_BITWISE_AND, NJS_TOKEN_SINGLE_QUOTE,
/* ( ) */ NJS_TOKEN_OPEN_PARENTHESIS, NJS_TOKEN_CLOSE_PARENTHESIS,
/* * + */ NJS_TOKEN_MULTIPLICATION, NJS_TOKEN_ADDITION,
/* , - */ NJS_TOKEN_COMMA, NJS_TOKEN_SUBSTRACTION,
/* . / */ NJS_TOKEN_DOT, NJS_TOKEN_DIVISION,
/* 0 1 */ NJS_TOKEN_DIGIT, NJS_TOKEN_DIGIT,
/* 2 3 */ NJS_TOKEN_DIGIT, NJS_TOKEN_DIGIT,
/* 4 5 */ NJS_TOKEN_DIGIT, NJS_TOKEN_DIGIT,
/* 6 7 */ NJS_TOKEN_DIGIT, NJS_TOKEN_DIGIT,
/* 8 9 */ NJS_TOKEN_DIGIT, NJS_TOKEN_DIGIT,
/* : ; */ NJS_TOKEN_COLON, NJS_TOKEN_SEMICOLON,
/* < = */ NJS_TOKEN_LESS, NJS_TOKEN_ASSIGNMENT,
/* > ? */ NJS_TOKEN_GREATER, NJS_TOKEN_CONDITIONAL,
/* @ A */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_LETTER,
/* B C */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* D E */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* F G */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* H I */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* J K */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* L M */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* N O */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* P Q */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* R S */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* T U */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* V W */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* X Y */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* Z [ */ NJS_TOKEN_LETTER, NJS_TOKEN_OPEN_BRACKET,
/* \ ] */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_CLOSE_BRACKET,
/* ^ _ */ NJS_TOKEN_BITWISE_XOR, NJS_TOKEN_LETTER,
/* ` a */ NJS_TOKEN_GRAVE, NJS_TOKEN_LETTER,
/* b c */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* d e */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* f g */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* h i */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* j k */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* l m */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* n o */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* p q */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* r s */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* t u */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* v w */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* x y */ NJS_TOKEN_LETTER, NJS_TOKEN_LETTER,
/* z { */ NJS_TOKEN_LETTER, NJS_TOKEN_OPEN_BRACE,
/* | } */ NJS_TOKEN_BITWISE_OR, NJS_TOKEN_CLOSE_BRACE,
/* ~ */ NJS_TOKEN_BITWISE_NOT, NJS_TOKEN_ILLEGAL,
/* 0x80 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* 0x90 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* 0xA0 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* 0xB0 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* TODO: the first byte of valid UTF-8: 0xC2 - 0xF4. */
/* 0xC0 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* 0xD0 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* 0xE0 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
/* 0xF0 */ NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
NJS_TOKEN_ILLEGAL, NJS_TOKEN_ILLEGAL,
};
static const njs_lexer_multi_t njs_addition_token[] = {
{ '+', NJS_TOKEN_INCREMENT, 0, NULL },
{ '=', NJS_TOKEN_ADDITION_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_substraction_token[] = {
{ '-', NJS_TOKEN_DECREMENT, 0, NULL },
{ '=', NJS_TOKEN_SUBSTRACTION_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_exponentiation_token[] = {
{ '=', NJS_TOKEN_EXPONENTIATION_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_multiplication_token[] = {
{ '=', NJS_TOKEN_MULTIPLICATION_ASSIGNMENT, 0, NULL },
{ '*', NJS_TOKEN_EXPONENTIATION, 1, njs_exponentiation_token },
};
static const njs_lexer_multi_t njs_remainder_token[] = {
{ '=', NJS_TOKEN_REMAINDER_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_bitwise_and_token[] = {
{ '&', NJS_TOKEN_LOGICAL_AND, 0, NULL },
{ '=', NJS_TOKEN_BITWISE_AND_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_bitwise_xor_token[] = {
{ '=', NJS_TOKEN_BITWISE_XOR_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_bitwise_or_token[] = {
{ '|', NJS_TOKEN_LOGICAL_OR, 0, NULL },
{ '=', NJS_TOKEN_BITWISE_OR_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_strict_not_equal_token[] = {
{ '=', NJS_TOKEN_STRICT_NOT_EQUAL, 0, NULL },
};
static const njs_lexer_multi_t njs_logical_not_token[] = {
{ '=', NJS_TOKEN_NOT_EQUAL, 1, njs_strict_not_equal_token },
};
static const njs_lexer_multi_t njs_less_shift_token[] = {
{ '=', NJS_TOKEN_LEFT_SHIFT_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_less_token[] = {
{ '=', NJS_TOKEN_LESS_OR_EQUAL, 0, NULL },
{ '<', NJS_TOKEN_LEFT_SHIFT, 1, njs_less_shift_token },
};
static const njs_lexer_multi_t njs_strict_equal_token[] = {
{ '=', NJS_TOKEN_STRICT_EQUAL, 0, NULL },
};
static const njs_lexer_multi_t njs_unsigned_right_shift_token[] = {
{ '=', NJS_TOKEN_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT, 0, NULL },
};
static const njs_lexer_multi_t njs_right_shift_token[] = {
{ '=', NJS_TOKEN_RIGHT_SHIFT_ASSIGNMENT, 0, NULL },
{ '>', NJS_TOKEN_UNSIGNED_RIGHT_SHIFT, 1,
njs_unsigned_right_shift_token },
};
static const njs_lexer_multi_t njs_greater_token[] = {
{ '=', NJS_TOKEN_GREATER_OR_EQUAL, 0, NULL },
{ '>', NJS_TOKEN_RIGHT_SHIFT, 2, njs_right_shift_token },
};
static const njs_lexer_multi_t njs_conditional_token[] = {
{ '?', NJS_TOKEN_COALESCE, 0, NULL },
};
static const njs_lexer_multi_t njs_assignment_token[] = {
{ '=', NJS_TOKEN_EQUAL, 1, njs_strict_equal_token },
{ '>', NJS_TOKEN_ARROW, 0, NULL },
};
njs_int_t
njs_lexer_init(njs_vm_t *vm, njs_lexer_t *lexer, njs_str_t *file,
u_char *start, u_char *end)
{
njs_memzero(lexer, sizeof(njs_lexer_t));
lexer->file = *file;
lexer->start = start;
lexer->end = end;
lexer->line = 1;
lexer->keywords_hash = &vm->shared->keywords_hash;
lexer->mem_pool = vm->mem_pool;
njs_queue_init(&lexer->preread);
return NJS_OK;
}
njs_inline njs_lexer_token_t *
njs_lexer_next_token(njs_lexer_t *lexer)
{
njs_int_t ret;
njs_lexer_token_t *token;
token = njs_mp_zalloc(lexer->mem_pool, sizeof(njs_lexer_token_t));
if (njs_slow_path(token == NULL)) {
return NULL;
}
do {
ret = njs_lexer_make_token(lexer, token);
if (njs_slow_path(ret != NJS_OK)) {
return NULL;
}
} while (token->type == NJS_TOKEN_COMMENT);
njs_queue_insert_tail(&lexer->preread, &token->link);
return token;
}
njs_lexer_token_t *
njs_lexer_token(njs_lexer_t *lexer, njs_bool_t with_end_line)
{
njs_queue_link_t *lnk;
njs_lexer_token_t *token;
lnk = njs_queue_first(&lexer->preread);
while (lnk != njs_queue_head(&lexer->preread)) {
token = njs_queue_link_data(lnk, njs_lexer_token_t, link);
if (!with_end_line && token->type == NJS_TOKEN_LINE_END) {
lexer->prev_type = token->type;
lnk = njs_queue_next(&token->link);
continue;
}
return token;
}
do {
token = njs_lexer_next_token(lexer);
if (token == NULL) {
return NULL;
}
if (!with_end_line && token->type == NJS_TOKEN_LINE_END) {
lexer->prev_type = token->type;
continue;
}
break;
} while (1);
return token;
}
njs_lexer_token_t *
njs_lexer_peek_token(njs_lexer_t *lexer, njs_lexer_token_t *current,
njs_bool_t with_end_line)
{
njs_queue_link_t *lnk;
njs_lexer_token_t *token;
lnk = njs_queue_next(&current->link);
while (lnk != njs_queue_head(&lexer->preread)) {
token = njs_queue_link_data(lnk, njs_lexer_token_t, link);
if (!with_end_line && token->type == NJS_TOKEN_LINE_END) {
lnk = njs_queue_next(&token->link);
continue;
}
return token;
}
do {
token = njs_lexer_next_token(lexer);
if (token == NULL) {
return NULL;
}
if (!with_end_line && token->type == NJS_TOKEN_LINE_END) {
continue;
}
break;
} while (1);
return token;
}
void
njs_lexer_consume_token(njs_lexer_t *lexer, unsigned length)
{
njs_queue_link_t *lnk;
njs_lexer_token_t *token;
while (length > 0) {
lnk = njs_queue_first(&lexer->preread);
token = njs_queue_link_data(lnk, njs_lexer_token_t, link);
lexer->prev_type = token->type;
if (token->type != NJS_TOKEN_LINE_END) {
length--;
}
njs_queue_remove(lnk);
njs_mp_free(lexer->mem_pool, token);
}
}
njs_int_t
njs_lexer_make_token(njs_lexer_t *lexer, njs_lexer_token_t *token)
{
u_char c, *p;
c = ' ';
while (lexer->start < lexer->end) {
c = *lexer->start++;
if (njs_tokens[c] != NJS_TOKEN_SPACE) {
break;
}
}
token->type = njs_tokens[c];
token->line = lexer->line;
switch (token->type) {
case NJS_TOKEN_LETTER:
return njs_lexer_word(lexer, token);
case NJS_TOKEN_DOUBLE_QUOTE:
case NJS_TOKEN_SINGLE_QUOTE:
njs_lexer_string(lexer, token, c);
break;
case NJS_TOKEN_DOT:
p = lexer->start;
if (p + 1 < lexer->end
&& njs_tokens[p[0]] == NJS_TOKEN_DOT
&& njs_tokens[p[1]] == NJS_TOKEN_DOT)
{
token->text.start = lexer->start - 1;
token->text.length = (p - token->text.start) + 2;
token->type = NJS_TOKEN_ELLIPSIS;
lexer->start += 2;
return NJS_OK;
}
if (p == lexer->end || njs_tokens[*p] != NJS_TOKEN_DIGIT) {
token->text.start = lexer->start - 1;
token->text.length = p - token->text.start;
token->type = NJS_TOKEN_DOT;
return NJS_OK;
}
/* Fall through. */
case NJS_TOKEN_DIGIT:
njs_lexer_number(lexer, token);
break;
case NJS_TOKEN_DIVISION:
njs_lexer_division(lexer, token);
break;
case NJS_TOKEN_ASSIGNMENT:
njs_lexer_multi(lexer, token, njs_assignment_token,
njs_nitems(njs_assignment_token));
break;
case NJS_TOKEN_ADDITION:
njs_lexer_multi(lexer, token, njs_addition_token,
njs_nitems(njs_addition_token));
break;
case NJS_TOKEN_SUBSTRACTION:
njs_lexer_multi(lexer, token, njs_substraction_token,
njs_nitems(njs_substraction_token));
break;
case NJS_TOKEN_MULTIPLICATION:
njs_lexer_multi(lexer, token, njs_multiplication_token,
njs_nitems(njs_multiplication_token));
break;
case NJS_TOKEN_REMAINDER:
njs_lexer_multi(lexer, token, njs_remainder_token,
njs_nitems(njs_remainder_token));
break;
case NJS_TOKEN_BITWISE_AND:
njs_lexer_multi(lexer, token, njs_bitwise_and_token,
njs_nitems(njs_bitwise_and_token));
break;
case NJS_TOKEN_BITWISE_XOR:
njs_lexer_multi(lexer, token, njs_bitwise_xor_token,
njs_nitems(njs_bitwise_xor_token));
break;
case NJS_TOKEN_BITWISE_OR:
njs_lexer_multi(lexer, token, njs_bitwise_or_token,
njs_nitems(njs_bitwise_or_token));
break;
case NJS_TOKEN_LOGICAL_NOT:
njs_lexer_multi(lexer, token, njs_logical_not_token,
njs_nitems(njs_logical_not_token));
break;
case NJS_TOKEN_LESS:
njs_lexer_multi(lexer, token, njs_less_token,
njs_nitems(njs_less_token));
break;
case NJS_TOKEN_GREATER:
njs_lexer_multi(lexer, token, njs_greater_token,
njs_nitems(njs_greater_token));
break;
case NJS_TOKEN_CONDITIONAL:
njs_lexer_multi(lexer, token, njs_conditional_token,
njs_nitems(njs_conditional_token));
break;
case NJS_TOKEN_SPACE:
token->type = NJS_TOKEN_END;
return NJS_OK;
case NJS_TOKEN_LINE_END:
lexer->line++;
/* Fall through. */
default:
token->text.start = lexer->start - 1;
token->text.length = lexer->start - token->text.start;
break;
}
return NJS_OK;
}
static njs_int_t
njs_lexer_hash_test(njs_lvlhsh_query_t *lhq, void *data)
{
njs_lexer_entry_t *entry;
entry = data;
if (entry->name.length == lhq->key.length
&& memcmp(entry->name.start, lhq->key.start, lhq->key.length) == 0)
{
return NJS_OK;
}
return NJS_DECLINED;
}
static njs_lexer_entry_t *
njs_lexer_keyword_find(njs_lexer_t *lexer, u_char *key, size_t length,
uint32_t hash)
{
njs_int_t ret;
njs_lexer_entry_t *entry;
njs_lvlhsh_query_t lhq;
lhq.key.start = key;
lhq.key.length = length;
lhq.key_hash = hash;
lhq.proto = &njs_lexer_hash_proto;
ret = njs_lvlhsh_find(lexer->keywords_hash, &lhq);
if (ret == NJS_OK) {
return lhq.value;
}
entry = njs_mp_alloc(lexer->mem_pool, sizeof(njs_lexer_entry_t));
if (njs_slow_path(entry == NULL)) {
return NULL;
}
entry->name.start = njs_mp_alloc(lexer->mem_pool, length + 1);
if (njs_slow_path(entry->name.start == NULL)) {
return NULL;
}
memcpy(entry->name.start, key, length);
entry->name.start[length] = '\0';
entry->name.length = length;
lhq.value = entry;
lhq.pool = lexer->mem_pool;
ret = njs_lvlhsh_insert(lexer->keywords_hash, &lhq);
if (njs_slow_path(ret != NJS_OK)) {
return NULL;
}
return entry;
}
static njs_int_t
njs_lexer_word(njs_lexer_t *lexer, njs_lexer_token_t *token)
{
u_char *p, c;
uint32_t hash_id;
const njs_lexer_entry_t *entry;
const njs_lexer_keyword_entry_t *key_entry;
/* TODO: UTF-8 */
static const uint8_t letter_digit[32] njs_aligned(32) = {
0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
/* '&%$ #"! /.-, |*)( 7654 3210 ?>=< ;:98 */
0x10, 0x00, 0xff, 0x03, /* 0001 0000 0000 0000 1111 1111 0000 0011 */
/* GFED CBA@ ONML KJIH WVUT SRQP _^]\ [ZYX */
0xfe, 0xff, 0xff, 0x87, /* 1111 1110 1111 1111 1111 1111 1000 0111 */
/* gfed cba` onml kjih wvut srqp ~}| {zyx */
0xfe, 0xff, 0xff, 0x07, /* 1111 1110 1111 1111 1111 1111 0000 0111 */
0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
0x00, 0x00, 0x00, 0x00, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
};
token->text.start = lexer->start - 1;
hash_id = njs_djb_hash_add(NJS_DJB_HASH_INIT, *token->text.start);
for (p = lexer->start; p < lexer->end; p++) {
c = *p;
if ((letter_digit[c / 8] & (1 << (c & 7))) == 0) {
break;
}
hash_id = njs_djb_hash_add(hash_id, c);
}
token->text.length = p - token->text.start;
lexer->start = p;
key_entry = njs_lexer_keyword(token->text.start, token->text.length);
if (key_entry == NULL) {
entry = njs_lexer_keyword_find(lexer, token->text.start,
token->text.length, hash_id);
if (njs_slow_path(entry == NULL)) {
return NJS_ERROR;
}
token->type = NJS_TOKEN_NAME;
token->keyword_type = NJS_KEYWORD_TYPE_UNDEF;
} else {
entry = &key_entry->value->entry;
token->type = key_entry->value->type;
token->keyword_type = NJS_KEYWORD_TYPE_KEYWORD;
token->keyword_type |= key_entry->value->reserved;
}
token->unique_id = (uintptr_t) entry;
return NJS_OK;
}
static void
njs_lexer_string(njs_lexer_t *lexer, njs_lexer_token_t *token, u_char quote)
{
u_char *p, c;
njs_bool_t escape;
escape = 0;
p = lexer->start;
token->text.start = p;
while (p < lexer->end) {
c = *p++;
if (c == '\\') {
if (p == lexer->end) {
break;
}
p++;
/* Line continuation. */
if (p < lexer->end && p[-1] == '\r' && p[0] == '\n') {
p++;
}
escape = 1;
continue;
}
/* Line terminator. */
if (c == '\r' || c == '\n') {
break;
}
if (c == quote) {
lexer->start = p;
token->text.length = (p - 1) - token->text.start;
token->type = (escape == 0) ? NJS_TOKEN_STRING
: NJS_TOKEN_ESCAPE_STRING;
return;
}
}
token->text.start--;
token->text.length = p - token->text.start;
token->type = NJS_TOKEN_UNTERMINATED_STRING;
}
static void
njs_lexer_number(njs_lexer_t *lexer, njs_lexer_token_t *token)
{
u_char c;
const u_char *p;
c = lexer->start[-1];
p = lexer->start;
token->text.start = lexer->start - 1;
if (c == '0' && p != lexer->end) {
/* Hexadecimal literal values. */
if (*p == 'x' || *p == 'X') {
p++;
if (p == lexer->end) {
goto illegal_token;
}
token->number = njs_number_hex_parse(&p, lexer->end, 1);
goto done;
}
/* Octal literal values. */
if (*p == 'o' || *p == 'O') {
p++;
if (p == lexer->end) {
goto illegal_token;
}
token->number = njs_number_oct_parse(&p, lexer->end);
if (p < lexer->end && (*p == '8' || *p == '9')) {
goto illegal_trailer;
}
goto done;
}
/* Binary literal values. */
if (*p == 'b' || *p == 'B') {
p++;
if (p == lexer->end) {
goto illegal_token;
}
token->number = njs_number_bin_parse(&p, lexer->end);
if (p < lexer->end && (*p >= '2' && *p <= '9')) {
goto illegal_trailer;
}
goto done;
}
/* Legacy Octal literals are deprecated. */
if ((*p >= '0' && *p <= '9') || *p == '_') {
goto illegal_trailer;
}
}
p--;
token->number = njs_number_dec_parse(&p, lexer->end, 1);
done:
if (p[-1] == '_') {
p--;
}
lexer->start = (u_char *) p;
token->text.length = p - token->text.start;
token->type = NJS_TOKEN_NUMBER;
return;
illegal_trailer:
p++;
illegal_token:
token->text.length = p - token->text.start;
token->type = NJS_TOKEN_ILLEGAL;
}
static void
njs_lexer_multi(njs_lexer_t *lexer, njs_lexer_token_t *token,
const njs_lexer_multi_t *multi, size_t length)
{
u_char c;
token->line = lexer->line;
token->text.start = lexer->start - 1;
while (length != 0 && multi != NULL && lexer->start < lexer->end) {
c = lexer->start[0];
if (c == multi->symbol) {
lexer->start++;
token->type = multi->token;
if (multi->count == 0) {
break;
}
length = multi->count;
multi = multi->next;
} else {
length--;
multi++;
}
}
token->text.length = lexer->start - token->text.start;
}
static void
njs_lexer_division(njs_lexer_t *lexer, njs_lexer_token_t *token)
{
u_char c, *p;
token->text.start = lexer->start - 1;
if (lexer->start >= lexer->end) {
goto done;
}
c = lexer->start[0];
if (c == '/') {
lexer->start++;
for (p = lexer->start; p < lexer->end; p++) {
if (*p == '\n' || (p + 1) == lexer->end) {
lexer->start = p + 1;
lexer->line++;
token->type = NJS_TOKEN_LINE_END;
goto done;
}
}
} else if (c == '*') {
lexer->start++;
for (p = lexer->start; p < lexer->end; p++) {
if (*p == '\n') {
lexer->line++;
continue;
}
if (*p == '*') {
if (p + 1 < lexer->end && p[1] == '/') {
lexer->start = p + 2;
token->type = NJS_TOKEN_COMMENT;
goto done;
}
}
}
token->type = NJS_TOKEN_ILLEGAL;
} else if (c == '=') {
lexer->start++;
token->type = NJS_TOKEN_DIVISION_ASSIGNMENT;
}
done:
token->text.length = lexer->start - token->text.start;
}