mirror of
https://github.com/neovim/neovim.git
synced 2025-09-07 03:48:18 +00:00

Allow Include What You Use to remove unnecessary includes and only include what is necessary. This helps with reducing compilation times and makes it easier to visualise which dependencies are actually required. Work on https://github.com/neovim/neovim/issues/549, but doesn't close it since this only works fully for .c files and not headers.
3081 lines
109 KiB
C
3081 lines
109 KiB
C
// This is an open source non-commercial project. Dear PVS-Studio, please check
|
||
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
|
||
|
||
/// VimL expression parser
|
||
|
||
// Planned incompatibilities (to be included into vim_diff.txt when this parser
|
||
// will be an actual part of VimL evaluation process):
|
||
//
|
||
// 1. Expressions are first fully parsed and only then executed. This means
//    that while ":echo [system('touch abc')" will create file "abc" in Vim and
//    only then raise a syntax error regarding the missing comma in the list,
//    in Neovim trying to execute that will immediately raise a syntax error
//    regarding the missing list end without actually executing anything.
|
||
// 2. Expressions are first fully parsed, without considering any runtime
|
||
// information. This means things like that "d.a" does not change its
|
||
// meaning depending on type of "d" (or whether Vim is currently executing or
|
||
// skipping). For compatibility reasons the dot thus may either be “concat
|
||
// or subscript” operator or just “concat” operator.
|
||
// 3. Expressions parser is aware whether it is called for :echo or <C-r>=.
//    This means that while "<C-r>=1 | 2<CR>" is equivalent to "<C-r>=1<CR>"
//    in Vim, because the "| 2" part is left to be treated as a command
//    separator and then ignored, in Neovim it is an error.
|
||
// 4. Expressions parser has generally better error reporting. But for
|
||
// compatibility reasons most errors have error code E15 while error messages
|
||
// are significantly different from Vim’s E15. Also some error codes were
|
||
// retired because of being harder to emulate or because of them being
|
||
// a result of differences in parsing process: e.g. with ":echo {a, b}" Vim
|
||
// will attempt to parse expression as lambda, fail, check whether it is
|
||
// a curly-braces-name, fail again, and evaluate that as a dictionary, giving
|
||
// error regarding undefined variable "a" (or about missing colon). Neovim
|
||
// will not try to evaluate anything here: comma right after an argument name
|
||
// means that expression may not be anything, but lambda, so the resulting
|
||
// error message will never be about missing variable or colon: it will be
|
||
// about missing arrow (or a continuation of argument list).
|
||
// 5. Failing to parse expression always gives exactly one error message: no
|
||
// more stack of error messages like >
|
||
//
|
||
// :echo [1,
|
||
// E697: Missing end of List ']':
|
||
// E15: Invalid expression: [1,
|
||
//
|
||
// < , just exactly one E697 message.
|
||
// 6. Some expressions involving calling parenthesis which are treated
|
||
// separately by Vim even when not separated by spaces are treated as one
|
||
// expression by Neovim: e.g. ":echo (1)(1)" will yield runtime error after
|
||
// failing to call "1", while Vim will echo "1 1". Reasoning is the same:
|
||
// type of what is in the first expression is generally not known when
|
||
// parsing, so to have separate expressions like this separate them with
|
||
// spaces.
|
||
// 7. 'isident' no longer applies to environment variables, they always include
|
||
// ASCII alphanumeric characters and underscore and nothing except this.
|
||
|
||
#include <assert.h>
|
||
#include <stdbool.h>
|
||
#include <stddef.h>
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
|
||
#include "klib/kvec.h"
|
||
#include "nvim/ascii.h"
|
||
#include "nvim/assert.h"
|
||
#include "nvim/charset.h"
|
||
#include "nvim/eval/typval.h"
|
||
#include "nvim/gettext.h"
|
||
#include "nvim/keycodes.h"
|
||
#include "nvim/macros.h"
|
||
#include "nvim/mbyte.h"
|
||
#include "nvim/memory.h"
|
||
#include "nvim/types.h"
|
||
#include "nvim/vim.h"
|
||
#include "nvim/viml/parser/expressions.h"
|
||
#include "nvim/viml/parser/parser.h"
|
||
|
||
/// Stack of `ExprASTNode **` items used while building and freeing the AST.
///
/// NOTE(review): 16 is presumably the preallocated capacity of the vector;
/// confirm against kvec_withinit_t in klib/kvec.h.
typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack;
|
||
|
||
/// Which kind of node the parser wants to see next
typedef enum {
  /// Operators: function call, subscripts, binary operators, …
  ///
  /// For unrestricted expressions.
  kENodeOperator,
  /// Values: literals, variables, nested expressions, unary operators.
  ///
  /// For unrestricted expressions as well, implies that top item in AST stack
  /// points to NULL.
  kENodeValue,
} ExprASTWantedNode;
|
||
|
||
/// Parse type: what is being parsed currently
typedef enum {
  /// Parsing regular VimL expression
  kEPTExpr = 0,
  /// Parsing lambda arguments
  ///
  /// Just like parsing function arguments, but it is valid to be ended with an
  /// arrow only.
  kEPTLambdaArguments,
  /// Assignment: parsing for :let
  kEPTAssignment,
  /// Single assignment: used when lists are not allowed (i.e. when nesting)
  kEPTSingleAssignment,
} ExprASTParseType;
|
||
|
||
/// Stack of parse types (ExprASTParseType values).
///
/// NOTE(review): 4 is presumably the preallocated capacity of the vector;
/// confirm against kvec_withinit_t in klib/kvec.h.
typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack;
|
||
|
||
/// Operator priority level
///
/// Enumerators are listed in increasing numeric order, starting with
/// kEOpLvlInvalid at zero and ending with kEOpLvlValue.
typedef enum {
  kEOpLvlInvalid = 0,
  kEOpLvlComplexIdentifier,
  kEOpLvlParens,
  kEOpLvlAssignment,
  kEOpLvlArrow,
  kEOpLvlComma,
  kEOpLvlColon,
  kEOpLvlTernaryValue,
  kEOpLvlTernary,
  kEOpLvlOr,
  kEOpLvlAnd,
  kEOpLvlComparison,
  kEOpLvlAddition,  ///< Addition, subtraction and concatenation.
  kEOpLvlMultiplication,  ///< Multiplication, division and modulo.
  kEOpLvlUnary,  ///< Unary operations: not, minus, plus.
  kEOpLvlSubscript,  ///< Subscripts.
  kEOpLvlValue,  ///< Values: literals, variables, nested expressions, …
} ExprOpLvl;
|
||
|
||
/// Operator associativity
///
/// Each enumerator has the value of a mnemonic ASCII letter.
typedef enum {
  kEOpAssNo = 'n',  ///< Not associative / not applicable.
  kEOpAssLeft = 'l',  ///< Left associativity.
  kEOpAssRight = 'r',  ///< Right associativity.
} ExprOpAssociativity;
|
||
|
||
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
||
# include "viml/parser/expressions.c.generated.h"
|
||
#endif
|
||
|
||
/// Scale number by a given factor
|
||
///
|
||
/// Used to apply exponent to a number. Idea taken from uClibc.
|
||
///
|
||
/// @param[in] num Number to scale. Does not bother doing anything if it is
|
||
/// zero.
|
||
/// @param[in] base Base, should be 10 since non-decimal floating-point
|
||
/// numbers are not supported.
|
||
/// @param[in] exponent Exponent to scale by.
|
||
/// @param[in] exponent_negative True if exponent is negative.
|
||
static inline float_T scale_number(const float_T num, const uint8_t base,
|
||
const uvarnumber_T exponent, const bool exponent_negative)
|
||
FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST
|
||
{
|
||
if (num == 0 || exponent == 0) {
|
||
return num;
|
||
}
|
||
assert(base);
|
||
uvarnumber_T exp = exponent;
|
||
float_T p_base = (float_T)base;
|
||
float_T ret = num;
|
||
while (exp) {
|
||
if (exp & 1) {
|
||
if (exponent_negative) {
|
||
ret /= p_base;
|
||
} else {
|
||
ret *= p_base;
|
||
}
|
||
}
|
||
exp >>= 1;
|
||
p_base *= p_base;
|
||
}
|
||
return ret;
|
||
}
|
||
|
||
/// Get next token for the VimL expression input
|
||
///
|
||
/// @param pstate Parser state.
|
||
/// @param[in] flags Flags, @see LexExprFlags.
|
||
///
|
||
/// @return Next token.
|
||
LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
|
||
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
|
||
{
|
||
LexExprToken ret = {
|
||
.type = kExprLexInvalid,
|
||
.start = pstate->pos,
|
||
};
|
||
ParserLine pline;
|
||
if (!viml_parser_get_remaining_line(pstate, &pline)) {
|
||
ret.type = kExprLexEOC;
|
||
return ret;
|
||
}
|
||
if (pline.size <= 0) {
|
||
ret.len = 0;
|
||
ret.type = kExprLexEOC;
|
||
goto viml_pexpr_next_token_adv_return;
|
||
}
|
||
ret.len = 1;
|
||
const uint8_t schar = (uint8_t)pline.data[0];
|
||
#define GET_CCS(ret, pline) \
|
||
do { \
|
||
if (ret.len < pline.size \
|
||
&& strchr("?#", pline.data[ret.len]) != NULL) { \
|
||
ret.data.cmp.ccs = \
|
||
(ExprCaseCompareStrategy)pline.data[ret.len]; \
|
||
ret.len++; \
|
||
} else { \
|
||
ret.data.cmp.ccs = kCCStrategyUseOption; \
|
||
} \
|
||
} while (0)
|
||
switch (schar) {
|
||
// Paired brackets.
|
||
#define BRACKET(typ, opning, clsing) \
|
||
case opning: \
|
||
case clsing: { \
|
||
ret.type = typ; \
|
||
ret.data.brc.closing = (schar == clsing); \
|
||
break; \
|
||
}
|
||
BRACKET(kExprLexParenthesis, '(', ')')
|
||
BRACKET(kExprLexBracket, '[', ']')
|
||
BRACKET(kExprLexFigureBrace, '{', '}')
|
||
#undef BRACKET
|
||
|
||
// Single character tokens without data.
|
||
#define CHAR(typ, ch) \
|
||
case ch: { \
|
||
ret.type = typ; \
|
||
break; \
|
||
}
|
||
CHAR(kExprLexQuestion, '?')
|
||
CHAR(kExprLexColon, ':')
|
||
CHAR(kExprLexComma, ',')
|
||
#undef CHAR
|
||
|
||
// Multiplication/division/modulo.
|
||
#define MUL(mul_type, ch) \
|
||
case ch: { \
|
||
ret.type = kExprLexMultiplication; \
|
||
ret.data.mul.type = mul_type; \
|
||
break; \
|
||
}
|
||
MUL(kExprLexMulMul, '*')
|
||
MUL(kExprLexMulDiv, '/')
|
||
MUL(kExprLexMulMod, '%')
|
||
#undef MUL
|
||
|
||
#define CHARREG(typ, cond) \
|
||
do { \
|
||
ret.type = typ; \
|
||
for (; (ret.len < pline.size \
|
||
&& cond(pline.data[ret.len])) \
|
||
; ret.len++) { \
|
||
} \
|
||
} while (0)
|
||
|
||
// Whitespace.
|
||
case ' ':
|
||
case TAB:
|
||
CHARREG(kExprLexSpacing, ascii_iswhite);
|
||
break;
|
||
|
||
// Control character, except for NUL, NL and TAB.
|
||
case Ctrl_A:
|
||
case Ctrl_B:
|
||
case Ctrl_C:
|
||
case Ctrl_D:
|
||
case Ctrl_E:
|
||
case Ctrl_F:
|
||
case Ctrl_G:
|
||
case Ctrl_H:
|
||
|
||
case Ctrl_K:
|
||
case Ctrl_L:
|
||
case Ctrl_M:
|
||
case Ctrl_N:
|
||
case Ctrl_O:
|
||
case Ctrl_P:
|
||
case Ctrl_Q:
|
||
case Ctrl_R:
|
||
case Ctrl_S:
|
||
case Ctrl_T:
|
||
case Ctrl_U:
|
||
case Ctrl_V:
|
||
case Ctrl_W:
|
||
case Ctrl_X:
|
||
case Ctrl_Y:
|
||
case Ctrl_Z:
|
||
#define ISCTRL(schar) (schar < ' ')
|
||
CHARREG(kExprLexInvalid, ISCTRL);
|
||
ret.data.err.type = kExprLexSpacing;
|
||
ret.data.err.msg =
|
||
_("E15: Invalid control character present in input: %.*s");
|
||
break;
|
||
#undef ISCTRL
|
||
|
||
// Number.
|
||
case '0':
|
||
case '1':
|
||
case '2':
|
||
case '3':
|
||
case '4':
|
||
case '5':
|
||
case '6':
|
||
case '7':
|
||
case '8':
|
||
case '9': {
|
||
ret.data.num.is_float = false;
|
||
ret.data.num.base = 10;
|
||
size_t frac_start = 0;
|
||
size_t exp_start = 0;
|
||
size_t frac_end = 0;
|
||
bool exp_negative = false;
|
||
CHARREG(kExprLexNumber, ascii_isdigit);
|
||
if (flags & kELFlagAllowFloat) {
|
||
const LexExprToken non_float_ret = ret;
|
||
if (pline.size > ret.len + 1
|
||
&& pline.data[ret.len] == '.'
|
||
&& ascii_isdigit(pline.data[ret.len + 1])) {
|
||
ret.len++;
|
||
frac_start = ret.len;
|
||
frac_end = ret.len;
|
||
ret.data.num.is_float = true;
|
||
for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len])
|
||
; ret.len++) {
|
||
// A small optimization: trailing zeroes in fractional part do not
|
||
// add anything to significand, so it is useless to include them in
|
||
// frac_end.
|
||
if (pline.data[ret.len] != '0') {
|
||
frac_end = ret.len + 1;
|
||
}
|
||
}
|
||
if (pline.size > ret.len + 1
|
||
&& (pline.data[ret.len] == 'e'
|
||
|| pline.data[ret.len] == 'E')
|
||
&& ((pline.size > ret.len + 2
|
||
&& (pline.data[ret.len + 1] == '+'
|
||
|| pline.data[ret.len + 1] == '-')
|
||
&& ascii_isdigit(pline.data[ret.len + 2]))
|
||
|| ascii_isdigit(pline.data[ret.len + 1]))) {
|
||
ret.len++;
|
||
if (pline.data[ret.len] == '+'
|
||
|| (exp_negative = (pline.data[ret.len] == '-'))) {
|
||
ret.len++;
|
||
}
|
||
exp_start = ret.len;
|
||
CHARREG(kExprLexNumber, ascii_isdigit);
|
||
}
|
||
}
|
||
if (pline.size > ret.len
|
||
&& (pline.data[ret.len] == '.'
|
||
|| ASCII_ISALPHA(pline.data[ret.len]))) {
|
||
ret = non_float_ret;
|
||
}
|
||
}
|
||
// TODO(ZyX-I): detect overflows
|
||
if (ret.data.num.is_float) {
|
||
// Vim used to use string2float here which in turn uses strtod(). There
|
||
// are two problems with this approach:
|
||
// 1. strtod() is locale-dependent. Not sure how it is worked around so
|
||
// that I do not see relevant bugs, but it still does not look like
|
||
// a good idea.
|
||
// 2. strtod() does not accept length argument.
|
||
//
|
||
// The below variant of parsing floats was recognized as acceptable
|
||
// because it is basically how uClibc does the thing: it generates
|
||
// a number ignoring decimal point (but recording its position), then
|
||
// uses recorded position to scale number down when processing exponent.
|
||
float_T significand_part = 0;
|
||
uvarnumber_T exp_part = 0;
|
||
const size_t frac_size = (size_t)(frac_end - frac_start);
|
||
for (size_t i = 0; i < frac_end; i++) {
|
||
if (i == frac_start - 1) {
|
||
continue;
|
||
}
|
||
significand_part = significand_part * 10 + (pline.data[i] - '0');
|
||
}
|
||
if (exp_start) {
|
||
vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part,
|
||
(int)(ret.len - exp_start), false);
|
||
}
|
||
if (exp_negative) {
|
||
exp_part += frac_size;
|
||
} else {
|
||
if (exp_part < frac_size) {
|
||
exp_negative = true;
|
||
exp_part = frac_size - exp_part;
|
||
} else {
|
||
exp_part -= frac_size;
|
||
}
|
||
}
|
||
ret.data.num.val.floating = scale_number(significand_part, 10, exp_part,
|
||
exp_negative);
|
||
} else {
|
||
int len;
|
||
int prep;
|
||
vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL,
|
||
&ret.data.num.val.integer, (int)pline.size, false);
|
||
ret.len = (size_t)len;
|
||
const uint8_t bases[] = {
|
||
[0] = 10,
|
||
['0'] = 8,
|
||
['x'] = 16, ['X'] = 16,
|
||
['b'] = 2, ['B'] = 2,
|
||
};
|
||
ret.data.num.base = bases[prep];
|
||
}
|
||
break;
|
||
}
|
||
|
||
#define ISWORD_OR_AUTOLOAD(x) \
|
||
(ascii_isident(x) || (x) == AUTOLOAD_CHAR)
|
||
|
||
// Environment variable.
|
||
case '$':
|
||
CHARREG(kExprLexEnv, ascii_isident);
|
||
break;
|
||
|
||
// Normal variable/function name.
|
||
case 'a':
|
||
case 'b':
|
||
case 'c':
|
||
case 'd':
|
||
case 'e':
|
||
case 'f':
|
||
case 'g':
|
||
case 'h':
|
||
case 'i':
|
||
case 'j':
|
||
case 'k':
|
||
case 'l':
|
||
case 'm':
|
||
case 'n':
|
||
case 'o':
|
||
case 'p':
|
||
case 'q':
|
||
case 'r':
|
||
case 's':
|
||
case 't':
|
||
case 'u':
|
||
case 'v':
|
||
case 'w':
|
||
case 'x':
|
||
case 'y':
|
||
case 'z':
|
||
case 'A':
|
||
case 'B':
|
||
case 'C':
|
||
case 'D':
|
||
case 'E':
|
||
case 'F':
|
||
case 'G':
|
||
case 'H':
|
||
case 'I':
|
||
case 'J':
|
||
case 'K':
|
||
case 'L':
|
||
case 'M':
|
||
case 'N':
|
||
case 'O':
|
||
case 'P':
|
||
case 'Q':
|
||
case 'R':
|
||
case 'S':
|
||
case 'T':
|
||
case 'U':
|
||
case 'V':
|
||
case 'W':
|
||
case 'X':
|
||
case 'Y':
|
||
case 'Z':
|
||
case '_':
|
||
ret.data.var.scope = 0;
|
||
ret.data.var.autoload = false;
|
||
CHARREG(kExprLexPlainIdentifier, ascii_isident);
|
||
// "is" and "isnot" operators.
|
||
if (!(flags & kELFlagIsNotCmp)
|
||
&& ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0)
|
||
|| (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) {
|
||
ret.type = kExprLexComparison;
|
||
ret.data.cmp.type = kExprCmpIdentical;
|
||
ret.data.cmp.inv = (ret.len == 5);
|
||
GET_CCS(ret, pline);
|
||
// Scope: `s:`, etc.
|
||
} else if (ret.len == 1
|
||
&& pline.size > 1
|
||
&& memchr(EXPR_VAR_SCOPE_LIST, schar,
|
||
sizeof(EXPR_VAR_SCOPE_LIST)) != NULL
|
||
&& pline.data[ret.len] == ':'
|
||
&& !(flags & kELFlagForbidScope)) {
|
||
ret.len++;
|
||
ret.data.var.scope = (ExprVarScope)schar;
|
||
CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
|
||
ret.data.var.autoload = (
|
||
memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2)
|
||
!= NULL);
|
||
// Previous CHARREG stopped at autoload character in order to make it
|
||
// possible to detect `is#`. Continue now with autoload characters
|
||
// included.
|
||
//
|
||
// Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of
|
||
// function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. This
|
||
// needs to be resolved on the higher level where context is available.
|
||
} else if (pline.size > ret.len
|
||
&& pline.data[ret.len] == AUTOLOAD_CHAR) {
|
||
ret.data.var.autoload = true;
|
||
CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
|
||
}
|
||
break;
|
||
|
||
#undef ISWORD_OR_AUTOLOAD
|
||
#undef CHARREG
|
||
|
||
// Option.
|
||
case '&': {
|
||
#define OPTNAMEMISS(ret) \
|
||
do { \
|
||
ret.type = kExprLexInvalid; \
|
||
ret.data.err.type = kExprLexOption; \
|
||
ret.data.err.msg = _("E112: Option name missing: %.*s"); \
|
||
} while (0)
|
||
if (pline.size > 1 && pline.data[1] == '&') {
|
||
ret.type = kExprLexAnd;
|
||
ret.len++;
|
||
break;
|
||
}
|
||
if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) {
|
||
OPTNAMEMISS(ret);
|
||
break;
|
||
}
|
||
ret.type = kExprLexOption;
|
||
if (pline.size > 2
|
||
&& pline.data[2] == ':'
|
||
&& memchr(EXPR_OPT_SCOPE_LIST, pline.data[1],
|
||
sizeof(EXPR_OPT_SCOPE_LIST)) != NULL) {
|
||
ret.len += 2;
|
||
ret.data.opt.scope = (ExprOptScope)pline.data[1];
|
||
ret.data.opt.name = pline.data + 3;
|
||
} else {
|
||
ret.data.opt.scope = kExprOptScopeUnspecified;
|
||
ret.data.opt.name = pline.data + 1;
|
||
}
|
||
const char *p = ret.data.opt.name;
|
||
const char *const e = pline.data + pline.size;
|
||
if (e - p >= 4 && p[0] == 't' && p[1] == '_') {
|
||
ret.data.opt.len = 4;
|
||
ret.len += 4;
|
||
} else {
|
||
for (; p < e && ASCII_ISALPHA(*p); p++) {}
|
||
ret.data.opt.len = (size_t)(p - ret.data.opt.name);
|
||
if (ret.data.opt.len == 0) {
|
||
OPTNAMEMISS(ret);
|
||
} else {
|
||
ret.len += ret.data.opt.len;
|
||
}
|
||
}
|
||
break;
|
||
#undef OPTNAMEMISS
|
||
}
|
||
|
||
// Register.
|
||
case '@':
|
||
ret.type = kExprLexRegister;
|
||
if (pline.size > 1) {
|
||
ret.len++;
|
||
ret.data.reg.name = (uint8_t)pline.data[1];
|
||
} else {
|
||
ret.data.reg.name = -1;
|
||
}
|
||
break;
|
||
|
||
// Single quoted string.
|
||
case '\'':
|
||
ret.type = kExprLexSingleQuotedString;
|
||
ret.data.str.closed = false;
|
||
for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) {
|
||
if (pline.data[ret.len] == '\'') {
|
||
if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') {
|
||
ret.len++;
|
||
} else {
|
||
ret.data.str.closed = true;
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
|
||
// Double quoted string.
|
||
case '"':
|
||
ret.type = kExprLexDoubleQuotedString;
|
||
ret.data.str.closed = false;
|
||
for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) {
|
||
if (pline.data[ret.len] == '\\') {
|
||
if (ret.len + 1 < pline.size) {
|
||
ret.len++;
|
||
}
|
||
} else if (pline.data[ret.len] == '"') {
|
||
ret.data.str.closed = true;
|
||
}
|
||
}
|
||
break;
|
||
|
||
// Unary not, (un)equality and regex (not) match comparison operators.
|
||
case '!':
|
||
case '=':
|
||
if (pline.size == 1) {
|
||
ret.type = (schar == '!' ? kExprLexNot : kExprLexAssignment);
|
||
ret.data.ass.type = kExprAsgnPlain;
|
||
break;
|
||
}
|
||
ret.type = kExprLexComparison;
|
||
ret.data.cmp.inv = (schar == '!');
|
||
if (pline.data[1] == '=') {
|
||
ret.data.cmp.type = kExprCmpEqual;
|
||
ret.len++;
|
||
} else if (pline.data[1] == '~') {
|
||
ret.data.cmp.type = kExprCmpMatches;
|
||
ret.len++;
|
||
} else if (schar == '!') {
|
||
ret.type = kExprLexNot;
|
||
} else {
|
||
ret.type = kExprLexAssignment;
|
||
ret.data.ass.type = kExprAsgnPlain;
|
||
}
|
||
GET_CCS(ret, pline);
|
||
break;
|
||
|
||
// Less/greater [or equal to] comparison operators.
|
||
case '>':
|
||
case '<': {
|
||
ret.type = kExprLexComparison;
|
||
const bool haseqsign = (pline.size > 1 && pline.data[1] == '=');
|
||
if (haseqsign) {
|
||
ret.len++;
|
||
}
|
||
GET_CCS(ret, pline);
|
||
ret.data.cmp.inv = (schar == '<');
|
||
ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign)
|
||
? kExprCmpGreaterOrEqual
|
||
: kExprCmpGreater);
|
||
break;
|
||
}
|
||
|
||
// Minus sign, arrow from lambdas or augmented assignment.
|
||
case '-': {
|
||
if (pline.size > 1 && pline.data[1] == '>') {
|
||
ret.len++;
|
||
ret.type = kExprLexArrow;
|
||
} else if (pline.size > 1 && pline.data[1] == '=') {
|
||
ret.len++;
|
||
ret.type = kExprLexAssignment;
|
||
ret.data.ass.type = kExprAsgnSubtract;
|
||
} else {
|
||
ret.type = kExprLexMinus;
|
||
}
|
||
break;
|
||
}
|
||
|
||
// Sign or augmented assignment.
|
||
#define CHAR_OR_ASSIGN(ch, ch_type, ass_type) \
|
||
case ch: { \
|
||
if (pline.size > 1 && pline.data[1] == '=') { \
|
||
ret.len++; \
|
||
ret.type = kExprLexAssignment; \
|
||
ret.data.ass.type = ass_type; \
|
||
} else { \
|
||
ret.type = ch_type; \
|
||
} \
|
||
break; \
|
||
}
|
||
CHAR_OR_ASSIGN('+', kExprLexPlus, kExprAsgnAdd)
|
||
CHAR_OR_ASSIGN('.', kExprLexDot, kExprAsgnConcat)
|
||
#undef CHAR_OR_ASSIGN
|
||
|
||
// Expression end because Ex command ended.
|
||
case NUL:
|
||
case NL:
|
||
if (flags & kELFlagForbidEOC) {
|
||
ret.type = kExprLexInvalid;
|
||
ret.data.err.msg = _("E15: Unexpected EOC character: %.*s");
|
||
ret.data.err.type = kExprLexSpacing;
|
||
} else {
|
||
ret.type = kExprLexEOC;
|
||
}
|
||
break;
|
||
|
||
case '|':
|
||
if (pline.size >= 2 && pline.data[ret.len] == '|') {
|
||
// "||" is or.
|
||
ret.len++;
|
||
ret.type = kExprLexOr;
|
||
} else if (flags & kELFlagForbidEOC) {
|
||
// Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any
|
||
// errors. This will be changed here.
|
||
ret.type = kExprLexInvalid;
|
||
ret.data.err.msg = _("E15: Unexpected EOC character: %.*s");
|
||
ret.data.err.type = kExprLexOr;
|
||
} else {
|
||
ret.type = kExprLexEOC;
|
||
}
|
||
break;
|
||
|
||
// Everything else is not valid.
|
||
default:
|
||
ret.len = (size_t)utfc_ptr2len_len(pline.data, (int)pline.size);
|
||
ret.type = kExprLexInvalid;
|
||
ret.data.err.type = kExprLexPlainIdentifier;
|
||
ret.data.err.msg = _("E15: Unidentified character: %.*s");
|
||
break;
|
||
}
|
||
#undef GET_CCS
|
||
viml_pexpr_next_token_adv_return:
|
||
if (!(flags & kELFlagPeek)) {
|
||
viml_parser_advance(pstate, ret.len);
|
||
}
|
||
return ret;
|
||
}
|
||
|
||
static const char *const eltkn_type_tab[] = {
|
||
[kExprLexInvalid] = "Invalid",
|
||
[kExprLexMissing] = "Missing",
|
||
[kExprLexSpacing] = "Spacing",
|
||
[kExprLexEOC] = "EOC",
|
||
|
||
[kExprLexQuestion] = "Question",
|
||
[kExprLexColon] = "Colon",
|
||
[kExprLexOr] = "Or",
|
||
[kExprLexAnd] = "And",
|
||
[kExprLexComparison] = "Comparison",
|
||
[kExprLexPlus] = "Plus",
|
||
[kExprLexMinus] = "Minus",
|
||
[kExprLexDot] = "Dot",
|
||
[kExprLexMultiplication] = "Multiplication",
|
||
|
||
[kExprLexNot] = "Not",
|
||
|
||
[kExprLexNumber] = "Number",
|
||
[kExprLexSingleQuotedString] = "SingleQuotedString",
|
||
[kExprLexDoubleQuotedString] = "DoubleQuotedString",
|
||
[kExprLexOption] = "Option",
|
||
[kExprLexRegister] = "Register",
|
||
[kExprLexEnv] = "Env",
|
||
[kExprLexPlainIdentifier] = "PlainIdentifier",
|
||
|
||
[kExprLexBracket] = "Bracket",
|
||
[kExprLexFigureBrace] = "FigureBrace",
|
||
[kExprLexParenthesis] = "Parenthesis",
|
||
[kExprLexComma] = "Comma",
|
||
[kExprLexArrow] = "Arrow",
|
||
[kExprLexAssignment] = "Assignment",
|
||
};
|
||
|
||
const char *const eltkn_cmp_type_tab[] = {
|
||
[kExprCmpEqual] = "Equal",
|
||
[kExprCmpMatches] = "Matches",
|
||
[kExprCmpGreater] = "Greater",
|
||
[kExprCmpGreaterOrEqual] = "GreaterOrEqual",
|
||
[kExprCmpIdentical] = "Identical",
|
||
};
|
||
|
||
const char *const expr_asgn_type_tab[] = {
|
||
[kExprAsgnPlain] = "Plain",
|
||
[kExprAsgnAdd] = "Add",
|
||
[kExprAsgnSubtract] = "Subtract",
|
||
[kExprAsgnConcat] = "Concat",
|
||
};
|
||
|
||
const char *const ccs_tab[] = {
|
||
[kCCStrategyUseOption] = "UseOption",
|
||
[kCCStrategyMatchCase] = "MatchCase",
|
||
[kCCStrategyIgnoreCase] = "IgnoreCase",
|
||
};
|
||
|
||
static const char *const eltkn_mul_type_tab[] = {
|
||
[kExprLexMulMul] = "Mul",
|
||
[kExprLexMulDiv] = "Div",
|
||
[kExprLexMulMod] = "Mod",
|
||
};
|
||
|
||
static const char *const eltkn_opt_scope_tab[] = {
|
||
[kExprOptScopeUnspecified] = "Unspecified",
|
||
[kExprOptScopeGlobal] = "Global",
|
||
[kExprOptScopeLocal] = "Local",
|
||
};
|
||
|
||
/// Represent token as a string
|
||
///
|
||
/// Intended for testing and debugging purposes.
|
||
///
|
||
/// @param[in] pstate Parser state, needed to get token string from it. May be
|
||
/// NULL, in which case in place of obtaining part of the
|
||
/// string represented by token only token length is
|
||
/// returned.
|
||
/// @param[in] token Token to represent.
|
||
/// @param[out] ret_size Return string size, for cases like NULs inside
|
||
/// a string. May be NULL.
|
||
///
|
||
/// @return Token represented in a string form, in a static buffer (overwritten
|
||
/// on each call).
|
||
const char *viml_pexpr_repr_token(const ParserState *const pstate, const LexExprToken token,
|
||
size_t *const ret_size)
|
||
FUNC_ATTR_WARN_UNUSED_RESULT
|
||
{
|
||
static char ret[1024];
|
||
char *p = ret;
|
||
const char *const e = &ret[1024] - 1;
|
||
#define ADDSTR(...) \
|
||
do { \
|
||
p += snprintf(p, (size_t)(sizeof(ret) - (size_t)(p - ret)), __VA_ARGS__); \
|
||
if (p >= e) { \
|
||
goto viml_pexpr_repr_token_end; \
|
||
} \
|
||
} while (0)
|
||
ADDSTR("%zu:%zu:%s", token.start.line, token.start.col,
|
||
eltkn_type_tab[token.type]);
|
||
switch (token.type) {
|
||
#define TKNARGS(tkn_type, ...) \
|
||
case tkn_type: { \
|
||
ADDSTR(__VA_ARGS__); \
|
||
break; \
|
||
}
|
||
TKNARGS(kExprLexComparison, "(type=%s,ccs=%s,inv=%i)",
|
||
eltkn_cmp_type_tab[token.data.cmp.type],
|
||
ccs_tab[token.data.cmp.ccs],
|
||
(int)token.data.cmp.inv)
|
||
TKNARGS(kExprLexMultiplication, "(type=%s)",
|
||
eltkn_mul_type_tab[token.data.mul.type])
|
||
TKNARGS(kExprLexAssignment, "(type=%s)",
|
||
expr_asgn_type_tab[token.data.ass.type])
|
||
TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name))
|
||
case kExprLexDoubleQuotedString:
|
||
TKNARGS(kExprLexSingleQuotedString, "(closed=%i)",
|
||
(int)token.data.str.closed)
|
||
TKNARGS(kExprLexOption, "(scope=%s,name=%.*s)",
|
||
eltkn_opt_scope_tab[token.data.opt.scope],
|
||
(int)token.data.opt.len, token.data.opt.name)
|
||
TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)",
|
||
intchar2str((int)token.data.var.scope),
|
||
(int)token.data.var.autoload)
|
||
TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)",
|
||
(int)token.data.num.is_float,
|
||
(int)token.data.num.base,
|
||
(double)(token.data.num.is_float
|
||
? (double)token.data.num.val.floating
|
||
: (double)token.data.num.val.integer))
|
||
TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg)
|
||
default:
|
||
// No additional arguments.
|
||
break;
|
||
#undef TKNARGS
|
||
}
|
||
if (pstate == NULL) {
|
||
ADDSTR("::%zu", token.len);
|
||
} else {
|
||
*p++ = ':';
|
||
memmove(p, &pstate->reader.lines.items[token.start.line].data[token.start.col],
|
||
token.len);
|
||
p += token.len;
|
||
*p = NUL;
|
||
}
|
||
#undef ADDSTR
|
||
viml_pexpr_repr_token_end:
|
||
if (ret_size != NULL) {
|
||
*ret_size = (size_t)(p - ret);
|
||
}
|
||
return ret;
|
||
}
|
||
|
||
const char *const east_node_type_tab[] = {
|
||
[kExprNodeMissing] = "Missing",
|
||
[kExprNodeOpMissing] = "OpMissing",
|
||
[kExprNodeTernary] = "Ternary",
|
||
[kExprNodeTernaryValue] = "TernaryValue",
|
||
[kExprNodeRegister] = "Register",
|
||
[kExprNodeSubscript] = "Subscript",
|
||
[kExprNodeListLiteral] = "ListLiteral",
|
||
[kExprNodeUnaryPlus] = "UnaryPlus",
|
||
[kExprNodeBinaryPlus] = "BinaryPlus",
|
||
[kExprNodeNested] = "Nested",
|
||
[kExprNodeCall] = "Call",
|
||
[kExprNodePlainIdentifier] = "PlainIdentifier",
|
||
[kExprNodePlainKey] = "PlainKey",
|
||
[kExprNodeComplexIdentifier] = "ComplexIdentifier",
|
||
[kExprNodeUnknownFigure] = "UnknownFigure",
|
||
[kExprNodeLambda] = "Lambda",
|
||
[kExprNodeDictLiteral] = "DictLiteral",
|
||
[kExprNodeCurlyBracesIdentifier] = "CurlyBracesIdentifier",
|
||
[kExprNodeComma] = "Comma",
|
||
[kExprNodeColon] = "Colon",
|
||
[kExprNodeArrow] = "Arrow",
|
||
[kExprNodeComparison] = "Comparison",
|
||
[kExprNodeConcat] = "Concat",
|
||
[kExprNodeConcatOrSubscript] = "ConcatOrSubscript",
|
||
[kExprNodeInteger] = "Integer",
|
||
[kExprNodeFloat] = "Float",
|
||
[kExprNodeSingleQuotedString] = "SingleQuotedString",
|
||
[kExprNodeDoubleQuotedString] = "DoubleQuotedString",
|
||
[kExprNodeOr] = "Or",
|
||
[kExprNodeAnd] = "And",
|
||
[kExprNodeUnaryMinus] = "UnaryMinus",
|
||
[kExprNodeBinaryMinus] = "BinaryMinus",
|
||
[kExprNodeNot] = "Not",
|
||
[kExprNodeMultiplication] = "Multiplication",
|
||
[kExprNodeDivision] = "Division",
|
||
[kExprNodeMod] = "Mod",
|
||
[kExprNodeOption] = "Option",
|
||
[kExprNodeEnvironment] = "Environment",
|
||
[kExprNodeAssignment] = "Assignment",
|
||
};
|
||
|
||
/// Represent `int` character as a string
|
||
///
|
||
/// Converts
|
||
/// - ASCII digits into '{digit}'
|
||
/// - ASCII printable characters into a single-character strings
|
||
/// - everything else to numbers.
|
||
///
|
||
/// @param[in] ch Character to convert.
|
||
///
|
||
/// @return Converted string, stored in a static buffer (overridden after each
|
||
/// call).
|
||
static const char *intchar2str(const int ch)
|
||
FUNC_ATTR_WARN_UNUSED_RESULT
|
||
{
|
||
static char buf[sizeof(int) * 3 + 1];
|
||
if (' ' <= ch && ch < 0x7f) {
|
||
if (ascii_isdigit(ch)) {
|
||
buf[0] = '\'';
|
||
buf[1] = (char)ch;
|
||
buf[2] = '\'';
|
||
buf[3] = NUL;
|
||
} else {
|
||
buf[0] = (char)ch;
|
||
buf[1] = NUL;
|
||
}
|
||
} else {
|
||
snprintf(buf, sizeof(buf), "%i", ch);
|
||
}
|
||
return buf;
|
||
}
|
||
|
||
#ifdef UNIT_TESTING
|
||
# include <stdio.h>
|
||
|
||
REAL_FATTR_UNUSED
|
||
static inline void viml_pexpr_debug_print_ast_node(const ExprASTNode *const *const eastnode_p,
|
||
const char *const prefix)
|
||
{
|
||
if (*eastnode_p == NULL) {
|
||
fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p);
|
||
} else {
|
||
fprintf(stderr, "%s %p : %p : %s : %zu:%zu:%zu\n",
|
||
prefix, (void *)eastnode_p, (void *)(*eastnode_p),
|
||
east_node_type_tab[(*eastnode_p)->type], (*eastnode_p)->start.line,
|
||
(*eastnode_p)->start.col, (*eastnode_p)->len);
|
||
}
|
||
}
|
||
|
||
REAL_FATTR_UNUSED
|
||
static inline void viml_pexpr_debug_print_ast_stack(const ExprASTStack *const ast_stack,
|
||
const char *const msg)
|
||
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
|
||
{
|
||
fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack));
|
||
for (size_t i = 0; i < kv_size(*ast_stack); i++) {
|
||
viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)kv_A(*ast_stack, i),
|
||
"-");
|
||
}
|
||
}
|
||
|
||
REAL_FATTR_UNUSED
|
||
static inline void viml_pexpr_debug_print_token(const ParserState *const pstate,
|
||
const LexExprToken token)
|
||
FUNC_ATTR_ALWAYS_INLINE
|
||
{
|
||
fprintf(stderr, "\ntkn: %s\n", viml_pexpr_repr_token(pstate, token, NULL));
|
||
}
|
||
// Debugging shortcuts. They rely on names from the scope they are used in:
// PSTACK on an `ast_stack` object, PSTACK_P on an `ast_stack` pointer and
// PTOKEN on `pstate`. The msg argument is stringified, not evaluated.
# define PSTACK(msg) \
  viml_pexpr_debug_print_ast_stack(&ast_stack, #msg)
# define PSTACK_P(msg) \
  viml_pexpr_debug_print_ast_stack(ast_stack, #msg)
# define PNODE_P(eastnode_p, msg) \
  viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, \
                                  (#msg))
# define PTOKEN(tkn) \
  viml_pexpr_debug_print_token(pstate, tkn)
|
||
#endif
|
||
|
||
const uint8_t node_maxchildren[] = {
|
||
[kExprNodeMissing] = 0,
|
||
[kExprNodeOpMissing] = 2,
|
||
[kExprNodeTernary] = 2,
|
||
[kExprNodeTernaryValue] = 2,
|
||
[kExprNodeRegister] = 0,
|
||
[kExprNodeSubscript] = 2,
|
||
[kExprNodeListLiteral] = 1,
|
||
[kExprNodeUnaryPlus] = 1,
|
||
[kExprNodeBinaryPlus] = 2,
|
||
[kExprNodeNested] = 1,
|
||
[kExprNodeCall] = 2,
|
||
[kExprNodePlainIdentifier] = 0,
|
||
[kExprNodePlainKey] = 0,
|
||
[kExprNodeComplexIdentifier] = 2,
|
||
[kExprNodeUnknownFigure] = 1,
|
||
[kExprNodeLambda] = 2,
|
||
[kExprNodeDictLiteral] = 1,
|
||
[kExprNodeCurlyBracesIdentifier] = 1,
|
||
[kExprNodeComma] = 2,
|
||
[kExprNodeColon] = 2,
|
||
[kExprNodeArrow] = 2,
|
||
[kExprNodeComparison] = 2,
|
||
[kExprNodeConcat] = 2,
|
||
[kExprNodeConcatOrSubscript] = 2,
|
||
[kExprNodeInteger] = 0,
|
||
[kExprNodeFloat] = 0,
|
||
[kExprNodeSingleQuotedString] = 0,
|
||
[kExprNodeDoubleQuotedString] = 0,
|
||
[kExprNodeOr] = 2,
|
||
[kExprNodeAnd] = 2,
|
||
[kExprNodeUnaryMinus] = 1,
|
||
[kExprNodeBinaryMinus] = 2,
|
||
[kExprNodeNot] = 1,
|
||
[kExprNodeMultiplication] = 2,
|
||
[kExprNodeDivision] = 2,
|
||
[kExprNodeMod] = 2,
|
||
[kExprNodeOption] = 0,
|
||
[kExprNodeEnvironment] = 0,
|
||
[kExprNodeAssignment] = 2,
|
||
};
|
||
|
||
/// Free memory occupied by AST
|
||
///
|
||
/// @param ast AST stack to free.
|
||
void viml_pexpr_free_ast(ExprAST ast)
|
||
{
|
||
ExprASTStack ast_stack;
|
||
kvi_init(ast_stack);
|
||
kvi_push(ast_stack, &ast.root);
|
||
while (kv_size(ast_stack)) {
|
||
ExprASTNode **const cur_node = kv_last(ast_stack);
|
||
#ifndef NDEBUG
|
||
// Explicitly check for AST recursiveness.
|
||
for (size_t i = 0; i < kv_size(ast_stack) - 1; i++) {
|
||
assert(*kv_A(ast_stack, i) != *cur_node);
|
||
}
|
||
#endif
|
||
if (*cur_node == NULL) {
|
||
assert(kv_size(ast_stack) == 1);
|
||
kv_drop(ast_stack, 1);
|
||
} else if ((*cur_node)->children != NULL) {
|
||
#ifndef NDEBUG
|
||
const uint8_t maxchildren = node_maxchildren[(*cur_node)->type];
|
||
assert(maxchildren > 0);
|
||
assert(maxchildren <= 2);
|
||
assert(maxchildren == 1
|
||
? (*cur_node)->children->next == NULL
|
||
: ((*cur_node)->children->next == NULL
|
||
|| (*cur_node)->children->next->next == NULL));
|
||
#endif
|
||
kvi_push(ast_stack, &(*cur_node)->children);
|
||
} else if ((*cur_node)->next != NULL) {
|
||
kvi_push(ast_stack, &(*cur_node)->next);
|
||
} else if (*cur_node != NULL) {
|
||
kv_drop(ast_stack, 1);
|
||
switch ((*cur_node)->type) {
|
||
case kExprNodeDoubleQuotedString:
|
||
case kExprNodeSingleQuotedString:
|
||
xfree((*cur_node)->data.str.value);
|
||
break;
|
||
case kExprNodeMissing:
|
||
case kExprNodeOpMissing:
|
||
case kExprNodeTernary:
|
||
case kExprNodeTernaryValue:
|
||
case kExprNodeRegister:
|
||
case kExprNodeSubscript:
|
||
case kExprNodeListLiteral:
|
||
case kExprNodeUnaryPlus:
|
||
case kExprNodeBinaryPlus:
|
||
case kExprNodeNested:
|
||
case kExprNodeCall:
|
||
case kExprNodePlainIdentifier:
|
||
case kExprNodePlainKey:
|
||
case kExprNodeComplexIdentifier:
|
||
case kExprNodeUnknownFigure:
|
||
case kExprNodeLambda:
|
||
case kExprNodeDictLiteral:
|
||
case kExprNodeCurlyBracesIdentifier:
|
||
case kExprNodeAssignment:
|
||
case kExprNodeComma:
|
||
case kExprNodeColon:
|
||
case kExprNodeArrow:
|
||
case kExprNodeComparison:
|
||
case kExprNodeConcat:
|
||
case kExprNodeConcatOrSubscript:
|
||
case kExprNodeInteger:
|
||
case kExprNodeFloat:
|
||
case kExprNodeOr:
|
||
case kExprNodeAnd:
|
||
case kExprNodeUnaryMinus:
|
||
case kExprNodeBinaryMinus:
|
||
case kExprNodeNot:
|
||
case kExprNodeMultiplication:
|
||
case kExprNodeDivision:
|
||
case kExprNodeMod:
|
||
case kExprNodeOption:
|
||
case kExprNodeEnvironment:
|
||
break;
|
||
}
|
||
xfree(*cur_node);
|
||
*cur_node = NULL;
|
||
}
|
||
}
|
||
kvi_destroy(ast_stack);
|
||
}
|
||
|
||
// Binary operator precedence and associativity:
|
||
//
|
||
// Operator | Precedence | Associativity
|
||
// ---------+------------+-----------------
|
||
// || | 2 | left
|
||
// && | 3 | left
|
||
// cmp* | 4 | not associative
|
||
// + - . | 5 | left
|
||
// * / % | 6 | left
|
||
//
|
||
// * comparison operators:
|
||
//
|
||
// == ==# ==? != !=# !=?
|
||
// =~ =~# =~? !~ !~# !~?
|
||
// > ># >? <= <=# <=?
|
||
// < <# <? >= >=# >=?
|
||
// is is# is? isnot isnot# isnot?
|
||
|
||
/// Allocate a new node and set some of the values
|
||
///
|
||
/// @param[in] type Node type to allocate.
|
||
/// @param[in] level Node level to allocate
|
||
static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type)
|
||
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC
|
||
{
|
||
ExprASTNode *ret = xmalloc(sizeof(*ret));
|
||
ret->type = type;
|
||
ret->children = NULL;
|
||
ret->next = NULL;
|
||
return ret;
|
||
}
|
||
|
||
static struct {
|
||
ExprOpLvl lvl;
|
||
ExprOpAssociativity ass;
|
||
} node_type_to_node_props[] = {
|
||
[kExprNodeMissing] = { kEOpLvlInvalid, kEOpAssNo, },
|
||
[kExprNodeOpMissing] = { kEOpLvlMultiplication, kEOpAssNo },
|
||
|
||
[kExprNodeNested] = { kEOpLvlParens, kEOpAssNo },
|
||
// Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but
|
||
// kEOpLvlParens when it comes to inside the parenthesis.
|
||
[kExprNodeCall] = { kEOpLvlParens, kEOpAssNo },
|
||
[kExprNodeSubscript] = { kEOpLvlParens, kEOpAssNo },
|
||
|
||
[kExprNodeUnknownFigure] = { kEOpLvlParens, kEOpAssLeft },
|
||
[kExprNodeLambda] = { kEOpLvlParens, kEOpAssNo },
|
||
[kExprNodeDictLiteral] = { kEOpLvlParens, kEOpAssNo },
|
||
[kExprNodeListLiteral] = { kEOpLvlParens, kEOpAssNo },
|
||
|
||
[kExprNodeArrow] = { kEOpLvlArrow, kEOpAssNo },
|
||
|
||
// Right associativity for comma because this means easier access to arguments
|
||
// list, etc: for "[a, b, c, d]" you can access "a" in one step if it is
|
||
// represented as "list(comma(a, comma(b, comma(c, d))))" then if it is
|
||
// "list(comma(comma(comma(a, b), c), d))" in which case you will need to
|
||
// traverse all three comma() structures. And with comma operator (including
|
||
// actual comma operator from C which is not present in VimL) nobody cares
|
||
// about associativity, only about order of execution.
|
||
[kExprNodeComma] = { kEOpLvlComma, kEOpAssRight },
|
||
|
||
// Colons are not eligible for chaining, so nobody cares about associativity.
|
||
[kExprNodeColon] = { kEOpLvlColon, kEOpAssNo },
|
||
|
||
[kExprNodeTernary] = { kEOpLvlTernary, kEOpAssRight },
|
||
|
||
[kExprNodeOr] = { kEOpLvlOr, kEOpAssLeft },
|
||
|
||
[kExprNodeAnd] = { kEOpLvlAnd, kEOpAssLeft },
|
||
|
||
[kExprNodeTernaryValue] = { kEOpLvlTernaryValue, kEOpAssRight },
|
||
|
||
[kExprNodeComparison] = { kEOpLvlComparison, kEOpAssRight },
|
||
|
||
[kExprNodeBinaryPlus] = { kEOpLvlAddition, kEOpAssLeft },
|
||
[kExprNodeBinaryMinus] = { kEOpLvlAddition, kEOpAssLeft },
|
||
[kExprNodeConcat] = { kEOpLvlAddition, kEOpAssLeft },
|
||
|
||
[kExprNodeMultiplication] = { kEOpLvlMultiplication, kEOpAssLeft },
|
||
[kExprNodeDivision] = { kEOpLvlMultiplication, kEOpAssLeft },
|
||
[kExprNodeMod] = { kEOpLvlMultiplication, kEOpAssLeft },
|
||
|
||
[kExprNodeUnaryPlus] = { kEOpLvlUnary, kEOpAssNo },
|
||
[kExprNodeUnaryMinus] = { kEOpLvlUnary, kEOpAssNo },
|
||
[kExprNodeNot] = { kEOpLvlUnary, kEOpAssNo },
|
||
|
||
[kExprNodeConcatOrSubscript] = { kEOpLvlSubscript, kEOpAssLeft },
|
||
|
||
[kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft },
|
||
|
||
[kExprNodeAssignment] = { kEOpLvlAssignment, kEOpAssLeft },
|
||
|
||
[kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft },
|
||
|
||
[kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo },
|
||
[kExprNodePlainKey] = { kEOpLvlValue, kEOpAssNo },
|
||
[kExprNodeRegister] = { kEOpLvlValue, kEOpAssNo },
|
||
[kExprNodeInteger] = { kEOpLvlValue, kEOpAssNo },
|
||
[kExprNodeFloat] = { kEOpLvlValue, kEOpAssNo },
|
||
[kExprNodeDoubleQuotedString] = { kEOpLvlValue, kEOpAssNo },
|
||
[kExprNodeSingleQuotedString] = { kEOpLvlValue, kEOpAssNo },
|
||
[kExprNodeOption] = { kEOpLvlValue, kEOpAssNo },
|
||
[kExprNodeEnvironment] = { kEOpLvlValue, kEOpAssNo },
|
||
};
|
||
|
||
/// Get AST node priority level
|
||
///
|
||
/// Used primary to reduce line length, so keep the name short.
|
||
///
|
||
/// @param[in] node Node to get priority for.
|
||
///
|
||
/// @return Node priority level.
|
||
static inline ExprOpLvl node_lvl(const ExprASTNode node)
|
||
FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
|
||
{
|
||
return node_type_to_node_props[node.type].lvl;
|
||
}
|
||
|
||
/// Get AST node associativity, to be used for operator nodes primary
|
||
///
|
||
/// Used primary to reduce line length, so keep the name short.
|
||
///
|
||
/// @param[in] node Node to get priority for.
|
||
///
|
||
/// @return Node associativity.
|
||
static inline ExprOpAssociativity node_ass(const ExprASTNode node)
|
||
FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
|
||
{
|
||
return node_type_to_node_props[node.type].ass;
|
||
}
|
||
|
||
/// Handle binary operator
|
||
///
|
||
/// This function is responsible for handling priority levels as well.
|
||
///
|
||
/// @param[in] pstate Parser state, used for error reporting.
|
||
/// @param ast_stack AST stack. May be popped of some values and will
|
||
/// definitely receive new ones.
|
||
/// @param bop_node New node to handle.
|
||
/// @param[out] want_node_p New value of want_node.
|
||
/// @param[out] ast_err Location where error is saved, if any.
|
||
///
|
||
/// @return True if no errors occurred, false otherwise.
|
||
static bool viml_pexpr_handle_bop(const ParserState *const pstate, ExprASTStack *const ast_stack,
|
||
ExprASTNode *const bop_node, ExprASTWantedNode *const want_node_p,
|
||
ExprASTError *const ast_err)
|
||
FUNC_ATTR_NONNULL_ALL
|
||
{
|
||
bool ret = true;
|
||
ExprASTNode **top_node_p = NULL;
|
||
ExprASTNode *top_node;
|
||
ExprOpLvl top_node_lvl;
|
||
ExprOpAssociativity top_node_ass;
|
||
assert(kv_size(*ast_stack));
|
||
const ExprOpLvl bop_node_lvl = ((bop_node->type == kExprNodeCall
|
||
|| bop_node->type == kExprNodeSubscript)
|
||
? kEOpLvlSubscript
|
||
: node_lvl(*bop_node));
|
||
#ifndef NDEBUG
|
||
const ExprOpAssociativity bop_node_ass = (
|
||
(bop_node->type == kExprNodeCall
|
||
|| bop_node->type == kExprNodeSubscript)
|
||
? kEOpAssLeft
|
||
: node_ass(*bop_node));
|
||
#endif
|
||
do {
|
||
ExprASTNode **new_top_node_p = kv_last(*ast_stack);
|
||
ExprASTNode *new_top_node = *new_top_node_p;
|
||
assert(new_top_node != NULL);
|
||
const ExprOpLvl new_top_node_lvl = node_lvl(*new_top_node);
|
||
const ExprOpAssociativity new_top_node_ass = node_ass(*new_top_node);
|
||
assert(bop_node_lvl != new_top_node_lvl
|
||
|| bop_node_ass == new_top_node_ass);
|
||
if (top_node_p != NULL
|
||
&& ((bop_node_lvl > new_top_node_lvl
|
||
|| (bop_node_lvl == new_top_node_lvl
|
||
&& new_top_node_ass == kEOpAssNo)))) {
|
||
break;
|
||
}
|
||
kv_drop(*ast_stack, 1);
|
||
top_node_p = new_top_node_p;
|
||
top_node = new_top_node;
|
||
top_node_lvl = new_top_node_lvl;
|
||
top_node_ass = new_top_node_ass;
|
||
if (bop_node_lvl == top_node_lvl && top_node_ass == kEOpAssRight) {
|
||
break;
|
||
}
|
||
} while (kv_size(*ast_stack));
|
||
if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) {
|
||
// outer(op(x,y)) -> outer(new_op(op(x,y),*))
|
||
//
|
||
// Before: top_node_p = outer(*), points to op(x,y)
|
||
// Other stack elements unknown
|
||
//
|
||
// After: top_node_p = outer(*), points to new_op(op(x,y))
|
||
// &bop_node->children->next = new_op(op(x,y),*), points to NULL
|
||
*top_node_p = bop_node;
|
||
bop_node->children = top_node;
|
||
assert(bop_node->children->next == NULL);
|
||
kvi_push(*ast_stack, top_node_p);
|
||
kvi_push(*ast_stack, &bop_node->children->next);
|
||
} else {
|
||
assert(top_node_lvl == bop_node_lvl && top_node_ass == kEOpAssRight);
|
||
assert(top_node->children != NULL && top_node->children->next != NULL);
|
||
// outer(op(x,y)) -> outer(op(x,new_op(y,*)))
|
||
//
|
||
// Before: top_node_p = outer(*), points to op(x,y)
|
||
// Other stack elements unknown
|
||
//
|
||
// After: top_node_p = outer(*), points to op(x,new_op(y))
|
||
// &top_node->children->next = op(x,*), points to new_op(y)
|
||
// &bop_node->children->next = new_op(y,*), points to NULL
|
||
bop_node->children = top_node->children->next;
|
||
top_node->children->next = bop_node;
|
||
assert(bop_node->children->next == NULL);
|
||
kvi_push(*ast_stack, top_node_p);
|
||
kvi_push(*ast_stack, &top_node->children->next);
|
||
kvi_push(*ast_stack, &bop_node->children->next);
|
||
// TODO(ZyX-I): Make this not error, but treat like Python does
|
||
if (bop_node->type == kExprNodeComparison) {
|
||
east_set_error(pstate, ast_err,
|
||
_("E15: Operator is not associative: %.*s"),
|
||
bop_node->start);
|
||
ret = false;
|
||
}
|
||
}
|
||
*want_node_p = kENodeValue;
|
||
return ret;
|
||
}
|
||
|
||
/// ParserPosition literal based on ParserPosition pos with columns shifted
|
||
///
|
||
/// Function does not check whether resulting position is valid.
|
||
///
|
||
/// @param[in] pos Position to shift.
|
||
/// @param[in] shift Number of bytes to shift.
|
||
///
|
||
/// @return Shifted position.
|
||
static inline ParserPosition shifted_pos(const ParserPosition pos, const size_t shift)
|
||
FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT
|
||
{
|
||
return (ParserPosition) { .line = pos.line, .col = pos.col + shift };
|
||
}
|
||
|
||
/// ParserPosition literal based on ParserPosition pos with specified column
|
||
///
|
||
/// Function does not check whether remaining position is valid.
|
||
///
|
||
/// @param[in] pos Position to adjust.
|
||
/// @param[in] new_col New column.
|
||
///
|
||
/// @return Shifted position.
|
||
static inline ParserPosition recol_pos(const ParserPosition pos, const size_t new_col)
|
||
FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT
|
||
{
|
||
return (ParserPosition) { .line = pos.line, .col = new_col };
|
||
}
|
||
|
||
/// Highlight group name for group g
///
/// Yields "NvimInvalid<g>" when the variable `is_invalid` in scope is true
/// and "Nvim<g>" otherwise.
#define HL(g) (!is_invalid ? "Nvim" #g : "NvimInvalid" #g)

/// Highlight the current token (`cur_token` in scope) with the given group
#define HL_CUR_TOKEN(g) \
  viml_parser_highlight(pstate, cur_token.start, cur_token.len, HL(g))

/// Allocate a new node of the given type
#define NEW_NODE(type) viml_pexpr_new_node(type)
|
||
|
||
/// Set position of the given node to position from the given token
///
/// Copies both the start position and the length. Both arguments are
/// parenthesized in the expansion, so arbitrary expressions may be passed.
///
/// @param  cur_node  Node to modify.
/// @param  cur_token  Token to set position from.
#define POS_FROM_TOKEN(cur_node, cur_token) \
  do { \
    (cur_node)->start = (cur_token).start; \
    (cur_node)->len = (cur_token).len; \
  } while (0)
|
||
|
||
/// Allocate a new node positioned at the current token
///
/// Uses `cur_token` and `prev_token` from the enclosing scope. When the
/// previous token was spacing, the node starts at that spacing and its
/// length covers both tokens.
///
/// @param  cur_node  Variable to save allocated node to.
/// @param  typ  Node type.
#define NEW_NODE_WITH_CUR_POS(cur_node, typ) \
  do { \
    (cur_node) = NEW_NODE(typ); \
    if (prev_token.type == kExprLexSpacing) { \
      (cur_node)->start = prev_token.start; \
      (cur_node)->len = cur_token.len + prev_token.len; \
    } else { \
      POS_FROM_TOKEN((cur_node), cur_token); \
    } \
  } while (0)
|
||
|
||
/// True when another expression may start after the current one
///
/// This holds when only the root slot is left on `ast_stack`.
/// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not.
#define MAY_HAVE_NEXT_EXPR (kv_size(ast_stack) == 1)

/// Add operator node via viml_pexpr_handle_bop()
///
/// Marks the current token as invalid if the operator could not be added
/// without an error.
///
/// @param[in]  cur_node  Node to add.
#define ADD_OP_NODE(cur_node) \
  is_invalid |= !viml_pexpr_handle_bop(pstate, &ast_stack, cur_node, &want_node, &ast.err)
|
||
|
||
/// Handle a value that appears where an operator was expected
///
/// For things like
///
///     :echo @a @a
///
/// (allowed when kExprFlagsMulti is set and a next expression may start:
/// parsing stops without consuming the token) or
///
///     :echo (@a @a)
///
/// (an error is recorded and the input is parsed as OpMissing(@a, @a)).
///
/// Always leaves through a goto.
#define OP_MISSING \
  do { \
    if ((flags & kExprFlagsMulti) && MAY_HAVE_NEXT_EXPR) { \
      /* Multiple expressions allowed, return without calling */ \
      /* viml_parser_advance(). */ \
      goto viml_pexpr_parse_end; \
    } \
    assert(*top_node_p != NULL); \
    ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \
    NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \
    cur_node->len = 0; \
    ADD_OP_NODE(cur_node); \
    goto viml_pexpr_parse_process_token; \
  } while (0)
|
||
|
||
/// Insert a placeholder when a value was expected but is absent
///
/// For things like "* 5". Does nothing unless want_node is kENodeValue;
/// otherwise records the error, stores a zero-length kExprNodeMissing node
/// in the top slot and switches want_node to kENodeOperator.
///
/// @param[in]  msg  Error message.
#define ADD_VALUE_IF_MISSING(msg) \
  do { \
    if (want_node == kENodeValue) { \
      want_node = kENodeOperator; \
      ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \
      NEW_NODE_WITH_CUR_POS((*top_node_p), kExprNodeMissing); \
      (*top_node_p)->len = 0; \
    } \
  } while (0)
|
||
|
||
/// Set AST error, unless AST already is not correct
|
||
///
|
||
/// @param[out] ret_ast AST to set error in.
|
||
/// @param[in] pstate Parser state, used to get error message argument.
|
||
/// @param[in] msg Error message, assumed to be already translated and
|
||
/// containing a single %token "%.*s".
|
||
/// @param[in] start Position at which error occurred.
|
||
static inline void east_set_error(const ParserState *const pstate, ExprASTError *const ret_ast_err,
|
||
const char *const msg, const ParserPosition start)
|
||
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
|
||
{
|
||
if (ret_ast_err->msg != NULL) {
|
||
return;
|
||
}
|
||
const ParserLine pline = pstate->reader.lines.items[start.line];
|
||
ret_ast_err->msg = msg;
|
||
ret_ast_err->arg_len = (int)(pline.size - start.col);
|
||
ret_ast_err->arg = pline.data ? pline.data + start.col : NULL;
|
||
}
|
||
|
||
/// Mark the current token invalid and record an error at the token's start
///
/// Uses `is_invalid`, `pstate` and `ast` from the enclosing scope. Macro
/// arguments are parenthesized where member access is applied to them.
///
/// @param  cur_token  Token whose start position is reported.
/// @param  msg  Error message, see east_set_error().
#define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \
  do { \
    is_invalid = true; \
    east_set_error(pstate, &ast.err, msg, (cur_token).start); \
  } while (0)

/// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node
///
/// @param  node  Pointer to the node whose start position is reported.
/// @param  msg  Error message, see east_set_error().
#define ERROR_FROM_NODE_AND_MSG(node, msg) \
  do { \
    is_invalid = true; \
    east_set_error(pstate, &ast.err, msg, (node)->start); \
  } while (0)

/// Set error from the given kExprLexInvalid token
///
/// The message is taken from the token itself.
#define ERROR_FROM_TOKEN(cur_token) \
  ERROR_FROM_TOKEN_AND_MSG(cur_token, (cur_token).data.err.msg)
|
||
|
||
/// Decide what a figure brace node is, updating its highlighting as well
///
/// The node must either still be kExprNodeUnknownFigure or already have the
/// requested type. When highlighting is collected (`pstate->colors` is set),
/// the group of the opening brace is replaced.
///
/// @param[out]  node  Node to modify type.
/// @param[in]  new_type  New type, one of ExprASTNodeType values without
///                       kExprNode prefix.
/// @param[in]  hl  Corresponding highlighting, passed as an argument to #HL.
#define SELECT_FIGURE_BRACE_TYPE(node, new_type, hl) \
  do { \
    ExprASTNode *const node_ = (node); \
    assert(node_->type == kExprNodeUnknownFigure || node_->type == kExprNode##new_type); \
    node_->type = kExprNode##new_type; \
    if (pstate->colors) { \
      kv_A(*pstate->colors, node_->data.fig.opening_hl_idx).group = HL(hl); \
    } \
  } while (0)
|
||
|
||
/// Append an identifier part to a complex identifier
///
/// This one is to be called only in case want_node is kENodeOperator. The
/// part is accepted only when it directly follows (no spacing) a complex,
/// plain or curly braces identifier; in every other case #OP_MISSING takes
/// over.
///
/// @param  new_ident_node_code  Code used to create a new identifier node and
///                              update want_node and ast_stack, without
///                              a trailing semicolon.
/// @param  hl  Highlighting name to use, passed as an argument to #HL.
#define ADD_IDENT(new_ident_node_code, hl) \
  do { \
    assert(want_node == kENodeOperator); \
    /* A part of a complex identifier is never preceded by spacing. */ \
    if (prev_token.type == kExprLexSpacing) { \
      OP_MISSING; \
    } \
    switch ((*top_node_p)->type) { \
    /* The previous node must be an identifier: complex, plain or curly */ \
    /* braces one. */ \
    /* TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to */ \
    /* handle environment variables like those bash uses for */ \
    /* `export -f`: their names consist not only of alphanumeric */ \
    /* characters. */ \
    case kExprNodeComplexIdentifier: \
    case kExprNodePlainIdentifier: \
    case kExprNodeCurlyBracesIdentifier: { \
      /* Wrap the previous node into a zero-length complex identifier. */ \
      NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); \
      cur_node->len = 0; \
      cur_node->children = *top_node_p; \
      *top_node_p = cur_node; \
      /* The new part goes into the slot after the previous node. */ \
      kvi_push(ast_stack, &cur_node->children->next); \
      ExprASTNode **const part_slot_p = kv_last(ast_stack); \
      assert(*part_slot_p == NULL); \
      new_ident_node_code; \
      *part_slot_p = cur_node; \
      HL_CUR_TOKEN(hl); \
      break; \
    } \
    default: { \
      OP_MISSING; \
      break; \
    } \
    } \
  } while (0)
|
||
|
||
/// Determine whether given parse type is an assignment
|
||
///
|
||
/// @param[in] pt Checked parse type.
|
||
///
|
||
/// @return true if parsing an assignment, false otherwise.
|
||
static inline bool pt_is_assignment(const ExprASTParseType pt)
|
||
FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
|
||
{
|
||
return (pt == kEPTAssignment || pt == kEPTSingleAssignment);
|
||
}
|
||
|
||
/// Structure used to define “string shifts” necessary to map string
/// highlighting to actual strings.
///
/// One entry describes one escape sequence found in a quoted string.
typedef struct {
  /// Where special character starts in original string.
  size_t start;
  /// Length of original string (e.g. 4 for "\x80").
  size_t orig_len;
  /// Length of resulting character(s) (e.g. 1 for "\x80").
  size_t act_len;
  /// True if escape sequence in original is not known.
  bool escape_not_known;
} StringShift;
|
||
|
||
/// Parse and highlight single- or double-quoted string
|
||
///
|
||
/// Function is supposed to detect and highlight regular expressions (but does
|
||
/// not do now).
|
||
///
|
||
/// @param[out] pstate Parser state which also contains a place where
|
||
/// highlighting is saved.
|
||
/// @param[out] node Node where string parsing results are saved.
|
||
/// @param[in] token Token to highlight.
|
||
/// @param[in] ast_stack Parser AST stack, used to detect whether current
|
||
/// string is a regex.
|
||
/// @param[in] is_invalid Whether currently processed token is not valid.
|
||
static void parse_quoted_string(ParserState *const pstate, ExprASTNode *const node,
|
||
const LexExprToken token, const ExprASTStack *ast_stack,
|
||
const bool is_invalid)
|
||
FUNC_ATTR_NONNULL_ALL
|
||
{
|
||
const ParserLine pline = pstate->reader.lines.items[token.start.line];
|
||
const char *const s = pline.data + token.start.col;
|
||
const char *const e = s + token.len - token.data.str.closed;
|
||
const char *p = s + 1;
|
||
const bool is_double = (token.type == kExprLexDoubleQuotedString);
|
||
size_t size = token.len - token.data.str.closed - 1;
|
||
kvec_withinit_t(StringShift, 16) shifts;
|
||
kvi_init(shifts);
|
||
if (!is_double) {
|
||
viml_parser_highlight(pstate, token.start, 1, HL(SingleQuote));
|
||
while (p < e) {
|
||
const char *const chunk_e = memchr(p, '\'', (size_t)(e - p));
|
||
if (chunk_e == NULL) {
|
||
break;
|
||
}
|
||
size--;
|
||
p = chunk_e + 2;
|
||
if (pstate->colors) {
|
||
kvi_push(shifts, ((StringShift) {
|
||
.start = token.start.col + (size_t)(chunk_e - s),
|
||
.orig_len = 2,
|
||
.act_len = 1,
|
||
.escape_not_known = false,
|
||
}));
|
||
}
|
||
}
|
||
node->data.str.size = size;
|
||
if (size == 0) {
|
||
node->data.str.value = NULL;
|
||
} else {
|
||
char *v_p;
|
||
v_p = node->data.str.value = xmallocz(size);
|
||
p = s + 1;
|
||
while (p < e) {
|
||
const char *const chunk_e = memchr(p, '\'', (size_t)(e - p));
|
||
if (chunk_e == NULL) {
|
||
memcpy(v_p, p, (size_t)(e - p));
|
||
break;
|
||
}
|
||
memcpy(v_p, p, (size_t)(chunk_e - p));
|
||
v_p += (size_t)(chunk_e - p) + 1;
|
||
v_p[-1] = '\'';
|
||
p = chunk_e + 2;
|
||
}
|
||
}
|
||
} else {
|
||
viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuote));
|
||
for (p = s + 1; p < e; p++) {
|
||
if (*p == '\\' && p + 1 < e) {
|
||
p++;
|
||
if (p + 1 == e) {
|
||
size--;
|
||
break;
|
||
}
|
||
switch (*p) {
|
||
// A "\<x>" form occupies at least 4 characters, and produces up to
|
||
// to 9 characters (6 for the char and 3 for a modifier):
|
||
// reserve space for 5 extra, but do not compute actual length
|
||
// just now, it would be costly.
|
||
case '<':
|
||
size += 5;
|
||
break;
|
||
// Hexadecimal, always single byte, but at least three bytes each.
|
||
case 'x':
|
||
case 'X':
|
||
size--;
|
||
if (ascii_isxdigit(p[1])) {
|
||
size--;
|
||
if (p + 2 < e && ascii_isxdigit(p[2])) {
|
||
size--;
|
||
}
|
||
}
|
||
break;
|
||
// Unicode
|
||
//
|
||
// \uF takes 1 byte which is 2 bytes less then escape sequence.
|
||
// \uFF: 2 bytes, 2 bytes less.
|
||
// \uFFF: 3 bytes, 2 bytes less.
|
||
// \uFFFF: 3 bytes, 3 bytes less.
|
||
// \UFFFFF: 4 bytes, 3 bytes less.
|
||
// \UFFFFFF: 5 bytes, 3 bytes less.
|
||
// \UFFFFFFF: 6 bytes, 3 bytes less.
|
||
// \U7FFFFFFF: 6 bytes, 4 bytes less.
|
||
case 'u':
|
||
case 'U': {
|
||
const char *const esc_start = p;
|
||
size_t n = (*p == 'u' ? 4 : 8);
|
||
int nr = 0;
|
||
p++;
|
||
while (p + 1 < e && n-- && ascii_isxdigit(p[1])) {
|
||
p++;
|
||
nr = (nr << 4) + hex2nr(*p);
|
||
}
|
||
// Escape length: (esc_start - 1) points to "\\", esc_start to "u"
|
||
// or "U", p to the byte after last byte. So escape sequence
|
||
// occupies p - (esc_start - 1), but it stands for a utf_char2len
|
||
// bytes.
|
||
size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr));
|
||
p--;
|
||
break;
|
||
}
|
||
// Octal, always single byte, but at least two bytes each.
|
||
case '0':
|
||
case '1':
|
||
case '2':
|
||
case '3':
|
||
case '4':
|
||
case '5':
|
||
case '6':
|
||
case '7':
|
||
size--;
|
||
p++;
|
||
if (*p >= '0' && *p <= '7') {
|
||
size--;
|
||
p++;
|
||
if (p < e && *p >= '0' && *p <= '7') {
|
||
size--;
|
||
p++;
|
||
}
|
||
}
|
||
break;
|
||
default:
|
||
size--;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
if (size == 0) {
|
||
node->data.str.value = NULL;
|
||
node->data.str.size = 0;
|
||
} else {
|
||
char *v_p;
|
||
v_p = node->data.str.value = xmalloc(size);
|
||
p = s + 1;
|
||
while (p < e) {
|
||
const char *const chunk_e = memchr(p, '\\', (size_t)(e - p));
|
||
if (chunk_e == NULL) {
|
||
memcpy(v_p, p, (size_t)(e - p));
|
||
v_p += e - p;
|
||
break;
|
||
}
|
||
memcpy(v_p, p, (size_t)(chunk_e - p));
|
||
v_p += (size_t)(chunk_e - p);
|
||
p = chunk_e + 1;
|
||
if (p == e) {
|
||
*v_p++ = '\\';
|
||
break;
|
||
}
|
||
bool is_unknown = false;
|
||
const char *const v_p_start = v_p;
|
||
switch (*p) {
|
||
#define SINGLE_CHAR_ESC(ch, real_ch) \
|
||
case ch: { \
|
||
*v_p++ = real_ch; \
|
||
p++; \
|
||
break; \
|
||
}
|
||
SINGLE_CHAR_ESC('b', BS)
|
||
SINGLE_CHAR_ESC('e', ESC)
|
||
SINGLE_CHAR_ESC('f', FF)
|
||
SINGLE_CHAR_ESC('n', NL)
|
||
SINGLE_CHAR_ESC('r', CAR)
|
||
SINGLE_CHAR_ESC('t', TAB)
|
||
SINGLE_CHAR_ESC('"', '"')
|
||
SINGLE_CHAR_ESC('\\', '\\')
|
||
#undef SINGLE_CHAR_ESC
|
||
|
||
// Hexadecimal or unicode.
|
||
case 'X':
|
||
case 'x':
|
||
case 'u':
|
||
case 'U':
|
||
if (p + 1 < e && ascii_isxdigit(p[1])) {
|
||
size_t n;
|
||
int nr;
|
||
bool is_hex = (*p == 'x' || *p == 'X');
|
||
|
||
if (is_hex) {
|
||
n = 2;
|
||
} else if (*p == 'u') {
|
||
n = 4;
|
||
} else {
|
||
n = 8;
|
||
}
|
||
nr = 0;
|
||
while (p + 1 < e && n-- && ascii_isxdigit(p[1])) {
|
||
p++;
|
||
nr = (nr << 4) + hex2nr(*p);
|
||
}
|
||
p++;
|
||
if (is_hex) {
|
||
*v_p++ = (char)nr;
|
||
} else {
|
||
v_p += utf_char2bytes(nr, v_p);
|
||
}
|
||
} else {
|
||
is_unknown = true;
|
||
*v_p++ = *p;
|
||
p++;
|
||
}
|
||
break;
|
||
// Octal: "\1", "\12", "\123".
|
||
case '0':
|
||
case '1':
|
||
case '2':
|
||
case '3':
|
||
case '4':
|
||
case '5':
|
||
case '6':
|
||
case '7': {
|
||
uint8_t ch = (uint8_t)(*p++ - '0');
|
||
if (p < e && *p >= '0' && *p <= '7') {
|
||
ch = (uint8_t)((ch << 3) + *p++ - '0');
|
||
if (p < e && *p >= '0' && *p <= '7') {
|
||
ch = (uint8_t)((ch << 3) + *p++ - '0');
|
||
}
|
||
}
|
||
*v_p++ = (char)ch;
|
||
break;
|
||
}
|
||
// Special key, e.g.: "\<C-W>"
|
||
case '<': {
|
||
int flags = FSK_KEYCODE | FSK_IN_STRING;
|
||
|
||
if (p[1] != '*') {
|
||
flags |= FSK_SIMPLIFY;
|
||
}
|
||
const size_t special_len = trans_special((const char_u **)&p, (size_t)(e - p),
|
||
(char_u *)v_p, flags, false, NULL);
|
||
if (special_len != 0) {
|
||
v_p += special_len;
|
||
} else {
|
||
is_unknown = true;
|
||
mb_copy_char(&p, &v_p);
|
||
}
|
||
break;
|
||
}
|
||
default:
|
||
is_unknown = true;
|
||
mb_copy_char(&p, &v_p);
|
||
break;
|
||
}
|
||
if (pstate->colors) {
|
||
kvi_push(shifts, ((StringShift) {
|
||
.start = token.start.col + (size_t)(chunk_e - s),
|
||
.orig_len = (size_t)(p - chunk_e),
|
||
.act_len = (size_t)(v_p - (char *)v_p_start),
|
||
.escape_not_known = is_unknown,
|
||
}));
|
||
}
|
||
}
|
||
node->data.str.size = (size_t)(v_p - node->data.str.value);
|
||
}
|
||
}
|
||
if (pstate->colors) {
|
||
// TODO(ZyX-I): use ast_stack to determine and highlight regular expressions
|
||
// TODO(ZyX-I): use ast_stack to determine and highlight printf format str
|
||
// TODO(ZyX-I): use ast_stack to determine and highlight expression strings
|
||
size_t next_col = token.start.col + 1;
|
||
const char *const body_str = (is_double
|
||
? HL(DoubleQuotedBody)
|
||
: HL(SingleQuotedBody));
|
||
const char *const esc_str = (is_double
|
||
? HL(DoubleQuotedEscape)
|
||
: HL(SingleQuotedQuote));
|
||
const char *const ukn_esc_str = (is_double
|
||
? HL(DoubleQuotedUnknownEscape)
|
||
: HL(SingleQuotedUnknownEscape));
|
||
for (size_t i = 0; i < kv_size(shifts); i++) {
|
||
const StringShift cur_shift = kv_A(shifts, i);
|
||
if (cur_shift.start > next_col) {
|
||
viml_parser_highlight(pstate, recol_pos(token.start, next_col),
|
||
cur_shift.start - next_col,
|
||
body_str);
|
||
}
|
||
viml_parser_highlight(pstate, recol_pos(token.start, cur_shift.start),
|
||
cur_shift.orig_len,
|
||
(cur_shift.escape_not_known
|
||
? ukn_esc_str
|
||
: esc_str));
|
||
next_col = cur_shift.start + cur_shift.orig_len;
|
||
}
|
||
if (next_col - token.start.col < token.len - token.data.str.closed) {
|
||
viml_parser_highlight(pstate, recol_pos(token.start, next_col),
|
||
(token.start.col
|
||
+ token.len
|
||
- token.data.str.closed
|
||
- next_col),
|
||
body_str);
|
||
}
|
||
}
|
||
if (token.data.str.closed) {
|
||
if (is_double) {
|
||
viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1),
|
||
1, HL(DoubleQuote));
|
||
} else {
|
||
viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1),
|
||
1, HL(SingleQuote));
|
||
}
|
||
}
|
||
kvi_destroy(shifts);
|
||
}
|
||
|
||
/// Additional flags to pass to lexer depending on want_node
|
||
static const int want_node_to_lexer_flags[] = {
|
||
[kENodeValue] = kELFlagIsNotCmp,
|
||
[kENodeOperator] = kELFlagForbidScope,
|
||
};
|
||
|
||
/// Number of characters to highlight as NumberPrefix depending on the base
///
/// Indexed by the numeric base (2, 8, 10 or 16); all other indices are
/// implicitly zero.
static const uint8_t base_to_prefix_length[] = {
  [10] = 0,
  [8] = 1,
  [2] = 2,
  [16] = 2,
};
|
||
|
||
/// Parse one VimL expression
|
||
///
|
||
/// @param pstate Parser state.
|
||
/// @param[in] flags Additional flags, see ExprParserFlags
|
||
///
|
||
/// @return Parsed AST.
|
||
ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
|
||
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
|
||
{
|
||
ExprAST ast = {
|
||
.err = {
|
||
.msg = NULL,
|
||
.arg_len = 0,
|
||
.arg = NULL,
|
||
},
|
||
.root = NULL,
|
||
};
|
||
// Expression stack contains current branch in AST tree: that is
|
||
// - Stack item 0 contains root of the tree, i.e. &ast->root.
|
||
// - Stack item i points to the previous stack item’s last child.
|
||
//
|
||
// When parser expects “value” node that is something like identifier or "["
|
||
// (list start) last stack item contains NULL. Otherwise last stack item is
|
||
// supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node
|
||
// representing "1+1").
|
||
ExprASTStack ast_stack;
|
||
kvi_init(ast_stack);
|
||
kvi_push(ast_stack, &ast.root);
|
||
ExprASTWantedNode want_node = kENodeValue;
|
||
ExprASTParseTypeStack pt_stack;
|
||
kvi_init(pt_stack);
|
||
kvi_push(pt_stack, kEPTExpr);
|
||
if (flags & kExprFlagsParseLet) {
|
||
kvi_push(pt_stack, kEPTAssignment);
|
||
}
|
||
LexExprToken prev_token = { .type = kExprLexMissing };
|
||
bool highlighted_prev_spacing = false;
|
||
// Lambda node, valid when parsing lambda arguments only.
|
||
ExprASTNode *lambda_node = NULL;
|
||
size_t asgn_level = 0;
|
||
do {
|
||
const bool is_concat_or_subscript = (
|
||
want_node == kENodeValue
|
||
&& kv_size(ast_stack) > 1
|
||
&& (*kv_Z(ast_stack,
|
||
1))->type == kExprNodeConcatOrSubscript);
|
||
const int lexer_additional_flags = (
|
||
kELFlagPeek
|
||
| ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)
|
||
| ((want_node == kENodeValue
|
||
&& (kv_size(ast_stack) == 1
|
||
|| ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat
|
||
&& ((*kv_Z(ast_stack, 1))->type
|
||
!= kExprNodeConcatOrSubscript))))
|
||
? kELFlagAllowFloat
|
||
: 0));
|
||
LexExprToken cur_token = viml_pexpr_next_token(pstate,
|
||
want_node_to_lexer_flags[want_node] |
|
||
lexer_additional_flags);
|
||
if (cur_token.type == kExprLexEOC) {
|
||
break;
|
||
}
|
||
LexExprTokenType tok_type = cur_token.type;
|
||
const bool token_invalid = (tok_type == kExprLexInvalid);
|
||
bool is_invalid = token_invalid;
|
||
viml_pexpr_parse_process_token:
|
||
// May use different flags this time.
|
||
cur_token = viml_pexpr_next_token(pstate,
|
||
want_node_to_lexer_flags[want_node] | lexer_additional_flags);
|
||
if (tok_type == kExprLexSpacing) {
|
||
if (is_invalid) {
|
||
HL_CUR_TOKEN(Spacing);
|
||
} else {
|
||
// Do not do anything: let regular spacing be highlighted as normal.
|
||
// This also allows later to highlight spacing as invalid.
|
||
}
|
||
goto viml_pexpr_parse_cycle_end;
|
||
} else if (is_invalid && prev_token.type == kExprLexSpacing
|
||
&& !highlighted_prev_spacing) {
|
||
viml_parser_highlight(pstate, prev_token.start, prev_token.len,
|
||
HL(Spacing));
|
||
is_invalid = false;
|
||
highlighted_prev_spacing = true;
|
||
}
|
||
const ParserLine pline = pstate->reader.lines.items[cur_token.start.line];
|
||
ExprASTNode **const top_node_p = kv_last(ast_stack);
|
||
assert(kv_size(ast_stack) >= 1);
|
||
ExprASTNode *cur_node = NULL;
|
||
#ifndef NDEBUG
|
||
const bool want_value = (want_node == kENodeValue);
|
||
assert(want_value == (*top_node_p == NULL));
|
||
assert(kv_A(ast_stack, 0) == &ast.root);
|
||
// Check that stack item i + 1 points to the *last* child of stack item i.
|
||
for (size_t i = 0; i + 1 < kv_size(ast_stack); i++) {
|
||
const bool item_null = (want_value && i + 2 == kv_size(ast_stack));
|
||
assert((&(*kv_A(ast_stack, i))->children == kv_A(ast_stack, i + 1)
|
||
&& (item_null
|
||
? (*kv_A(ast_stack, i))->children == NULL
|
||
: (*kv_A(ast_stack, i))->children->next == NULL))
|
||
|| ((&(*kv_A(ast_stack, i))->children->next
|
||
== kv_A(ast_stack, i + 1))
|
||
&& (item_null
|
||
? (*kv_A(ast_stack, i))->children->next == NULL
|
||
: (*kv_A(ast_stack, i))->children->next->next == NULL)));
|
||
}
|
||
#endif
|
||
// Note: in Vim whether expression "cond?d.a:2" is valid depends both on
|
||
// "cond" and whether "d" is a dictionary: expression is valid if condition
|
||
// is true and "d" is a dictionary (with "a" key or it will complain about
|
||
// missing one, but this is not relevant); if any of the requirements is
|
||
// broken then this thing is parsed as "d . a:2" yielding missing colon
|
||
// error. This parser does not allow such ambiguity, especially because it
|
||
// simply can’t: whether "d" is a dictionary is not known at the parsing
|
||
// time.
|
||
//
|
||
// Here example will always contain a concat with "a:2" sucking colon,
|
||
// making expression invalid both because there is no longer a spare colon
|
||
// for ternary and because concatenating dictionary with anything is not
|
||
// valid. There are more cases when this will make a difference though.
|
||
const bool node_is_key = (
|
||
is_concat_or_subscript
|
||
&& (cur_token.type == kExprLexPlainIdentifier
|
||
? (!cur_token.data.var.autoload
|
||
&& cur_token.data.var.scope == kExprVarScopeMissing)
|
||
: (cur_token.type == kExprLexNumber))
|
||
&& prev_token.type != kExprLexSpacing);
|
||
if (is_concat_or_subscript && !node_is_key) {
|
||
// Note: in Vim "d. a" (this is the reason behind `prev_token.type !=
|
||
// kExprLexSpacing` part of the condition) as well as any other "d.{expr}"
|
||
// where "{expr}" does not look like a key is invalid whenever "d" happens
|
||
// to be a dictionary. Since parser has no idea whether preceding
|
||
// expression is actually a dictionary it can’t outright reject anything,
|
||
// so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead,
|
||
// which will yield different errors than Vim does in a number of
|
||
// circumstances, and in any case runtime and not parse time errors.
|
||
(*kv_Z(ast_stack, 1))->type = kExprNodeConcat;
|
||
}
|
||
// Pop some stack pt_stack items in case of misplaced nodes.
|
||
const bool is_single_assignment = kv_last(pt_stack) == kEPTSingleAssignment;
|
||
switch (kv_last(pt_stack)) {
|
||
case kEPTExpr:
|
||
break;
|
||
case kEPTLambdaArguments:
|
||
if ((want_node == kENodeOperator
|
||
&& tok_type != kExprLexComma
|
||
&& tok_type != kExprLexArrow)
|
||
|| (want_node == kENodeValue
|
||
&& !(cur_token.type == kExprLexPlainIdentifier
|
||
&& cur_token.data.var.scope == kExprVarScopeMissing
|
||
&& !cur_token.data.var.autoload)
|
||
&& tok_type != kExprLexArrow)) {
|
||
lambda_node->data.fig.type_guesses.allow_lambda = false;
|
||
if (lambda_node->children != NULL
|
||
&& lambda_node->children->type == kExprNodeComma) {
|
||
// If lambda has comma child this means that parser has already seen
|
||
// at least "{arg1,", so node cannot possibly be anything, but
|
||
// lambda.
|
||
|
||
// Vim may give E121 or E720 in this case, but it does not look
|
||
// right to have either because both are results of reevaluation
|
||
// possibly-lambda node as a dictionary and here this is not going
|
||
// to happen.
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E15: Expected lambda arguments list or arrow: %.*s"));
|
||
} else {
|
||
// Else it may appear that possibly-lambda node is actually
|
||
// a dictionary or curly-braces-name identifier.
|
||
lambda_node = NULL;
|
||
kv_drop(pt_stack, 1);
|
||
}
|
||
}
|
||
break;
|
||
case kEPTSingleAssignment:
|
||
case kEPTAssignment:
|
||
if (want_node == kENodeValue
|
||
&& tok_type != kExprLexBracket
|
||
&& tok_type != kExprLexPlainIdentifier
|
||
&& (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing)
|
||
&& !(node_is_key && tok_type == kExprLexNumber)
|
||
&& tok_type != kExprLexEnv
|
||
&& tok_type != kExprLexOption
|
||
&& tok_type != kExprLexRegister) {
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E15: Expected value part of assignment lvalue: %.*s"));
|
||
kv_drop(pt_stack, 1);
|
||
} else if (want_node == kENodeOperator
|
||
&& tok_type != kExprLexBracket
|
||
&& (tok_type != kExprLexFigureBrace
|
||
|| cur_token.data.brc.closing)
|
||
&& tok_type != kExprLexDot
|
||
&& (tok_type != kExprLexComma || !is_single_assignment)
|
||
&& tok_type != kExprLexAssignment
|
||
// Curly brace identifiers: will contain plain identifier or
|
||
// another curly brace in position where operator is wanted.
|
||
&& !((tok_type == kExprLexPlainIdentifier
|
||
|| (tok_type == kExprLexFigureBrace
|
||
&& !cur_token.data.brc.closing))
|
||
&& prev_token.type != kExprLexSpacing)) {
|
||
if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) {
|
||
goto viml_pexpr_parse_end;
|
||
}
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E15: Expected assignment operator or subscript: %.*s"));
|
||
kv_drop(pt_stack, 1);
|
||
}
|
||
assert(kv_size(pt_stack));
|
||
break;
|
||
}
|
||
assert(kv_size(pt_stack));
|
||
const ExprASTParseType cur_pt = kv_last(pt_stack);
|
||
assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments);
|
||
#define SIMPLE_UB_OP(op) \
|
||
case kExprLex##op: { \
|
||
if (want_node == kENodeValue) { \
|
||
/* Value level: assume unary operator. */ \
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnary##op); \
|
||
*top_node_p = cur_node; \
|
||
kvi_push(ast_stack, &cur_node->children); \
|
||
HL_CUR_TOKEN(Unary##op); \
|
||
} else { \
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinary##op); \
|
||
ADD_OP_NODE(cur_node); \
|
||
HL_CUR_TOKEN(Binary##op); \
|
||
} \
|
||
want_node = kENodeValue; \
|
||
break; \
|
||
}
|
||
switch (tok_type) {
|
||
case kExprLexMissing:
|
||
case kExprLexSpacing:
|
||
case kExprLexEOC:
|
||
abort();
|
||
case kExprLexInvalid:
|
||
ERROR_FROM_TOKEN(cur_token);
|
||
tok_type = cur_token.data.err.type;
|
||
goto viml_pexpr_parse_process_token;
|
||
case kExprLexRegister: {
|
||
if (want_node == kENodeOperator) {
|
||
// Register in operator position: e.g. @a @a
|
||
OP_MISSING;
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister);
|
||
cur_node->data.reg.name = cur_token.data.reg.name;
|
||
*top_node_p = cur_node;
|
||
want_node = kENodeOperator;
|
||
HL_CUR_TOKEN(Register);
|
||
break;
|
||
}
|
||
SIMPLE_UB_OP(Plus)
|
||
SIMPLE_UB_OP(Minus)
|
||
#undef SIMPLE_UB_OP
|
||
#define SIMPLE_B_OP(op, msg) \
|
||
case kExprLex##op: { \
|
||
ADD_VALUE_IF_MISSING(_("E15: Unexpected " msg ": %.*s")); \
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##op); \
|
||
HL_CUR_TOKEN(op); \
|
||
ADD_OP_NODE(cur_node); \
|
||
break; \
|
||
}
|
||
SIMPLE_B_OP(Or, "or operator")
|
||
SIMPLE_B_OP(And, "and operator")
|
||
#undef SIMPLE_B_OP
|
||
case kExprLexMultiplication:
|
||
ADD_VALUE_IF_MISSING(_("E15: Unexpected multiplication-like operator: %.*s"));
|
||
switch (cur_token.data.mul.type) {
|
||
#define MUL_OP(lex_op_tail, node_op_tail) \
|
||
case kExprLexMul##lex_op_tail: { \
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##node_op_tail); \
|
||
HL_CUR_TOKEN(node_op_tail); \
|
||
break; \
|
||
}
|
||
MUL_OP(Mul, Multiplication)
|
||
MUL_OP(Div, Division)
|
||
MUL_OP(Mod, Mod)
|
||
#undef MUL_OP
|
||
}
|
||
ADD_OP_NODE(cur_node);
|
||
break;
|
||
case kExprLexOption: {
|
||
if (want_node == kENodeOperator) {
|
||
OP_MISSING;
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption);
|
||
if (cur_token.type == kExprLexInvalid) {
|
||
assert(cur_token.len == 1
|
||
|| (cur_token.len == 3
|
||
&& pline.data[cur_token.start.col + 2] == ':'));
|
||
cur_node->data.opt.ident = (
|
||
pline.data + cur_token.start.col + cur_token.len);
|
||
cur_node->data.opt.ident_len = 0;
|
||
cur_node->data.opt.scope = (
|
||
cur_token.len == 3
|
||
? (ExprOptScope)pline.data[cur_token.start.col + 1]
|
||
: kExprOptScopeUnspecified);
|
||
} else {
|
||
cur_node->data.opt.ident = cur_token.data.opt.name;
|
||
cur_node->data.opt.ident_len = cur_token.data.opt.len;
|
||
cur_node->data.opt.scope = cur_token.data.opt.scope;
|
||
}
|
||
*top_node_p = cur_node;
|
||
want_node = kENodeOperator;
|
||
viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil));
|
||
const size_t scope_shift = (
|
||
cur_token.data.opt.scope == kExprOptScopeUnspecified ? 0 : 2);
|
||
if (scope_shift) {
|
||
viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
|
||
HL(OptionScope));
|
||
viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2), 1,
|
||
HL(OptionScopeDelimiter));
|
||
}
|
||
viml_parser_highlight(pstate, shifted_pos(cur_token.start, scope_shift + 1),
|
||
cur_token.len - (scope_shift + 1), HL(OptionName));
|
||
break;
|
||
}
|
||
case kExprLexEnv:
|
||
if (want_node == kENodeOperator) {
|
||
OP_MISSING;
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment);
|
||
cur_node->data.env.ident = pline.data + cur_token.start.col + 1;
|
||
cur_node->data.env.ident_len = cur_token.len - 1;
|
||
if (cur_node->data.env.ident_len == 0) {
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E15: Environment variable name missing"));
|
||
}
|
||
*top_node_p = cur_node;
|
||
want_node = kENodeOperator;
|
||
viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil));
|
||
viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1),
|
||
cur_token.len - 1, HL(EnvironmentName));
|
||
break;
|
||
case kExprLexNot:
|
||
if (want_node == kENodeOperator) {
|
||
OP_MISSING;
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNot);
|
||
*top_node_p = cur_node;
|
||
kvi_push(ast_stack, &cur_node->children);
|
||
HL_CUR_TOKEN(Not);
|
||
break;
|
||
case kExprLexComparison:
|
||
ADD_VALUE_IF_MISSING(_("E15: Expected value, got comparison operator: %.*s"));
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComparison);
|
||
if (cur_token.type == kExprLexInvalid) {
|
||
cur_node->data.cmp.ccs = kCCStrategyUseOption;
|
||
cur_node->data.cmp.type = kExprCmpEqual;
|
||
cur_node->data.cmp.inv = false;
|
||
} else {
|
||
cur_node->data.cmp.ccs = cur_token.data.cmp.ccs;
|
||
cur_node->data.cmp.type = cur_token.data.cmp.type;
|
||
cur_node->data.cmp.inv = cur_token.data.cmp.inv;
|
||
}
|
||
ADD_OP_NODE(cur_node);
|
||
if (cur_token.data.cmp.ccs != kCCStrategyUseOption) {
|
||
viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1,
|
||
HL(Comparison));
|
||
viml_parser_highlight(pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1,
|
||
HL(ComparisonModifier));
|
||
} else {
|
||
HL_CUR_TOKEN(Comparison);
|
||
}
|
||
want_node = kENodeValue;
|
||
break;
|
||
case kExprLexComma:
|
||
assert(!(want_node == kENodeValue && cur_pt == kEPTLambdaArguments));
|
||
if (want_node == kENodeValue) {
|
||
// Value level: comma appearing here is not valid.
|
||
// Note: in Vim string(,x) will give E116, this is not the case here.
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got comma: %.*s"));
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing);
|
||
cur_node->len = 0;
|
||
*top_node_p = cur_node;
|
||
want_node = kENodeOperator;
|
||
}
|
||
if (cur_pt == kEPTLambdaArguments) {
|
||
assert(lambda_node != NULL);
|
||
assert(lambda_node->data.fig.type_guesses.allow_lambda);
|
||
SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda);
|
||
}
|
||
if (kv_size(ast_stack) < 2) {
|
||
goto viml_pexpr_parse_invalid_comma;
|
||
}
|
||
for (size_t i = 1; i < kv_size(ast_stack); i++) {
|
||
ExprASTNode *const *const eastnode_p =
|
||
(ExprASTNode *const *)kv_Z(ast_stack, i);
|
||
const ExprASTNodeType eastnode_type = (*eastnode_p)->type;
|
||
const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p);
|
||
if (eastnode_type == kExprNodeLambda) {
|
||
assert(cur_pt == kEPTLambdaArguments
|
||
&& want_node == kENodeOperator);
|
||
break;
|
||
} else if (eastnode_type == kExprNodeDictLiteral
|
||
|| eastnode_type == kExprNodeListLiteral
|
||
|| eastnode_type == kExprNodeCall) {
|
||
break;
|
||
} else if (eastnode_type == kExprNodeComma
|
||
|| eastnode_type == kExprNodeColon
|
||
|| eastnode_lvl > kEOpLvlComma) {
|
||
// Do nothing
|
||
} else {
|
||
viml_pexpr_parse_invalid_comma:
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E15: Comma outside of call, lambda or literal: %.*s"));
|
||
break;
|
||
}
|
||
if (i == kv_size(ast_stack) - 1) {
|
||
goto viml_pexpr_parse_invalid_comma;
|
||
}
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma);
|
||
ADD_OP_NODE(cur_node);
|
||
HL_CUR_TOKEN(Comma);
|
||
break;
|
||
#define EXP_VAL_COLON "E15: Expected value, got colon: %.*s"
|
||
case kExprLexColon: {
|
||
bool is_ternary = false;
|
||
if (kv_size(ast_stack) < 2) {
|
||
goto viml_pexpr_parse_invalid_colon;
|
||
}
|
||
bool can_be_ternary = true;
|
||
bool is_subscript = false;
|
||
for (size_t i = 1; i < kv_size(ast_stack); i++) {
|
||
ExprASTNode *const *const eastnode_p =
|
||
(ExprASTNode *const *)kv_Z(ast_stack, i);
|
||
const ExprASTNodeType eastnode_type = (*eastnode_p)->type;
|
||
const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p);
|
||
STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma,
|
||
"Unexpected operator priorities");
|
||
if (can_be_ternary && eastnode_type == kExprNodeTernaryValue
|
||
&& !(*eastnode_p)->data.ter.got_colon) {
|
||
kv_drop(ast_stack, i);
|
||
(*eastnode_p)->start = cur_token.start;
|
||
(*eastnode_p)->len = cur_token.len;
|
||
if (prev_token.type == kExprLexSpacing) {
|
||
(*eastnode_p)->start = prev_token.start;
|
||
(*eastnode_p)->len += prev_token.len;
|
||
}
|
||
is_ternary = true;
|
||
(*eastnode_p)->data.ter.got_colon = true;
|
||
ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
|
||
assert((*eastnode_p)->children != NULL);
|
||
assert((*eastnode_p)->children->next == NULL);
|
||
kvi_push(ast_stack, &(*eastnode_p)->children->next);
|
||
break;
|
||
} else if (eastnode_type == kExprNodeUnknownFigure) {
|
||
SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict);
|
||
break;
|
||
} else if (eastnode_type == kExprNodeDictLiteral) {
|
||
break;
|
||
} else if (eastnode_type == kExprNodeSubscript) {
|
||
is_subscript = true;
|
||
// can_be_ternary = false;
|
||
assert(!is_ternary);
|
||
break;
|
||
} else if (eastnode_type == kExprNodeColon) {
|
||
goto viml_pexpr_parse_invalid_colon;
|
||
} else if (eastnode_lvl >= kEOpLvlTernaryValue) {
|
||
// Do nothing
|
||
} else if (eastnode_lvl >= kEOpLvlComma) {
|
||
can_be_ternary = false;
|
||
} else {
|
||
goto viml_pexpr_parse_invalid_colon;
|
||
}
|
||
if (i == kv_size(ast_stack) - 1) {
|
||
goto viml_pexpr_parse_invalid_colon;
|
||
}
|
||
}
|
||
if (is_subscript) {
|
||
assert(kv_size(ast_stack) > 1);
|
||
// Colon immediately following subscript start: it is empty subscript
|
||
// part like a[:2].
|
||
if (want_node == kENodeValue
|
||
&& (*kv_Z(ast_stack, 1))->type == kExprNodeSubscript) {
|
||
NEW_NODE_WITH_CUR_POS(*top_node_p, kExprNodeMissing);
|
||
(*top_node_p)->len = 0;
|
||
want_node = kENodeOperator;
|
||
} else {
|
||
ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon);
|
||
ADD_OP_NODE(cur_node);
|
||
HL_CUR_TOKEN(SubscriptColon);
|
||
} else {
|
||
goto viml_pexpr_parse_valid_colon;
|
||
viml_pexpr_parse_invalid_colon:
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E15: Colon outside of dictionary or ternary operator: %.*s"));
|
||
viml_pexpr_parse_valid_colon:
|
||
ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
|
||
if (is_ternary) {
|
||
HL_CUR_TOKEN(TernaryColon);
|
||
} else {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon);
|
||
ADD_OP_NODE(cur_node);
|
||
HL_CUR_TOKEN(Colon);
|
||
}
|
||
}
|
||
want_node = kENodeValue;
|
||
break;
|
||
}
|
||
#undef EXP_VAL_COLON
|
||
case kExprLexBracket:
|
||
if (cur_token.data.brc.closing) {
|
||
ExprASTNode **new_top_node_p = NULL;
|
||
// Always drop the topmost value:
|
||
//
|
||
// 1. When want_node != kENodeValue topmost item on stack is
|
||
// a *finished* left operand, which may as well be "{@a}" which
|
||
// needs not be finished again.
|
||
// 2. Otherwise it is pointing to NULL, which nobody wants.
|
||
kv_drop(ast_stack, 1);
|
||
if (!kv_size(ast_stack)) {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral);
|
||
cur_node->len = 0;
|
||
if (want_node != kENodeValue) {
|
||
cur_node->children = *top_node_p;
|
||
}
|
||
*top_node_p = cur_node;
|
||
goto viml_pexpr_parse_bracket_closing_error;
|
||
}
|
||
if (want_node == kENodeValue) {
|
||
// It is OK to want value if
|
||
//
|
||
// 1. It is empty list literal, in which case top node will be
|
||
// ListLiteral.
|
||
// 2. It is list literal with trailing comma, in which case top node
|
||
// will be that comma.
|
||
// 3. It is subscript with colon, but without one of the values:
|
||
// e.g. "a[:]", "a[1:]", top node will be colon in this case.
|
||
if ((*kv_last(ast_stack))->type != kExprNodeListLiteral
|
||
&& (*kv_last(ast_stack))->type != kExprNodeComma
|
||
&& (*kv_last(ast_stack))->type != kExprNodeColon) {
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E15: Expected value, got closing bracket: %.*s"));
|
||
}
|
||
}
|
||
do {
|
||
new_top_node_p = kv_pop(ast_stack);
|
||
} while (kv_size(ast_stack)
|
||
&& (new_top_node_p == NULL
|
||
|| ((*new_top_node_p)->type != kExprNodeListLiteral
|
||
&& (*new_top_node_p)->type != kExprNodeSubscript)));
|
||
ExprASTNode *new_top_node = *new_top_node_p;
|
||
switch (new_top_node->type) {
|
||
case kExprNodeListLiteral:
|
||
if (pt_is_assignment(cur_pt) && new_top_node->children == NULL) {
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E475: Unable to assign to empty list: %.*s"));
|
||
}
|
||
HL_CUR_TOKEN(List);
|
||
break;
|
||
case kExprNodeSubscript:
|
||
HL_CUR_TOKEN(SubscriptBracket);
|
||
break;
|
||
default:
|
||
viml_pexpr_parse_bracket_closing_error:
|
||
assert(!kv_size(ast_stack));
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s"));
|
||
HL_CUR_TOKEN(List);
|
||
break;
|
||
}
|
||
kvi_push(ast_stack, new_top_node_p);
|
||
want_node = kENodeOperator;
|
||
if (kv_size(ast_stack) <= asgn_level) {
|
||
assert(kv_size(ast_stack) == asgn_level);
|
||
asgn_level = 0;
|
||
if (cur_pt == kEPTAssignment) {
|
||
assert(ast.err.msg);
|
||
} else if (cur_pt == kEPTExpr
|
||
&& kv_size(pt_stack) > 1
|
||
&& pt_is_assignment(kv_Z(pt_stack, 1))) {
|
||
kv_drop(pt_stack, 1);
|
||
}
|
||
}
|
||
if (cur_pt == kEPTSingleAssignment && kv_size(ast_stack) == 1) {
|
||
kv_drop(pt_stack, 1);
|
||
}
|
||
} else {
|
||
if (want_node == kENodeValue) {
|
||
// Value means list literal or list assignment.
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral);
|
||
*top_node_p = cur_node;
|
||
kvi_push(ast_stack, &cur_node->children);
|
||
want_node = kENodeValue;
|
||
if (cur_pt == kEPTAssignment) {
|
||
// Additional assignment parse type allows to easily forbid nested
|
||
// lists.
|
||
kvi_push(pt_stack, kEPTSingleAssignment);
|
||
} else if (cur_pt == kEPTSingleAssignment) {
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E475: Nested lists not allowed when assigning: %.*s"));
|
||
}
|
||
HL_CUR_TOKEN(List);
|
||
} else {
|
||
// Operator means subscript, also in assignment. But in assignment
|
||
// subscript may be pretty much any expression, so need to push
|
||
// kEPTExpr.
|
||
if (prev_token.type == kExprLexSpacing) {
|
||
OP_MISSING;
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript);
|
||
ADD_OP_NODE(cur_node);
|
||
HL_CUR_TOKEN(SubscriptBracket);
|
||
if (pt_is_assignment(cur_pt)) {
|
||
assert(want_node == kENodeValue); // Subtract 1 for NULL at top.
|
||
asgn_level = kv_size(ast_stack) - 1;
|
||
kvi_push(pt_stack, kEPTExpr);
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
case kExprLexFigureBrace:
|
||
if (cur_token.data.brc.closing) {
|
||
ExprASTNode **new_top_node_p = NULL;
|
||
// Always drop the topmost value:
|
||
//
|
||
// 1. When want_node != kENodeValue topmost item on stack is
|
||
// a *finished* left operand, which may as well be "{@a}" which
|
||
// needs not be finished again.
|
||
// 2. Otherwise it is pointing to NULL, which nobody wants.
|
||
kv_drop(ast_stack, 1);
|
||
if (!kv_size(ast_stack)) {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure);
|
||
cur_node->data.fig.type_guesses.allow_lambda = false;
|
||
cur_node->data.fig.type_guesses.allow_dict = false;
|
||
cur_node->data.fig.type_guesses.allow_ident = false;
|
||
cur_node->len = 0;
|
||
if (want_node != kENodeValue) {
|
||
cur_node->children = *top_node_p;
|
||
}
|
||
*top_node_p = cur_node;
|
||
new_top_node_p = top_node_p;
|
||
goto viml_pexpr_parse_figure_brace_closing_error;
|
||
}
|
||
if (want_node == kENodeValue) {
|
||
if ((*kv_last(ast_stack))->type != kExprNodeUnknownFigure
|
||
&& (*kv_last(ast_stack))->type != kExprNodeComma) {
|
||
// kv_last being UnknownFigure may occur for empty dictionary
|
||
// literal, while Comma is expected in case of non-empty one.
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E15: Expected value, got closing figure brace: %.*s"));
|
||
}
|
||
}
|
||
do {
|
||
new_top_node_p = kv_pop(ast_stack);
|
||
} while (kv_size(ast_stack)
|
||
&& (new_top_node_p == NULL
|
||
|| ((*new_top_node_p)->type != kExprNodeUnknownFigure
|
||
&& (*new_top_node_p)->type != kExprNodeDictLiteral
|
||
&& ((*new_top_node_p)->type
|
||
!= kExprNodeCurlyBracesIdentifier)
|
||
&& (*new_top_node_p)->type != kExprNodeLambda)));
|
||
ExprASTNode *new_top_node = *new_top_node_p;
|
||
switch (new_top_node->type) {
|
||
case kExprNodeUnknownFigure:
|
||
if (new_top_node->children == NULL) {
|
||
// No children of curly braces node indicates empty dictionary.
|
||
assert(want_node == kENodeValue);
|
||
assert(new_top_node->data.fig.type_guesses.allow_dict);
|
||
SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict);
|
||
HL_CUR_TOKEN(Dict);
|
||
} else if (new_top_node->data.fig.type_guesses.allow_ident) {
|
||
SELECT_FIGURE_BRACE_TYPE(new_top_node, CurlyBracesIdentifier,
|
||
Curly);
|
||
HL_CUR_TOKEN(Curly);
|
||
} else {
|
||
// If by this time type of the node has not already been
|
||
// guessed, but it definitely is not a curly braces name then
|
||
// it is invalid for sure.
|
||
ERROR_FROM_NODE_AND_MSG(new_top_node,
|
||
_("E15: Don't know what figure brace means: %.*s"));
|
||
if (pstate->colors) {
|
||
// Will reset to NvimInvalidFigureBrace.
|
||
kv_A(*pstate->colors,
|
||
new_top_node->data.fig.opening_hl_idx).group = (
|
||
HL(FigureBrace));
|
||
}
|
||
HL_CUR_TOKEN(FigureBrace);
|
||
}
|
||
break;
|
||
case kExprNodeDictLiteral:
|
||
HL_CUR_TOKEN(Dict);
|
||
break;
|
||
case kExprNodeCurlyBracesIdentifier:
|
||
HL_CUR_TOKEN(Curly);
|
||
break;
|
||
case kExprNodeLambda:
|
||
HL_CUR_TOKEN(Lambda);
|
||
break;
|
||
default:
|
||
viml_pexpr_parse_figure_brace_closing_error:
|
||
assert(!kv_size(ast_stack));
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s"));
|
||
HL_CUR_TOKEN(FigureBrace);
|
||
break;
|
||
}
|
||
kvi_push(ast_stack, new_top_node_p);
|
||
want_node = kENodeOperator;
|
||
if (kv_size(ast_stack) <= asgn_level) {
|
||
assert(kv_size(ast_stack) == asgn_level);
|
||
if (cur_pt == kEPTExpr
|
||
&& kv_size(pt_stack) > 1
|
||
&& pt_is_assignment(kv_Z(pt_stack, 1))) {
|
||
kv_drop(pt_stack, 1);
|
||
asgn_level = 0;
|
||
}
|
||
}
|
||
} else {
|
||
if (want_node == kENodeValue) {
|
||
HL_CUR_TOKEN(FigureBrace);
|
||
// Value: may be any of lambda, dictionary literal and curly braces
|
||
// name.
|
||
|
||
// Though if we are in an assignment this may only be a curly braces
|
||
// name.
|
||
if (pt_is_assignment(cur_pt)) {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier);
|
||
cur_node->data.fig.type_guesses.allow_lambda = false;
|
||
cur_node->data.fig.type_guesses.allow_dict = false;
|
||
cur_node->data.fig.type_guesses.allow_ident = true;
|
||
kvi_push(pt_stack, kEPTExpr);
|
||
} else {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure);
|
||
cur_node->data.fig.type_guesses.allow_lambda = true;
|
||
cur_node->data.fig.type_guesses.allow_dict = true;
|
||
cur_node->data.fig.type_guesses.allow_ident = true;
|
||
}
|
||
if (pstate->colors) {
|
||
cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1;
|
||
}
|
||
*top_node_p = cur_node;
|
||
kvi_push(ast_stack, &cur_node->children);
|
||
kvi_push(pt_stack, kEPTLambdaArguments);
|
||
lambda_node = cur_node;
|
||
} else {
|
||
// uncrustify:off
|
||
ADD_IDENT(do {
|
||
NEW_NODE_WITH_CUR_POS(cur_node,
|
||
kExprNodeCurlyBracesIdentifier);
|
||
cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors);
|
||
cur_node->data.fig.type_guesses.allow_lambda = false;
|
||
cur_node->data.fig.type_guesses.allow_dict = false;
|
||
cur_node->data.fig.type_guesses.allow_ident = true;
|
||
kvi_push(ast_stack, &cur_node->children);
|
||
if (pt_is_assignment(cur_pt)) {
|
||
kvi_push(pt_stack, kEPTExpr);
|
||
}
|
||
want_node = kENodeValue;
|
||
} while (0),
|
||
Curly);
|
||
// uncrustify:on
|
||
}
|
||
if (pt_is_assignment(cur_pt)
|
||
&& !pt_is_assignment(kv_last(pt_stack))) {
|
||
assert(want_node == kENodeValue); // Subtract 1 for NULL at top.
|
||
asgn_level = kv_size(ast_stack) - 1;
|
||
}
|
||
}
|
||
break;
|
||
case kExprLexArrow:
|
||
if (cur_pt == kEPTLambdaArguments) {
|
||
kv_drop(pt_stack, 1);
|
||
assert(kv_size(pt_stack));
|
||
if (want_node == kENodeValue) {
|
||
// Wanting value means trailing comma and NULL at the top of the
|
||
// stack.
|
||
kv_drop(ast_stack, 1);
|
||
}
|
||
assert(kv_size(ast_stack) >= 1);
|
||
while ((*kv_last(ast_stack))->type != kExprNodeLambda
|
||
&& (*kv_last(ast_stack))->type != kExprNodeUnknownFigure) {
|
||
kv_drop(ast_stack, 1);
|
||
}
|
||
assert((*kv_last(ast_stack)) == lambda_node);
|
||
SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda);
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow);
|
||
if (lambda_node->children == NULL) {
|
||
assert(want_node == kENodeValue);
|
||
lambda_node->children = cur_node;
|
||
kvi_push(ast_stack, &lambda_node->children);
|
||
} else {
|
||
assert(lambda_node->children->next == NULL);
|
||
lambda_node->children->next = cur_node;
|
||
kvi_push(ast_stack, &lambda_node->children->next);
|
||
}
|
||
kvi_push(ast_stack, &cur_node->children);
|
||
lambda_node = NULL;
|
||
} else {
|
||
// Only first branch is valid.
|
||
ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s"));
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Arrow outside of lambda: %.*s"));
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow);
|
||
ADD_OP_NODE(cur_node);
|
||
}
|
||
want_node = kENodeValue;
|
||
HL_CUR_TOKEN(Arrow);
|
||
break;
|
||
case kExprLexPlainIdentifier: {
|
||
const ExprVarScope scope = (cur_token.type == kExprLexInvalid
|
||
? kExprVarScopeMissing
|
||
: cur_token.data.var.scope);
|
||
if (want_node == kENodeValue) {
|
||
want_node = kENodeOperator;
|
||
NEW_NODE_WITH_CUR_POS(cur_node,
|
||
(node_is_key
|
||
? kExprNodePlainKey
|
||
: kExprNodePlainIdentifier));
|
||
cur_node->data.var.scope = scope;
|
||
const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2);
|
||
cur_node->data.var.ident = (pline.data + cur_token.start.col
|
||
+ scope_shift);
|
||
cur_node->data.var.ident_len = cur_token.len - scope_shift;
|
||
*top_node_p = cur_node;
|
||
if (scope_shift) {
|
||
assert(!node_is_key);
|
||
viml_parser_highlight(pstate, cur_token.start, 1,
|
||
HL(IdentifierScope));
|
||
viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
|
||
HL(IdentifierScopeDelimiter));
|
||
}
|
||
viml_parser_highlight(pstate, shifted_pos(cur_token.start,
|
||
scope_shift),
|
||
cur_token.len - scope_shift,
|
||
(node_is_key
|
||
? HL(IdentifierKey)
|
||
: HL(IdentifierName)));
|
||
} else {
|
||
if (scope == kExprVarScopeMissing) {
|
||
// uncrustify:off
|
||
ADD_IDENT(do {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier);
|
||
cur_node->data.var.scope = scope;
|
||
cur_node->data.var.ident = pline.data + cur_token.start.col;
|
||
cur_node->data.var.ident_len = cur_token.len;
|
||
want_node = kENodeOperator;
|
||
} while (0),
|
||
IdentifierName);
|
||
// uncrustify:on
|
||
} else {
|
||
OP_MISSING;
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
case kExprLexNumber:
|
||
if (want_node != kENodeValue) {
|
||
OP_MISSING;
|
||
}
|
||
if (node_is_key) {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey);
|
||
cur_node->data.var.ident = pline.data + cur_token.start.col;
|
||
cur_node->data.var.ident_len = cur_token.len;
|
||
HL_CUR_TOKEN(IdentifierKey);
|
||
} else if (cur_token.data.num.is_float) {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat);
|
||
cur_node->data.flt.value = cur_token.data.num.val.floating;
|
||
HL_CUR_TOKEN(Float);
|
||
} else {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger);
|
||
cur_node->data.num.value = cur_token.data.num.val.integer;
|
||
const uint8_t prefix_length = base_to_prefix_length[
|
||
cur_token.data.num.base];
|
||
viml_parser_highlight(pstate, cur_token.start, prefix_length,
|
||
HL(NumberPrefix));
|
||
viml_parser_highlight(pstate, shifted_pos(cur_token.start, prefix_length),
|
||
cur_token.len - prefix_length, HL(Number));
|
||
}
|
||
want_node = kENodeOperator;
|
||
*top_node_p = cur_node;
|
||
break;
|
||
case kExprLexDot:
|
||
ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s"));
|
||
if (prev_token.type == kExprLexSpacing) {
|
||
if (cur_pt == kEPTAssignment) {
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Cannot concatenate in assignments: %.*s"));
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat);
|
||
HL_CUR_TOKEN(Concat);
|
||
} else {
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript);
|
||
HL_CUR_TOKEN(ConcatOrSubscript);
|
||
}
|
||
ADD_OP_NODE(cur_node);
|
||
break;
|
||
case kExprLexParenthesis:
|
||
if (cur_token.data.brc.closing) {
|
||
if (want_node == kENodeValue) {
|
||
if (kv_size(ast_stack) > 1) {
|
||
const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1);
|
||
if (prev_top_node->type == kExprNodeCall) {
|
||
// Function call without arguments, this is not an error.
|
||
// But further code does not expect NULL nodes.
|
||
kv_drop(ast_stack, 1);
|
||
goto viml_pexpr_parse_no_paren_closing_error;
|
||
}
|
||
}
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got parenthesis: %.*s"));
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing);
|
||
cur_node->len = 0;
|
||
*top_node_p = cur_node;
|
||
} else {
|
||
// Always drop the topmost value: when want_node != kENodeValue
|
||
// topmost item on stack is a *finished* left operand, which may as
|
||
// well be "(@a)" which need not be finished again.
|
||
kv_drop(ast_stack, 1);
|
||
}
|
||
viml_pexpr_parse_no_paren_closing_error: {}
|
||
ExprASTNode **new_top_node_p = NULL;
|
||
while (kv_size(ast_stack)
|
||
&& (new_top_node_p == NULL
|
||
|| ((*new_top_node_p)->type != kExprNodeNested
|
||
&& (*new_top_node_p)->type != kExprNodeCall))) {
|
||
new_top_node_p = kv_pop(ast_stack);
|
||
}
|
||
if (new_top_node_p != NULL
|
||
&& ((*new_top_node_p)->type == kExprNodeNested
|
||
|| (*new_top_node_p)->type == kExprNodeCall)) {
|
||
if ((*new_top_node_p)->type == kExprNodeNested) {
|
||
HL_CUR_TOKEN(NestingParenthesis);
|
||
} else {
|
||
HL_CUR_TOKEN(CallingParenthesis);
|
||
}
|
||
} else {
|
||
// “Always drop the topmost value” branch has got rid of the single
|
||
// value stack had, so there is nothing known to enclose. Correct
|
||
// this.
|
||
if (new_top_node_p == NULL) {
|
||
new_top_node_p = top_node_p;
|
||
}
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing parenthesis: %.*s"));
|
||
HL_CUR_TOKEN(NestingParenthesis);
|
||
cur_node = NEW_NODE(kExprNodeNested);
|
||
cur_node->start = cur_token.start;
|
||
cur_node->len = 0;
|
||
// Unexpected closing parenthesis, assume that it was wanted to
|
||
// enclose everything in ().
|
||
cur_node->children = *new_top_node_p;
|
||
*new_top_node_p = cur_node;
|
||
assert(cur_node->next == NULL);
|
||
}
|
||
kvi_push(ast_stack, new_top_node_p);
|
||
want_node = kENodeOperator;
|
||
} else {
|
||
switch (want_node) {
|
||
case kENodeValue:
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested);
|
||
*top_node_p = cur_node;
|
||
kvi_push(ast_stack, &cur_node->children);
|
||
HL_CUR_TOKEN(NestingParenthesis);
|
||
break;
|
||
case kENodeOperator:
|
||
if (prev_token.type == kExprLexSpacing) {
|
||
// For some reason "function (args)" is a function call, but
|
||
// "(funcref) (args)" is not. AFAIR this somehow involves
|
||
// compatibility and Bram was commenting that this is
|
||
// intentionally inconsistent and he is not very happy with the
|
||
// situation himself.
|
||
if ((*top_node_p)->type != kExprNodePlainIdentifier
|
||
&& (*top_node_p)->type != kExprNodeComplexIdentifier
|
||
&& (*top_node_p)->type != kExprNodeCurlyBracesIdentifier) {
|
||
OP_MISSING;
|
||
}
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall);
|
||
ADD_OP_NODE(cur_node);
|
||
HL_CUR_TOKEN(CallingParenthesis);
|
||
break;
|
||
}
|
||
want_node = kENodeValue;
|
||
}
|
||
break;
|
||
case kExprLexQuestion: {
|
||
ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s"));
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary);
|
||
ADD_OP_NODE(cur_node);
|
||
HL_CUR_TOKEN(Ternary);
|
||
ExprASTNode *ter_val_node;
|
||
NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue);
|
||
ter_val_node->data.ter.got_colon = false;
|
||
assert(cur_node->children != NULL);
|
||
assert(cur_node->children->next == NULL);
|
||
assert(kv_last(ast_stack) == &cur_node->children->next);
|
||
*kv_last(ast_stack) = ter_val_node;
|
||
kvi_push(ast_stack, &ter_val_node->children);
|
||
break;
|
||
}
|
||
case kExprLexDoubleQuotedString:
|
||
case kExprLexSingleQuotedString: {
|
||
const bool is_double = (tok_type == kExprLexDoubleQuotedString);
|
||
if (!cur_token.data.str.closed) {
|
||
// It is weird, but Vim has two identical error messages with
|
||
// different error numbers: "E114: Missing quote" and
|
||
// "E115: Missing quote".
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double
|
||
? _("E114: Missing double quote: %.*s")
|
||
: _("E115: Missing single quote: %.*s")));
|
||
}
|
||
if (want_node == kENodeOperator) {
|
||
OP_MISSING;
|
||
}
|
||
NEW_NODE_WITH_CUR_POS(cur_node, (is_double
|
||
? kExprNodeDoubleQuotedString
|
||
: kExprNodeSingleQuotedString));
|
||
*top_node_p = cur_node;
|
||
parse_quoted_string(pstate, cur_node, cur_token, &ast_stack, is_invalid);
|
||
want_node = kENodeOperator;
|
||
break;
|
||
}
|
||
case kExprLexAssignment:
|
||
if (cur_pt == kEPTAssignment) {
|
||
kv_drop(pt_stack, 1);
|
||
} else if (cur_pt == kEPTSingleAssignment) {
|
||
kv_drop(pt_stack, 2);
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token,
|
||
_("E475: Expected closing bracket to end list assignment "
|
||
"lvalue: %.*s"));
|
||
} else {
|
||
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Misplaced assignment: %.*s"));
|
||
}
|
||
assert(kv_size(pt_stack));
|
||
assert(kv_last(pt_stack) == kEPTExpr);
|
||
ADD_VALUE_IF_MISSING(_("E15: Unexpected assignment: %.*s"));
|
||
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeAssignment);
|
||
cur_node->data.ass.type = cur_token.data.ass.type;
|
||
switch (cur_token.data.ass.type) {
|
||
#define HL_ASGN(asgn, hl) \
|
||
case kExprAsgn##asgn: { HL_CUR_TOKEN(hl); break; }
|
||
HL_ASGN(Plain, PlainAssignment)
|
||
HL_ASGN(Add, AssignmentWithAddition)
|
||
HL_ASGN(Subtract, AssignmentWithSubtraction)
|
||
HL_ASGN(Concat, AssignmentWithConcatenation)
|
||
#undef HL_ASGN
|
||
}
|
||
ADD_OP_NODE(cur_node);
|
||
break;
|
||
}
|
||
viml_pexpr_parse_cycle_end:
|
||
prev_token = cur_token;
|
||
highlighted_prev_spacing = false;
|
||
viml_parser_advance(pstate, cur_token.len);
|
||
} while (true);
|
||
viml_pexpr_parse_end:
|
||
assert(kv_size(pt_stack));
|
||
assert(kv_size(ast_stack));
|
||
if (want_node == kENodeValue
|
||
// Blacklist some parse type entries as their presence means better error
|
||
// message in the other branch.
|
||
&& kv_last(pt_stack) != kEPTLambdaArguments) {
|
||
east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"),
|
||
pstate->pos);
|
||
} else if (kv_size(ast_stack) != 1) {
|
||
// Something may be wrong, check whether it really is.
|
||
|
||
// Pointer to ast.root must never be dropped, so “!= 1” is expected to be
|
||
// the same as “> 1”.
|
||
assert(kv_size(ast_stack));
|
||
// Topmost stack item must be a *finished* value, so it must not be
|
||
// analyzed. E.g. it may contain an already finished nested expression.
|
||
kv_drop(ast_stack, 1);
|
||
while (ast.err.msg == NULL && kv_size(ast_stack)) {
|
||
const ExprASTNode *const cur_node = (*kv_pop(ast_stack));
|
||
// This should only happen when want_node == kENodeValue.
|
||
assert(cur_node != NULL);
|
||
// TODO(ZyX-I): Rehighlight as invalid?
|
||
switch (cur_node->type) {
|
||
case kExprNodeOpMissing:
|
||
case kExprNodeMissing:
|
||
// Error should’ve been already reported.
|
||
break;
|
||
case kExprNodeCall:
|
||
east_set_error(pstate, &ast.err,
|
||
_("E116: Missing closing parenthesis for function call: %.*s"),
|
||
cur_node->start);
|
||
break;
|
||
case kExprNodeNested:
|
||
east_set_error(pstate, &ast.err,
|
||
_("E110: Missing closing parenthesis for nested expression"
|
||
": %.*s"),
|
||
cur_node->start);
|
||
break;
|
||
case kExprNodeListLiteral:
|
||
// For whatever reason "[1" yields "E696: Missing comma in list" error
|
||
// in Vim while "[1," yields E697.
|
||
east_set_error(pstate, &ast.err,
|
||
_("E697: Missing end of List ']': %.*s"),
|
||
cur_node->start);
|
||
break;
|
||
case kExprNodeDictLiteral:
|
||
// Same problem as with the list literal, with E722 (missing comma) vs
|
||
// E723, but additionally just "{" yields only E15.
|
||
east_set_error(pstate, &ast.err,
|
||
_("E723: Missing end of Dictionary '}': %.*s"),
|
||
cur_node->start);
|
||
break;
|
||
case kExprNodeUnknownFigure:
|
||
east_set_error(pstate, &ast.err,
|
||
_("E15: Missing closing figure brace: %.*s"),
|
||
cur_node->start);
|
||
break;
|
||
case kExprNodeLambda:
|
||
east_set_error(pstate, &ast.err,
|
||
_("E15: Missing closing figure brace for lambda: %.*s"),
|
||
cur_node->start);
|
||
break;
|
||
case kExprNodeCurlyBracesIdentifier:
|
||
// Until trailing "}" it is impossible to distinguish curly braces
|
||
// identifier and dictionary, so it must not appear in the stack like
|
||
// this.
|
||
abort();
|
||
case kExprNodeInteger:
|
||
case kExprNodeFloat:
|
||
case kExprNodeSingleQuotedString:
|
||
case kExprNodeDoubleQuotedString:
|
||
case kExprNodeOption:
|
||
case kExprNodeEnvironment:
|
||
case kExprNodeRegister:
|
||
case kExprNodePlainIdentifier:
|
||
case kExprNodePlainKey:
|
||
// These are plain values and not containers, for them it should only
|
||
// be possible to show up in the topmost stack element, but it was
|
||
// unconditionally popped at the start.
|
||
abort();
|
||
case kExprNodeComma:
|
||
case kExprNodeColon:
|
||
case kExprNodeArrow:
|
||
// It is actually only valid inside something else, but everything
|
||
// where one of the above is valid requires to be closed and thus is
|
||
// to be caught later.
|
||
break;
|
||
case kExprNodeSubscript:
|
||
case kExprNodeConcatOrSubscript:
|
||
case kExprNodeComplexIdentifier:
|
||
case kExprNodeAssignment:
|
||
case kExprNodeMod:
|
||
case kExprNodeDivision:
|
||
case kExprNodeMultiplication:
|
||
case kExprNodeNot:
|
||
case kExprNodeAnd:
|
||
case kExprNodeOr:
|
||
case kExprNodeConcat:
|
||
case kExprNodeComparison:
|
||
case kExprNodeUnaryMinus:
|
||
case kExprNodeUnaryPlus:
|
||
case kExprNodeBinaryMinus:
|
||
case kExprNodeTernary:
|
||
case kExprNodeBinaryPlus:
|
||
// It is OK to see these in the stack.
|
||
break;
|
||
case kExprNodeTernaryValue:
|
||
if (!cur_node->data.ter.got_colon) {
|
||
// Actually Vim throws E109 in more cases.
|
||
east_set_error(pstate, &ast.err, _("E109: Missing ':' after '?': %.*s"),
|
||
cur_node->start);
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
kvi_destroy(ast_stack);
|
||
return ast;
|
||
}
|
||
|
||
// NEW_NODE and HL are helper macros used by the expression parser code above.
// Undefine them here so that they do not stay defined for the rest of this
// translation unit.
#undef NEW_NODE
|
||
#undef HL
|