mirror of
https://github.com/neovim/neovim.git
synced 2025-09-06 03:18:16 +00:00
viml/expressions: Add lexer with some basic tests
This commit is contained in:
@@ -81,6 +81,8 @@ foreach(subdir
|
||||
event
|
||||
eval
|
||||
lua
|
||||
viml
|
||||
viml/parser
|
||||
)
|
||||
if(${subdir} MATCHES "tui" AND NOT FEAT_TUI)
|
||||
continue()
|
||||
|
367
src/nvim/viml/parser/expressions.c
Normal file
367
src/nvim/viml/parser/expressions.c
Normal file
@@ -0,0 +1,367 @@
|
||||
// This is an open source non-commercial project. Dear PVS-Studio, please check
|
||||
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
|
||||
|
||||
/// VimL expression parser
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "nvim/vim.h"
|
||||
#include "nvim/memory.h"
|
||||
#include "nvim/types.h"
|
||||
#include "nvim/charset.h"
|
||||
#include "nvim/ascii.h"
|
||||
|
||||
#include "nvim/viml/parser/expressions.h"
|
||||
#include "nvim/viml/parser/parser.h"
|
||||
|
||||
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
||||
# include "viml/parser/expressions.c.generated.h"
|
||||
#endif
|
||||
|
||||
/// Character used as a separator in autoload function/variable names.
|
||||
#define AUTOLOAD_CHAR '#'
|
||||
|
||||
/// Get next token for the VimL expression input
|
||||
LexExprToken viml_pexpr_next_token(ParserState *const pstate)
|
||||
FUNC_ATTR_WARN_UNUSED_RESULT
|
||||
{
|
||||
LexExprToken ret = {
|
||||
.type = kExprLexInvalid,
|
||||
.start = pstate->pos,
|
||||
};
|
||||
ParserLine pline;
|
||||
if (!viml_parser_get_remaining_line(pstate, &pline)) {
|
||||
ret.type = kExprLexEOC;
|
||||
return ret;
|
||||
}
|
||||
if (pline.size <= 0) {
|
||||
ret.len = 0;
|
||||
ret.type = kExprLexEOC;
|
||||
goto viml_pexpr_next_token_adv_return;
|
||||
}
|
||||
ret.len = 1;
|
||||
const uint8_t schar = (uint8_t)pline.data[0];
|
||||
// Read an optional case-comparison suffix (`#` or `?`) located right after the
// operator, i.e. at pline.data[ret.len], and store the resulting strategy in
// ret.data.cmp.ccs. When the suffix is present it is added to the token length,
// otherwise kCCStrategyUseOption is recorded and the length is left alone.
//
// The characters are compared explicitly rather than with strchr(): strchr()
// also matches the terminating NUL of the searched string, so a NUL byte
// following the operator would be consumed as if it were a suffix.
#define GET_CCS(ret, pline) \
  do { \
    if ((ret).len < (pline).size \
        && ((pline).data[(ret).len] == '#' \
            || (pline).data[(ret).len] == '?')) { \
      (ret).data.cmp.ccs = \
          (CaseCompareStrategy)(pline).data[(ret).len]; \
      (ret).len++; \
    } else { \
      (ret).data.cmp.ccs = kCCStrategyUseOption; \
    } \
  } while (0)
|
||||
switch (schar) {
|
||||
// Paired brackets.
|
||||
#define BRACKET(typ, opning, clsing) \
|
||||
case opning: \
|
||||
case clsing: { \
|
||||
ret.type = typ; \
|
||||
ret.data.brc.closing = (schar == clsing); \
|
||||
break; \
|
||||
}
|
||||
BRACKET(kExprLexParenthesis, '(', ')')
|
||||
BRACKET(kExprLexBracket, '[', ']')
|
||||
BRACKET(kExprLexFigureBrace, '{', '}')
|
||||
#undef BRACKET
|
||||
|
||||
// Single character tokens without data.
|
||||
#define CHAR(typ, ch) \
|
||||
case ch: { \
|
||||
ret.type = typ; \
|
||||
break; \
|
||||
}
|
||||
CHAR(kExprLexQuestion, '?')
|
||||
CHAR(kExprLexColon, ':')
|
||||
CHAR(kExprLexDot, '.')
|
||||
CHAR(kExprLexPlus, '+')
|
||||
CHAR(kExprLexComma, ',')
|
||||
#undef CHAR
|
||||
|
||||
// Multiplication/division/modulo.
|
||||
#define MUL(mul_type, ch) \
|
||||
case ch: { \
|
||||
ret.type = kExprLexMultiplication; \
|
||||
ret.data.mul.type = mul_type; \
|
||||
break; \
|
||||
}
|
||||
MUL(kExprLexMulMul, '*')
|
||||
MUL(kExprLexMulDiv, '/')
|
||||
MUL(kExprLexMulMod, '%')
|
||||
#undef MUL
|
||||
|
||||
#define CHARREG(typ, cond) \
|
||||
do { \
|
||||
ret.type = typ; \
|
||||
for (; (ret.len < pline.size \
|
||||
&& cond(pline.data[ret.len])) \
|
||||
; ret.len++) { \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// Whitespace.
|
||||
case ' ':
|
||||
case TAB: {
|
||||
CHARREG(kExprLexSpacing, ascii_iswhite);
|
||||
break;
|
||||
}
|
||||
|
||||
// Control character, except for NUL, NL and TAB.
|
||||
case Ctrl_A: case Ctrl_B: case Ctrl_C: case Ctrl_D: case Ctrl_E:
|
||||
case Ctrl_F: case Ctrl_G: case Ctrl_H:
|
||||
|
||||
case Ctrl_K: case Ctrl_L: case Ctrl_M: case Ctrl_N: case Ctrl_O:
|
||||
case Ctrl_P: case Ctrl_Q: case Ctrl_R: case Ctrl_S: case Ctrl_T:
|
||||
case Ctrl_U: case Ctrl_V: case Ctrl_W: case Ctrl_X: case Ctrl_Y:
|
||||
case Ctrl_Z: {
|
||||
// True for bytes below the space character. The argument is converted to
// uint8_t first: where plain `char` is signed, bytes >= 0x80 (e.g. parts of
// multibyte characters) are negative and would otherwise compare as less than
// ' ', extending the invalid-control-character token over them.
// Note: this is also true for NUL, NL and TAB, so a run started by one of the
// case labels above continues over those bytes as well.
#define ISCTRL(schar) ((uint8_t)(schar) < ' ')
|
||||
CHARREG(kExprLexInvalid, ISCTRL);
|
||||
ret.data.err.type = kExprLexSpacing;
|
||||
ret.data.err.msg =
|
||||
_("E15: Invalid control character present in input: %.*s");
|
||||
break;
|
||||
#undef ISCTRL
|
||||
}
|
||||
|
||||
// Number.
|
||||
// Note: determining whether dot is (not) a part of a float needs more
|
||||
// context, so lexer does not do this.
|
||||
// FIXME: Resolve ambiguity by additional argument.
|
||||
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
|
||||
case '7': case '8': case '9': {
|
||||
CHARREG(kExprLexNumber, ascii_isdigit);
|
||||
break;
|
||||
}
|
||||
|
||||
// Environment variable.
|
||||
case '$': {
|
||||
CHARREG(kExprLexEnv, vim_isIDc);
|
||||
break;
|
||||
}
|
||||
|
||||
// Normal variable/function name.
|
||||
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
|
||||
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
|
||||
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
|
||||
case 'v': case 'w': case 'x': case 'y': case 'z':
|
||||
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
|
||||
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
|
||||
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
|
||||
case 'V': case 'W': case 'X': case 'Y': case 'Z':
|
||||
case '_': {
|
||||
#define ISWORD_OR_AUTOLOAD(x) \
|
||||
(ASCII_ISALNUM(x) || (x) == AUTOLOAD_CHAR || (x) == '_')
|
||||
#define ISWORD(x) \
|
||||
(ASCII_ISALNUM(x) || (x) == '_')
|
||||
ret.data.var.scope = 0;
|
||||
ret.data.var.autoload = false;
|
||||
CHARREG(kExprLexPlainIdentifier, ISWORD);
|
||||
// "is" and "isnot" operators.
|
||||
if ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0)
|
||||
|| (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0)) {
|
||||
ret.type = kExprLexComparison;
|
||||
ret.data.cmp.type = kExprLexCmpIdentical;
|
||||
ret.data.cmp.inv = (ret.len == 5);
|
||||
GET_CCS(ret, pline);
|
||||
// Scope: `s:`, etc.
|
||||
} else if (ret.len == 1
|
||||
&& pline.size > 1
|
||||
&& strchr("sgvbwtla", schar) != NULL
|
||||
&& pline.data[ret.len] == ':') {
|
||||
ret.len++;
|
||||
ret.data.var.scope = schar;
|
||||
CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
|
||||
ret.data.var.autoload = (
|
||||
memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2)
|
||||
!= NULL);
|
||||
// Previous CHARREG stopped at autoload character in order to make it
|
||||
// possible to detect `is#`. Continue now with autoload characters
|
||||
// included.
|
||||
//
|
||||
// Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of
|
||||
// function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. This
|
||||
// needs to be resolved on the higher level where context is available.
|
||||
} else if (pline.size > ret.len
|
||||
&& pline.data[ret.len] == AUTOLOAD_CHAR) {
|
||||
ret.data.var.autoload = true;
|
||||
CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
|
||||
}
|
||||
break;
|
||||
#undef ISWORD_OR_AUTOLOAD
|
||||
#undef ISWORD
|
||||
}
|
||||
#undef CHARREG
|
||||
|
||||
// Option.
|
||||
case '&': {
|
||||
#define OPTNAMEMISS(ret) \
|
||||
do { \
|
||||
ret.type = kExprLexInvalid; \
|
||||
ret.data.err.type = kExprLexOption; \
|
||||
ret.data.err.msg = _("E112: Option name missing: %.*s"); \
|
||||
} while (0)
|
||||
if (pline.size > 1 && pline.data[1] == '&') {
|
||||
ret.type = kExprLexAnd;
|
||||
ret.len++;
|
||||
break;
|
||||
}
|
||||
if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) {
|
||||
OPTNAMEMISS(ret);
|
||||
break;
|
||||
}
|
||||
ret.type = kExprLexOption;
|
||||
if (pline.size > 2
|
||||
&& pline.data[2] == ':'
|
||||
&& strchr("gl", pline.data[1]) != NULL) {
|
||||
ret.len += 2;
|
||||
ret.data.opt.scope = (pline.data[1] == 'g'
|
||||
? kExprLexOptGlobal
|
||||
: kExprLexOptLocal);
|
||||
ret.data.opt.name = pline.data + 3;
|
||||
} else {
|
||||
ret.data.opt.scope = kExprLexOptUnspecified;
|
||||
ret.data.opt.name = pline.data + 1;
|
||||
}
|
||||
const char *p = ret.data.opt.name;
|
||||
const char *const e = pline.data + pline.size;
|
||||
if (e - p >= 4 && p[0] == 't' && p[1] == '_') {
|
||||
ret.data.opt.len = 4;
|
||||
ret.len += 4;
|
||||
} else {
|
||||
for (; p < e && ASCII_ISALPHA(*p); p++) {
|
||||
}
|
||||
ret.data.opt.len = (size_t)(p - ret.data.opt.name);
|
||||
if (ret.data.opt.len == 0) {
|
||||
OPTNAMEMISS(ret);
|
||||
} else {
|
||||
ret.len += ret.data.opt.len;
|
||||
}
|
||||
}
|
||||
break;
|
||||
#undef OPTNAMEMISS
|
||||
}
|
||||
|
||||
// Register.
|
||||
case '@': {
|
||||
ret.type = kExprLexRegister;
|
||||
if (pline.size > 1) {
|
||||
ret.len++;
|
||||
ret.data.reg.name = (uint8_t)pline.data[1];
|
||||
} else {
|
||||
ret.data.reg.name = -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Single quoted string.
|
||||
case '\'': {
|
||||
ret.type = kExprLexSingleQuotedString;
|
||||
ret.data.str.closed = false;
|
||||
for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) {
|
||||
if (pline.data[ret.len] == '\'') {
|
||||
if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') {
|
||||
ret.len++;
|
||||
} else {
|
||||
ret.data.str.closed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Double quoted string.
|
||||
case '"': {
|
||||
ret.type = kExprLexDoubleQuotedString;
|
||||
ret.data.str.closed = false;
|
||||
for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) {
|
||||
if (pline.data[ret.len] == '\\') {
|
||||
if (ret.len + 1 < pline.size) {
|
||||
ret.len++;
|
||||
}
|
||||
} else if (pline.data[ret.len] == '"') {
|
||||
ret.data.str.closed = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Unary not, (un)equality and regex (not) match comparison operators.
|
||||
case '!':
|
||||
case '=': {
|
||||
if (pline.size == 1) {
|
||||
viml_pexpr_next_token_invalid_comparison:
|
||||
ret.type = (schar == '!' ? kExprLexNot : kExprLexInvalid);
|
||||
if (ret.type == kExprLexInvalid) {
|
||||
ret.data.err.msg = _("E15: Expected == or =~: %.*s");
|
||||
ret.data.err.type = kExprLexComparison;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ret.type = kExprLexComparison;
|
||||
ret.data.cmp.inv = (schar == '!');
|
||||
if (pline.data[1] == '=') {
|
||||
ret.data.cmp.type = kExprLexCmpEqual;
|
||||
ret.len++;
|
||||
} else if (pline.data[1] == '~') {
|
||||
ret.data.cmp.type = kExprLexCmpMatches;
|
||||
ret.len++;
|
||||
} else {
|
||||
goto viml_pexpr_next_token_invalid_comparison;
|
||||
}
|
||||
GET_CCS(ret, pline);
|
||||
break;
|
||||
}
|
||||
|
||||
// Less/greater [or equal to] comparison operators.
|
||||
case '>':
|
||||
case '<': {
|
||||
ret.type = kExprLexComparison;
|
||||
const bool haseqsign = (pline.size > 1 && pline.data[1] == '=');
|
||||
if (haseqsign) {
|
||||
ret.len++;
|
||||
}
|
||||
GET_CCS(ret, pline);
|
||||
ret.data.cmp.inv = (schar == '<');
|
||||
ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign)
|
||||
? kExprLexCmpGreaterOrEqual
|
||||
: kExprLexCmpGreater);
|
||||
break;
|
||||
}
|
||||
|
||||
// Minus sign or arrow from lambdas.
|
||||
case '-': {
|
||||
if (pline.size > 1 && pline.data[1] == '>') {
|
||||
ret.len++;
|
||||
ret.type = kExprLexArrow;
|
||||
} else {
|
||||
ret.type = kExprLexMinus;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Expression end because Ex command ended.
|
||||
case NUL:
|
||||
case NL: {
|
||||
ret.type = kExprLexEOC;
|
||||
break;
|
||||
}
|
||||
|
||||
// Everything else is not valid.
|
||||
default: {
|
||||
ret.len = (size_t)utfc_ptr2len_len((const char_u *)pline.data,
|
||||
(int)pline.size);
|
||||
ret.type = kExprLexInvalid;
|
||||
ret.data.err.type = kExprLexPlainIdentifier;
|
||||
ret.data.err.msg = _("E15: Unidentified character: %.*s");
|
||||
break;
|
||||
}
|
||||
}
|
||||
#undef GET_CCS
|
||||
viml_pexpr_next_token_adv_return:
|
||||
viml_parser_advance(pstate, ret.len);
|
||||
return ret;
|
||||
}
|
118
src/nvim/viml/parser/expressions.h
Normal file
118
src/nvim/viml/parser/expressions.h
Normal file
@@ -0,0 +1,118 @@
|
||||
#ifndef NVIM_VIML_PARSER_EXPRESSIONS_H
|
||||
#define NVIM_VIML_PARSER_EXPRESSIONS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "nvim/types.h"
|
||||
#include "nvim/viml/parser/parser.h"
|
||||
|
||||
// Defines whether to ignore case:
|
||||
// == kCCStrategyUseOption
|
||||
// ==# kCCStrategyMatchCase
|
||||
// ==? kCCStrategyIgnoreCase
|
||||
typedef enum {
|
||||
kCCStrategyUseOption = 0, // 0 for xcalloc
|
||||
kCCStrategyMatchCase = '#',
|
||||
kCCStrategyIgnoreCase = '?',
|
||||
} CaseCompareStrategy;
|
||||
|
||||
/// Lexer token type
|
||||
typedef enum {
|
||||
kExprLexInvalid = 0, ///< Invalid token, indicates an error.
|
||||
kExprLexMissing, ///< Missing token, for use in parser.
|
||||
kExprLexSpacing, ///< Spaces, tabs, newlines, etc.
|
||||
kExprLexEOC, ///< End of command character: NL, |, just end of stream.
|
||||
|
||||
kExprLexQuestion, ///< Question mark, for use in ternary.
|
||||
kExprLexColon, ///< Colon, for use in ternary.
|
||||
kExprLexOr, ///< Logical or operator.
|
||||
kExprLexAnd, ///< Logical and operator.
|
||||
kExprLexComparison, ///< One of the comparison operators.
|
||||
kExprLexPlus, ///< Plus sign.
|
||||
kExprLexMinus, ///< Minus sign.
|
||||
kExprLexDot, ///< Dot: either concat or subscript, also part of the float.
|
||||
kExprLexMultiplication, ///< Multiplication, division or modulo operator.
|
||||
|
||||
kExprLexNot, ///< Not: !.
|
||||
|
||||
kExprLexNumber, ///< Integer number literal, or part of a float.
|
||||
kExprLexSingleQuotedString, ///< Single quoted string literal.
|
||||
kExprLexDoubleQuotedString, ///< Double quoted string literal.
|
||||
kExprLexOption, ///< &optionname option value.
|
||||
kExprLexRegister, ///< @r register value.
|
||||
kExprLexEnv, ///< Environment $variable value.
|
||||
kExprLexPlainIdentifier, ///< Identifier without scope: `abc`, `foo#bar`.
|
||||
|
||||
kExprLexBracket, ///< Bracket, either opening or closing.
|
||||
kExprLexFigureBrace, ///< Figure brace, either opening or closing.
|
||||
kExprLexParenthesis, ///< Parenthesis, either opening or closing.
|
||||
kExprLexComma, ///< Comma.
|
||||
kExprLexArrow, ///< Arrow, like from lambda expressions.
|
||||
} LexExprTokenType;
|
||||
|
||||
/// Lexer token
|
||||
typedef struct {
|
||||
ParserPosition start;
|
||||
size_t len;
|
||||
LexExprTokenType type;
|
||||
union {
|
||||
struct {
|
||||
enum {
|
||||
kExprLexCmpEqual, ///< Equality, inequality.
|
||||
kExprLexCmpMatches, ///< Matches regex, not matches regex.
|
||||
kExprLexCmpGreater, ///< `>` or `<=`
|
||||
kExprLexCmpGreaterOrEqual, ///< `>=` or `<`.
|
||||
kExprLexCmpIdentical, ///< `is` or `isnot`
|
||||
} type; ///< Comparison type.
|
||||
CaseCompareStrategy ccs; ///< Case comparison strategy.
|
||||
bool inv; ///< True if comparison is to be inverted.
|
||||
} cmp; ///< For kExprLexComparison.
|
||||
|
||||
struct {
|
||||
enum {
|
||||
kExprLexMulMul, ///< Real multiplication.
|
||||
kExprLexMulDiv, ///< Division.
|
||||
kExprLexMulMod, ///< Modulo.
|
||||
} type; ///< Multiplication type.
|
||||
} mul; ///< For kExprLexMultiplication.
|
||||
|
||||
struct {
|
||||
bool closing; ///< True if bracket/etc is a closing one.
|
||||
} brc; ///< For brackets/braces/parenthesis.
|
||||
|
||||
struct {
|
||||
int name; ///< Register name, may be -1 if name not present.
|
||||
} reg; ///< For kExprLexRegister.
|
||||
|
||||
struct {
|
||||
bool closed; ///< True if quote was closed.
|
||||
} str; ///< For kExprLexSingleQuotedString and kExprLexDoubleQuotedString.
|
||||
|
||||
struct {
|
||||
const char *name; ///< Option name start.
|
||||
size_t len; ///< Option name length.
|
||||
enum {
|
||||
kExprLexOptUnspecified = 0,
|
||||
kExprLexOptGlobal = 1,
|
||||
kExprLexOptLocal = 2,
|
||||
} scope; ///< Option scope: &l:, &g: or not specified.
|
||||
} opt; ///< Option properties.
|
||||
|
||||
struct {
|
||||
int scope; ///< Scope character or 0 if not present.
|
||||
bool autoload; ///< Has autoload characters.
|
||||
} var; ///< For kExprLexPlainIdentifier
|
||||
|
||||
struct {
|
||||
LexExprTokenType type; ///< Suggested type for parsing incorrect code.
|
||||
const char *msg; ///< Error message.
|
||||
} err; ///< For kExprLexInvalid
|
||||
} data; ///< Additional data, if needed.
|
||||
} LexExprToken;
|
||||
|
||||
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
||||
# include "viml/parser/expressions.h.generated.h"
|
||||
#endif
|
||||
|
||||
#endif // NVIM_VIML_PARSER_EXPRESSIONS_H
|
129
src/nvim/viml/parser/parser.h
Normal file
129
src/nvim/viml/parser/parser.h
Normal file
@@ -0,0 +1,129 @@
|
||||
#ifndef NVIM_VIML_PARSER_PARSER_H
|
||||
#define NVIM_VIML_PARSER_PARSER_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "nvim/lib/kvec.h"
|
||||
#include "nvim/func_attr.h"
|
||||
|
||||
/// One parsed line
|
||||
typedef struct {
|
||||
const char *data; ///< Parsed line pointer
|
||||
size_t size; ///< Parsed line size
|
||||
} ParserLine;
|
||||
|
||||
/// Line getter type for parser
|
||||
///
|
||||
/// Line getter must return {NULL, 0} for EOF.
|
||||
typedef void (*ParserLineGetter)(void *cookie, ParserLine *ret_pline);
|
||||
|
||||
/// Parser position in the input
|
||||
typedef struct {
|
||||
size_t line; ///< Line index in ParserInputReader.lines.
|
||||
size_t col; ///< Byte index in the line.
|
||||
} ParserPosition;
|
||||
|
||||
/// Parser state item.
|
||||
typedef struct {
|
||||
enum {
|
||||
kPTopStateParsingCommand = 0,
|
||||
kPTopStateParsingExpression,
|
||||
} type;
|
||||
union {
|
||||
struct {
|
||||
enum {
|
||||
kExprUnknown = 0,
|
||||
} type;
|
||||
} expr;
|
||||
} data;
|
||||
} ParserStateItem;
|
||||
|
||||
/// Structure defining input reader
|
||||
typedef struct {
|
||||
/// Function used to get next line.
|
||||
ParserLineGetter get_line;
|
||||
/// Data for get_line function.
|
||||
void *cookie;
|
||||
/// All lines obtained by get_line.
|
||||
kvec_withinit_t(ParserLine, 4) lines;
|
||||
} ParserInputReader;
|
||||
|
||||
/// Highlighted region definition
|
||||
///
|
||||
/// Note: one chunk may highlight only one line.
|
||||
typedef struct {
|
||||
ParserPosition start; ///< Start of the highlight: line and column.
|
||||
size_t end_col; ///< End column, points to the start of the next character.
|
||||
const char *group; ///< Highlight group.
|
||||
} ParserHighlightChunk;
|
||||
|
||||
/// Highlighting defined by a parser
|
||||
typedef kvec_withinit_t(ParserHighlightChunk, 16) ParserHighlight;
|
||||
|
||||
/// Structure defining parser state
|
||||
typedef struct {
|
||||
/// Line reader.
|
||||
ParserInputReader reader;
|
||||
/// Position up to which input was parsed.
|
||||
ParserPosition pos;
|
||||
/// Parser state stack.
|
||||
kvec_withinit_t(ParserStateItem, 16) stack;
|
||||
/// Highlighting support.
|
||||
ParserHighlight *colors;
|
||||
/// True if line continuation can be used.
|
||||
bool can_continuate;
|
||||
} ParserState;
|
||||
|
||||
static inline bool viml_parser_get_remaining_line(ParserState *const pstate,
|
||||
ParserLine *const ret_pline)
|
||||
REAL_FATTR_ALWAYS_INLINE REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_NONNULL_ALL;
|
||||
|
||||
/// Get currently parsed line, shifted to pstate->pos.col
|
||||
///
|
||||
/// @param pstate Parser state to operate on.
|
||||
///
|
||||
/// @return True if there is a line, false in case of EOF.
|
||||
static inline bool viml_parser_get_remaining_line(ParserState *const pstate,
|
||||
ParserLine *const ret_pline)
|
||||
{
|
||||
const size_t num_lines = kv_size(pstate->reader.lines);
|
||||
if (pstate->pos.line == num_lines) {
|
||||
pstate->reader.get_line(pstate->reader.cookie, ret_pline);
|
||||
kvi_push(pstate->reader.lines, *ret_pline);
|
||||
} else {
|
||||
*ret_pline = kv_last(pstate->reader.lines);
|
||||
}
|
||||
assert(pstate->pos.line == kv_size(pstate->reader.lines) - 1);
|
||||
if (ret_pline->data != NULL) {
|
||||
ret_pline->data += pstate->pos.col;
|
||||
ret_pline->size -= pstate->pos.col;
|
||||
}
|
||||
return ret_pline->data != NULL;
|
||||
}
|
||||
|
||||
static inline void viml_parser_advance(ParserState *const pstate,
|
||||
const size_t len)
|
||||
REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ALL;
|
||||
|
||||
/// Advance position by a given number of bytes
|
||||
///
|
||||
/// At maximum advances to the next line.
|
||||
///
|
||||
/// @param pstate Parser state to advance.
|
||||
/// @param[in] len Number of bytes to advance.
|
||||
static inline void viml_parser_advance(ParserState *const pstate,
|
||||
const size_t len)
|
||||
{
|
||||
assert(pstate->pos.line == kv_size(pstate->reader.lines) - 1);
|
||||
const ParserLine pline = kv_last(pstate->reader.lines);
|
||||
if (pstate->pos.col + len >= pline.size) {
|
||||
pstate->pos.line++;
|
||||
pstate->pos.col = 0;
|
||||
} else {
|
||||
pstate->pos.col += len;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // NVIM_VIML_PARSER_PARSER_H
|
337
test/unit/viml/expressions/lexer_spec.lua
Normal file
337
test/unit/viml/expressions/lexer_spec.lua
Normal file
@@ -0,0 +1,337 @@
|
||||
local helpers = require('test.unit.helpers')(after_each)
|
||||
local itp = helpers.gen_itp(it)
|
||||
|
||||
local child_call_once = helpers.child_call_once
|
||||
local cimport = helpers.cimport
|
||||
local ffi = helpers.ffi
|
||||
local eq = helpers.eq
|
||||
|
||||
local lib = cimport('./src/nvim/viml/parser/expressions.h')
|
||||
|
||||
local eltkn_type_tab, eltkn_cmp_type_tab, ccs_tab, eltkn_mul_type_tab
|
||||
local eltkn_opt_scope_tab
|
||||
-- Fill the `numeric enum value -> short name` tables used to make lexer tokens
-- readable in test expectations.
child_call_once(function()
  -- Look up every `prefix .. name` constant in `lib` and map its numeric value
  -- back to `name`.
  local function enum_names(prefix, names)
    local tab = {}
    for _, name in ipairs(names) do
      tab[tonumber(lib[prefix .. name])] = name
    end
    return tab
  end

  eltkn_type_tab = enum_names('kExprLex', {
    'Invalid',
    'Missing',
    'Spacing',
    'EOC',

    'Question',
    'Colon',
    'Or',
    'And',
    'Comparison',
    'Plus',
    'Minus',
    'Dot',
    'Multiplication',

    'Not',

    'Number',
    'SingleQuotedString',
    'DoubleQuotedString',
    'Option',
    'Register',
    'Env',
    'PlainIdentifier',

    'Bracket',
    'FigureBrace',
    'Parenthesis',
    'Comma',
    'Arrow',
  })

  eltkn_cmp_type_tab = enum_names('kExprLexCmp', {
    'Equal',
    'Matches',
    'Greater',
    'GreaterOrEqual',
    'Identical',
  })

  ccs_tab = enum_names('kCCStrategy', {
    'UseOption',
    'MatchCase',
    'IgnoreCase',
  })

  eltkn_mul_type_tab = enum_names('kExprLexMul', {
    'Mul',
    'Div',
    'Mod',
  })

  eltkn_opt_scope_tab = enum_names('kExprLexOpt', {
    'Unspecified',
    'Global',
    'Local',
  })
end)
|
||||
|
||||
--- Number of elements in a cdata array.
-- Computed as the byte size of the whole array divided by the byte size of
-- its first element.
-- @param arr cdata array.
-- @return Element count.
local function array_size(arr)
  local total_bytes = ffi.sizeof(arr)
  local elem_bytes = ffi.sizeof(arr[0])
  return total_bytes / elem_bytes
end
|
||||
|
||||
--- Number of slots in the `init_array` member of a vector.
-- @param kvi Vector cdata having an `init_array` member.
-- @return Element count of `kvi.init_array`.
local function kvi_size(kvi)
  local init_array = kvi.init_array
  return array_size(init_array)
end
|
||||
|
||||
--- Initialize a vector so that it uses its own `init_array` as storage.
-- Sets `items` to `init_array` and `capacity` to the size of that array.
-- @param kvi Vector cdata to initialize (modified in place).
-- @return The same `kvi`.
local function kvi_init(kvi)
  kvi.items = kvi.init_array
  kvi.capacity = kvi_size(kvi)
  return kvi
end
|
||||
|
||||
--- Allocate a vector of the given C type and initialize it with kvi_init.
-- @param ct C type name (or ctype) passed to ffi.new.
-- @return Initialized vector cdata.
local function kvi_new(ct)
  local kvi = ffi.new(ct)
  return kvi_init(kvi)
end
|
||||
|
||||
--- Create a ParserState which reads its lines from a Lua list.
--
-- Each call of the line getter consumes the next item of `strings`:
--   string   -> line with that text, size is the string length
--   nil      -> data = nil, size = 0 (also the result past the end of the list)
--   table    -> its `data` and `size` fields are used as is
--   function -> called without arguments, returns `data, size`
--               (size defaults to 0)
--
-- @param strings List of lines in one of the forms described above.
-- @return ParserState cdata with `reader.lines` and `stack` initialized and
--         position set to line 0, column 0.
local function new_pstate(strings)
  local strings_idx = 0
  local function get_line(_, ret_pline)
    strings_idx = strings_idx + 1
    local str = strings[strings_idx]
    local data, size
    if type(str) == 'string' then
      data = str
      size = #str
    elseif type(str) == 'nil' then
      data = nil
      size = 0
    elseif type(str) == 'table' then
      data = str.data
      size = str.size
    elseif type(str) == 'function' then
      data, size = str()
      size = size or 0
    end
    ret_pline.data = data
    ret_pline.size = size
  end
  local state = {
    reader = {
      get_line = get_line,
      cookie = nil,
    },
    pos = { line = 0, col = 0 },
    -- NOTE(review): `colors` is a pointer field and nothing on the Lua side
    -- keeps the object created here referenced afterwards; presumably fine
    -- while the lexer does not touch highlighting -- confirm before using it.
    colors = kvi_new('ParserHighlight'),
    can_continuate = false,
  }
  local ret = ffi.new('ParserState', state)
  kvi_init(ret.reader.lines)
  kvi_init(ret.stack)
  return ret
end
|
||||
|
||||
--- Convert an enum value to its name.
-- @param etab Table mapping numeric enum values to names.
-- @param eval Enum value; converted with tonumber() before the lookup.
-- @return Name from `etab`, or the numeric value itself when `etab` has no
--         (truthy) entry for it.
local function conv_enum(etab, eval)
  local num = tonumber(eval)
  local name = etab[num]
  if name then
    return name
  end
  return num
end
|
||||
|
||||
--- Convert a lexer token type to its short name using eltkn_type_tab.
-- @param typ Token type enum value.
-- @return Short type name, or the numeric value if it is not in the table.
local function conv_eltkn_type(typ)
  local type_name = conv_enum(eltkn_type_tab, typ)
  return type_name
end
|
||||
|
||||
--- Convert a ParserLine to a Lua string.
-- @param pline Object with `data` and `size` fields.
-- @return Lua string built from `size` bytes starting at `data`.
local function pline2lua(pline)
  local data, size = pline.data, pline.size
  return ffi.string(data, size)
end
|
||||
|
||||
-- Set of token type names whose token data is read from the `brc` member
-- (the `closing` flag).
local bracket_types = { Parenthesis = true, Bracket = true, FigureBrace = true }
|
||||
|
||||
--- Convert an integer character code to a readable Lua value.
-- Codes in the printable ASCII range (0x20 up to, but not including, 127) are
-- returned as one-character strings; anything else (control characters, -1,
-- non-ASCII values) is returned as a number.
-- The lower bound is 0x20 (space), not decimal 20: with the latter, control
-- codes 20..31 would be returned as raw control characters.
-- @param ch Character code (anything accepted by tonumber()).
-- @return One-character string or number.
local function intchar2lua(ch)
  ch = tonumber(ch)
  if 0x20 <= ch and ch < 127 then
    return ('%c'):format(ch)
  end
  return ch
end
|
||||
|
||||
--- Convert a lexer token to a Lua table suitable for comparison in tests.
--
-- The result always has `type`, `len` and `start` ({line, col}). When the
-- token start lies inside a line already stored in `pstate.reader.lines`,
-- `str` holds the token text; otherwise `error` describes what is wrong with
-- the position. `data` is filled depending on the token type.
--
-- @param pstate Parser state the token was obtained from.
-- @param tkn Lexer token cdata.
-- @return Table described above, followed by the unchanged `tkn`.
local function eltkn2lua(pstate, tkn)
  local typ = conv_eltkn_type(tkn.type)
  local len = tonumber(tkn.len)
  local line, col = tonumber(tkn.start.line), tonumber(tkn.start.col)
  local ret = { type = typ, len = len, start = { line = line, col = col } }

  -- Extract the token text, recording an error if the position is off.
  if line < pstate.reader.lines.size then
    local pstr = pline2lua(pstate.reader.lines.items[line])
    if col >= #pstr then
      ret.error = 'start.col >= #pstr'
    else
      local str = pstr:sub(col + 1, col + len)
      ret.str = str
      if #str ~= len then
        ret.error = '#str /= len'
      end
    end
  else
    ret.error = 'start.line >= pstate.reader.lines.size'
  end

  -- Type-specific payload; other token types carry no data.
  local data = tkn.data
  if typ == 'Comparison' then
    local cmp = data.cmp
    ret.data = {
      type = conv_enum(eltkn_cmp_type_tab, cmp.type),
      ccs = conv_enum(ccs_tab, cmp.ccs),
      inv = (not not cmp.inv),
    }
  elseif typ == 'Multiplication' then
    ret.data = { type = conv_enum(eltkn_mul_type_tab, data.mul.type) }
  elseif bracket_types[typ] then
    ret.data = { closing = (not not data.brc.closing) }
  elseif typ == 'Register' then
    ret.data = { name = intchar2lua(data.reg.name) }
  elseif typ == 'SingleQuotedString' or typ == 'DoubleQuotedString' then
    ret.data = { closed = (not not data.str.closed) }
  elseif typ == 'Option' then
    local opt = data.opt
    ret.data = {
      scope = conv_enum(eltkn_opt_scope_tab, opt.scope),
      name = ffi.string(opt.name, opt.len),
    }
  elseif typ == 'PlainIdentifier' then
    local var = data.var
    ret.data = {
      scope = intchar2lua(var.scope),
      autoload = (not not var.autoload),
    }
  elseif typ == 'Invalid' then
    ret.data = { error = ffi.string(data.err.msg) }
  end
  return ret, tkn
end
|
||||
|
||||
--- Get the next lexer token from `pstate`, converted by eltkn2lua.
-- viml_pexpr_next_token is declared with a single parameter (the parser
-- state), so only that argument is passed: LuaJIT rejects calls to
-- non-variadic C functions with extra arguments.
-- @param pstate Parser state to read the token from.
-- @return Results of eltkn2lua: the Lua table and the raw token.
local function next_eltkn(pstate)
  return eltkn2lua(pstate, lib.viml_pexpr_next_token(pstate))
end
|
||||
|
||||
describe('Expressions lexer', function()
  itp('works (single tokens)', function()
    -- Check that `str` is lexed as one token of type `typ` with data `data`:
    -- alone in the line, followed by a space (when that cannot become a part
    -- of the token) and when the lexer starts at column 1 after a skipped
    -- character.
    local function singl_eltkn_test(typ, str, data)
      local pstate = new_pstate({str})
      eq({data=data, len=#str, start={col=0, line=0}, str=str, type=typ},
         next_eltkn(pstate))
      if not (
        typ == 'Spacing'
        or (typ == 'Register' and str == '@')
        or ((typ == 'SingleQuotedString' or typ == 'DoubleQuotedString')
            and not data.closed)
      ) then
        pstate = new_pstate({str .. ' '})
        eq({data=data, len=#str, start={col=0, line=0}, str=str, type=typ},
           next_eltkn(pstate))
      end
      pstate = new_pstate({'x' .. str})
      pstate.pos.col = 1
      eq({data=data, len=#str, start={col=1, line=0}, str=str, type=typ},
         next_eltkn(pstate))
    end
    singl_eltkn_test('Parenthesis', '(', {closing=false})
    singl_eltkn_test('Parenthesis', ')', {closing=true})
    singl_eltkn_test('Bracket', '[', {closing=false})
    singl_eltkn_test('Bracket', ']', {closing=true})
    singl_eltkn_test('FigureBrace', '{', {closing=false})
    singl_eltkn_test('FigureBrace', '}', {closing=true})
    singl_eltkn_test('Question', '?')
    singl_eltkn_test('Colon', ':')
    singl_eltkn_test('Dot', '.')
    singl_eltkn_test('Plus', '+')
    singl_eltkn_test('Comma', ',')
    singl_eltkn_test('Multiplication', '*', {type='Mul'})
    singl_eltkn_test('Multiplication', '/', {type='Div'})
    singl_eltkn_test('Multiplication', '%', {type='Mod'})
    singl_eltkn_test('Spacing', ' \t\t \t\t')
    singl_eltkn_test('Spacing', ' ')
    singl_eltkn_test('Spacing', '\t')
    singl_eltkn_test('Invalid', '\x01\x02\x03', {error='E15: Invalid control character present in input: %.*s'})
    singl_eltkn_test('Number', '0123')
    singl_eltkn_test('Number', '0')
    singl_eltkn_test('Number', '9')
    singl_eltkn_test('Env', '$abc')
    singl_eltkn_test('Env', '$')
    singl_eltkn_test('PlainIdentifier', 'test', {autoload=false, scope=0})
    singl_eltkn_test('PlainIdentifier', '_test', {autoload=false, scope=0})
    singl_eltkn_test('PlainIdentifier', '_test_foo', {autoload=false, scope=0})
    singl_eltkn_test('PlainIdentifier', 't', {autoload=false, scope=0})
    singl_eltkn_test('PlainIdentifier', 'test5', {autoload=false, scope=0})
    singl_eltkn_test('PlainIdentifier', 't0', {autoload=false, scope=0})
    singl_eltkn_test('PlainIdentifier', 'test#var', {autoload=true, scope=0})
    singl_eltkn_test('PlainIdentifier', 'test#var#val###', {autoload=true, scope=0})
    singl_eltkn_test('PlainIdentifier', 't#####', {autoload=true, scope=0})
    local function scope_test(scope)
      singl_eltkn_test('PlainIdentifier', scope .. ':test#var', {autoload=true, scope=scope})
      singl_eltkn_test('PlainIdentifier', scope .. ':', {autoload=false, scope=scope})
    end
    scope_test('s')
    scope_test('g')
    scope_test('v')
    scope_test('b')
    scope_test('w')
    scope_test('t')
    scope_test('l')
    scope_test('a')
    -- Check the operator and its inverse, each without a suffix and with the
    -- `#` and `?` case-comparison suffixes.
    local function comparison_test(op, inv_op, cmp_type)
      singl_eltkn_test('Comparison', op, {type=cmp_type, inv=false, ccs='UseOption'})
      singl_eltkn_test('Comparison', inv_op, {type=cmp_type, inv=true, ccs='UseOption'})
      singl_eltkn_test('Comparison', op .. '#', {type=cmp_type, inv=false, ccs='MatchCase'})
      singl_eltkn_test('Comparison', inv_op .. '#', {type=cmp_type, inv=true, ccs='MatchCase'})
      singl_eltkn_test('Comparison', op .. '?', {type=cmp_type, inv=false, ccs='IgnoreCase'})
      singl_eltkn_test('Comparison', inv_op .. '?', {type=cmp_type, inv=true, ccs='IgnoreCase'})
    end
    comparison_test('is', 'isnot', 'Identical')
    singl_eltkn_test('And', '&&')
    singl_eltkn_test('Invalid', '&', {error='E112: Option name missing: %.*s'})
    singl_eltkn_test('Option', '&opt', {scope='Unspecified', name='opt'})
    singl_eltkn_test('Option', '&t_xx', {scope='Unspecified', name='t_xx'})
    singl_eltkn_test('Option', '&t_\r\r', {scope='Unspecified', name='t_\r\r'})
    singl_eltkn_test('Option', '&t_\t\t', {scope='Unspecified', name='t_\t\t'})
    -- Options starting with `t_` always take four bytes, so two characters
    -- must follow the prefix (here: two spaces).
    singl_eltkn_test('Option', '&t_  ', {scope='Unspecified', name='t_  '})
    singl_eltkn_test('Option', '&g:opt', {scope='Global', name='opt'})
    singl_eltkn_test('Option', '&l:opt', {scope='Local', name='opt'})
    singl_eltkn_test('Invalid', '&l:', {error='E112: Option name missing: %.*s'})
    singl_eltkn_test('Invalid', '&g:', {error='E112: Option name missing: %.*s'})
    singl_eltkn_test('Register', '@', {name=-1})
    singl_eltkn_test('Register', '@a', {name='a'})
    singl_eltkn_test('Register', '@\r', {name=13})
    singl_eltkn_test('Register', '@ ', {name=' '})
    singl_eltkn_test('Register', '@\t', {name=9})
    singl_eltkn_test('SingleQuotedString', '\'test', {closed=false})
    singl_eltkn_test('SingleQuotedString', '\'test\'', {closed=true})
    singl_eltkn_test('SingleQuotedString', '\'\'\'\'', {closed=true})
    singl_eltkn_test('SingleQuotedString', '\'x\'\'\'', {closed=true})
    singl_eltkn_test('SingleQuotedString', '\'\'\'x\'', {closed=true})
    singl_eltkn_test('SingleQuotedString', '\'\'\'', {closed=false})
    singl_eltkn_test('SingleQuotedString', '\'x\'\'', {closed=false})
    singl_eltkn_test('SingleQuotedString', '\'\'\'x', {closed=false})
    singl_eltkn_test('DoubleQuotedString', '"test', {closed=false})
    singl_eltkn_test('DoubleQuotedString', '"test"', {closed=true})
    singl_eltkn_test('DoubleQuotedString', '"\\""', {closed=true})
    singl_eltkn_test('DoubleQuotedString', '"x\\""', {closed=true})
    singl_eltkn_test('DoubleQuotedString', '"\\"x"', {closed=true})
    singl_eltkn_test('DoubleQuotedString', '"\\"', {closed=false})
    singl_eltkn_test('DoubleQuotedString', '"x\\"', {closed=false})
    singl_eltkn_test('DoubleQuotedString', '"\\"x', {closed=false})
    singl_eltkn_test('Not', '!')
    singl_eltkn_test('Invalid', '=', {error='E15: Expected == or =~: %.*s'})
    comparison_test('==', '!=', 'Equal')
    comparison_test('=~', '!~', 'Matches')
    comparison_test('>', '<=', 'Greater')
    comparison_test('>=', '<', 'GreaterOrEqual')
    singl_eltkn_test('Minus', '-')
    singl_eltkn_test('Arrow', '->')
    singl_eltkn_test('EOC', '\0')
    singl_eltkn_test('EOC', '\n')
    singl_eltkn_test('Invalid', '~', {error='E15: Unidentified character: %.*s'})

    -- Line without data (EOF) and an empty line both yield zero-length EOC.
    local pstate = new_pstate({{data=nil, size=0}})
    eq({len=0, error='start.col >= #pstr', start={col=0, line=0}, type='EOC'},
       next_eltkn(pstate))

    -- Reuse the variable: a second `local pstate` would redefine it in the
    -- same scope.
    pstate = new_pstate({''})
    eq({len=0, error='start.col >= #pstr', start={col=0, line=0}, type='EOC'},
       next_eltkn(pstate))
  end)
end)
|
Reference in New Issue
Block a user