viml/parser/expressions: Make lexer parse numbers, support non-decimal

This commit is contained in:
ZyX
2017-10-06 01:19:43 +03:00
parent 0bc4e22379
commit 163792e9b9
5 changed files with 362 additions and 34 deletions

View File

@@ -15,10 +15,13 @@
#include "nvim/ascii.h"
#include "nvim/assert.h"
#include "nvim/lib/kvec.h"
#include "nvim/eval/typval.h"
#include "nvim/viml/parser/expressions.h"
#include "nvim/viml/parser/parser.h"
#define vim_str2nr(s, ...) vim_str2nr((const char_u *)(s), __VA_ARGS__)
typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack;
/// Which nodes may be wanted
@@ -72,6 +75,43 @@ typedef enum {
/// Character used as a separator in autoload function/variable names.
#define AUTOLOAD_CHAR '#'
/// Multiply or divide a number by an integral power of the given base
///
/// Applies an exponent to an already accumulated significand using binary
/// exponentiation (square-and-multiply). Idea taken from uClibc.
///
/// @param[in]  num  Number to scale. Returned unchanged if it is zero.
/// @param[in]  base  Base, must be non-zero. Should be 10 since non-decimal
///                   floating-point numbers are not supported.
/// @param[in]  exponent  Absolute value of the exponent to scale by. Zero
///                       means that num is returned unchanged.
/// @param[in]  exponent_negative  True if exponent is negative, i.e. num is
///                                to be divided rather than multiplied.
///
/// @return num * base^exponent, or num / base^exponent if exponent_negative
///         is true.
static inline float_T scale_number(const float_T num,
                                   const uint8_t base,
                                   const uvarnumber_T exponent,
                                   const bool exponent_negative)
  FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST
{
  // Nothing to scale: either the value is zero or the factor is base^0 == 1.
  if (num == 0 || exponent == 0) {
    return num;
  }
  assert(base);
  float_T scaled = num;
  // Holds base^(2^k) while bit k of the exponent is being examined.
  float_T power = (float_T)base;
  for (uvarnumber_T bits = exponent; bits != 0; bits >>= 1, power *= power) {
    if ((bits & 1) == 0) {
      continue;
    }
    // Current bit is set: fold the corresponding power into the result.
    scaled = exponent_negative ? scaled / power : scaled * power;
  }
  return scaled;
}
/// Get next token for the VimL expression input
///
/// @param pstate Parser state.
@@ -184,6 +224,11 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9': {
ret.data.num.is_float = false;
ret.data.num.base = 10;
size_t frac_start = 0;
size_t exp_start = 0;
size_t frac_end = 0;
bool exp_negative = false;
CHARREG(kExprLexNumber, ascii_isdigit);
if (flags & kELFlagAllowFloat) {
const LexExprToken non_float_ret = ret;
@@ -191,8 +236,18 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
&& pline.data[ret.len] == '.'
&& ascii_isdigit(pline.data[ret.len + 1])) {
ret.len++;
frac_start = ret.len;
frac_end = ret.len;
ret.data.num.is_float = true;
CHARREG(kExprLexNumber, ascii_isdigit);
for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len])
; ret.len++) {
// A small optimization: trailing zeroes in fractional part do not
// add anything to significand, so it is useless to include them in
// frac_end.
if (pline.data[ret.len] != '0') {
frac_end = ret.len + 1;
}
}
if (pline.size > ret.len + 1
&& (pline.data[ret.len] == 'e'
|| pline.data[ret.len] == 'E')
@@ -202,9 +257,11 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
&& ascii_isdigit(pline.data[ret.len + 2]))
|| ascii_isdigit(pline.data[ret.len + 1]))) {
ret.len++;
if (pline.data[ret.len] == '+' || pline.data[ret.len] == '-') {
if (pline.data[ret.len] == '+'
|| (exp_negative = (pline.data[ret.len] == '-'))) {
ret.len++;
}
exp_start = ret.len;
CHARREG(kExprLexNumber, ascii_isdigit);
}
}
@@ -214,6 +271,58 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
ret = non_float_ret;
}
}
// TODO(ZyX-I): detect overflows
if (ret.data.num.is_float) {
// Vim used to use string2float here which in turn uses strtod(). There
// are two problems with this approach:
// 1. strtod() is locale-dependent. Not sure how it is worked around so
// that I do not see relevant bugs, but it still does not look like
// a good idea.
// 2. strtod() does not accept length argument.
//
// The below variant of parsing floats was recognized as acceptable
// because it is basically how uClibc does the thing: it generates
// a number ignoring decimal point (but recording its position), then
// uses recorded position to scale number down when processing exponent.
float_T significand_part = 0;
uvarnumber_T exp_part = 0;
const size_t frac_size = (size_t)(frac_end - frac_start);
for (size_t i = 0; i < frac_end; i++) {
if (i == frac_start - 1) {
continue;
}
significand_part = significand_part * 10 + (pline.data[i] - '0');
}
if (exp_start) {
vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part,
(int)(ret.len - exp_start));
}
if (exp_negative) {
exp_part += frac_size;
} else {
if (exp_part < frac_size) {
exp_negative = true;
exp_part = frac_size - exp_part;
} else {
exp_part -= frac_size;
}
}
ret.data.num.val.floating = scale_number(significand_part, 10, exp_part,
exp_negative);
} else {
int len;
int prep;
vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL,
&ret.data.num.val.integer, (int)pline.size);
ret.len = (size_t)len;
const uint8_t bases[] = {
[0] = 10,
['0'] = 8,
['x'] = 16, ['X'] = 16,
['b'] = 2, ['B'] = 2,
};
ret.data.num.base = bases[prep];
}
break;
}
@@ -474,7 +583,6 @@ viml_pexpr_next_token_adv_return:
return ret;
}
#ifdef UNIT_TESTING
static const char *const eltkn_type_tab[] = {
[kExprLexInvalid] = "Invalid",
[kExprLexMissing] = "Missing",
@@ -617,7 +725,12 @@ const char *viml_pexpr_repr_token(const ParserState *const pstate,
(int)token.data.opt.len, token.data.opt.name)
TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)",
intchar2str(token.data.var.scope), (int)token.data.var.autoload)
TKNARGS(kExprLexNumber, "(is_float=%i)", (int)token.data.num.is_float)
TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)",
(int)token.data.num.is_float,
(int)token.data.num.base,
(double)(token.data.num.is_float
? token.data.num.val.floating
: token.data.num.val.integer))
TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg)
default: {
// No additional arguments.
@@ -642,7 +755,6 @@ viml_pexpr_repr_token_end:
}
return ret;
}
#endif
#ifdef UNIT_TESTING
#include <stdio.h>
@@ -776,8 +888,10 @@ static inline void viml_pexpr_debug_print_token(
// NVimOperator -> Operator
// NVimUnaryOperator -> NVimOperator
// NVimBinaryOperator -> NVimOperator
//
// NVimComparisonOperator -> NVimBinaryOperator
// NVimComparisonOperatorModifier -> NVimComparisonOperator
//
// NVimTernary -> NVimOperator
// NVimTernaryColon -> NVimTernary
//
@@ -795,8 +909,21 @@ static inline void viml_pexpr_debug_print_token(
// NVimIdentifierScope -> NVimIdentifier
// NVimIdentifierScopeDelimiter -> NVimIdentifier
//
// NVimIdentifierKey -> Identifier
//
// NVimFigureBrace -> NVimInternalError
//
// NVimUnaryPlus -> NVimUnaryOperator
// NVimBinaryPlus -> NVimBinaryOperator
// NVimConcatOrSubscript -> NVimBinaryOperator
//
// NVimRegister -> SpecialChar
// NVimNumber -> Number
// NVimFloat -> NVimNumber
//
// NVimNestingParenthesis -> NVimParenthesis
// NVimCallingParenthesis -> NVimParenthesis
//
// NVimInvalidComma -> NVimInvalidDelimiter
// NVimInvalidSpacing -> NVimInvalid
// NVimInvalidTernary -> NVimInvalidOperator
@@ -814,12 +941,9 @@ static inline void viml_pexpr_debug_print_token(
// NVimInvalidIdentifierScopeDelimiter -> NVimInvalidValue
// NVimInvalidComparisonOperator -> NVimInvalidOperator
// NVimInvalidComparisonOperatorModifier -> NVimInvalidComparisonOperator
//
// NVimUnaryPlus -> NVimUnaryOperator
// NVimBinaryPlus -> NVimBinaryOperator
// NVimRegister -> SpecialChar
// NVimNestingParenthesis -> NVimParenthesis
// NVimCallingParenthesis -> NVimParenthesis
// NVimInvalidNumber -> NVimInvalidValue
// NVimInvalidFloat -> NVimInvalidValue
// NVimInvalidIdentifierKey -> NVimInvalidIdentifier
/// Allocate a new node and set some of the values
///

View File

@@ -7,6 +7,7 @@
#include "nvim/types.h"
#include "nvim/viml/parser/parser.h"
#include "nvim/eval/typval.h"
// Defines whether to ignore case:
// == kCCStrategyUseOption
@@ -113,6 +114,11 @@ typedef struct {
} err; ///< For kExprLexInvalid
struct {
union {
float_T floating;
uvarnumber_T integer;
} val; ///< Number value.
uint8_t base; ///< Base: 2, 8, 10 or 16.
bool is_float; ///< True if number is a floating-point.
} num; ///< For kExprLexNumber
} data; ///< Additional data, if needed.