Merge #7234 'built-in expression parser'

2025-11-30 22:20:42 +00:00 · 2017-12-09 18:47:34 +01:00
parent afae4b5141 fbdc3ac4ef
commit 3cc7ebf810
49 changed files with 17465 additions and 772 deletions
--- a/src/nvim/api/vim.c
+++ b/src/nvim/api/vim.c
@@ -33,6 +33,8 @@
 #include "nvim/syntax.h"
 #include "nvim/getchar.h"
 #include "nvim/os/input.h"
+#include "nvim/viml/parser/expressions.h"
+#include "nvim/viml/parser/parser.h"

 #define LINE_BUFFER_SIZE 4096

@@ -889,6 +891,458 @@ theend:
  return rv;
 }

+typedef struct {
+  ExprASTNode **node_p;
+  Object *ret_node_p;
+} ExprASTConvStackItem;
+
+typedef kvec_withinit_t(ExprASTConvStackItem, 16) ExprASTConvStack;
+
+/// Parse a VimL expression
+///
+/// @param[in]  expr  Expression to parse. Is always treated as a single line.
+/// @param[in]  flags  Flags:
+///
+///                    - "m" if multiple expressions in a row are allowed (only
+///                      the first one will be parsed),
+///                    - "E" if EOC tokens are not allowed (determines whether
+///                      they will stop parsing process or be recognized as an
+///                      operator/space, though also yielding an error).
+///                    - "l" when needing to start parsing with lvalues for
+///                      ":let" or ":for".
+///
+///                    Common flag sets:
+///                    - "m" to parse like for ":echo".
+///                    - "E" to parse like for "<C-r>=".
+///                    - empty string for ":call".
+///                    - "lm" to parse for ":let".
+/// @param[in]  highlight  If true, return value will also include "highlight"
+///                        key containing array of 4-tuples (arrays) (Integer,
+///                        Integer, Integer, String), where first three numbers
+///                        define the highlighted region and represent line,
+///                        starting column and ending column (latter exclusive:
+///                        one should highlight region [start_col, end_col)).
+///
+/// @return AST: top-level dictionary holds keys
+///
+///         "error": Dictionary with error, present only if parser saw some
+///                  error. Contains the following keys:
+///
+///           "message": String, error message in printf format, translated.
+///                      Must contain exactly one "%.*s".
+///           "arg": String, error message argument.
+///
+///         "len": Amount of bytes successfully parsed. With flags equal to ""
+///                that should be equal to the length of expr string.
+///
+///                @note: “Sucessfully parsed” here means “participated in AST
+///                       creation”, not “till the first error”.
+///
+///         "ast": AST, either nil or a dictionary with these keys:
+///
+///           "type": node type, one of the value names from ExprASTNodeType
+///                   stringified without "kExprNode" prefix.
+///           "start": a pair [line, column] describing where node is “started”
+///                    where "line" is always 0 (will not be 0 if you will be
+///                    using nvim_parse_viml() on e.g. ":let", but that is not
+///                    present yet). Both elements are Integers.
+///           "len": “length” of the node. This and "start" are there for
+///                  debugging purposes primary (debugging parser and providing
+///                  debug information).
+///           "children": a list of nodes described in top/"ast". There always
+///                       is zero, one or two children, key will not be present
+///                       if node has no children. Maximum number of children
+///                       may be found in node_maxchildren array.
+///
+///           Local values (present only for certain nodes):
+///
+///           "scope": a single Integer, specifies scope for "Option" and
+///                    "PlainIdentifier" nodes. For "Option" it is one of
+///                    ExprOptScope values, for "PlainIdentifier" it is one of
+///                    ExprVarScope values.
+///           "ident": identifier (without scope, if any), present for "Option",
+///                    "PlainIdentifier", "PlainKey" and "Environment" nodes.
+///           "name": Integer, register name (one character) or -1. Only present
+///                   for "Register" nodes.
+///           "cmp_type": String, comparison type, one of the value names from
+///                       ExprComparisonType, stringified without "kExprCmp"
+///                       prefix. Only present for "Comparison" nodes.
+///           "ccs_strategy": String, case comparison strategy, one of the
+///                           value names from ExprCaseCompareStrategy,
+///                           stringified without "kCCStrategy" prefix. Only
+///                           present for "Comparison" nodes.
+///           "augmentation": String, augmentation type for "Assignment" nodes.
+///                           Is either an empty string, "Add", "Subtract" or
+///                           "Concat" for "=", "+=", "-=" or ".=" respectively.
+///           "invert": Boolean, true if result of comparison needs to be
+///                     inverted. Only present for "Comparison" nodes.
+///           "ivalue": Integer, integer value for "Integer" nodes.
+///           "fvalue": Float, floating-point value for "Float" nodes.
+///           "svalue": String, value for "SingleQuotedString" and
+///                     "DoubleQuotedString" nodes.
+Dictionary nvim_parse_expression(String expr, String flags, Boolean highlight,
+                                 Error *err)
+  FUNC_API_SINCE(4) FUNC_API_ASYNC
+{
+  int pflags = 0;
+  for (size_t i = 0 ; i < flags.size ; i++) {
+    switch (flags.data[i]) {
+      case 'm': { pflags |= kExprFlagsMulti; break; }
+      case 'E': { pflags |= kExprFlagsDisallowEOC; break; }
+      case 'l': { pflags |= kExprFlagsParseLet; break; }
+      case NUL: {
+        api_set_error(err, kErrorTypeValidation, "Invalid flag: '\\0' (%u)",
+                      (unsigned)flags.data[i]);
+        return (Dictionary)ARRAY_DICT_INIT;
+      }
+      default: {
+        api_set_error(err, kErrorTypeValidation, "Invalid flag: '%c' (%u)",
+                      flags.data[i], (unsigned)flags.data[i]);
+        return (Dictionary)ARRAY_DICT_INIT;
+      }
+    }
+  }
+  ParserLine plines[] = {
+    {
+      .data = expr.data,
+      .size = expr.size,
+      .allocated = false,
+    },
+    { NULL, 0, false },
+  };
+  ParserLine *plines_p = plines;
+  ParserHighlight colors;
+  kvi_init(colors);
+  ParserHighlight *const colors_p = (highlight ? &colors : NULL);
+  ParserState pstate;
+  viml_parser_init(
+      &pstate, parser_simple_get_line, &plines_p, colors_p);
+  ExprAST east = viml_pexpr_parse(&pstate, pflags);
+
+  const size_t ret_size = (
+      2  // "ast", "len"
+      + (size_t)(east.err.msg != NULL)  // "error"
+      + (size_t)highlight  // "highlight"
+      + 0);
+  Dictionary ret = {
+    .items = xmalloc(ret_size * sizeof(ret.items[0])),
+    .size = 0,
+    .capacity = ret_size,
+  };
+  ret.items[ret.size++] = (KeyValuePair) {
+    .key = STATIC_CSTR_TO_STRING("ast"),
+    .value = NIL,
+  };
+  ret.items[ret.size++] = (KeyValuePair) {
+    .key = STATIC_CSTR_TO_STRING("len"),
+    .value = INTEGER_OBJ((Integer)(pstate.pos.line == 1
+                                   ? plines[0].size
+                                   : pstate.pos.col)),
+  };
+  if (east.err.msg != NULL) {
+    Dictionary err_dict = {
+      .items = xmalloc(2 * sizeof(err_dict.items[0])),
+      .size = 2,
+      .capacity = 2,
+    };
+    err_dict.items[0] = (KeyValuePair) {
+      .key = STATIC_CSTR_TO_STRING("message"),
+      .value = STRING_OBJ(cstr_to_string(east.err.msg)),
+    };
+    if (east.err.arg == NULL) {
+      err_dict.items[1] = (KeyValuePair) {
+        .key = STATIC_CSTR_TO_STRING("arg"),
+        .value = STRING_OBJ(STRING_INIT),
+      };
+    } else {
+      err_dict.items[1] = (KeyValuePair) {
+        .key = STATIC_CSTR_TO_STRING("arg"),
+        .value = STRING_OBJ(((String) {
+          .data = xmemdupz(east.err.arg, (size_t)east.err.arg_len),
+          .size = (size_t)east.err.arg_len,
+        })),
+      };
+    }
+    ret.items[ret.size++] = (KeyValuePair) {
+      .key = STATIC_CSTR_TO_STRING("error"),
+      .value = DICTIONARY_OBJ(err_dict),
+    };
+  }
+  if (highlight) {
+    Array hl = (Array) {
+      .items = xmalloc(kv_size(colors) * sizeof(hl.items[0])),
+      .capacity = kv_size(colors),
+      .size = kv_size(colors),
+    };
+    for (size_t i = 0 ; i < kv_size(colors) ; i++) {
+      const ParserHighlightChunk chunk = kv_A(colors, i);
+      Array chunk_arr = (Array) {
+        .items = xmalloc(4 * sizeof(chunk_arr.items[0])),
+        .capacity = 4,
+        .size = 4,
+      };
+      chunk_arr.items[0] = INTEGER_OBJ((Integer)chunk.start.line);
+      chunk_arr.items[1] = INTEGER_OBJ((Integer)chunk.start.col);
+      chunk_arr.items[2] = INTEGER_OBJ((Integer)chunk.end_col);
+      chunk_arr.items[3] = STRING_OBJ(cstr_to_string(chunk.group));
+      hl.items[i] = ARRAY_OBJ(chunk_arr);
+    }
+    ret.items[ret.size++] = (KeyValuePair) {
+      .key = STATIC_CSTR_TO_STRING("highlight"),
+      .value = ARRAY_OBJ(hl),
+    };
+  }
+  kvi_destroy(colors);
+
+  // Walk over the AST, freeing nodes in process.
+  ExprASTConvStack ast_conv_stack;
+  kvi_init(ast_conv_stack);
+  kvi_push(ast_conv_stack, ((ExprASTConvStackItem) {
+    .node_p = &east.root,
+    .ret_node_p = &ret.items[0].value,
+  }));
+  while (kv_size(ast_conv_stack)) {
+    ExprASTConvStackItem cur_item = kv_last(ast_conv_stack);
+    ExprASTNode *const node = *cur_item.node_p;
+    if (node == NULL) {
+      assert(kv_size(ast_conv_stack) == 1);
+      kv_drop(ast_conv_stack, 1);
+    } else {
+      if (cur_item.ret_node_p->type == kObjectTypeNil) {
+        const size_t ret_node_items_size = (size_t)(
+            3  // "type", "start" and "len"
+            + (node->children != NULL)  // "children"
+            + (node->type == kExprNodeOption
+               || node->type == kExprNodePlainIdentifier)  // "scope"
+            + (node->type == kExprNodeOption
+               || node->type == kExprNodePlainIdentifier
+               || node->type == kExprNodePlainKey
+               || node->type == kExprNodeEnvironment)  // "ident"
+            + (node->type == kExprNodeRegister)  // "name"
+            + (3  // "cmp_type", "ccs_strategy", "invert"
+               * (node->type == kExprNodeComparison))
+            + (node->type == kExprNodeInteger)  // "ivalue"
+            + (node->type == kExprNodeFloat)  // "fvalue"
+            + (node->type == kExprNodeDoubleQuotedString
+               || node->type == kExprNodeSingleQuotedString)  // "svalue"
+            + (node->type == kExprNodeAssignment)  // "augmentation"
+            + 0);
+        Dictionary ret_node = {
+          .items = xmalloc(ret_node_items_size * sizeof(ret_node.items[0])),
+          .capacity = ret_node_items_size,
+          .size = 0,
+        };
+        *cur_item.ret_node_p = DICTIONARY_OBJ(ret_node);
+      }
+      Dictionary *ret_node = &cur_item.ret_node_p->data.dictionary;
+      if (node->children != NULL) {
+        const size_t num_children = 1 + (node->children->next != NULL);
+        Array children_array = {
+          .items = xmalloc(num_children * sizeof(children_array.items[0])),
+          .capacity = num_children,
+          .size = num_children,
+        };
+        for (size_t i = 0; i < num_children; i++) {
+          children_array.items[i] = NIL;
+        }
+        ret_node->items[ret_node->size++] = (KeyValuePair) {
+          .key = STATIC_CSTR_TO_STRING("children"),
+          .value = ARRAY_OBJ(children_array),
+        };
+        kvi_push(ast_conv_stack, ((ExprASTConvStackItem) {
+          .node_p = &node->children,
+          .ret_node_p = &children_array.items[0],
+        }));
+      } else if (node->next != NULL) {
+        kvi_push(ast_conv_stack, ((ExprASTConvStackItem) {
+          .node_p = &node->next,
+          .ret_node_p = cur_item.ret_node_p + 1,
+        }));
+      } else if (node != NULL) {
+        kv_drop(ast_conv_stack, 1);
+        ret_node->items[ret_node->size++] = (KeyValuePair) {
+          .key = STATIC_CSTR_TO_STRING("type"),
+          .value = STRING_OBJ(cstr_to_string(east_node_type_tab[node->type])),
+        };
+        Array start_array = {
+          .items = xmalloc(2 * sizeof(start_array.items[0])),
+          .capacity = 2,
+          .size = 2,
+        };
+        start_array.items[0] = INTEGER_OBJ((Integer)node->start.line);
+        start_array.items[1] = INTEGER_OBJ((Integer)node->start.col);
+        ret_node->items[ret_node->size++] = (KeyValuePair) {
+          .key = STATIC_CSTR_TO_STRING("start"),
+          .value = ARRAY_OBJ(start_array),
+        };
+        ret_node->items[ret_node->size++] = (KeyValuePair) {
+          .key = STATIC_CSTR_TO_STRING("len"),
+          .value = INTEGER_OBJ((Integer)node->len),
+        };
+        switch (node->type) {
+          case kExprNodeDoubleQuotedString:
+          case kExprNodeSingleQuotedString: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("svalue"),
+              .value = STRING_OBJ(((String) {
+                .data = node->data.str.value,
+                .size = node->data.str.size,
+              })),
+            };
+            break;
+          }
+          case kExprNodeOption: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("scope"),
+              .value = INTEGER_OBJ(node->data.opt.scope),
+            };
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("ident"),
+              .value = STRING_OBJ(((String) {
+                .data = xmemdupz(node->data.opt.ident,
+                                 node->data.opt.ident_len),
+                .size = node->data.opt.ident_len,
+              })),
+            };
+            break;
+          }
+          case kExprNodePlainIdentifier: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("scope"),
+              .value = INTEGER_OBJ(node->data.var.scope),
+            };
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("ident"),
+              .value = STRING_OBJ(((String) {
+                .data = xmemdupz(node->data.var.ident,
+                                 node->data.var.ident_len),
+                .size = node->data.var.ident_len,
+              })),
+            };
+            break;
+          }
+          case kExprNodePlainKey: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("ident"),
+              .value = STRING_OBJ(((String) {
+                .data = xmemdupz(node->data.var.ident,
+                                 node->data.var.ident_len),
+                .size = node->data.var.ident_len,
+              })),
+            };
+            break;
+          }
+          case kExprNodeEnvironment: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("ident"),
+              .value = STRING_OBJ(((String) {
+                .data = xmemdupz(node->data.env.ident,
+                                 node->data.env.ident_len),
+                .size = node->data.env.ident_len,
+              })),
+            };
+            break;
+          }
+          case kExprNodeRegister: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("name"),
+              .value = INTEGER_OBJ(node->data.reg.name),
+            };
+            break;
+          }
+          case kExprNodeComparison: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("cmp_type"),
+              .value = STRING_OBJ(cstr_to_string(
+                  eltkn_cmp_type_tab[node->data.cmp.type])),
+            };
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("ccs_strategy"),
+              .value = STRING_OBJ(cstr_to_string(
+                  ccs_tab[node->data.cmp.ccs])),
+            };
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("invert"),
+              .value = BOOLEAN_OBJ(node->data.cmp.inv),
+            };
+            break;
+          }
+          case kExprNodeFloat: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("fvalue"),
+              .value = FLOAT_OBJ(node->data.flt.value),
+            };
+            break;
+          }
+          case kExprNodeInteger: {
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("ivalue"),
+              .value = INTEGER_OBJ((Integer)(
+                  node->data.num.value > API_INTEGER_MAX
+                  ? API_INTEGER_MAX
+                  : (Integer)node->data.num.value)),
+            };
+            break;
+          }
+          case kExprNodeAssignment: {
+            const ExprAssignmentType asgn_type = node->data.ass.type;
+            ret_node->items[ret_node->size++] = (KeyValuePair) {
+              .key = STATIC_CSTR_TO_STRING("augmentation"),
+              .value = STRING_OBJ(
+                  asgn_type == kExprAsgnPlain
+                  ? (String)STRING_INIT
+                  : cstr_to_string(expr_asgn_type_tab[asgn_type])),
+            };
+            break;
+          }
+          case kExprNodeMissing:
+          case kExprNodeOpMissing:
+          case kExprNodeTernary:
+          case kExprNodeTernaryValue:
+          case kExprNodeSubscript:
+          case kExprNodeListLiteral:
+          case kExprNodeUnaryPlus:
+          case kExprNodeBinaryPlus:
+          case kExprNodeNested:
+          case kExprNodeCall:
+          case kExprNodeComplexIdentifier:
+          case kExprNodeUnknownFigure:
+          case kExprNodeLambda:
+          case kExprNodeDictLiteral:
+          case kExprNodeCurlyBracesIdentifier:
+          case kExprNodeComma:
+          case kExprNodeColon:
+          case kExprNodeArrow:
+          case kExprNodeConcat:
+          case kExprNodeConcatOrSubscript:
+          case kExprNodeOr:
+          case kExprNodeAnd:
+          case kExprNodeUnaryMinus:
+          case kExprNodeBinaryMinus:
+          case kExprNodeNot:
+          case kExprNodeMultiplication:
+          case kExprNodeDivision:
+          case kExprNodeMod: {
+            break;
+          }
+        }
+        assert(cur_item.ret_node_p->data.dictionary.size
+               == cur_item.ret_node_p->data.dictionary.capacity);
+        xfree(*cur_item.node_p);
+        *cur_item.node_p = NULL;
+      }
+    }
+  }
+  kvi_destroy(ast_conv_stack);
+
+  assert(ret.size == ret.capacity);
+  // Should be a no-op actually, leaving it in case non-nodes will need to be
+  // freed later.
+  viml_pexpr_free_ast(east);
+  viml_parser_destroy(&pstate);
+  return ret;
+}
+

 /// Writes a message to vim output or error buffer. The string is split
 /// and flushed after each newline. Incomplete lines are kept for writing