gen_vimdoc.py: better handling of inline (non-block) nodes

This commit is contained in:
Justin M. Keyes
2019-12-24 00:04:14 -08:00
parent 27b678f577
commit b81547ce6d
2 changed files with 170 additions and 118 deletions

View File

@@ -1,18 +1,18 @@
#!/usr/bin/env python3
"""Generates Nvim help docs from C/Lua docstrings, using Doxygen.
"""Generates Nvim :help docs from C/Lua docstrings, using Doxygen.
Also generates *.mpack files. To inspect the *.mpack structure:
:new | put=json_encode(msgpackparse(readfile('runtime/doc/api.mpack')))
:new | put=v:lua.vim.inspect(msgpackparse(readfile('runtime/doc/api.mpack')))
Flow:
gen_docs
main
extract_from_xml
fmt_node_as_vimhelp
fmt_params_map_as_vimhelp
render_node
para_as_map
render_node
fmt_node_as_vimhelp \
para_as_map } recursive
update_params_map /
render_node
This would be easier using lxml and XSLT, but:
@@ -58,6 +58,7 @@ DEBUG = ('DEBUG' in os.environ)
INCLUDE_C_DECL = ('INCLUDE_C_DECL' in os.environ)
INCLUDE_DEPRECATED = ('INCLUDE_DEPRECATED' in os.environ)
fmt_vimhelp = False # HACK
text_width = 78
script_path = os.path.abspath(__file__)
base_dir = os.path.dirname(os.path.dirname(script_path))
@@ -144,6 +145,7 @@ def debug_this(cond, o):
except Exception:
pass
if ((callable(cond) and cond())
or (not callable(cond) and cond == True)
or (not callable(cond) and cond in o)):
raise RuntimeError('xxx: {}\n{}'.format(name, o))
@@ -156,20 +158,27 @@ def find_first(parent, name):
return sub[0]
def get_children(parent, name):
"""Yield matching child nodes within parent."""
def iter_children(parent, name):
"""Yields matching child nodes within parent."""
for child in parent.childNodes:
if child.nodeType == child.ELEMENT_NODE and child.nodeName == name:
yield child
def get_child(parent, name):
"""Get the first matching child node."""
for child in get_children(parent, name):
"""Gets the first matching child node."""
for child in iter_children(parent, name):
return child
return None
def self_or_child(n):
"""Gets the first child node, or self."""
if len(n.childNodes) == 0:
return n
return n.childNodes[0]
def clean_text(text):
"""Cleans text.
@@ -190,18 +199,21 @@ def is_blank(text):
return '' == clean_lines(text)
def get_text(parent, preformatted=False):
"""Combine all text in a node."""
if parent.nodeType == parent.TEXT_NODE:
return parent.data
out = ''
for node in parent.childNodes:
def get_text(n, preformatted=False):
"""Recursively concatenates all text in a node tree."""
text = ''
if n.nodeType == n.TEXT_NODE:
return n.data
if n.nodeName == 'computeroutput':
for node in n.childNodes:
text += get_text(node)
return '`{}` '.format(text)
for node in n.childNodes:
if node.nodeType == node.TEXT_NODE:
out += node.data if preformatted else clean_text(node.data)
text += node.data if preformatted else clean_text(node.data)
elif node.nodeType == node.ELEMENT_NODE:
out += ' ' + get_text(node, preformatted)
return out
text += ' ' + get_text(node, preformatted)
return text
# Gets the length of the last line in `text`, excluding newline ("\n") char.
@@ -221,6 +233,8 @@ def len_lastline_withoutindent(text, indent):
# Returns True if node `n` contains only inline (not block-level) elements.
def is_inline(n):
# if len(n.childNodes) == 0:
# return n.nodeType == n.TEXT_NODE or n.nodeName == 'computeroutput'
for c in n.childNodes:
if c.nodeType != c.TEXT_NODE and c.nodeName != 'computeroutput':
return False
@@ -271,11 +285,17 @@ def doc_wrap(text, prefix='', width=70, func=False, indent=None):
return result
def max_name(names):
if len(names) == 0:
return 0
return max(len(name) for name in names)
def update_params_map(parent, ret_map, width=62):
"""Updates `ret_map` with name:desc key-value pairs extracted
from Doxygen XML node `parent`.
"""
params = []
params = collections.OrderedDict()
for node in parent.childNodes:
if node.nodeType == node.TEXT_NODE:
continue
@@ -285,48 +305,34 @@ def update_params_map(parent, ret_map, width=62):
name = get_text(name_node)
if name in param_exclude:
continue
params.append((name.strip(), node))
params[name.strip()] = node
max_name_len = max_name(params.keys()) + 8
# `ret_map` is a name:desc map.
for name, node in params:
for name, node in params.items():
desc = ''
desc_node = get_child(node, 'parameterdescription')
if desc_node:
desc = fmt_node_as_vimhelp(desc_node, width=width, indent=(" " * len(name)))
desc = fmt_node_as_vimhelp(desc_node, width=width,
indent=(' ' * max_name_len))
ret_map[name] = desc
return ret_map
def fmt_params_map_as_vimhelp(m, width=62):
"""Renders a params map as Vim :help text."""
max_name_len = 0
for name, desc in m.items():
max_name_len = max(max_name_len, len(name) + 4)
out = ''
for name, desc in m.items():
name = ' {}'.format('{{{}}}'.format(name).ljust(max_name_len))
out += '{}{}\n'.format(name, desc)
return out.rstrip()
def render_node(n, text, prefix='', indent='', width=62):
"""Renders a node as Vim help text, recursively traversing all descendants."""
global fmt_vimhelp
def ind(s):
return s if fmt_vimhelp else ''
text = ''
# space_preceding = (len(text) > 0 and ' ' == text[-1][-1])
# text += (int(not space_preceding) * ' ')
if n.nodeType == n.TEXT_NODE:
# `prefix` is NOT sent to doc_wrap, it was already handled by now.
text += doc_wrap(n.data, indent=indent, width=width)
elif n.nodeName == 'computeroutput':
text += ' `{}` '.format(get_text(n))
elif n.nodeName == 'preformatted':
if n.nodeName == 'preformatted':
o = get_text(n, preformatted=True)
ensure_nl = '' if o[-1] == '\n' else '\n'
text += ' >{}{}\n<'.format(ensure_nl, o)
text += '>{}{}\n<'.format(ensure_nl, o)
elif is_inline(n):
for c in n.childNodes:
text += render_node(c, text)
text = doc_wrap(text, indent=indent, width=width)
text = doc_wrap(get_text(n), indent=indent, width=width)
elif n.nodeName == 'verbatim':
# TODO: currently we don't use this. The "[verbatim]" hint is there as
# a reminder that we must decide how to format this if we do use it.
@@ -341,8 +347,6 @@ def render_node(n, text, prefix='', indent='', width=62):
elif n.nodeName in ('para', 'heading'):
for c in n.childNodes:
text += render_node(c, text, indent=indent, width=width)
if is_inline(n):
text = doc_wrap(text, indent=indent, width=width)
elif n.nodeName == 'itemizedlist':
for c in n.childNodes:
text += '{}\n'.format(render_node(c, text, prefix='',
@@ -368,7 +372,7 @@ def render_node(n, text, prefix='', indent='', width=62):
text += '\n'
elif (n.nodeName == 'simplesect'
and n.getAttribute('kind') in ('return', 'see')):
text += ' '
text += ind(' ')
for c in n.childNodes:
text += render_node(c, text, indent=' ', width=width)
else:
@@ -395,11 +399,6 @@ def para_as_map(parent, indent='', width=62):
'xrefs': []
}
if is_inline(parent):
chunks["text"] = clean_lines(
doc_wrap(render_node(parent, ""), indent=indent, width=width).strip()
)
# Ordered dict of ordered lists.
groups = collections.OrderedDict([
('params', []),
@@ -413,27 +412,39 @@ def para_as_map(parent, indent='', width=62):
text = ''
kind = ''
last = ''
for child in parent.childNodes:
if child.nodeName == 'parameterlist':
groups['params'].append(child)
elif child.nodeName == 'xrefsect':
groups['xrefs'].append(child)
elif child.nodeName == 'simplesect':
last = kind
kind = child.getAttribute('kind')
if kind == 'return' or (kind == 'note' and last == 'return'):
groups['return'].append(child)
elif kind == 'see':
groups['seealso'].append(child)
elif kind in ('note', 'warning'):
text += render_node(child, text, indent=indent, width=width)
if is_inline(parent):
# Flatten inline text from a tree of non-block nodes.
text = doc_wrap(render_node(parent, ""), indent=indent, width=width)
else:
prev = None # Previous node
for child in parent.childNodes:
if child.nodeName == 'parameterlist':
groups['params'].append(child)
elif child.nodeName == 'xrefsect':
groups['xrefs'].append(child)
elif child.nodeName == 'simplesect':
last = kind
kind = child.getAttribute('kind')
if kind == 'return' or (kind == 'note' and last == 'return'):
groups['return'].append(child)
elif kind == 'see':
groups['seealso'].append(child)
elif kind in ('note', 'warning'):
text += render_node(child, text, indent=indent, width=width)
else:
raise RuntimeError('unhandled simplesect: {}\n{}'.format(
child.nodeName, child.toprettyxml(indent=' ', newl='\n')))
else:
raise RuntimeError('unhandled simplesect: {}\n{}'.format(
child.nodeName, child.toprettyxml(indent=' ', newl='\n')))
else:
text += render_node(child, text, indent=indent, width=width)
if (prev is not None
and is_inline(self_or_child(prev))
and is_inline(self_or_child(child))
and '' != get_text(self_or_child(child)).strip()
and ' ' != text[-1]):
text += ' '
text += render_node(child, text, indent=indent, width=width)
prev = child
chunks['text'] = text
chunks['text'] += text
# Generate map from the gathered items.
if len(groups['params']) > 0:
@@ -441,7 +452,7 @@ def para_as_map(parent, indent='', width=62):
update_params_map(child, ret_map=chunks['params'], width=width)
for child in groups['return']:
chunks['return'].append(render_node(
child, '', indent=indent, width=width).lstrip())
child, '', indent=indent, width=width))
for child in groups['seealso']:
chunks['seealso'].append(render_node(
child, '', indent=indent, width=width))
@@ -463,20 +474,30 @@ def fmt_node_as_vimhelp(parent, width=62, indent=''):
NB: Blank lines in a docstring manifest as <para> tags.
"""
rendered_blocks = []
def fmt_param_doc(m):
"""Renders a params map as Vim :help text."""
max_name_len = max_name(m.keys()) + 4
out = ''
for name, desc in m.items():
name = ' {}'.format('{{{}}}'.format(name).ljust(max_name_len))
out += '{}{}\n'.format(name, desc)
return out.rstrip()
def has_nonexcluded_params(m):
"""Returns true if any of the given params has at least
one non-excluded item."""
if fmt_param_doc(m) != '':
return True
for child in parent.childNodes:
para = para_as_map(child, indent, width)
def has_nonexcluded_params(m):
"""Returns true if any of the given params has at least
one non-excluded item."""
if fmt_params_map_as_vimhelp(m) != '':
return True
# Generate text from the gathered items.
chunks = [para['text']]
if len(para['params']) > 0 and has_nonexcluded_params(para['params']):
chunks.append('\nParameters: ~')
chunks.append(fmt_params_map_as_vimhelp(para['params'], width=width))
chunks.append(fmt_param_doc(para['params']))
if len(para['return']) > 0:
chunks.append('\nReturn: ~')
for s in para['return']:
@@ -493,20 +514,21 @@ def fmt_node_as_vimhelp(parent, width=62, indent=''):
return clean_lines('\n'.join(rendered_blocks).strip())
def extract_from_xml(filename, mode, fmt_vimhelp):
def extract_from_xml(filename, mode, width):
"""Extracts Doxygen info as maps without formatting the text.
Returns two maps:
1. Functions
2. Deprecated functions
The `fmt_vimhelp` parameter controls some special cases for use by
The `fmt_vimhelp` global controls some special cases for use by
fmt_doxygen_xml_as_vimhelp(). (TODO: ugly :)
"""
global xrefs
global fmt_vimhelp
xrefs.clear()
functions = {} # Map of func_name:docstring.
deprecated_functions = {} # Map of func_name:docstring.
fns = {} # Map of func_name:docstring.
deprecated_fns = {} # Map of func_name:docstring.
dom = minidom.parse(filename)
compoundname = get_text(dom.getElementsByTagName('compoundname')[0])
@@ -553,7 +575,7 @@ def extract_from_xml(filename, mode, fmt_vimhelp):
params = []
type_length = 0
for param in get_children(member, 'param'):
for param in iter_children(member, 'param'):
param_type = get_text(get_child(param, 'type')).strip()
param_name = ''
declname = get_child(param, 'declname')
@@ -590,15 +612,15 @@ def extract_from_xml(filename, mode, fmt_vimhelp):
' ')
# Minimum 8 chars between signature and vimtag
lhs = (text_width - 8) - len(prefix)
lhs = (width - 8) - len(prefix)
if len(prefix) + len(suffix) > lhs:
signature = vimtag.rjust(text_width) + '\n'
signature += doc_wrap(suffix, width=text_width-8, prefix=prefix,
signature = vimtag.rjust(width) + '\n'
signature += doc_wrap(suffix, width=width-8, prefix=prefix,
func=True)
else:
signature = prefix + suffix
signature += vimtag.rjust(text_width - len(signature))
signature += vimtag.rjust(width - len(signature))
paras = []
desc = find_first(member, 'detaileddescription')
@@ -638,25 +660,27 @@ def extract_from_xml(filename, mode, fmt_vimhelp):
fn['c_decl'] = c_decl
if 'Deprecated' in str(xrefs):
deprecated_functions[name] = fn
deprecated_fns[name] = fn
elif name.startswith(CONFIG[mode]['func_name_prefix']):
functions[name] = fn
fns[name] = fn
xrefs.clear()
return (functions, deprecated_functions)
return (fns, deprecated_fns)
def fmt_doxygen_xml_as_vimhelp(filename, mode):
"""Formats functions from doxygen XML into Vim :help format.
"""Entrypoint for generating Vim :help from from Doxygen XML.
Returns two strings:
1. Functions in Vim :help format
2. Deprecated functions (handled by caller, or ignored)
Returns 3 items:
1. Vim help text for functions found in `filename`.
2. Vim help text for deprecated functions.
"""
functions = {} # Map of func_name:docstring.
deprecated_functions = {} # Map of func_name:docstring.
fns, deprecated_fns = extract_from_xml(filename, mode, True)
global fmt_vimhelp
fmt_vimhelp = True
fns_txt = {} # Map of func_name:vim-help-text.
deprecated_fns_txt = {} # Map of func_name:vim-help-text.
fns, _ = extract_from_xml(filename, mode, width=text_width)
for name, fn in fns.items():
# Generate Vim :help for parameters.
@@ -685,15 +709,15 @@ def fmt_doxygen_xml_as_vimhelp(filename, mode):
func_doc = re.sub(r'^\s+([<>])$', r'\1', func_doc, flags=re.M)
if 'Deprecated' in xrefs:
deprecated_functions.append(func_doc)
deprecated_fns_txt[name] = func_doc
elif name.startswith(CONFIG[mode]['func_name_prefix']):
functions[name] = func_doc
fns_txt[name] = func_doc
xrefs.clear()
return ('\n\n'.join(list(functions.values())),
'\n\n'.join(deprecated_fns),
functions)
fmt_vimhelp = False
return ('\n\n'.join(list(fns_txt.values())),
'\n\n'.join(list(deprecated_fns_txt.values())))
def delete_lines_below(filename, tokenstr):
@@ -710,14 +734,15 @@ def delete_lines_below(filename, tokenstr):
fp.writelines(lines[0:i])
def gen_docs(config):
"""Generate formatted Vim :help docs and unformatted *.mpack files for use
by API clients.
def main(config):
"""Generates:
1. Vim :help docs
2. *.mpack files for use by API clients
Doxygen is called and configured through stdin.
"""
for mode in CONFIG:
functions = {} # Map of func_name:docstring.
mpack_file = os.path.join(
base_dir, 'runtime', 'doc',
CONFIG[mode]['filename'].replace('.txt', '.mpack'))
@@ -766,14 +791,13 @@ def gen_docs(config):
filename = get_text(find_first(compound, 'name'))
if filename.endswith('.c') or filename.endswith('.lua'):
# Extract unformatted (*.mpack).
fn_map, _ = extract_from_xml(os.path.join(base, '{}.xml'.format(
compound.getAttribute('refid'))), mode, False)
functions_text, deprecated_text, fns = fmt_doxygen_xml_as_vimhelp(
compound.getAttribute('refid'))), mode, width=9999)
# Extract formatted (:help).
functions_text, deprecated_text = fmt_doxygen_xml_as_vimhelp(
os.path.join(base, '{}.xml'.format(
compound.getAttribute('refid'))), mode)
# Collect functions from all modules (for the current `mode`).
functions = {**functions, **fns}
if not functions_text and not deprecated_text:
continue
@@ -900,6 +924,6 @@ if __name__ == "__main__":
if len(sys.argv) > 1:
filter_source(sys.argv[1])
else:
gen_docs(Doxyfile)
main(Doxyfile)
# vim: set ft=python ts=4 sw=4 tw=79 et :