gen_vimdoc.py: render nested lists, etc [ci skip]

- render_node() is now the main rendering function: it traverses a node
  and builds the Vim help text recursively.
- render_para() is weird and ugly; it is the entry point for rendering
  the help text for one docstring'd function.
This commit is contained in:
Justin M. Keyes
2019-03-03 15:01:16 +01:00
parent eeb19a346a
commit bec4066033
2 changed files with 479 additions and 323 deletions

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Parses Doxygen XML output to generate Neovim's API documentation.
"""Generates Nvim help docs from C docstrings, by parsing Doxygen XML.
This would be easier using lxml and XSLT, but:
@@ -35,6 +35,8 @@ import sys
import shutil
import textwrap
import subprocess
import collections
import pprint
from xml.dom import minidom
@@ -42,6 +44,10 @@ if sys.version_info[0] < 3:
print("use Python 3")
sys.exit(1)
DEBUG = ('DEBUG' in os.environ)
INCLUDE_C_DECL = ('INCLUDE_C_DECL' in os.environ)
INCLUDE_DEPRECATED = ('INCLUDE_DEPRECATED' in os.environ)
doc_filename = 'api.txt'
# String used to find the start of the generated part of the doc.
section_start_token = '*api-global*'
@@ -83,6 +89,12 @@ seen_funcs = set()
# deprecated functions.
xrefs = set()
def debug_this(s, n):
    """Debugging aid: raise if the needle `s` occurs in `n`.

    `n` may be a plain string or an XML DOM node. A node is searched via its
    pretty-printed XML. On a match, RuntimeError is raised carrying the node
    name (empty for a string) and the searched text; otherwise nothing
    happens and None is returned.
    """
    if isinstance(n, str):
        name, haystack = '', n
    else:
        haystack = n.toprettyxml(indent=' ', newl='\n')
        name = n.nodeName
    if s not in haystack:
        return
    raise RuntimeError('xxx: {}\n{}'.format(name, haystack))
# XML Parsing Utilities {{{
def find_first(parent, name):
@@ -123,6 +135,10 @@ def clean_lines(text):
return re.sub(r'\A\n\s*\n*|\n\s*\n*\Z', '', re.sub(r'(\n\s*\n+)+', '\n\n', text))
def is_blank(text):
    """Returns True if clean_lines() reduces `text` to the empty string."""
    cleaned = clean_lines(text)
    return cleaned == ''
def get_text(parent):
"""Combine all text in a node."""
if parent.nodeType == parent.TEXT_NODE:
@@ -137,16 +153,43 @@ def get_text(parent):
return out
def doc_wrap(text, prefix='', width=70, func=False):
# Returns the number of characters on the final line of `text`, not counting
# the newline ("\n"). If `text` ends with a newline, the line terminated by
# that newline is the one measured.
def len_lastline(text):
    end = text.rfind('\n')
    if end == -1:
        # No newline at all: the whole string is the last line.
        return len(text)
    if not text.endswith('\n'):
        # Everything after the final newline.
        return len(text) - end - 1
    # Trailing newline: measure back to the previous newline (or the start).
    start = text.rfind('\n', 0, end) + 1
    return end - start
def len_lastline_withoutindent(text, indent):
    """Returns len_lastline(text) minus len(indent), never less than 0."""
    return max(len_lastline(text) - len(indent), 0)
# Returns True if every descendant of node `n` is inline, i.e. either a text
# node or a <computeroutput> element. Any other (block-level) element at any
# depth makes the result False. A node without children is inline.
def is_inline(n):
    return all(
        (child.nodeType == child.TEXT_NODE
         or child.nodeName == 'computeroutput')
        and is_inline(child)
        for child in n.childNodes
    )
def doc_wrap(text, prefix='', width=70, func=False, indent=None):
"""Wraps text to `width`.
The first line is prefixed with `prefix`, and subsequent lines are aligned.
First line is prefixed with `prefix`, subsequent lines are aligned.
If `func` is True, only wrap at commas.
"""
if not width:
# return prefix + text
return text
indent_space = ' ' * len(prefix)
# Whitespace used to indent all lines except the first line.
indent = ' ' * len(prefix) if indent is None else indent
indent_only = (prefix == '' and indent is not None)
if func:
lines = [prefix]
@@ -154,27 +197,37 @@ def doc_wrap(text, prefix='', width=70, func=False):
if part[-1] not in ');':
part += ', '
if len(lines[-1]) + len(part) > width:
lines.append(indent_space)
lines.append(indent)
lines[-1] += part
return '\n'.join(x.rstrip() for x in lines).rstrip()
# XXX: Dummy prefix to force TextWrapper() to wrap the first line.
if indent_only:
prefix = indent
tw = textwrap.TextWrapper(break_long_words = False,
break_on_hyphens = False,
width=width,
initial_indent=prefix,
subsequent_indent=indent_space)
return '\n'.join(tw.wrap(text.strip()))
subsequent_indent=indent)
result = '\n'.join(tw.wrap(text.strip()))
# XXX: Remove the dummy prefix.
if indent_only:
result = result[len(indent):]
return result
def parse_params(parent, width=62):
"""Parse Doxygen `parameterlist`."""
def render_params(parent, width=62):
"""Renders Doxygen <parameterlist> tag as Vim help text."""
name_length = 0
items = []
for child in parent.childNodes:
if child.nodeType == child.TEXT_NODE:
for node in parent.childNodes:
if node.nodeType == node.TEXT_NODE:
continue
name_node = find_first(child, 'parametername')
name_node = find_first(node, 'parametername')
if name_node.getAttribute('direction') == 'out':
continue
@@ -184,79 +237,152 @@ def parse_params(parent, width=62):
name = '{%s}' % name
name_length = max(name_length, len(name) + 2)
items.append((name.strip(), node))
out = ''
for name, node in items:
name = ' {}'.format(name.ljust(name_length))
desc = ''
desc_node = get_child(child, 'parameterdescription')
desc_node = get_child(node, 'parameterdescription')
if desc_node:
desc = parse_parblock(desc_node, width=None)
items.append((name.strip(), desc.strip()))
desc = parse_parblock(desc_node, width=width,
indent=(' ' * len(name)))
out = 'Parameters: ~\n'
for name, desc in items:
name = ' %s' % name.ljust(name_length)
out += doc_wrap(desc, prefix=name, width=width) + '\n'
return out.strip()
out += '{}{}\n'.format(name, desc)
return out.rstrip()
# Renders a node as Vim help text, recursively traversing all descendants.
#
# Arguments:
#   n       DOM node to render.
#   text    Ignored: it is reset to '' on the first line of the body, so the
#           result never includes text passed in by the caller.
#   prefix  Marker emitted before each child of a <listitem>; the list
#           branches pass '- ' or 'N. '.
#   indent  Whitespace handed to doc_wrap() and to most recursive calls.
#   width   Wrap width handed to doc_wrap() and to most recursive calls.
#
# Returns the rendered string for `n` only.
# Raises RuntimeError (node name plus pretty-printed XML) for a node that
# matches none of the branches.
#
# Branches, in the order they are tested:
#   - text node: doc_wrap() of its data.
#   - <computeroutput>: its text in backticks, with a space on each side.
#   - any other node for which is_inline() is true: children are rendered with
#     the default prefix/indent/width and the result goes through doc_wrap().
#   - <verbatim>: ' [verbatim] ' followed by the node's text.
#   - <listitem>: each child is emitted as indent + prefix + rendering, and
#     the child's own indent is extended by len(prefix) spaces.
#   - <para>: children are rendered with the same indent/width.
#   - <itemizedlist>: each child is rendered with prefix '- ' plus a newline.
#   - <orderedlist>: a child whose text is blank (is_blank) emits only a
#     newline; other children get an 'N. ' prefix, with the counter starting
#     at 1.
#   - <simplesect kind="note"|"warning">: a 'Note:' / 'Warning:' heading line
#     followed by the rendered children.
#   - <simplesect kind="return"|"see">: the rendered children, no heading.
#
# NOTE(review): the is_inline(n) test inside the <para> branch comes after an
# earlier `elif is_inline(n)` in the same chain, so it looks like it can never
# be true there -- confirm.
# NOTE(review): the `def parse_para(...)` line and the docstring opener that
# follow the commented-out lines below look like residue of the removed
# parse_para() definition -- confirm before relying on this listing.
def render_node(n, text, prefix='', indent='', width=62):
text = ''
# space_preceding = (len(text) > 0 and ' ' == text[-1][-1])
# text += (int(not space_preceding) * ' ')
def parse_para(parent, width=62):
"""Parse doxygen `para` tag.
if n.nodeType == n.TEXT_NODE:
# `prefix` is NOT sent to doc_wrap, it was already handled by now.
text += doc_wrap(n.data, indent=indent, width=width)
elif n.nodeName == 'computeroutput':
text += ' `{}` '.format(get_text(n))
elif is_inline(n):
for c in n.childNodes:
text += render_node(c, text)
text = doc_wrap(text, indent=indent, width=width)
elif n.nodeName == 'verbatim':
# TODO: currently we don't use this. The "[verbatim]" hint is there as
# a reminder that we must decide how to format this if we do use it.
text += ' [verbatim] {}'.format(get_text(n))
elif n.nodeName == 'listitem':
for c in n.childNodes:
text += indent + prefix + render_node(c, text, indent=indent+(' ' * len(prefix)), width=width)
elif n.nodeName == 'para':
for c in n.childNodes:
text += render_node(c, text, indent=indent, width=width)
if is_inline(n):
text = doc_wrap(text, indent=indent, width=width)
elif n.nodeName == 'itemizedlist':
for c in n.childNodes:
text += '{}\n'.format(render_node(c, text, prefix='- ',
indent=indent, width=width))
elif n.nodeName == 'orderedlist':
i = 1
for c in n.childNodes:
if is_blank(get_text(c)):
text += '\n'
continue
text += '{}\n'.format(render_node(c, text, prefix='{}. '.format(i),
indent=indent, width=width))
i = i + 1
elif n.nodeName == 'simplesect' and 'note' == n.getAttribute('kind'):
text += 'Note:\n '
for c in n.childNodes:
text += render_node(c, text, indent=' ', width=width)
text += '\n'
elif n.nodeName == 'simplesect' and 'warning' == n.getAttribute('kind'):
text += 'Warning:\n '
for c in n.childNodes:
text += render_node(c, text, indent=' ', width=width)
text += '\n'
elif (n.nodeName == 'simplesect'
and n.getAttribute('kind') in ('return', 'see')):
text += ' '
for c in n.childNodes:
text += render_node(c, text, indent=' ', width=width)
else:
raise RuntimeError('unhandled node type: {}\n{}'.format(
n.nodeName, n.toprettyxml(indent=' ', newl='\n')))
return text
I assume <para> is a paragraph block or "a block of text". It can contain
text nodes, or other tags.
def render_para(parent, indent='', width=62):
"""Renders Doxygen <para> containing arbitrary nodes.
NB: Blank lines in a docstring manifest as <para> tags.
"""
line = ''
lines = []
if is_inline(parent):
return clean_lines(doc_wrap(render_node(parent, ''),
indent=indent, width=width).strip())
# Ordered dict of ordered lists.
groups = collections.OrderedDict([
('params', []),
('return', []),
('seealso', []),
('xrefs', []),
])
# Gather nodes into groups. Mostly this is because we want "parameterlist"
# nodes to appear together.
text = ''
kind = ''
last = ''
for child in parent.childNodes:
if child.nodeType == child.TEXT_NODE:
line += child.data
elif child.nodeName == 'computeroutput':
line += '`%s`' % get_text(child)
else:
if line:
lines.append(doc_wrap(line, width=width))
line = ''
if child.nodeName == 'parameterlist':
lines.append(parse_params(child, width=width))
elif child.nodeName == 'xrefsect':
title = get_text(get_child(child, 'xreftitle'))
xrefs.add(title)
xrefdesc = parse_para(get_child(child, 'xrefdescription'))
lines.append(doc_wrap(xrefdesc, prefix='%s: ' % title,
width=width) + '\n')
elif child.nodeName == 'simplesect':
kind = child.getAttribute('kind')
if kind == 'note':
lines.append('Note:')
lines.append(doc_wrap(parse_para(child),
prefix=' ',
width=width))
elif kind == 'return':
lines.append('%s: ~' % kind.title())
lines.append(doc_wrap(parse_para(child),
prefix=' ',
width=width))
if child.nodeName == 'parameterlist':
groups['params'].append(child)
elif child.nodeName == 'xrefsect':
groups['xrefs'].append(child)
elif child.nodeName == 'simplesect':
last = kind
kind = child.getAttribute('kind')
if kind == 'return' or (kind == 'note' and last == 'return'):
groups['return'].append(child)
elif kind == 'see':
groups['seealso'].append(child)
elif kind in ('note', 'warning'):
text += render_node(child, text, indent=indent, width=width)
else:
lines.append(get_text(child))
raise RuntimeError('unhandled simplesect: {}\n{}'.format(
child.nodeName, child.toprettyxml(indent=' ', newl='\n')))
else:
text += render_node(child, text, indent=indent, width=width)
if line:
lines.append(doc_wrap(line, width=width))
return clean_lines('\n'.join(lines).strip())
chunks = [text]
# Generate text from the gathered items.
if len(groups['params']) > 0:
chunks.append('\nParameters: ~')
for child in groups['params']:
chunks.append(render_params(child, width=width))
if len(groups['return']) > 0:
chunks.append('\nReturn: ~')
for child in groups['return']:
chunks.append(render_node(child, chunks[-1][-1], indent=indent, width=width))
if len(groups['seealso']) > 0:
chunks.append('\nSee also: ~')
for child in groups['seealso']:
chunks.append(render_node(child, chunks[-1][-1], indent=indent, width=width))
for child in groups['xrefs']:
title = get_text(get_child(child, 'xreftitle'))
xrefs.add(title)
xrefdesc = render_para(get_child(child, 'xrefdescription'), width=width)
chunks.append(doc_wrap(xrefdesc, prefix='{}: '.format(title),
width=width) + '\n')
return clean_lines('\n'.join(chunks).strip())
def parse_parblock(parent, width=62):
"""Parses a nested block of `para` tags.
Named after the \parblock command, but not directly related.
"""
def parse_parblock(parent, prefix='', width=62, indent=''):
"""Renders a nested block of <para> tags as Vim help text."""
paragraphs = []
for child in parent.childNodes:
if child.nodeType == child.TEXT_NODE:
paragraphs.append(doc_wrap(child.data, width=width))
elif child.nodeName == 'para':
paragraphs.append(parse_para(child, width=width))
else:
paragraphs.append(doc_wrap(get_text(child), width=width))
paragraphs.append(render_para(child, width=width, indent=indent))
paragraphs.append('')
return clean_lines('\n'.join(paragraphs).strip())
# }}}
@@ -292,7 +418,7 @@ def parse_source_xml(filename):
if return_type.startswith(('ArrayOf', 'DictionaryOf')):
parts = return_type.strip('_').split('_')
return_type = '%s(%s)' % (parts[0], ', '.join(parts[1:]))
return_type = '{}({})'.format(parts[0], ', '.join(parts[1:]))
name = get_text(get_child(member, 'name'))
@@ -306,37 +432,37 @@ def parse_source_xml(filename):
annotations = filter(None, map(lambda x: annotation_map.get(x),
annotations.split()))
vimtag = '*%s()*' % name
args = []
vimtag = '*{}()*'.format(name)
params = []
type_length = 0
for param in get_children(member, 'param'):
arg_type = get_text(get_child(param, 'type')).strip()
arg_name = ''
param_type = get_text(get_child(param, 'type')).strip()
param_name = ''
declname = get_child(param, 'declname')
if declname:
arg_name = get_text(declname).strip()
param_name = get_text(declname).strip()
if arg_name in param_exclude:
if param_name in param_exclude:
continue
if arg_type.endswith('*'):
arg_type = arg_type.strip('* ')
arg_name = '*' + arg_name
type_length = max(type_length, len(arg_type))
args.append((arg_type, arg_name))
if param_type.endswith('*'):
param_type = param_type.strip('* ')
param_name = '*' + param_name
type_length = max(type_length, len(param_type))
params.append((param_type, param_name))
c_args = []
for arg_type, arg_name in args:
for param_type, param_name in params:
c_args.append(' ' + (
'%s %s' % (arg_type.ljust(type_length), arg_name)).strip())
'%s %s' % (param_type.ljust(type_length), param_name)).strip())
c_decl = textwrap.indent('%s %s(\n%s\n);' % (return_type, name,
',\n'.join(c_args)),
' ')
prefix = '%s(' % name
suffix = '%s)' % ', '.join('{%s}' % a[1] for a in args
suffix = '%s)' % ', '.join('{%s}' % a[1] for a in params
if a[0] not in ('void', 'Error'))
# Minimum 8 chars between signature and vimtag
@@ -354,7 +480,7 @@ def parse_source_xml(filename):
desc = find_first(member, 'detaileddescription')
if desc:
doc = parse_parblock(desc)
if 'DEBUG' in os.environ:
if DEBUG:
print(textwrap.indent(
re.sub(r'\n\s*\n+', '\n',
desc.toprettyxml(indent=' ', newl='\n')), ' ' * 16))
@@ -372,7 +498,7 @@ def parse_source_xml(filename):
else:
doc = doc[:i] + annotations + '\n\n' + doc[i:]
if 'INCLUDE_C_DECL' in os.environ:
if INCLUDE_C_DECL:
doc += '\n\nC Declaration: ~\n>\n'
doc += c_decl
doc += '\n<'
@@ -464,7 +590,7 @@ def gen_docs(config):
if functions:
doc += '\n\n' + functions
if 'INCLUDE_DEPRECATED' in os.environ and deprecated:
if INCLUDE_DEPRECATED and deprecated:
doc += '\n\n\nDeprecated %s Functions: ~\n\n' % name
doc += deprecated
@@ -551,6 +677,7 @@ XML_PROGRAMLISTING = NO
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = YES
EXPAND_ONLY_PREDEF = NO
MARKDOWN_SUPPORT = YES
'''
# }}}