mirror of
				https://github.com/neovim/neovim.git
				synced 2025-11-04 01:34:25 +00:00 
			
		
		
		
	Before this change, "static inline" functions in headers needed to have their function attributes specified in a completely different way. The prototype had to be duplicated, and REAL_FATTR_ had to be used instead of the public FUNC_ATTR_ names. TODO: need a check that a "header.h.inline.generated.h" file is not forgotten when the first "static inline" function with attributes is added to a header (they would just be silently missing).
		
			
				
	
	
		
			312 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
			
		
		
	
	
			312 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
--[[ Copyright (c) 2009 Peter "Corsix" Cawley
 | 
						|
 | 
						|
Permission is hereby granted, free of charge, to any person obtaining a copy of
 | 
						|
this software and associated documentation files (the "Software"), to deal in
 | 
						|
the Software without restriction, including without limitation the rights to
 | 
						|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 | 
						|
of the Software, and to permit persons to whom the Software is furnished to do
 | 
						|
so, subject to the following conditions:
 | 
						|
 | 
						|
The above copyright notice and this permission notice shall be included in all
 | 
						|
copies or substantial portions of the Software.
 | 
						|
 | 
						|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | 
						|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
						|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | 
						|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
						|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | 
						|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 | 
						|
SOFTWARE. --]]
 | 
						|
 | 
						|
-- this C parser was taken from Corsix-TH, I'm sure this could be done much
 | 
						|
-- better (i.e.: I think everything I do could be substitutions made with LPeg
 | 
						|
-- during parsing), but I've just learned enough basic LPeg to make this
 | 
						|
-- work.
 | 
						|
-- see: http://lua-users.org/wiki/LpegRecipes
 | 
						|
 | 
						|
local lpeg = require 'lpeg'
 | 
						|
 | 
						|
local C, P, R, S, V = lpeg.C, lpeg.P, lpeg.R, lpeg.S, lpeg.V
 | 
						|
local Carg, Cc, Cp, Ct = lpeg.Carg, lpeg.Cc, lpeg.Cp, lpeg.Ct
 | 
						|
 | 
						|
-- LPeg grammar that splits C source text into a flat list of tokens.
--
-- Every token is captured as a two-element table `{ text, type }`. The match
-- expects one extra argument (read through Carg(1)): a state table with the
-- fields `line` and `line_start`. The `newline` rule updates that table and
-- the `error` rule reads it to report where tokenising failed.
local tokens = P {
  'tokens',

  -- Comment of form /* ... */ (newlines inside it still go through the
  -- `newline` rule so that the line counter stays correct)
  comment = Ct(P '/*' * C((V 'newline' + (1 - P '*/')) ^ 0) * P '*/' * Cc 'comment'),

  -- Single line comment (the terminating newline is not part of the token)
  line_comment = Ct(P '//' * C((1 - V 'newline') ^ 0) * Cc 'comment_line'),

  -- Single platform independent line break which increments the line number
  -- and records the position just past the break as the start of the new line
  newline = (P '\r\n' + P '\n\r' + S '\r\n') * (Cp() * Carg(1)) / function(pos, state)
    state.line = state.line + 1
    state.line_start = pos
  end,

  -- Line continuation (backslash immediately followed by a newline)
  line_extend = Ct(C(P [[\]] * V 'newline') * Cc 'line_extend'),

  -- Whitespace of any length (includes newlines)
  whitespace = Ct(C((S ' \t' + V 'newline') ^ 1) * Cc 'whitespace'),

  -- Special form of #include with filename followed in angled brackets
  -- (matches 3 tokens: preprocessor, whitespace, string)
  include = Ct(C(P '#include') * Cc 'preprocessor')
    * Ct(C(S ' \t' ^ 1) * Cc 'whitespace')
    * Ct(C(P '<' * (1 - P '>') ^ 1 * P '>') * Cc 'string'),

  -- Preprocessor instruction. The directive name must be followed by
  -- whitespace (checked with a lookahead, not consumed). Longer names are
  -- listed before their prefixes ('ifdef'/'ifndef' before 'if') because the
  -- choice is ordered.
  preprocessor = V 'include'
    + Ct(
      C(
        P '#'
          * P ' ' ^ 0
          * (
            P 'define'
            + P 'elif'
            + P 'else'
            + P 'endif'
            + P '#'
            + P 'error'
            + P 'ifdef'
            + P 'ifndef'
            + P 'if'
            + P 'import'
            + P 'include'
            + P 'line'
            + P 'pragma'
            + P 'undef'
            + P 'using'
          )
          * #S ' \r\n\t'
      ) * Cc 'preprocessor'
    ),

  -- Identifier of form [a-zA-Z_][a-zA-Z0-9_]*
  identifier = Ct(C(R('az', 'AZ', '__') * R('09', 'az', 'AZ', '__') ^ 0) * Cc 'identifier'),

  -- Single character in a string: anything except the delimiting quote and
  -- the backslash, or a backslash followed by one of the listed escape
  -- characters
  sstring_char = R('\001&', '([', ']\255') + (P '\\' * S [[ntvbrfa\?'"0x]]),
  dstring_char = R('\001!', '#[', ']\255') + (P '\\' * S [[ntvbrfa\?'"0x]]),

  -- String literal (single or double quoted; the other quote kind may
  -- appear unescaped inside)
  string = Ct(
    C(
      P "'" * (V 'sstring_char' + P '"') ^ 0 * P "'"
        + P '"' * (V 'dstring_char' + P "'") ^ 0 * P '"'
    ) * Cc 'string'
  ),

  -- Operator (three-character forms first, then two-character forms, then
  -- single characters, since the choice is ordered)
  operator = Ct(
    C(
      P '>>='
        + P '<<='
        + P '...'
        + P '::'
        + P '<<'
        + P '>>'
        + P '<='
        + P '>='
        + P '=='
        + P '!='
        + P '||'
        + P '&&'
        + P '++'
        + P '--'
        + P '->'
        + P '+='
        + P '-='
        + P '*='
        + P '/='
        + P '|='
        + P '&='
        + P '^='
        + S '+-*/=<>%^|&.?:!~,'
    ) * Cc 'operator'
  ),

  -- Misc. char (token type is the character itself)
  char = Ct(C(S '[]{}();') / function(x)
    return x, x
  end),

  -- Hex, octal or decimal number
  int = Ct(
    C((P '0x' * R('09', 'af', 'AF') ^ 1) + (P '0' * R '07' ^ 0) + R '09' ^ 1) * Cc 'integer'
  ),

  -- Floating point number
  --
  -- The exponent is the marker 'e'/'E', an optional sign and at least one
  -- digit, all in sequence. `*` binds tighter than `+` in Lua, so writing
  -- `S 'eE' + ...` here would turn the rule into a choice between a bare
  -- marker and a signed digit run, which makes e.g. "1+2" lex as one float.
  f_exponent = S 'eE' * S '+-' ^ -1 * R '09' ^ 1,
  f_terminator = S 'fFlL',
  float = Ct(
    C(
      R '09' ^ 1 * V 'f_exponent' * V 'f_terminator' ^ -1
        + R '09' ^ 0 * P '.' * R '09' ^ 1 * V 'f_exponent' ^ -1 * V 'f_terminator' ^ -1
        + R '09' ^ 1 * P '.' * R '09' ^ 0 * V 'f_exponent' ^ -1 * V 'f_terminator' ^ -1
    ) * Cc 'float'
  ),

  -- Any token (`float` is tried before `int` so that "1.5" is not split)
  token = V 'comment'
    + V 'line_comment'
    + V 'identifier'
    + V 'whitespace'
    + V 'line_extend'
    + V 'preprocessor'
    + V 'string'
    + V 'char'
    + V 'operator'
    + V 'float'
    + V 'int',

  -- Error for when nothing else matches: raises a Lua error quoting up to 8
  -- characters of the offending input together with its line and column
  error = (Cp() * C(P(1) ^ -8) * Carg(1)) / function(pos, where, state)
    error(
      ("Tokenising error on line %i, position %i, near '%s'"):format(
        state.line,
        pos - state.line_start + 1,
        where
      )
    )
  end,

  -- Match end of input or throw error
  finish = -P(1) + V 'error',

  -- Match stream of tokens into a table
  tokens = Ct(V 'token' ^ 0) * V 'finish',
}
 | 
						|
 | 
						|
--- Run the token grammar over a string of C source.
--- A fresh position-tracking table (line 1, line start at offset 1) is handed
--- to the grammar as its extra match argument.
--- @param str string C source text
--- @return table list of `{ text, type }` token tables
local function TokeniseC(str)
  local position_state = { line = 1, line_start = 1 }
  return tokens:match(str, 1, position_state)
end
 | 
						|
 | 
						|
--- Turn a list into a membership table: each list element becomes a key
--- whose value is `true`.
--- @param t table sequence of values
--- @return table lookup table keyed by the elements of `t`
local function set(t)
  local lookup = {}
  for _, member in ipairs(t) do
    lookup[member] = true
  end
  return lookup
end
 | 
						|
 | 
						|
-- Membership table of C keywords (keyword -> true), built with `set`.
-- NOTE(review): `auto` is not in this list -- confirm whether that is
-- intentional.
local C_keywords = set { -- luacheck: ignore
  -- b, c
  'break', 'case', 'char', 'const', 'continue',
  -- d, e
  'default', 'do', 'double', 'else', 'enum', 'extern',
  -- f, g, i, l
  'float', 'for', 'goto', 'if', 'int', 'long',
  -- r, s
  'register', 'return', 'short', 'signed', 'sizeof', 'static', 'struct', 'switch',
  -- t, u, v, w
  'typedef', 'union', 'unsigned', 'void', 'volatile', 'while',
}
 | 
						|
 | 
						|
-- Very primitive C formatter that tries to put "things" inside braces on one
-- line. This is a step done after preprocessing the C source to ensure that
-- the duplicate line detector can more reliably pick out identical declarations.
--
-- an example:
--   struct mystruct
--   {
--      int a;
--      int b;
--   };
--
-- would become:
--  struct mystruct { int a; int b; };
--
--  The first one will have a lot of false positives (the line '{' for
--  example), the second one is more unique.
--- @param str string C source text
--- @return string the same source with whitespace collapsed and newlines
---   kept only at top-level statement ends and after preprocessor directives
local function formatc(str)
  local token_list = TokeniseC(str)
  local pieces = {}
  -- current brace nesting depth; 0 means top level
  local depth = 0
  -- set by a preprocessor token: the next whitespace run that contains a
  -- line break is emitted as a newline instead of a space
  local keep_next_newline = false
  -- set by `static`/`inline`/`__inline`: the '}' that returns to top level
  -- gets a newline appended
  local newline_after_brace = false

  for _, tok in ipairs(token_list) do
    local text, kind = tok[1], tok[2]

    if kind == '{' then
      depth = depth + 1
    elseif kind == '}' then
      depth = depth - 1
      -- an inline header function has no trailing ';', so the brace that
      -- closes it is what ends the top-level item
      if newline_after_brace and depth == 0 then
        text = text .. '\n'
        newline_after_brace = false
      end
    elseif kind == 'identifier' then
      -- static and/or inline usually indicate an inline header function,
      -- which has no trailing ';', so the newline has to be added after
      -- its closing '}'
      if text == 'static' or text == 'inline' or text == '__inline' then
        newline_after_brace = true
      end
    elseif kind == 'preprocessor' then
      -- preprocessor directives don't end in ';' but need their newline,
      -- so the next newline is allowed through
      keep_next_newline = true
    elseif kind == ';' then
      -- outside of any block a ';' ends a statement: terminate the line
      if depth == 0 then
        text = ';\n'
        newline_after_brace = false
      end
    elseif kind == 'whitespace' then
      -- whitespace collapses to one space, or to one newline when a
      -- newline is pending and this run actually contains a line break
      local replacement = ' '
      if keep_next_newline and text:find('[\r\n]+') then
        replacement = '\n'
        keep_next_newline = false
      end
      text = (text:gsub('%s+', replacement))
    end

    pieces[#pieces + 1] = text
  end

  return table.concat(pieces)
end
 | 
						|
 | 
						|
-- standalone operation (very handy for debugging)
--
-- Preprocesses the file named by the first command-line argument and prints
-- the result. The output goes through `formatc` unless exactly two arguments
-- were given and the second one is 'no', in which case the raw preprocessor
-- output is printed.
local function standalone(...) -- luacheck: ignore
  local Preprocess = require('preprocess')

  local include_dirs = {
    './../../src',
    './../../build/include',
    './../../.deps/usr/include',
  }
  for _, dir in ipairs(include_dirs) do
    Preprocess.add_to_include_path(dir)
  end

  local raw = Preprocess.preprocess('', arg[1])

  if #arg == 2 and arg[2] == 'no' then
    print(raw)
  else
    print(formatc(raw))
  end
end
 | 
						|
-- uncomment the `standalone(...)` line below (and comment out the `return`) for standalone debugging
 | 
						|
-- example usage:
 | 
						|
--    ../../.deps/usr/bin/luajit formatc.lua ../../include/fileio.h.generated.h
 | 
						|
--    ../../.deps/usr/bin/luajit formatc.lua /usr/include/malloc.h
 | 
						|
-- standalone(...)
 | 
						|
return formatc
 |