Files
neovim/test/functional/legacy/095_regexp_multibyte_spec.lua
Björn Linse ffff2c9c47 encoding: Update handling of encoding in tests
Always run tests with encoding=utf-8, regardless of user locale
Don't set &encoding after startup in tests

Helped-By: Michael Reed <m.reed@mykolab.com>
2015-09-08 10:54:31 +02:00

271 lines
8.5 KiB
Lua
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- Test for regexp patterns with multi-byte support, using utf-8.
-- See test64 for the non-multi-byte tests.
-- A pattern that gives the expected result produces OK, so that we know it was
-- actually tried.
local helpers = require('test.functional.helpers')
local feed, insert, source = helpers.feed, helpers.insert, helpers.source
local clear, execute, expect = helpers.clear, helpers.execute, helpers.expect
describe('regex with multi-byte', function()
  -- NOTE(review): `clear` comes from the functional test helpers; presumably
  -- it gives the spec a fresh Nvim session -- confirm against the helpers.
  setup(clear)

  it('is working', function()
    -- Header line of the result buffer.  The Vim script below appends one
    -- result line after it for every (pattern, engine) pair it tries.
    insert([[
Results of test95:]])

    -- Each entry of `tl` is a list:
    --   [re, pattern, text, expected match, expected submatch 1, ...]
    -- * re == 2: the pattern is tried with 'regexpengine' set to 0, 1 and 2.
    -- * re == 1: engine 1 is skipped.
    -- * re == 0: engine 2 is skipped.
    -- An entry with only three items asserts that the pattern must NOT match.
    -- Submatches 1..9 that are not listed are expected to be empty strings.
    -- The string literal is a runtime script: keep the `\"` escapes in the
    -- `$put =` expressions exactly as they are.
    source([=[
set nomore
let tl = []
call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ']) " equivalence classes
call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])
call add(tl, [2, '\p\+', 'ìa', 'ìa'])
call add(tl, [2, '\p*', 'aあ', 'aあ'])
call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
call add(tl, [2, '\f\+', '&*Ÿfname ', 'fname'])
call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
call add(tl, [2, "a", "ca\u0300t"])
call add(tl, [2, "ca", "ca\u0300t"])
call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])
call add(tl, [2, 'ú\Z', 'x'])
call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
call add(tl, [2, "\u05b9\\Z", "xyz"])
call add(tl, [2, "\\Z\u05b9", "xyz"])
call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])
for t in tl
  let re = t[0]
  let pat = t[1]
  let text = t[2]
  let matchidx = 3
  for engine in [0, 1, 2]
    if engine == 2 && re == 0 || engine == 1 && re == 1
      continue
    endif
    let &regexpengine = engine
    try
      let l = matchlist(text, pat)
    catch
      $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
      " l is unset or left over from an earlier run here, so the result
      " checks below would be meaningless: go on with the next engine.
      continue
    endtry
    if len(l) == 0 && len(t) > matchidx
      $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
    elseif len(l) > 0 && len(t) == matchidx
      $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected no match'
    elseif len(t) > matchidx && l[0] != t[matchidx]
      $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"'
    else
      $put ='OK ' . engine . ' - ' . pat
    endif
    if len(l) > 0
      for i in range(1, 9)
        if len(t) <= matchidx + i
          let e = ''
        else
          let e = t[matchidx + i]
        endif
        if l[i] != e
          $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"'
        endif
      endfor
      unlet i
    endif
  endfor
endfor
unlet t tl e l
set regexpengine=1 ambiwidth=single
$put ='eng 1 ambi single: ' . match(\"\u00EC\", '\p')
set regexpengine=1 ambiwidth=double
$put ='eng 1 ambi double: ' . match(\"\u00EC\", '\p')
set regexpengine=2 ambiwidth=single
$put ='eng 2 ambi single: ' . match(\"\u00EC\", '\p')
set regexpengine=2 ambiwidth=double
$put ='eng 2 ambi double: ' . match(\"\u00EC\", '\p')
]=])

    -- Assert buffer contents.
    -- Every "OK <engine> - <pattern>" line echoes the pattern verbatim, so:
    -- * the lines for ' [^ ]\+' carry TWO spaces after the dash (the second
    --   one is the leading space of the pattern itself);
    -- * patterns written with \u05b9, \u05bb, \u0300 or \u200d escapes show
    --   up here as raw combining marks / zero-width joiners.  They are hard
    --   or impossible to see in most editors -- edit these lines with care.
    expect([=[
Results of test95:
OK 0 - [[:alpha:][=a=]]\+
OK 1 - [[:alpha:][=a=]]\+
OK 2 - [[:alpha:][=a=]]\+
OK 0 - [[=a=]]\+
OK 1 - [[=a=]]\+
OK 2 - [[=a=]]\+
OK 0 - [^ม ]\+
OK 1 - [^ม ]\+
OK 2 - [^ม ]\+
OK 0 -  [^ ]\+
OK 1 -  [^ ]\+
OK 2 -  [^ ]\+
OK 0 - [ม[:alpha:][=a=]]\+
OK 1 - [ม[:alpha:][=a=]]\+
OK 2 - [ม[:alpha:][=a=]]\+
OK 0 - \p\+
OK 1 - \p\+
OK 2 - \p\+
OK 0 - \p*
OK 1 - \p*
OK 2 - \p*
OK 0 - \i\+
OK 1 - \i\+
OK 2 - \i\+
OK 0 - \f\+
OK 1 - \f\+
OK 2 - \f\+
OK 0 - .ม
OK 1 - .ม
OK 2 - .ม
OK 0 - .ม่
OK 1 - .ม่
OK 2 - .ม่
OK 0 - ֹ
OK 1 - ֹ
OK 2 - ֹ
OK 0 - .ֹ
OK 1 - .ֹ
OK 2 - .ֹ
OK 0 - ֹֻ
OK 1 - ֹֻ
OK 2 - ֹֻ
OK 0 - .ֹֻ
OK 1 - .ֹֻ
OK 2 - .ֹֻ
OK 0 - ֹֻ
OK 1 - ֹֻ
OK 2 - ֹֻ
OK 0 - .ֹֻ
OK 1 - .ֹֻ
OK 2 - .ֹֻ
OK 0 - ֹ
OK 1 - ֹ
OK 2 - ֹ
OK 0 - .ֹ
OK 1 - .ֹ
OK 2 - .ֹ
OK 0 - ֹ
OK 1 - ֹ
OK 2 - ֹ
OK 0 - .ֹ
OK 1 - .ֹ
OK 2 - .ֹ
OK 0 - ֹֻ
OK 2 - ֹֻ
OK 0 - .ֹֻ
OK 1 - .ֹֻ
OK 2 - .ֹֻ
OK 0 - a
OK 1 - a
OK 2 - a
OK 0 - ca
OK 1 - ca
OK 2 - ca
OK 0 - à
OK 1 - à
OK 2 - à
OK 0 - a\%C
OK 1 - a\%C
OK 2 - a\%C
OK 0 - ca\%C
OK 1 - ca\%C
OK 2 - ca\%C
OK 0 - ca\%Ct
OK 1 - ca\%Ct
OK 2 - ca\%Ct
OK 0 - ú\Z
OK 1 - ú\Z
OK 2 - ú\Z
OK 0 - יהוה\Z
OK 1 - יהוה\Z
OK 2 - יהוה\Z
OK 0 - יְהוָה\Z
OK 1 - יְהוָה\Z
OK 2 - יְהוָה\Z
OK 0 - יהוה\Z
OK 1 - יהוה\Z
OK 2 - יהוה\Z
OK 0 - יְהוָה\Z
OK 1 - יְהוָה\Z
OK 2 - יְהוָה\Z
OK 0 - יְ\Z
OK 1 - יְ\Z
OK 2 - יְ\Z
OK 0 - ק‍ֹx\Z
OK 1 - ק‍ֹx\Z
OK 2 - ק‍ֹx\Z
OK 0 - ק‍ֹx\Z
OK 1 - ק‍ֹx\Z
OK 2 - ק‍ֹx\Z
OK 0 - ק‍x\Z
OK 1 - ק‍x\Z
OK 2 - ק‍x\Z
OK 0 - ק‍x\Z
OK 1 - ק‍x\Z
OK 2 - ק‍x\Z
OK 0 - ֹ\Z
OK 1 - ֹ\Z
OK 2 - ֹ\Z
OK 0 - \Zֹ
OK 1 - \Zֹ
OK 2 - \Zֹ
OK 0 - ֹ\Z
OK 1 - ֹ\Z
OK 2 - ֹ\Z
OK 0 - \Zֹ
OK 1 - \Zֹ
OK 2 - \Zֹ
OK 0 - ֹ\+\Z
OK 2 - ֹ\+\Z
OK 0 - \Zֹ\+
OK 2 - \Zֹ\+
OK 0 - [^[=a=]]\+
OK 1 - [^[=a=]]\+
OK 2 - [^[=a=]]\+
eng 1 ambi single: 0
eng 1 ambi double: 0
eng 2 ambi single: 0
eng 2 ambi double: 0]=])
  end)
end)