From 9a322f8103a9e6fb13e3d896f45abfc19396daac Mon Sep 17 00:00:00 2001 From: bfredl Date: Sat, 17 May 2025 16:41:58 +0200 Subject: [PATCH] refactor(helptags): remove useless homegrown encoding check This check was always broken: it will "detect" a file as other-than-UTF-8 if the first line of a help file is ASCII-only. This only works by accident, as all our help files are UTF-8 (or ASCII-only, which is fully compatible), but every one of them is ASCII-only on its first line, which means that all help files get detected as not-UTF-8, which makes the "consistency" test pass by accident even though the actual consistency is that every single file is UTF-8 compatible. This means that the "!_TAG_FILE_ENCODING\tutf-8\t" meta-tag already did not get emitted, but YAGNI in either case, as no encoding tag just means that 'encoding' is used, which in Neovim is always UTF-8 anyway. An alternative approach would be to integrate the real encoding detection already present in the codebase (an editor which edits text of various encodings), which checks the entire file instead of a weird first-line-only hack, but as it happens to be 2025, the resolution of encoding trouble is to just use UTF-8 everywhere. And if you use something else, you have to keep track of it yourself anyway; it is not like we can detect if one help file of your plugin is latin-1 and another is latin-2 or whatever. Also, Nvim will detect the encoding of the file when you open the file as a :help buffer anyway. --- src/nvim/help.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/src/nvim/help.c b/src/nvim/help.c index 0da846bb9f..f63aef154c 100644 --- a/src/nvim/help.c +++ b/src/nvim/help.c @@ -839,8 +839,6 @@ static void helptags_one(char *dir, const char *ext, const char *tagfname, bool int filecount; char **files; char *s; - TriState utf8 = kNone; - bool mix = false; // detected mixed encodings // Find all *.txt files. 
size_t dirlen = xstrlcpy(NameBuff, dir, sizeof(NameBuff)); @@ -905,36 +903,7 @@ static void helptags_one(char *dir, const char *ext, const char *tagfname, bool const char *const fname = files[fi] + dirlen + 1; bool in_example = false; - bool firstline = true; while (!vim_fgets(IObuff, IOSIZE, fd) && !got_int) { - if (firstline) { - // Detect utf-8 file by a non-ASCII char in the first line. - TriState this_utf8 = kNone; - for (s = IObuff; *s != NUL; s++) { - if ((uint8_t)(*s) >= 0x80) { - this_utf8 = kTrue; - const int l = utf_ptr2len(s); - if (l == 1) { - // Illegal UTF-8 byte sequence. - this_utf8 = kFalse; - break; - } - s += l - 1; - } - } - if (this_utf8 == kNone) { // only ASCII characters found - this_utf8 = kFalse; - } - if (utf8 == kNone) { // first file - utf8 = this_utf8; - } else if (utf8 != this_utf8) { - semsg(_("E670: Mix of help file encodings within a language: %s"), - files[fi]); - mix = !got_int; - got_int = true; - } - firstline = false; - } if (in_example) { // skip over example; a non-white in the first column ends it if (vim_strchr(" \t\n\r", (uint8_t)IObuff[0])) { @@ -1008,10 +977,6 @@ static void helptags_one(char *dir, const char *ext, const char *tagfname, bool } } - if (utf8 == kTrue) { - fprintf(fd_tags, "!_TAG_FILE_ENCODING\tutf-8\t//\n"); - } - // Write the tags into the file. for (int i = 0; i < ga.ga_len; i++) { s = ((char **)ga.ga_data)[i]; @@ -1031,9 +996,6 @@ static void helptags_one(char *dir, const char *ext, const char *tagfname, bool } } } - if (mix) { - got_int = false; // continue with other languages - } GA_DEEP_CLEAR_PTR(&ga); fclose(fd_tags); // there is no check for an error...