From 9a322f8103a9e6fb13e3d896f45abfc19396daac Mon Sep 17 00:00:00 2001
From: bfredl <bjorn.linse@gmail.com>
Date: Sat, 17 May 2025 16:41:58 +0200
Subject: [PATCH] refactor(helptags): remove useless homegrown encoding check

This check was always broken: it "detects" a file as other-than-UTF-8
whenever the first line of the help file is ASCII-only.

This only works by accident. All our help files are UTF-8 (or
ASCII-only, which is fully compatible), but every one of them has an
ASCII-only first line, so every help file gets detected as not-UTF-8.
The "consistency" test therefore passes by accident, even though the
actual consistency is that every single file is UTF-8 compatible. This
also means that the "!_TAG_FILE_ENCODING\tutf-8\t" meta-tag already did
not get emitted, but YAGNI in either case: no encoding tag just means
that 'encoding' is used, which in Neovim is always UTF-8 anyway.

An alternative approach would be to integrate the real encoding
detection already present in the codebase (this is an editor which
edits text of various encodings), which checks the entire file instead
of using a weird first-line-only hack. But as it happens to be 2025, the
resolution of encoding trouble is to just use UTF-8 everywhere. And if
you use something else, you have to keep track of it yourself anyway;
it is not like we can detect if one help file of your plugin is latin-1
and another is latin-2 or whatever. Also, Nvim will detect the encoding
of the file when you open it as a :help buffer anyway.
---
 src/nvim/help.c | 38 --------------------------------------
 1 file changed, 38 deletions(-)

diff --git a/src/nvim/help.c b/src/nvim/help.c
index 0da846bb9f..f63aef154c 100644
--- a/src/nvim/help.c
+++ b/src/nvim/help.c
@@ -839,8 +839,6 @@ static void helptags_one(char *dir, const char *ext, const char *tagfname, bool
   int filecount;
   char **files;
   char *s;
-  TriState utf8 = kNone;
-  bool mix = false;             // detected mixed encodings
 
   // Find all *.txt files.
   size_t dirlen = xstrlcpy(NameBuff, dir, sizeof(NameBuff));
@@ -905,36 +903,7 @@ static void helptags_one(char *dir, const char *ext, const char *tagfname, bool
     const char *const fname = files[fi] + dirlen + 1;
 
     bool in_example = false;
-    bool firstline = true;
     while (!vim_fgets(IObuff, IOSIZE, fd) && !got_int) {
-      if (firstline) {
-        // Detect utf-8 file by a non-ASCII char in the first line.
-        TriState this_utf8 = kNone;
-        for (s = IObuff; *s != NUL; s++) {
-          if ((uint8_t)(*s) >= 0x80) {
-            this_utf8 = kTrue;
-            const int l = utf_ptr2len(s);
-            if (l == 1) {
-              // Illegal UTF-8 byte sequence.
-              this_utf8 = kFalse;
-              break;
-            }
-            s += l - 1;
-          }
-        }
-        if (this_utf8 == kNone) {           // only ASCII characters found
-          this_utf8 = kFalse;
-        }
-        if (utf8 == kNone) {                // first file
-          utf8 = this_utf8;
-        } else if (utf8 != this_utf8) {
-          semsg(_("E670: Mix of help file encodings within a language: %s"),
-                files[fi]);
-          mix = !got_int;
-          got_int = true;
-        }
-        firstline = false;
-      }
       if (in_example) {
         // skip over example; a non-white in the first column ends it
         if (vim_strchr(" \t\n\r", (uint8_t)IObuff[0])) {
@@ -1008,10 +977,6 @@ static void helptags_one(char *dir, const char *ext, const char *tagfname, bool
       }
     }
 
-    if (utf8 == kTrue) {
-      fprintf(fd_tags, "!_TAG_FILE_ENCODING\tutf-8\t//\n");
-    }
-
     // Write the tags into the file.
     for (int i = 0; i < ga.ga_len; i++) {
       s = ((char **)ga.ga_data)[i];
@@ -1031,9 +996,6 @@ static void helptags_one(char *dir, const char *ext, const char *tagfname, bool
       }
     }
   }
-  if (mix) {
-    got_int = false;        // continue with other languages
-  }
 
   GA_DEEP_CLEAR_PTR(&ga);
   fclose(fd_tags);          // there is no check for an error...