Improve documentation for unidecode (#16986)

* Improve documentation for unidecode. Minor changes to gen.py
* Fix typo in gen.py
2026-06-06 03:44:14 +00:00 · 2021-02-09 22:47:07 +01:00
parent 635c0b6cb9
commit 00551f972e
2 changed files with 28 additions and 26 deletions
--- a/lib/pure/unidecode/unidecode.nim
+++ b/lib/pure/unidecode/unidecode.nim
@@ -7,30 +7,31 @@
 #    distribution, for details about the copyright.
 #

-## This module is based on Python's Unidecode module by Tomaz Solc,
-## which in turn is based on the ``Text::Unidecode`` Perl module by
-## Sean M. Burke
-## (http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm ).
+## This module is based on Python's [Unidecode](https://pypi.org/project/Unidecode/)
+## module by Tomaz Solc, which in turn is based on the
+## [Text::Unidecode](https://metacpan.org/pod/Text::Unidecode)
+## Perl module by Sean M. Burke.
 ##
-## It provides a single proc that does Unicode to ASCII transliterations:
-## It finds the sequence of ASCII characters that is the closest approximation
-## to the Unicode string.
+## It provides a `unidecode proc <#unidecode,string>`_ that does
+## Unicode to ASCII transliterations: It finds the sequence of ASCII characters
+## that is the closest approximation to the Unicode string.
 ##
 ## For example, the closest to string "Äußerst" in ASCII is "Ausserst". Some
 ## information is lost in this transformation, of course, since several Unicode
-## strings can be transformed in the same ASCII representation. So this is a
-## strictly one-way transformation. However a human reader will probably
-## still be able to guess what original string was meant from the context.
+## strings can be transformed to the same ASCII representation. So this is a
+## strictly one-way transformation. However, a human reader will probably
+## still be able to guess from the context what the original string was.
 ##
-## This module needs the data file "unidecode.dat" to work: This file is
-## embedded as a resource into your application by default. But you an also
-## define the symbol ``--define:noUnidecodeTable`` during compile time and
-## use the `loadUnidecodeTable` proc to initialize this module.
+## This module needs the data file `unidecode.dat` to work: This file is
+## embedded as a resource into your application by default. Alternatively,
+## you can compile with the `--define:noUnidecodeTable` switch and then
+## use the `loadUnidecodeTable proc <#loadUnidecodeTable>`_ to initialize
+## this module.

-import unicode
+import std/unicode

 when not defined(noUnidecodeTable):
-  import strutils
+  import std/strutils

  const translationTable = splitLines(slurp"unidecode/unidecode.dat")
 else:
@@ -38,10 +39,10 @@ else:
  var translationTable: seq[string]

 proc loadUnidecodeTable*(datafile = "unidecode.dat") =
-  ## loads the datafile that `unidecode` to work. This is only required if
-  ## the module was compiled with the ``--define:noUnidecodeTable`` switch.
-  ## This needs to be called by the main thread before any thread can make a
-  ## call to `unidecode`.
+  ## Loads the datafile that `unidecode <#unidecode,string>`_ needs to work.
+  ## This is only required if the module was compiled with the
+  ## `--define:noUnidecodeTable` switch. This needs to be called by the
+  ## main thread before any thread can make a call to `unidecode`.
  when defined(noUnidecodeTable):
    newSeq(translationTable, 0xffff)
    var i = 0
@@ -53,10 +54,11 @@ proc unidecode*(s: string): string =
  ## Finds the sequence of ASCII characters that is the closest approximation
  ## to the UTF-8 string `s`.
  runnableExamples:
-    assert unidecode("北京") == "Bei Jing "
+    doAssert unidecode("北京") == "Bei Jing "
+    doAssert unidecode("Äußerst") == "Ausserst"

  result = ""
  for r in runes(s):
    var c = int(r)
    if c <=% 127: add(result, chr(c))
-    elif c <% translationTable.len: add(result, translationTable[c-128])
+    elif c <% translationTable.len: add(result, translationTable[c - 128])