Improve documentation for unidecode (#16986)

* Improve documentation for unidecode

Minor changes to gen.py

* Fix typo in gen.py
This commit is contained in:
konsumlamm
2021-02-09 22:47:07 +01:00
committed by GitHub
parent 635c0b6cb9
commit 00551f972e
2 changed files with 28 additions and 26 deletions

View File

@@ -7,30 +7,31 @@
# distribution, for details about the copyright.
#
## This module is based on Python's Unidecode module by Tomaz Solc,
## which in turn is based on the ``Text::Unidecode`` Perl module by
## Sean M. Burke
## (http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm ).
## This module is based on Python's [Unidecode](https://pypi.org/project/Unidecode/)
## module by Tomaz Solc, which in turn is based on the
## [Text::Unidecode](https://metacpan.org/pod/Text::Unidecode)
## Perl module by Sean M. Burke.
##
## It provides a single proc that does Unicode to ASCII transliterations:
## It finds the sequence of ASCII characters that is the closest approximation
## to the Unicode string.
## It provides a `unidecode proc <#unidecode,string>`_ that does
## Unicode to ASCII transliterations: It finds the sequence of ASCII characters
## that is the closest approximation to the Unicode string.
##
## For example, the closest ASCII match for "Äußerst" is "Ausserst". Some
## information is lost in this transformation, of course, since several Unicode
## strings can be transformed in the same ASCII representation. So this is a
## strictly one-way transformation. However a human reader will probably
## still be able to guess what original string was meant from the context.
## strings can be transformed to the same ASCII representation. So this is a
## strictly one-way transformation. However, a human reader will probably
## still be able to guess from the context what the original string was.
##
## This module needs the data file "unidecode.dat" to work: This file is
## embedded as a resource into your application by default. But you an also
## define the symbol ``--define:noUnidecodeTable`` during compile time and
## use the `loadUnidecodeTable` proc to initialize this module.
## This module needs the data file `unidecode.dat` to work: This file is
## embedded as a resource into your application by default. You can also
## define the symbol `--define:noUnidecodeTable` during compile time and
## use the `loadUnidecodeTable proc <#loadUnidecodeTable>`_ to initialize
## this module.
import unicode
import std/unicode
when not defined(noUnidecodeTable):
import strutils
import std/strutils
const translationTable = splitLines(slurp"unidecode/unidecode.dat")
else:
@@ -38,10 +39,10 @@ else:
var translationTable: seq[string]
proc loadUnidecodeTable*(datafile = "unidecode.dat") =
## loads the datafile that `unidecode` to work. This is only required if
## the module was compiled with the ``--define:noUnidecodeTable`` switch.
## This needs to be called by the main thread before any thread can make a
## call to `unidecode`.
## Loads the datafile that `unidecode <#unidecode,string>`_ needs to work.
## This is only required if the module was compiled with the
## `--define:noUnidecodeTable` switch. This needs to be called by the
## main thread before any thread can make a call to `unidecode`.
when defined(noUnidecodeTable):
newSeq(translationTable, 0xffff)
var i = 0
@@ -53,10 +54,11 @@ proc unidecode*(s: string): string =
## Finds the sequence of ASCII characters that is the closest approximation
## to the UTF-8 string `s`.
runnableExamples:
assert unidecode("北京") == "Bei Jing "
doAssert unidecode("北京") == "Bei Jing "
doAssert unidecode("Äußerst") == "Ausserst"
result = ""
for r in runes(s):
var c = int(r)
if c <=% 127: add(result, chr(c))
elif c <% translationTable.len: add(result, translationTable[c-128])
elif c <% translationTable.len: add(result, translationTable[c - 128])