mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-18 21:40:32 +00:00
Improve documentation for unidecode (#16986)
* Improve documentation for unidecode
  Minor changes to gen.py
* Fix typo in gen.py
This commit is contained in:
@@ -7,30 +7,31 @@
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## This module is based on Python's Unidecode module by Tomaz Solc,
|
||||
## which in turn is based on the ``Text::Unidecode`` Perl module by
|
||||
## Sean M. Burke
|
||||
## (http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm ).
|
||||
## This module is based on Python's [Unidecode](https://pypi.org/project/Unidecode/)
|
||||
## module by Tomaz Solc, which in turn is based on the
|
||||
## [Text::Unidecode](https://metacpan.org/pod/Text::Unidecode)
|
||||
## Perl module by Sean M. Burke.
|
||||
##
|
||||
## It provides a single proc that does Unicode to ASCII transliterations:
|
||||
## It finds the sequence of ASCII characters that is the closest approximation
|
||||
## to the Unicode string.
|
||||
## It provides a `unidecode proc <#unidecode,string>`_ that does
|
||||
## Unicode to ASCII transliterations: It finds the sequence of ASCII characters
|
||||
## that is the closest approximation to the Unicode string.
|
||||
##
|
||||
## For example, the closest to string "Äußerst" in ASCII is "Ausserst". Some
|
||||
## information is lost in this transformation, of course, since several Unicode
|
||||
## strings can be transformed in the same ASCII representation. So this is a
|
||||
## strictly one-way transformation. However a human reader will probably
|
||||
## still be able to guess what original string was meant from the context.
|
||||
## strings can be transformed to the same ASCII representation. So this is a
|
||||
## strictly one-way transformation. However, a human reader will probably
|
||||
## still be able to guess from the context what the original string was.
|
||||
##
|
||||
## This module needs the data file "unidecode.dat" to work: This file is
|
||||
## embedded as a resource into your application by default. But you an also
|
||||
## define the symbol ``--define:noUnidecodeTable`` during compile time and
|
||||
## use the `loadUnidecodeTable` proc to initialize this module.
|
||||
## This module needs the data file `unidecode.dat` to work: This file is
|
||||
## embedded as a resource into your application by default. You can also
|
||||
## define the symbol `--define:noUnidecodeTable` during compile time and
|
||||
## use the `loadUnidecodeTable proc <#loadUnidecodeTable>`_ to initialize
|
||||
## this module.
|
||||
|
||||
import unicode
|
||||
import std/unicode
|
||||
|
||||
when not defined(noUnidecodeTable):
|
||||
import strutils
|
||||
import std/strutils
|
||||
|
||||
const translationTable = splitLines(slurp"unidecode/unidecode.dat")
|
||||
else:
|
||||
@@ -38,10 +39,10 @@ else:
|
||||
var translationTable: seq[string]
|
||||
|
||||
proc loadUnidecodeTable*(datafile = "unidecode.dat") =
|
||||
## loads the datafile that `unidecode` to work. This is only required if
|
||||
## the module was compiled with the ``--define:noUnidecodeTable`` switch.
|
||||
## This needs to be called by the main thread before any thread can make a
|
||||
## call to `unidecode`.
|
||||
## Loads the datafile that `unidecode <#unidecode,string>`_ needs to work.
|
||||
## This is only required if the module was compiled with the
|
||||
## `--define:noUnidecodeTable` switch. This needs to be called by the
|
||||
## main thread before any thread can make a call to `unidecode`.
|
||||
when defined(noUnidecodeTable):
|
||||
newSeq(translationTable, 0xffff)
|
||||
var i = 0
|
||||
@@ -53,10 +54,11 @@ proc unidecode*(s: string): string =
|
||||
## Finds the sequence of ASCII characters that is the closest approximation
|
||||
## to the UTF-8 string `s`.
|
||||
runnableExamples:
|
||||
assert unidecode("北京") == "Bei Jing "
|
||||
doAssert unidecode("北京") == "Bei Jing "
|
||||
doAssert unidecode("Äußerst") == "Ausserst"
|
||||
|
||||
result = ""
|
||||
for r in runes(s):
|
||||
var c = int(r)
|
||||
if c <=% 127: add(result, chr(c))
|
||||
elif c <% translationTable.len: add(result, translationTable[c-128])
|
||||
elif c <% translationTable.len: add(result, translationTable[c - 128])
|
||||
|
||||
Reference in New Issue
Block a user