Reimplementation of formatSize to return a more meaningful value (fixes #4198).

2026-02-15 15:44:14 +00:00 · 2016-06-01 12:56:01 +01:00
parent 24a56a2cb6
commit d855ad7045
2 changed files with 95 additions and 23 deletions
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -1392,7 +1392,7 @@ type

 proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
                         precision: range[0..32] = 16;
-                         decimalSep = '.'): string {.
+                         decimalSep = '.', trim = false): string {.
                         noSideEffect, rtl, extern: "nsu$1".} =
  ## Converts a floating point value `f` to a string.
  ##
@@ -1404,6 +1404,8 @@ proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
  ## after the decimal point for Nim's ``biggestFloat`` type.
  ##
  ## If ``precision == 0``, it tries to format it nicely.
+  ##
+  ## If ``trim == true``, trailing zeros will be removed.
  when defined(js):
    var res: cstring
    case format
@@ -1423,6 +1425,7 @@ proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
    var
      frmtstr {.noinit.}: array[0..5, char]
      buf {.noinit.}: array[0..2500, char]
+      splResult: seq[string]
      L: cint
    frmtstr[0] = '%'
    if precision > 0:
@@ -1443,8 +1446,21 @@ proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
      if buf[i] in {'.', ','}: result[i] = decimalsep
      else: result[i] = buf[i]

+    # Trim trailing zeros if required (used by formatSize)
+    if trim and result.contains(decimalSep):
+      if result.contains('e'):
+        splResult = result.split('e')
+        result = splResult[0]
+      while result[result.high] == '0':
+        result.setLen(result.len-1)
+      if result[result.high] == decimalSep:
+        result.setLen(result.len-1)
+      if splResult.len > 0:
+        result &= "e" & splResult[1]
+
 proc formatFloat*(f: float, format: FloatFormatMode = ffDefault,
-                  precision: range[0..32] = 16; decimalSep = '.'): string {.
+                  precision: range[0..32] = 16; decimalSep = '.',
+                  trim = false): string {.
                  noSideEffect, rtl, extern: "nsu$1".} =
  ## Converts a floating point value `f` to a string.
  ##
@@ -1454,30 +1470,69 @@ proc formatFloat*(f: float, format: FloatFormatMode = ffDefault,
  ## of significant digits to be printed.
  ## `precision`'s default value is the maximum number of meaningful digits
  ## after the decimal point for Nim's ``float`` type.
-  result = formatBiggestFloat(f, format, precision, decimalSep)
+  ## If `trim` is set to true, trailing zeros will be removed.
+  result = formatBiggestFloat(f, format, precision, decimalSep, trim)

-proc formatSize*(bytes: BiggestInt, decimalSep = '.'): string =
-  ## Rounds and formats `bytes`. Examples:
+type
+  BinaryPrefixMode* = enum ## the different names for binary prefixes
+    bpIEC, # use the IEC/ISO standard prefixes such as kibi
+    bpColloquial # use the colloquial kilo, mega etc
+
+proc formatSize*(bytes: int64,
+                 decimalSep = '.',
+                 prefix = bpIEC,
+                 includeSpace = false): string =
+  ## Rounds and formats `bytes`.
+  ##
+  ## By default, uses the IEC/ISO standard binary prefixes, so 1024 will be
+  ## formatted as 1KiB.  Set prefix to `bpColloquial` to use the colloquial
+  ## names from the SI standard (e.g. k for 1000 being reused as 1024).
+  ##
+  ## `includeSpace` can be set to true to include the (SI preferred) space
+  ## between the number and the unit (e.g. 1 KiB).
+  ##
+  ## Examples:
  ##
  ## .. code-block:: nim
  ##
-  ##    formatSize(1'i64 shl 31 + 300'i64) == "2.204GB"
-  ##    formatSize(4096) == "4KB"
+  ##    formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
+  ##    formatSize((2.234*1024*1024).int) == "2.234MiB"
+  ##    formatSize(4096, includeSpace=true) == "4 KiB"
+  ##    formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
+  ##    formatSize(4096) == "4KiB"
+  ##    formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
  ##
-  template frmt(a, b, c: expr): expr =
-    let bs = $b
-    insertSep($a) & decimalSep & bs.substr(0, 2) & c
-  let gigabytes = bytes shr 30
-  let megabytes = bytes shr 20
-  let kilobytes = bytes shr 10
-  if gigabytes != 0:
-    result = frmt(gigabytes, megabytes, "GB")
-  elif megabytes != 0:
-    result = frmt(megabytes, kilobytes, "MB")
-  elif kilobytes != 0:
-    result = frmt(kilobytes, bytes, "KB")
+  const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"]
+  const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"]
+  var
+    xb: int64 = bytes
+    fbytes: float
+    last_xb: int64 = bytes
+    matchedIndex: int
+    prefixes: array[9, string]
+  if prefix == bpColloquial:
+    prefixes = collPrefixes
  else:
-    result = insertSep($bytes) & "B"
+    prefixes = iecPrefixes
+
+  # Iterate through prefixes seeing if value will be greater than
+  # 0 in each case
+  for index in 1..<prefixes.len:
+    last_xb = xb
+    xb = bytes div (1'i64 shl (index*10))
+    matchedIndex = index
+    if xb == 0:
+      xb = last_xb
+      matchedIndex = index - 1
+      break
+  # xb holds the integer quotient for the chosen prefix; matchedIndex is its index
+  fbytes = bytes.float / (1'i64 shl (matchedIndex*10)).float
+  result = formatFloat(fbytes, format=ffDecimal, precision=3,
+    decimalSep=decimalSep, trim=true)
+  if includeSpace:
+    result &= " "
+  result &= prefixes[matchedIndex]
+  result &= "B"

 proc findNormalized(x: string, inArray: openArray[string]): int =
  var i = 0
@@ -1673,9 +1728,19 @@ when isMainModule:
                                                   ["1,0e-11", "1,0e-011"]

  doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
-  when not defined(testing):
-    echo formatSize(1'i64 shl 31 + 300'i64) # == "4,GB"
-    echo formatSize(1'i64 shl 31)
+
+  block: # formatFloat trim tests
+    doAssert formatFloat(2.33, trim=true) == "2.33"
+    doAssert formatFloat(2.34e50, ffScientific, precision=8, trim=true) in
+             ["2.34e+50", "2.34e+050"]
+
+  block: # formatSize tests
+    doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
+    doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
+    doAssert formatSize(4096) == "4KiB"
+    doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
+    doAssert formatSize(4096, includeSpace=true) == "4 KiB"
+    doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"

  doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
           "The cat eats fish."
--- a/web/news.txt
+++ b/web/news.txt
@@ -50,6 +50,11 @@ Changes affecting backwards compatibility
  than changing the operation for even and odd numbers.
 - The ``round`` function now accepts a ``places`` argument to round to a
  given number of places (e.g. round 4.35 to 4.4 if ``places`` is 1).
+- In ``strutils.nim``, ``formatSize`` now returns a number representing the
+  size in conventional decimal format (e.g. 2.234GB meaning 2.234 GB rather
+  than meaning 2.285 GB as in the previous implementation).  By default it
+  also uses IEC prefixes (KiB, MiB, etc.) and optionally uses colloquial names
+  (kB, MB, etc.) and the (SI-preferred) space.


 Library Additions
@@ -63,6 +68,8 @@ Library Additions
  versions are for ``char`` and ``set[char]``.
 - Added ``splitDecimal`` to ``math.nim`` to split a floating point value
  into an integer part and a floating part (in the range -1<x<1).
+- Added ``trim`` argument to ``formatFloat`` in ``strutils.nim`` to trim
+  trailing zeros.


 Compiler Additions