copyFile with bufferSize instead of hardcoded value (#22769)

- `copyFile` now accepts a `bufferSize` parameter instead of using a
hardcoded, poorly chosen value. Tiny diff.


# Performance

- Up to 1200% performance improvement (`5` GB/s → `65` GB/s in the benchmark below).


# Check it yourself

Execute:

```bash
# Measure raw dd throughput for power-of-two block sizes from 1 KiB to 1 MiB.
for i in $(seq 0 10); do
  # Block size in bytes (1024 * 2^i); dd treats a bare number as bytes.
  bs=$((1024 * 2**i))
  printf "%7s bytes\t" "$bs"
  # Run dd for 2 seconds, interrupt it with SIGINT so it prints its transfer
  # statistics, then keep only the throughput figure (e.g. "5.0 GB/s").
  timeout --foreground -sINT 2 dd bs="$bs" if=/dev/zero of=/dev/null 2>&1 | sed -n 's/.* \([0-9.,]* [GM]B\/s\)/\1/p'
done
```

(This script could presumably be ported to PowerShell for Windows; it
already works in Windows MinGW Bash.)


# Stats

- A hardcoded `bufferSize` of `8192` or `8000` bytes gives `5` GB/s.
- Setting `bufferSize` to `262144` bytes (256 KiB) gives `65` GB/s (script suggestion).

---------

Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
This commit is contained in:
Juan Carlos
2023-09-30 01:31:28 -03:00
committed by GitHub
parent 5eeafbf550
commit a38e3dcb1f
2 changed files with 11 additions and 9 deletions

View File

@@ -8,6 +8,7 @@
[//]: # "Changes:"
- Changed `std/osfiles.copyFile` to accept a `bufferSize` parameter instead of using a hardcoded buffer size.
[//]: # "Additions:"

View File

@@ -173,7 +173,7 @@ type
const copyFlagSymlink = {cfSymlinkAsIs, cfSymlinkFollow, cfSymlinkIgnore}
proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
proc copyFile*(source, dest: string, options = {cfSymlinkFollow}; bufferSize = 16_384) {.rtl,
extern: "nos$1", tags: [ReadDirEffect, ReadIOEffect, WriteIOEffect],
noWeirdTarget.} =
## Copies a file from `source` to `dest`, where `dest.parentDir` must exist.
@@ -202,6 +202,7 @@ proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
## On OSX, `copyfile` C api will be used (available since OSX 10.5) unless
## `-d:nimLegacyCopyFile` is used.
##
## `copyFile` accepts a `bufferSize` argument (in bytes) to tune I/O performance.
## See also:
## * `CopyFlag enum`_
## * `copyDir proc`_
@@ -210,8 +211,7 @@ proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
## * `removeFile proc`_
## * `moveFile proc`_
doAssert card(copyFlagSymlink * options) == 1, "There should be exactly " &
"one cfSymlink* in options"
doAssert card(copyFlagSymlink * options) == 1, "There should be exactly one cfSymlink* in options"
let isSymlink = source.symlinkExists
if isSymlink and (cfSymlinkIgnore in options or defined(windows)):
return
@@ -238,15 +238,14 @@ proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
if status2 != 0: raiseOSError(osLastError(), $(source, dest))
else:
# generic version of copyFile which works for any platform:
const bufSize = 8000 # better for memory manager
var d, s: File
if not open(s, source):raiseOSError(osLastError(), source)
if not open(d, dest, fmWrite):
close(s)
raiseOSError(osLastError(), dest)
var buf = alloc(bufSize)
var buf = alloc(bufferSize)
while true:
var bytesread = readBuffer(s, buf, bufSize)
var bytesread = readBuffer(s, buf, bufferSize)
if bytesread > 0:
var byteswritten = writeBuffer(d, buf, bytesread)
if bytesread != byteswritten:
@@ -254,13 +253,13 @@ proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
close(s)
close(d)
raiseOSError(osLastError(), dest)
if bytesread != bufSize: break
if bytesread != bufferSize: break
dealloc(buf)
close(s)
flushFile(d)
close(d)
proc copyFileToDir*(source, dir: string, options = {cfSymlinkFollow})
proc copyFileToDir*(source, dir: string, options = {cfSymlinkFollow}; bufferSize = 16_384)
{.noWeirdTarget, since: (1,3,7).} =
## Copies a file `source` into directory `dir`, which must exist.
##
@@ -268,12 +267,14 @@ proc copyFileToDir*(source, dir: string, options = {cfSymlinkFollow})
## if `source` is a symlink, copies the file symlink points to. `options` is
## ignored on Windows: symlinks are skipped.
##
## `copyFileToDir` accepts a `bufferSize` argument, forwarded to `copyFile`, to tune I/O performance.
##
## See also:
## * `CopyFlag enum`_
## * `copyFile proc`_
if dir.len == 0: # treating "" as "." is error prone
raise newException(ValueError, "dest is empty")
copyFile(source, dir / source.lastPathPart, options)
copyFile(source, dir / source.lastPathPart, options, bufferSize)
proc copyFileWithPermissions*(source, dest: string,