mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-05 12:37:46 +00:00
Add top of module comment. Re-order definitions to
type, then converter, then 3 iterators lowest- to highest-level (also fastest to slowest) including a new intermediate iterator lines(MemFile, buffer) that is more like readLine(File) in case that helps anyone port code. Add doc comments. Also have toString just use newString+c_memcpy instead of currently fragile toNimStr which Araq wants a separate PR for.
This commit is contained in:
@@ -11,6 +11,9 @@
|
||||
##
|
||||
## This module provides support for `memory mapped files`:idx:
|
||||
## (Posix's `mmap`:idx:) on the different operating systems.
|
||||
##
|
||||
## It also provides some fast iterators over lines in text files
|
||||
## delimited in Unix or Windows styles (or similarly delimited records).
|
||||
|
||||
when defined(windows):
|
||||
import winlean
|
||||
@@ -249,7 +252,23 @@ type MemSlice* = object
|
||||
data*: pointer
|
||||
size*: int
|
||||
|
||||
# Private binding to C's `memcpy` from <string.h>; call sites in this module
# pass the destination as `a`, the source as `b`, and the byte count as `n`.
proc c_memcpy(a, b: pointer, n: int) {.
  importc: "memcpy", header: "<string.h>" .}
|
||||
|
||||
proc toString*(ms: MemSlice): string {.inline.} =
  ## Return a Nim string built from a MemSlice.
  ##
  ## Allocates a new string of `ms.size` bytes and copies that many bytes
  ## from `ms.data` into it. A slice with `size == 0` yields the empty string.
  result = newString(ms.size)
  if ms.size > 0:
    # Only take the address of the first character when there is one:
    # indexing `result[0]` on an empty string is out of bounds.
    c_memcpy(addr(result[0]), ms.data, ms.size)
  # No explicit terminator write: `newString` already produces a properly
  # terminated string, and index `ms.size` lies outside the valid range.
|
||||
|
||||
iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} =
|
||||
## Iterates over [optional eat]delim-delimited slices in a MemFile.
|
||||
## Default delimiting is [\r]\l, which parses Unix or Windows text file lines.
|
||||
## Pass eat='\0' to be strictly delim-delimited.
|
||||
## This zero copy, memchr-limited method is probably the fastest way to
|
||||
## iterate through lines in a file, however the returned (data,size) objects
|
||||
## are NOT Nim strings or even terminated C strings. So, be careful how data
|
||||
## is accessed (e.g., use C mem* functions, not str* functions).
|
||||
proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
|
||||
importc: "memchr", header: "<string.h>" .}
|
||||
proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q)
|
||||
@@ -270,9 +289,20 @@ iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} =
|
||||
ms.data = cast[pointer](cast[int](ending) +% 1) # skip delim
|
||||
remaining = mfile.size - (ms.data -! mfile.mem)
|
||||
|
||||
proc toString*(ms: MemSlice): string {.inline.} =
  ## Return a Nim string built from a MemSlice by passing its data pointer
  ## (viewed as a `cstring`) and its size to the imported `toNimStr` routine.
  proc toNimStr(str: cstring, len: int): string {.importc: "toNimStr".}
  toNimStr(cast[cstring](ms.data), ms.size)
|
||||
iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l',
                eat='\r'): TaintedString {.inline.} =
  ## Replace contents of passed buffer with each new line, like readLine(File).
  ## Default delimiting is [\r]\l, which parses Unix or Windows text file lines.
  ## Pass eat='\0' to be strictly delim-delimited.
  ##
  ## For every slice produced by `memSlices(mfile, delim, eat)` the buffer is
  ## resized to the slice length, filled with the slice bytes, and yielded.
  ## The same `buf` is reused on every iteration, so copy it if a line must
  ## outlive the loop body.
  for ms in memSlices(mfile, delim, eat):
    buf.setLen(ms.size)
    if ms.size > 0:
      # Empty lines are common; `buf[0]` does not exist for them, so skip the
      # copy rather than index out of bounds.
      c_memcpy(addr(buf[0]), ms.data, ms.size)
    # `setLen` keeps the string terminated; writing to index `ms.size` would
    # be one past the last valid element.
    yield buf
|
||||
|
||||
iterator lines*(mfile: MemFile): string {.inline.} =
  ## Yield every slice that `memSlices` produces for `mfile` (using its
  ## default delimiter arguments), converted to a Nim string via `toString`.
  for slice in memSlices(mfile):
    yield toString(slice)
|
||||
iterator lines*(mfile: MemFile, delim='\l',
                eat='\r'): TaintedString {.inline.} =
  ## Return each line in a file as a Nim string, like lines(File).
  ## Default delimiting is [\r]\l, which parses Unix or Windows text file lines.
  ## Pass eat='\0' to be strictly delim-delimited.
  ##
  ## Implemented on top of the buffer-taking `lines` overload: one internal
  ## buffer (initial capacity 80) is refilled and yielded for every line.
  var lineBuf = TaintedString(newStringOfCap(80))
  for filled in lines(mfile, lineBuf, delim, eat):
    # The inner iterator has already refreshed `lineBuf`; hand it on as-is.
    yield lineBuf
|
||||
|
||||
Reference in New Issue
Block a user