Merge pull request #3143 from def-/readline-faster

Improve performance of readLine by using fgets
This commit is contained in:
Andreas Rumpf
2015-08-21 21:30:27 +02:00
2 changed files with 36 additions and 60 deletions

View File

@@ -2635,17 +2635,17 @@ when not defined(JS): #and not defined(nimscript):
proc readLine*(f: File): TaintedString {.tags: [ReadIOEffect], benign.}
## reads a line of text from the file `f`. May throw an IO exception.
## A line of text may be delimited by ``CR``, ``LF`` or
## ``CRLF``. The newline character(s) are not part of the returned string.
## A line of text may be delimited by ``LF`` or ``CRLF``. The newline
## character(s) are not part of the returned string.
proc readLine*(f: File, line: var TaintedString): bool {.tags: [ReadIOEffect],
benign.}
## reads a line of text from the file `f` into `line`. `line` must not be
## ``nil``! May throw an IO exception.
## A line of text may be delimited by ``CR``, ``LF`` or
## ``CRLF``. The newline character(s) are not part of the returned string.
## Returns ``false`` if the end of the file has been reached, ``true``
## otherwise. If ``false`` is returned `line` contains no new data.
## A line of text may be delimited by ``LF`` or ``CRLF``. The newline
## character(s) are not part of the returned string. Returns ``false``
## if the end of the file has been reached, ``true`` otherwise. If
## ``false`` is returned `line` contains no new data.
proc writeLn*[Ty](f: File, x: varargs[Ty, `$`]) {.inline,
tags: [WriteIOEffect], benign, deprecated.}

View File

@@ -31,26 +31,6 @@ proc fprintf(f: File, frmt: cstring) {.importc: "fprintf",
# Binding to the C function `strlen` from <string.h>. It is declared with an
# empty `tags` list, i.e. without any tracked effects.
proc strlen(c: cstring): int {.
importc: "strlen", header: "<string.h>", tags: [].}
when defined(posix):
  # Bindings to the <stdio.h> routines for reading a character without taking
  # the stream lock, and for taking/releasing that lock explicitly. All three
  # are tagged with `ReadIOEffect`.
  proc getc_unlocked(stream: File): cint {.
    importc: "getc_unlocked", header: "<stdio.h>", tags: [ReadIOEffect].}
  proc flockfile(stream: File) {.
    importc: "flockfile", header: "<stdio.h>", tags: [ReadIOEffect].}
  proc funlockfile(stream: File) {.
    importc: "funlockfile", header: "<stdio.h>", tags: [ReadIOEffect].}
elif false:
  # doesn't work on Windows yet:
  # (this branch is disabled by `elif false`; it maps the same three Nim names
  # onto `_fgetc_nolock`, `_lock_file` and `_unlock_file`)
  proc getc_unlocked(stream: File): cint {.
    importc: "_fgetc_nolock", header: "<stdio.h>", tags: [ReadIOEffect].}
  proc flockfile(stream: File) {.
    importc: "_lock_file", header: "<stdio.h>", tags: [ReadIOEffect].}
  proc funlockfile(stream: File) {.
    importc: "_unlock_file", header: "<stdio.h>", tags: [ReadIOEffect].}
# C routine that is used here:
# binding to `fread` from <stdio.h>, tagged with `ReadIOEffect`. The argument
# order (`buf`, `size`, `n`, `f`) is the one the C function is called with.
proc fread(buf: pointer, size, n: int, f: File): int {.
importc: "fread", header: "<stdio.h>", tags: [ReadIOEffect].}
@@ -59,6 +39,10 @@ proc fseek(f: File, offset: clong, whence: int): int {.
# Binding to the C function `ftell` from <stdio.h>; declared without any
# tracked effects (`tags: []`).
proc ftell(f: File): int {.importc: "ftell", header: "<stdio.h>", tags: [].}
# Binding to `setvbuf` from <stdio.h>. `importc` is given without an explicit
# name, so the C symbol is the same as the Nim identifier.
proc setvbuf(stream: File, buf: pointer, typ, size: cint): cint {.
importc, header: "<stdio.h>", tags: [].}
# Binding to the C function `memchr` from <string.h>. `readLine` calls it to
# look for a '\l' byte inside the chunk it handed to `fgets`, and compares the
# returned pointer against `nil` to tell whether one was found.
proc memchr(s: pointer, c: cint, n: csize): pointer {.
importc: "memchr", header: "<string.h>", tags: [].}
# Binding to the C function `memset` from <string.h>; the result of the C
# function is not exposed (the proc has no return type). `readLine` uses it to
# pre-fill the chunk it is about to pass to `fgets` with '\l' bytes.
proc memset(s: pointer, c: cint, n: csize) {.
header: "<string.h>", importc: "memset", tags: [].}
{.push stackTrace:off, profiler:off.}
proc write(f: File, c: cstring) =
  ## Writes the C string `c` to the file `f` by handing both to `fputs`.
  fputs(c, f)
@@ -86,40 +70,32 @@ const
proc raiseEIO(msg: string) {.noinline, noreturn.} =
  ## Reports an I/O failure by passing `IOError` and `msg` to `sysFatal`.
  ## Declared `noreturn`: control does not come back to the caller.
  sysFatal(IOError, msg)
when declared(getc_unlocked):
  proc readLine(f: File, line: var TaintedString): bool =
    ## Reads one line from `f` into `line`, one character at a time, while
    ## holding the stream lock. ``CR``, ``LF`` and ``CRLF`` all end a line and
    ## are not stored in `line`. Returns ``false`` only when the end of the
    ## file is hit before any character of a new line was read.
    setLen(line.string, 0) # reuse the buffer!
    flockfile(f)
    result = true
    while true:
      var c = getc_unlocked(f)
      if c < 0'i32:
        # End of input: a non-empty partial line still counts as a line.
        # Leave through `break` rather than `return` so that the stream lock
        # taken above is always released by the `funlockfile` call below.
        if line.len == 0: result = false
        break
      if c == 10'i32: break # LF
      if c == 13'i32: # CR
        c = getc_unlocked(f) # is the next char LF?
        if c != 10'i32: ungetc(c, f) # no, put the character back
        break
      add line.string, chr(int(c))
    funlockfile(f)
else:
  proc readLine(f: File, line: var TaintedString): bool =
    ## Reads one line from `f` into `line` using `fgetc`. ``CR``, ``LF`` and
    ## ``CRLF`` all end a line and are not stored in `line`. Returns ``false``
    ## only when the end of the file is hit before any character was read.
    # of course this could be optimized a bit; but IO is slow anyway...
    # and it was difficult to get this CORRECT with Ansi C's methods
    setLen(line.string, 0) # reuse the buffer!
    while true:
      var c = fgetc(f)
      if c < 0'i32:
        if line.len > 0: break
        else: return false
      if c == 10'i32: break # LF
      if c == 13'i32: # CR
        c = fgetc(f) # is the next char LF?
        if c != 10'i32: ungetc(c, f) # no, put the character back
        break
      add line.string, chr(int(c))
    result = true
proc readLine(f: File, line: var TaintedString): bool =
  ## Reads one line from `f` into `line` with `fgets`, reusing the space
  ## `line` already has reserved and growing it in 128 byte steps for longer
  ## lines. ``LF`` and ``CRLF`` end a line and are not stored in `line`.
  ## Returns ``false`` (with `line` set to empty) when nothing could be read
  ## any more, ``true`` otherwise.
  var pos = 0
  # Use the currently reserved space for a first try
  var space = cast[PGenericSeq](line.string).space
  # fgets needs room for at least one character plus its terminating NUL;
  # with less it cannot read anything and the file would look exhausted.
  if space < 2: space = 80
  line.string.setLen(space)
  while true:
    # memset to \l so that we can tell how far fgets wrote, even on EOF, where
    # fgets doesn't append an \l
    memset(addr line.string[pos], '\l'.ord, space)
    if fgets(addr line.string[pos], space, f) == nil:
      # Nothing more could be read. Whatever earlier rounds collected
      # (pos > 0) is a final line without a newline and must not be dropped.
      line.string.setLen(pos)
      return pos > 0
    let m = memchr(addr line.string[pos], '\l'.ord, space)
    if m != nil:
      # \l found: Could be our own or the one by fgets, in any case, we're done
      var last = cast[ByteAddress](m) - cast[ByteAddress](addr line.string[0])
      if last > 0 and line.string[last-1] == '\c':
        line.string.setLen(last-1)
        return true
      if last > 0 and line.string[last-1] == '\0':
        # A NUL right before the \l is either data or the terminator fgets
        # appended after a final line that has no newline:
        #   \0 \l \0       => the line really ends in a NUL character, keep it
        #   \0 \l \l       => NUL was put there by fgets, \l is our filler
        #   \0 \l <end>    => same, the filler is the last byte of the chunk
        # fgets never stores a newline in the last byte of the chunk, so a
        # \l found there is always our filler.
        if last >= pos + space - 1 or line.string[last+1] != '\0':
          dec last
      line.string.setLen(last)
      return true
    # No \l found: fgets filled the chunk and stored its terminating NUL in
    # the last byte. Continue reading on top of that NUL so it does not end up
    # inside the line.
    dec space
    inc pos, space
    space = 128 # read in 128 bytes at a time
    line.string.setLen(pos+space)
proc readLine(f: File): TaintedString =
result = TaintedString(newStringOfCap(80))