mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-20 06:20:38 +00:00
Merge pull request #3143 from def-/readline-faster
Improve performance of readLine by using fgets
This commit is contained in:
@@ -2635,17 +2635,17 @@ when not defined(JS): #and not defined(nimscript):
|
||||
|
||||
proc readLine*(f: File): TaintedString {.tags: [ReadIOEffect], benign.}
  ## Reads a line of text from the file `f`. May throw an IO exception.
  ## A line of text may be delimited by ``LF`` or ``CRLF``. The newline
  ## character(s) are not part of the returned string.
|
||||
|
||||
proc readLine*(f: File, line: var TaintedString): bool {.tags: [ReadIOEffect],
              benign.}
  ## Reads a line of text from the file `f` into `line`. `line` must not be
  ## ``nil``! May throw an IO exception.
  ## A line of text may be delimited by ``LF`` or ``CRLF``. The newline
  ## character(s) are not part of the returned string. Returns ``false``
  ## if the end of the file has been reached, ``true`` otherwise. If
  ## ``false`` is returned `line` contains no new data.
|
||||
|
||||
proc writeLn*[Ty](f: File, x: varargs[Ty, `$`]) {.inline,
|
||||
tags: [WriteIOEffect], benign, deprecated.}
|
||||
|
||||
@@ -31,26 +31,6 @@ proc fprintf(f: File, frmt: cstring) {.importc: "fprintf",
|
||||
proc strlen(c: cstring): int {.
|
||||
importc: "strlen", header: "<string.h>", tags: [].}
|
||||
|
||||
when defined(posix):
|
||||
proc getc_unlocked(stream: File): cint {.importc: "getc_unlocked",
|
||||
header: "<stdio.h>", tags: [ReadIOEffect].}
|
||||
|
||||
proc flockfile(stream: File) {.importc: "flockfile", header: "<stdio.h>",
|
||||
tags: [ReadIOEffect].}
|
||||
|
||||
proc funlockfile(stream: File) {.importc: "funlockfile", header: "<stdio.h>",
|
||||
tags: [ReadIOEffect].}
|
||||
elif false:
|
||||
# doesn't work on Windows yet:
|
||||
proc getc_unlocked(stream: File): cint {.importc: "_fgetc_nolock",
|
||||
header: "<stdio.h>", tags: [ReadIOEffect].}
|
||||
|
||||
proc flockfile(stream: File) {.importc: "_lock_file", header: "<stdio.h>",
|
||||
tags: [ReadIOEffect].}
|
||||
|
||||
proc funlockfile(stream: File) {.importc: "_unlock_file", header: "<stdio.h>",
|
||||
tags: [ReadIOEffect].}
|
||||
|
||||
# C routine that is used here:
|
||||
proc fread(buf: pointer, size, n: int, f: File): int {.
|
||||
importc: "fread", header: "<stdio.h>", tags: [ReadIOEffect].}
|
||||
@@ -59,6 +39,10 @@ proc fseek(f: File, offset: clong, whence: int): int {.
|
||||
proc ftell(f: File): int {.importc: "ftell", header: "<stdio.h>", tags: [].}
|
||||
proc setvbuf(stream: File, buf: pointer, typ, size: cint): cint {.
|
||||
importc, header: "<stdio.h>", tags: [].}
|
||||
proc memchr(s: pointer, c: cint, n: csize): pointer {.
|
||||
importc: "memchr", header: "<string.h>", tags: [].}
|
||||
proc memset(s: pointer, c: cint, n: csize) {.
|
||||
header: "<string.h>", importc: "memset", tags: [].}
|
||||
|
||||
{.push stackTrace:off, profiler:off.}
|
||||
proc write(f: File, c: cstring) =
  ## Writes the C string `c` to `f` by forwarding both to `fputs`.
  fputs(c, f)
|
||||
@@ -86,40 +70,32 @@ const
|
||||
proc raiseEIO(msg: string) {.noinline, noreturn.} =
  ## Reports an I/O failure: hands `IOError` and `msg` to `sysFatal`.
  ## Marked `noreturn`, so control never comes back to the caller, and
  ## `noinline` so it is not expanded at each call site.
  sysFatal(IOError, msg)
|
||||
|
||||
when declared(getc_unlocked):
  proc readLine(f: File, line: var TaintedString): bool =
    ## Reads the next line of `f` into `line` one character at a time with
    ## `getc_unlocked`, holding the stream lock (`flockfile`) for the whole
    ## call. ``LF``, ``CR`` and ``CRLF`` end a line and are not stored in
    ## `line`. Returns ``false`` only when the end of the file is reached
    ## before any character was read; `line` is empty in that case.
    setLen(line.string, 0) # reuse the buffer!
    result = true
    flockfile(f)
    while true:
      var c = getc_unlocked(f)
      if c < 0'i32:
        # End of input: a partial last line still counts as a line.
        # Leave through `break` instead of `return` so that the lock taken
        # above is released on every path.
        result = line.len > 0
        break
      if c == 10'i32: break # LF
      if c == 13'i32:  # CR
        c = getc_unlocked(f) # is the next char LF?
        if c != 10'i32: ungetc(c, f) # no, put the character back
        break
      add line.string, chr(int(c))
    funlockfile(f)
else:
  proc readLine(f: File, line: var TaintedString): bool =
    ## Reads the next line of `f` into `line` one character at a time with
    ## `fgetc`. ``LF``, ``CR`` and ``CRLF`` end a line and are not stored in
    ## `line`. Returns ``false`` only when the end of the file is reached
    ## before any character was read; `line` is empty in that case.
    # of course this could be optimized a bit; but IO is slow anyway...
    # and it was difficult to get this CORRECT with Ansi C's methods
    setLen(line.string, 0) # reuse the buffer!
    result = true
    while true:
      var c = fgetc(f)
      if c < 0'i32:
        # End of input: a partial last line still counts as a line.
        result = line.len > 0
        break
      if c == 10'i32: break # LF
      if c == 13'i32:  # CR
        c = fgetc(f) # is the next char LF?
        if c != 10'i32: ungetc(c, f) # no, put the character back
        break
      add line.string, chr(int(c))
|
||||
proc readLine(f: File, line: var TaintedString): bool =
  ## Reads the next line of `f` into `line` using `fgets`, reusing the
  ## space `line` has already reserved and growing it in chunks as needed.
  ## `line` must not be ``nil``.
  ##
  ## A line ends at ``LF``; a ``CR`` directly in front of that ``LF`` is
  ## dropped as well. Neither character is stored in `line`. A final line
  ## that is not terminated by ``LF`` is returned as is.
  ##
  ## Returns ``false`` (with `line` emptied) when `fgets` fails before any
  ## data for this line was read, ``true`` otherwise.
  const
    fallbackSpace = 80 # used when `line` has no usable reserved capacity
    chunkSpace = 128   # size of every further chunk handed to fgets

  var pos = 0
  # Use the currently reserved space for a first try. fgets needs room for
  # at least one character plus its terminating NUL.
  var space = cast[PGenericSeq](line.string).space
  if space < 2: space = fallbackSpace
  line.string.setLen(space)

  while true:
    # memset to \l so that we can tell how far fgets wrote, even on EOF, where
    # fgets doesn't append an \l
    memset(addr line.string[pos], '\l'.ord, space)
    if fgets(addr line.string[pos], space, f) == nil:
      # Nothing could be read for this chunk. Earlier chunks (if any) hold a
      # complete, unterminated last line that must not be thrown away.
      line.string.setLen(pos)
      return pos > 0
    let m = memchr(addr line.string[pos], '\l'.ord, space)
    if m != nil:
      let last = cast[ByteAddress](m) - cast[ByteAddress](addr line.string[0])
      if last + 1 < pos + space and line.string[last+1] == '\0':
        # fgets always stores its terminating NUL right behind a newline it
        # read, so this \l came from the file.
        if last > 0 and line.string[last-1] == '\c':
          line.string.setLen(last-1)
        else:
          line.string.setLen(last)
      else:
        # This \l is our own sentinel: input ended without a newline and
        # the byte in front of the sentinel is the NUL written by fgets,
        # which is not part of the line.
        line.string.setLen(max(last-1, 0))
      return true
    # No \l found: the chunk is full and its last byte is the NUL written by
    # fgets. Continue reading on top of that NUL so it does not end up in
    # the middle of the line.
    inc pos, space-1
    space = chunkSpace
    line.string.setLen(pos+space)
|
||||
|
||||
proc readLine(f: File): TaintedString =
|
||||
result = TaintedString(newStringOfCap(80))
|
||||
|
||||
Reference in New Issue
Block a user