Madhu <
eno...@meer.net> writes:
> Maybe READ-LINE's API is enough without the READ-SEQUENCE parts which
> are confusing
They're not confusing to me, I've used them a lot.
It's perfectly natural to use parts of a buffer with READ-SEQUENCE and
other operations, when you are using a buffer.
I'd propose the following reference implementation. An actual
implementation can be more optimized, because it can have
knowledge of the length of the next line to read from the kernel
buffers, or its own buffers (in buffered streams).
While READ-LINE-INTO-SEQUENCE wants to avoid copying data (e.g. an
implementation could call the kernel with a pointer into the buffer
itself), the unix API doesn't let the userspace process know how many
bytes have been buffered for a line input (definitely not for a file,
and not even from the tty device driver, check man 4 tty_ioctl; for a
tty in line discipline, the information would be present in the
driver, but in this only case where we could optimize the thing, there's
no point since 1- it's slow human input, 2- it's a small input: humans
don't usually input megacharacter lines). So in any case, we will have
at least one internal buffer (kernel or userspace), and one copying to
our buffer. The kernel doesn't allow us to scan its buffer, so an
implementation had better have its own userspace buffers, instruct the
kernel to fill them (so direct from the device to the userspace buffer),
and then the implementation can scan it for the newline and copy it to
our buffer.
Finally, notice that if you want to have performance with line input
with files, you can always implement a record structure on a unix file
(be it variable record length or fixed record length), and then there's
no need to scan for a newline anymore.
------------------------------------------------------------------------
;; Push :COSTLY-ASSERT onto *FEATURES* at compile time, load time and when
;; evaluated, so that the forms guarded by #+costly-assert further down in
;; this file are read in rather than skipped.  This must come before those
;; forms are read.
(eval-when (:compile-toplevel :load-toplevel :execute)
(pushnew :costly-assert *features*))
;; Request inlining of the two small helpers defined right after this form;
;; the declamation has to precede their definitions to take effect.
(declaim (inline %read-char-until %finish-read-line-into-sequence))
(defun %read-char-until (stream recursivep store)
  "Feed the characters of STREAM one by one to the function STORE.

Reading stops as soon as the stream is exhausted or STORE returns false.

STREAM:     a character input stream.
RECURSIVEP: passed through as the RECURSIVE-P argument of READ-CHAR.
STORE:      a function of one character; a true result asks for more input.

RETURN:     NIL when end-of-file stopped the loop, otherwise the last
            character read (the one for which STORE returned false)."
  (do ((ch (read-char stream nil nil recursivep)
           (read-char stream nil nil recursivep)))
      ;; Stop on end-of-file (CH is NIL) or when STORE refuses to continue.
      ((or (null ch) (not (funcall store ch)))
       ch)))
(defun %finish-read-line-into-sequence (ch buffer stream eof-error-p eof-value start)
  "Compute the results of a line read from the character CH that ended it.

CH:          the last character read, or NIL if end-of-file was reached.
BUFFER:      the sequence that was filled.
STREAM:      the stream read from; reported in the END-OF-FILE condition.
EOF-ERROR-P: when true, end-of-file signals an END-OF-FILE error.
EOF-VALUE:   first value returned on end-of-file when EOF-ERROR-P is false.
START:       the index following the last element stored.

RETURN:      BUFFER (or EOF-VALUE on end-of-file), START, and whether CH is
             the #\\Newline character."
  (cond
    ;; A character ended the read: either a newline or the one that filled
    ;; the buffer.
    (ch
     (values buffer start (eql ch #\Newline)))
    ;; End-of-file, and the caller asked for an error.
    (eof-error-p
     (error 'end-of-file :stream stream))
    ;; End-of-file, reported through EOF-VALUE.
    (t
     (values eof-value start nil))))
(defgeneric read-line-into-sequence (sequence input-stream
                                     &key
                                       eof-error-p eof-value recursivep
                                       start end)
  (:documentation "
Reads characters from the INPUT-STREAM until a #\\Newline is found, and
stores the characters read into the SEQUENCE, from START, up to below
END.  If END is reached before the #\\Newline character is read, then
reading stops there and the third result value is NIL.  The #\\Newline
character is not stored.  No other slot of the SEQUENCE is modified
than those between START and POSITION.

An error is signaled, before anything is read, when START and END do
not designate a valid interval of the SEQUENCE.

RETURN:         VALUE, POSITION, NEWLINE-SEEN-P

VALUE:          Either SEQUENCE or EOF-VALUE depending on whether an
                end-of-file has been seen.

SEQUENCE:       A sequence (OR LIST VECTOR).  If specialized, the vector
                must have an element-type that is a supertype of the
                stream element-type.  If a fill-pointer is present, it
                is ignored.

POSITION:       The index in the SEQUENCE of the first element not
                written.  (<= START POSITION (OR END (LENGTH SEQUENCE)))

NEWLINE-SEEN-P: Whether a #\\Newline has been read.

INPUT-STREAM:   an input stream.  The element-type of the INPUT-STREAM
                must be a subtype of CHARACTER.

EOF-ERROR-P:    a generalized boolean.  The default is true.  If true,
                then an END-OF-FILE error is signaled upon end of file.

EOF-VALUE:      an object.  The default is NIL.

RECURSIVEP:     a generalized boolean.  The default is NIL.  If
                RECURSIVEP is true, this call is expected to be
                embedded in a higher-level call to read or a similar
                function used by the Lisp reader.

START, END:     bounding index designators of SEQUENCE.
                The defaults for START and END are 0 and NIL, respectively.
")
  (:method ((buffer vector) (stream stream) &key
                                              (eof-error-p t)
                                              (eof-value nil)
                                              (recursivep nil)
                                              (start 0)
                                              (end nil))
    (check-type start (and fixnum (integer 0)))
    (check-type end (or null (and fixnum (integer 0))))
    ;; ARRAY-DIMENSION rather than LENGTH: the documented contract is that
    ;; a fill-pointer is ignored, and LENGTH would stop at the fill-pointer
    ;; while AREF (used to store) does not.
    (let* ((capacity (array-dimension buffer 0))
           (end (or end capacity)))
      (cond
        ;; Reject both inverted intervals and an END beyond the vector;
        ;; the latter would otherwise lead to an out-of-bounds AREF after
        ;; characters have already been consumed from the stream.
        ((or (< end start) (< capacity end))
         (error "Bad interval for sequence operation on ~S: start = ~A, end = ~A"
                buffer start end))
        ;; Empty interval: nothing can be stored, nothing is read.
        ((= end start)
         (values buffer start nil))
        (t
         ;; START is advanced by the closure while reading; the arguments
         ;; are evaluated left to right, so the final START passed below is
         ;; the position after the last stored character.
         (%finish-read-line-into-sequence
          (%read-char-until stream recursivep
                            (lambda (ch)
                              (unless (char= #\Newline ch)
                                (setf (aref buffer start) ch)
                                (incf start)
                                (< start end))))
          buffer stream eof-error-p eof-value start)))))
  (:method ((buffer list) (stream stream) &key
                                            (eof-error-p t)
                                            (eof-value nil)
                                            (recursivep nil)
                                            (start 0)
                                            (end nil))
    (check-type start (and fixnum (integer 0)))
    (check-type end (or null (and fixnum (integer 0))))
    (let ((current buffer))
      ;; Advance CURRENT to the START-th cons, failing if the list is
      ;; shorter than START.
      (loop
        :repeat start
        :do (if (null current)
                (error "Bad interval for sequence operation on ~S: start = ~A, end = ~A"
                       buffer start end)
                (pop current)))
      #+costly-assert (assert (<= start (length buffer)))
      (cond
        ;; Empty interval: explicit END equal to START, or START at the end
        ;; of the list when END defaults to the list length.
        ((if end
             (= start end)
             (null current))
         (values buffer start nil))
        ;; Invalid interval: START at the end of the list with a different
        ;; END, END before START, or END beyond the end of the list.  The
        ;; NTHCDR probe checks that the cons at index END-1 exists; without
        ;; it the store closure would run off the list and try to modify
        ;; the CAR of NIL.
        ((or (null current)
             (and end
                  (or (< end start)
                      (null (nthcdr (- end start 1) current)))))
         (error "Bad interval for sequence operation on ~S: start = ~A, end = ~A"
                buffer start end))
        (t
         #+costly-assert (assert (and (or (null end)
                                          (<= start end (length buffer)))
                                      (< start (length buffer))))
         (%finish-read-line-into-sequence
          (%read-char-until stream recursivep
                            (lambda (ch)
                              (unless (char= #\Newline ch)
                                ;; Store into the current cons, then step
                                ;; to the next one.
                                (setf (car current) ch
                                      current (cdr current))
                                (incf start)
                                ;; Continue while below END, or while conses
                                ;; remain when END is defaulted.
                                (if end
                                    (< start end)
                                    current))))
          buffer stream eof-error-p eof-value start))))))
(defun test/read-line-into-sequence ()
  "Self-test for READ-LINE-INTO-SEQUENCE on a 40-element string and a
40-element list.

The same sequence of calls is run against both buffer kinds, reading the
three lines Hello / World / Howdy (the last one without a final newline).
Expected buffer contents are built programmatically from (index, text)
pairs instead of being written as 40-character literals, so the test does
not depend on exact whitespace inside string literals surviving transport.

RETURN: :SUCCESS when every assertion holds (a failed ASSERT signals an
        error instead)."
  (flet ((exercise (buffer filler result-type)
           ;; ~% yields #\Newline, so the input does not rely on a
           ;; multi-line string literal.
           (with-input-from-string (input (format nil "Hello~%World~%Howdy"))
             (flet ((image (&rest placements)
                      ;; Expected buffer: 40 FILLER elements, overwritten by
                      ;; each TEXT at its INDEX; PLACEMENTS alternates
                      ;; INDEX and TEXT.
                      (let ((cells (make-list 40 :initial-element filler)))
                        (loop :for (index text) :on placements :by #'cddr
                              :do (replace cells (coerce text 'list)
                                           :start1 index))
                        (coerce cells result-type)))
                    (results (&rest bounds)
                      (multiple-value-list
                       (apply #'read-line-into-sequence buffer input
                              :eof-error-p nil :eof-value :eof :recursivep nil
                              bounds)))
                    (signals-error-p (&rest bounds)
                      (handler-case
                          (progn
                            (apply #'read-line-into-sequence buffer input
                                   :eof-error-p nil :eof-value :eof
                                   :recursivep nil
                                   bounds)
                            nil)
                        (error () t))))
               ;; Empty interval: nothing is read, nothing is stored.
               (assert (equal (results :start 1 :end 1)
                              (list (image) 1 nil)))
               ;; Invalid intervals must signal an error.
               (assert (signals-error-p :start 2 :end 1))
               (assert (signals-error-p :start 41))
               ;; A whole line, terminated by its newline.
               (assert (equal (results :start 0)
                              (list (image 0 "Hello") 5 t)))
               ;; END reached before the newline.
               (assert (equal (results :start 10 :end 13)
                              (list (image 0 "Hello" 10 "Wor") 13 nil)))
               ;; Remainder of the interrupted line.
               (assert (equal (results :start 20)
                              (list (image 0 "Hello" 10 "Wor" 20 "ld") 22 t)))
               ;; End of the buffer reached before the newline.
               (assert (equal (results :start 37)
                              (list (image 0 "Hello" 10 "Wor" 20 "ld" 37 "How")
                                    40 nil)))
               ;; End-of-file after two more characters.
               (assert (equal (results :start 30)
                              '(:eof 32 nil)))))))
    (exercise (make-array 40 :element-type 'character :initial-element #\space)
              #\space 'string)
    (exercise (make-list 40 :initial-element 0)
              0 'list))
  :success)
;; Run the self-test as soon as this form is evaluated (e.g. at load time).
(test/read-line-into-sequence)
------------------------------------------------------------------------