Myrddin: libbio

Summary

Myrddin's BIO library is used for buffered input and output. It is a fairly simple library that handles reading and writing from file descriptors.

The concepts in libbio should be familiar to anyone that has used a buffered IO library in most languages. The usual concepts are supported: Files, formatted output, binary and ascii reads, and so on. There are also some utility functions to deal with reading integers in a known endianness.

One thing to keep in mind with libbio is that it does not attempt to flush buffers on program exit, which means that any unwritten, unflushed data will be lost. Closing the bio files will prevent this issue.

pkg bio =
    /* access mode requested when a bio file is created */
    type mode
    const Rd    : mode
    const Wr    : mode
    const Rw    : mode

    /* all state is internal except the descriptor, exposed for poll() and friends */
    type file = struct
        fd  : std.fd
    ;;

    type lineiter
    type chariter

    /* error half of the std.result(@a, err) values returned by the i/o calls */
    type err = union
        `Eio
        `Eof
        `Ebadf
    ;;

    type ioerr = union
        `Ebadfile
        `Ebadbuf
        `Ebadfd
        `Eioerr
    ;;

    /* user supplied i/o callbacks, consumed by mk */
    type vtab = struct
        read    : (buf : byte[:] -> std.result(std.size, std.errno))
        write   : (buf : byte[:] -> std.result(std.size, std.errno))
        seek    : (idx : std.off -> std.result(std.off, std.errno))
        close   : (-> bool)
    ;;

    impl iterable lineiter
    impl iterable chariter

    /* creation */
    const mkfile    : (fd : std.fd, mode : mode -> file#)
    const mkmem : (buf : byte[:] -> file#)
    const mk    : (mode : mode, vt : vtab   -> file#)
    const open  : (path : byte[:], mode : mode  -> std.result(file#, byte[:]))
    const dial  : (srv  : byte[:], mode : mode  -> std.result(file#, byte[:]))
    const create    : (path : byte[:], mode : mode, perm : int  -> std.result(file#, byte[:]))
    const close : (f : file# -> bool)
    const free  : (f : file# -> void)

    /* basic i/o. Returns sub-buffer when applicable. */
    const write : (f : file#, src : byte[:] -> std.result(std.size, err))
    const read  : (f : file#, dst : byte[:] -> std.result(byte[:], err))
    const flush : (f : file# -> bool)

    /* seeking */
    const seek  : (f : file#, std.off -> std.result(std.off, ioerr))

    /* single unit operations */
    const putb  : (f : file#, b : byte  -> std.result(std.size, err))
    const putc  : (f : file#, c : char  -> std.result(std.size, err))
    const getb  : (f : file# -> std.result(byte, err))
    const getc  : (f : file# -> std.result(char, err))

    /* peeking */
    const peekb : (f : file# -> std.result(byte, err))
    const peekc : (f : file# -> std.result(char, err))

    /* delimited read; returns freshly allocated buffer. */
    const readln    : (f : file#    -> std.result(byte[:], err))
    const readto    : (f : file#, delim : byte[:]   -> std.result(byte[:], err))
    const skipto    : (f : file#, delim : byte[:]   -> bool)
    const skipspace : (f : file# -> bool)

    /* iterators */
    const byline    : (f : file# -> lineiter)
    const bychar    : (f : file# -> chariter)

    /* formatted i/o */
    const put   : (f : file#, fmt : byte[:], args : ... -> std.result(std.size, err))

    /* unsigned big endian reads */
    generic getbe8  : (f : file# -> std.result(@a::(numeric,integral), err))
    generic getbe16 : (f : file# -> std.result(@a::(numeric,integral), err))
    generic getbe32 : (f : file# -> std.result(@a::(numeric,integral), err))
    generic getbe64 : (f : file# -> std.result(@a::(numeric,integral), err))

    /* unsigned little endian reads */
    generic getle8  : (f : file# -> std.result(@a::(numeric,integral), err))
    generic getle16 : (f : file# -> std.result(@a::(numeric,integral), err))
    generic getle32 : (f : file# -> std.result(@a::(numeric,integral), err))
    generic getle64 : (f : file# -> std.result(@a::(numeric,integral), err))

    /* unsigned big endian */
    generic putbe8  : (f : file#, v : @a::(numeric,integral) -> std.result(std.size, err))
    generic putbe16 : (f : file#, v : @a::(numeric,integral) -> std.result(std.size, err))
    generic putbe32 : (f : file#, v : @a::(numeric,integral) -> std.result(std.size, err))
    generic putbe64 : (f : file#, v : @a::(numeric,integral) -> std.result(std.size, err))

    /* unsigned little endian */
    generic putle8  : (f : file#, v : @a::(numeric,integral) -> std.result(std.size, err))
    generic putle16 : (f : file#, v : @a::(numeric,integral) -> std.result(std.size, err))
    generic putle32 : (f : file#, v : @a::(numeric,integral) -> std.result(std.size, err))
    generic putle64 : (f : file#, v : @a::(numeric,integral) -> std.result(std.size, err))
;;

The Data Structures

type file = struct
        fd  : std.fd
;;

The bio.file type contains the state required to implement buffered files. All of the state is internal, except for the file descriptor, which is exposed for the purposes of poll() and friends. Reading from it directly may lead to inconsistent buffer state, and is strongly not recommended.

type err = union
        `Eio
        `Eof
        `Ebadf
;;

The std.result(@a, err) union returns the result of the read operation, which is going to either be a result of type '@a', an error-free end of file, or an error of type err. All of these conditions are transient, and are retried on subsequent calls to the IO operations. For example, if reading a file returns `Eof, but data is appended to a file, then a subsequent read will return data again.

In general, errors will be queued up, so if there is a buffered read or write that partly succeeds but is interrupted halfway, the error will be reported on the next operation. This way, partial operations are not lost.

type vtab = struct
    read    : (buf : byte[:] -> std.result(std.size, std.errno))
    write   : (buf : byte[:] -> std.result(std.size, std.errno))
    seek    : (idx : std.off -> std.result(std.off, std.errno))
    close   : (-> bool)
;;

If file i/o isn't sufficient, libbio provides a way of hooking in your own read and write closures. When passed to mk, these functions will be called instead of the usual file io functions.

File Creation and Opening

const Rd    : mode
const Wr    : mode
const Rw    : mode

When opening a file, one of the above flags must be passed to set the mode of the file. Rd indicates that the bio file should be read only, Wr indicates that it should be write only, and Rw indicates that it should be both readable and writable.

Bio file creation

const mk    : (mode : mode, vt : vtab   -> file#)

This function creates a bio file from a vtable and a mode. The functions provided are called instead of the standard, file-descriptor based I/O calls. This allows the opened file to provide any sort of data it wishes.

const mkfile    : (fd : std.fd, mode : mode -> file#)

This function creates a bio file from a file descriptor and mode, returning a bio.file# which will buffer reads and/or writes from the fd. This function assumes that you are passing it a correctly initialized fd, and will always succeed. If the FD is incorrectly configured, uses of it will error out.

Returns: A buffered file wrapping the file descriptor fd

const open  : (path : byte[:], mode : mode  -> std.result(file#, byte[:]))

This function attempts to open the path passed in with the mode, returning a result which is either a file, or a string representing the error that prevented the file from being opened.

const dial  : (srv  : byte[:], mode : mode  -> std.result(file#, byte[:]))

This function attempts to dial a server using the dial string srv, returning a result which is either a file, or a string describing the failure to dial the server.

Returns: A buffered file wrapping the file descriptor fd, or an error.

const mkmem : (buf : byte[:] -> file#)

This function creates a bio file from a memory buffer, returning a bio.file#. This bio.file will read from and write into the buffer that is provided.

Returns: A buffered file wrapping the memory buffer buf.

Returns: A buffered file representing path opened with the requested permissions.

const dial  : (srv  : byte[:], mode : mode  -> std.result(file#, byte[:]))

This function is similar to open, however, this function will open a connection via a dialstring (as in std.dial), and buffer the fd returned from that.

Returns: A buffered file representing dialstr opened with the requested permissions.

const create    : (path : byte[:], mode : mode, perm : int  -> std.result(file#, byte[:]))

This function is similar to open, however, this function will attempt to atomically create and open the file descriptor.

Returns: A buffered file representing path opened with the requested permissions.

const close : (f : file# -> bool)

Closes the file descriptor that the bio file is wrapping, and frees any resources used for buffering it. Any data that has not yet been written is flushed.

Returns: true if flushing the file succeeded, false if it failed.

const free  : (f : file# -> void)

Frees any resources used for the file descriptor, but leaves it open. This is useful if the file descriptor has been 'stolen' using bio.mkfile, and is not owned by the bio file.

const flush : (f : file# -> bool)

Writes out any buffered data that has not yet been sent to the backing file descriptor.

Returns: true if flushing the file succeeded, false if it failed.

Binary I/O

const write : (f : file#, src : byte[:] -> std.result(std.size, err))

Writes bytes from the buffer src to a bio file, returning the number of bytes written in the case of success. This number may be smaller than the size of the buffer, but will never be larger.

const read  : (f : file#, dst : byte[:] -> std.result(byte[:], err))

Reads from a bio file, into the buffer 'dst', returning the section of the buffer that was read in the result.

const seek  : (f : file#, std.off -> std.result(std.off, ioerr))

Seeks the bio file to the given absolute offset.

const flush : (f : file# -> bool)

Flush attempts to clear the buffers within f, writing everything within the buffers and discarding them. If writing fails, false is returned.

Single Unit Operations

    /* single unit operations */
const putb  : (f : file#, b : byte  -> std.result(std.size, err))

Writes a single byte out to the file 'f', returning a status. If it is successful, the return value represents the number of bytes written. For single byte writes, this value is unsurprisingly always 1.

const putc  : (f : file#, c : char  -> std.result(std.size, err))

Writes a single unicode character out to the file 'f', returning a status. This character is encoded in utf-8. If it is successful, the return value represents the number of bytes written, varying between 1 and 4.

const getb  : (f : file# -> std.result(byte, err))

Reads a single byte from the file f, returning a status. If this read succeeds, the next byte in the file is returned.

const getc  : (f : file# -> std.result(char, err))

Reads a unicode character from the file f, returning a status. If this read was successful, the next character is returned from the file. The file is assumed to be encoded in utf-8.

/* peeking */
const peekb : (f : file# -> std.result(byte, err))
const peekc : (f : file# -> std.result(char, err))

Both peekb and peekc are similar to getb and getc respectively, although they return the value without advancing the position within the buffer.

Delimited Operations

/* delimited read; returns freshly allocated buffer. */
const readln    : (f : file#    -> std.result(byte[:], err))

Readln reads a single line of input from the file 'f', returning the line with the line ending characters removed. '\n', '\r', and '\r\n' are all accepted as valid line endings, and are treated interchangeably when reading.

The buffer is heap allocated, and must be freed with std.slfree

const readto    : (f : file#, delim : byte[:]   -> std.result(byte[:], err))

Readto is similar to readln, but instead of reading to a line ending, it will read up to the point where it finds the requested delimiter. The delimiter is an arbitrary sequence of bytes. If an end of file is reached, then the buffer up to the Eof is returned.

The buffer is heap allocated, and must be freed with std.slfree

const skipto    : (f : file#, delim : byte[:]   -> bool)

Skipto is identical to readto, but instead of allocating a buffer and reading into it, skipto will ignore the values and drop them. It returns true for any successful reads, and false if an error was encountered.

const skipspace : (f : file# -> bool)

Skipspace will consume any space tokens from the start of the input stream. It returns true for any successful reads, and false if an error was encountered.

Iteration

const byline    : (f : file# -> lineiter)
const bychar    : (f : file# -> chariter)

Byline will return an iterable type that will iterate through each line in a file. These lines are allocated on the heap, and are automatically freed at the end of the iteration.

Bychar behaves similarly, iterating over the contents of a file character by character.

Both stop at the end of file or error.

The returned iterator object is a value type, and does not need to be freed.

Formatted IO

const put   : (f : file#, fmt : byte[:], args : ... -> std.result(std.size, err))

This formats the output using the std.fmt api, and writes it to the file f. All custom formatters installed for std.fmt are used for this formatting. This call returns the number of bytes written on success.

Endian Aware Reads

generic getbe8  : (f : file# -> std.result(@a, err)) :: numeric,integral @a
generic getbe16 : (f : file# -> std.result(@a, err)) :: numeric,integral @a
generic getbe32 : (f : file# -> std.result(@a, err)) :: numeric,integral @a
generic getbe64 : (f : file# -> std.result(@a, err)) :: numeric,integral @a

generic getle8  : (f : file# -> std.result(@a, err)) :: numeric,integral @a
generic getle16 : (f : file# -> std.result(@a, err)) :: numeric,integral @a
generic getle32 : (f : file# -> std.result(@a, err)) :: numeric,integral @a
generic getle64 : (f : file# -> std.result(@a, err)) :: numeric,integral @a

The functions above all read from a bio file, and return the value read on success. The 'be' variants read big endian values, and the 'le' variants read little endian values. The final result is converted to whatever numeric, integral type is desired. If the value does not fit within the bounds of the type, it is truncated.

The number of bytes read is determined by the number at the end of the function name. For example, getbe8() will read 8 bits, or one byte from the stream. getle64 will get a 64 bit, or 8 byte, value. The number of bytes consumed is independent of the size of the type being assigned to.

generic putbe8  : (f : file#, v : @a -> std.result(std.size, err)) :: numeric,integral @a
generic putbe16 : (f : file#, v : @a -> std.result(std.size, err)) :: numeric,integral @a
generic putbe32 : (f : file#, v : @a -> std.result(std.size, err)) :: numeric,integral @a
generic putbe64 : (f : file#, v : @a -> std.result(std.size, err)) :: numeric,integral @a

generic putle8  : (f : file#, v : @a -> std.result(std.size, err)) :: numeric,integral @a
generic putle16 : (f : file#, v : @a -> std.result(std.size, err)) :: numeric,integral @a
generic putle32 : (f : file#, v : @a -> std.result(std.size, err)) :: numeric,integral @a
generic putle64 : (f : file#, v : @a -> std.result(std.size, err)) :: numeric,integral @a

The functions above all write to a bio file, and return the number of bytes written on success. The number of bytes written will always be the size of the type. The 'be' variants write the value in a big endian representation, and the 'le' variants write it in a little endian representation.

The number of bytes written is determined by the number at the end of the function name. For example, putbe8() will write 8 bits, or one byte. putbe64 will write 64 bits, or 8 bytes. The number of bytes written is independent of the size of the type of the value being passed in.

Examples

The example below is the simplest program that creates and opens a file, and writes to it. It creates it with 0o644 permissions (ie, rw-r--r--), writes to it, and then immediately closes it. The result of this program should be a file called create-example containing the words "Hello user".

use std
use bio

const main = {
    var f

    /* create and open the file write-only, with 0o644 permissions */
    match bio.create("create-example", bio.Wr, 0o644)
    | `std.Err e:
        std.fatal("Failed to open file: {}\n", e)
    | `std.Ok created:
        f = created
    ;;

    bio.write(f, "Hello user\n")
    /* closing flushes any data that has not been written yet */
    bio.close(f)
}

The next example shows reading from a file called "lines", line by line. It should echo the lines, numbering them as it prints them:

use std
use bio

const main = {
    var f, i

    /* open the input read-only; there is nothing to do without it */
    match bio.open("lines", bio.Rd)
    | `std.Ok opened:   f = opened
    | `std.Err e:    std.fatal("Unable to open data file: {}\n", e)
    ;;

    i = 0
    while true
        match bio.readln(f)
        | `std.Err `bio.Eof:    break
        | `std.Err e:       std.fatal("err: {}\n", e)
        | `std.Ok ln:
            std.put("line {}: {}\n", i++, ln)
            /* readln hands back a heap allocated buffer owned by the caller */
            std.slfree(ln)
        ;;
    ;;

    /* release the descriptor and the buffers held by the bio file */
    bio.close(f)
}