123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634 |
- /*
- Copyright (c) 2005-2006, 2008-2009, 2013 Genome Research Ltd.
- Author: James Bonfield <jkb@sanger.ac.uk>
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
- Institute nor the names of its contributors may be used to endorse or promote
- products derived from this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #ifdef HAVE_CONFIG_H
- #include "io_lib_config.h"
- #endif
- #include <stdio.h>
- #include <stdlib.h>
- #include <errno.h>
- #include <string.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <fcntl.h>
- #include <unistd.h>
- #include <stdarg.h>
- #include "cram/os.h"
- #include "cram/mFILE.h"
- #include "cram/vlen.h"
- /*
- * This file contains memory-based versions of the most commonly used
- * (by io_lib) stdio functions.
- *
- * Actual file IO takes place either on opening or closing an mFILE.
- *
- * Coupled to this are a bunch of rather scary macros which can be obtained
- * by including stdio_hack.h. It is recommended though that you use mFILE.h
- * instead and replace fopen with mfopen (etc). This is more or less
- * mandatory if you wish to use both FILE and mFILE structs in a single file.
- */
- static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */
- /*
- * Reads the entirety of fp into memory. If 'fn' exists it is the filename
- * associated with fp. This will be used for more optimal reading (via a
- * stat to identify the size and a single read). Otherwise we use successive
- * reads until EOF.
- *
- * Returns a malloced buffer on success of length *size
- * NULL on failure
- */
- static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
- struct stat sb;
- char *data = NULL;
- size_t allocated = 0, used = 0;
- int bufsize = 8192;
- #ifdef _WIN32
- if (binary)
- _setmode(_fileno(fp), _O_BINARY);
- else
- _setmode(_fileno(fp), _O_TEXT);
- #endif
- if (fn && -1 != stat(fn, &sb)) {
- data = malloc(allocated = sb.st_size);
- bufsize = sb.st_size;
- } else {
- fn = NULL;
- }
- do {
- size_t len;
- if (used + bufsize > allocated) {
- allocated += bufsize;
- data = realloc(data, allocated);
- }
- len = fread(data + used, 1, allocated - used, fp);
- if (len > 0)
- used += len;
- } while (!feof(fp) && (fn == NULL || used < sb.st_size));
- *size = used;
- return data;
- }
- /*
- * Creates and returns m_channel[0].
- * We initialise this on the first attempted read, which then slurps in
- * all of stdin until EOF is met.
- */
- mFILE *mstdin(void) {
- if (m_channel[0])
- return m_channel[0];
- m_channel[0] = mfcreate(NULL, 0);
- if (NULL == m_channel[0]) return NULL;
- m_channel[0]->fp = stdin;
- return m_channel[0];
- }
- static void init_mstdin(void) {
- static int done_stdin = 0;
- if (done_stdin)
- return;
- m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1);
- m_channel[0]->mode = MF_READ;
- done_stdin = 1;
- }
- /*
- * Creates and returns m_channel[1]. This is the fake for stdout. It starts as
- * an empty buffer which is physically written out only when mfflush or
- * mfclose are called.
- */
- mFILE *mstdout(void) {
- if (m_channel[1])
- return m_channel[1];
- m_channel[1] = mfcreate(NULL, 0);
- if (NULL == m_channel[1]) return NULL;
- m_channel[1]->fp = stdout;
- m_channel[1]->mode = MF_WRITE;
- return m_channel[1];
- }
- /*
- * Stderr as an mFILE.
- * The code handles stderr by returning m_channel[2], but also checking
- * for stderr in fprintf (the common usage of it) to auto-flush.
- */
- mFILE *mstderr(void) {
- if (m_channel[2])
- return m_channel[2];
- m_channel[2] = mfcreate(NULL, 0);
- if (NULL == m_channel[2]) return NULL;
- m_channel[2]->fp = stderr;
- m_channel[2]->mode = MF_WRITE;
- return m_channel[2];
- }
- /*
- * For creating existing mFILE pointers directly from memory buffers.
- */
- mFILE *mfcreate(char *data, int size) {
- mFILE *mf = (mFILE *)malloc(sizeof(*mf));
- if (NULL == mf) return NULL;
- mf->fp = NULL;
- mf->data = data;
- mf->alloced = size;
- mf->size = size;
- mf->eof = 0;
- mf->offset = 0;
- mf->flush_pos = 0;
- mf->mode = MF_READ | MF_WRITE;
- return mf;
- }
- /*
- * Recreate an existing mFILE to house new data/size.
- * It also rewinds the file.
- */
- void mfrecreate(mFILE *mf, char *data, int size) {
- if (mf->data)
- free(mf->data);
- mf->data = data;
- mf->size = size;
- mf->alloced = size;
- mf->eof = 0;
- mf->offset = 0;
- mf->flush_pos = 0;
- }
- /*
- * Creates a new mFILE to contain the contents of the FILE pointer.
- * This mFILE is purely for in-memory operations and has no links to the
- * original FILE* it came from. It also doesn't close the FILE pointer.
- * Consider using mfreopen() is you need different behaviour.
- *
- * Returns mFILE * on success
- * NULL on failure.
- */
- mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) {
- mFILE *mf;
- /* Open using mfreopen() */
- if (NULL == (mf = mfreopen(path, mode_str, fp)))
- return NULL;
-
- /* Disassociate from the input stream */
- mf->fp = NULL;
- return mf;
- }
- /*
- * Converts a FILE * to an mFILE *.
- * Use this for wrapper functions to turn external prototypes requring
- * FILE * as an argument into internal code using mFILE *.
- */
- mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) {
- mFILE *mf;
- int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0;
- /* Parse mode:
- * r = read file contents (if truncated => don't read)
- * w = write on close
- * a = position at end of buffer
- * x = position at same location as the original fp, don't seek on flush
- */
- if (strchr(mode_str, 'r'))
- r = 1, mode |= MF_READ;
- if (strchr(mode_str, 'w'))
- w = 1, mode |= MF_WRITE | MF_TRUNC;
- if (strchr(mode_str, 'a'))
- w = a = 1, mode |= MF_WRITE | MF_APPEND;
- if (strchr(mode_str, 'b'))
- b = 1, mode |= MF_BINARY;
- if (strchr(mode_str, 'x'))
- x = 1;
- if (strchr(mode_str, '+')) {
- w = 1, mode |= MF_READ | MF_WRITE;
- if (a)
- r = 1;
- }
- if (r) {
- mf = mfcreate(NULL, 0);
- if (NULL == mf) return NULL;
- if (!(mode & MF_TRUNC)) {
- mf->data = mfload(fp, path, &mf->size, b);
- mf->alloced = mf->size;
- if (!a)
- fseek(fp, 0, SEEK_SET);
- }
- } else if (w) {
- /* Write - initialise the data structures */
- mf = mfcreate(NULL, 0);
- if (NULL == mf) return NULL;
- } else {
- fprintf(stderr, "Must specify either r, w or a for mode\n");
- return NULL;
- }
- mf->fp = fp;
- mf->mode = mode;
- if (x) {
- mf->mode |= MF_MODEX;
- }
-
- if (a) {
- mf->flush_pos = mf->size;
- fseek(fp, 0, SEEK_END);
- }
- return mf;
- }
- /*
- * Opens a file. If we have read access (r or a+) then it loads the entire
- * file into memory. If We have write access then the pathname is stored.
- * We do not actually write until an mfclose, which then checks this pathname.
- */
- mFILE *mfopen(const char *path, const char *mode) {
- FILE *fp;
- if (NULL == (fp = fopen(path, mode)))
- return NULL;
- return mfreopen(path, mode, fp);
- }
- /*
- * Closes an mFILE. If the filename is known (implying write access) then this
- * also writes the data to disk.
- *
- * Stdout is handled by calling mfflush which writes to stdout if appropriate.
- */
- int mfclose(mFILE *mf) {
- if (!mf)
- return -1;
- mfflush(mf);
- if (mf->fp)
- fclose(mf->fp);
- mfdestroy(mf);
- return 0;
- }
- /*
- * Closes the file pointer contained within the mFILE without destroying
- * the in-memory data.
- */
- int mfdetach(mFILE *mf) {
- if (!mf)
- return -1;
- mfflush(mf);
- if (mf->fp) {
- fclose(mf->fp);
- mf->fp = NULL;
- }
- return 0;
- }
- /*
- * Destroys an mFILE structure but does not flush or close it
- */
- int mfdestroy(mFILE *mf) {
- if (!mf)
- return -1;
- if (mf->data)
- free(mf->data);
- free(mf);
- return 0;
- }
- /*
- * Steals that data out of an mFILE. The mFILE itself will be closed.
- * It is up to the caller to free the stolen buffer. If size_out is
- * not NULL, mf->size will be stored in it.
- * This is more-or-less the opposite of mfcreate().
- */
- void *mfsteal(mFILE *mf, size_t *size_out) {
- void *data;
- if (!mf) return NULL;
- data = mf->data;
-
- if (NULL != size_out) *size_out = mf->size;
- mfdetach(mf);
- mf->data = NULL;
- mfdestroy(mf);
- return data;
- }
- /*
- * Seek/tell functions. Nothing more than updating and reporting an
- * in-memory index. NB we can seek on stdin or stdout even provided we
- * haven't been flushing.
- */
- int mfseek(mFILE *mf, long offset, int whence) {
- switch (whence) {
- case SEEK_SET:
- mf->offset = offset;
- break;
- case SEEK_CUR:
- mf->offset += offset;
- break;
- case SEEK_END:
- mf->offset = mf->size + offset;
- break;
- default:
- errno = EINVAL;
- return -1;
- }
- mf->eof = 0;
- return 0;
- }
- long mftell(mFILE *mf) {
- return mf->offset;
- }
- void mrewind(mFILE *mf) {
- mf->offset = 0;
- mf->eof = 0;
- }
- /*
- * mftruncate is not directly a translation of ftruncate as the latter
- * takes a file descriptor instead of a FILE *. It performs the analogous
- * role though.
- *
- * If offset is -1 then the file is truncated to be the current file
- * offset.
- */
- void mftruncate(mFILE *mf, long offset) {
- mf->size = offset != -1 ? offset : mf->offset;
- if (mf->offset > mf->size)
- mf->offset = mf->size;
- }
- int mfeof(mFILE *mf) {
- return mf->eof;
- }
- /*
- * mFILE read/write functions. Basically these turn fread/fwrite syntax
- * into memcpy statements, with appropriate memory handling for writing.
- */
- size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
- size_t len;
- char *cptr = (char *)ptr;
-
- if (mf == m_channel[0]) init_mstdin();
- if (mf->size <= mf->offset)
- return 0;
- len = size * nmemb <= mf->size - mf->offset
- ? size * nmemb
- : mf->size - mf->offset;
- if (!size)
- return 0;
- memcpy(cptr, &mf->data[mf->offset], len);
- mf->offset += len;
-
- if (len != size * nmemb) {
- mf->eof = 1;
- }
- return len / size;
- }
- size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
- if (!(mf->mode & MF_WRITE))
- return 0;
- /* Append mode => forced all writes to end of file */
- if (mf->mode & MF_APPEND)
- mf->offset = mf->size;
- /* Make sure we have enough room */
- while (size * nmemb + mf->offset > mf->alloced) {
- size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
- void * new_data = realloc(mf->data, new_alloced);
- if (NULL == new_data) return 0;
- mf->alloced = new_alloced;
- mf->data = new_data;
- }
- /* Record where we need to reflush from */
- if (mf->offset < mf->flush_pos)
- mf->flush_pos = mf->offset;
- /* Copy the data over */
- memcpy(&mf->data[mf->offset], ptr, size * nmemb);
- mf->offset += size * nmemb;
- if (mf->size < mf->offset)
- mf->size = mf->offset;
- return nmemb;
- }
- int mfgetc(mFILE *mf) {
- if (mf == m_channel[0]) init_mstdin();
- if (mf->offset < mf->size) {
- return (unsigned char)mf->data[mf->offset++];
- }
- mf->eof = 1;
- return -1;
- }
- int mungetc(int c, mFILE *mf) {
- if (mf->offset > 0) {
- mf->data[--mf->offset] = c;
- return c;
- }
-
- mf->eof = 1;
- return -1;
- }
- char *mfgets(char *s, int size, mFILE *mf) {
- int i;
- if (mf == m_channel[0]) init_mstdin();
- *s = 0;
- for (i = 0; i < size-1;) {
- if (mf->offset < mf->size) {
- s[i] = mf->data[mf->offset++];
- if (s[i++] == '\n')
- break;
- } else {
- mf->eof = 1;
- break;
- }
- }
- s[i] = 0;
- return i ? s : NULL;
- }
- /*
- * Flushes an mFILE. If this is a real open of a file in write mode then
- * mFILE->fp will be set. We then write out any new data in mFILE since the
- * last flush. We cannot tell what may have been modified as we don't keep
- * track of that, so we typically rewrite out the entire file contents between
- * the last flush_pos and the end of file.
- *
- * For stderr/stdout we also reset the offsets so we cannot modify things
- * we've already output.
- */
- int mfflush(mFILE *mf) {
- if (!mf->fp)
- return 0;
- /* FIXME: only do this when opened in write mode */
- if (mf == m_channel[1] || mf == m_channel[2]) {
- if (mf->flush_pos < mf->size) {
- size_t bytes = mf->size - mf->flush_pos;
- if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
- return -1;
- if (0 != fflush(mf->fp))
- return -1;
- }
- /* Stdout & stderr are non-seekable streams so throw away the data */
- mf->offset = mf->size = mf->flush_pos = 0;
- }
- /* only flush when opened in write mode */
- if (mf->mode & MF_WRITE) {
- if (mf->flush_pos < mf->size) {
- size_t bytes = mf->size - mf->flush_pos;
- if (!(mf->mode & MF_MODEX)) {
- fseek(mf->fp, mf->flush_pos, SEEK_SET);
- }
- if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
- return -1;
- if (0 != fflush(mf->fp))
- return -1;
- }
- if (ftell(mf->fp) != -1 &&
- ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1)
- return -1;
- mf->flush_pos = mf->size;
- }
- return 0;
- }
- /*
- * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to
- * estimate how many additional bytes of storage will be required for the
- * vsprintf to work.
- */
- int mfprintf(mFILE *mf, char *fmt, ...) {
- int ret;
- size_t est_length;
- va_list args;
- va_start(args, fmt);
- est_length = vflen(fmt, args);
- va_end(args);
- while (est_length + mf->offset > mf->alloced) {
- size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
- void * new_data = realloc(mf->data, new_alloced);
- if (NULL == new_data) return -1;
- mf->alloced = new_alloced;
- mf->data = new_data;
- }
- va_start(args, fmt);
- ret = vsprintf(&mf->data[mf->offset], fmt, args);
- va_end(args);
- if (ret > 0) {
- mf->offset += ret;
- if (mf->size < mf->offset)
- mf->size = mf->offset;
- }
- if (mf->fp == stderr) {
- /* Auto-flush for stderr */
- if (0 != mfflush(mf)) return -1;
- }
- return ret;
- }
- /*
- * Converts an mFILE from binary to ascii mode by replacing all
- * cr-nl with nl.
- *
- * Primarily used on windows when we've uncompressed a binary file which
- * happens to be a text file (eg Experiment File). Previously we would have
- * seeked back to the start and used _setmode(fileno(fp), _O_TEXT).
- *
- * Side effect: resets offset and flush_pos back to the start.
- */
- void mfascii(mFILE *mf) {
- size_t p1, p2;
- for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) {
- if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') {
- p2--; /* delete the \r */
- }
- mf->data[p2] = mf->data[p1];
- }
- mf->size = p2;
- mf->offset = mf->flush_pos = 0;
- }
|