Revision: 21582
Author: gervandiepen
Date: Tue Mar 24 14:43:31 2015 UTC
Log: Split off base class MultiFileBase from MultiFile
Create MultiHDF5
https://code.google.com/p/casacore/source/detail?r=21582
Added:
/branches/nov14/casa/IO/MultiFileBase.cc
/branches/nov14/casa/IO/MultiFileBase.h
/branches/nov14/casa/IO/MultiHDF5.cc
/branches/nov14/casa/IO/MultiHDF5.h
/branches/nov14/casa/IO/test/tMultiHDF5.cc
Modified:
/branches/nov14/casa/CMakeLists.txt
/branches/nov14/casa/HDF5/HDF5DataSet.cc
/branches/nov14/casa/HDF5/HDF5DataSet.h
/branches/nov14/casa/IO/AipsIO.cc
/branches/nov14/casa/IO/AipsIO.h
/branches/nov14/casa/IO/BucketFile.cc
/branches/nov14/casa/IO/BucketFile.h
/branches/nov14/casa/IO/MFFileIO.cc
/branches/nov14/casa/IO/MFFileIO.h
/branches/nov14/casa/IO/MultiFile.cc
/branches/nov14/casa/IO/MultiFile.h
/branches/nov14/casa/IO/test/CMakeLists.txt
/branches/nov14/casa/IO/test/tAipsIO.cc
/branches/nov14/casa/IO/test/tAipsIO.out
/branches/nov14/casa/IO/test/tMFFileIO.cc
/branches/nov14/casa/IO/test/tMultiFile.cc
=======================================
--- /dev/null
+++ /branches/nov14/casa/IO/MultiFileBase.cc Tue Mar 24 14:43:31 2015 UTC
@@ -0,0 +1,288 @@
+//# MultiFileBase.cc: Class to combine multiple files in a single one
+//# Copyright (C) 2014
+//# Associated Universities, Inc. Washington DC, USA.
+//#
+//# This library is free software; you can redistribute it and/or modify it
+//# under the terms of the GNU Library General Public License as published
by
+//# the Free Software Foundation; either version 2 of the License, or (at
your
+//# option) any later version.
+//#
+//# This library is distributed in the hope that it will be useful, but
WITHOUT
+//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+//# License for more details.
+//#
+//# You should have received a copy of the GNU Library General Public
License
+//# along with this library; if not, write to the Free Software Foundation,
+//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
+//#
+//# Correspondence concerning AIPS++ should be addressed as follows:
+//# Internet email:
aips2-...@nrao.edu.
+//# Postal address: AIPS++ Project Office
+//# National Radio Astronomy Observatory
+//# 520 Edgemont Road
+//# Charlottesville, VA 22903-2475 USA
+//#
+//# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
+
+//# Includes
+#include <casacore/casa/IO/MultiFileBase.h>
+#include <casacore/casa/OS/Path.h>
+#include <casacore/casa/BasicSL/STLIO.h>
+#include <casacore/casa/Utilities/Assert.h>
+#include <casacore/casa/Exceptions/Error.h>
+#include <casacore/casa/OS/File.h> // for fileFSTAT
+#include <sys/stat.h> // needed for stat or stat64
+
+namespace casacore { //# NAMESPACE CASACORE - BEGIN
+
+  // Show an internal file on a text stream as a single line holding its
+  // name, block numbers, size, buffered block number and dirty flag.
+  void operator<< (ostream& ios, const MultiFileInfo& info)
+  {
+    ios << info.name << ' ';
+    ios << info.blockNrs << ' ';
+    ios << info.fsize << ' ';
+    ios << info.curBlock << ' ';
+    ios << info.dirty << endl;
+  }
+  // Store the name, block numbers and file size of an internal file.
+  // The buffer state (curBlock, dirty) is not written.
+  void operator<< (AipsIO& ios, const MultiFileInfo& info)
+  {
+    ios << info.name;
+    ios << info.blockNrs;
+    ios << info.fsize;
+  }
+  // Retrieve the name, block numbers and file size of an internal file
+  // (in the same order as they are stored).
+  void operator>> (AipsIO& ios, MultiFileInfo& info)
+  {
+    ios >> info.name;
+    ios >> info.blockNrs;
+    ios >> info.fsize;
+  }
+
+
+  // Construct the base part of a multi-file container.
+  // <br>The expanded path is kept as the name. For a new file the header
+  // is marked as changed and, if blockSize <= 0, the block size is set to
+  // the maximum of -blockSize and the block size of the file system.
+  // <br>No file has been opened yet at this point (that is done by the
+  // derived class), so the file system block size is obtained from the
+  // directory the file resides in and not from the file descriptor.
+  // An AipsError is thrown if no positive block size can be determined.
+  MultiFileBase::MultiFileBase (const String& name, ByteIO::OpenOption option,
+                                Int blockSize)
+  : itsBlockSize  (blockSize),
+    itsNrBlock    (0),
+    itsHdrCounter (0),
+    itsFD         (-1),
+    itsWritable   (False),
+    itsChanged    (False)
+  {
+    itsName = Path(name).expandedName();
+    if (option == ByteIO::New  ||  option == ByteIO::NewNoReplace) {
+      // New file.
+      itsChanged = True;
+      // Use file system block size, but not less than given size.
+      if (itsBlockSize <= 0) {
+        struct fileSTAT sfs;
+        Int64 blksz = 0;
+        // A failing stat leaves sfs undefined, so only use it on success.
+        if (fileSTAT (Path(itsName).dirName().c_str(), &sfs) == 0) {
+          blksz = sfs.st_blksize;
+        }
+        itsBlockSize = std::max (-itsBlockSize, blksz);
+      }
+      AlwaysAssert (itsBlockSize > 0, AipsError);
+    }
+  }
+
+  // Only the administration of the virtual files is released here.
+  // Nothing is flushed or closed; close() is pure virtual in this class.
+  MultiFileBase::~MultiFileBase()
+  {
+    itsInfo.clear();
+  }
+
+  // Count the slots in use; a slot with an empty name does not hold a file.
+  uInt MultiFileBase::nfile() const
+  {
+    Int count = 0;
+    for (size_t i=0; i<itsInfo.size(); ++i) {
+      if (itsInfo[i].name.empty()) {
+        continue;
+      }
+      ++count;
+    }
+    return count;
+  }
+
+  // Write all modified data buffers and, if changed, the header.
+  void MultiFileBase::flush()
+  {
+    // First bring the data blocks up-to-date.
+    for (size_t i=0; i<itsInfo.size(); ++i) {
+      MultiFileInfo& info = itsInfo[i];
+      if (info.dirty) {
+        writeDirty (info);
+      }
+    }
+    // The header is only rewritten if its info changed since the last flush.
+    if (! itsChanged) {
+      return;
+    }
+    writeHeader();
+    itsChanged = False;
+  }
+
+  // Read up to size bytes from the given virtual file starting at offset.
+  // The request is clipped at the end of the virtual file; the number of
+  // bytes actually copied into buf is returned.
+  // An AipsError is thrown if fileId does not refer to a file in use.
+  Int64 MultiFileBase::read (Int fileId, void* buf,
+                             Int64 size, Int64 offset)
+  {
+    // A negative id (such as the -1 returned by fileId() for an unknown
+    // name) has to be rejected before it is used as an index.
+    if (fileId < 0  ||  fileId >= Int(itsInfo.size())  ||
+        itsInfo[fileId].name.empty()) {
+      throw AipsError ("MultiFileBase::read - invalid fileId given");
+    }
+    char* buffer = static_cast<char*>(buf);
+    MultiFileInfo& info = itsInfo[fileId];
+    // Determine the logical block to read and the start offset in that block.
+    Int64 nrblk = (info.fsize + itsBlockSize - 1) / itsBlockSize;
+    Int64 blknr = offset/itsBlockSize;
+    Int64 start = offset - blknr*itsBlockSize;
+    Int64 done  = 0;
+    Int64 szdo  = std::min(size, info.fsize - offset);  // not past EOF
+    // Read until done.
+    while (done < szdo) {
+      AlwaysAssert (blknr < nrblk, AipsError);
+      Int64 todo = std::min(szdo-done, itsBlockSize-start);
+      // If already in buffer, copy from there.
+      if (blknr == info.curBlock) {
+        memcpy (buffer, &(info.buffer[start]), todo);
+      } else {
+        // Read directly into buffer if it fits exactly.
+        if (todo == itsBlockSize) {
+          readBlock (info, blknr, buffer);
+        } else {
+          // The file buffer is going to be reused, so save its changes first.
+          if (info.dirty) {
+            writeDirty (info);
+          }
+          // Read into file buffer and copy correct part.
+          readBlock (info, blknr, &(info.buffer[0]));
+          info.curBlock = blknr;
+          memcpy (buffer, &(info.buffer[start]), todo);
+        }
+      }
+      // Increment counters; all blocks but the first are read from their start.
+      done += todo;
+      buffer += todo;
+      blknr++;
+      start = 0;
+    }
+    return done;
+  }
+
+  // Write size bytes from buf into the given virtual file at offset.
+  // The virtual file is extended if the data end beyond its current blocks
+  // and its size is updated if the data end beyond the current size.
+  // The number of bytes written is returned.
+  // An AipsError is thrown if fileId does not refer to a file in use or
+  // if the container is not writable.
+  Int64 MultiFileBase::write (Int fileId, const void* buf,
+                              Int64 size, Int64 offset)
+  {
+    // A negative id (such as the -1 returned by fileId() for an unknown
+    // name) has to be rejected before it is used as an index.
+    if (fileId < 0  ||  fileId >= Int(itsInfo.size())  ||
+        itsInfo[fileId].name.empty()) {
+      throw AipsError ("MultiFileBase::write - invalid fileId given");
+    }
+    const char* buffer = static_cast<const char*>(buf);
+    AlwaysAssert (itsWritable, AipsError);
+    MultiFileInfo& info = itsInfo[fileId];
+    // Determine the logical block to write and the start offset in that block.
+    Int64 blknr = offset/itsBlockSize;
+    Int64 start = offset - blknr*itsBlockSize;
+    Int64 done  = 0;
+    // If beyond EOF, add blocks as needed.
+    Int64 lastblk = blknr + (start+size+itsBlockSize-1) / itsBlockSize;
+    Int64 curnrb  = (info.fsize+itsBlockSize-1) / itsBlockSize;
+    if (lastblk >= curnrb) {
+      extend (info, lastblk);
+      itsChanged = True;
+    }
+    // Write until all done.
+    while (done < size) {
+      Int64 todo = std::min(size-done, itsBlockSize-start);
+      // Favor sequential writing, thus write current buffer first.
+      if (blknr == info.curBlock) {
+        memcpy (&(info.buffer[start]), buffer, todo);
+        info.dirty = True;
+        // NOTE(review): todo never exceeds size-done, so this condition
+        // looks unreachable - confirm the intent.
+        if (done+todo > size) {
+          writeDirty (info);
+        }
+      } else if (todo == itsBlockSize) {
+        // Write directly from buffer if it fits exactly.
+        writeBlock (info, blknr, buffer);
+      } else {
+        // Write into temporary buffer and copy correct part.
+        // First write possibly dirty buffer.
+        if (info.dirty) {
+          writeDirty (info);
+        }
+        // A block beyond the old end of file has no contents to preserve.
+        if (blknr >= curnrb) {
+          memset (&(info.buffer[0]), 0, itsBlockSize);
+        } else {
+          readBlock (info, blknr, &(info.buffer[0]));
+        }
+        info.curBlock = blknr;
+        memcpy (&(info.buffer[start]), buffer, todo);
+        info.dirty = True;
+      }
+      // All blocks but the first are written from their start.
+      done += todo;
+      buffer += todo;
+      blknr++;
+      start = 0;
+    }
+    if (offset+size > info.fsize) {
+      info.fsize = offset+size;
+    }
+    return done;
+  }
+
+  // Invalidate the data buffers and read the header again.
+  // It is asserted that neither the header nor a buffer has pending changes.
+  void MultiFileBase::resync()
+  {
+    AlwaysAssert (!itsChanged, AipsError);
+    // Forget which block is held in each buffer.
+    for (size_t i=0; i<itsInfo.size(); ++i) {
+      MultiFileInfo& info = itsInfo[i];
+      AlwaysAssert (!info.dirty, AipsError);
+      info.curBlock = -1;
+    }
+    readHeader();
+  }
+
+  // Add a virtual file and return its id (the index of its slot).
+  // The last unused slot is reused if there is one; otherwise a slot is
+  // appended. An AipsError is thrown if the name is empty or if its base
+  // name is already in use.
+  Int MultiFileBase::addFile (const String& fname)
+  {
+    if (fname.empty()) {
+      throw AipsError("MultiFileBase::addFile - empty file name given");
+    }
+    // Only the basename part is used (to avoid directory rename problems).
+    const String bname = Path(fname).baseName();
+    // Scan all slots to check the name is unused and to find a free slot.
+    uInt slot = itsInfo.size();
+    for (uInt i=0; i<itsInfo.size(); ++i) {
+      const String& used = itsInfo[i].name;
+      if (used.empty()) {
+        slot = i;              // free file slot
+      } else if (bname == used) {
+        throw AipsError ("MultiFileBase::addFile - file name " + bname +
+                         " already in use");
+      }
+    }
+    // Append a slot if no free one was found.
+    if (slot == itsInfo.size()) {
+      itsInfo.resize (slot+1);
+    }
+    MultiFileInfo& entry = itsInfo[slot];
+    entry = MultiFileInfo(itsBlockSize);
+    entry.name = bname;
+    doAddFile (entry);
+    itsChanged = True;
+    return slot;
+  }
+
+  // Find the id of the virtual file with the given name.
+  // If not found, -1 is returned or (if throwExcp is set) an AipsError
+  // is thrown.
+  Int MultiFileBase::fileId (const String& fname, Bool throwExcp) const
+  {
+    // Only the basename part is used (to avoid directory rename problems).
+    const String bname = Path(fname).baseName();
+    Int found = -1;
+    for (size_t i=0; i<itsInfo.size()  &&  found<0; ++i) {
+      if (bname == itsInfo[i].name) {
+        found = i;
+      }
+    }
+    if (found < 0  &&  throwExcp) {
+      throw AipsError ("MultiFileBase::fileId - file name " + fname +
+                       " is unknown");
+    }
+    return found;
+  }
+
+  // Delete the virtual file with the given id.
+  // The class-specific actions are done by doDeleteFile, whereafter the
+  // slot is cleared (so it can be reused) and the header is marked changed.
+  // An AipsError is thrown if fileId does not refer to a file in use.
+  void MultiFileBase::deleteFile (Int fileId)
+  {
+    // A negative id (such as the -1 returned by fileId() for an unknown
+    // name) has to be rejected before it is used as an index.
+    if (fileId < 0  ||  fileId >= Int(itsInfo.size())  ||
+        itsInfo[fileId].name.empty()) {
+      throw AipsError ("MultiFileBase::deleteFile - invalid fileId given");
+    }
+    MultiFileInfo& info = itsInfo[fileId];
+    doDeleteFile (info);
+    // Clear this slot.
+    info = MultiFileInfo();
+    itsChanged = True;
+  }
+
+
+
+  // Create the info of an empty virtual file: a zero-filled buffer of
+  // bufSize bytes holding no block, size 0 and not dirty.
+  MultiFileInfo::MultiFileInfo (Int64 bufSize)
+  : buffer   (bufSize),
+    curBlock (-1),
+    fsize    (0),
+    dirty    (False)
+  {}
+
+
+} //# NAMESPACE CASACORE - END
=======================================
--- /dev/null
+++ /branches/nov14/casa/IO/MultiFileBase.h Tue Mar 24 14:43:31 2015 UTC
@@ -0,0 +1,250 @@
+//# MultiFileBase.h: Abstract base class to combine multiple files in a
single one
+//# Copyright (C) 2014
+//# Associated Universities, Inc. Washington DC, USA.
+//#
+//# This library is free software; you can redistribute it and/or modify it
+//# under the terms of the GNU Library General Public License as published
by
+//# the Free Software Foundation; either version 2 of the License, or (at
your
+//# option) any later version.
+//#
+//# This library is distributed in the hope that it will be useful, but
WITHOUT
+//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+//# License for more details.
+//#
+//# You should have received a copy of the GNU Library General Public
License
+//# along with this library; if not, write to the Free Software Foundation,
+//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
+//#
+//# Correspondence concerning AIPS++ should be addressed as follows:
+//# Internet email:
aips2-...@nrao.edu.
+//# Postal address: AIPS++ Project Office
+//# National Radio Astronomy Observatory
+//# 520 Edgemont Road
+//# Charlottesville, VA 22903-2475 USA
+//#
+//# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
+
+#ifndef CASA_MULTIFILEBASE_H
+#define CASA_MULTIFILEBASE_H
+
+//# Includes
+#include <casacore/casa/aips.h>
+#include <casacore/casa/IO/ByteIO.h>
+#include <casacore/casa/BasicSL/String.h>
+#include <casacore/casa/Utilities/CountedPtr.h>
+#include <casacore/casa/vector.h>
+#include <casacore/casa/ostream.h>
+
+
+namespace casacore { //# NAMESPACE CASACORE - BEGIN
+
+ //# Forward declaration.
+ class AipsIO;
+ class HDF5Group;
+ class HDF5DataSet;
+
+
+ // <summary>
+ // Helper class for MultiFileBase containing info per internal file
+ // </summary>
+ // <use visibility=local>
+  struct MultiFileInfo {
+    explicit MultiFileInfo (Int64 bufSize=0);
+    vector<Int64> blockNrs;   // physical blocknrs for this logical file
+    vector<char>  buffer;     // buffer holding a data block
+    Int64         curBlock;   // the data block held in buffer (<0 is none)
+    Int64         fsize;      // file size (in bytes)
+    String        name;       // the virtual file name (empty = unused slot)
+    Bool          dirty;      // has data in buffer been changed?
+    CountedPtr<HDF5Group>   group;    // HDF5 group of the file (set by MultiHDF5)
+    CountedPtr<HDF5DataSet> dataSet;  // HDF5 dataset with the data (set by MultiHDF5)
+  };
+ void operator<< (ostream&, const MultiFileInfo&);
+ void operator<< (AipsIO&, const MultiFileInfo&);
+ void operator>> (AipsIO&, MultiFileInfo&);
+
+
+ // <summary>
+ // Abstract base class to combine multiple files in a single one.
+ // </summary>
+
+ // <use visibility=export>
+
+ // <reviewed reviewer="" date="" tests="tMultiFile" demos="">
+ // </reviewed>
+
+ // <synopsis>
+ // This class is a container file holding multiple virtual files. It is
+ // primarily meant as a container file for the storage manager files of a
+ // table to reduce the number of files used (especially for Lustre) and
to
+ // reduce the number of open files (especially when concatenating
tables).
+ // <br>A secondary goal is offering the ability to use an IO buffer size
+ // that matches the file system well (large buffer size for e.g. ZFS).
+ //
+ // The SetupNewTable constructor has a StorageOption argument to define
+ // if a MultiFile has to be used and if so, the buffer size to use.
+ // It is also possible to specify that through aipsrc variables.
+ //
+ // A virtual file is spread over multiple (fixed size) data blocks in the
+ // MultiFile. A data block is never shared by multiple files.
+ // For each virtual file MultiFile keeps a MultiFileInfo object telling
+ // the file size and the blocks numbers used for the file. When flushing
+ // the MultiFile, this meta info is written into a header block and,
+ // if needed, continuation blocks. On open and resync, it is read back.
+ // <br>
+ //
+ // A virtual file is represented by an MFFileIO object, which is derived
+ // from ByteIO and as such part of the casacore IO framework. It makes it
+ // possible for applications to access a virtual file in the same way as
+ // a regular file.
+ //
+ // It is possible to delete a virtual file. Its blocks will be added to
+ // the free block list (which is also stored in the meta info).
+ // </synopsis>
+
+ // <example>
+ // In principle it is possible to use the MultiFile functions directly.
+ // However, in general it is much easier to use an MFFileIO object
+ // per virtual file as shown below.
+ // <srcblock>
+ // // Create a new MultiFile using a block size of 1 MB.
+ // MultiFile mfile("file.mf", ByteIO::New, 1048576);
+ // // Create a virtual file in it.
+ // MFFileIO mf1(mfile, "mf1", ByteIO::New);
+ // // Use it (for example) as the sink of AipsIO.
+ // AipsIO stream (&mf1);
+ // // Write values.
+ // stream << (Int)10;
+ // stream << True;
+ // // Seek to beginning of file and read data in.
+ // stream.setpos (0);
+ // Int vali;
+ // Bool valb;
+ // stream >> vali >> valb;
+ // </srcblock>
+ // </example>
+
+ // <todo>
+ // <li> write headers at alternating file positions (for robustness)
+ // <li> possibly write headers entirely at the end if larger than
blocksize
+ // </todo>
+
+
+  class MultiFileBase
+  {
+  public:
+    // Open or create a MultiFileBase with the given name.
+    // Upon creation the block size can be given. If it is <= 0, the block
+    // size of the file system is used, but not less than -blockSize.
+    MultiFileBase (const String& name, ByteIO::OpenOption, Int blockSize=0);
+
+    // The destructor only clears the info of the virtual files.
+    // Flushing and closing has to be done by the derived class.
+    virtual ~MultiFileBase();
+
+    // Return the file id of a file in the MultiFileBase object.
+    // Only the base name of the given file name is used.
+    // If the name is unknown, an exception is thrown if throwExcp is set.
+    // Otherwise it returns -1.
+    Int fileId (const String& name, Bool throwExcp=True) const;
+
+    // Add a file to the MultiFileBase object. It returns the file id.
+    // Only the base name of the given file name is used. In this way the
+    // MultiFileBase container file can be moved.
+    // An exception is thrown if the name is empty or already in use.
+    Int addFile (const String& name);
+
+    // Delete a file. The class-specific actions are done by doDeleteFile.
+    // An exception is thrown if the file id is invalid.
+    void deleteFile (Int fileId);
+
+    // Read a block at the given offset. It returns the actual size read,
+    // which is less than the given size if the end of the file is reached.
+    Int64 read (Int fileId, void* buffer, Int64 size, Int64 offset);
+
+    // Write a block at the given offset. It returns the actual size written.
+    // The file is extended as needed.
+    Int64 write (Int fileId, const void* buffer, Int64 size, Int64 offset);
+
+    // Flush the file by writing all dirty data and, if changed since the
+    // last flush, the header info.
+    void flush();
+
+    // Resync with another process by clearing the buffers and rereading
+    // the header. It is asserted that there are no unflushed changes.
+    void resync();
+
+    // Reopen the underlying file for read/write access.
+    // Nothing will be done if the file is writable already.
+    // Otherwise it will be reopened and an exception will be thrown
+    // if it is not possible to reopen it for read/write access.
+    virtual void reopenRW() = 0;
+
+    // Fsync the file (i.e., force the data to be physically written).
+    virtual void fsync() = 0;
+
+    // Get the file name of the MultiFileBase.
+    String fileName() const
+      { return itsName; }
+
+    // Is the file writable?
+    Bool isWritable() const
+      { return itsWritable; }
+
+    // Get the block size used.
+    Int64 blockSize() const
+      { return itsBlockSize; }
+
+    // Get the nr of virtual files (unused slots are not counted).
+    uInt nfile() const;
+
+    // Get the total nr of data blocks used.
+    Int64 size() const
+      { return itsNrBlock; }
+
+    // Get the info object (for test purposes mainly).
+    const vector<MultiFileInfo>& info() const
+      { return itsInfo; }
+
+    // Get the free blocks (for test purposes mainly).
+    const vector<Int64>& freeBlocks() const
+      { return itsFreeBlocks; }
+
+  private:
+    // Write the block held in the buffer of the file and clear its dirty flag.
+    void writeDirty (MultiFileInfo& info)
+    {
+      writeBlock (info, info.curBlock, &(info.buffer[0]));
+      info.dirty = False;
+    }
+
+    // Do the class-specific actions on adding a file.
+    virtual void doAddFile (MultiFileInfo&) = 0;
+    // Do the class-specific actions on deleting a file.
+    virtual void doDeleteFile (MultiFileInfo&) = 0;
+    // Flush and close the file.
+    virtual void close() = 0;
+    // Write the header info.
+    virtual void writeHeader() = 0;
+    // Read the header info. If always==False, the info is only read if the
+    // header counter has changed.
+    virtual void readHeader (Bool always=True) = 0;
+    // Extend the virtual file to fit lastblk.
+    virtual void extend (MultiFileInfo& info, Int64 lastblk) = 0;
+    // Write a data block.
+    virtual void writeBlock (MultiFileInfo& info, Int64 blknr,
+                             const void* buffer) = 0;
+    // Read a data block.
+    virtual void readBlock (MultiFileInfo& info, Int64 blknr,
+                            void* buffer) = 0;
+
+  protected:
+    //# Data members
+    String itsName;                   // The expanded name of the container file
+    Int64  itsBlockSize;              // The blocksize used
+    Int64  itsNrBlock;                // The total nr of blocks actually used
+    Int64  itsHdrCounter;             // Counter of header changes
+    vector<MultiFileInfo> itsInfo;    // Info per virtual file (indexed by file id)
+    int    itsFD;
+    Bool   itsWritable;               // Is the file writable?
+    Bool   itsChanged;                // Has header info changed since last flush?
+    vector<Int64> itsFreeBlocks;
+  };
+
+
+} //# NAMESPACE CASACORE - END
+
+#endif
=======================================
--- /dev/null
+++ /branches/nov14/casa/IO/MultiHDF5.cc Tue Mar 24 14:43:31 2015 UTC
@@ -0,0 +1,153 @@
+//# MultiHDF5.cc: Class to combine multiple files in a single HDF5 file
+//# Copyright (C) 2015
+//# Associated Universities, Inc. Washington DC, USA.
+//#
+//# This library is free software; you can redistribute it and/or modify it
+//# under the terms of the GNU Library General Public License as published
by
+//# the Free Software Foundation; either version 2 of the License, or (at
your
+//# option) any later version.
+//#
+//# This library is distributed in the hope that it will be useful, but
WITHOUT
+//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+//# License for more details.
+//#
+//# You should have received a copy of the GNU Library General Public
License
+//# along with this library; if not, write to the Free Software Foundation,
+//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
+//#
+//# Correspondence concerning AIPS++ should be addressed as follows:
+//# Internet email:
aips2-...@nrao.edu.
+//# Postal address: AIPS++ Project Office
+//# National Radio Astronomy Observatory
+//# 520 Edgemont Road
+//# Charlottesville, VA 22903-2475 USA
+//#
+//# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
+
+//# Includes
+#include <casacore/casa/IO/MultiHDF5.h>
+#include <casacore/casa/HDF5/HDF5Group.h>
+#include <casacore/casa/HDF5/HDF5DataSet.h>
+#include <casacore/casa/HDF5/HDF5Record.h>
+#include <casacore/casa/Containers/Record.h>
+
+namespace casacore { //# NAMESPACE CASACORE - BEGIN
+
+  // Open or create the HDF5 file. The header is read for an existing file.
+  // The writable flag is taken from the HDF5 file.
+  MultiHDF5::MultiHDF5 (const String& name, ByteIO::OpenOption option,
+                        Int blockSize)
+  : MultiFileBase (name, option, blockSize),
+    itsFile       (itsName, option)
+  {
+    const Bool isNew = (option == ByteIO::New  ||
+                        option == ByteIO::NewNoReplace);
+    if (! isNew) {
+      readHeader();
+    }
+    itsWritable = itsFile.isWritable();
+  }
+
+  // Flush and close the file (by means of close()).
+  MultiHDF5::~MultiHDF5()
+  {
+    close();
+  }
+
+  // Write pending data and header changes before closing the HDF5 file.
+  void MultiHDF5::close()
+  {
+    flush();
+    itsFile.close();
+  }
+
+  // Reopen the HDF5 file for read/write and mark the container writable.
+  void MultiHDF5::reopenRW()
+  {
+    itsFile.reopenRW();
+    itsWritable = True;
+  }
+
+  // Nothing is done here; no sync action is implemented for the HDF5 file.
+  void MultiHDF5::fsync()
+  {}
+
+  // Write the header as a record holding the block size, the header
+  // counter and the name and size of each file slot.
+  void MultiHDF5::writeHeader()
+  {
+    // The counter is incremented on each write; readHeader compares it
+    // to see if the header has changed.
+    itsHdrCounter++;
+    const uInt nrfile = itsInfo.size();
+    Vector<String> names(nrfile);
+    Vector<Int64>  sizes(nrfile);
+    for (uInt i=0; i<nrfile; ++i) {
+      const MultiFileInfo& info = itsInfo[i];
+      names[i] = info.name;
+      sizes[i] = info.fsize;
+    }
+    Record rec;
+    rec.define ("blockSize", itsBlockSize);
+    rec.define ("hdrCounter", itsHdrCounter);
+    rec.define ("names", names);
+    rec.define ("sizes", sizes);
+    HDF5Record::writeRecord (itsFile, "__MultiHDF5_Header__", rec);
+  }
+
+  // Read the header record and rebuild the info of all file slots.
+  // The block size is always taken from the header. If always==False, the
+  // remainder is only interpreted if the header counter differs from the
+  // counter read or written last.
+  void MultiHDF5::readHeader (Bool always)
+  {
+    Record rec = HDF5Record::readRecord (itsFile, "__MultiHDF5_Header__");
+    itsBlockSize = rec.asInt64 ("blockSize");
+    Int64 hdrCounter = rec.asInt64 ("hdrCounter");
+    // Only if needed, interpret the rest of the header.
+    if (hdrCounter == itsHdrCounter  &&  !always) {
+      return;
+    }
+    itsHdrCounter = hdrCounter;
+    Vector<String> names (rec.asArrayString("names"));
+    Vector<Int64>  sizes (rec.asArrayInt64("sizes"));
+    // Remove the old info first. Otherwise a reread (as done by resync)
+    // would append all entries again, so file ids would keep referring
+    // to the stale entries. It also releases the old groups and datasets
+    // before they are opened again.
+    itsInfo.clear();
+    // Set info fields.
+    itsInfo.reserve (names.size());
+    for (uInt i=0; i<names.size(); ++i) {
+      MultiFileInfo info(itsBlockSize);
+      info.name  = names[i];
+      info.fsize = sizes[i];
+      // An empty name denotes an unused slot, which has no group or dataset.
+      if (! info.name.empty()) {
+        info.group.reset (new HDF5Group (itsFile, info.name, true, false));
+        info.dataSet.reset (new HDF5DataSet (*info.group, "FileData",
+                                             (const uChar*)0));
+      }
+      itsInfo.push_back (info);
+    }
+  }
+
+  // Create the HDF5 group (named after the file) and the dataset
+  // "FileData" in it.
+  void MultiHDF5::doAddFile (MultiFileInfo& info)
+  {
+    // Presumably the initial shape (no blocks yet) and the tile shape
+    // (one block) of the dataset - confirm against HDF5DataSet.
+    const IPosition shape (2, itsBlockSize, 0);
+    const IPosition tile  (2, itsBlockSize, 1);
+    info.group.reset (new HDF5Group (itsFile, info.name, false, true));
+    info.dataSet.reset (new HDF5DataSet (*info.group, "FileData",
+                                         shape, tile,
+                                         (const uChar*)0));
+  }
+
+  // Remove the HDF5 group of the file. The dataset and group held in the
+  // info are released first.
+  void MultiHDF5::doDeleteFile (MultiFileInfo& info)
+  {
+    // Close the group and dataset.
+    info.dataSet.reset();
+    info.group.reset();
+    // Delete the group.
+    HDF5Group::remove (itsFile, info.name);
+  }
+
+  // Extend the dataset of the file to lastblk+1 blocks.
+  void MultiHDF5::extend (MultiFileInfo& info, Int64 lastblk)
+  {
+    const IPosition newShape (2, itsBlockSize, lastblk+1);
+    info.dataSet->extend (newShape);
+  }
+
+  // Get block blknr of the file from its dataset into the given buffer.
+  void MultiHDF5::readBlock (MultiFileInfo& info, Int64 blknr,
+                             void* buffer)
+  {
+    const IPosition blc (2, 0, blknr);
+    const IPosition len (2, itsBlockSize, 1);
+    Slicer section (blc, len);
+    info.dataSet->get (section, buffer);
+  }
+
+  // Put the given buffer as block blknr into the dataset of the file.
+  void MultiHDF5::writeBlock (MultiFileInfo& info, Int64 blknr,
+                              const void* buffer)
+  {
+    const IPosition blc (2, 0, blknr);
+    const IPosition len (2, itsBlockSize, 1);
+    Slicer section (blc, len);
+    info.dataSet->put (section, buffer);
+  }
+
+
+} //# NAMESPACE CASACORE - END
=======================================
--- /dev/null
+++ /branches/nov14/casa/IO/MultiHDF5.h Tue Mar 24 14:43:31 2015 UTC
@@ -0,0 +1,153 @@
+//# MultiHDF5.h: Class to combine multiple files in a single HDF5 file
+//# Copyright (C) 2015
+//# Associated Universities, Inc. Washington DC, USA.
+//#
+//# This library is free software; you can redistribute it and/or modify it
+//# under the terms of the GNU Library General Public License as published
by
+//# the Free Software Foundation; either version 2 of the License, or (at
your
+//# option) any later version.
+//#
+//# This library is distributed in the hope that it will be useful, but
WITHOUT
+//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+//# License for more details.
+//#
+//# You should have received a copy of the GNU Library General Public
License
+//# along with this library; if not, write to the Free Software Foundation,
+//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
+//#
+//# Correspondence concerning AIPS++ should be addressed as follows:
+//# Internet email:
aips2-...@nrao.edu.
+//# Postal address: AIPS++ Project Office
+//# National Radio Astronomy Observatory
+//# 520 Edgemont Road
+//# Charlottesville, VA 22903-2475 USA
+//#
+//# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
+
+#ifndef CASA_MULTIHDF5_H
+#define CASA_MULTIHDF5_H
+
+//# Includes
+#include <casacore/casa/aips.h>
+#include <casacore/casa/IO/MultiFile.h>
+#include <casacore/casa/HDF5/HDF5File.h>
+
+
+namespace casacore { //# NAMESPACE CASACORE - BEGIN
+
+ // <summary>
+ // Class to combine multiple files in a single HDF5 file.
+ // </summary>
+
+ // <use visibility=export>
+
+ // <reviewed reviewer="" date="" tests="tMultiHDF5" demos="">
+ // </reviewed>
+
+ // <synopsis>
+ // This class is a container file holding multiple virtual files. It is
+ // primarily meant as a container file for the storage manager files of a
+ // table to reduce the number of files used (especially for Lustre) and
to
+ // reduce the number of open files (especially when concatenating
tables).
+ // <br>A secondary goal is offering the ability to use an IO buffer size
+ // that matches the file system well (large buffer size for e.g. ZFS).
+ //
+ // The SetupNewTable constructor has a StorageOption argument to define
+ // if a MultiFile has to be used and if so, the buffer size to use.
+ // It is also possible to specify that through aipsrc variables.
+ //
+ // A virtual file is spread over multiple (fixed size) data blocks in the
+ // MultiFile. A data block is never shared by multiple files.
+ // For each virtual file MultiFile keeps a MultiFileInfo object telling
+ // the file size and the blocks numbers used for the file. When flushing
+ // the MultiFile, this meta info is written into a header block and,
+ // if needed, continuation blocks. On open and resync, it is read back.
+ // <br>
+ //
+ // A virtual file is represented by an MFFileIO object, which is derived
+ // from ByteIO and as such part of the casacore IO framework. It makes it
+ // possible for applications to access a virtual file in the same way as
+ // a regular file.
+ //
+ // It is possible to delete a virtual file. Its blocks will be added to
+ // the free block list (which is also stored in the meta info).
+ // </synopsis>
+
+ // <example>
+ // In principle it is possible to use the MultiFile functions directly.
+ // However, in general it is much easier to use an MFFileIO object
+ // per virtual file as shown below.
+ // <srcblock>
+ // // Create a new MultiFile using a block size of 1 MB.
+ // MultiFile mfile("file.mf", ByteIO::New, 1048576);
+ // // Create a virtual file in it.
+ // MFFileIO mf1(mfile, "mf1", ByteIO::New);
+ // // Use it (for example) as the sink of AipsIO.
+ // AipsIO stream (&mf1);
+ // // Write values.
+ // stream << (Int)10;
+ // stream << True;
+ // // Seek to beginning of file and read data in.
+ // stream.setpos (0);
+ // Int vali;
+ // Bool valb;
+ // stream >> vali >> valb;
+ // </srcblock>
+ // </example>
+
+ // <todo>
+ // <li> write headers at alternating file positions (for robustness)
+ // <li> possibly write headers entirely at the end if larger than
blocksize
+ // </todo>
+
+
+  class MultiHDF5 : public MultiFileBase
+  {
+  public:
+    // Open or create a MultiHDF5 with the given name.
+    // Upon creation the block size can be given. If it is <= 0, the block
+    // size of the file system is used, but not less than -blockSize.
+    // For an existing file the header is read.
+    MultiHDF5 (const String& name, ByteIO::OpenOption, Int blockSize=0);
+
+    // The destructor flushes and closes the file.
+    virtual ~MultiHDF5();
+
+    // Reopen the underlying file for read/write access.
+    // Nothing will be done if the file is writable already.
+    // Otherwise it will be reopened and an exception will be thrown
+    // if it is not possible to reopen it for read/write access.
+    virtual void reopenRW();
+
+    // Fsync the file (i.e., force the data to be physically written).
+    // <br>NOTE(review): the implementation in MultiHDF5.cc is empty.
+    virtual void fsync();
+
+  private:
+    // Do the class-specific actions on adding a file.
+    // It creates an HDF5 group and dataset for the file.
+    virtual void doAddFile (MultiFileInfo&);
+    // Do the class-specific actions on deleting a file.
+    // It removes the HDF5 group of the file.
+    virtual void doDeleteFile (MultiFileInfo&);
+    // Flush and close the file.
+    virtual void close();
+    // Write the header info.
+    virtual void writeHeader();
+    // Read the header info. If always==False, the info is only read if the
+    // header counter has changed.
+    virtual void readHeader (Bool always=True);
+    // Extend the virtual file to fit lastblk.
+    virtual void extend (MultiFileInfo& info, Int64 lastblk);
+    // Read a data block.
+    virtual void readBlock (MultiFileInfo& info, Int64 blknr,
+                            void* buffer);
+    // Write a data block.
+    virtual void writeBlock (MultiFileInfo& info, Int64 blknr,
+                             const void* buffer);
+
+    //# Data members
+    HDF5File itsFile;      // The HDF5 file holding all virtual files
+  };
+
+
+} //# NAMESPACE CASACORE - END
+
+#endif
=======================================
--- /dev/null
+++ /branches/nov14/casa/IO/test/tMultiHDF5.cc Tue Mar 24 14:43:31 2015 UTC
@@ -0,0 +1,298 @@
+//# tMultiHDF5.cc: Test program for class MultiHDF5
+//# Copyright (C) 2014
+//# Associated Universities, Inc. Washington DC, USA.
+//#
+//# This library is free software; you can redistribute it and/or modify it
+//# under the terms of the GNU Library General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or (at your
+//# option) any later version.
+//#
+//# This library is distributed in the hope that it will be useful, but WITHOUT
+//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+//# License for more details.
+//#
+//# You should have received a copy of the GNU Library General Public License
+//# along with this library; if not, write to the Free Software Foundation,
+//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
+//#
+//# Correspondence concerning AIPS++ should be addressed as follows:
+//# Internet email:
aips2-...@nrao.edu.
+//# Postal address: AIPS++ Project Office
+//# National Radio Astronomy Observatory
+//# 520 Edgemont Road
+//# Charlottesville, VA 22903-2475 USA
+//#
+//# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
+
+//# Includes
+#include <casacore/casa/IO/MultiHDF5.h>
+#include <casacore/casa/IO/MultiHDF5.h>
+#include <casacore/casa/HDF5/HDF5Object.h>
+#include <casacore/casa/Utilities/Assert.h>
+#include <casacore/casa/Arrays/Vector.h>
+#include <casacore/casa/Arrays/ArrayMath.h>
+#include <casacore/casa/Arrays/ArrayLogical.h>
+#include <casacore/casa/Arrays/ArrayIO.h>
+#include <casacore/casa/BasicSL/STLIO.h>
+#include <casacore/casa/OS/Timer.h>
+#include <cstring>
+#include <iostream>
+#include <stdexcept>
+
+using namespace casacore;
+using namespace std;
+
+// Print the values of fileName(), blockSize(), nfile(), size() and
+// freeBlocks() of the given multi-file on one line of stdout,
+// separated by single blanks.
+void showMultiFile (MultiFileBase& mfile)
+{
+  cout << mfile.fileName() << ' ';
+  cout << mfile.blockSize() << ' ';
+  cout << mfile.nfile() << ' ';
+  cout << mfile.size() << ' ';
+  cout << mfile.freeBlocks() << endl;
+}
+
+// Create the test file as a new MultiHDF5 using the given block size,
+// check that it is writable and print its properties.
+void makeFile (Int64 blockSize)
+{
+  MultiHDF5 mfile ("tMultiHDF5_tmp.dat", ByteIO::New, blockSize);
+  AlwaysAssertExit (mfile.isWritable());
+  showMultiFile (mfile);
+}
+
+// Open the test file with ByteIO::Old, check that it is not writable and
+// print its properties. Thereafter print, for each entry in info(), the
+// name "file<i>" and the id that fileId returns for that name
+// (False is passed as its second argument).
+void readFile()
+{
+  MultiHDF5 mfile ("tMultiHDF5_tmp.dat", ByteIO::Old);
+  AlwaysAssertExit (! mfile.isWritable());
+  showMultiFile (mfile);
+  uInt i = 0;
+  while (i < mfile.info().size()) {
+    String nm = "file" + String::toString(i);
+    cout << nm << ' ' << mfile.fileId(nm, False) << endl;
+    ++i;
+  }
+}
+
+// Open the test file for update and add the virtual files file0, file1
+// and file2. The test requires that they get the ids 0, 1 and 2 and that
+// the file then contains 3 virtual files.
+void addFiles()
+{
+  MultiHDF5 mfile ("tMultiHDF5_tmp.dat", ByteIO::Update);
+  AlwaysAssertExit (mfile.isWritable());
+  // Add the files one by one; the ids are checked afterwards.
+  const Int fid0 = mfile.addFile ("file0");
+  const Int fid1 = mfile.addFile ("file1");
+  const Int fid2 = mfile.addFile ("file2");
+  AlwaysAssertExit (mfile.nfile()==3 && fid0==0 && fid1==1 && fid2==2);
+  showMultiFile (mfile);
+}
+
+// Write six chunks of 128 Int64 values (1024 bytes each) to the virtual
+// files with id 0, 1 and 2. The buffer is initialized with indgen and
+// incremented by 128 before each next write, so every chunk has its own
+// values. Finally the file info is printed.
+void writeFiles1()
+{
+  // File id and byte offset of the successive writes.
+  static const Int   fileIds[] = {0, 2, 0,    0,    1,    2};
+  static const Int64 offsets[] = {0, 0, 1024, 2048, 1024, 1024};
+  const uInt nwrite = sizeof(fileIds) / sizeof(fileIds[0]);
+  MultiHDF5 mfile ("tMultiHDF5_tmp.dat", ByteIO::Update);
+  Vector<Int64> buf(128);
+  indgen (buf);
+  for (uInt k=0; k<nwrite; ++k) {
+    if (k > 0) {
+      buf += Int64(128);
+    }
+    mfile.write (fileIds[k], buf.data(), 1024, offsets[k]);
+  }
+  cout << mfile.info() << endl;
+}
+
+// Read back the chunks in the same order as writeFiles1 wrote them and
+// check their contents. The chunk of file 1 is skipped if do1 is False
+// (checkFiles2 calls it that way after deleteFile has removed file 1).
+// Thereafter check that file 0 can be read with a single read, both
+// from its start and from byte offset 8.
+void checkFiles1 (Bool do1=True)
+{
+  // File id and byte offset of the successive chunks.
+  static const Int   fileIds[] = {0, 2, 0,    0,    1,    2};
+  static const Int64 offsets[] = {0, 0, 1024, 2048, 1024, 1024};
+  const uInt nread = sizeof(fileIds) / sizeof(fileIds[0]);
+  MultiHDF5 mfile ("tMultiHDF5_tmp.dat", ByteIO::Old);
+  Vector<Int64> buf1(128);
+  Vector<Int64> buf(128);
+  Vector<Int64> buff(3*128);
+  indgen (buf1);
+  for (uInt k=0; k<nread; ++k) {
+    // The expected values increase by 128 for each chunk, also if the
+    // chunk itself is skipped.
+    if (k > 0) {
+      buf1 += Int64(128);
+    }
+    if (fileIds[k] == 1  &&  !do1) {
+      continue;
+    }
+    mfile.read (fileIds[k], buf.data(), 1024, offsets[k]);
+    // Only for the second chunk the data are shown if they mismatch.
+    if (k == 1  &&  !allEQ(buf, buf1)) {
+      cout << buf << endl;
+    }
+    AlwaysAssertExit (allEQ(buf, buf1));
+  }
+  // Check a single read.
+  indgen (buf1);
+  mfile.read (0, buff.data(), 3072, 0);
+  AlwaysAssertExit (allEQ(buff(Slice(0,128)), buf1));
+  AlwaysAssertExit (allEQ(buff(Slice(128,128)), buf1+Int64(256)));
+  AlwaysAssertExit (allEQ(buff(Slice(256,128)), buf1+Int64(384)));
+  // Read 381 values starting at the second value; the last 3 values
+  // in buff are not touched by this read.
+  mfile.read (0, buff.data(), 3072-24, 8);
+  AlwaysAssertExit (allEQ(buff(Slice(0,127)), buf1(Slice(1,127))));
+  AlwaysAssertExit (allEQ(buff(Slice(127,128)), buf1+Int64(256)));
+  AlwaysAssertExit (allEQ(buff(Slice(255,126)),
+                          buf1(Slice(0,126))+Int64(384)));
+  AlwaysAssertExit (buff[380]==509 && buff[381]==509); // check not overwritten
+}
+
+// Delete the virtual file with id 1 and print the resulting file info.
+// The MultiHDF5 object lives in its own scope, so it is destructed
+// before readFile opens the file again.
+void deleteFile()
+{
+  cout << "test deleteFile" << endl;
+  {
+    MultiHDF5 mfile ("tMultiHDF5_tmp.dat", ByteIO::Update);
+    mfile.deleteFile (1);
+    cout << mfile.info() << endl;
+  }
+  readFile();
+}
+
+// Test a write not starting at the beginning of a block: write 127 values
+// at byte offset 8 of file 0, read the first 128 values back and check
+// that the first value is still in place, followed by the values written.
+// Thereafter write the full first chunk of file 0 again and write 2 values
+// at byte offset 2048 of file 2. Finally print the file info.
+void writeFiles2()
+{
+  MultiHDF5 mfile ("tMultiHDF5_tmp.dat", ByteIO::Update);
+  Vector<Int64> buf(128);
+  Vector<Int64> buf1(128);
+  indgen (buf);
+  // Partial write followed by a read of the full chunk.
+  mfile.write (0, buf.data(), 1016, 8);
+  mfile.read (0, buf1.data(), 1024, 0);
+  AlwaysAssertExit(buf[0]==buf1[0] && allEQ(buf1(Slice(1,127)),
+                                            buf(Slice(0,127))));
+  // Write the full first chunk again and add some data to file 2.
+  mfile.write (0, buf.data(), 1024, 0);
+  mfile.write (2, buf.data(), 16, 2048);
+  cout << mfile.info() << endl;
+}
+
+// Check the file contents after deleteFile and writeFiles2: first redo
+// the checks of checkFiles1 without the chunk of file 1, then check the
+// 2 values that writeFiles2 wrote at byte offset 2048 of file 2.
+void checkFiles2()
+{
+  checkFiles1 (False);
+  MultiHDF5 mfile ("tMultiHDF5_tmp.dat", ByteIO::Old);
+  Vector<Int64> buf1(2);
+  Vector<Int64> buf(2);
+  indgen (buf1);
+  mfile.read (2, buf.data(), 16, 2048);
+  AlwaysAssertExit (allEQ(buf, buf1));
+}
+
+// Helper for the timing tests below.
+// It creates the test file as a new MultiHDF5 with the given block size,
+// adds the virtual file file0 and writes nchunk consecutive chunks of
+// chunkSize bytes (all zeroes) to it. This is done twice; each pass ends
+// with an fsync, after which the time of the pass is shown with the label.
+static void timeWrites (Int blockSize, Int chunkSize, uInt nchunk,
+                        const String& label)
+{
+  MultiHDF5 mfile("tMultiHDF5_tmp.dat", ByteIO::New, blockSize);
+  Int id = mfile.addFile ("file0");
+  Vector<Int64> buf(chunkSize/8, 0);
+  for (Int j=0; j<2; ++j) {
+    // A new Timer per pass, so each pass is timed on its own.
+    Timer timer;
+    for (uInt i=0; i<nchunk; ++i) {
+      mfile.write (id, buf.data(), chunkSize, i*chunkSize);
+    }
+    mfile.fsync();
+    timer.show (label);
+  }
+}
+
+// Time writing 1000 chunks with a size equal to the block size (32768).
+void timeExact()
+{
+  timeWrites (32768, 32768, 1000, "exact ");
+}
+
+// Time writing 1000 chunks of 32768 bytes, twice the block size (16384).
+void timeDouble()
+{
+  timeWrites (16384, 32768, 1000, "double");
+}
+
+// Time writing 2000 chunks of 16384 bytes, half the block size (32768).
+void timePartly()
+{
+  timeWrites (32768, 16384, 2000, "partly");
+}
+
+// Time 5000000 direct memcpy calls copying 4 Int64 values (32 bytes)
+// from one Vector to another.
+void timeMove1()
+{
+  Vector<Int64> buf1(4, 3);
+  Vector<Int64> buf2(4, 0);
+  Timer timer;
+  uInt i = 0;
+  while (i < 5000000) {
+    memcpy (buf2.data(), buf1.data(), 8*4);
+    ++i;
+  }
+  timer.show ("move1 ");
+}
+
+// Type of a function with the signature of memcpy.
+typedef void* moveFunc(void*, const void*, size_t);
+// Wrapper with a moveFunc signature that forwards to memcpy and
+// returns its result.
+void* mymemcpy (void* to, const void* from, size_t n)
+{
+  void* result = memcpy (to, from, n);
+  return result;
+}
+
+// Time 5000000 calls of the given move function, each copying 4 Int64
+// values (32 bytes) from one Vector to another.
+void timeMove2 (moveFunc func)
+{
+  Vector<Int64> buf1(4, 3);
+  Vector<Int64> buf2(4, 0);
+  Timer timer;
+  uInt i = 0;
+  while (i < 5000000) {
+    func (buf2.data(), buf1.data(), 8*4);
+    ++i;
+  }
+  timer.show ("move2 ");
+}
+
+// Time copying 4 Int64 values 5000000 times from one Vector to another
+// using plain element-by-element assignment.
+void timeMove3()
+{
+  Vector<Int64> buf1(4, 3);
+  Vector<Int64> buf2(4, 0);
+  Timer timer;
+  uInt i = 0;
+  while (i < 5000000) {
+    // Copy the values one by one; data() is called for each element
+    // as in the other loop variants, so the timed work is the same.
+    uInt j = 0;
+    while (j < 4) {
+      buf2.data()[j] = buf1.data()[j];
+      ++j;
+    }
+    ++i;
+  }
+  timer.show ("move3 ");
+}
+
+// Run all functional tests in sequence on a file created with the given
+// block size. After each modifying step readFile shows the file state.
+void doTest (Int64 blockSize)
+{
+  cout << "MultiHDF5 test with blockSize=" << blockSize << endl;
+  // Create the file and add the virtual files.
+  makeFile (blockSize);
+  readFile();
+  addFiles();
+  readFile();
+  // Write data and check them.
+  writeFiles1();
+  readFile();
+  checkFiles1();
+  // Delete a virtual file (deleteFile calls readFile itself).
+  deleteFile();
+  // Write more data and check again.
+  writeFiles2();
+  readFile();
+  checkFiles2();
+  cout << endl;
+}
+
+// Run the functional tests for two block sizes followed by the timing
+// tests. It returns 0 on success, 1 if an unexpected exception is thrown
+// and 3 if casacore has been built without HDF5 support (nothing is
+// tested in that case).
+int main()
+{
+  if (! HDF5Object::hasHDF5Support()) {
+    cout << "tMultiHDF5 not run; HDF5 is not supported in casacore build"
+         << endl;
+    return 3;
+  }
+  try {
+    doTest (128); // requires extra header file
+    doTest (1024); // no extra header file
+    timeExact();
+    timeDouble();
+    timePartly();
+    //timeMove1();
+    //timeMove2(memcpy);
+    //timeMove2(mymemcpy);
+    //timeMove3();
+  } catch (const std::exception& x) {
+    // The exception is only inspected, so catch it by const reference.
+    cout << "Unexpected exception: " << x.what() << endl;
+    return 1;
+  }
+  cout << "OK" << endl;
+  return 0;
+}
=======================================
--- /branches/nov14/casa/CMakeLists.txt Thu Jan 29 12:59:43 2015 UTC
+++ /branches/nov14/casa/CMakeLists.txt Tue Mar 24 14:43:31 2015 UTC
@@ -81,6 +81,8 @@
IO/MMapfdIO.cc
IO/MMapIO.cc
IO/MultiFile.cc
+IO/MultiFileBase.cc
+IO/MultiHDF5.cc
IO/RawIO.cc
IO/RegularFileIO.cc
IO/StreamIO.cc
@@ -420,6 +422,8 @@
IO/MMapfdIO.h
IO/MMapIO.h
IO/MultiFile.h
+IO/MultiFileBase.h
+IO/MultiHDF5.h
IO/RawIO.h
IO/RegularFileIO.h
IO/StreamIO.h
=======================================
--- /branches/nov14/casa/HDF5/HDF5DataSet.cc Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/HDF5/HDF5DataSet.cc Tue Mar 24 14:43:31 2015 UTC
@@ -41,6 +41,14 @@
{
create (parentHid, name, shape, tileShape);
}
+
+ HDF5DataSet::HDF5DataSet (const HDF5Object& parentHid, const String& // Constructor for uChar data that calls create() with the given name, shape and tile shape.
name,
+ const IPosition& shape, const IPosition& tileShape,
+ const uChar* type) // NOTE(review): the pointer presumably only serves to select the data type - confirm
+ : itsDataType (type) // itsDataType is initialized from the uChar pointer
+ {
+ create (parentHid, name, shape, tileShape);
+ }
HDF5DataSet::HDF5DataSet (const HDF5Object& parentHid, const String&
name,
const IPosition& shape, const IPosition& tileShape,
@@ -96,6 +104,13 @@
{
open (parentHid, name);
}
+
+ HDF5DataSet::HDF5DataSet (const HDF5Object& parentHid, const String& // Constructor for uChar data that calls open() with the given parent and name.
name,
+ const uChar* type) // NOTE(review): the pointer presumably only serves to select the data type - confirm
+ : itsDataType (type) // itsDataType is initialized from the uChar pointer
+ {
+ open (parentHid, name);
+ }
HDF5DataSet::HDF5DataSet (const HDF5Object& parentHid, const String&
name,
const Int* type)
@@ -157,7 +172,7 @@
itsTileShape = IPosition(shape.nelements(), 1);
// Trailing elements already have value 1; set the first elements.
for (uInt i=0; i<tileShape.nelements(); ++i) {
- itsTileShape[i] = std::min (tileShape[i], shape[i]);
+ itsTileShape[i] = std::max(ssize_t(1), std::min(tileShape[i],
shape[i]));
}
// Create access property for later setting of cache size.
itsDaplid = H5Pcreate (H5P_DATASET_ACCESS);
@@ -165,7 +180,13 @@
// Create the data space for the array.
int rank = itsShape.nelements();
Block<hsize_t> ls = fromShape (itsShape);
- itsDSid = H5Screate_simple(rank, ls.storage(), NULL);
+ Block<hsize_t> maxls(ls);
+ for (uInt i=0; i<maxls.size(); ++i) {
+ if (maxls[i] == 0) {
+ maxls[i] = H5S_UNLIMITED;
+ }
+ }
+ itsDSid = H5Screate_simple(rank, ls.storage(), maxls.storage());
AlwaysAssert (itsDSid.getHid() >= 0, AipsError);
// Create the properties to hold the tile shape.
itsPLid = H5Pcreate (H5P_DATASET_CREATE);
@@ -326,12 +347,36 @@
NULL, count.storage(), NULL) < 0) {
throw HDF5Error("setting slab of memory buffer");
}
- // Read the data.
+ // Write the data.
if (H5Dwrite (getHid(), itsDataType.getHidMem(), memspace, itsDSid,
H5P_DEFAULT, buf) < 0) {
throw HDF5Error("writing slab into data set array");
}
}
+
+ // Extend the data set such that each axis is at least as long as the
+ // corresponding axis in the given shape. An axis is never made shorter.
+ // Nothing is done if no axis in the given shape is longer.
+ // The number of axes must match the current shape (asserted with
+ // AipsError). An HDF5Error is thrown if the data set cannot be extended
+ // or if its data space cannot be obtained afterwards.
+ void HDF5DataSet::extend (const IPosition& shape)
+ {
+   AlwaysAssert (shape.size() == itsShape.size(), AipsError);
+   // Extend the data set if one of the axes is larger than the current shape.
+   IPosition newShape(itsShape);
+   Bool ext = False;
+   for (uInt i=0; i<shape.size(); ++i) {
+     if (shape[i] > newShape[i]) {
+       newShape[i] = shape[i];
+       ext = True;
+     }
+   }
+   if (ext) {
+     Block<hsize_t> ls = fromShape (newShape);
+     if (H5Dset_extent (getHid(), ls.storage()) < 0) {
+       throw HDF5Error("Could not extend data set");
+     }
+     itsShape = newShape;
+     // The DataSpace has to be refreshed.
+     itsDSid.close();
+     itsDSid = H5Dget_space(getHid());
+     // Check the new data space id, as is done for the id obtained when
+     // creating the data set, so a failure does not go unnoticed.
+     if (itsDSid.getHid() < 0) {
+       throw HDF5Error("Could not get data space of extended data set");
+     }
+   }
+ }
Block<hsize_t> HDF5DataSet::fromShape (const IPosition& shape)
{
@@ -375,6 +420,9 @@
void HDF5DataSet::put (const Slicer&, const void*)
{}
+
+ void HDF5DataSet::extend (const IPosition&) // Variant of extend that does nothing. NOTE(review): presumably the stub compiled when HDF5 is not available - confirm
+ {}
Block<hsize_t> HDF5DataSet::fromShape (const IPosition&)
{ return Block<hsize_t>(); }
=======================================
--- /branches/nov14/casa/HDF5/HDF5DataSet.h Wed Jan 7 09:08:57 2015 UTC
+++ /branches/nov14/casa/HDF5/HDF5DataSet.h Tue Mar 24 14:43:31 2015 UTC
@@ -91,6 +91,8 @@
HDF5DataSet (const HDF5Object&, const String&, const IPosition& shape,
const IPosition& tileShape, const Bool*);
HDF5DataSet (const HDF5Object&, const String&, const IPosition& shape,
+ const IPosition& tileShape, const uChar*);
+ HDF5DataSet (const HDF5Object&, const String&, const IPosition& shape,
const IPosition& tileShape, const Int*);
HDF5DataSet (const HDF5Object&, const String&, const IPosition& shape,
const IPosition& tileShape, const Int64*);
@@ -108,6 +110,7 @@
// It checks if the internal type matches the given type.
// <group>
HDF5DataSet (const HDF5Object&, const String&, const Bool*);
+ HDF5DataSet (const HDF5Object&, const String&, const uChar*);
HDF5DataSet (const HDF5Object&, const String&, const Int*);
HDF5DataSet (const HDF5Object&, const String&, const Int64*);
HDF5DataSet (const HDF5Object&, const String&, const Float*);
@@ -144,6 +147,9 @@
// Put a section of data.
void put (const Slicer&, const void* buf);
+ // Extend the dataset if an axis in the new shape is larger.
+ void extend (const IPosition& shape);
+
// Helper functions to convert shapes.
// It reverses the axes, because HDF5 uses C-order.
// <group>
=======================================
--- /branches/nov14/casa/IO/AipsIO.cc Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/AipsIO.cc Tue Mar 24 14:43:31 2015 UTC
@@ -57,7 +57,7 @@
{}
AipsIO::AipsIO (const String& fileName, ByteIO::OpenOption fop,
- uInt filebufSize, MultiFile* mfile)
+ uInt filebufSize, MultiFileBase* mfile)
: opened_p (0),
maxlev_p (10),
objlen_p (10),
@@ -98,7 +98,7 @@
void AipsIO::open (const String& fileName, ByteIO::OpenOption fop,
- uInt filebufSize, MultiFile* mfile)
+ uInt filebufSize, MultiFileBase* mfile)
{
// Initialize everything for the open.
openInit (fop);
=======================================
--- /branches/nov14/casa/IO/AipsIO.h Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/AipsIO.h Tue Mar 24 14:43:31 2015 UTC
@@ -43,7 +43,7 @@
class TypeIO;
class ByteIO;
class RegularFileIO;
-class MultiFile;
+class MultiFileBase;
// <summary>
@@ -174,13 +174,13 @@
// Construct and open/create a file with the given name.
// The actual IO is done via a CanonicalIO object on a regular file
// using buffered IO with a buffer of the given size.
- // <br>If the MultiFile pointer is not null, a virtual file in the
- // MultiFile will be used instead of a regular file.
+ // <br>If the MultiFileBase pointer is not null, a virtual file in the
+ // MultiFileBase will be used instead of a regular file.
explicit AipsIO (const String& fileName,
ByteIO::OpenOption = ByteIO::Old,
uInt filebufSize=65536,
//// uInt filebufSize=1048576,
- MultiFile* mfile=0);
+ MultiFileBase* mfile=0);
// Construct and open/create a file with the given name.
// This can for instance by used to use AipsIO on a file descriptor
@@ -197,10 +197,10 @@
// Close if not done yet
~AipsIO();
- // Open/create file (either a regular file or a MultiFile virtual
file).
+ // Open/create file (either a regular file or a MultiFileBase virtual
file).
// An exception is thrown if the object contains an already open file.
void open (const String& fileName, ByteIO::OpenOption = ByteIO::Old,
- uInt filebufSize=65536, MultiFile* mfile=0);
+ uInt filebufSize=65536, MultiFileBase* mfile=0);
// Open by connecting to the given byte stream.
// This can for instance by used to use AipsIO on a file descriptor
=======================================
--- /branches/nov14/casa/IO/BucketFile.cc Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/BucketFile.cc Tue Mar 24 14:43:31 2015 UTC
@@ -56,7 +56,7 @@
BucketFile::BucketFile (const String& fileName,
uInt bufSizeFile, Bool mappedFile,
- MultiFile* mfile)
+ MultiFileBase* mfile)
: name_p (Path(fileName).expandedName()),
isWritable_p (True),
isMapped_p (mappedFile),
@@ -81,7 +81,7 @@
BucketFile::BucketFile (const String& fileName, Bool isWritable,
uInt bufSizeFile, Bool mappedFile,
- MultiFile* mfile)
+ MultiFileBase* mfile)
: name_p (Path(fileName).expandedName()),
isWritable_p (isWritable),
isMapped_p (mappedFile),
@@ -162,7 +162,7 @@
{
close();
if (mfile_p) {
- // Remove the file from the MultiFile. Note it might not exist yet.
+ // Remove the file from the MultiFileBase. Note it might not exist
yet.
Int id = mfile_p->fileId (name_p, False);
if (id >= 0) {
mfile_p->deleteFile (id);
=======================================
--- /branches/nov14/casa/IO/BucketFile.h Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/BucketFile.h Tue Mar 24 14:43:31 2015 UTC
@@ -41,7 +41,7 @@
namespace casacore { //# NAMESPACE CASACORE - BEGIN
//# Forward Declarations
-class MultiFile;
+class MultiFileBase;
// <summary>
// File object for BucketCache.
@@ -69,7 +69,7 @@
// An explicit open call has to be given before the file can be used.
// <p>
// The file can be opened as an ordinary file (with a file descriptor)
-// or as a file in a MultiFile object. An ordinary file can be accessed
+// or as a file in a MultiFileBase object. An ordinary file can be accessed
// in 3 ways:
// <ul>
// <li> In an unbuffered way, where the parent BucketCache class accesses
@@ -79,7 +79,7 @@
// <li> In a buffered way, where the parent BucketBuffered class does
// the access using the FilebufIO member.
// </ul>
-// A MultiFile file can only be accessed in the unbuffered way.
+// A MultiFileBase file can only be accessed in the unbuffered way.
// </synopsis>
// <motivation>
@@ -109,23 +109,23 @@
public:
// Create a BucketFile object for a new file.
// The file with the given name will be created as a normal file or
- // as part of a MultiFile (if mfile != 0).
+ // as part of a MultiFileBase (if mfile != 0).
// It can be indicated if a MMapfdIO and/or FilebufIO object must be
- // created for the file. If a MultiFile is used, memory-mapped IO
+ // created for the file. If a MultiFileBase is used, memory-mapped IO
// cannot be used and mappedFile is ignored.
explicit BucketFile (const String& fileName,
uInt bufSizeFile=0, Bool mappedFile=False,
- MultiFile* mfile=0);
+ MultiFileBase* mfile=0);
// Create a BucketFile object for an existing file.
// The file should be opened by the <src>open</src>.
// Tell if the file must be opened writable.
// It can be indicated if a MMapfdIO and/or FilebufIO object must be
- // created for the file. If a MultiFile is used, memory-mapped IO
+ // created for the file. If a MultiFileBase is used, memory-mapped IO
// cannot be used and mappedFile is ignored.
BucketFile (const String& fileName, Bool writable,
uInt bufSizeFile=0, Bool mappedFile=False,
- MultiFile* mfile=0);
+ MultiFileBase* mfile=0);
// The destructor closes the file (if open).
virtual ~BucketFile();
@@ -201,8 +201,8 @@
MMapfdIO* mappedFile_p;
// The optional buffered file.
FilebufIO* bufferedFile_p;
- // The possibly used MultiFile.
- MultiFile* mfile_p;
+ // The possibly used MultiFileBase.
+ MultiFileBase* mfile_p;
// Forbid copy constructor.
=======================================
--- /branches/nov14/casa/IO/MFFileIO.cc Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/MFFileIO.cc Tue Mar 24 14:43:31 2015 UTC
@@ -1,4 +1,4 @@
-//# MFFileIO.cc: A single file in a MultiFile
+//# MFFileIO.cc: A single file in a MultiFileBase
//# Copyright (C) 2014
//# Associated Universities, Inc. Washington DC, USA.
//#
@@ -32,7 +32,7 @@
namespace casacore { //# NAMESPACE CASACORE - BEGIN
- MFFileIO::MFFileIO (MultiFile& file, const String& name,
+ MFFileIO::MFFileIO (MultiFileBase& file, const String& name,
ByteIO::OpenOption opt)
: itsFile (file),
itsPosition (0),
@@ -71,7 +71,7 @@
throw AipsError ("MFFileIO::read - incorrect number of bytes ("
+ String::toString(n) + " out of "
+ String::toString(size) + ") read for file "
- + itsName + " in MultiFile " + itsFile.fileName());
+ + itsName + " in MultiFileBase " +
itsFile.fileName());
}
return n;
}
@@ -105,7 +105,7 @@
Int64 MFFileIO::length()
{
- return itsFile.info()[itsId].size;
+ return itsFile.info()[itsId].fsize;
}
Bool MFFileIO::isReadable() const
=======================================
--- /branches/nov14/casa/IO/MFFileIO.h Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/MFFileIO.h Tue Mar 24 14:43:31 2015 UTC
@@ -1,4 +1,4 @@
-//# MFFileIO.h: Class for IO on a virtual file in a MultiFile
+//# MFFileIO.h: Class for IO on a virtual file in a MultiFileBase
//# Copyright (C) 2014
//# Associated Universities, Inc. Washington DC, USA.
//#
@@ -31,13 +31,13 @@
//# Includes
#include <casacore/casa/aips.h>
#include <casacore/casa/IO/ByteIO.h>
-#include <casacore/casa/IO/MultiFile.h>
+#include <casacore/casa/IO/MultiFileBase.h>
namespace casacore { //# NAMESPACE CASACORE - BEGIN
// <summary>
- // Class for IO on a virtual file in a MultiFile
+ // Class for IO on a virtual file in a MultiFileBase
// </summary>
// <use visibility=export>
@@ -48,7 +48,7 @@
// <synopsis>
// This class is a specialization of class
// <linkto class=ByteIO>ByteIO</linkto>. It uses a
- // <linkto class=MultiFile>MultiFile</linkto> as the data store.
+ // <linkto class=MultiFileBase>MultiFileBase</linkto> as the data store.
// <p>
// Similar to a regular file it is possible to read and write data and to
// seek in the file. The object keeps track of the current file offset.
@@ -79,8 +79,8 @@
public:
// Open or create a virtual file with the given name. Note that only
the
// basename of the file name is actually used.
- // It is created in the given MultiFile.
- MFFileIO (MultiFile&, const String& name,
+ // It is created in the given MultiFileBase.
+ MFFileIO (MultiFileBase&, const String& name,
ByteIO::OpenOption = ByteIO::Old);
// The destructor flushes and closes the file.
@@ -95,13 +95,13 @@
// Write a block at the given offset.
virtual void write (Int64 size, const void* buffer);
- // Reopen the file (and possibly underlying MultiFile) for read/write
access.
+ // Reopen the file (and possibly underlying MultiFileBase) for
read/write access.
// Nothing will be done if the stream is writable already.
// An exception will be thrown if it is not possible to reopen it for
// read/write access.
virtual void reopenRW();
- // Remove the file from the MultiFile object.
+ // Remove the file from the MultiFileBase object.
// It makes the object invalid by setting the fileId to -1.
void remove();
@@ -132,10 +132,10 @@
private:
//# Data members
- MultiFile& itsFile;
- Int64 itsPosition;
- String itsName;
- Int itsId;
+ MultiFileBase& itsFile;
+ Int64 itsPosition;
+ String itsName;
+ Int itsId;
};
=======================================
--- /branches/nov14/casa/IO/MultiFile.cc Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/MultiFile.cc Tue Mar 24 14:43:31 2015 UTC
@@ -33,70 +33,36 @@
#include <casacore/casa/IO/AipsIO.h>
#include <casacore/casa/BasicSL/STLIO.h>
#include <casacore/casa/OS/CanonicalConversion.h>
-#include <casacore/casa/OS/File.h> // for fileFSTAT
#include <casacore/casa/Utilities/GenSort.h>
#include <casacore/casa/Utilities/Assert.h>
#include <casacore/casa/Exceptions/Error.h>
#include <unistd.h>
-#include <sys/stat.h> // needed for stat or stat64
namespace casacore { //# NAMESPACE CASACORE - BEGIN
- void operator<< (ostream& ios, const MultiFileInfo& info)
- { ios <<
info.name << ' ' << info.blockNrs << ' ' << info.size << ' '
- << info.curBlock << ' ' << info.dirty << endl; }
- void operator<< (AipsIO& ios, const MultiFileInfo& info)
- { ios <<
info.name << info.blockNrs << info.size; }
- void operator>> (AipsIO& ios, MultiFileInfo& info)
- { ios >>
info.name >> info.blockNrs >> info.size; }
-
-
MultiFile::MultiFile (const String& name, ByteIO::OpenOption option,
Int blockSize)
- : itsBlockSize (blockSize),
- itsHdrCounter (0),
- itsChanged (False)
+ : MultiFileBase (name, option, blockSize)
{
- itsFD = RegularFileIO::openCreate (name, option);
- itsIO.attach (itsFD, name);
+ itsFD = RegularFileIO::openCreate (itsName, option);
+ itsIO.attach (itsFD, itsName);
if (option == ByteIO::New || option == ByteIO::NewNoReplace) {
// New file; first block is for administration.
itsNrBlock = 1;
- itsChanged = True;
- // Use file system block size, but not less than given size.
- if (itsBlockSize <= 0) {
- struct fileSTAT sfs;
- fileFSTAT (itsFD, &sfs);
- Int64 blksz = sfs.st_blksize;
- itsBlockSize = std::max (-itsBlockSize, blksz);
- }
} else {
readHeader();
}
- AlwaysAssert (itsBlockSize > 0, AipsError);
+ itsWritable = itsIO.isWritable();
}
MultiFile::~MultiFile()
{
close();
}
-
- uInt MultiFile::nfile() const
- {
- Int nf = 0;
- for (vector<MultiFileInfo>::const_iterator iter=itsInfo.begin();
- iter!=itsInfo.end(); ++iter) {
- if (! iter->name.empty()) {
- nf++;
- }
- }
- return nf;
- }
void MultiFile::close()
{
flush();
- itsInfo.clear();
FiledesIO::close (itsFD);
}
@@ -105,43 +71,20 @@
if (isWritable()) {
return;
}
- String fname = itsIO.fileName();
// First try if the file can be opened as read/write.
- int fd = RegularFileIO::openCreate (fname, ByteIO::Update);
+ int fd = RegularFileIO::openCreate (itsName, ByteIO::Update);
// Now close the readonly file and reset fd.
FiledesIO::close (itsFD);
itsIO.detach();
itsFD = fd;
- itsIO.attach (itsFD, fname);
+ itsIO.attach (itsFD, itsName);
itsIO.setWritable();
-}
-
- void MultiFile::flush()
- {
- // Flush all buffers if needed.
- for (vector<MultiFileInfo>::iterator iter=itsInfo.begin();
- iter!=itsInfo.end(); ++iter) {
- if (iter->dirty) {
- writeDirty (*iter);
- }
- }
- // Header only needs to be written if blocks were added since last
flush.
- if (itsChanged) {
- writeHeader();
- itsChanged = False;
- }
+ itsWritable = True;
}
- void MultiFile::resync()
+ void MultiFile::fsync()
{
- AlwaysAssert (!itsChanged, AipsError);
- // Clear all blocknrs.
- for (vector<MultiFileInfo>::iterator iter=itsInfo.begin();
- iter!=itsInfo.end(); ++iter) {
- AlwaysAssert (!iter->dirty, AipsError);
- iter->curBlock = -1;
- }
- readHeader();
+ itsIO.fsync();
}
void MultiFile::writeHeader()
@@ -152,10 +95,8 @@
MemoryIO mio(itsBlockSize, itsBlockSize);
CanonicalIO cio(&mio);
AipsIO aio(&cio);
- Int64 next=0;
itsHdrCounter++;
- cio.write (1, &next); // possible link to subsequent header
block
- cio.write (1, &next); // reserve space for header size
+ cio.write (1, &itsBlockSize); // reserve space for header size
cio.write (1, &itsBlockSize);
cio.write (1, &itsHdrCounter);
aio.putstart ("MultiFile", 1);
@@ -163,46 +104,48 @@
aio.putend();
Int64 todo = mio.length();
uChar* buf = const_cast<uChar*>(mio.getBuffer());
- CanonicalConversion::fromLocal (buf + sizeof(next), todo); //
header size
- // If the buffer does not fit in a single block, next has to be filled.
- if (todo > itsBlockSize) {
- next = itsNrBlock;
- CanonicalConversion::fromLocal (buf, next);
- }
+ CanonicalConversion::fromLocal (buf, todo); // header size
// Write the first part of the buffer at the beginning of the file.
itsIO.seek (0);
itsIO.write (itsBlockSize, buf);
todo -= itsBlockSize;
if (todo > 0) {
- // The rest is written at the end of the file.
- itsIO.seek (itsNrBlock*itsBlockSize);
- itsIO.write (todo, buf+itsBlockSize);
+ // The rest is written in another file. If the header info was
written
+ // at the end of the file, it would be overwritten when extending
with
+ // possible file corruption if the program or system crashes.
+ // By using a separate file, corruption chances are much lower.
+ // Even better would be using another name and doing a rename at the
end.
+ int fd = RegularFileIO::openCreate (itsName + "_hdrext",
ByteIO::New);
+ FiledesIO iohdr (fd, itsName + "_hdrext");
+ iohdr.write (todo, buf+itsBlockSize);
+ FiledesIO::close (fd);
}
}
void MultiFile::readHeader (Bool always)
{
// Read the first part of the header.
- vector<char> buf(4*sizeof(Int64));
+ vector<char> buf(3*sizeof(Int64));
itsIO.seek (0);
itsIO.read (buf.size(), &(buf[0]));
// Extract the required info.
- Int64 next, headerSize, hdrCounter;
- CanonicalConversion::toLocal (next, &(buf[0]));
- CanonicalConversion::toLocal (headerSize, &(buf[sizeof(Int64)]));
- CanonicalConversion::toLocal (itsBlockSize, &(buf[2*sizeof(Int64)]));
- CanonicalConversion::toLocal (hdrCounter, &(buf[3*sizeof(Int64)]));
+ Int64 headerSize, hdrCounter;
+ CanonicalConversion::toLocal (headerSize, &(buf[0]));
+ CanonicalConversion::toLocal (itsBlockSize, &(buf[1*sizeof(Int64)]));
+ CanonicalConversion::toLocal (hdrCounter, &(buf[2*sizeof(Int64)]));
// Only if needed, read the rest of the header.
if (hdrCounter == itsHdrCounter && !always) {
return;
}
itsHdrCounter = hdrCounter;
- Int64 leadSize = 4*sizeof(Int64);
+ Int64 leadSize = 3*sizeof(Int64);
buf.resize (headerSize);
if (headerSize > itsBlockSize) {
itsIO.read (itsBlockSize - leadSize, &(buf[leadSize]));
- itsIO.seek (next*itsBlockSize);
- itsIO.read (headerSize - itsBlockSize, &(buf[itsBlockSize]));
+ int fd = RegularFileIO::openCreate (itsName + "_hdrext",
ByteIO::Old);
+ FiledesIO iohdr (fd, itsName + "_hdrext");
+ iohdr.read (headerSize - itsBlockSize, &(buf[itsBlockSize]));
+ FiledesIO::close (fd);
} else {
itsIO.read (headerSize - leadSize, &(buf[leadSize]));
}
@@ -223,60 +166,13 @@
}
}
- Int MultiFile::addFile (const String& fname)
- {
- if (fname.empty()) {
- throw AipsError("MultiFile::addFile - empty file name given");
- }
- // Only use the basename part (to avoid directory rename problems).
- String bname = Path(fname).baseName();
- // Check that file name is not used yet.
- // Also determine (last) free file slot.
- uInt inx = itsInfo.size();
- uInt i = 0;
- for (vector<MultiFileInfo>::iterator iter=itsInfo.begin();
- iter!=itsInfo.end(); ++iter, ++i) {
- if (iter->name.empty()) {
- inx = i; // free file slot
- } else if (bname == iter->name) {
- throw AipsError ("MultiFile::addFile - file name " + bname +
- " already in use");
- }
- }
- // Add a new file entry if needed.
- if (inx == itsInfo.size()) {
- itsInfo.resize (inx+1);
- }
- itsInfo[inx] = MultiFileInfo(itsBlockSize);
- itsInfo[inx].name = bname;
- itsChanged = True;
- return inx;
- }
+ void MultiFile::doAddFile (MultiFileInfo&)
+ {}
- Int MultiFile::fileId (const String& fname, Bool throwExcp) const
+ void MultiFile::doDeleteFile (MultiFileInfo& info)
{
- // Only use the basename part (to avoid directory rename problems).
- String bname = Path(fname).baseName();
- for (size_t i=0; i<itsInfo.size(); ++i) {
- if (bname == itsInfo[i].name) {
- return i;
- }
- }
- if (throwExcp) {
- throw AipsError ("MultiFile::fileId - file name " + fname +
- " is unknown");
- }
- return -1;
- }
-
- void MultiFile::deleteFile (Int fileId)
- {
- if (fileId >= Int(itsInfo.size()) || itsInfo[fileId].name.empty()) {
- throw AipsError ("MultiFile::deleteFile - invalid fileId given");
- }
// Add the blocknrs to the free list.
// Later we can merge them in order and leave out blocks past last
block used.
- MultiFileInfo& info = itsInfo[fileId];
itsFreeBlocks.reserve (itsFreeBlocks.size() + info.blockNrs.size());
for (size_t i=0; i<info.blockNrs.size(); ++i) {
itsFreeBlocks.push_back (info.blockNrs[i]);
@@ -284,139 +180,35 @@
// Sort them in descending order, so free blocks can be taken from the tail.
genSort (&(itsFreeBlocks[0]), itsFreeBlocks.size(),
Sort::Descending, Sort::QuickSort);
- // Clear this slot.
- info = MultiFileInfo();
- itsChanged = True;
}
- void MultiFile::writeDirty (MultiFileInfo& info)
+ void MultiFile::extend (MultiFileInfo& info, Int64 lastblk)
{
- itsIO.seek (info.blockNrs[info.curBlock] * itsBlockSize);
- itsIO.write (itsBlockSize, &(info.buffer[0]));
- info.dirty = False;
- }
-
- Int64 MultiFile::read (Int fileId, void* buf, Int64 size, Int64 offset)
- {
- if (fileId >= Int(itsInfo.size()) || itsInfo[fileId].name.empty()) {
- throw AipsError ("MultiFile::read - invalid fileId given");
- }
- char* buffer = static_cast<char*>(buf);
- MultiFileInfo& info = itsInfo[fileId];
- // Determine the logical block to read and the start offset in that block.
- Int64 nrblk = info.blockNrs.size();
- Int64 blknr = offset/itsBlockSize;
- Int64 start = offset - blknr*itsBlockSize;
- Int64 done = 0;
- Int64 szdo = std::min(size, info.size - offset); // not past EOF
- // Read until done.
- while (done < szdo) {
- AlwaysAssert (blknr<nrblk, AipsError);
- Int64 todo = std::min(szdo-done, itsBlockSize-start);
- // If already in buffer, copy from there.
- if (blknr == info.curBlock) {
- memcpy (buffer, &(info.buffer[start]), todo);
+ Int64 curnrb = info.blockNrs.size();
+ info.blockNrs.resize (lastblk);
+ for (Int64 i=curnrb; i<lastblk; ++i) {
+ if (itsFreeBlocks.empty()) {
+ info.blockNrs[i] = itsNrBlock;
+ itsNrBlock++;
} else {
- // Read directly into buffer if it fits exactly.
- if (todo == itsBlockSize) {
- itsIO.seek (info.blockNrs[blknr] * itsBlockSize);
- itsIO.read (itsBlockSize, buffer);
- } else {
- if (info.dirty) {
- writeDirty (info);
- }
- // Read into file buffer and copy correct part.
- itsIO.seek (info.blockNrs[blknr] * itsBlockSize);
- itsIO.read (itsBlockSize, &(info.buffer[0]));
- info.curBlock = blknr;
- memcpy (buffer, &(info.buffer[start]), todo);
- }
+ info.blockNrs[i] = itsFreeBlocks[itsFreeBlocks.size() - 1];
+ itsFreeBlocks.resize (itsFreeBlocks.size() - 1);
}
- // Increment counters.
- done += todo;
- buffer += todo;
- blknr++;
- start = 0;
}
- return done;
}
- Int64 MultiFile::write (Int fileId, const void* buf, Int64 size, Int64 offset)
+ void MultiFile::readBlock (MultiFileInfo& info, Int64 blknr,
+ void* buffer)
{
- if (fileId >= Int(itsInfo.size()) || itsInfo[fileId].name.empty()) {
- throw AipsError ("MultiFile::read - invalid fileId given");
- }
- const char* buffer = static_cast<const char*>(buf);
- AlwaysAssert (itsIO.isWritable(), AipsError);
- MultiFileInfo& info = itsInfo[fileId];
- // Determine the logical block to write and the start offset in that block.
- Int64 blknr = offset/itsBlockSize;
- Int64 start = offset - blknr*itsBlockSize;
- Int64 done = 0;
- // If beyond EOF, add blocks as needed.
- Int64 lastblk = blknr + (start+size+itsBlockSize-1) / itsBlockSize;
- Int64 curnrb = info.blockNrs.size();
- if (lastblk >= curnrb) {
- info.blockNrs.resize (lastblk);
- for (Int64 i=curnrb; i<lastblk; ++i) {
- if (itsFreeBlocks.empty()) {
- info.blockNrs[i] = itsNrBlock;
- itsNrBlock++;
- } else {
- info.blockNrs[i] = itsFreeBlocks[itsFreeBlocks.size() - 1];
- itsFreeBlocks.resize (itsFreeBlocks.size() - 1);
- }
- }
- itsChanged = True;
- }
- // Write until all done.
- while (done < size) {
- Int64 todo = std::min(size-done, itsBlockSize-start);
- // Favor sequential writing, thus write current buffer first.
- if (blknr == info.curBlock) {
- memcpy (&(info.buffer[start]), buffer, todo);
- info.dirty = True;
- if (done+todo > size) {
- writeDirty (info);
- }
- } else if (todo == itsBlockSize) {
- // Write directly from buffer if it fits exactly.
- itsIO.seek (info.blockNrs[blknr] * itsBlockSize);
- itsIO.write (itsBlockSize, buffer);
- } else {
- // Write into temporary buffer and copy correct part.
- // First write possibly dirty buffer.
- if (info.dirty) {
- writeDirty (info);
- }
- if (blknr >= curnrb) {
- memset (&(info.buffer[0]), 0, itsBlockSize);
- } else {
- itsIO.seek (info.blockNrs[blknr] * itsBlockSize);
- itsIO.read (itsBlockSize, &(info.buffer[0]));
- }
- info.curBlock = blknr;
- memcpy (&(info.buffer[start]), buffer, todo);
- info.dirty = True;
- }
- done += todo;
- buffer += todo;
- blknr++;
- start = 0;
- }
- if (offset+size > info.size) {
- info.size = offset+size;
- }
- return done;
+ itsIO.seek (info.blockNrs[blknr] * itsBlockSize);
+ itsIO.read (itsBlockSize, buffer);
}
-
- MultiFileInfo::MultiFileInfo (Int64 bufSize)
- : curBlock (-1),
- size (0),
- dirty (False)
+ void MultiFile::writeBlock (MultiFileInfo& info, Int64 blknr,
+ const void* buffer)
{
- buffer.resize (bufSize);
+ itsIO.seek (info.blockNrs[blknr] * itsBlockSize);
+ itsIO.write (itsBlockSize, buffer);
}
=======================================
--- /branches/nov14/casa/IO/MultiFile.h Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/MultiFile.h Tue Mar 24 14:43:31 2015 UTC
@@ -30,34 +30,12 @@
//# Includes
#include <casacore/casa/aips.h>
+#include <casacore/casa/IO/MultiFileBase.h>
#include <casacore/casa/IO/FiledesIO.h>
-#include <casacore/casa/vector.h>
-#include <casacore/casa/ostream.h>
namespace casacore { //# NAMESPACE CASACORE - BEGIN
- //# Forward declaration.
- class AipsIO;
-
- // <summary>
- // Helper class for MultiFile containing info per internal file
- // </summary>
- // <use visibility=local>
- struct MultiFileInfo {
- explicit MultiFileInfo (Int64 bufSize=0);
- vector<Int64> blockNrs; // physical blocknrs for this logical file
- vector<char> buffer; // buffer holding a data block
- Int64 curBlock; // the data block held in buffer (<0 is none)
- Int64 size; // file size (in bytes)
- String name; // the virtual file name
- Bool dirty; // has data in buffer been changed?
- };
- void operator<< (ostream&, const MultiFileInfo&);
- void operator<< (AipsIO&, const MultiFileInfo&);
- void operator>> (AipsIO&, MultiFileInfo&);
-
-
// <summary>
// Class to combine multiple files in a single one.
// </summary>
@@ -83,7 +61,9 @@
// MultiFile. A data block is never shared by multiple files.
// For each virtual file MultiFile keeps a MultiFileInfo object telling
// the file size and the blocks numbers used for the file. When flushing
- // the MultiFile, this meta info is written into a header block and,
+ // the MultiFile, this meta info is written into the header block. If it
+ // does not fit in the header block, the rest is written in a separate "-ext"
+ // file.
// if needed, continuation blocks. On open and resync, it is read back.
// <br>
//
@@ -124,7 +104,7 @@
// </todo>
- class MultiFile
+ class MultiFile: public MultiFileBase
{
public:
// Open or create a MultiFile with the given name.
@@ -133,91 +113,41 @@
MultiFile (const String& name, ByteIO::OpenOption, Int blockSize=0);
// The destructor flushes and closes the file.
- ~MultiFile();
-
- // Add a file to the MultiFile object. It returns the file id.
- // Only the base name of the given file name is used. In this way the
- // MultiFile container file can be moved.
- Int addFile (const String& name);
-
- // Return the file id of a file in the MultiFile object.
- // If the name is unknown, an exception is thrown if throwExcp is set.
- // Otherwise it returns -1.
- Int fileId (const String& name, Bool throwExcp=True) const;
-
- // Delete a file. It adds its blocks to the free block list.
- void deleteFile (Int fileId);
-
- // Read a block at the given offset. It returns the actual size read.
- Int64 read (Int fileId, void* buffer, Int64 size, Int64 offset);
-
- // Write a block at the given offset. It returns the actual size written.
- Int64 write (Int fileId, const void* buffer, Int64 size, Int64 offset);
+ virtual ~MultiFile();
// Reopen the underlying file for read/write access.
// Nothing will be done if the file is writable already.
// Otherwise it will be reopened and an exception will be thrown
// if it is not possible to reopen it for read/write access.
- void reopenRW();
-
- // Flush the file by writing all dirty data and all header info.
- void flush();
+ virtual void reopenRW();
// Fsync the file (i.e., force the data to be physically written).
- void fsync()
- { itsIO.fsync(); }
-
- // Resync with another process by clearing the buffers and rereading
- // the header. The header is only read if its counter has changed.
- void resync();
-
- // Get the file name of the MultiFile.
- String fileName() const
- { return itsIO.fileName(); }
-
- // Is the file writable?
- Bool isWritable() const
- { return itsIO.isWritable(); }
-
- // Get the block size used.
- Int64 blockSize() const
- { return itsBlockSize; }
-
- // Get the nr of virtual files.
- uInt nfile() const;
-
- // Get the total nr of data blocks used.
- Int64 size() const
- { return itsNrBlock; }
-
- // Get the info object (for test purposes mainly).
- const vector<MultiFileInfo>& info() const
- { return itsInfo; }
-
- // Get the free blocks (for test purposes mainly).
- const vector<Int64>& freeBlocks() const
- { return itsFreeBlocks; }
+ virtual void fsync();
private:
+ // Do the class-specific actions on adding a file.
+ virtual void doAddFile (MultiFileInfo&);
+ // Do the class-specific actions on deleting a file.
+ virtual void doDeleteFile (MultiFileInfo&);
// Flush and close the file.
- void close();
+ virtual void close();
// Write the header info.
- void writeHeader();
+ virtual void writeHeader();
// Read the header info. If always==False, the info is only read if the
// header counter has changed.
- void readHeader (Bool always=True);
- // Write a changed data block.
- void writeDirty (MultiFileInfo& info);
+ virtual void readHeader (Bool always=True);
+ // Extend the virtual file to fit lastblk.
+ virtual void extend (MultiFileInfo& info, Int64 lastblk);
+ // Write a data block.
+ virtual void writeBlock (MultiFileInfo& info, Int64 blknr,
+ const void* buffer);
+ // Read a data block.
+ virtual void readBlock (MultiFileInfo& info, Int64 blknr,
+ void* buffer);
+ protected:
//# Data members
- Int64 itsBlockSize; // The blocksize used
- Int64 itsNrBlock; // The total nr of blocks actually used
- Int64 itsHdrCounter; // Counter of header changes
FiledesIO itsIO;
- vector<MultiFileInfo> itsInfo;
- int itsFD;
- Bool itsChanged; // Has header info changed since last flush?
- vector<Int64> itsFreeBlocks;
};
=======================================
--- /branches/nov14/casa/IO/test/CMakeLists.txt Fri Nov 21 11:43:02 2014 UTC
+++ /branches/nov14/casa/IO/test/CMakeLists.txt Tue Mar 24 14:43:31 2015 UTC
@@ -16,6 +16,7 @@
tMappedIO
tMMapIO
tMultiFile
+tMultiHDF5
tTapeIO
tTypeIO
)
=======================================
--- /branches/nov14/casa/IO/test/tAipsIO.cc Fri Nov 21 11:43:02 2014 UTC
+++ /branches/nov14/casa/IO/test/tAipsIO.cc Tue Mar 24 14:43:31 2015 UTC
@@ -31,6 +31,7 @@
#include <casacore/casa/IO/MemoryIO.h>
#include <casacore/casa/IO/RawIO.h>
#include <casacore/casa/IO/MultiFile.h>
+#include <casacore/casa/IO/MultiHDF5.h>
#include <casacore/casa/BasicSL/Complex.h>
#include <casacore/casa/BasicSL/String.h>
#include <casacore/casa/Exceptions/Error.h>
=======================================
--- /branches/nov14/casa/IO/test/tAipsIO.out Fri Nov 21 11:43:02 2014 UTC
+++ /branches/nov14/casa/IO/test/tAipsIO.out Tue Mar 24 14:43:31 2015 UTC
@@ -144,9 +144,9 @@
strin2 strin2 stri3 stri3
stri3 stri3 string45 string45
string45 string45 s s
-MFFileIO::read - incorrect number of bytes (0 out of 4) read for file tAipsIO_tmp.data in MultiFile tAipsIO_tmp.mf
+MFFileIO::read - incorrect number of bytes (0 out of 4) read for file tAipsIO_tmp.data in MultiFileBase tAipsIO_tmp.mf
3000288
-MFFileIO::read - incorrect number of bytes (0 out of 4) read for file tAipsIO_tmp.data in MultiFile tAipsIO_tmp.mf
+MFFileIO::read - incorrect number of bytes (0 out of 4) read for file tAipsIO_tmp.data in MultiFileBase tAipsIO_tmp.mf
Length=3000555
AipsIO::getNextType: tAipsIO_tmp.data - no magic value found
AipsIO::getstart: found object type abcdefghij, expected aa
=======================================
--- /branches/nov14/casa/IO/test/tMFFileIO.cc Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/test/tMFFileIO.cc Tue Mar 24 14:43:31 2015 UTC
@@ -27,19 +27,22 @@
//# Includes
#include <casacore/casa/IO/MFFileIO.h>
-#include <casacore/casa/Utilities/Assert.h>
+#include <casacore/casa/IO/MultiFile.h>
+#include <casacore/casa/IO/MultiHDF5.h>
+#include <casacore/casa/HDF5/HDF5Object.h>
#include <casacore/casa/Arrays/Vector.h>
#include <casacore/casa/Arrays/ArrayMath.h>
#include <casacore/casa/Arrays/ArrayLogical.h>
#include <casacore/casa/Arrays/ArrayIO.h>
#include <casacore/casa/BasicSL/STLIO.h>
+#include <casacore/casa/Utilities/Assert.h>
#include <iostream>
#include <stdexcept>
using namespace casacore;
using namespace std;
-void showMultiFile (MultiFile& mfile)
+void showMultiFile (MultiFileBase& mfile)
{
cout << mfile.fileName() << ' ' << mfile.blockSize() << ' '
<< mfile.nfile() << ' ' << mfile.size() << ' '
@@ -53,14 +56,14 @@
}
-void makeFile (MultiFile& mfile)
+void makeFile (MultiFileBase& mfile)
{
MFFileIO mff(mfile, "mff1", ByteIO::New);
showMultiFile(mfile);
showMFFile (mff);
}
-void writeFiles1 (MultiFile& mfile)
+void writeFiles1 (MultiFileBase& mfile)
{
MFFileIO mff(mfile, "mff1", ByteIO::Update);
Vector<Int64> buf(120);
@@ -74,7 +77,7 @@
cout <<
mfile.info() << endl;
}
-void checkFiles1 (MultiFile& mfile)
+void checkFiles1 (MultiFileBase& mfile)
{
MFFileIO mff(mfile, "mff1", ByteIO::Update);
Vector<Int64> buf1(250), buf(250);
@@ -87,10 +90,18 @@
int main()
{
try {
- MultiFile mfile("tMFFileIO_tmp.dat", ByteIO::New, 512);
- makeFile(mfile);
- writeFiles1(mfile);
- checkFiles1(mfile);
+ {
+ MultiFile mfile("tMFFileIO_tmp.dat1", ByteIO::New, 512);
+ makeFile(mfile);
+ writeFiles1(mfile);
+ checkFiles1(mfile);
+ }
+ if (HDF5Object::hasHDF5Support()) {
+ MultiHDF5 mfile("tMFFileIO_tmp.dat2", ByteIO::New, 512);
+ makeFile(mfile);
+ writeFiles1(mfile);
+ checkFiles1(mfile);
+ }
} catch (std::exception& x) {
cout << "Unexpected exception: " << x.what() << endl;
return 1;
=======================================
--- /branches/nov14/casa/IO/test/tMultiFile.cc Wed Dec 10 08:06:42 2014 UTC
+++ /branches/nov14/casa/IO/test/tMultiFile.cc Tue Mar 24 14:43:31 2015 UTC
@@ -27,6 +27,7 @@
//# Includes
#include <casacore/casa/IO/MultiFile.h>
+#include <casacore/casa/IO/MultiHDF5.h>
#include <casacore/casa/Utilities/Assert.h>
#include <casacore/casa/Arrays/Vector.h>
#include <casacore/casa/Arrays/ArrayMath.h>
@@ -47,9 +48,9 @@
<< mfile.freeBlocks() << endl;
}
-void makeFile()
+void makeFile (Int64 blockSize)
{
- MultiFile mfile("tMultiFile_tmp.dat", ByteIO::New, 1024);
+ MultiFile mfile("tMultiFile_tmp.dat", ByteIO::New, blockSize);
AlwaysAssertExit (mfile.isWritable());
showMultiFile(mfile);
}
@@ -253,21 +254,28 @@
timer.show ("move3 ");
}
+void doTest (Int64 blockSize)
+{
+ cout << "MultiFile test with blockSize=" << blockSize << endl;
+ makeFile (blockSize);
+ readFile();
+ addFiles();
+ readFile();
+ writeFiles1();
+ readFile();
+ checkFiles1();
+ deleteFile();
+ writeFiles2();
+ readFile();
+ checkFiles2();
+ cout << endl;
+}
int main()
{
try {
- makeFile();
- readFile();
- addFiles();
- readFile();
- writeFiles1();
- readFile();
- checkFiles1();
- deleteFile();
- writeFiles2();
- readFile();
- checkFiles2();
+ doTest (128); // requires extra header file
+ doTest (1024); // no extra header file
timeExact();
timeDouble();
timePartly();
@@ -279,5 +287,6 @@
cout << "Unexpected exception: " << x.what() << endl;
return 1;
}
+ cout << "OK" << endl;
return 0;
}