[sfs commit] r34 - Added replication level checking and enforcement during mount.

1 view
Skip to first unread message

codesite...@google.com

unread,
Jun 9, 2009, 2:57:32 AM6/9/09
to sfs-dev...@googlegroups.com
Author: matt.burkhart
Date: Mon Jun 8 23:56:50 2009
New Revision: 34

Added:
trunk/src/tests/
trunk/src/tests/CMakeLists.txt
trunk/src/tests/replication_test.sh (contents, props changed)
Modified:
trunk/README
trunk/src/CMakeLists.txt
trunk/src/switchfs.cc
trunk/src/switchfs.h

Log:
Added replication level checking and enforcement during mount.

Modified: trunk/README
==============================================================================
--- trunk/README (original)
+++ trunk/README Mon Jun 8 23:56:50 2009
@@ -1,26 +1,29 @@
SwitchFS
http://code.google.com/p/sfs

-SwitchFS requires FUSE 2.5 or later with relevant kernel support.
+SwitchFS requires FUSE 2.5 or later with relevant kernel support. This is
+included with most distributions, however more information may be found at:
http://sourceforge.net/projects/fuse

-To run an out-of-source CMake build:
+To run an out-of-source CMake build (recommended):
# mkdir bin
# cd bin
# cmake ../src
# make

-To set up a sfs mount, create a configuration file containing a space
-or line delimited list of source directories (specified by absolute
-path) where files are to be stored. Pass the location of this file on
-the command line while mounting sfs. See switchfs.conf.example and the
-example command below.
+To run the test suite:
+# make && make test
+
+To set up a sfs mount, create a configuration file containing a space or line
+delimited list of source directories (specified by absolute path) where files
+are to be stored. Pass the location of this file on the command line while
+mounting sfs. See switchfs.conf.example and the example command below.

To mount:
-# ./sfs -o config=<configuration_file> <mount_dir>
+# ./sfs -check -balance -o config=<configuration_file> <mount_dir>

To unmount:
# fusermount -u <mount_dir>

To mount running in the foreground for debugging add the '-d' flag:
-# ./sfs -d -o config=<configuration_file> <mount_dir>
+# ./sfs -d -check -balance -o config=<configuration_file> <mount_dir>

Modified: trunk/src/CMakeLists.txt
==============================================================================
--- trunk/src/CMakeLists.txt (original)
+++ trunk/src/CMakeLists.txt Mon Jun 8 23:56:50 2009
@@ -2,12 +2,18 @@

PROJECT(SFS)

+ENABLE_TESTING()
+
ADD_EXECUTABLE(sfs
logger.cc logger.h
main.cc
path_utils.cc path_utils.h
source.cc source.h
switchfs.cc switchfs.h
+)
+
+SUBDIRS(
+tests
)

ADD_DEFINITIONS(

Modified: trunk/src/switchfs.cc
==============================================================================
--- trunk/src/switchfs.cc (original)
+++ trunk/src/switchfs.cc Mon Jun 8 23:56:50 2009
@@ -92,7 +92,10 @@

for (set<string>::const_iterator file = unique_files.begin();
file != unique_files.end(); ++file) {
- int error = BalanceFile(Join(virtual_path, *file));
+ string virtual_file_path = Join(virtual_path, *file);
+ int error = ReplicateFile(virtual_file_path);
+ if (error) return error;
+ error = BalanceFile(virtual_file_path);
if (error) return error;
}
for (set<string>::const_iterator directory = unique_directories.begin();
@@ -103,6 +106,58 @@
return 0;
}

+int SwitchFS::ReplicateFile(const string& virtual_path) {
+ size_t target_replica_count = GetDesiredReplication(virtual_path);
+ size_t current_replica_count = GetFileSources(virtual_path).size();
+
+ if (current_replica_count < target_replica_count) {
+ LOG("File \"%s\" is under-replicated; %d versus %d. Creating
replicas:",
+ virtual_path.c_str(), current_replica_count, target_replica_count);
+ size_t difference = target_replica_count - current_replica_count;
+
+ // Note that the call to AllocateSources(...) invalidates the Source pointer
+ // addresses (since it sorts the sources vector) so it must be called before
+ // we get the list of current file sources.
+ vector<Source*> optimal_sources = AllocateSources(sources_.size());
+ vector<Source*> current_sources = GetFileSources(virtual_path);
+ sort(optimal_sources.begin(), optimal_sources.end());
+ sort(current_sources.begin(), current_sources.end());
+ vector<Source*> target_sources(optimal_sources.size());
+ vector<Source*>::iterator target_sources_end =
+ set_difference(optimal_sources.begin(), optimal_sources.end(),
+ current_sources.begin(), current_sources.end(),
+ target_sources.begin());
+ sort(target_sources.begin(), target_sources_end, SourceComparitor());
+
+ // Starting with the drive with the highest allocation priority, copy new
+ // physical replicas until the desired replication level is reached.
+ for (size_t replica = 0; replica < difference; ++replica) {
+ string old_physical_path = Join(*current_sources.front(),
virtual_path);
+ string new_physical_path = Join(*target_sources[replica],
virtual_path);
+ int error = CopyPhysicalFile(old_physical_path, new_physical_path);
+ if (error) return error;
+ }
+ }
+
+ if (current_replica_count > target_replica_count) {
+ LOG("File \"%s\" is over-replicated; %d versus %d. Destroying
replicas:",
+ virtual_path.c_str(), current_replica_count, target_replica_count);
+ size_t difference = current_replica_count - target_replica_count;
+ vector<Source*> current_sources = GetFileSources(virtual_path);
+ sort(current_sources.begin(), current_sources.end(),
SourceComparitor());
+
+ // Starting with the file on the most overcrowded drive, delete physical
+ // replicas until the desired replication level is reached.
+ for (size_t replica = 0; replica < difference; ++replica) {
+ string physical_path = Join(*current_sources.back(), virtual_path);
+ current_sources.pop_back();
+ int error = RemovePhysicalFile(physical_path);
+ if (error) return error;
+ }
+ }
+ return 0;
+}
+
int SwitchFS::BalanceFile(const string& virtual_path) {
// The balance threshold determines the minimum amount by which the free space
// must differ between current and desired sources for a balance move to take
@@ -110,9 +165,8 @@
const unsigned long long kBalanceThreshold = 1024 * 1024 * 1024; // 1
GB.

// Note that the call to AllocateSources(...) invalidates the Source pointer
- // addresses so it must be called before we get the list of current file
- // sources if we want to deal with the vector of pointers instead of copies of
- // instances.
+ // addresses (since it sorts the sources vector) so it must be called before
+ // we get the list of current file sources.
size_t replication_count = GetFileSources(virtual_path).size();
vector<Source*> optimal_sources = AllocateSources(replication_count);
vector<Source*> current_sources = GetFileSources(virtual_path);
@@ -120,16 +174,15 @@
// Given the list of current file sources and the optimal file sources,
// determine which of the optimal sources to move to and which current sources
// to move from based off of which has the most available free space.
- std::sort(current_sources.begin(), current_sources.end());
- std::sort(optimal_sources.begin(), optimal_sources.end());
+ sort(current_sources.begin(), current_sources.end());
+ sort(optimal_sources.begin(), optimal_sources.end());
vector<Source*> target_sources(current_sources.size());
vector<Source*>::iterator target_sources_end =
- std::set_difference(optimal_sources.begin(), optimal_sources.end(),
- current_sources.begin(), current_sources.end(),
- target_sources.begin());
-
- std::sort(current_sources.begin(), current_sources.end(),
SourceComparitor());
- std::sort(target_sources.begin(), target_sources_end,
SourceComparitor());
+ set_difference(optimal_sources.begin(), optimal_sources.end(),
+ current_sources.begin(), current_sources.end(),
+ target_sources.begin());
+ sort(current_sources.begin(), current_sources.end(), SourceComparitor());
+ sort(target_sources.begin(), target_sources_end, SourceComparitor());

// For each target source *starting with the least allocated* move a file
// replica from the *most over-allocated* source. Stop if we have moved to all
@@ -138,22 +191,16 @@
for (vector<Source*>::const_iterator target_source =
target_sources.begin();
target_source != target_sources_end; ++target_source) {
Source* current_source = current_sources.back();
+ current_sources.pop_back();
if ((*target_source)->GetFreeSpace() - current_source->GetFreeSpace() <
kBalanceThreshold) {
break;
}

- string old_physical_path = Join(*current_source, virtual_path.c_str());
- string new_physical_path = Join(**target_source, virtual_path.c_str());
- LOG("Moving \"%s\" to \"%s\".",
- old_physical_path.c_str(), new_physical_path.c_str());
- const char* const args [] = {
- "mv", "--", old_physical_path.c_str(), new_physical_path.c_str(),
NULL };
- int error = ExecCommand("mv", args);
- if (error) {
- return error; // Move error.
- }
- current_sources.pop_back();
+ string old_physical_path = Join(*current_source, virtual_path);
+ string new_physical_path = Join(**target_source, virtual_path);
+ int error = MovePhysicalFile(old_physical_path, new_physical_path);
+ if (error) return error;
}
return 0;
}
@@ -250,7 +297,7 @@
// Check if the file exists. If so, open it and read out the replication
// level. If the file is malformed, all we can do is print out a warning and
// ignore the file.
- DEBUG("Looking for directive file at: " << directive_path << endl);
+ DEBUG("Looking for directive file: " << directive_path << endl);
if (::access(directive_path.c_str(), F_OK) == 0) {
ifstream directive_file(directive_path.c_str());
int replication_level;
@@ -264,7 +311,32 @@
}
pthread_rwlock_unlock(&sources_lock_);

+ assert(desired_replication > 0);
return desired_replication;
+}
+
+int SwitchFS::MovePhysicalFile(const string& physical_path_source,
+ const string& physical_path_dest) {
+ LOG("Moving \"%s\" to \"%s\".",
+ physical_path_source.c_str(), physical_path_dest.c_str());
+ const char* const args [] = {
+ "--", physical_path_source.c_str(), physical_path_dest.c_str(), NULL };
+ return ExecCommand("mv", args);
+}
+
+int SwitchFS::CopyPhysicalFile(const string& physical_path_source,
+ const string& physical_path_dest) {
+ LOG("Copying \"%s\" to \"%s\".",
+ physical_path_source.c_str(), physical_path_dest.c_str());
+ const char* const args [] = {
+ "--", physical_path_source.c_str(), physical_path_dest.c_str(), NULL };
+ return ExecCommand("cp", args);
+}
+
+int SwitchFS::RemovePhysicalFile(const string& physical_path) {
+ LOG("Removing \"%s\".", physical_path.c_str());
+ const char* const args [] = { "--", physical_path.c_str(), NULL };
+ return ExecCommand("rm", args);
}

int SwitchFS::ExecCommand(const char* cmd, const char* const* args) {

Modified: trunk/src/switchfs.h
==============================================================================
--- trunk/src/switchfs.h (original)
+++ trunk/src/switchfs.h Mon Jun 8 23:56:50 2009
@@ -106,11 +106,18 @@
private:
Logger& logger();

- // For a specified file, redistribute its replicas across drives as if the
- // file was newly created according to the file allocation policy. This has
- // the effect of redistributing load across sources evenly. Note that this
- // does not check or enforce desired file replication levels. However this
- // could be extended to do so in the future.
+ // For the specified virtual file path, ensure that it has exactly the number
+ // of replicas as specified by the user via the file replication directives
+ // and / or the default replication amount. If the file does not have enough
+ // physical replicas, more are made. If the file has too many physical
+ // replicas, existing replicas are deleted.
+ int ReplicateFile(const std::string& virtual_path);
+
+ // For the specified virtual file path, redistribute its replicas across
+ // drives as if the file was newly created according to the file allocation
+ // policy. This has the effect of redistributing load across sources evenly.
+ // Note that this does not check or enforce desired file replication levels.
+ // However this could be extended to do so in the future.
int BalanceFile(const std::string& virtual_path);

// Choose count number of source locations to place the "next" new file(s).
@@ -147,6 +154,16 @@

// Amount of space to reserve on each source location. Units in bytes.
static unsigned long long GetBytesReserved();
+
+ // The following are blocking calls to perform the associated file system
+ // action on the specified physical file path(s). If the command was executed
+ // and then exited with a successful status, 0 is returned. Otherwise, -1
+ // indicates an error. Each method call is logged.
+ int MovePhysicalFile(const std::string& physical_path_source,
+ const std::string& physical_path_dest);
+ int CopyPhysicalFile(const std::string& physical_path_source,
+ const std::string& physical_path_dest);
+ int RemovePhysicalFile(const std::string& physical_path);

// Blocking call to execute the specified command with the specified
// arguments. The command may be an absolute path or a file name in which case

Added: trunk/src/tests/CMakeLists.txt
==============================================================================
--- (empty file)
+++ trunk/src/tests/CMakeLists.txt Mon Jun 8 23:56:50 2009
@@ -0,0 +1,2 @@
+ADD_TEST(replication_test
+ ${CMAKE_SOURCE_DIR}/tests/replication_test.sh ${CMAKE_BINARY_DIR})

Added: trunk/src/tests/replication_test.sh
==============================================================================
--- (empty file)
+++ trunk/src/tests/replication_test.sh Mon Jun 8 23:56:50 2009
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Test that replication levels are maintained even during a file system check.
+# This test verifies that SFS will replace lost / deleted file replicas
+# automatically.
+#
+# Usage: replication_test.sh <sfs_binary_dir>
+
+SFS_BINARY_DIR=$1
+
+# Set up. For the following test, we create a set of SFS sources with both under
+# and over replicated files. For this test to succeed, we expect SFS to fix
+# these replication errors automatically when it is mounted.
+TESTING_ROOT=`mktemp -d`
+mkdir $TESTING_ROOT/src1
+touch $TESTING_ROOT/src1/a
+mkdir $TESTING_ROOT/src2
+echo "2" >> $TESTING_ROOT/src1/.backup
+echo "2" >> $TESTING_ROOT/src2/.backup
+mkdir $TESTING_ROOT/src1/foo
+touch $TESTING_ROOT/src1/foo/a
+mkdir $TESTING_ROOT/src2/foo
+touch $TESTING_ROOT/src2/foo/a
+echo "1" >> $TESTING_ROOT/src1/foo/.backup
+echo "1" >> $TESTING_ROOT/src2/foo/.backup
+
+echo "$TESTING_ROOT/src1" >> $TESTING_ROOT/conf
+echo "$TESTING_ROOT/src2" >> $TESTING_ROOT/conf
+
+# Mount.
+SFS_DIR=$TESTING_ROOT/out
+mkdir $SFS_DIR
+$SFS_BINARY_DIR/sfs -check -balance -o config=$TESTING_ROOT/conf $SFS_DIR
+
+# Verify.
+EXIT_STATUS=0
+
+touch $SFS_DIR/b
+if [ ! -e $TESTING_ROOT/src1/b -o ! -e $TESTING_ROOT/src2/b ]; then
+ echo "Failure: Replication level failure when creating a new file."
+ EXIT_STATUS=1
+fi
+if [ ! -e $TESTING_ROOT/src1/a -o ! -e $TESTING_ROOT/src2/a ]; then
+ echo "Failure: Replication level not correctly increased upon
start-up."
+ EXIT_STATUS=1
+fi
+if [ -e $TESTING_ROOT/src1/foo/a -a -e $TESTING_ROOT/src2/foo/a ]; then
+ echo "Failure: Replication level not correctly decreased upon
start-up."
+ EXIT_STATUS=1
+fi
+
+# Clean up.
+fusermount -u $SFS_DIR
+rm -rf $TESTING_ROOT
+
+echo "Exit status: $EXIT_STATUS"
+exit $EXIT_STATUS

Reply all
Reply to author
Forward
0 new messages