[RFC Utils] mog2ficlone: reflink a local MogileFS instance to a CoW FS

9 views
Skip to first unread message

Eric Wong

unread,
Aug 15, 2020, 3:45:10 PM8/15/20
to mog...@googlegroups.com
Not a really serious patch, but maybe it's of use to someone
in a similar situation as me...

This depends on several things being true:

* All MogileFS devices are on the same copy-on-write FS
(e.g btrfs(5)) as the output on a Linux system.

* Key name structure resembles a filesystem (e.g. "foo/bar")

* Keys don't conflict with "directory names" of other keys

* Inline::C and C compiler are installed along with Linux
development headers (for <linux/fs.h>)
---
Fwiw, I'm pretty happy with btrfs in raid1(c[34]) mode being
able to mix HDDs of wildly different sizes. I'll likely be
reducing my use of MogileFS as time goes on. I haven't needed
multiple hosts in years, and can't imagine needing multiple
hosts again given current HDD capacities.

MANIFEST | 1 +
Makefile.PL | 2 +-
mog2ficlone | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 224 insertions(+), 1 deletion(-)
create mode 100755 mog2ficlone

diff --git a/MANIFEST b/MANIFEST
index 071935c..7803457 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -14,3 +14,4 @@ moglistfids
moglistkeys
mogfiledebug
mogrename
+mog2ficlone
diff --git a/Makefile.PL b/Makefile.PL
index b87a073..6c91eac 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -10,7 +10,7 @@ WriteMakefile(
ABSTRACT => 'MogileFS utilities',
EXE_FILES => ['mogtool', 'mogadm', 'mogstats',
'mogupload', 'mogfetch', 'mogdelete', 'mogfileinfo', 'moglistkeys',
- 'moglistfids', 'mogfiledebug', 'mogrename',
+ 'moglistfids', 'mogfiledebug', 'mogrename', 'mog2ficlone'
],
PREREQ_PM => {
'LWP::Simple' => 0,
diff --git a/mog2ficlone b/mog2ficlone
new file mode 100755
index 0000000..06bd9d5
--- /dev/null
+++ b/mog2ficlone
@@ -0,0 +1,222 @@
+#!/usr/bin/perl -w
+=head1 NAME
+
+mog2ficlone -- reflink a localhost MogileFS instance to a CoW filesystem
+
+=head1 SYNOPSIS
+
+ $ mog2ficlone --docroot=/path/to/devs --output=/path/to/out --uri-decode
+
+=head1 DESCRIPTION
+
+Designed for MogileFS instances where all keys are present on a single
+copy-on-write-filesystem (e.g. L<btrfs(5)>, it can extract the keys to
+that filesystem via reflinks to avoid data copy overhead.
+
+This assumes your MogileFS file keys are structured like filesystem
+paths (e.g. "foo/bar") and there are no directory/file conflicts
+(see L</BUGS> below). It requires the MogileFS instance stores all
+relevant files on the same filesystem as the desired C<--output>.
+
+It uses L<Inline::C(3pm)> module and depends on the Linux-specific
+L<ioctl_ficlone(2)>.
+
+=head1 OPTIONS
+
+=over
+
+=item --trackers=host1:7001,host2:7001
+
+Use these MogileFS trackers to negotiate with.
+
+=item --domain=<domain>
+
+Set the MogileFS domain to use.
+
+=item --docroot=<path>
+
+Filesystem path name for device mount points (same as mogstored).
+
+=item --output=<PATH>
+
+Filesystem path name for destination directory
+
+=item --uri-decode
+
+Perform URI decoding on keys when converting to filesystem pathnames
+
+=item --key_prefix="/foo/bar/"
+
+Limit extraction to keys starting with this prefix. Can be an arbitrary string.
+
+=item --jobs=<NUM>
+
+Control parallelization (default: 1)
+
+=item --keep
+
+Keep existing files in C<--output=> if they exist. By default,
+existing files in C<--output=> are unlinked.
+
+=back
+
+=head1 AUTHOR
+
+Eric Wong E<lt>L<e...@80x24.org>E<gt>
+
+=head1 BUGS
+
+Directory/file conflicts not handled, that is file keys must not resemble
+a "directory" of another key.
+
+=head1 LICENSE
+
+Licensed for use and redistribution under the same terms as Perl itself.
+
+=cut
+
+use v5.10.1;
+use strict;
+use MogileFS::Utils;
+use Data::Dumper;
+use IO::Handle; # ->autoflush
+use POSIX qw(WNOHANG);
+$Data::Dumper::Useqq = 1;
+my $util = MogileFS::Utils->new;
+my $usage =
+"--docroot=dir --output=dir --trackers=hosts --domain=foo --key_prefix='bar/'";
+my $c = $util->getopts($usage, qw(
+ docroot=s keep output=s key_prefix=s jobs=i uri-decode));
+BEGIN {
+ use File::Path qw(mkpath);
+ my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= (
+ $ENV{XDG_CACHE_HOME} //
+ ( ($ENV{HOME} // '/nonexistent').'/.cache' )
+ ).'/mog2ficlone/inline-c';
+ unless (-d $inline_dir) {
+ mkpath($inline_dir) or die "mkpath $inline_dir: $!\n";
+ }
+}
+use Inline C => <<EOF;
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+
+int ficlone(int dest_fd, int src_fd)
+{
+ return ioctl(dest_fd, FICLONE, src_fd);
+}
+EOF
+
+my $docroot = $c->{docroot};
+die "--docroot not given\n" unless defined($docroot);
+die "--docroot not a directory\n" unless -d $docroot;
+my $output = $c->{output};
+die "--output not given\n" unless defined($c->{output});
+$output =~ s!/+\z!!;
+my $uri_decode = $c->{'uri-decode'};
+my $jobs = $c->{jobs} // 1;
+my $err = 0;
+my $keep = $c->{keep};
+my %pids; # PID => $w (pipe writer)
+my $chld_handler = $SIG{CHLD} = sub {
+ my ($sig) = @_;
+ my $flags = $sig ? WNOHANG : 0;
+ while (scalar keys %pids) {
+ my $pid = waitpid(-1, $flags) or return;
+ return if $pid < 0;
+ if (delete $pids{$pid}) {
+ next if $? == 0;
+ warn "E: PID:$pid exited with \$?=$?\n";
+ $err = 1;
+ exit($err); # kill the rest?
+ } else {
+ warn "W: reaped unknown PID:$pid \$?=$?\n";
+ }
+ }
+};
+
+for my $j (1..$jobs) {
+ pipe(my ($r, $w)) or die "pipe: $!";
+ defined(my $pid = fork) or die "fork: $!";
+ if ($pid == 0) {
+ close $w;
+ %pids = ();
+ my $mogc = $util->client;
+ local $/ = "\0";
+ while (my $k = <$r>) {
+ chomp $k;
+ my $dir = $k;
+ if ($uri_decode) {
+ $dir =~ s/%([a-f0-9]{2})/pack('C', hex($1))/egi;
+ }
+ $dir =~ s!([^/]+)\z!!;
+ my $base = $1;
+ $dir = "$output/$dir";
+ my $dst = $dir.$base;
+ my $exists = -e $dst;
+ if ($exists) {
+ next if $keep;
+ my @st = stat(_);
+ $exists = [ $st[8], $st[9] ]; # atime, mtime
+ }
+ my $info = $mogc->file_info($k, { devices => 1 });
+ unless ($info) {
+ warn "E: $k no info\n";
+ next;
+ }
+ my $fid = $info->{fid} // die "E: key=$k no {fid}\n";
+ $info->{devids} // die "E: key=$k no {devids}\n";
+ my (@devs) = split(/\s*,\s*/, $info->{devids});
+ if (!-d $dir && !mkpath($dir)) {
+ # mkpath may race with other processes
+ die "mkpath: $dir: $!" if !-d $dir;
+ }
+ my $nfid = sprintf('%010u', $fid);
+ $nfid =~ /\A(\d)(\d{3})(\d{3})(?:\d{3})\z/ or
+ die "$k = $fid not handled\n";
+ my $fn = "$1/$2/$3/$nfid.fid";
+ my $ok = 0;
+ for my $dev (@devs) {
+ my $src = "$docroot/dev$dev/$fn";
+ open my $sh, '<', $src or next;
+ if ($exists) {
+ unlink($dst) or die "unlink: $dst: $!";
+ }
+ open my $dh, '>', $dst or die "open: >$dst: $!";
+ if (ficlone(fileno($dh), fileno($sh)) < 0) {
+ die "E: $src => $dst: $!\n";
+ }
+ if ($exists) {
+ utime(@$exists, $dh) or
+ die "utime: $dst: $!";
+ }
+ say "OK $src => $dst";
+ ++$ok;
+ last;
+ }
+ $ok or warn "W: $k (FID=$fid) not ready\n";
+ }
+ exit(0);
+ } else {
+ close $r;
+ $pids{$pid} = $w;
+ $w->autoflush(1);
+ }
+}
+
+my @pipes = values %pids;
+my $mogc = $util->client;
+my $nr = 0;
+$mogc->foreach_key(prefix => $c->{key_prefix}, sub {
+ my ($key) = @_;
+ if (index($key, "\0") >= 0) {
+ warn "key contains illegal character: ", Dumper($key), "\n";
+ next;
+ }
+ print { $pipes[$nr++ % scalar(@pipes)] } "$key\0" or die "print: $!\n";
+});
+
+$SIG{CHLD} = 'DEFAULT';
+close($_) foreach @pipes;
+$chld_handler->();
+exit($err);
Reply all
Reply to author
Forward
0 new messages