-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Archive-name: tarmap0-is
Submitted-by:
onei...@gmail.com
Last-modified: 2011-11-05 +00:00
Copyright-Notice: both the README and the code are under a BSD-style license
README.tarmap0-is -*- Text -*-
Synopsis
$ tarmap0-is COMMAND [ARGUMENTS...] < ARCHIVE.tar
Requirements
Perl 5, Archive::Tar, Linux::PipeMagic.
Description
tarmap0-is allows one to execute an arbitrary program for each
of the files contained within a Tar archive. E. g., one may
extract all such files, individually compressing each of
them with gzip(1) or xz(1), without having to actually unpack
the archive first. Or, one may apply sha1sum(1) to compute
their respective SHA-1 hashes. Or it may be wc(1), etc.
While the filter program specified is executed, its stdin is a
pipe through which the archive member's contents are piped; its
stdout is the stdout of the tarmap0-is process itself. The
environment of the filter will contain a FILE variable, holding
the filename of the Tar archive member being processed.
The filter command given is passed to execvp (), as for
find(1)'s -exec. No Shell special character processing is done.
If necessary, Shell could be specified explicitly, as shown
below.
Examples
$ tarmap0-is bash -c 'xz -9c > "$FILE".xz'
$ sha1sum -- /bin/bash /bin/dash
add19e504c254758f2ea8dcda3821c77fafb4923 /bin/bash
7f123974c1814d026a26e79981453881efb49916 /bin/dash
$ tar -c -- /bin/bash /bin/dash | tarmap0-is sha1sum
tar: Removing leading `/' from member names
bin/bash
add19e504c254758f2ea8dcda3821c77fafb4923 -
bin/dash
7f123974c1814d026a26e79981453881efb49916 -
$
(Please note that the filenames in the last example's output are
the stderr of the tarmap0-is process.)
Bugs
Should fall back to sysread (), syswrite () (as per perlfunc(3))
if the system it runs on isn't capable of splice(2), which syssplice () relies on.
sysread () logic may be flawed.
pax_global_header is treated like an ordinary file.
A split(1)-like tool for Tar-archives (i. e., performing a split
along the members' boundaries) would share a lot of its logic
with tarmap0-is. Moreover, split(1) (as of GNU coreutils 8.13)
has already got a --filter= option. Therefore, it may be
feasible to re-use the tarmap0-is code as the basis of a more
generic tarsplit(1) tool.
Command line interface is almost non-existent. It should at
least support POSIX options terminator (--) and GNU --help and
--version options.
README.tarmap0-is ends here
#!/usr/bin/perl -w
### tarmap-2011-11-05.pl --- $ split --filter=, tar(1)-wise  -*- Perl -*-
### Copyright (C) 2011 Ivan Shmakov
##
## Permission to copy this software, to modify it, to redistribute it,
## to distribute modified versions, and to use it for any purpose is
## granted, subject to the following restrictions and understandings.
##
## 1. Any copy made of this software must include this copyright notice
## in full.
##
## 2. I have made no warranty or representation that the operation of
## this software will be error-free, and I am under no obligation to
## provide any services, by way of maintenance, update, or
## otherwise.
##
## 3. In conjunction with products arising from the use of this
## material, there shall be no use of my name in any advertising,
## promotional, or sales literature without prior written consent in
## each case.
### Code:
# use Data::Dump qw (pp);
use Linux::PipeMagic qw (syssplice);
require Archive::Tar::File;
require IO::Handle;
## Feed exactly $size octets from $in to the stdin of a freshly
## started filter command, then wait for that command to finish.
##
##   $in          -- input handle, positioned at the first octet to pass
##   $size        -- number of octets to hand over to the filter
##   $cmd, @args  -- the filter; given to open () in its list form, so
##                   no Shell processing is applied to it
##
## Returns 1 once all the data is transferred and the pipe is closed.
## Dies if the filter cannot be started, if the transfer fails or
## makes no progress, or if closing the pipe fails at the system
## level.  A filter that merely exits with a non-zero status is
## reported via warn (), and processing continues.
sub process_chunk {
    my ($in, $size, $cmd, @args) = @_;
    open (my $pipe, "|-", $cmd, @args)
        or die ("Cannot execute command",
                ": ", $!);
    binmode ($pipe, ":raw")
        or die ("Cannot switch the command pipe to :raw mode",
                ": ", $!);
    ## FIXME: fall back to sysread (), syswrite () if no syssplice ()
    for (my $rest = $size; $rest > 0; ) {
        my $wr
            = syssplice ($in, $pipe, $rest, 0);
        ## NOTE(review): it is not visible here whether syssplice ()
        ## signals an error with undef or with a negative count;
        ## both are treated as a failure -- confirm against
        ## Linux::PipeMagic
        (defined ($wr) && $wr >= 0)
            or die ("syssplice () failed",
                    ": ", $!);
        ## no progress at all: the loop could never terminate
        $wr > 0
            or die ("syssplice () resulted in a short I/O",
                    " (${wr} vs. ${rest})",
                    ": ", $!);
        $rest
            -= $wr;
    }
    ## close () on a piped open also waits for the child; it returns
    ## false with $! == 0 when the only problem is a non-zero exit
    unless (close ($pipe)) {
        ($! == 0)
            or die ("Cannot close the pipe to the command",
                    ": ", $!);
        warn ("Command finished with a non-zero wait status",
              " (", $?, ")\n");
    }
    ## .
    return 1;
}
## Read one block of exactly $size octets from $in into $$buffer.
##
##   $in      -- input handle (read with sysread (), i. e., unbuffered)
##   $buffer  -- reference to the scalar receiving the data
##   $size    -- number of octets wanted
##
## Returns $size when a whole block was read, or 0 when the input was
## already at EOF (or $size is 0).  Dies on a read error, and when EOF
## is reached after only part of the block was read.
##
## A single sysread () may return fewer octets than asked for (e. g.,
## when reading from a pipe), so the read is repeated until the block
## is complete or EOF is seen.
sub blkread_or_die {
    my ($in, $buffer, $size) = @_;
    ## NB: assuming octet (:raw; no-:utf8, no-:encoding) mode
    $$buffer
        = "";
    my $got
        = 0;
    while ($got < $size) {
        ## append at offset $got, keeping what was read so far
        my $rd
            = sysread ($in, $$buffer, $size - $got, $got);
        defined ($rd)
            or die ("Read error",
                    ": ", $!);
        ## sysread () returns 0 at EOF
        last
            if ($rd == 0);
        $got
            += $rd;
    }
    ## either a whole block, or a clean EOF on the block boundary
    ($got == $size || $got == 0)
        or die ("sysread () resulted in a short read",
                " (${got} vs. ${size})",
                ": unexpected end of input");
    ## .
    return $got;
}
## FIXME: provide command-line interface comparable to GNU's split(1)
## Main program: read the Tar stream on stdin one block at a time and,
## for every archive member that has content, run the command given in
## @ARGV with that content on its stdin and the member's name in the
## FILE environment variable.
(@ARGV > 0)
    or die ("Usage: tarmap COMMAND [ARGS...]");
my @cmd
    = @ARGV;
## plain class-method call (no indirect-object syntax)
my $in
    = IO::Handle->new_from_fd (fileno (STDIN), "r")
    or die ("Cannot open stdin for reading",
            ": ", $!);
binmode ($in, ":raw")
    or die ("Cannot switch stdin to :raw mode",
            ": ", $!);
## normally, an all-zero block ends the archive
my $ignore_all_zero_p
    = 1;
## standard Tar block size
my $block_sz
    = 512;
my $all_zero
    = ("\000" x $block_sz);
my $seen_all_zero
    = 0;
## NB: using sysread () to avoid buffering for syssplice () later
my ($rd, $buf);
while (($rd = blkread_or_die ($in, \$buf, $block_sz)) > 0) {
    if ($buf eq $all_zero) {
        $seen_all_zero = 1;
        ## either skip the all-zero block, or take it as the end
        next
            if ($ignore_all_zero_p);
        last;
    }
    # print STDERR (q ($buf => ), pp ($buf), "\n");
    my $head
        = Archive::Tar::File->new (chunk => $buf)
        or die ("Cannot create an Archive::Tar::File object");
    ## refuse to interpret a block whose checksum does not match;
    ## otherwise, garbage input would yield bogus names and sizes
    $head->validate ()
        or die ("Invalid Tar header (checksum mismatch)");
    my $filename
        = $head->full_path ();
    my $size
        = $head->size ();
    ($size >= 0)
        or die ("Negative size of a file in the Tar archive");
    unless ($size > 0) {
        printf STDERR ("%s: no associated content; ignored\n",
                       $filename);
        ## .
        next;
    }
    print STDERR ($filename, "\n");
    ## NB: like split(1)'s --filter=
    {
        local $ENV{"FILE"}
            = $filename;
        process_chunk ($in, $size, @cmd)
            or die ("Failed to process chunk");
    }
    ## drop the rest: the content is padded up to a whole number of
    ## blocks; the result of % is non-negative here, as the right
    ## operand is positive
    my $tail;
    blkread_or_die ($in, \$tail,
                    (($block_sz - $size) % $block_sz));
}
## $rd is 0 only if the loop was ended by EOF; checking it instead of
## calling eof () avoids buffered I/O on a handle read via sysread ()
if ($rd == 0) {
    ($seen_all_zero || $ignore_all_zero_p)
        or warn ("Unexpected EOF");
}
### Emacs trailer
## Local variables:
## indent-tabs-mode: nil
## fill-column: 72
## End:
### tarmap-2011-11-05.pl ends here
- --
FSF associate member #7257
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.10 (GNU/Linux)
iQIcBAEBAgAGBQJOtV1vAAoJEOhSCjIbp0KhTgkP/3oRnNyhPW5MGxBmL0d33O8n
v1gTQiT797eQNsYf9vtmA43RB1a8aVIoy2ULcNqg1oM9JkXkO8FQnFxHUibQx0gM
qyn2NiIK/NkVEbJpGp0RPFZloCJJ6YUe64Lc6rwr9IZwsywSv+CgcTowYllJ2h0J
8PNUTORtcekcbZGbR/91CnM/d7Y/Ybp8ScGFhzrPrBHWTbLmJycTrYXLOeoMuvj+
ODesNDXE8ok4QhsjzqPst9RbUAUQyai9p25L5nRgpJoDrJcXcU8otpdRldXaBoKe
vIY4g5zurQQjRyFCuk+5TypzqLrtV5nbe5UGQWSM930UGZksWmv5N+0t006nJv/j
+9jfmJHSqApRdX3P0GSnkR85aQj4egghgl+XiNUMiAmsL+OPibiXxOS2w4XUC6Yf
WRoegqYAr+zAea24Ffsk1zk+SAt5SwE7BuHomkkdcTKAbNeTuuxTgFre7q4eZvhQ
NItbe23tv5wVjXmsJsJWd9KrUIxVnNKnoQSeQqVqk0lxoqcjA6Y8Gfn6f0uwrmNO
t/LCVfQgSxVmGes49mkKJpvHZmUiuWLGm8+mwRhCnC962cz6d0JIDc/ERV1oLYQp
T3zIDM1sSpjVxXifm+MS73F5BP/PjrdEHWdPBlsZsucSZo0t9rgqVIxmooJMNAh0
ByS6pYJOVDfDgaK8+Xfr
=aPNu
-----END PGP SIGNATURE-----