Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

PERFORCE change 176831 for review

4 views
Skip to first unread message

Garrett Cooper

unread,
Apr 12, 2010, 8:30:54 AM4/12/10
to Perforce Change Reviews
http://p4web.freebsd.org/@@176831?ac=10

Change 176831 by gcooper@gcooper-bayonetta on 2010/04/12 12:30:36


Introducing ... unpack_file_to_fd!

A shortcut from the atypical unpack operation as extracting a single file from the beginning of a large tarfile is really braindead, so let's extract the file to disk and return a file descriptor instead :) (can't seem to find a file descriptor returning analog with libarchive... hrmmm...).

Overall it could potentially stand to be aligned with the exit code convention from unpack, but for now it works just fine.

bde@-ify headers and fields while I'm at it so things are properly ordered and aligned in memory a tad bit better.

Sidenote: we'll need to implement similar logic with info/perform.c with a small user-defined loop of all potential metadata files so we don't get killed off performance-wise when reading large tarballs like we are today with unpack.

Suggested-by: kientzle

Affected files ...

.. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/add/perform.c#5 edit
.. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/file.c#11 edit
.. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/lib.h#5 edit

Differences ...

==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/add/perform.c#5 (text+ko) ====

@@ -60,23 +60,23 @@
static int
pkg_do(char *pkg)
{
+ struct stat sb;
Package Plist;
+ PackingList p;
char pkg_fullname[FILENAME_MAX];
char playpen[FILENAME_MAX];
+ char pre_script[FILENAME_MAX] = INSTALL_FNAME;
+ char post_script[FILENAME_MAX];
+ char pre_arg[FILENAME_MAX], post_arg[FILENAME_MAX];
+ char *conflict[2];
+ char **matched;
char *extract;
const char *where_to;
FILE *cfile;
int code;
- PackingList p;
- struct stat sb;
int inPlace, conflictsfound, errcode;
/* support for separate pre/post install scripts */
int new_m = 0;
- char pre_script[FILENAME_MAX] = INSTALL_FNAME;
- char post_script[FILENAME_MAX];
- char pre_arg[FILENAME_MAX], post_arg[FILENAME_MAX];
- char *conflict[2];
- char **matched;
int fd;

conflictsfound = 0;
@@ -107,7 +107,6 @@
warnx("unable to fetch '%s' by URL", pkg);
return 1;
}
- strcpy(pkg_fullname, pkg);
cfile = fopen(CONTENTS_FNAME, "r");
if (!cfile) {
warnx(
@@ -119,33 +118,38 @@
fclose(cfile);
}
else {
- strcpy(pkg_fullname, pkg); /*
- * Copy for sanity's sake,
- * could remove pkg_fullname
- */
- if (strcmp(pkg, "-")) {
- if (stat(pkg_fullname, &sb) == FAIL) {
- warnx("can't stat package file '%s'", pkg_fullname);
+
+ /*
+ * If TRUE: We have to extract the whole thing to disk because
+ * this could be our one and only shot to do so...
+ */
+ Boolean extract_whole_archive_from_stdin = FALSE;
+
+ if (strcmp(pkg, "-") == 0) {
+ extract_whole_archive_from_stdin = TRUE;
+ sb.st_size = 100000; /* Make up a plausible average size */
+ } else {
+ if (stat(pkg, &sb) == FAIL) {
+ warnx("can't stat package file '%s'", pkg);
goto bomb;
}
- extract = CONTENTS_FNAME;
- }
- else {
- extract = NULL;
- sb.st_size = 100000; /* Make up a plausible average size */
}
if (!(where_to = make_playpen(playpen, sb.st_size * 4)))
errx(1, "unable to make playpen for %lld bytes", (long long)sb.st_size * 4);
/* Since we can call ourselves recursively, keep notes on where we came from */
if (!getenv("_TOP"))
setenv("_TOP", where_to, 1);
- if (unpack(pkg_fullname, extract)) {
- warnx(
- "unable to extract table of contents file from '%s' - not a package?",
- pkg_fullname);
- goto bomb;
- }
- cfile = fopen(CONTENTS_FNAME, "r");
+ if (extract_whole_archive_from_stdin == TRUE) {
+ if (unpack(NULL, NULL) == 0)
+ cfile = fopen(CONTENTS_FNAME, "r");
+ else {
+ warnx("unable to extract table of contents file from '%s' "
+ "- not a package?", pkg);
+ goto bomb;
+ }
+ } else
+ cfile = unpack_file_to_fd(pkg, CONTENTS_FNAME);
+
if (!cfile) {
warnx(
"unable to open table of contents file '%s' - not a package?",
@@ -158,7 +162,7 @@
/* Extract directly rather than moving? Oh goodie! */
if (find_plist_option(&Plist, "extract-in-place")) {
if (Verbose)
- printf("Doing in-place extraction for %s\n", pkg_fullname);
+ printf("Doing in-place extraction for %s\n", pkg);
p = find_plist(&Plist, PLIST_CWD);
if (p) {
if (!isdir(p->name) && !Fake) {
@@ -174,9 +178,8 @@
inPlace = 1;
}
else {
- warnx(
- "no prefix specified in '%s' - this is a bad package!",
- pkg_fullname);
+ warnx("no prefix specified in '%s' - this is a bad "
+ "package!", pkg);
goto bomb;
}
}
@@ -192,7 +195,7 @@
"Please set your PKG_TMPDIR variable to point to a location with more\n"
"free space and try again", (long long)sb.st_size * 4);
warnx("not extracting %s\ninto %s, sorry!",
- pkg_fullname, where_to);
+ pkg, where_to);
goto bomb;
}

@@ -202,8 +205,8 @@

/* Finally unpack the whole mess. If extract is null we
already + did so so don't bother doing it again. */
- if (extract && unpack(pkg_fullname, NULL)) {
- warnx("unable to extract '%s'!", pkg_fullname);
+ if (extract && unpack(pkg, NULL)) {
+ warnx("unable to extract '%s'!", pkg);
goto bomb;
}
}
@@ -370,13 +373,13 @@
else if ((cp = fileGetURL(pkg, p->name, KeepPackage)) != NULL) {
if (Verbose)
printf("Finished loading %s via a URL\n", p->name);
- if (!fexists("+CONTENTS")) {
- warnx("autoloaded package %s has no +CONTENTS file?",
- p->name);
+ if (!fexists(CONTENTS_FNAME)) {
+ warnx("autoloaded package %s has no %s file?",
+ p->name, CONTENTS_FNAME);
if (!Force)
++code;
}
- else if (vsystem("(pwd; /bin/cat +CONTENTS) | %s %s %s %s -S", PkgAddCmd, Verbose ? "-v" : "", PrefixRecursive ? prefixArg : "", KeepPackage ? "-K" : "")) {
+ else if (vsystem("(pwd; /bin/cat " CONTENTS_FNAME ") | %s %s %s %s -S", PkgAddCmd, Verbose ? "-v" : "", PrefixRecursive ? prefixArg : "", KeepPackage ? "-K" : "")) {
warnx("pkg_add of dependency '%s' failed%s",
p->name, Force ? " (proceeding anyway)" : "!");
if (!Force)

==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/file.c#11 (text+ko) ====

@@ -22,13 +22,13 @@
__FBSDID("$FreeBSD: src/usr.sbin/pkg_install/lib/file.c,v 1.70 2010/04/01 14:27:29 flz Exp $");

#include "lib.h"
+#include <sys/wait.h>
#include <archive.h>
#include <archive_entry.h>
#include <err.h>
#include <fnmatch.h>
#include <pwd.h>
#include <time.h>
-#include <sys/wait.h>

/* Quick check to see if a file exists */
Boolean
@@ -341,13 +341,116 @@
ARCHIVE_EXTRACT_TIME |ARCHIVE_EXTRACT_ACL | \
ARCHIVE_EXTRACT_FFLAGS|ARCHIVE_EXTRACT_XATTR)

-/* Unpack a tar file */
+/*
+ * Unpack a single file from a tar-file to a file descriptor; this is written
+ * like so as an optimization to abbreviate the extract to *open step, as well
+ * as to reduce the number of required steps needed when unpacking a tarball on
+ * disk, as the previous method employed with tar(1) used -q // --fast-read .
+ *
+ * Return NULL on failure, and non-NULL on success
+ *
+ * XXX (gcooper): this is currently implemented with FILE* / fopen(3) due to
+ * legacy issues that need to be sorted out over the next couple of weeks for
+ * 1) locking to function properly, and to gain the potential performance boost
+ * by using mmap(2), and read(2) (ugh).
+ *
+ * But first things first, we need a working solution with minimal changes;
+ * then we move on from there.
+ *
+ * [The return code info will eventually be...]
+ *
+ * Return -1 on failure, a value greater than 0 on success [in accordance with
+ * open(2)].
+ */
+FILE*
+unpack_file_to_fd(const char *pkg, const char *file)
+{
+ struct archive *archive;
+ struct archive_entry *archive_entry;
+ Boolean found_match = FALSE;
+
+ const char *entry_pathname = NULL;
+ const char *error = NULL;
+ const char *pkg_name_humanized;
+
+ FILE *fd = NULL;
+ /* int fd = -1; */
+ int r;
+
+ if (Verbose)
+ printf("%s: will extract %s from %s\n", __func__, file, pkg);
+
+ archive = archive_read_new();
+ archive_read_support_compression_all(archive);
+ archive_read_support_format_tar(archive);
+
+ /* The initial open failed */
+ if (archive_read_open_filename(archive, pkg,
+ ARCHIVE_DEFAULT_BYTES_PER_BLOCK) != ARCHIVE_OK) {
+
+ error = archive_error_string(archive);
+ warnx("%s: unable to open the package from %s: %s",
+ __func__, pkg_name_humanized, error);
+
+ }
+ else
+ while (error == NULL && found_match == FALSE &&
+ (r = archive_read_next_header(archive, &archive_entry)) ==
+ ARCHIVE_OK) {
+
+ entry_pathname = archive_entry_pathname(archive_entry);
+
+ if (strncmp(file, entry_pathname, PATH_MAX) == 0) {
+
+ /*
+ * Regardless of whether or not extract passes,
+ * we found our target file so let's exit
+ * quickly because the underlying issue is most
+ * likely unrecoverable.
+ */
+ found_match = TRUE;
+
+ r = archive_read_extract(archive, archive_entry,
+ EXTRACT_ARCHIVE_FLAGS);
+ if (r == ARCHIVE_OK) {
+ if (Verbose)
+ printf("X - %s\n",
+ entry_pathname);
+ fd = fopen(entry_pathname, "r");
+ } else {
+ error = archive_error_string(archive);
+ warnx("%s: extraction for %s failed: "
+ "%s", __func__, pkg_name_humanized,
+ error);
+ }
+
+ } else
+ if (Verbose)
+ printf("S - %s\n", entry_pathname);
+
+ }
+
+ archive_read_finish(archive);
+
+ return fd;
+
+}
+
+/*
+ * Unpack a tar file, or a subset of the contents.
+ *
+ * Return 0 on success, 1 on failure
+ *
+ * NOTE: the exit code is 0 / 1 so that this can be fed directly into exit
+ * when doing piped tar commands for copying hierarchies *hint*, *hint*.
+ */
int
unpack(const char *pkg, const char *file_expr)
{
struct archive *archive;
struct archive_entry *archive_entry;
Boolean extract_whole_archive = FALSE;
+ const char *entry_pathname = NULL;
const char *error = NULL;
const char *pkg_name_humanized;
int r;
@@ -355,19 +458,21 @@
if (file_expr == NULL || strcmp("*", file_expr) == 0)
extract_whole_archive = TRUE;

+ if (pkg == NULL || strcmp(pkg, "-") == 0)
+ pkg_name_humanized = "(stdin)";
+ else
+ pkg_name_humanized = pkg;
+
if (Verbose) {
if (extract_whole_archive)
- printf("%s: will extract whole archive\n", __func__);
+ printf("%s: %s - will extract whole archive\n",
+ pkg_name_humanized, __func__);
else
- printf("%s: will extract files that match: %s\n",
- __func__, file_expr);
+ printf("%s: %s - will extract files that match "
+ "expression: %s\n",
+ pkg_name_humanized, __func__, file_expr);
}

- if (pkg == NULL || strcmp(pkg, "-") == 0)
- pkg_name_humanized = "(stdin)";
- else
- pkg_name_humanized = pkg;
-
archive = archive_read_new();
archive_read_support_compression_all(archive);
archive_read_support_format_tar(archive);
@@ -386,29 +491,29 @@
(r = archive_read_next_header(archive, &archive_entry)) ==
ARCHIVE_OK) {

+ entry_pathname = archive_entry_pathname(archive_entry);
+
/* Let's extract the whole archive, or just a file. */
if (extract_whole_archive == TRUE ||
- (fnmatch(file_expr,
- archive_entry_pathname(archive_entry),
- FNM_PATHNAME) == 0)) {
+ (fnmatch(file_expr, entry_pathname,
+ FNM_PATHNAME)) == 0) {

r = archive_read_extract(archive, archive_entry,
EXTRACT_ARCHIVE_FLAGS);
- if (r != ARCHIVE_OK) {
+ if (r == ARCHIVE_OK) {
+ if (Verbose)
+ printf("X - %s\n",
+ entry_pathname);
+ } else {
error = archive_error_string(archive);
warnx("%s: extraction for %s failed: "
"%s", __func__, pkg_name_humanized,
error);
}
- if (Verbose) {
- printf("X - %s\n",
- archive_entry_pathname(archive_entry));
- }

- } else if (Verbose) {
- printf("S - %s\n",
- archive_entry_pathname(archive_entry));
- }
+ } else
+ if (Verbose)
+ printf("S - %s\n", entry_pathname);

}

==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/lib.h#5 (text+ko) ====

@@ -188,6 +188,7 @@
void copy_hierarchy(const char *, const char *, Boolean);
int delete_hierarchy(const char *, Boolean, Boolean);
int unpack(const char *, const char *);
+FILE* unpack_file_to_fd(const char*, const char *);
void format_cmd(char *, int, const char *, const char *, const char *);

/* Msg */

Tim Kientzle

unread,
Apr 12, 2010, 11:56:11 PM4/12/10
to Garrett Cooper, Perforce Change Reviews
Garrett Cooper wrote:
> +FILE*
> +unpack_file_to_fd(const char *pkg, const char *file)
> +{
....

> + r = archive_read_extract(archive, archive_entry,
> + EXTRACT_ARCHIVE_FLAGS);
> + if (r == ARCHIVE_OK) {
> + if (Verbose)
> + printf("X - %s\n",
> + entry_pathname);
> + fd = fopen(entry_pathname, "r");

This is potentially race-prone. I would suggest instead that
you:

int fd = open(entry_pathname, O_RDWR | O_CREAT, ...)
archive_read_data_into_fd(a, fd);

Then rewind the fd and return it. archive_read_extract()
is handy but it's way overkill for this kind of simple
operation. I would also put in a simple verification
that the entry you found is a regular entry:
archive_entry_filetype(entry) == AE_IFREG


> + * NOTE: the exit code is 0 / 1 so that this can be fed directly into exit
> + * when doing piped tar commands for copying hierarchies *hint*, *hint*.

Why do you want to copy hierarchies?
Seems a waste of disk bandwidth. *hint* *hint* ;-)

> int
> unpack(const char *pkg, const char *file_expr)
> {

Since this is only called with file_expr=="*" and file_expr=="+*",
I don't think you need fnmatch(). You can get away with
a simple
int unpack(const char *pkg, int metadata_only)

and then use
if (!metadata_only || entry_pathname[0] == '+')

> + (fnmatch(file_expr, entry_pathname,
> + FNM_PATHNAME)) == 0) {
>

You might also consider passing in an fd to unpack and
using archive_read_open_fd() to attach an archive
handle to it instead of archive_read_open_filename().
(It doesn't make any performance difference, but it
does avoid races and opens the door to other interesting
games in the future.)


0 new messages