[OE-core] [PATCH] libarchive: enable non-recursive extract/list

Patrick Ohly patrick.ohly at intel.com
Thu Dec 1 09:40:46 UTC 2016


Required for meta-swupd performance enhancements: in meta-swupd, the
so called "mega" image contains a rootfs with all files that can
potentially be installed on a device. Other virtual image recipes need
a subset of those files or directories, and a partial extraction from
a single tar archive is faster than letting all virtual image recipes
share access to a directory under a single pseudo instance.

It may be necessary to extract a directory with all of its attributes
without the content of the directory, hence this patch. Upstream
agreed to consider merging such a patch (see
https://groups.google.com/forum/#!topic/libarchive-discuss/JO3hqSaAVfs)
but has been slow in actually commenting on it, so for now it has
to be carried as distro patch.

Signed-off-by: Patrick Ohly <patrick.ohly at intel.com>
---
 .../files/non-recursive-extract-and-list.patch     | 153 +++++++++++++++++++++
 .../libarchive/libarchive_3.2.2.bb                 |   1 +
 2 files changed, 154 insertions(+)
 create mode 100644 meta/recipes-extended/libarchive/files/non-recursive-extract-and-list.patch

diff --git a/meta/recipes-extended/libarchive/files/non-recursive-extract-and-list.patch b/meta/recipes-extended/libarchive/files/non-recursive-extract-and-list.patch
new file mode 100644
index 0000000..61f8f3e
--- /dev/null
+++ b/meta/recipes-extended/libarchive/files/non-recursive-extract-and-list.patch
@@ -0,0 +1,153 @@
+From d6271709d2deb980804f92e75f9b5cb600dc42ed Mon Sep 17 00:00:00 2001
+From: Patrick Ohly <patrick.ohly at intel.com>
+Date: Mon, 24 Oct 2016 12:54:48 +0200
+Subject: [PATCH 1/2] non-recursive extract and list
+
+Sometimes it makes sense to extract or list a directory contained in
+an archive without also doing the same for the content of the
+directory, i.e. allowing -n (= --no-recursion) in combination with the
+x and t modes.
+
+bsdtar uses the match functionality in libarchive to track include
+matches. A new libarchive API call
+archive_match_include_directories_recursively() gets introduced to
+influence the matching behavior, with the default behavior as before.
+
+Non-recursive matching can be achieved by anchoring the path match at
+both start and end. Asking for a directory which itself isn't in the
+archive when in non-recursive mode is an error and handled by the
+existing mechanism for tracking unused inclusion entries.
+
+Upstream-Status: Submitted [https://github.com/libarchive/libarchive/pull/812]
+
+Signed-off-by: Patrick Ohly <patrick.ohly at intel.com>
+
+---
+ libarchive/archive.h       |  2 ++
+ libarchive/archive_match.c | 30 +++++++++++++++++++++++++++++-
+ tar/bsdtar.1               |  3 +--
+ tar/bsdtar.c               | 12 ++++++++++--
+ 4 files changed, 42 insertions(+), 5 deletions(-)
+
+diff --git a/libarchive/archive.h b/libarchive/archive.h
+index ff401e9..38d8746 100644
+--- a/libarchive/archive.h
++++ b/libarchive/archive.h
+@@ -1085,6 +1085,8 @@ __LA_DECL int	archive_match_excluded(struct archive *,
+  */
+ __LA_DECL int	archive_match_path_excluded(struct archive *,
+ 		    struct archive_entry *);
++/* Control recursive inclusion of directory content when directory is included. Default on. */
++__LA_DECL int	archive_match_include_directories_recursively(struct archive *, int _enabled);
+ /* Add exclusion pathname pattern. */
+ __LA_DECL int	archive_match_exclude_pattern(struct archive *, const char *);
+ __LA_DECL int	archive_match_exclude_pattern_w(struct archive *,
+diff --git a/libarchive/archive_match.c b/libarchive/archive_match.c
+index 0719cbd..6d03a65 100644
+--- a/libarchive/archive_match.c
++++ b/libarchive/archive_match.c
+@@ -93,6 +93,9 @@ struct archive_match {
+ 	/* exclusion/inclusion set flag. */
+ 	int			 setflag;
+ 
++	/* Recursively include directory content? */
++	int			 recursive_include;
++
+ 	/*
+ 	 * Matching filename patterns.
+ 	 */
+@@ -223,6 +226,7 @@ archive_match_new(void)
+ 		return (NULL);
+ 	a->archive.magic = ARCHIVE_MATCH_MAGIC;
+ 	a->archive.state = ARCHIVE_STATE_NEW;
++	a->recursive_include = 1;
+ 	match_list_init(&(a->inclusions));
+ 	match_list_init(&(a->exclusions));
+ 	__archive_rb_tree_init(&(a->exclusion_tree), &rb_ops_mbs);
+@@ -471,6 +475,28 @@ archive_match_path_excluded(struct archive *_a,
+ }
+ 
+ /*
++ * When recursive inclusion of directory content is enabled,
++ * an inclusion pattern that matches a directory will also
++ * include everything beneath that directory. Enabled by default.
++ *
++ * For compatibility with GNU tar, exclusion patterns always
++ * match if a subset of the full patch matches (i.e., they are
++ * are not rooted at the beginning of the path) and thus there
++ * is no corresponding non-recursive exclusion mode.
++ */
++int
++archive_match_include_directories_recursively(struct archive *_a, int _enabled)
++{
++	struct archive_match *a;
++
++	archive_check_magic(_a, ARCHIVE_MATCH_MAGIC,
++	    ARCHIVE_STATE_NEW, "archive_match_include_directories_recursively");
++	a = (struct archive_match *)_a;
++	a->recursive_include = _enabled;
++	return (ARCHIVE_OK);
++}
++
++/*
+  * Utilty functions to get statistic information for inclusion patterns.
+  */
+ int
+@@ -781,7 +807,9 @@ static int
+ match_path_inclusion(struct archive_match *a, struct match *m,
+     int mbs, const void *pn)
+ {
+-	int flag = PATHMATCH_NO_ANCHOR_END;
++	int flag = a->recursive_include ?
++		PATHMATCH_NO_ANCHOR_END : /* Prefix match is good enough. */
++		0; /* Full match required. */
+ 	int r;
+ 
+ 	if (mbs) {
+diff --git a/tar/bsdtar.1 b/tar/bsdtar.1
+index 9eadaaf..f5d6457 100644
+--- a/tar/bsdtar.1
++++ b/tar/bsdtar.1
+@@ -346,8 +346,7 @@ In extract or list modes, this option is ignored.
+ Do not extract modification time.
+ By default, the modification time is set to the time stored in the archive.
+ .It Fl n , Fl Fl norecurse , Fl Fl no-recursion
+-(c, r, u modes only)
+-Do not recursively archive the contents of directories.
++Do not recursively archive (c, r, u), extract (x) or list (t) the contents of directories.
+ .It Fl Fl newer Ar date
+ (c, r, u modes only)
+ Only include files and directories newer than the specified date.
+diff --git a/tar/bsdtar.c b/tar/bsdtar.c
+index 93bf60a..001d5ed 100644
+--- a/tar/bsdtar.c
++++ b/tar/bsdtar.c
+@@ -738,8 +738,6 @@ main(int argc, char **argv)
+ 			break;
+ 		}
+ 	}
+-	if (bsdtar->option_no_subdirs)
+-		only_mode(bsdtar, "-n", "cru");
+ 	if (bsdtar->option_stdout)
+ 		only_mode(bsdtar, "-O", "xt");
+ 	if (bsdtar->option_unlink_first)
+@@ -788,6 +786,16 @@ main(int argc, char **argv)
+ 		only_mode(bsdtar, buff, "cru");
+ 	}
+ 
++	/*
++	 * When creating an archive from a directory tree, the directory
++	 * walking code will already avoid entering directories when
++	 * recursive inclusion of directory content is disabled, therefore
++	 * changing the matching behavior has no effect for creation modes.
++	 * It is relevant for extraction or listing.
++	 */
++	archive_match_include_directories_recursively(bsdtar->matching,
++						      !bsdtar->option_no_subdirs);
++
+ 	/* Filename "-" implies stdio. */
+ 	if (strcmp(bsdtar->filename, "-") == 0)
+ 		bsdtar->filename = NULL;
+-- 
+2.1.4
+
diff --git a/meta/recipes-extended/libarchive/libarchive_3.2.2.bb b/meta/recipes-extended/libarchive/libarchive_3.2.2.bb
index cf66e9d..64e8983 100644
--- a/meta/recipes-extended/libarchive/libarchive_3.2.2.bb
+++ b/meta/recipes-extended/libarchive/libarchive_3.2.2.bb
@@ -32,6 +32,7 @@ PACKAGECONFIG[nettle] = "--with-nettle,--without-nettle,nettle,"
 PACKAGECONFIG[lz4] = "--with-lz4,--without-lz4,lz4,"
 
 SRC_URI = "http://libarchive.org/downloads/libarchive-${PV}.tar.gz \
+           file://non-recursive-extract-and-list.patch \
            "
 
 SRC_URI[md5sum] = "1ec00b7dcaf969dd2a5712f85f23c764"
-- 
2.1.4




More information about the Openembedded-core mailing list