Merge branch 'jn/5.0.0' into maint

More fixes from the 5.0 branch:

 - Fix decoding of LZMA2 streams with blocks with no uncompressed data. Taking this patch improves compatibility, while not taking this patch would give people writing encoders a way to notice their output is buggy. It's too easy not to check the edge cases when writing an encoder, so compatibility wins.

 - Validate encoder arguments better. The checks that prevent NULL pointer dereferences are not very important, but checks that avoid producing malformed compressed output are crucial.

 - Fix $0 parsing in xzgrep. xzegrep was treating its argument as a BRE, which is just asking for subtle trouble in scripts that call it.

Thanks to Lasse Collin for advice.

commit: 487f5d593f8ffb2c5f1839b48e248372937c3291 [log] [tgz]
author: Jonathan Nieder <jrnieder@gmail.com> Sat Jun 23 02:40:18 2012 -0500
committer: Jonathan Nieder <jrnieder@gmail.com> Sat Jun 23 02:40:18 2012 -0500
tree: eca5bc55e28c46247c577a3b0c511da1b6742a5f
parent: 73bf3a24d62f74555f26cd245a7e9ceb717d0f69 [diff]
parent: 2e7bb37b64b1ba735c25d36d59a8c10aedc53f70 [diff]
diff --git a/debian/changelog b/debian/changelog
index 10eccbd..2dccf6a 100644
--- a/debian/changelog
+++ b/debian/changelog

@@ -5,6 +5,14 @@
       - lzma_easy_buffer_encode() and lzma_stream_buffer_encode()
         avoid writing Blocks with empty compressed data that xz and
         liblzma versions before 5.0.2 cannot read.
+      - The LZMA2 decoder skips Blocks with empty compressed data
+        instead of rejecting them.
+      - Validates encoder arguments better.  It is harder to segfault
+        or create a corrupt XZ file instead of receiving an error
+        when calling these functions:
+        - lzma_stream_buffer_encode() and lzma_block_buffer_encode()
+          reject unsupported integrity checks;
+        - lzma_block_encoder() checks for block == NULL.
       - Plugs a memory leak in lzma_stream_encoder().
       - lzma_index_init() returns NULL instead of segfaulting on
         allocation failure.
@@ -13,6 +21,9 @@
       reading and free()ing pointers from past the end of an on-stack
       array when one of the listed files has an unmeaningful Block
       header size.
+    * xzegrep and xzfgrep perform extended regex and fixed-string
+      matches, respectively.  (The previous behavior was to always
+      use basic regexes.)
     * The exit status from “xzdiff foo.xz bar.xz” reflects whether
       files differ.  Thanks to Peter Pallinger.  Closes: #635501.
     * xzgrep does not fail just because the decompressor has died
@@ -24,8 +35,8 @@
       extra 'N'.
   * debian/rules: "chmod +x tests/test_scripts.sh" for new xzdiff
     tests.
-  * debian/symbols: Bump minimal versions for lzma_easy_buffer_encode
-    and lzma_stream_buffer_encode.
+  * debian/symbols: Bump the minimal versions for LZMA2 encoder
+    functions that reject more bad arguments and skip empty blocks.
   * liblzma-dev: Install an appropriate library for static linking
     instead of the decompression-only version used to build xzdec.
     Thanks to Anton Tolchanov.  Closes: #673001.

diff --git a/debian/patches/decode-empty-blocks b/debian/patches/decode-empty-blocks
new file mode 100644
index 0000000..89fea1b
--- /dev/null
+++ b/debian/patches/decode-empty-blocks

@@ -0,0 +1,41 @@
+From: Lasse Collin <lasse.collin@tukaani.org>
+Date: Thu, 31 Mar 2011 11:54:48 +0300
+Subject: liblzma: Fix decoding of LZMA2 streams having no uncompressed data.
+
+The decoder considered empty LZMA2 streams to be corrupt.
+This shouldn't matter much with .xz files, because no encoder
+creates empty LZMA2 streams in .xz. This bug is more likely
+to cause problems in applications that use raw LZMA2 streams.
+---
+ src/liblzma/lzma/lzma2_decoder.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c
+index f38879ce..3e42575d 100644
+--- a/src/liblzma/lzma/lzma2_decoder.c
++++ b/src/liblzma/lzma/lzma2_decoder.c
+@@ -67,6 +67,10 @@ lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict,
+ 		const uint32_t control = in[*in_pos];
+ 		++*in_pos;
+ 
++		// End marker
++		if (control == 0x00)
++			return LZMA_STREAM_END;
++
+ 		if (control >= 0xE0 || control == 1) {
+ 			// Dictionary reset implies that next LZMA chunk has
+ 			// to set new properties.
+@@ -104,10 +108,6 @@ lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict,
+ 							&coder->options);
+ 			}
+ 		} else {
+-			// End marker
+-			if (control == 0x00)
+-				return LZMA_STREAM_END;
+-
+ 			// Invalid control values
+ 			if (control > 2)
+ 				return LZMA_DATA_ERROR;
+-- 
+1.7.11.rc3
+

diff --git a/debian/patches/decode-empty-blocks-test b/debian/patches/decode-empty-blocks-test
new file mode 100644
index 0000000..e97bb28
--- /dev/null
+++ b/debian/patches/decode-empty-blocks-test

@@ -0,0 +1,28 @@
+From: Lasse Collin <lasse.collin@tukaani.org>
+Date: Thu, 31 Mar 2011 12:22:55 +0300
+Subject: Tests: Add a new file to test empty LZMA2 streams.
+
+---
+ tests/files/README            |   4 ++++
+ tests/files/good-1-lzma2-5.xz | Bin 0 -> 52 bytes
+ 2 files changed, 4 insertions(+)
+ create mode 100644 tests/files/good-1-lzma2-5.xz
+
+diff --git a/tests/files/README b/tests/files/README
+index 108ce8fb..53950edb 100644
+--- a/tests/files/README
++++ b/tests/files/README
+@@ -87,6 +87,10 @@
+     uncompressed with dictionary reset, and third is LZMA with new
+     properties but without dictionary reset.
+ 
++    good-1-lzma2-5.xz has an empty LZMA2 stream with only the end of
++    payload marker. XZ Utils 5.0.1 and older incorrectly see this file
++    as corrupt.
++
+     good-1-3delta-lzma2.xz has three Delta filters and LZMA2.
+ 
+ 
+-- 
+1.7.11.rc3
+

diff --git a/debian/patches/encoder-api-checks b/debian/patches/encoder-api-checks
new file mode 100644
index 0000000..0ff5341
--- /dev/null
+++ b/debian/patches/encoder-api-checks

@@ -0,0 +1,91 @@
+From: Lasse Collin <lasse.collin@tukaani.org>
+Date: Mon, 11 Apr 2011 13:21:28 +0300
+Subject: liblzma: Validate encoder arguments better.
+
+The biggest problem was that the integrity check type
+wasn't validated, and e.g. lzma_easy_buffer_encode()
+would create a corrupt .xz Stream if given an unsupported
+Check ID. Luckily applications don't usually try to use
+an unsupported Check ID, so this bug is unlikely to cause
+many real-world problems.
+---
+ src/liblzma/common/block_buffer_encoder.c  | 18 ++++++++++++------
+ src/liblzma/common/block_encoder.c         |  5 +++++
+ src/liblzma/common/stream_buffer_encoder.c |  3 +++
+ 3 files changed, 20 insertions(+), 6 deletions(-)
+
+diff --git a/src/liblzma/common/block_buffer_encoder.c b/src/liblzma/common/block_buffer_encoder.c
+index a8f71c21..519c6a68 100644
+--- a/src/liblzma/common/block_buffer_encoder.c
++++ b/src/liblzma/common/block_buffer_encoder.c
+@@ -226,16 +226,23 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator,
+ 		const uint8_t *in, size_t in_size,
+ 		uint8_t *out, size_t *out_pos, size_t out_size)
+ {
+-	// Sanity checks
+-	if (block == NULL || block->filters == NULL
+-			|| (in == NULL && in_size != 0) || out == NULL
++	// Validate the arguments.
++	if (block == NULL || (in == NULL && in_size != 0) || out == NULL
+ 			|| out_pos == NULL || *out_pos > out_size)
+ 		return LZMA_PROG_ERROR;
+ 
+-	// Check the version field.
++	// The contents of the structure may depend on the version so
++	// check the version before validating the contents of *block.
+ 	if (block->version != 0)
+ 		return LZMA_OPTIONS_ERROR;
+ 
++	if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
++			|| block->filters == NULL)
++		return LZMA_PROG_ERROR;
++
++	if (!lzma_check_is_supported(block->check))
++		return LZMA_UNSUPPORTED_CHECK;
++
+ 	// Size of a Block has to be a multiple of four, so limit the size
+ 	// here already. This way we don't need to check it again when adding
+ 	// Block Padding.
+@@ -243,8 +250,7 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator,
+ 
+ 	// Get the size of the Check field.
+ 	const size_t check_size = lzma_check_size(block->check);
+-	if (check_size == UINT32_MAX)
+-		return LZMA_PROG_ERROR;
++	assert(check_size != UINT32_MAX);
+ 
+ 	// Reserve space for the Check field.
+ 	if (out_size - *out_pos <= check_size)
+diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c
+index ca515235..b34c5013 100644
+--- a/src/liblzma/common/block_encoder.c
++++ b/src/liblzma/common/block_encoder.c
+@@ -161,6 +161,11 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+ {
+ 	lzma_next_coder_init(&lzma_block_encoder_init, next, allocator);
+ 
++	if (block == NULL)
++		return LZMA_PROG_ERROR;
++
++	// The contents of the structure may depend on the version so
++	// check the version first.
+ 	if (block->version != 0)
+ 		return LZMA_OPTIONS_ERROR;
+ 
+diff --git a/src/liblzma/common/stream_buffer_encoder.c b/src/liblzma/common/stream_buffer_encoder.c
+index 0542c30a..2450ee2e 100644
+--- a/src/liblzma/common/stream_buffer_encoder.c
++++ b/src/liblzma/common/stream_buffer_encoder.c
+@@ -51,6 +51,9 @@ lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check,
+ 			|| out_pos_ptr == NULL || *out_pos_ptr > out_size)
+ 		return LZMA_PROG_ERROR;
+ 
++	if (!lzma_check_is_supported(check))
++		return LZMA_UNSUPPORTED_CHECK;
++
+ 	// Note for the paranoids: Index encoder prevents the Stream from
+ 	// getting too big and still being accepted with LZMA_OK, and Block
+ 	// encoder catches if the input is too big. So we don't need to
+-- 
+1.7.11.rc3
+

diff --git a/debian/patches/series b/debian/patches/series
index 958e893..fa7f036 100644
--- a/debian/patches/series
+++ b/debian/patches/series

@@ -2,8 +2,12 @@
 abi-chunk-size-func
 abi-debian-soname
 cs-sparse-file
+encoder-api-checks
+decode-empty-blocks
+decode-empty-blocks-test
 stream_encoder-init-leak
 encoder-skip-empty-blocks
+xzgrep-argv0-parsing
 index_init-NULL-dereference
 xz-lvv-invalid-free
 xz-lvv-invalid-free-test

diff --git a/debian/patches/xzgrep-argv0-parsing b/debian/patches/xzgrep-argv0-parsing
new file mode 100644
index 0000000..c3e2a58
--- /dev/null
+++ b/debian/patches/xzgrep-argv0-parsing

@@ -0,0 +1,36 @@
+From: Martin Väth <vaeth@mathematik.uni-wuerzburg.de>
+Date: Fri, 15 Apr 2011 04:54:49 -0400
+Subject: xzgrep: fix typo in $0 parsing
+
+Reported-by: Diego Elio Pettenò <flameeyes@gentoo.org>
+Signed-off-by: Martin Väth <vaeth@mathematik.uni-wuerzburg.de>
+Signed-off-by: Mike Frysinger <vapier@gentoo.org>
+---
+ src/scripts/xzgrep.in | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/scripts/xzgrep.in b/src/scripts/xzgrep.in
+index cbc6b238..dd945137 100644
+--- a/src/scripts/xzgrep.in
++++ b/src/scripts/xzgrep.in
+@@ -27,7 +27,7 @@
+ xz='@xz@ --format=auto'
+ unset GZIP BZIP BZIP2
+ 
+-case ${0##/*} in
++case ${0##*/} in
+   *egrep*) prog=xzegrep; grep=${GREP:-egrep};;
+   *fgrep*) prog=xzfgrep; grep=${GREP:-fgrep};;
+   *)       prog=xzgrep; grep=${GREP:-grep};;
+@@ -35,7 +35,7 @@ esac
+ 
+ version="$prog (@PACKAGE_NAME@) @VERSION@"
+ 
+-usage="Usage: ${0##/*} [OPTION]... [-e] PATTERN [FILE]...
++usage="Usage: ${0##*/} [OPTION]... [-e] PATTERN [FILE]...
+ Look for instances of PATTERN in the input FILEs, using their
+ uncompressed contents if they are compressed.
+ 
+-- 
+1.7.11.rc3
+

diff --git a/debian/source/include-binaries b/debian/source/include-binaries
index d4f7e04..bc7b083 100644
--- a/debian/source/include-binaries
+++ b/debian/source/include-binaries

@@ -1 +1,2 @@
 tests/files/bad-1-block_header-6.xz
+tests/files/good-1-lzma2-5.xz

diff --git a/debian/symbols b/debian/symbols
index 82322ba..1a7d6ea 100644
--- a/debian/symbols
+++ b/debian/symbols

@@ -5,10 +5,10 @@
  lzma_auto_decoder@Base 4.999.9beta
  lzma_block_buffer_bound@Base 4.999.9beta
  lzma_block_buffer_decode@Base 4.999.9beta
- lzma_block_buffer_encode@Base 4.999.9beta
+ lzma_block_buffer_encode@Base 5.0.0-2.1~
  lzma_block_compressed_size@Base 4.999.9beta+20100602
  lzma_block_decoder@Base 4.999.9beta
- lzma_block_encoder@Base 4.999.9beta
+ lzma_block_encoder@Base 5.0.0-2.1~
  lzma_block_header_decode@Base 4.999.9beta
  lzma_block_header_encode@Base 4.999.9beta
  lzma_block_header_size@Base 4.999.9beta

diff --git a/src/liblzma/common/block_buffer_encoder.c b/src/liblzma/common/block_buffer_encoder.c
index a8f71c2..519c6a6 100644
--- a/src/liblzma/common/block_buffer_encoder.c
+++ b/src/liblzma/common/block_buffer_encoder.c

@@ -226,16 +226,23 @@
 		const uint8_t *in, size_t in_size,
 		uint8_t *out, size_t *out_pos, size_t out_size)
 {
-	// Sanity checks
-	if (block == NULL || block->filters == NULL
-			|| (in == NULL && in_size != 0) || out == NULL
+	// Validate the arguments.
+	if (block == NULL || (in == NULL && in_size != 0) || out == NULL
 			|| out_pos == NULL || *out_pos > out_size)
 		return LZMA_PROG_ERROR;
 
-	// Check the version field.
+	// The contents of the structure may depend on the version so
+	// check the version before validating the contents of *block.
 	if (block->version != 0)
 		return LZMA_OPTIONS_ERROR;
 
+	if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
+			|| block->filters == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (!lzma_check_is_supported(block->check))
+		return LZMA_UNSUPPORTED_CHECK;
+
 	// Size of a Block has to be a multiple of four, so limit the size
 	// here already. This way we don't need to check it again when adding
 	// Block Padding.
@@ -243,8 +250,7 @@
 
 	// Get the size of the Check field.
 	const size_t check_size = lzma_check_size(block->check);
-	if (check_size == UINT32_MAX)
-		return LZMA_PROG_ERROR;
+	assert(check_size != UINT32_MAX);
 
 	// Reserve space for the Check field.
 	if (out_size - *out_pos <= check_size)

diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c
index ca51523..b34c501 100644
--- a/src/liblzma/common/block_encoder.c
+++ b/src/liblzma/common/block_encoder.c

@@ -161,6 +161,11 @@
 {
 	lzma_next_coder_init(&lzma_block_encoder_init, next, allocator);
 
+	if (block == NULL)
+		return LZMA_PROG_ERROR;
+
+	// The contents of the structure may depend on the version so
+	// check the version first.
 	if (block->version != 0)
 		return LZMA_OPTIONS_ERROR;
 

diff --git a/src/liblzma/common/stream_buffer_encoder.c b/src/liblzma/common/stream_buffer_encoder.c
index 0542c30..2450ee2 100644
--- a/src/liblzma/common/stream_buffer_encoder.c
+++ b/src/liblzma/common/stream_buffer_encoder.c

@@ -51,6 +51,9 @@
 			|| out_pos_ptr == NULL || *out_pos_ptr > out_size)
 		return LZMA_PROG_ERROR;
 
+	if (!lzma_check_is_supported(check))
+		return LZMA_UNSUPPORTED_CHECK;
+
 	// Note for the paranoids: Index encoder prevents the Stream from
 	// getting too big and still being accepted with LZMA_OK, and Block
 	// encoder catches if the input is too big. So we don't need to

diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c
index f38879c..3e42575 100644
--- a/src/liblzma/lzma/lzma2_decoder.c
+++ b/src/liblzma/lzma/lzma2_decoder.c

@@ -67,6 +67,10 @@
 		const uint32_t control = in[*in_pos];
 		++*in_pos;
 
+		// End marker
+		if (control == 0x00)
+			return LZMA_STREAM_END;
+
 		if (control >= 0xE0 || control == 1) {
 			// Dictionary reset implies that next LZMA chunk has
 			// to set new properties.
@@ -104,10 +108,6 @@
 							&coder->options);
 			}
 		} else {
-			// End marker
-			if (control == 0x00)
-				return LZMA_STREAM_END;
-
 			// Invalid control values
 			if (control > 2)
 				return LZMA_DATA_ERROR;

diff --git a/src/scripts/xzgrep.in b/src/scripts/xzgrep.in
index cbc6b23..dd94513 100644
--- a/src/scripts/xzgrep.in
+++ b/src/scripts/xzgrep.in

@@ -27,7 +27,7 @@
 xz='@xz@ --format=auto'
 unset GZIP BZIP BZIP2
 
-case ${0##/*} in
+case ${0##*/} in
   *egrep*) prog=xzegrep; grep=${GREP:-egrep};;
   *fgrep*) prog=xzfgrep; grep=${GREP:-fgrep};;
   *)       prog=xzgrep; grep=${GREP:-grep};;
@@ -35,7 +35,7 @@
 
 version="$prog (@PACKAGE_NAME@) @VERSION@"
 
-usage="Usage: ${0##/*} [OPTION]... [-e] PATTERN [FILE]...
+usage="Usage: ${0##*/} [OPTION]... [-e] PATTERN [FILE]...
 Look for instances of PATTERN in the input FILEs, using their
 uncompressed contents if they are compressed.
 

diff --git a/tests/files/README b/tests/files/README
index 108ce8f..53950ed 100644
--- a/tests/files/README
+++ b/tests/files/README

@@ -87,6 +87,10 @@
     uncompressed with dictionary reset, and third is LZMA with new
     properties but without dictionary reset.
 
+    good-1-lzma2-5.xz has an empty LZMA2 stream with only the end of
+    payload marker. XZ Utils 5.0.1 and older incorrectly see this file
+    as corrupt.
+
     good-1-3delta-lzma2.xz has three Delta filters and LZMA2.
 
 

diff --git a/tests/files/good-1-lzma2-5.xz b/tests/files/good-1-lzma2-5.xz
new file mode 100644
index 0000000..339d1c3
--- /dev/null
+++ b/tests/files/good-1-lzma2-5.xz
Binary files differ
commit	487f5d593f8ffb2c5f1839b48e248372937c3291	[log] [tgz]
author	Jonathan Nieder <jrnieder@gmail.com>	Sat Jun 23 02:40:18 2012 -0500
committer	Jonathan Nieder <jrnieder@gmail.com>	Sat Jun 23 02:40:18 2012 -0500
tree	eca5bc55e28c46247c577a3b0c511da1b6742a5f
parent	73bf3a24d62f74555f26cd245a7e9ceb717d0f69 [diff]
parent	2e7bb37b64b1ba735c25d36d59a8c10aedc53f70 [diff]