From 107259e306bcfc2336a0fb870fb58034c28faa52 Mon Sep 17 00:00:00 2001
From: Lasse Collin <lasse.collin@tukaani.org>
Date: Sun, 20 Jan 2008 20:12:58 +0200
Subject: [PATCH] Fix alignment handling bugs in Subblock encoder.

This leaves one known alignment bug unfixed: If repeat count
doesn't fit into 28-bit integer, the encoder has to split
this to multiple Subblocks with Subblock Type `Repeating Data'.
The extra Subblocks may have wrong alignment. Correct alignment
is restored after the split Repeating Data has been completely
written out.

Since the encoder doesn't even try to fix the alignment unless
the size of Data is at least 4 bytes, to trigger this bug you
need at least 4 GiB of repeating data with sequence length of
4 or more bytes. Since the worst thing done by this bug is
misaligned data (no data corruption), this bug simply isn't
worth fixing, because a proper fix isn't simple.
---
 src/liblzma/subblock/subblock_encoder.c | 170 +++++++++++++++++-------
 1 file changed, 119 insertions(+), 51 deletions(-)

diff --git a/src/liblzma/subblock/subblock_encoder.c b/src/liblzma/subblock/subblock_encoder.c
index d033ea2..6fc420b 100644
--- a/src/liblzma/subblock/subblock_encoder.c
+++ b/src/liblzma/subblock/subblock_encoder.c
@@ -21,17 +21,27 @@
 #include "raw_encoder.h"
 
 
+/// Maximum number of repeats that a single Repeating Data can indicate.
+/// This is directly from the file format specification.
 #define REPEAT_COUNT_MAX (1U << 28)
 
-/// Number of bytes the data chunk being repeated must be before we care
-/// about alignment. This is somewhat arbitrary. It just doesn't make sense
-/// to waste bytes for alignment when the data chunk is very small.
-///
-/// TODO Rename and use this also for Subblock Data?
-#define RLE_MIN_SIZE_FOR_ALIGN 3
+/// Number of bytes the data chunk (not including the header part) must be
+/// before we care about alignment. This is somewhat arbitrary. It just
+/// doesn't make sense to waste bytes for alignment when the data chunk
+/// is very small.
+#define MIN_CHUNK_SIZE_FOR_ALIGN 4
+
+/// Number of bytes of the header part of Subblock Type `Data'. This is
+/// used as the `skew' argument for subblock_align().
+#define ALIGN_SKEW_DATA 4
+
+/// Like above but for Repeating Data.
+#define ALIGN_SKEW_REPEATING_DATA 5
 
+/// Writes one byte to output buffer and updates the alignment counter.
 #define write_byte(b) \
 do { \
+	assert(*out_pos < out_size); \
 	out[*out_pos] = b; \
 	++*out_pos; \
 	++coder->alignment.out_pos; \
@@ -77,10 +87,6 @@ struct lzma_coder_s {
 		/// LZMA_SUBBLOCK_ALIGNMENT_DEFAULT if options is NULL.
 		uint32_t multiple;
 
-		/// Number of input bytes that we have already read but
-		/// not yet started writing out.
-		uint32_t in_pending;
-
 		/// Number of input bytes which we have processed and started
 		/// writing out. 32-bit integer is enough since we care only
 		/// about the lowest bits when fixing alignment.
@@ -100,6 +106,12 @@ struct lzma_coder_s {
 
 		/// Allocated size of the buffer.
 		size_t limit;
+
+		/// Number of input bytes that we have already read but
+		/// not yet started writing out. This can be different
+		/// to `size' when using Subfilter. That's why we track
+		/// in_pending separately for RLE (see below).
+		uint32_t in_pending;
 	} subblock;
 
 	struct {
@@ -112,7 +124,10 @@ struct lzma_coder_s {
 
 		/// Number of times the first `size' bytes of buffer[]
 		/// will be repeated.
-		lzma_vli count;
+		uint64_t count;
+
+		/// Like subblock.in_pending above, but for RLE.
+		uint32_t in_pending;
 	} rle;
 
 	struct {
@@ -156,6 +171,7 @@ struct lzma_coder_s {
 
 	} subfilter;
 
+	/// Temporary buffer used when we are not the last filter in the chain.
 	struct {
 		size_t pos;
 		size_t size;
@@ -170,27 +186,28 @@ struct lzma_coder_s {
 /// a multiple of coder->alignment.multiple.
 static bool
 subblock_align(lzma_coder *coder, uint8_t *restrict out,
-		size_t *restrict out_pos, size_t out_size, uint32_t skew)
+		size_t *restrict out_pos, size_t out_size,
+		size_t chunk_size, uint32_t skew)
 {
 	assert(*out_pos < out_size);
 
-	const uint32_t target = coder->alignment.in_pos
-			% coder->alignment.multiple;
+	// Fix the alignment only if it makes sense at least a little.
+	if (chunk_size >= MIN_CHUNK_SIZE_FOR_ALIGN) {
+		const uint32_t target = coder->alignment.in_pos
+				% coder->alignment.multiple;
 
-	while ((coder->alignment.out_pos + skew)
-			% coder->alignment.multiple != target) {
-		// Zero indicates padding.
-		write_byte(0x00);
+		while ((coder->alignment.out_pos + skew)
+				% coder->alignment.multiple != target) {
+			// Zero indicates padding.
+			write_byte(0x00);
 
-		// Check if output buffer got full and indicate it to
-		// the caller.
-		if (*out_pos == out_size)
-			return true;
+			// Check if output buffer got full and indicate it to
+			// the caller.
+			if (*out_pos == out_size)
+				return true;
+		}
 	}
 
-	coder->alignment.in_pos += coder->alignment.in_pending;
-	coder->alignment.in_pending = 0;
-
 	// Output buffer is not full.
 	return false;
 }
@@ -245,10 +262,20 @@ subblock_rle_flush(lzma_coder *coder)
 		}
 	}
 
-	if (coder->rle.count > REPEAT_COUNT_MAX)
+	if (coder->rle.count == 1) {
+		// The buffer should be repeated only once. It is
+		// waste of space to use Repeating Data. Instead,
+		// write a regular Data Subblock. See SEQ_RLE_COUNT_0
+		// in subblock_buffer() for more info.
+		coder->tmp = coder->rle.size - 1;
+	} else if (coder->rle.count > REPEAT_COUNT_MAX) {
+		// There's so much to repeat that it doesn't fit into
+		// 28-bit integer. We will write two or more Subblocks
+		// of type Repeating Data.
 		coder->tmp = REPEAT_COUNT_MAX - 1;
-	else
+	} else {
 		coder->tmp = coder->rle.count - 1;
+	}
 
 	coder->sequence = SEQ_RLE_COUNT_0;
 
@@ -372,7 +399,7 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
 			assert(coder->subfilter.subcoder.code == NULL);
 
 			// No Subfilter is enabled, just copy the data as is.
-			coder->alignment.in_pending += bufcpy(
+			coder->subblock.in_pending += bufcpy(
 					in, in_pos, in_size,
 					coder->subblock.data,
 					&coder->subblock.size,
@@ -415,7 +442,7 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
 						? LZMA_FINISH : action);
 
 			const size_t in_used = *in_pos - in_start;
-			coder->alignment.in_pending += in_used;
+			coder->subblock.in_pending += in_used;
 			if (in_used > 0)
 				coder->subfilter.got_input = true;
 
@@ -527,16 +554,21 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
 							coder->rle.size);
 
 				// Test if coder->subblock.data is repeating.
+				// If coder->rle.count would overflow, we
+				// force flushing. Forced flushing shouldn't
+				// really happen in real-world situations.
 				const size_t count = coder->subblock.size
 						/ coder->rle.size;
-				if (is_repeating(coder->rle.buffer,
-						coder->rle.size,
-						coder->subblock.data, count)) {
-					if (LZMA_VLI_VALUE_MAX - count
-							< coder->rle.count)
-						return LZMA_PROG_ERROR;
-
+				if (UINT64_MAX - count > coder->rle.count
+						&& is_repeating(
+							coder->rle.buffer,
+							coder->rle.size,
+							coder->subblock.data,
+							count)) {
 					coder->rle.count += count;
+					coder->rle.in_pending += coder
+							->subblock.in_pending;
+					coder->subblock.in_pending = 0;
 					coder->subblock.size = 0;
 
 				} else if (coder->rle.count > 0) {
@@ -621,17 +653,42 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
 		break;
 
 	case SEQ_RLE_COUNT_0:
-		// Make the Data field properly aligned, but only if the data
-		// chunk to be repeated isn't extremely small. We have four
-		// bytes for Count and one byte for Size, thus the number five.
-		if (coder->rle.size >= RLE_MIN_SIZE_FOR_ALIGN
-				&& subblock_align(
-					coder, out, out_pos, out_size, 5))
-			return LZMA_OK;
-
 		assert(coder->rle.count > 0);
 
-		write_byte(0x30 | (coder->tmp & 0x0F));
+		if (coder->rle.count == 1) {
+			// The buffer should be repeated only once. Fix
+			// the alignment and write the first byte of
+			// Subblock Type `Data'.
+			if (subblock_align(coder, out, out_pos, out_size,
+					coder->rle.size, ALIGN_SKEW_DATA))
+				return LZMA_OK;
+
+			write_byte(0x20 | (coder->tmp & 0x0F));
+
+		} else {
+			// We have something to actually repeat, which should
+			// mean that it takes less space with run-length
+			// encoding.
+			if (subblock_align(coder, out, out_pos, out_size,
+						coder->rle.size,
+						ALIGN_SKEW_REPEATING_DATA))
+				return LZMA_OK;
+
+			write_byte(0x30 | (coder->tmp & 0x0F));
+		}
+
+		// NOTE: If we have to write more than one Repeating Data
+		// due to rle.count > REPEAT_COUNT_MAX, the subsequent
+		// Repeating Data Subblocks may get wrong alignment, because
+		// we add rle.in_pending to alignment.in_pos at once instead
+		// of adding only as much as this particular Repeating Data
+		// consumed input data. Correct alignment is always restored
+		// after all the required Repeating Data Subblocks have been
+		// written. This problem occurs in such a weird cases that
+		// it's not worth fixing.
+		coder->alignment.out_pos += coder->rle.size;
+		coder->alignment.in_pos += coder->rle.in_pending;
+		coder->rle.in_pending = 0;
 
 		coder->sequence = SEQ_RLE_COUNT_1;
 		break;
@@ -649,12 +706,18 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
 	case SEQ_RLE_COUNT_3:
 		write_byte(coder->tmp >> 20);
 
+		// Again, see if we are writing regular Data or Repeating Data.
+		// In the former case, we skip SEQ_RLE_SIZE.
+		if (coder->rle.count == 1)
+			coder->sequence = SEQ_RLE_DATA;
+		else
+			coder->sequence = SEQ_RLE_SIZE;
+
 		if (coder->rle.count > REPEAT_COUNT_MAX)
 			coder->rle.count -= REPEAT_COUNT_MAX;
 		else
 			coder->rle.count = 0;
 
-		coder->sequence = SEQ_RLE_SIZE;
 		break;
 
 	case SEQ_RLE_SIZE:
@@ -670,17 +733,20 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
 		if (coder->pos < coder->rle.size)
 			return LZMA_OK;
 
-		coder->alignment.out_pos += coder->rle.size;
-
 		coder->pos = 0;
 		coder->sequence = SEQ_FLUSH;
 		break;
 
 	case SEQ_DATA_SIZE_0:
 		// We need four bytes for the Size field.
-		if (subblock_align(coder, out, out_pos, out_size, 4))
+		if (subblock_align(coder, out, out_pos, out_size,
+				coder->subblock.size, ALIGN_SKEW_DATA))
 			return LZMA_OK;
 
+		coder->alignment.out_pos += coder->subblock.size;
+		coder->alignment.in_pos += coder->subblock.in_pending;
+		coder->subblock.in_pending = 0;
+
 		write_byte(0x20 | (coder->tmp & 0x0F));
 		coder->sequence = SEQ_DATA_SIZE_1;
 		break;
@@ -706,7 +772,6 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
 		if (coder->pos < coder->subblock.size)
 			return LZMA_OK;
 
-		coder->alignment.out_pos += coder->subblock.size;
 		coder->subblock.size = 0;
 		coder->pos = 0;
 		coder->sequence = SEQ_FLUSH;
@@ -714,7 +779,9 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
 
 	case SEQ_SUBFILTER_INIT: {
 		assert(coder->subblock.size == 0);
+		assert(coder->subblock.in_pending == 0);
 		assert(coder->rle.count == 0);
+		assert(coder->rle.in_pending == 0);
 		assert(coder->subfilter.mode == SUB_SET);
 		assert(coder->options != NULL);
 
@@ -884,11 +951,12 @@ lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 			== LZMA_VLI_VALUE_UNKNOWN;
 	next->coder->pos = 0;
 
-	next->coder->alignment.in_pending = 0;
 	next->coder->alignment.in_pos = 0;
 	next->coder->alignment.out_pos = 0;
 	next->coder->subblock.size = 0;
+	next->coder->subblock.in_pending = 0;
 	next->coder->rle.count = 0;
+	next->coder->rle.in_pending = 0;
 	next->coder->subfilter.mode = SUB_NONE;
 	next->coder->subfilter.mode_locked = false;
 
-- 
2.39.2