1 files changed, 227 insertions, 0 deletions
diff --git a/src/liblzma/simple/arm64.c b/src/liblzma/simple/arm64.c
new file mode 100644
index 00000000..911e30c1
--- /dev/null
+++ b/src/liblzma/simple/arm64.c
@@ -0,0 +1,227 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       arm64.c
+/// \brief      Filter for ARM64 binaries
+///
+//  Authors:    Lasse Collin
+//              Jia Tan
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+#ifdef HAVE_ENCODER_ARM64
+#	include "simple_encoder.h"
+#endif
+
+#ifdef HAVE_DECODER_ARM64
+#	include "simple_decoder.h"
+#endif
+
+
+// In ARM64, there are two main branch instructions.
+// bl - branch and link: Calls a function and stores the return address.
+// b - branch: Jumps to a location, but does not store a return address.
+//
+// After some benchmarking, it was determined that only the bl instruction
+// is beneficial for compression. A majority of the jumps for the b
+// instruction are very small (+/- 0xFF). These are typical for loops
+// and if-statements. Encoding them to their absolute address would hurt
+// compression since many of the small relative jump values are repeated,
+// but very few of the absolute addresses are.
+//
+// Thus, only the bl instruction will be encoded and decoded.
+// The bl instruction is 32 bits in size. The highest 6 bits contain
+// the opcode (10 0101 == 0x25) and the remaining 26 bits are
+// the immediate value. The immediate is a signed integer that
+// encodes the target address as a multiple of four bytes so
+// the range is +/-128 MiB.
+
+// The 6-bit op code for the bl instruction in ARM64
+#define ARM64_BL_OPCODE 0x25
+
+// Once the 26-bit immediate is multiplied by four, the address is 28 bits
+// with the two lowest bits being zero. This mask is used to clear the
+// unwanted bits.
+#define ADDR28_MASK 0x0FFFFFFCU
+
+
+typedef struct {
+	uint32_t sign_bit;	// 1 << (width - 1), see arm64_coder_init()
+	uint32_t sign_mask;	// (1 << 28) - sign_bit: bits width-1 .. 27
+} lzma_simple_arm64;
+
+// Converts bl targets in buffer[0..size) in place; returns bytes processed.
+static size_t
+arm64_code(void *simple_ptr, uint32_t now_pos, bool is_encoder,
+		uint8_t *buffer, size_t size)
+{
+	const lzma_simple_arm64 *simple = simple_ptr;
+	const uint32_t sign_bit = simple->sign_bit;
+	const uint32_t sign_mask = simple->sign_mask;
+	// Walk the buffer one 32-bit little-endian instruction at a time.
+	size_t i;
+	for (i = 0; i + 4 <= size; i += 4) {
+		if ((buffer[i + 3] >> 2) == ARM64_BL_OPCODE) {
+			// Get the relative 28-bit address from
+			// the 26-bit immediate.
+			uint32_t src = read32le(buffer + i);
+			src <<= 2;
+			src &= ADDR28_MASK;
+			// Skip if the sign_mask bits of src are mixed.
+			if ((src & sign_mask) != 0
+					&& (src & sign_mask) != sign_mask)
+				continue;
+
+			// Some files like static libraries or Linux kernel
+			// modules have the immediate value filled with
+			// zeros. Converting these placeholder values would
+			// make compression worse so don't touch them.
+			if (src == 0)
+				continue;
+			// pc = now_pos + byte offset of this instruction.
+			const uint32_t pc = now_pos + (uint32_t)(i);
+			// The encoder adds pc; the decoder subtracts it.
+			uint32_t dest;
+			if (is_encoder)
+				dest = pc + src;
+			else
+				dest = src - pc;
+
+			dest &= ADDR28_MASK;
+
+			// Sign-extend negative values or unset sign bits
+			// from positive values.
+			if (dest & sign_bit)
+				dest |= sign_mask;
+			else
+				dest &= ~sign_mask;
+
+			assert((dest & sign_mask) == 0
+					|| (dest & sign_mask) == sign_mask);
+
+			// Since also the decoder will ignore src values
+			// of 0, we must ensure that nothing is ever encoded
+			// to 0. This is achieved by encoding such values
+			// as pc instead. When decoding, pc will be first
+			// converted to 0 which we will catch here and fix.
+			if (dest == 0) {
+				// We cannot get here if pc is zero because
+				// then src would need to be zero too but we
+				// already ensured that src != 0.
+				assert((pc & ADDR28_MASK) != 0);
+				dest = is_encoder ? pc : 0U - pc;
+				dest &= ADDR28_MASK;
+
+				if (dest & sign_bit)
+					dest |= sign_mask;
+				else
+					dest &= ~sign_mask;
+			}
+
+			assert((dest & sign_mask) == 0
+					|| (dest & sign_mask) == sign_mask);
+			assert((dest & ~ADDR28_MASK) == 0);
+
+			// Construct and store the modified 32-bit instruction.
+			dest >>= 2;
+			dest |= (uint32_t)ARM64_BL_OPCODE << 26;
+			write32le(buffer + i, dest);
+		}
+	}
+	// i counts the bytes handled; it is always a multiple of four.
+	return i;
+}
+
+// Encodes the ARM64 filter options into a single property byte at out[0].
+#ifdef HAVE_ENCODER_ARM64
+extern lzma_ret
+lzma_arm64_props_encode(const void *options, uint8_t *out)
+{
+	const lzma_options_arm64 *const opt = options;
+	// NOTE(review): no NULL check on options here -- confirm callers.
+	if (opt->width < LZMA_ARM64_WIDTH_MIN
+			|| opt->width > LZMA_ARM64_WIDTH_MAX)
+		return LZMA_OPTIONS_ERROR;
+	// The property byte holds width relative to the minimum width.
+	out[0] = (uint8_t)(opt->width - LZMA_ARM64_WIDTH_MIN);
+	return LZMA_OK;
+}
+#endif
+
+// Parses the one-byte properties into a newly allocated lzma_options_arm64.
+#ifdef HAVE_DECODER_ARM64
+extern lzma_ret
+lzma_arm64_props_decode(void **options, const lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size)
+{
+	if (props_size != 1)
+		return LZMA_OPTIONS_ERROR;
+	// props[0] is the width minus LZMA_ARM64_WIDTH_MIN; reject if too big.
+	if (props[0] > LZMA_ARM64_WIDTH_MAX - LZMA_ARM64_WIDTH_MIN)
+		return LZMA_OPTIONS_ERROR;
+
+	lzma_options_arm64 *opt = lzma_alloc(sizeof(lzma_options_arm64),
+			allocator);
+	if (opt == NULL)
+		return LZMA_MEM_ERROR;
+	// Hand the allocated struct to the caller through *options.
+	opt->width = props[0] + LZMA_ARM64_WIDTH_MIN;
+	*options = opt;
+	return LZMA_OK;
+
+}
+#endif
+
+// Shared initializer: validates the width option and sets up the sign masks.
+static lzma_ret
+arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_filter_info *filters, bool is_encoder)
+{
+	if (filters[0].options == NULL)
+		return LZMA_PROG_ERROR;
+	// Validate the width before it is used as a shift count below.
+	const lzma_options_arm64 *opt = filters[0].options;
+	if (opt->width < LZMA_ARM64_WIDTH_MIN
+			|| opt->width > LZMA_ARM64_WIDTH_MAX)
+		return LZMA_OPTIONS_ERROR;
+	// NOTE(review): the 4, 4 and false arguments are opaque here; verify.
+	const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters,
+			&arm64_code, sizeof(lzma_simple_arm64), 4, 4,
+			is_encoder, false);
+
+	if (ret == LZMA_OK) {
+		lzma_simple_coder *coder = next->coder;
+		lzma_simple_arm64 *simple = coder->simple;
+		// sign_mask covers bit (width - 1) up to bit 27 inclusive.
+		simple->sign_bit = UINT32_C(1) << (opt->width - 1);
+		simple->sign_mask = (UINT32_C(1) << 28) - simple->sign_bit;
+	}
+
+	return ret;
+}
+
+// Encoder entry point: calls arm64_coder_init() with is_encoder = true.
+#ifdef HAVE_ENCODER_ARM64
+extern lzma_ret
+lzma_simple_arm64_encoder_init(lzma_next_coder *next,
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	return arm64_coder_init(next, allocator, filters, true);
+}
+#endif
+
+// Decoder entry point: calls arm64_coder_init() with is_encoder = false.
+#ifdef HAVE_DECODER_ARM64
+extern lzma_ret
+lzma_simple_arm64_decoder_init(lzma_next_coder *next,
+		const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	return arm64_coder_init(next, allocator, filters, false);
+}
+#endif