diff options
Diffstat (limited to '')
-rw-r--r-- | src/liblzma/simple/arm64.c | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/src/liblzma/simple/arm64.c b/src/liblzma/simple/arm64.c new file mode 100644 index 00000000..911e30c1 --- /dev/null +++ b/src/liblzma/simple/arm64.c @@ -0,0 +1,227 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file arm64.c +/// \brief Filter for ARM64 binaries +/// +// Authors: Lasse Collin +// Jia Tan +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + +#ifdef HAVE_ENCODER_ARM64 +# include "simple_encoder.h" +#endif + +#ifdef HAVE_DECODER_ARM64 +# include "simple_decoder.h" +#endif + + +// In ARM64, there are two main branch instructions. +// bl - branch and link: Calls a function and stores the return address. +// b - branch: Jumps to a location, but does not store a return address. +// +// After some benchmarking, it was determined that only the bl instruction +// is beneficial for compression. A majority of the jumps for the b +// instruction are very small (+/- 0xFF). These are typical for loops +// and if-statements. Encoding them to their absolute address reduces +// redundancy since many of the small relative jump values are repeated, +// but very few of the absolute addresses are. +// +// Thus, only the bl instruction will be encoded and decoded. +// The bl instruction is 32 bits in size. The highest 6 bits contain +// the opcode (10 0101 == 0x25) and the remaining 26 bits are +// the immediate value. The immediate is a signed integer that +// encodes the target address as a multiple of four bytes so +// the range is +/-128 MiB. + +// The 6-bit op code for the bl instruction in ARM64 +#define ARM64_BL_OPCODE 0x25 + +// Once the 26-bit immediate is multiplied by four, the address is 28 bits +// with the two lowest bits being zero. This mask is used to clear the +// unwanted bits. 
+#define ADDR28_MASK 0x0FFFFFFCU + + +typedef struct { + uint32_t sign_bit; + uint32_t sign_mask; +} lzma_simple_arm64; + + +static size_t +arm64_code(void *simple_ptr, uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + const lzma_simple_arm64 *simple = simple_ptr; + const uint32_t sign_bit = simple->sign_bit; + const uint32_t sign_mask = simple->sign_mask; + + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + if ((buffer[i + 3] >> 2) == ARM64_BL_OPCODE) { + // Get the relative 28-bit address from + // the 26-bit immediate. + uint32_t src = read32le(buffer + i); + src <<= 2; + src &= ADDR28_MASK; + + if ((src & sign_mask) != 0 + && (src & sign_mask) != sign_mask) + continue; + + // Some files like static libraries or Linux kernel + // modules have the immediate value filled with + // zeros. Converting these placeholder values would + // make compression worse so don't touch them. + if (src == 0) + continue; + + const uint32_t pc = now_pos + (uint32_t)(i); + + uint32_t dest; + if (is_encoder) + dest = pc + src; + else + dest = src - pc; + + dest &= ADDR28_MASK; + + // Sign-extend negative values or unset sign bits + // from positive values. + if (dest & sign_bit) + dest |= sign_mask; + else + dest &= ~sign_mask; + + assert((dest & sign_mask) == 0 + || (dest & sign_mask) == sign_mask); + + // Since also the decoder will ignore src values + // of 0, we must ensure that nothing is ever encoded + // to 0. This is achieved by encoding such values + // as pc instead. When decoding, pc will be first + // converted to 0 which we will catch here and fix. + if (dest == 0) { + // We cannot get here if pc is zero because + // then src would need to be zero too but we + // already ensured that src != 0. + assert((pc & ADDR28_MASK) != 0); + dest = is_encoder ? 
pc : 0U - pc; + dest &= ADDR28_MASK; + + if (dest & sign_bit) + dest |= sign_mask; + else + dest &= ~sign_mask; + } + + assert((dest & sign_mask) == 0 + || (dest & sign_mask) == sign_mask); + assert((dest & ~ADDR28_MASK) == 0); + + // Construct and store the modified 32-bit instruction. + dest >>= 2; + dest |= (uint32_t)ARM64_BL_OPCODE << 26; + write32le(buffer + i, dest); + } + } + + return i; +} + + +#ifdef HAVE_ENCODER_ARM64 +extern lzma_ret +lzma_arm64_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_arm64 *const opt = options; + + if (opt->width < LZMA_ARM64_WIDTH_MIN + || opt->width > LZMA_ARM64_WIDTH_MAX) + return LZMA_OPTIONS_ERROR; + + out[0] = (uint8_t)(opt->width - LZMA_ARM64_WIDTH_MIN); + return LZMA_OK; +} +#endif + + +#ifdef HAVE_DECODER_ARM64 +extern lzma_ret +lzma_arm64_props_decode(void **options, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_OPTIONS_ERROR; + + if (props[0] > LZMA_ARM64_WIDTH_MAX - LZMA_ARM64_WIDTH_MIN) + return LZMA_OPTIONS_ERROR; + + lzma_options_arm64 *opt = lzma_alloc(sizeof(lzma_options_arm64), + allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + opt->width = props[0] + LZMA_ARM64_WIDTH_MIN; + *options = opt; + return LZMA_OK; + +} +#endif + + +static lzma_ret +arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + if (filters[0].options == NULL) + return LZMA_PROG_ERROR; + + const lzma_options_arm64 *opt = filters[0].options; + if (opt->width < LZMA_ARM64_WIDTH_MIN + || opt->width > LZMA_ARM64_WIDTH_MAX) + return LZMA_OPTIONS_ERROR; + + const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters, + &arm64_code, sizeof(lzma_simple_arm64), 4, 4, + is_encoder, false); + + if (ret == LZMA_OK) { + lzma_simple_coder *coder = next->coder; + lzma_simple_arm64 *simple = coder->simple; + + simple->sign_bit = UINT32_C(1) << (opt->width - 1); + 
simple->sign_mask = (UINT32_C(1) << 28) - simple->sign_bit; + } + + return ret; +} + + +#ifdef HAVE_ENCODER_ARM64 +extern lzma_ret +lzma_simple_arm64_encoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm64_coder_init(next, allocator, filters, true); +} +#endif + + +#ifdef HAVE_DECODER_ARM64 +extern lzma_ret +lzma_simple_arm64_decoder_init(lzma_next_coder *next, + const lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm64_coder_init(next, allocator, filters, false); +} +#endif |