/////////////////////////////////////////////////////////////////////////////// // /// \file arm64.c /// \brief Filter for ARM64 binaries /// // Authors: Lasse Collin // Jia Tan // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "simple_private.h" #ifdef HAVE_ENCODER_ARM64 # include "simple_encoder.h" #endif #ifdef HAVE_DECODER_ARM64 # include "simple_decoder.h" #endif // In ARM64, there are two main branch instructions. // bl - branch and link: Calls a function and stores the return address. // b - branch: Jumps to a location, but does not store a return address. // // After some benchmarking, it was determined that only the bl instruction // is beneficial for compression. A majority of the jumps for the b // instruction are very small (+/- 0xFF). These are typical for loops // and if-statements. Encoding them to their absolute address reduces // redundancy since many of the small relative jump values are repeated, // but very few of the absolute addresses are. // // Thus, only the bl instruction will be encoded and decoded. // The bl instruction is 32 bits in size. The highest 6 bits contain // the opcode (10 0101 == 0x25) and the remaining 26 bits are // the immediate value. The immediate is a signed integer that // encodes the target address as a multiple of four bytes so // the range is +/-128 MiB. // The 6-bit op code for the bl instruction in ARM64 #define ARM64_BL_OPCODE 0x25 // Once the 26-bit immediate is multiple by four, the address is 28 bits // with the two lowest bits being zero. This mask is used to clear the // unwanted bits. #define ADDR28_MASK 0x0FFFFFFCU typedef struct { uint32_t sign_bit; uint32_t sign_mask; } lzma_simple_arm64; static size_t arm64_code(void *simple_ptr, uint32_t now_pos, bool is_encoder, uint8_t *buffer, size_t size) { const lzma_simple_arm64 *simple = simple_ptr; const uint32_t sign_bit = simple->sign_bit; const uint32_t sign_mask = simple->sign_mask; size_t i; for (i = 0; i + 4 <= size; i += 4) { if ((buffer[i + 3] >> 2) == ARM64_BL_OPCODE) { // Get the relative 28-bit address from // the 26-bit immediate. uint32_t src = read32le(buffer + i); src <<= 2; src &= ADDR28_MASK; if ((src & sign_mask) != 0 && (src & sign_mask) != sign_mask) continue; // Some files like static libraries or Linux kernel // modules have the immediate value filled with // zeros. Converting these placeholder values would // make compression worse so don't touch them. if (src == 0) continue; const uint32_t pc = now_pos + (uint32_t)(i); uint32_t dest; if (is_encoder) dest = pc + src; else dest = src - pc; dest &= ADDR28_MASK; // Sign-extend negative values or unset sign bits // from positive values. if (dest & sign_bit) dest |= sign_mask; else dest &= ~sign_mask; assert((dest & sign_mask) == 0 || (dest & sign_mask) == sign_mask); // Since also the decoder will ignore src values // of 0, we must ensure that nothing is ever encoded // to 0. This is achieved by encoding such values // as pc instead. When decoding, pc will be first // converted to 0 which we will catch here and fix. if (dest == 0) { // We cannot get here if pc is zero because // then src would need to be zero too but we // already ensured that src != 0. assert((pc & ADDR28_MASK) != 0); dest = is_encoder ? pc : 0U - pc; dest &= ADDR28_MASK; if (dest & sign_bit) dest |= sign_mask; else dest &= ~sign_mask; } assert((dest & sign_mask) == 0 || (dest & sign_mask) == sign_mask); assert((dest & ~ADDR28_MASK) == 0); // Construct and store the modified 32-bit instruction. dest >>= 2; dest |= (uint32_t)ARM64_BL_OPCODE << 26; write32le(buffer + i, dest); } } return i; } #ifdef HAVE_ENCODER_ARM64 extern lzma_ret lzma_arm64_props_encode(const void *options, uint8_t *out) { const lzma_options_arm64 *const opt = options; if (opt->width < LZMA_ARM64_WIDTH_MIN || opt->width > LZMA_ARM64_WIDTH_MAX) return LZMA_OPTIONS_ERROR; out[0] = (uint8_t)(opt->width - LZMA_ARM64_WIDTH_MIN); return LZMA_OK; } #endif #ifdef HAVE_DECODER_ARM64 extern lzma_ret lzma_arm64_props_decode(void **options, const lzma_allocator *allocator, const uint8_t *props, size_t props_size) { if (props_size != 1) return LZMA_OPTIONS_ERROR; if (props[0] > LZMA_ARM64_WIDTH_MAX - LZMA_ARM64_WIDTH_MIN) return LZMA_OPTIONS_ERROR; lzma_options_arm64 *opt = lzma_alloc(sizeof(lzma_options_arm64), allocator); if (opt == NULL) return LZMA_MEM_ERROR; opt->width = props[0] + LZMA_ARM64_WIDTH_MIN; *options = opt; return LZMA_OK; } #endif static lzma_ret arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, bool is_encoder) { if (filters[0].options == NULL) return LZMA_PROG_ERROR; const lzma_options_arm64 *opt = filters[0].options; if (opt->width < LZMA_ARM64_WIDTH_MIN || opt->width > LZMA_ARM64_WIDTH_MAX) return LZMA_OPTIONS_ERROR; const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters, &arm64_code, sizeof(lzma_simple_arm64), 4, 4, is_encoder, false); if (ret == LZMA_OK) { lzma_simple_coder *coder = next->coder; lzma_simple_arm64 *simple = coder->simple; simple->sign_bit = UINT32_C(1) << (opt->width - 1); simple->sign_mask = (UINT32_C(1) << 28) - simple->sign_bit; } return ret; } #ifdef HAVE_ENCODER_ARM64 extern lzma_ret lzma_simple_arm64_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { return arm64_coder_init(next, allocator, filters, true); } #endif #ifdef HAVE_DECODER_ARM64 extern lzma_ret lzma_simple_arm64_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { return arm64_coder_init(next, allocator, filters, false); } #endif