///////////////////////////////////////////////////////////////////////////////
//
/// \file arm64.c
/// \brief Filter for ARM64 binaries
///
// Authors: Lasse Collin
// Jia Tan
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include "simple_private.h"
#ifdef HAVE_ENCODER_ARM64
# include "simple_encoder.h"
#endif
#ifdef HAVE_DECODER_ARM64
# include "simple_decoder.h"
#endif
// In ARM64, there are two main branch instructions.
// bl - branch and link: Calls a function and stores the return address.
// b - branch: Jumps to a location, but does not store a return address.
//
// After some benchmarking, it was determined that only the bl instruction
// is beneficial for compression. A majority of the jumps for the b
// instruction are very small (+/- 0xFF). These are typical for loops
// and if-statements. Encoding them to their absolute address reduces
// redundancy since many of the small relative jump values are repeated,
// but very few of the absolute addresses are.
//
// Thus, only the bl instruction will be encoded and decoded.
// The bl instruction is 32 bits in size. The highest 6 bits contain
// the opcode (10 0101 == 0x25) and the remaining 26 bits are
// the immediate value. The immediate is a signed integer that
// encodes the target address relative to the instruction's own
// address as a multiple of four bytes so the range is +/-128 MiB.
// The 6-bit opcode of the bl instruction in ARM64. arm64_code() compares
// it against the highest six bits of each 32-bit instruction word.
#define ARM64_BL_OPCODE 0x25
// Once the 26-bit immediate is multiplied by four, the address is 28 bits
// with the two lowest bits being zero. This mask is used to clear the
// unwanted bits, that is, the four highest and the two lowest bits of
// a 32-bit value.
#define ADDR28_MASK 0x0FFFFFFCU
// Filter-specific state read by arm64_code(). Both fields are derived
// from the configured width in arm64_coder_init().
typedef struct {
// The single bit UINT32_C(1) << (width - 1). arm64_code() tests this
// bit in a converted address to decide whether the sign_mask bits
// are set or cleared.
uint32_t sign_bit;
// (UINT32_C(1) << 28) - sign_bit, that is, sign_bit and every bit
// above it up to and including bit 27. arm64_code() skips source
// addresses in which these bits are neither all zero nor all one.
uint32_t sign_mask;
} lzma_simple_arm64;
/// Brings value into the form used for converted addresses: the bits
/// outside ADDR28_MASK are cleared, and then the bits in sign_mask are
/// set to all ones if sign_bit is set in value or cleared otherwise.
static uint32_t
arm64_sign_extend(uint32_t value, uint32_t sign_bit, uint32_t sign_mask)
{
	value &= ADDR28_MASK;

	if ((value & sign_bit) != 0)
		return value | sign_mask;

	return value & ~sign_mask;
}


/// Converts the immediates of ARM64 bl instructions in buffer in place.
///
/// The buffer is walked in consecutive four-byte groups starting from
/// buffer[0]. Groups whose highest six bits don't match ARM64_BL_OPCODE
/// are left untouched.
///
/// \param      simple_ptr  Pointer to a lzma_simple_arm64 that provides
///                         sign_bit and sign_mask
/// \param      now_pos     Value added to the byte offset within buffer
///                         to form the pc of each instruction
///                         (presumably the position of buffer[0] in
///                         the whole data; confirm against the caller)
/// \param      is_encoder  True to add pc to the address, false to
///                         subtract pc from it
/// \param      buffer      Data to convert in place
/// \param      size        Number of bytes available in buffer
///
/// \return     Number of bytes that were examined, that is, the largest
///             multiple of four that doesn't exceed size.
static size_t
arm64_code(void *simple_ptr, uint32_t now_pos, bool is_encoder,
		uint8_t *buffer, size_t size)
{
	const lzma_simple_arm64 *simple = simple_ptr;
	const uint32_t sign_bit = simple->sign_bit;
	const uint32_t sign_mask = simple->sign_mask;

	size_t offset;
	for (offset = 0; offset + 4 <= size; offset += 4) {
		uint8_t *const instr = buffer + offset;

		// The opcode lives in the six highest bits of the last
		// byte of the little endian instruction word.
		if ((instr[3] >> 2) != ARM64_BL_OPCODE)
			continue;

		// Turn the 26-bit immediate into a 28-bit address.
		uint32_t addr = read32le(instr);
		addr = (addr << 2) & ADDR28_MASK;

		// Leave the instruction alone unless the bits covered
		// by sign_mask are uniformly zero or uniformly one.
		const uint32_t high_bits = addr & sign_mask;
		if (high_bits != 0 && high_bits != sign_mask)
			continue;

		// Static libraries, Linux kernel modules, and similar
		// files may have all-zero immediates as placeholders.
		// Converting those would hurt compression so they
		// are skipped.
		if (addr == 0)
			continue;

		const uint32_t pc = now_pos + (uint32_t)(offset);

		// The encoder adds pc and the decoder subtracts it.
		// The result is then sign-extended if it is negative,
		// or its sign bits are unset if it is positive.
		uint32_t converted = arm64_sign_extend(
				is_encoder ? pc + addr : addr - pc,
				sign_bit, sign_mask);

		assert((converted & sign_mask) == 0
				|| (converted & sign_mask) == sign_mask);

		// A zero address is ignored by the decoder as well, so
		// nothing must ever be encoded to zero. Such values are
		// encoded as pc instead. The decoder first converts pc
		// to zero, which ends up here and gets fixed.
		if (converted == 0) {
			// pc cannot be zero here: that would require
			// the original address to be zero too, and that
			// case was already skipped above.
			assert((pc & ADDR28_MASK) != 0);

			converted = arm64_sign_extend(
					is_encoder ? pc : 0U - pc,
					sign_bit, sign_mask);
		}

		assert((converted & sign_mask) == 0
				|| (converted & sign_mask) == sign_mask);
		assert((converted & ~ADDR28_MASK) == 0);

		// Rebuild the instruction word: the immediate goes to
		// the low 26 bits and the opcode to the high six bits.
		write32le(instr, (converted >> 2)
				| ((uint32_t)ARM64_BL_OPCODE << 26));
	}

	return offset;
}
#ifdef HAVE_ENCODER_ARM64
/// Encodes the ARM64 filter options into the one-byte Filter Properties.
///
/// \param      options     Pointer to a lzma_options_arm64
/// \param      out         Destination; out[0] receives
///                         width - LZMA_ARM64_WIDTH_MIN
///
/// \return     LZMA_OK on success, or LZMA_OPTIONS_ERROR if width is
///             outside LZMA_ARM64_WIDTH_MIN ... LZMA_ARM64_WIDTH_MAX,
///             in which case out isn't written.
extern lzma_ret
lzma_arm64_props_encode(const void *options, uint8_t *out)
{
	const lzma_options_arm64 *const arm64_opts = options;

	const bool width_is_valid
			= arm64_opts->width >= LZMA_ARM64_WIDTH_MIN
			&& arm64_opts->width <= LZMA_ARM64_WIDTH_MAX;

	if (!width_is_valid)
		return LZMA_OPTIONS_ERROR;

	// The width is stored relative to the smallest allowed width.
	*out = (uint8_t)(arm64_opts->width - LZMA_ARM64_WIDTH_MIN);

	return LZMA_OK;
}
#endif
#ifdef HAVE_DECODER_ARM64
/// Decodes the one-byte Filter Properties into a newly allocated
/// lzma_options_arm64.
///
/// \param      options     On success, *options is set to point to the
///                         allocated lzma_options_arm64
/// \param      allocator   Allocator passed to lzma_alloc()
/// \param      props       Encoded Filter Properties
/// \param      props_size  Size of props in bytes; must be exactly one
///
/// \return     LZMA_OK on success, LZMA_OPTIONS_ERROR if props_size
///             isn't one or props[0] is greater than
///             LZMA_ARM64_WIDTH_MAX - LZMA_ARM64_WIDTH_MIN, or
///             LZMA_MEM_ERROR if the allocation fails.
extern lzma_ret
lzma_arm64_props_decode(void **options, const lzma_allocator *allocator,
		const uint8_t *props, size_t props_size)
{
	// props[0] is examined only once the size is known to be right.
	if (props_size != 1
			|| props[0] > LZMA_ARM64_WIDTH_MAX
				- LZMA_ARM64_WIDTH_MIN)
		return LZMA_OPTIONS_ERROR;

	lzma_options_arm64 *opt = lzma_alloc(sizeof(*opt), allocator);
	if (opt == NULL)
		return LZMA_MEM_ERROR;

	// The stored byte is the width relative to the smallest
	// allowed width.
	opt->width = props[0] + LZMA_ARM64_WIDTH_MIN;

	*options = opt;
	return LZMA_OK;
}
#endif
/// Shared initialization for the ARM64 encoder and decoder.
///
/// Validates the filter options, initializes the coder via
/// lzma_simple_coder_init() with arm64_code() as the filter function,
/// and then derives sign_bit and sign_mask from the configured width.
///
/// \param      next        Coder to initialize
/// \param      allocator   Allocator passed on to lzma_simple_coder_init()
/// \param      filters     Filter chain; filters[0].options must point
///                         to a lzma_options_arm64
/// \param      is_encoder  Passed on to lzma_simple_coder_init()
///
/// \return     LZMA_PROG_ERROR if filters[0].options is NULL,
///             LZMA_OPTIONS_ERROR if width is outside
///             LZMA_ARM64_WIDTH_MIN ... LZMA_ARM64_WIDTH_MAX, and
///             otherwise whatever lzma_simple_coder_init() returns.
static lzma_ret
arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
		const lzma_filter_info *filters, bool is_encoder)
{
	if (filters[0].options == NULL)
		return LZMA_PROG_ERROR;

	const lzma_options_arm64 *opt = filters[0].options;

	if (opt->width < LZMA_ARM64_WIDTH_MIN
			|| opt->width > LZMA_ARM64_WIDTH_MAX)
		return LZMA_OPTIONS_ERROR;

	const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters,
			&arm64_code, sizeof(lzma_simple_arm64), 4, 4,
			is_encoder, false);

	// The filter-specific state is available only after a
	// successful initialization.
	if (ret != LZMA_OK)
		return ret;

	lzma_simple_coder *coder = next->coder;
	lzma_simple_arm64 *simple = coder->simple;

	// sign_bit is the highest bit of a width-bit value. sign_mask
	// covers that bit and all bits above it up to bit 27.
	const uint32_t sign_bit = UINT32_C(1) << (opt->width - 1);
	simple->sign_bit = sign_bit;
	simple->sign_mask = (UINT32_C(1) << 28) - sign_bit;

	return ret;
}
#ifdef HAVE_ENCODER_ARM64
/// Initializes next as an ARM64 filter encoder by calling
/// arm64_coder_init() with is_encoder set to true. The return value
/// of arm64_coder_init() is returned as is.
extern lzma_ret
lzma_simple_arm64_encoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	const bool is_encoder = true;

	return arm64_coder_init(next, allocator, filters, is_encoder);
}
#endif
#ifdef HAVE_DECODER_ARM64
/// Initializes next as an ARM64 filter decoder by calling
/// arm64_coder_init() with is_encoder set to false. The return value
/// of arm64_coder_init() is returned as is.
extern lzma_ret
lzma_simple_arm64_decoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	const bool is_encoder = false;

	return arm64_coder_init(next, allocator, filters, is_encoder);
}
#endif