diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2019-05-13 20:05:17 +0300 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2019-07-13 17:56:28 +0300 |
commit | 596ed3de4485a4b1d83b5fe506ae9d0a172139b4 (patch) | |
tree | 180958bc66ccbca4a38fd988154273ba302e1481 | |
parent | Update THANKS. (diff) | |
download | xz-596ed3de4485a4b1d83b5fe506ae9d0a172139b4.tar.xz |
liblzma: Avoid memcpy(NULL, foo, 0) because it is undefined behavior.
I should have always known this but I didn't. Here is an example
as a reminder to myself:
int mycopy(void *dest, void *src, size_t n)
{
memcpy(dest, src, n);
return dest == NULL;
}
In the example, a compiler may assume that dest != NULL because
passing NULL to memcpy() would be undefined behavior. Testing
with GCC 8.2.1, mycopy(NULL, NULL, 0) returns 1 with -O0 and -O1.
With -O2 the return value is 0 because the compiler infers that
dest cannot be NULL because it was already used with memcpy()
and thus the test for NULL gets optimized out.
In liblzma, if a null-pointer was passed to memcpy(), there were
no checks for NULL *after* the memcpy() call, so I cautiously
suspect that it shouldn't have caused bad behavior in practice,
but it's hard to be sure, and the problematic cases had to be
fixed anyway.
Thanks to Jeffrey Walton.
-rw-r--r-- | src/liblzma/common/common.c | 6 | ||||
-rw-r--r-- | src/liblzma/lz/lz_decoder.c | 12 | ||||
-rw-r--r-- | src/liblzma/simple/simple_coder.c | 10 |
3 files changed, 23 insertions, 5 deletions
diff --git a/src/liblzma/common/common.c b/src/liblzma/common/common.c index 57e3f8eb..0053a6aa 100644 --- a/src/liblzma/common/common.c +++ b/src/liblzma/common/common.c @@ -99,7 +99,11 @@ lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, const size_t out_avail = out_size - *out_pos; const size_t copy_size = my_min(in_avail, out_avail); - memcpy(out + *out_pos, in + *in_pos, copy_size); + // Call memcpy() only if there is something to copy. If there is + // nothing to copy, in or out might be NULL and then the memcpy() + // call would trigger undefined behavior. + if (copy_size > 0) + memcpy(out + *out_pos, in + *in_pos, copy_size); *in_pos += copy_size; *out_pos += copy_size; diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c index bb21d0d0..6c9024e2 100644 --- a/src/liblzma/lz/lz_decoder.c +++ b/src/liblzma/lz/lz_decoder.c @@ -91,11 +91,17 @@ decode_buffer(lzma_coder *coder, in, in_pos, in_size); // Copy the decoded data from the dictionary to the out[] - // buffer. + // buffer. Do it conditionally because out can be NULL + // (in which case copy_size is always 0). Calling memcpy() + // with a null-pointer is undefined even if the third + // argument is 0. const size_t copy_size = coder->dict.pos - dict_start; assert(copy_size <= out_size - *out_pos); - memcpy(out + *out_pos, coder->dict.buf + dict_start, - copy_size); + + if (copy_size > 0) + memcpy(out + *out_pos, coder->dict.buf + dict_start, + copy_size); + *out_pos += copy_size; // Reset the dictionary if so requested by coder->lz.code(). diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c index 13ebabc7..4f499bef 100644 --- a/src/liblzma/simple/simple_coder.c +++ b/src/liblzma/simple/simple_coder.c @@ -118,7 +118,15 @@ simple_code(void *coder_ptr, const lzma_allocator *allocator, // coder->pos and coder->size yet. This way the coder can be // restarted if the next filter in the chain returns e.g. // LZMA_MEM_ERROR. - memcpy(out + *out_pos, coder->buffer + coder->pos, buf_avail); + // + // Do the memcpy() conditionally because out can be NULL + // (in which case buf_avail is always 0). Calling memcpy() + // with a null-pointer is undefined even if the third + // argument is 0. + if (buf_avail > 0) + memcpy(out + *out_pos, coder->buffer + coder->pos, + buf_avail); + *out_pos += buf_avail; // Copy/Encode/Decode more data to out[]. |