about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lasse Collin <lasse.collin@tukaani.org> 2022-07-17 21:36:25 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2022-07-17 21:36:25 +0300
commit: 6a4a4a7d2667837dc824c26fcb19ed6ca5aff645 (patch)
tree: 7473724eef5a7812d8e632a9719b1f1857093411
parent: xzgrep: Fix parsing of certain options. (diff)
download: xz-6a4a4a7d2667837dc824c26fcb19ed6ca5aff645.tar.xz
xzgrep: Add more LC_ALL=C to avoid bugs with multibyte characters.
Also replace one use of expr with printf.

The rationale for LC_ALL=C was already mentioned in
69d1b3fc29677af8ade8dc15dba83f0589cb63d6 that fixed a security
issue. However, unrelated uses weren't changed in that commit yet.

POSIX says that with sed and such tools one should use LC_ALL=C
to ensure predictable behavior when strings contain byte sequences
that aren't valid multibyte characters in the current locale. See
under "Application usage" in here:
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html

With GNU sed invalid multibyte strings would work without this;
it's documented in its Texinfo manual. Some other implementations
aren't so forgiving.
-rw-r--r-- src/scripts/xzgrep.in 14
1 file changed, 8 insertions, 6 deletions
diff --git a/src/scripts/xzgrep.in b/src/scripts/xzgrep.in
index fce7940a..c851c7ed 100644
--- a/src/scripts/xzgrep.in
+++ b/src/scripts/xzgrep.in
@@ -75,9 +75,10 @@ while test $# -ne 0; do
# For example, "grep -25F" is equivalent to "grep -C25 -F". If only
# digits are specified like "grep -25" we don't get here because the
# above pattern in the case-statement doesn't match such strings.
- arg2=-\'$(expr "X${option}X" : 'X-.[0-9]*\(.*\)' | sed "$escape")
+ arg2=-\'$(LC_ALL=C expr "X${option}X" : 'X-.[0-9]*\(.*\)' |
+ LC_ALL=C sed "$escape")
eval "set -- $arg2 "'${1+"$@"}'
- option=$(expr "X$option" : 'X\(-.[0-9]*\)');;
+ option=$(LC_ALL=C expr "X$option" : 'X\(-.[0-9]*\)');;
(--binary-*=* | --[lm]a*=* | --reg*=*)
# These options require an argument and an argument has been provided
# with the --foo=argument syntax. All is good.
@@ -87,7 +88,7 @@ while test $# -ne 0; do
# If it isn't, display an error and exit.
case ${1?"$option option requires an argument"} in
(*\'*)
- optarg=" '"$(printf '%sX\n' "$1" | sed "$escape");;
+ optarg=" '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");;
(*)
optarg=" '$1'";;
esac
@@ -99,7 +100,8 @@ while test $# -ne 0; do
(*)
case $option in
(*\'*)
- operands="$operands '"$(printf '%sX\n' "$option" | sed "$escape");;
+ operands="$operands '"$(printf '%sX\n' "$option" |
+ LC_ALL=C sed "$escape");;
(*)
operands="$operands '$option'";;
esac
@@ -136,7 +138,7 @@ while test $# -ne 0; do
case $option in
(*\'?*)
- option=\'$(expr "X${option}X" : 'X\(.*\)' | sed "$escape");;
+ option=\'$(printf '%sX\n' "$option" | LC_ALL=C sed "$escape");;
(*)
option="'$option'";;
esac
@@ -153,7 +155,7 @@ eval "set -- $operands "'${1+"$@"}'
if test $have_pat -eq 0; then
case ${1?"Missing pattern; try \`${0##*/} --help' for help"} in
(*\'*)
- grep="$grep -- '"$(printf '%sX\n' "$1" | sed "$escape");;
+ grep="$grep -- '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");;
(*)
grep="$grep -- '$1'";;
esac