diff --git a/CHANGELOG.md b/CHANGELOG.md index f1e4c13..280bb62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,35 @@ and this project adheres to ## [Unreleased] +### BREAKING CHANGES + +> [!NOTE] +> +> `json.bash` is version `0.x`, so the major version is not incremented. + +- The `json.validate` function now sets and clears a bash trap for `SIGPIPE` + when called. If something else in a bash script is also setting a trap for + `SIGPIPE`, the trap will be cleared after validating JSON. (It's not practical + to detect and restore existing `SIGPIPE` traps due to the performance cost of + doing so.) ([#15](https://github.com/h4l/json.bash/pull/15)) + +### Fixed + +- The JSON validation co-process exiting at startup could cause bash to exit + with an unbound variable error, due to the special coproc `_PID` var not being + set. Interaction with the validator co-process is now more robust to errors + that can occur when it exits unexpectedly. For example, if the grep command is + not compatible with the GNU grep flags we use. + ([#15](https://github.com/h4l/json.bash/pull/15)) + +### Added + +- The `JSON_BASH_GREP` environment variable can be set to a `:` delimited list + of commands to use when starting the grep JSON validator co-process. It + defaults to `ggrep:grep`, so systems with `ggrep` will use it first. (GNU grep + is commonly `ggrep` when `grep` is not GNU grep.) + ([#15](https://github.com/h4l/json.bash/pull/15)) + ### Changed - Fixed broken link in README's manual install instructions diff --git a/README.md b/README.md index ff16af0..74c2422 100644 --- a/README.md +++ b/README.md @@ -176,6 +176,7 @@ to find where it is). 1. [Security and correctness](#security-and-correctness) 1. [`jb-cat`, `jb-echo`, `jb-stream` utility programs](#jb-cat-jb-echo-jb-stream-utility-programs) 1. [Streaming output](#streaming-output) +1. [Configuring environment variables](#configuring-environment-variables) These examples mostly use `jb`, which is the `json.bash` library run as a stand-alone program. From within a bash script you get better performance by @@ -1010,6 +1011,30 @@ inputs, even when backtracking and matched-region output are disabled. The full syntax of `jb` arguments is documented in a (pseudo) grammar in [hack/syntax_patterns.bash](hack/syntax_patterns.bash). +### Configuring environment variables + +`json.bash` can be configured by setting certain environment variables: + +#### `JSON_BASH_GREP` + +The command(s) to run to start the JSON validator grep co-process. + +Default: `ggrep:grep` + +`json.bash` uses a +[regular expression to validate JSON data](./hack/syntax_patterns.bash). It uses +a GNU grep co-process to validate lines of JSON data using this regex +(specifically GNU grep, because it's a [PCRE] expression). + +This environment variable contains a list of command names or absolute paths to +execute. Multiple commands are separated with `:`. The first command that exists +is used, so the list can contain missing commands, so long as one exists. + +The default of `ggrep:grep` means `ggrep` will be used if it's available, +otherwise `grep`. + +[PCRE]: https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions + ## Background & performance notes Quite reasonably, you may be wondering why anyone would use Bash to implement a diff --git a/hack/trap_bench.bash b/hack/trap_bench.bash new file mode 100755 index 0000000..d3f2ed7 --- /dev/null +++ b/hack/trap_bench.bash @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +set -euo pipefail + +# This tests the cost of setting and clearing traps to handle SIGPIPE around a +# write that needs to avoid terminating the whole process on SIGPIPE. We do this +# in json.validate() + +method=${1:-} +count=${2:-10} +readarray text # read lines from stdin + +exec {null_fd}>/dev/null + +function write_without_trap() { + for ((id=0; id<$count; ++id)) do + for line in "${text[@]}"; do + echo -n "${line?}" >&"${null_fd:?}" + done + done +} + +function write_with_one_trap() { + trap -- '' SIGPIPE + write_without_trap + trap - SIGPIPE +} + +function write_with_trap() { + for ((id=0; id<$count; ++id)) do + for line in "${text[@]}"; do + trap -- '' SIGPIPE + echo -n "${line?}" >&"${null_fd:?}" + trap - SIGPIPE + done + done +} + +echo "write_${method:?}" >&2 +"write_${method:?}" diff --git a/json.bash b/json.bash index 6499d44..e264411 100755 --- a/json.bash +++ b/json.bash @@ -401,7 +401,7 @@ function json.get_entry_encode_fn() { function json.start_json_validator() { if [[ ${_json_validator_pids[$$]:-} != "" ]]; then return 0; fi - local array validation_request + local validation_request # This is a PCRE regex that matches JSON. This is possible because we use # PCRE's recursive patterns to match JSON's nested constructs. And also # possessive repetition quantifiers to prevent expensive backtracking on match @@ -413,7 +413,7 @@ function json.start_json_validator() { # version. # # It doesn't match JSON directly, but rather it matches a simple validation - # request protocol. json.validate costructs validation request messages that + # request protocol. json.validate constructs validation request messages that # match this regex when the JSON data is valid for the type specified in the # validation request. validation_request=$'(:?(?true|false)(?true)(?false)(?null)(?true|false|null|(?-?+(?:0|[1-9][0-9]*+)(?:\\.[0-9]*+)?+(?:[eE][+-]?+[0-9]++)?+)|(?"(?:[^\\x00-\\x1F"\\\\]|\\\\(?:["\\\\/bfnrt]|u[A-Fa-f0-9]{4}))*+"))(?[\\x20\\x09\\x0A\\x0D]*+)(?\\[(?:(?&ws)(?&json)(?:(?&ws),(?&ws)(?&json))*+)?+(?&ws)\\]|\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&json))(?:(?&ws),(?&entry))*+)?+(?&ws)\\}|(?&atom))){0}^[\\w]++(?:(?=((?:(?&pair)?+\\x1E(?:j(?&ws)(?&json)(?&ws)|s(?&ws)(?&str)(?&ws)|n(?&ws)(?&num)(?&ws)|b(?&ws)(?&bool)(?&ws)|t(?&ws)(?&true)(?&ws)|f(?&ws)(?&false)(?&ws)|z(?&ws)(?&null)(?&ws)|a(?&ws)(?&atom)(?&ws)|Oj(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&json))(?:(?&ws),(?&entry_json))*+)?+(?&ws)\\}(?&ws)|Os(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&str))(?:(?&ws),(?&entry_str))*+)?+(?&ws)\\}(?&ws)|On(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&num))(?:(?&ws),(?&entry_num))*+)?+(?&ws)\\}(?&ws)|Ob(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&bool))(?:(?&ws),(?&entry_bool))*+)?+(?&ws)\\}(?&ws)|Ot(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&true))(?:(?&ws),(?&entry_true))*+)?+(?&ws)\\}(?&ws)|Of(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&false))(?:(?&ws),(?&entry_false))*+)?+(?&ws)\\}(?&ws)|Oz(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&null))(?:(?&ws),(?&entry_null))*+)?+(?&ws)\\}(?&ws)|Oa(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&atom))(?:(?&ws),(?&entry_atom))*+)?+(?&ws)\\}(?&ws)|Aj(?&ws)\\[(?:(?&ws)(?&json)(?:(?&ws),(?&ws)(?&json))*+)?+(?&ws)\\](?&ws)|As(?&ws)\\[(?:(?&ws)(?&str)(?:(?&ws),(?&ws)(?&str))*+)?+(?&ws)\\](?&ws)|An(?&ws)\\[(?:(?&ws)(?&num)(?:(?&ws),(?&ws)(?&num))*+)?+(?&ws)\\](?&ws)|Ab(?&ws)\\[(?:(?&ws)(?&bool)(?:(?&ws),(?&ws)(?&bool))*+)?+(?&ws)\\](?&ws)|At(?&ws)\\[(?:(?&ws)(?&true)(?:(?&ws),(?&ws)(?&true))*+)?+(?&ws)\\](?&ws)|Af(?&ws)\\[(?:(?&ws)(?&false)(?:(?&ws),(?&ws)(?&false))*+)?+(?&ws)\\](?&ws)|Az(?&ws)\\[(?:(?&ws)(?&null)(?:(?&ws),(?&ws)(?&null))*+)?+(?&ws)\\](?&ws)|Aa(?&ws)\\[(?:(?&ws)(?&atom)(?:(?&ws),(?&ws)(?&atom))*+)?+(?&ws)\\](?&ws)))$)):++)?+' @@ -432,14 +432,20 @@ function json.start_json_validator() { { exec {saved_fds[i]}<&"$(( 63 - i ))"; } 2>/dev/null || break done - { coproc json_validator ( LC_ALL=C.UTF-8 grep --only-matching --line-buffered \ - -P -e "${validation_request//[$' \n']/}" ) - } 2>/dev/null # hide interactive job control PID output - _json_validator_pids[$$]=${json_validator_PID:?} + # This is a separate function so that we can mock it when testing. + json._start_grep_coproc + # restore FDs bash closed... for ((i=0; i < ${#saved_fds[@]}; ++i)); do eval "exec $(( 63 - i ))<&${saved_fds[i]:?} {saved_fds[i]}<&-" done + + # json_validator and json_validator_PID are set automatically by coproc. If + # the coproc dies, bash unsets these vars immediately, so they must be treated + # like accessing mutable data that can be set by another thread. + # Real example: https://github.com/h4l/json.bash/issues/10 + _json_validator_pids[$$]=${json_validator_PID:-} + # Bash only allows 1 coproc per bash process, so by creating a coproc we would # normally prevent another things in this process from creating one. We can # avoid this restriction by duplicating the coproc's pipe FDs to new ones, and @@ -448,8 +454,54 @@ function json.start_json_validator() { # prevent forked shells using this process's coprocess, we store the new FDs # in an array indexed by PID, so we only use FDs owned by our process. # shellcheck disable=SC1083,SC2102 - exec {_json_validator_out_fds[$$]}<&"${json_validator[0]}"- \ - {_json_validator_in_fds[$$]}>&"${json_validator[1]}"- + + # $json_validator can be unset here. (Even if we were to copy & validate the + # values, the FDs can be closed by the time we try to open them here, so we + # just try, and interpret failure as the coproc being dead - so unset the PID. + { exec {_json_validator_out_fds[$$]}<&"${json_validator[0]:-}"- \ + {_json_validator_in_fds[$$]}>&"${json_validator[1]:-}"- ; + } 2>/dev/null || _json_validator_pids[$$]= + + if [[ ! ${_json_validator_pids[$$]:-} ]]; then + msg_context="json validator co-process failed to start" \ + json._notify_coproc_terminated + return 2 + fi +} + +function json._start_grep_coproc() { + local grep + out=grep json._resolve_grep + + { coproc json_validator ( LC_ALL=C.UTF-8 "${grep:?}" --only-matching --line-buffered \ + -P -e "${validation_request:?}" ) + } 2>/dev/null # hide interactive job control PID output +} + +function json._resolve_grep() { + local -n _jrg_out=${out:?'json._resolve_grep: $out must be set to receive the grep path'} + local IFS=':' + # shellcheck disable=SC2206 + local _jrg_grep_options=(${JSON_BASH_GREP:-ggrep:grep}) + for option in "${_jrg_grep_options[@]}"; do + if type -t -- "${option?}" > /dev/null; then + _jrg_out=${option?} + return 0 + fi + done + # default to the final option even if it failed to resolve + _jrg_out=${option:-grep} +} + +function json._notify_coproc_terminated() { + local grep + out=grep json._resolve_grep + + echo "json.bash: ${msg_context:?}:" \ + "${grep@Q} must support GNU grep options (-P --only-matching" \ + "--line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use" \ + "the :raw type instead of :json to avoid starting a JSON validator grep" \ + "process." >&2 } function json.check_json_validator_running() { @@ -461,13 +513,37 @@ function json.check_json_validator_running() { return 0 # alive or not expected to be alive } +function json._reset_json_validator() { + if [[ ${_json_validator_pids[$$]:-} != "" ]]; then + kill "${_json_validator_pids[$$]:-}" 2>/dev/null + unset "_json_validator_pids[$$]" + fi +} + function json.validate() { - if [[ ${_json_validator_pids[$$]:-} == "" ]]; - then json.start_json_validator; fi + if [[ ${_json_validator_pids[$$]:-} == "" ]]; then + json.start_json_validator || return 2 + fi - local _jv_type=${_json_bash_validation_types[${type:-json}]:?"json.validate: unsupported \$type: ${type@Q}"} + local _jv_type=${_json_bash_validation_types[${type:-json}]:?"json.validate: unsupported \$type: ${type@Q}"} \ + _jv_write_error='' _jv_response='' + + let "_json_validate_id=${_json_validate_id:-0}+1"; local _jv_id=$_json_validate_id + + # When we write to stdin file descriptor of our grep coproc (from + # _json_validator_in_fds), we'll get SIGPIPE if the process has terminated. + # Default bash behaviour is to exit with 141. (e.g. this would happen when + # writing to stdout that's connected to a `head` command that's received + # enough input.) However this would stop us handling the coproc's unexpected + # termination. This situation does not warrant exiting, as writing to the + # coproc is not the primary purpose of this process. + # + # We don't set a SIGPIPE trap globally because if we do, any SIGPIPE error + # would result in bash printing a message to stdout indicating an IO error. + # We also don't attempt to restore a potential existing trap, because + # detecting existing traps is too slow to make sense to do every call. + trap '' SIGPIPE - let "_json_validate_id=${_json_validate_id:-0}+1"; local id=$_json_validate_id local count_markers IFS # delimit JSON with Record Separator # Ideally we'd use null-terminated "lines" with grep's --null-data, but I can't # get consistent reads after writes that way. (The problem appears to be with @@ -481,34 +557,43 @@ function json.validate() { if [[ ${#_validate_json_in[@]} == 0 ]]; then return 0; fi printf -v count_markers ':%.0s' "${!_validate_json_in[@]}" { - printf '%d%s' "${id:?}" "${count_markers:?}" + printf '%d%s' "${_jv_id:?}" "${count_markers:?}" # \n is the end of a validation request, so we need to remove \n in JSON # input. We map them to \r, which don't JSON affect validity. printf "\x1E${_jv_type:?}%s" "${_validate_json_in[@]//$'\n'/$'\r'}" printf '\n' - } >&"${_json_validator_in_fds[$$]:?}" + } >&"${_json_validator_in_fds[$$]:?}" 2>/dev/null || _jv_write_error=true else IFS=''; count_markers=${*/*/:} { - printf '%d%s' "${id:?}" "${count_markers?}" + printf '%d%s' "${_jv_id:?}" "${count_markers?}" printf "\x1E${_jv_type:?}%s" "${@//$'\n'/$'\r'}" printf '\n' - } >&"${_json_validator_in_fds[$$]:?}" + } >&"${_json_validator_in_fds[$$]:?}" 2>/dev/null || _jv_write_error=true fi + trap - SIGPIPE # restore default SIGPIPE behaviour IFS='' - if ! read -ru "${_json_validator_out_fds[$$]:?}" -t 4 response; then + if [[ ${_jv_write_error:-} ]] || ! read -ru "${_json_validator_out_fds[$$]:?}" -t 4 _jv_response; then if ! json.check_json_validator_running; then - echo "json.bash: json validator coprocess unexpectedly died" >&2 + msg_context="json validator co-process unexpectedly died" \ + json._notify_coproc_terminated return 2 fi - echo "json.validate: failed to read json validator response: $? ${response@Q}" >&2 + # After an IO error the validator stream will be in an unknown state, so we + # can't keep using it. + json._reset_json_validator + if [[ ${_jv_write_error:-} ]]; then + echo "json.validate: failed to write json validator request: ${_jv_id:?}" >&2 + else + echo "json.validate: failed to read json validator response: ${_jv_id:?}" >&2 + fi return 2 fi - if [[ $response != "${id:?}${count_markers?}" ]]; then - if [[ $response != "${id:?}"* ]]; then - echo "json.validate: mismatched validator response ID: ${id@A}," \ - "${response@A}" >&2; return 2 + if [[ ${_jv_response?} != "${_jv_id:?}${count_markers?}" ]]; then + if [[ ${_jv_response?} != "${_jv_id:?}"* ]]; then + echo "json.validate: mismatched validator response ID: ${_jv_id@A}," \ + "${_jv_response@A}" >&2; return 2 fi return 1 fi diff --git a/json.bats b/json.bats index 23ec1c5..2f56d00 100644 --- a/json.bats +++ b/json.bats @@ -11,6 +11,9 @@ if [[ ${CI:-false} == true ]]; then TIMEOUT_SECONDS=60 fi +# Always use grep, otherwise errors will be different on systems with ggrep. +export JSON_BASH_GREP=grep + setup() { cd "${BATS_TEST_DIRNAME:?}" } @@ -2842,6 +2845,183 @@ json(): Could not encode the value of argument 'a:number=oops' as a 'number' val [[ $status == 0 && $output == '["foo","bar"]' ]] } +@test "json validator :: resolves grep executable via JSON_BASH_GREP :: defaulting to ggrep then grep" { + msg_file=$(mktemp_bats) + + function validate_with_ggrep() { + unset JSON_BASH_GREP + function ggrep() { + echo "ggrep" > "${msg_file:?}" + command -p grep "$@" + } + function grep() { + echo "grep" > "${msg_file:?}" + command -p grep "$@" + } + + # ggrep is defined so it's used by default + json.validate '42' + [[ $(<"${msg_file?}") == 'ggrep' ]] + + # when ggrep isn't defined, grep is used + json._reset_json_validator + unset ggrep + json.validate '42' + [[ $(<"${msg_file?}") == 'grep' ]] + } + + # unset PATH to avoid finding ggrep on the system running this test + PATH= run validate_with_ggrep + [[ $status == 0 && $output == "" ]] +} + +@test "json validator :: resolves grep executable via JSON_BASH_GREP :: uses first defined command after splitting by : or the final undefined entry" { + msg_file=$(mktemp_bats) + + function validate_with_ggrep() { + JSON_BASH_GREP=missing_grep1:custom_grep:missing_grep2 + function custom_grep() { + echo "custom_grep" > "${msg_file:?}" + command -p grep "$@" + } + + # custom_grep is defined so it's used + json.validate '42' + [[ $(<"${msg_file?}") == 'custom_grep' ]] + + # when none of the options are available, the final choice is used (and fails) + json._reset_json_validator + unset custom_grep + json.validate '42' || [[ $? == 2 ]] + } + + # unset PATH to avoid finding ggrep on the system running this test + PATH= run validate_with_ggrep + [[ $status == 0 && $output == "json.bash: json validator co-process unexpectedly died: 'missing_grep2' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] +} + +@test "json validator :: reports validator grep process failing to start :: PID & FDs unset" { + # Because it's a parallel process, the coproc failure behaviour is timing + # dependent, making it difficult to test specific scenarios without mocking. + # So we mock the function that starts the coproc to simulate particular error + # conditions that it can exhibit in practice. + function json._start_grep_coproc() { + true + } + + run --separate-stderr json.validate '42' + [[ $status == 2 && $output == "" ]] + echo "$stderr" + [[ $stderr == "json.bash: json validator co-process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] +} + +@test "json validator :: reports validator grep process failing to start :: PID set, FDs unset" { + function json._start_grep_coproc() { + json_validator_PID=9999 + # json_validator (FDs) var not set + } + + run --separate-stderr json.validate '42' + [[ $status == 2 && $output == "" ]] + echo "$stderr" + [[ $stderr == "json.bash: json validator co-process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] +} + +@test "json validator :: reports validator grep process failing to start :: PID unset, FDs set" { + function json._start_grep_coproc() { + json_validator=(9000 9001) + # json_validator_PID var not set + } + + run --separate-stderr json.validate '42' + [[ $status == 2 && $output == "" ]] + echo "$stderr" + [[ $stderr == "json.bash: json validator co-process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] +} + +@test "json validator :: reports validator grep process failing after start" { + function kill_coproc_then_validate() { + json.validate '1' 2>&1 + kill "${_json_validator_pids[$$]:?}" || true + wait "${_json_validator_pids[$$]:?}" || [[ $? == 143 ]] # expect SIGTERM + + # Should fail when writing the validation request to the coproc + validate_status=0 + json.validate '2' || validate_status=$? + [[ $validate_status == 2 ]] # should fail + + # validator should be reset, and succeed now + json.validate '3' 2>&1 + } + + run --separate-stderr kill_coproc_then_validate + echo "$status" + echo "${stderr?}" + [[ $status == 0 && $output == "" ]] + [[ $stderr == "json.bash: json validator co-process unexpectedly died: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] +} + +@test "json validator :: reports validator grep process IO write error" { + function kill_coproc_then_validate() { + json.validate '1' 2>&1 # start the coproc + + # simulate a write error + fifo=$(mktemp_bats); rm "${fifo:?}" + mkfifo "${fifo:?}" + cat < "${fifo:?}" & + exec {fifo_in_fd}>"${fifo:?}" # open the write end of fifo + # Break the pipe by killing the reader. Writing to fifo_in_fd will now error + # with SIGPIPE. + kill $! || true; wait $! || true + + # using the validator will now write to the broken fifo + _json_validator_in_fds[$$]=${fifo_in_fd} + + validate_status=0 + json.validate '2' || validate_status=$? + [[ $validate_status == 2 ]] # should fail + + # validator should be reset, and succeed now + json.validate '3' 2>&1 + } + + run --separate-stderr kill_coproc_then_validate + echo "$status" + echo "${stderr@Q}" + [[ $status == 0 && $output == "" ]] + [[ $stderr == "json.validate: failed to write json validator request: 2" ]] +} + +@test "json validator :: reports validator grep process IO read error" { + function kill_coproc_then_validate() { + json.validate '1' 2>&1 # start the coproc + + # simulate a read error + fifo=$(mktemp_bats); rm "${fifo:?}" + mkfifo "${fifo:?}" + sleep infinity > "${fifo:?}" & + exec {fifo_out_fd}<"${fifo:?}" # open the read end of fifo + # Break the pipe by killing the writer + kill $! || true; wait $! || true + + # using the validator will now read the broken fifo + _json_validator_out_fds[$$]=${fifo_out_fd} + + validate_status=0 + json.validate '2' || validate_status=$? + [[ $validate_status == 2 ]] # should fail + + # validator should be reset, and succeed now + json.validate '3' 2>&1 + } + + run --separate-stderr kill_coproc_then_validate + echo "$status" + echo "${stderr@Q}" + [[ $status == 0 && $output == "" ]] + [[ $stderr == "json.validate: failed to read json validator response: 2" ]] +} + @test "json validator :: validates valid JSON via arg" { initials=('' 'true' '{}' '[]' '42' '"hi"' 'null') for initial in "${initials[@]}"; do