From 4e43988a2d53566290280e8de5cb68de46a5d652 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Sat, 20 Jul 2024 13:05:55 +0000 Subject: [PATCH 1/8] chore: remove unused 'array' local --- json.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/json.bash b/json.bash index 6499d44..e1b09fc 100755 --- a/json.bash +++ b/json.bash @@ -401,7 +401,7 @@ function json.get_entry_encode_fn() { function json.start_json_validator() { if [[ ${_json_validator_pids[$$]:-} != "" ]]; then return 0; fi - local array validation_request + local validation_request # This is a PCRE regex that matches JSON. This is possible because we use # PCRE's recursive patterns to match JSON's nested constructs. And also # possessive repetition quantifiers to prevent expensive backtracking on match From 4e3721ed3235bb15a467a1967514ba8f60021e22 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Sun, 21 Jul 2024 02:58:36 +0000 Subject: [PATCH 2/8] refactor: remove no-op substitution I believe this was used to strip whitespace before the regex was pre-generated in hack/syntax_patterns.bash. --- json.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/json.bash b/json.bash index e1b09fc..9c240d3 100755 --- a/json.bash +++ b/json.bash @@ -433,7 +433,7 @@ function json.start_json_validator() { done { coproc json_validator ( LC_ALL=C.UTF-8 grep --only-matching --line-buffered \ - -P -e "${validation_request//[$' \n']/}" ) + -P -e "${validation_request:?}" ) } 2>/dev/null # hide interactive job control PID output _json_validator_pids[$$]=${json_validator_PID:?} # restore FDs bash closed... From ff7112165e5d7d1bd19c170853c7708f9ada4d76 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Sun, 21 Jul 2024 17:30:28 +0000 Subject: [PATCH 3/8] fix: harden json validator coproc interaction The json validator grep coproc interaction was working reliably when grep was GNU grep, but it was not gracefully handling the termination of the coproc in all cases. When a coproc exits immediately at startup, accessing the coproc special variables (json_validator and json_validator_PID) is subject to race condition, because bash unsets the variables as soon as the coproc process exits. In the previous implementation, we read the PID with ${:?} syntax, which could fail if the PID var was unset by bash. We now interact with the coproc in a rather more defensive way, handling either or both of json_validator and json_validator_PID being unset at creation; and then also handling SIGPIPE errors when later writing to the FD of our writable end of the coproc's stdin. This is somewhat fiddly, as it requires trapping SIGPIPE errors, otherwise bash exits with 141. We now have tests covering coproc failure at startup, after startup, and IO read & write errors. We can also restart the coproc after an IO error to recover. This could potentially occur when a read times out under high system load. Finally, several local vars in json.validate were not prefixed, so they could collide with a caller's in= reference var. They're all prefixed now. --- json.bash | 105 ++++++++++++++++++++++++++++++++++++---------- json.bats | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 21 deletions(-) diff --git a/json.bash b/json.bash index 9c240d3..d098d4b 100755 --- a/json.bash +++ b/json.bash @@ -432,14 +432,20 @@ function json.start_json_validator() { { exec {saved_fds[i]}<&"$(( 63 - i ))"; } 2>/dev/null || break done - { coproc json_validator ( LC_ALL=C.UTF-8 grep --only-matching --line-buffered \ - -P -e "${validation_request:?}" ) - } 2>/dev/null # hide interactive job control PID output - _json_validator_pids[$$]=${json_validator_PID:?} + # This is a separate function so that we can mock it when testing. + json._start_grep_coproc + # restore FDs bash closed... for ((i=0; i < ${#saved_fds[@]}; ++i)); do eval "exec $(( 63 - i ))<&${saved_fds[i]:?} {saved_fds[i]}<&-" done + + # json_validator and json_validator_PID are set automatically by coproc. If + # the coproc dies, bash unsets these vars immediately, so they must be treated + # like accessing mutable data that can be set by another thread. + # Real example: https://github.com/h4l/json.bash/issues/10 + _json_validator_pids[$$]=${json_validator_PID:-} + # Bash only allows 1 coproc per bash process, so by creating a coproc we would # normally prevent another things in this process from creating one. We can # avoid this restriction by duplicating the coproc's pipe FDs to new ones, and @@ -448,8 +454,32 @@ function json.start_json_validator() { # prevent forked shells using this process's coprocess, we store the new FDs # in an array indexed by PID, so we only use FDs owned by our process. # shellcheck disable=SC1083,SC2102 - exec {_json_validator_out_fds[$$]}<&"${json_validator[0]}"- \ - {_json_validator_in_fds[$$]}>&"${json_validator[1]}"- + + # $json_validator can be unset here. (Even if we were to copy & validate the + # values, the FDs can be closed by the time we try to open them here, so we + # just try, and interpret failure as the coproc being dead - so unset the PID. + { exec {_json_validator_out_fds[$$]}<&"${json_validator[0]:-}"- \ + {_json_validator_in_fds[$$]}>&"${json_validator[1]:-}"- ; + } 2>/dev/null || _json_validator_pids[$$]= + + if [[ ! ${_json_validator_pids[$$]:-} ]]; then + msg_context="json validator 'grep' process failed to start" \ + json._notify_coproc_terminated + return 2 + fi +} + +function json._start_grep_coproc() { + { coproc json_validator ( LC_ALL=C.UTF-8 grep --only-matching --line-buffered \ + -P -e "${validation_request:?}" ) + } 2>/dev/null # hide interactive job control PID output +} + +function json._notify_coproc_terminated() { + echo "json.bash: ${msg_context:?}:" \ + "'grep' must support GNU grep options (-P --only-matching" \ + "--line-buffered). Use the :raw type instead of :json to avoid starting a" \ + "JSON validator grep process. " >&2 } function json.check_json_validator_running() { @@ -461,13 +491,37 @@ function json.check_json_validator_running() { return 0 # alive or not expected to be alive } +function json._reset_json_validator() { + if [[ ${_json_validator_pids[$$]:-} != "" ]]; then + kill "${_json_validator_pids[$$]:-}" 2>/dev/null + unset "_json_validator_pids[$$]" + fi +} + function json.validate() { - if [[ ${_json_validator_pids[$$]:-} == "" ]]; - then json.start_json_validator; fi + if [[ ${_json_validator_pids[$$]:-} == "" ]]; then + json.start_json_validator || return 2 + fi + + local _jv_type=${_json_bash_validation_types[${type:-json}]:?"json.validate: unsupported \$type: ${type@Q}"} \ + _jv_write_error='' _jv_response='' + + let "_json_validate_id=${_json_validate_id:-0}+1"; local _jv_id=$_json_validate_id - local _jv_type=${_json_bash_validation_types[${type:-json}]:?"json.validate: unsupported \$type: ${type@Q}"} + # When we write to stdin file descriptor of our grep coproc (from + # _json_validator_in_fds), we'll get SIGPIPE if the process has terminated. + # Default bash behaviour is to exit with 141. (e.g. this would happen when + # writing to stdout that's connected to a `head` command that's received + # enough input.) However this would stop us handling the coproc's unexpected + # termination. This situation does not warrant exiting, as writing to the + # coproc is not the primary purpose of this process. + # + # We don't set a SIGPIPE trap globally because if we do, any SIGPIPE error + # would result in bash printing a message to stdout indicating an IO error. + # We also don't attempt to restore a potential existing trap, because + # detecting existing traps is too slow to make sense to do every call. + trap '' SIGPIPE - let "_json_validate_id=${_json_validate_id:-0}+1"; local id=$_json_validate_id local count_markers IFS # delimit JSON with Record Separator # Ideally we'd use null-terminated "lines" with grep's --null-data, but I can't # get consistent reads after writes that way. (The problem appears to be with @@ -481,34 +535,43 @@ function json.validate() { if [[ ${#_validate_json_in[@]} == 0 ]]; then return 0; fi printf -v count_markers ':%.0s' "${!_validate_json_in[@]}" { - printf '%d%s' "${id:?}" "${count_markers:?}" + printf '%d%s' "${_jv_id:?}" "${count_markers:?}" # \n is the end of a validation request, so we need to remove \n in JSON # input. We map them to \r, which don't JSON affect validity. printf "\x1E${_jv_type:?}%s" "${_validate_json_in[@]//$'\n'/$'\r'}" printf '\n' - } >&"${_json_validator_in_fds[$$]:?}" + } >&"${_json_validator_in_fds[$$]:?}" 2>/dev/null || _jv_write_error=true else IFS=''; count_markers=${*/*/:} { - printf '%d%s' "${id:?}" "${count_markers?}" + printf '%d%s' "${_jv_id:?}" "${count_markers?}" printf "\x1E${_jv_type:?}%s" "${@//$'\n'/$'\r'}" printf '\n' - } >&"${_json_validator_in_fds[$$]:?}" + } >&"${_json_validator_in_fds[$$]:?}" 2>/dev/null || _jv_write_error=true fi + trap - SIGPIPE # restore default SIGPIPE behaviour IFS='' - if ! read -ru "${_json_validator_out_fds[$$]:?}" -t 4 response; then + if [[ ${_jv_write_error:-} ]] || ! read -ru "${_json_validator_out_fds[$$]:?}" -t 4 _jv_response; then if ! json.check_json_validator_running; then - echo "json.bash: json validator coprocess unexpectedly died" >&2 + msg_context="json validator coprocess unexpectedly died" \ + json._notify_coproc_terminated return 2 fi - echo "json.validate: failed to read json validator response: $? ${response@Q}" >&2 + # After an IO error the validator stream will be in an unknown state, so we + # can't keep using it. + json._reset_json_validator + if [[ ${_jv_write_error:-} ]]; then + echo "json.validate: failed to write json validator request: ${_jv_id:?}" >&2 + else + echo "json.validate: failed to read json validator response: ${_jv_id:?}" >&2 + fi return 2 fi - if [[ $response != "${id:?}${count_markers?}" ]]; then - if [[ $response != "${id:?}"* ]]; then - echo "json.validate: mismatched validator response ID: ${id@A}," \ - "${response@A}" >&2; return 2 + if [[ ${_jv_response?} != "${_jv_id:?}${count_markers?}" ]]; then + if [[ ${_jv_response?} != "${_jv_id:?}"* ]]; then + echo "json.validate: mismatched validator response ID: ${_jv_id@A}," \ + "${_jv_response@A}" >&2; return 2 fi return 1 fi diff --git a/json.bats b/json.bats index 23ec1c5..ba716d2 100644 --- a/json.bats +++ b/json.bats @@ -2842,6 +2842,128 @@ json(): Could not encode the value of argument 'a:number=oops' as a 'number' val [[ $status == 0 && $output == '["foo","bar"]' ]] } +@test "json validator :: reports validator grep process failing to start :: PID & FDs unset" { + # Because it's a parallel process, the coproc failure behaviour is timing + # dependent, making it difficult to test specific scenarios without mocking. + # So we mock the function that starts the coproc to simulate particular error + # conditions that it can exhibit in practice. + function json._start_grep_coproc() { + true + } + + run --separate-stderr json.validate '42' + [[ $status == 2 && $output == "" ]] + echo "$stderr" + [[ $stderr == *"json.bash: json validator 'grep' process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Use the :raw type instead of :json to avoid starting a JSON validator grep process."* ]] +} + +@test "json validator :: reports validator grep process failing to start :: PID set, FDs unset" { + function json._start_grep_coproc() { + json_validator_PID=9999 + # json_validator (FDs) var not set + } + + run --separate-stderr json.validate '42' + [[ $status == 2 && $output == "" ]] + echo "$stderr" + [[ $stderr == *"json.bash: json validator 'grep' process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Use the :raw type instead of :json to avoid starting a JSON validator grep process."* ]] +} + +@test "json validator :: reports validator grep process failing to start :: PID unset, FDs set" { + function json._start_grep_coproc() { + json_validator=(9000 9001) + # json_validator_PID var not set + } + + run --separate-stderr json.validate '42' + [[ $status == 2 && $output == "" ]] + echo "$stderr" + [[ $stderr == "json.bash: json validator 'grep' process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] +} + +@test "json validator :: reports validator grep process failing after start" { + function kill_coproc_then_validate() { + json.validate '1' 2>&1 + kill "${_json_validator_pids[$$]:?}" || true + wait "${_json_validator_pids[$$]:?}" || [[ $? == 143 ]] # expect SIGTERM + + # Should fail when writing the validation request to the coproc + validate_status=0 + json.validate '2' || validate_status=$? + [[ $validate_status == 2 ]] # should fail + + # validator should be reset, and succeed now + json.validate '3' 2>&1 + } + + run --separate-stderr kill_coproc_then_validate + echo "$status" + echo "${stderr@Q}" + [[ $status == 0 && $output == "" ]] + [[ $stderr == "json.bash: json validator coprocess unexpectedly died: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] +} + +@test "json validator :: reports validator grep process IO write error" { + function kill_coproc_then_validate() { + json.validate '1' 2>&1 # start the coproc + + # simulate a write error + fifo=$(mktemp_bats); rm "${fifo:?}" + mkfifo "${fifo:?}" + cat < "${fifo:?}" & + exec {fifo_in_fd}>"${fifo:?}" # open the write end of fifo + # Break the pipe by killing the reader. Writing to fifo_in_fd will now error + # with SIGPIPE. + kill $! || true; wait $! || true + + # using the validator will now write to the broken fifo + _json_validator_in_fds[$$]=${fifo_in_fd} + + validate_status=0 + json.validate '2' || validate_status=$? + [[ $validate_status == 2 ]] # should fail + + # validator should be reset, and succeed now + json.validate '3' 2>&1 + } + + run --separate-stderr kill_coproc_then_validate + echo "$status" + echo "${stderr@Q}" + [[ $status == 0 && $output == "" ]] + [[ $stderr == "json.validate: failed to write json validator request: 2" ]] +} + +@test "json validator :: reports validator grep process IO read error" { + function kill_coproc_then_validate() { + json.validate '1' 2>&1 # start the coproc + + # simulate a read error + fifo=$(mktemp_bats); rm "${fifo:?}" + mkfifo "${fifo:?}" + sleep infinity > "${fifo:?}" & + exec {fifo_out_fd}<"${fifo:?}" # open the read end of fifo + # Break the pipe by killing the writer + kill $! || true; wait $! || true + + # using the validator will now read the broken fifo + _json_validator_out_fds[$$]=${fifo_out_fd} + + validate_status=0 + json.validate '2' || validate_status=$? + [[ $validate_status == 2 ]] # should fail + + # validator should be reset, and succeed now + json.validate '3' 2>&1 + } + + run --separate-stderr kill_coproc_then_validate + echo "$status" + echo "${stderr@Q}" + [[ $status == 0 && $output == "" ]] + [[ $stderr == "json.validate: failed to read json validator response: 2" ]] +} + @test "json validator :: validates valid JSON via arg" { initials=('' 'true' '{}' '[]' '42' '"hi"' 'null') for initial in "${initials[@]}"; do From d908708a26b397224ab510c35e4150d3b52e9f8b Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Mon, 22 Jul 2024 05:08:46 +0000 Subject: [PATCH 4/8] chore: add bench for trap setting/resetting We do this in json.validate now. --- hack/trap_bench.bash | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 hack/trap_bench.bash diff --git a/hack/trap_bench.bash b/hack/trap_bench.bash new file mode 100755 index 0000000..d3f2ed7 --- /dev/null +++ b/hack/trap_bench.bash @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +set -euo pipefail + +# This tests the cost of setting and clearing traps to handle SIGPIPE around a +# write that needs to avoid terminating the whole process on SIGPIPE. We do this +# in json.validate() + +method=${1:-} +count=${2:-10} +readarray text # read lines from stdin + +exec {null_fd}>/dev/null + +function write_without_trap() { + for ((id=0; id<$count; ++id)) do + for line in "${text[@]}"; do + echo -n "${line?}" >&"${null_fd:?}" + done + done +} + +function write_with_one_trap() { + trap -- '' SIGPIPE + write_without_trap + trap - SIGPIPE +} + +function write_with_trap() { + for ((id=0; id<$count; ++id)) do + for line in "${text[@]}"; do + trap -- '' SIGPIPE + echo -n "${line?}" >&"${null_fd:?}" + trap - SIGPIPE + done + done +} + +echo "write_${method:?}" >&2 +"write_${method:?}" From d3ab9d3d4986947019e54e885bce61f49534d5ab Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Mon, 22 Jul 2024 05:09:51 +0000 Subject: [PATCH 5/8] chore: fix typo in comment --- json.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/json.bash b/json.bash index d098d4b..8f33052 100755 --- a/json.bash +++ b/json.bash @@ -413,7 +413,7 @@ function json.start_json_validator() { # version. # # It doesn't match JSON directly, but rather it matches a simple validation - # request protocol. json.validate costructs validation request messages that + # request protocol. json.validate constructs validation request messages that # match this regex when the JSON data is valid for the type specified in the # validation request. validation_request=$'(:?(?true|false)(?true)(?false)(?null)(?true|false|null|(?-?+(?:0|[1-9][0-9]*+)(?:\\.[0-9]*+)?+(?:[eE][+-]?+[0-9]++)?+)|(?"(?:[^\\x00-\\x1F"\\\\]|\\\\(?:["\\\\/bfnrt]|u[A-Fa-f0-9]{4}))*+"))(?[\\x20\\x09\\x0A\\x0D]*+)(?\\[(?:(?&ws)(?&json)(?:(?&ws),(?&ws)(?&json))*+)?+(?&ws)\\]|\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&json))(?:(?&ws),(?&entry))*+)?+(?&ws)\\}|(?&atom))){0}^[\\w]++(?:(?=((?:(?&pair)?+\\x1E(?:j(?&ws)(?&json)(?&ws)|s(?&ws)(?&str)(?&ws)|n(?&ws)(?&num)(?&ws)|b(?&ws)(?&bool)(?&ws)|t(?&ws)(?&true)(?&ws)|f(?&ws)(?&false)(?&ws)|z(?&ws)(?&null)(?&ws)|a(?&ws)(?&atom)(?&ws)|Oj(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&json))(?:(?&ws),(?&entry_json))*+)?+(?&ws)\\}(?&ws)|Os(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&str))(?:(?&ws),(?&entry_str))*+)?+(?&ws)\\}(?&ws)|On(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&num))(?:(?&ws),(?&entry_num))*+)?+(?&ws)\\}(?&ws)|Ob(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&bool))(?:(?&ws),(?&entry_bool))*+)?+(?&ws)\\}(?&ws)|Ot(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&true))(?:(?&ws),(?&entry_true))*+)?+(?&ws)\\}(?&ws)|Of(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&false))(?:(?&ws),(?&entry_false))*+)?+(?&ws)\\}(?&ws)|Oz(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&null))(?:(?&ws),(?&entry_null))*+)?+(?&ws)\\}(?&ws)|Oa(?&ws)\\{(?:(?(?&ws)(?&str)(?&ws):(?&ws)(?&atom))(?:(?&ws),(?&entry_atom))*+)?+(?&ws)\\}(?&ws)|Aj(?&ws)\\[(?:(?&ws)(?&json)(?:(?&ws),(?&ws)(?&json))*+)?+(?&ws)\\](?&ws)|As(?&ws)\\[(?:(?&ws)(?&str)(?:(?&ws),(?&ws)(?&str))*+)?+(?&ws)\\](?&ws)|An(?&ws)\\[(?:(?&ws)(?&num)(?:(?&ws),(?&ws)(?&num))*+)?+(?&ws)\\](?&ws)|Ab(?&ws)\\[(?:(?&ws)(?&bool)(?:(?&ws),(?&ws)(?&bool))*+)?+(?&ws)\\](?&ws)|At(?&ws)\\[(?:(?&ws)(?&true)(?:(?&ws),(?&ws)(?&true))*+)?+(?&ws)\\](?&ws)|Af(?&ws)\\[(?:(?&ws)(?&false)(?:(?&ws),(?&ws)(?&false))*+)?+(?&ws)\\](?&ws)|Az(?&ws)\\[(?:(?&ws)(?&null)(?:(?&ws),(?&ws)(?&null))*+)?+(?&ws)\\](?&ws)|Aa(?&ws)\\[(?:(?&ws)(?&atom)(?:(?&ws),(?&ws)(?&atom))*+)?+(?&ws)\\](?&ws)))$)):++)?+' From e6007c42d4097b9006acbebda59fd5513f1e9afc Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Mon, 22 Jul 2024 07:25:06 +0000 Subject: [PATCH 6/8] feat: specify grep command via JSON_BASH_GREP The JSON_BASH_GREP envar now defines the grep command used by json.bash when it starts a json validator co-process. The envar can be set to a : delimited list of paths/commands to use. It defaults to "ggrep:grep", so systems with ggrep installed with non-GNU grep will use ggrep. --- json.bash | 34 +++++++++++++++++++++++----- json.bats | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 91 insertions(+), 11 deletions(-) diff --git a/json.bash b/json.bash index 8f33052..e264411 100755 --- a/json.bash +++ b/json.bash @@ -463,23 +463,45 @@ function json.start_json_validator() { } 2>/dev/null || _json_validator_pids[$$]= if [[ ! ${_json_validator_pids[$$]:-} ]]; then - msg_context="json validator 'grep' process failed to start" \ + msg_context="json validator co-process failed to start" \ json._notify_coproc_terminated return 2 fi } function json._start_grep_coproc() { - { coproc json_validator ( LC_ALL=C.UTF-8 grep --only-matching --line-buffered \ + local grep + out=grep json._resolve_grep + + { coproc json_validator ( LC_ALL=C.UTF-8 "${grep:?}" --only-matching --line-buffered \ -P -e "${validation_request:?}" ) } 2>/dev/null # hide interactive job control PID output } +function json._resolve_grep() { + local -n _jrg_out=${out:?'json._resolve_grep: $out must be set to receive the grep path'} + local IFS=':' + # shellcheck disable=SC2206 + local _jrg_grep_options=(${JSON_BASH_GREP:-ggrep:grep}) + for option in "${_jrg_grep_options[@]}"; do + if type -t -- "${option?}" > /dev/null; then + _jrg_out=${option?} + return 0 + fi + done + # default to the final option even if it failed to resolve + _jrg_out=${option:-grep} +} + function json._notify_coproc_terminated() { + local grep + out=grep json._resolve_grep + echo "json.bash: ${msg_context:?}:" \ - "'grep' must support GNU grep options (-P --only-matching" \ - "--line-buffered). Use the :raw type instead of :json to avoid starting a" \ - "JSON validator grep process. " >&2 + "${grep@Q} must support GNU grep options (-P --only-matching" \ + "--line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use" \ + "the :raw type instead of :json to avoid starting a JSON validator grep" \ + "process." >&2 } function json.check_json_validator_running() { @@ -554,7 +576,7 @@ function json.validate() { IFS='' if [[ ${_jv_write_error:-} ]] || ! read -ru "${_json_validator_out_fds[$$]:?}" -t 4 _jv_response; then if ! json.check_json_validator_running; then - msg_context="json validator coprocess unexpectedly died" \ + msg_context="json validator co-process unexpectedly died" \ json._notify_coproc_terminated return 2 fi diff --git a/json.bats b/json.bats index ba716d2..2f56d00 100644 --- a/json.bats +++ b/json.bats @@ -11,6 +11,9 @@ if [[ ${CI:-false} == true ]]; then TIMEOUT_SECONDS=60 fi +# Always use grep, otherwise errors will be different on systems with ggrep. +export JSON_BASH_GREP=grep + setup() { cd "${BATS_TEST_DIRNAME:?}" } @@ -2842,6 +2845,61 @@ json(): Could not encode the value of argument 'a:number=oops' as a 'number' val [[ $status == 0 && $output == '["foo","bar"]' ]] } +@test "json validator :: resolves grep executable via JSON_BASH_GREP :: defaulting to ggrep then grep" { + msg_file=$(mktemp_bats) + + function validate_with_ggrep() { + unset JSON_BASH_GREP + function ggrep() { + echo "ggrep" > "${msg_file:?}" + command -p grep "$@" + } + function grep() { + echo "grep" > "${msg_file:?}" + command -p grep "$@" + } + + # ggrep is defined so it's used by default + json.validate '42' + [[ $(<"${msg_file?}") == 'ggrep' ]] + + # when ggrep isn't defined, grep is used + json._reset_json_validator + unset ggrep + json.validate '42' + [[ $(<"${msg_file?}") == 'grep' ]] + } + + # unset PATH to avoid finding ggrep on the system running this test + PATH= run validate_with_ggrep + [[ $status == 0 && $output == "" ]] +} + +@test "json validator :: resolves grep executable via JSON_BASH_GREP :: uses first defined command after splitting by : or the final undefined entry" { + msg_file=$(mktemp_bats) + + function validate_with_ggrep() { + JSON_BASH_GREP=missing_grep1:custom_grep:missing_grep2 + function custom_grep() { + echo "custom_grep" > "${msg_file:?}" + command -p grep "$@" + } + + # custom_grep is defined so it's used + json.validate '42' + [[ $(<"${msg_file?}") == 'custom_grep' ]] + + # when none of the options are available, the final choice is used (and fails) + json._reset_json_validator + unset custom_grep + json.validate '42' || [[ $? == 2 ]] + } + + # unset PATH to avoid finding ggrep on the system running this test + PATH= run validate_with_ggrep + [[ $status == 0 && $output == "json.bash: json validator co-process unexpectedly died: 'missing_grep2' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] +} + @test "json validator :: reports validator grep process failing to start :: PID & FDs unset" { # Because it's a parallel process, the coproc failure behaviour is timing # dependent, making it difficult to test specific scenarios without mocking. @@ -2854,7 +2912,7 @@ json(): Could not encode the value of argument 'a:number=oops' as a 'number' val run --separate-stderr json.validate '42' [[ $status == 2 && $output == "" ]] echo "$stderr" - [[ $stderr == *"json.bash: json validator 'grep' process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Use the :raw type instead of :json to avoid starting a JSON validator grep process."* ]] + [[ $stderr == "json.bash: json validator co-process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] } @test "json validator :: reports validator grep process failing to start :: PID set, FDs unset" { @@ -2866,7 +2924,7 @@ json(): Could not encode the value of argument 'a:number=oops' as a 'number' val run --separate-stderr json.validate '42' [[ $status == 2 && $output == "" ]] echo "$stderr" - [[ $stderr == *"json.bash: json validator 'grep' process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Use the :raw type instead of :json to avoid starting a JSON validator grep process."* ]] + [[ $stderr == "json.bash: json validator co-process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] } @test "json validator :: reports validator grep process failing to start :: PID unset, FDs set" { @@ -2878,7 +2936,7 @@ json(): Could not encode the value of argument 'a:number=oops' as a 'number' val run --separate-stderr json.validate '42' [[ $status == 2 && $output == "" ]] echo "$stderr" - [[ $stderr == "json.bash: json validator 'grep' process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] + [[ $stderr == "json.bash: json validator co-process failed to start: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] } @test "json validator :: reports validator grep process failing after start" { @@ -2898,9 +2956,9 @@ json(): Could not encode the value of argument 'a:number=oops' as a 'number' val run --separate-stderr kill_coproc_then_validate echo "$status" - echo "${stderr@Q}" + echo "${stderr?}" [[ $status == 0 && $output == "" ]] - [[ $stderr == "json.bash: json validator coprocess unexpectedly died: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] + [[ $stderr == "json.bash: json validator co-process unexpectedly died: 'grep' must support GNU grep options (-P --only-matching --line-buffered). Set JSON_BASH_GREP to a GNU grep executable, or use the :raw type instead of :json to avoid starting a JSON validator grep process." ]] } @test "json validator :: reports validator grep process IO write error" { From 4f1e99a9712abb4182b96360bc89c4102bf2a04a Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Mon, 22 Jul 2024 07:56:43 +0000 Subject: [PATCH 7/8] docs: update CHANGELOG for coproc changes --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1e4c13..280bb62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,35 @@ and this project adheres to ## [Unreleased] +### BREAKING CHANGES + +> [!NOTE] +> +> `json.bash` is version `0.x`, so the major version is not incremented. + +- The `json.validate` function now sets and clears a bash trap for `SIGPIPE` + when called. If something else in a bash script is also setting a trap for + `SIGPIPE`, the trap will be cleared after validating JSON. (It's not practical + to detect and restore existing `SIGPIPE` traps due to the performance cost of + doing so.) ([#15](https://github.com/h4l/json.bash/pull/15)) + +### Fixed + +- The JSON validation co-process exiting at startup could cause bash to exit + with an unbound variable error, due to the special coproc `_PID` var not being + set. Interaction with the validator co-process is now more robust to errors + that can occur when it exits unexpectedly. For example, if the grep command is + not compatible with the GNU grep flags we use. + ([#15](https://github.com/h4l/json.bash/pull/15)) + +### Added + +- The `JSON_BASH_GREP` environment variable can be set to a `:` delimited list + of commands to use when starting the grep JSON validator co-process. It + defaults to `ggrep:grep`, so systems with `ggrep` will use it first. (GNU grep + is commonly `ggrep` when `grep` is not GNU grep.) + ([#15](https://github.com/h4l/json.bash/pull/15)) + ### Changed - Fixed broken link in README's manual install instructions From ee1c6bea7cfa21f26fd6d6291b58467dfb3547c7 Mon Sep 17 00:00:00 2001 From: Hal Blackburn Date: Tue, 23 Jul 2024 04:37:44 +0000 Subject: [PATCH 8/8] docs: document JSON_BASH_GREP envar in README --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index ff16af0..74c2422 100644 --- a/README.md +++ b/README.md @@ -176,6 +176,7 @@ to find where it is). 1. [Security and correctness](#security-and-correctness) 1. [`jb-cat`, `jb-echo`, `jb-stream` utility programs](#jb-cat-jb-echo-jb-stream-utility-programs) 1. [Streaming output](#streaming-output) +1. [Configuring environment variables](#configuring-environment-variables) These examples mostly use `jb`, which is the `json.bash` library run as a stand-alone program. From within a bash script you get better performance by @@ -1010,6 +1011,30 @@ inputs, even when backtracking and matched-region output are disabled. The full syntax of `jb` arguments is documented in a (pseudo) grammar in [hack/syntax_patterns.bash](hack/syntax_patterns.bash). +### Configuring environment variables + +`json.bash` can be configured by setting certain environment variables: + +#### `JSON_BASH_GREP` + +The command(s) to run to start the JSON validator grep co-process. + +Default: `ggrep:grep` + +`json.bash` uses a +[regular expression to validate JSON data](./hack/syntax_patterns.bash). It uses +a GNU grep co-process to validate lines of JSON data using this regex +(specifically GNU grep, because it's a [PCRE] expression). + +This environment variable contains a list of command names or absolute paths to +execute. Multiple commands are separated with `:`. The first command that exists +is used, so the list can contain missing commands, so long as one exists. + +The default of `ggrep:grep` means `ggrep` will be used if it's available, +otherwise `grep`. + +[PCRE]: https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions + ## Background & performance notes Quite reasonably, you may be wondering why anyone would use Bash to implement a