@@ -17,6 +17,7 @@ use anyhow::Result;
1717use semver:: Version ;
1818use std:: fs;
1919use std:: path:: PathBuf ;
20+ use std:: sync:: atomic:: { AtomicBool , Ordering } ;
2021use std:: time:: { Duration , SystemTime } ;
2122
2223pub use freenet:: transport:: {
@@ -28,6 +29,73 @@ pub use freenet::transport::{
2829/// The service wrapper catches this and runs `freenet update` before restarting.
2930pub const EXIT_CODE_UPDATE_NEEDED : i32 = 42 ;
3031
32+ /// Environment variable set by every Freenet supervisor (systemd unit, macOS
33+ /// launchd wrapper script, and the Windows/Linux in-process `service
34+ /// run-wrapper` loop) on the `freenet network` child it spawns. Its presence is
35+ /// positive evidence that *something* will catch exit code 42 and run
36+ /// `freenet update` before restarting the node.
37+ ///
38+ /// Auto-update is structurally supervised-install-only: the running node never
39+ /// replaces its own binary — it exits 42 and relies on its supervisor to apply
40+ /// the update and restart it. A bare `freenet network` run has no supervisor, so
41+ /// it detects the update, exits 42, dies, and is never restarted (issue #4580).
42+ /// We use this marker (and `INVOCATION_ID` as a systemd fallback) to tell the
43+ /// operator, loudly, when an update was detected but will not be applied.
44+ pub const SUPERVISED_ENV_VAR : & str = "FREENET_SUPERVISED" ;
45+
46+ /// Whether the running node appears to be under a supervisor that will catch
47+ /// exit code 42 and apply the update.
48+ #[ derive( Debug , Clone , Copy , PartialEq , Eq ) ]
49+ pub enum SupervisorStatus {
50+ /// The authoritative [`SUPERVISED_ENV_VAR`] marker is set — a Freenet
51+ /// supervisor spawned us and *will* catch exit 42 and run `freenet update`.
52+ /// Nothing to warn about; exit 42 is expected to be applied.
53+ Supervised ,
54+ /// We are under systemd (`INVOCATION_ID` is set) but our authoritative
55+ /// marker is absent. systemd alone does not prove the unit has the
56+ /// exit-42 → `freenet update` `ExecStopPost` hook: a custom unit,
57+ /// `systemd-run`, or a hand-written service running `freenet network`
58+ /// without that hook is also `INVOCATION_ID`-bearing. We therefore can't
59+ /// confirm the update will be applied — warn, but more softly, since this
60+ /// also covers older Freenet units installed before the marker existed.
61+ SupervisedUnverified ,
62+ /// No evidence of any supervisor. Exit 42 will most likely kill the node
63+ /// without the update ever being applied — the operator must run
64+ /// `freenet update` manually (or install Freenet as a service).
65+ Unsupervised ,
66+ }
67+
68+ /// Detect whether a supervisor is present, using an injectable env lookup so the
69+ /// logic is unit-testable without mutating the process environment.
70+ ///
71+ /// Honest by construction: we only claim the fully-quiet [`Supervised`] state on
72+ /// *our own* marker. systemd's generic `INVOCATION_ID` is reported as
73+ /// [`SupervisedUnverified`] (still warned about, softly) because it does not
74+ /// prove the unit carries the exit-42 → `freenet update` hook. Absence of both
75+ /// is [`Unsupervised`], which drives the loud error.
76+ ///
77+ /// [`Supervised`]: SupervisorStatus::Supervised
78+ /// [`SupervisedUnverified`]: SupervisorStatus::SupervisedUnverified
79+ /// [`Unsupervised`]: SupervisorStatus::Unsupervised
80+ pub fn detect_supervisor_status < F > ( env_get : F ) -> SupervisorStatus
81+ where
82+ F : Fn ( & str ) -> Option < String > ,
83+ {
84+ let present = |key : & str | env_get ( key) . is_some_and ( |v| !v. trim ( ) . is_empty ( ) ) ;
85+ if present ( SUPERVISED_ENV_VAR ) {
86+ SupervisorStatus :: Supervised
87+ } else if present ( "INVOCATION_ID" ) {
88+ SupervisorStatus :: SupervisedUnverified
89+ } else {
90+ SupervisorStatus :: Unsupervised
91+ }
92+ }
93+
94+ /// Read the live supervisor status from the process environment.
95+ pub fn supervisor_status ( ) -> SupervisorStatus {
96+ detect_supervisor_status ( |key| std:: env:: var ( key) . ok ( ) )
97+ }
98+
3199/// Initial backoff interval for update checks (1 minute).
32100const INITIAL_BACKOFF : Duration = Duration :: from_secs ( 60 ) ;
33101
@@ -82,14 +150,39 @@ pub enum UpdateCheckResult {
82150///
83151/// Security: This function verifies against GitHub, so a malicious peer
84152/// claiming a fake version won't trigger an exit.
153+ /// Set the first time this process logs the auto-update lockout at warn! level,
154+ /// so the permanent locked-out state is surfaced loudly once rather than on
155+ /// every 60s update-loop tick (it can be hit from multiple triggers per tick).
156+ static LOCKOUT_WARNED : AtomicBool = AtomicBool :: new ( false ) ;
157+
85158pub async fn check_if_update_available ( current_version : & str ) -> UpdateCheckResult {
86159 // Don't check if we've failed too many times
87160 if !should_attempt_update ( ) {
88- tracing:: debug!(
89- failures = get_update_failure_count( ) ,
90- max = MAX_UPDATE_FAILURES ,
91- "Skipping update check - too many previous failures"
92- ) ;
161+ // Loud, operator-visible (issue #4580): a persistent lockout means this
162+ // peer has stopped trying to auto-update entirely and will silently stay
163+ // behind. A common cause is a non-writable binary path (e.g. a
164+ // hand-installed binary the service account can't replace), which is a
165+ // *permanent* lockout until an operator intervenes. Warn LOUDLY the first
166+ // time we observe it this process, then drop to debug! so the permanent
167+ // state does not spam the log every minute.
168+ let failures = get_update_failure_count ( ) ;
169+ if !LOCKOUT_WARNED . swap ( true , Ordering :: Relaxed ) {
170+ tracing:: warn!(
171+ failures,
172+ max = MAX_UPDATE_FAILURES ,
173+ "Auto-update is LOCKED OUT after {MAX_UPDATE_FAILURES} consecutive failed update \
174+ attempts and will NOT retry. This peer will stay on the current version until an \
175+ operator intervenes. Run `freenet update` manually to update and reset the \
176+ lockout; if updates keep failing, the binary path is likely not writable by this \
177+ account.",
178+ ) ;
179+ } else {
180+ tracing:: debug!(
181+ failures,
182+ max = MAX_UPDATE_FAILURES ,
183+ "Skipping update check - auto-update locked out (already warned this process)"
184+ ) ;
185+ }
93186 return UpdateCheckResult :: Skipped ;
94187 }
95188
@@ -739,6 +832,116 @@ mod tests {
739832 ) ;
740833 }
741834
835+ #[ test]
836+ fn test_detect_supervisor_status_marker_present ( ) {
837+ // Issue #4580: the explicit FREENET_SUPERVISED marker (set by all three
838+ // supervisors on the network child) is positive evidence of a supervisor.
839+ let env = |key : & str | {
840+ if key == SUPERVISED_ENV_VAR {
841+ Some ( "1" . to_string ( ) )
842+ } else {
843+ None
844+ }
845+ } ;
846+ assert_eq ! ( detect_supervisor_status( env) , SupervisorStatus :: Supervised ) ;
847+ }
848+
849+ #[ test]
850+ fn test_detect_supervisor_status_invocation_id_is_unverified ( ) {
851+ // systemd sets INVOCATION_ID for EVERY service instance, including custom
852+ // units / systemd-run that lack our exit-42 → `freenet update` hook. So a
853+ // bare INVOCATION_ID is only *unverified* supervision: we still warn (more
854+ // softly), rather than going fully quiet as if the update were guaranteed
855+ // to apply. This is the false-positive guard from the Codex review.
856+ let env = |key : & str | {
857+ if key == "INVOCATION_ID" {
858+ Some ( "a1b2c3d4" . to_string ( ) )
859+ } else {
860+ None
861+ }
862+ } ;
863+ assert_eq ! (
864+ detect_supervisor_status( env) ,
865+ SupervisorStatus :: SupervisedUnverified
866+ ) ;
867+ }
868+
869+ #[ test]
870+ fn test_detect_supervisor_status_marker_beats_invocation_id ( ) {
871+ // When BOTH our authoritative marker and systemd's INVOCATION_ID are set
872+ // (the normal Freenet systemd-unit case), the marker wins and we report
873+ // the fully-quiet Supervised state.
874+ let env = |key : & str | match key {
875+ SUPERVISED_ENV_VAR => Some ( "1" . to_string ( ) ) ,
876+ "INVOCATION_ID" => Some ( "a1b2c3d4" . to_string ( ) ) ,
877+ _ => None ,
878+ } ;
879+ assert_eq ! ( detect_supervisor_status( env) , SupervisorStatus :: Supervised ) ;
880+ }
881+
882+ #[ test]
883+ fn test_detect_supervisor_status_unsupervised_when_absent ( ) {
884+ // The whole point of #4580: a bare `freenet network` run has neither
885+ // signal and must be reported as Unsupervised so the loud warning fires
886+ // instead of silently exiting 42 with nothing to apply the update.
887+ let env = |_key : & str | None ;
888+ assert_eq ! (
889+ detect_supervisor_status( env) ,
890+ SupervisorStatus :: Unsupervised
891+ ) ;
892+ }
893+
894+ #[ test]
895+ fn test_detect_supervisor_status_empty_and_whitespace_are_not_evidence ( ) {
896+ // An empty or whitespace-only value (e.g. `FREENET_SUPERVISED=`) must NOT
897+ // count as a supervisor — it would suppress the warning while leaving the
898+ // update unapplied, which is exactly the silent failure we are fixing.
899+ for value in [ "" , " " , "\t " , "\n " ] {
900+ let env = move |key : & str | {
901+ if key == SUPERVISED_ENV_VAR || key == "INVOCATION_ID" {
902+ Some ( value. to_string ( ) )
903+ } else {
904+ None
905+ }
906+ } ;
907+ assert_eq ! (
908+ detect_supervisor_status( env) ,
909+ SupervisorStatus :: Unsupervised ,
910+ "value {value:?} must not count as supervisor evidence"
911+ ) ;
912+ }
913+ }
914+
915+ #[ test]
916+ fn test_locked_out_update_check_is_loud ( ) {
917+ // Source-scrape pin for #4580: when the failure lockout disables
918+ // auto-update, the skip MUST be operator-visible (warn!), not a silent
919+ // debug! line. A regression to debug! would re-hide the permanent
920+ // lockout (e.g. non-writable binary path) the issue calls out.
921+ let src = include_str ! ( "auto_update.rs" ) ;
922+ let ( _, after_fn_start) = src
923+ . split_once ( "pub async fn check_if_update_available(" )
924+ . expect ( "check_if_update_available definition not found" ) ;
925+ let ( _, after_guard) = after_fn_start
926+ . split_once ( "if !should_attempt_update() {" )
927+ . expect ( "lockout guard not found" ) ;
928+ let ( guard_body, _) = after_guard
929+ . split_once ( "return UpdateCheckResult::Skipped;" )
930+ . expect ( "could not locate end of lockout guard" ) ;
931+ // Strip line comments so the explanatory comment (which itself mentions
932+ // "warn!"/"debug!") cannot satisfy the assertion — match only on code.
933+ let code_only: String = guard_body
934+ . lines ( )
935+ . map ( |line| line. split_once ( "//" ) . map ( |( c, _) | c) . unwrap_or ( line) )
936+ . collect :: < Vec < _ > > ( )
937+ . join ( "\n " ) ;
938+ assert ! (
939+ code_only. contains( "tracing::warn!" ) ,
940+ "the auto-update lockout skip must log at warn! level so a permanent \
941+ lockout is diagnosable (#4580), not silently dropped at debug!"
942+ ) ;
943+ }
944+
742945 #[ test]
743946 fn test_backoff_constants ( ) {
744947 // Verify backoff progression: 1m -> 2m -> 4m -> 8m -> 16m -> 32m -> 64m (capped to 60m)
0 commit comments