diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..1ca40fd0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +FROM xtrm0/dafny + +USER root + +RUN apt-get update && apt-get install curl git g++ vim python3-pip -y && apt-get clean + +RUN pip install matplotlib scipy tqdm diffprivlib + +# create a non-root user +RUN useradd -m lean + +USER lean + +WORKDIR /home/lean + +SHELL ["/bin/bash", "-c"] + +ENV PATH="/home/lean/.elan/bin:/home/lean/.local/bin:$PATH" + +RUN mkdir SampCert + +COPY --chown=lean . ./SampCert/ + +RUN curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- -y --default-toolchain none && \ + . ~/.profile && \ + elan toolchain install $(curl https://raw.githubusercontent.com/leanprover-community/mathlib/master/leanpkg.toml | grep lean_version | awk -F'"' '{print $2}') && \ + elan default stable + +RUN cd SampCert/ && \ + lake exe cache get + +RUN cd SampCert/ && \ + lake build + +RUN cd SampCert/ && \ + lake build FastExtract && \ + dafny build --target:py Tests/SampCert.dfy Tests/Random.py Tests/testing-kolmogorov-discretegaussian.py Tests/testing-kolmogorov-discretelaplace.py Tests/IBM/discretegauss.py Tests/benchmarks.py -o Tests/SampCert.dfy + +RUN cd SampCert && \ + lake build test diff --git a/REPRODUCING.md b/REPRODUCING.md new file mode 100644 index 00000000..8feadb62 --- /dev/null +++ b/REPRODUCING.md @@ -0,0 +1,91 @@ +# SampCert Artifact + +Our artifact consists of +- The Lean 4 source code for SampCert, including all proofs mentioned in the paper, and a code extractor used to translate the samplers into Python. +- A script to benchmark the extracted SampCert samplers against other DP implementations. +- A script to profile the number of bytes consumed by the discrete Gaussian sampler. +- A script to run statistical tests on the extracted SampCert samplers. +- A Lean 4 program to run statistical tests on the SampCert samplers and example DP queries, using the Lean FFI. + +## Building + +To build and enter the artifact, run the following commands (**from the root directory of the artifact**). +``` +docker build --platform linux/amd64 -t sampcert . +docker run -it --platform linux/amd64 --name sampcert sampcert /bin/bash +``` +This will compile our Lean code, and extract the implementation of our samplers used by our Large Cloud Provider. +We have tested that this image builds and runs on a 2023 M2 Macbook Pro, Ubuntu Linux, and within WSL. +If running this docker image on a ARM system, please ensure that Rosetta2 virtualization is enabled. + +Alternatively, you can load our prebuilt docker image using +``` +docker image load -i sampcert-image.tar +docker run -it --platform linux/amd64 --name sampcert sampcert /bin/bash +``` + + +## The Lean code + +The file `SampCert.lean` is the top-level file in the project. +To check that `definition-name` code is sorry-free, one can add the line +``` +#print axioms definition-name +``` +to any file and recompile with `lake build` (**from inside the SampCert directory**). +For example, uncommenting `-- #print axioms combineMeanHistogram` at the bottom of `SampCert.lean` will print +``` +[2165/2166] Built SampCert +info: ././././SampCert.lean:75:0: 'combineMeanHistogram' depends on axioms: [propext, Classical.choice, Quot.sound] +``` +which does not include the axiom `sorryAx`, indicating that the definition and proof is closed. + +The file ``paper_mapping.md`` includes a table that lists all of the definitions from our paper. + +## Benchmarks + +To reproduce our benchmarks (figure 4), run the following command from (**the home directory, inside the artifact**): +``` +sh SampCert/Tests/run_benchmark.sh +``` +This command will save a reproduction of figure 4 to the home directory. It can be accessed by running the following command (**from outside of the container**). +``` +docker cp sampcert:/home/lean/GaussianBenchmarks.pdf . +``` + +To profile the number of bytes of entropy consumed, we have a version of the code instrumented with logging on a separate branch (``git diff main..PLDI25-profiling`` will show you the differences). +To produce a figure that counts the number of bytes of entropy consumed, run the following (**from the SampCert directory**): +``` +git checkout PLDI25-profiling +lake build profile +LD_LIBRARY_PATH=.lake/build/lib/ python3 profile.py +``` +To view the profile, it can be copied out of the container by executing the following (**from outside of the container**): +``` +docker cp sampcert:/home/lean/SampCert/GaussianProfile.pdf . +``` + + +## Statistical Tests: Extracted samplers + +To check that our extracted samplers execute and sample from the correct distribution, we have included a K-S test. +To run the tests on the extracted versions, run the following (**from the home directory, on the main branch**): +``` +python3 SampCert/Tests/SampCert-py/testing-kolmogorov-discretegaussian.py +python3 SampCert/Tests/SampCert-py/testing-kolmogorov-discretelaplace.py +``` +The script reports `Test passed!` when the kolmogorov distance between the computed and ideal CDF is within `0.02`. + + +## Tests: FFI samplers + +To demonstrate our claim that our code can be executed within Lean using the C FFI we have rewritten the K-S test, as well as several example queries, inside the file `Test.lean`. +To run that file, execute the command (**from the SampCert directory, on the main branch**) +``` +lake exe test +``` + +This will do the following: +- Execute our implementation of the sparse vector technique on randomly generated sample data +- Execute our noised histogram query, and histogram mean queries, on randomly generated sample data, +- Preform statistical tests on our implementation of the discrete Gaussian, and report the Kolmogorov distance as above. diff --git a/SampCert.lean b/SampCert.lean index 55d9f9b0..62e78ed9 100644 --- a/SampCert.lean +++ b/SampCert.lean @@ -8,13 +8,17 @@ import SampCert.DifferentialPrivacy.Queries.Histogram.Basic import SampCert.DifferentialPrivacy.ZeroConcentrated.System import SampCert.DifferentialPrivacy.Pure.System import SampCert.DifferentialPrivacy.Queries.HistogramMean.Properties +import SampCert.DifferentialPrivacy.Queries.UnboundedMax.Basic +import SampCert.DifferentialPrivacy.Queries.ParHistogram.Basic +import SampCert.DifferentialPrivacy.Queries.Sparse.Basic +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Basic import SampCert.DifferentialPrivacy.Approximate.DP import SampCert.Samplers.Gaussian.Properties import Init.Data.UInt.Lemmas open SLang PMF -def combineConcentrated := @privNoisedBoundedMean_DP gaussian_zCDPSystem +def combineConcentrated := @privNoisedBoundedMean_DP zCDPSystem def combinePure := @privNoisedBoundedMean_DP PureDPSystem /- @@ -27,7 +31,7 @@ def numBins : ℕ+ := 64 /- Bin the infinite type ℕ with clipping -/ -def bin (n : ℕ) : Fin numBins := +def example_bin (n : ℕ) : Fin numBins := { val := min (Nat.log 2 n) (PNat.natPred numBins), isLt := by apply min_lt_iff.mpr @@ -40,8 +44,8 @@ Return an upper bound on the bin value, clipped to 2^(1 + numBins) -/ def unbin (n : Fin numBins) : ℕ+ := 2 ^ (1 + n.val) -noncomputable def combineMeanHistogram : Mechanism ℕ (Option ℚ) := - privMeanHistogram PureDPSystem numBins { bin } unbin 1 20 2 1 20 +def combineMeanHistogram : Mechanism ℕ (Option ℚ) := + privMeanHistogram PureDPSystem laplace_pureDPSystem numBins { bin := example_bin } unbin 1 20 2 1 20 end histogramMeanExample @@ -69,3 +73,5 @@ def DiscreteGaussianSampleGet (num den : UInt32) (mix: UInt32) : UInt32 := Id.ru else let z : IO ℤ ← run <| DiscreteGaussianPMF ⟨ num.toNat , UInt32.toNa_of_non_zero h₁ ⟩ ⟨ den.toNat , UInt32.toNa_of_non_zero h₂ ⟩ mix.toNat return DirtyIOGet z + +-- #print axioms combineMeanHistogram diff --git a/SampCert/DifferentialPrivacy/Abstract.lean b/SampCert/DifferentialPrivacy/Abstract.lean index 84d7fb13..7529cd1a 100644 --- a/SampCert/DifferentialPrivacy/Abstract.lean +++ b/SampCert/DifferentialPrivacy/Abstract.lean @@ -15,12 +15,31 @@ import SampCert.DifferentialPrivacy.Approximate.DP This file defines an abstract system of differentially private operators. -/ -noncomputable section - open Classical Nat Int Real ENNReal namespace SLang +/-- +Typeclass synonym for the classes we use for probaility +-/ +class DiscProbSpace (T : Type) where + instMeasurableSpace : MeasurableSpace T + instCountable : Countable T + instDiscreteMeasurableSpace : DiscreteMeasurableSpace T + instInhabited : Inhabited T + +-- Typeclass inference to- and from- the synonym +instance [idps : DiscProbSpace T] : MeasurableSpace T := idps.instMeasurableSpace +instance [idps : DiscProbSpace T] : Countable T := idps.instCountable +instance [idps : DiscProbSpace T] : DiscreteMeasurableSpace T := idps.instDiscreteMeasurableSpace +instance [idps : DiscProbSpace T] : Inhabited T := idps.instInhabited + +instance [im : MeasurableSpace T] [ic : Countable T] [idm : DiscreteMeasurableSpace T] [ii : Inhabited T] : DiscProbSpace T where + instMeasurableSpace := im + instCountable := ic + instDiscreteMeasurableSpace := idm + instInhabited := ii + /-- Abstract definition of a differentially private systemm. -/ @@ -31,54 +50,52 @@ class DPSystem (T : Type) where prop : Mechanism T Z → NNReal → Prop /-- - For any δ, prop implies ``ApproximateDP δ ε`` up to a sufficient degradation - of the privacy parameter. + Definition of DP is well-formed: Privacy parameter required to obtain (ε', δ)-approximate DP -/ - prop_adp [Countable Z] {m : Mechanism T Z} : - ∃ (degrade : (δ : NNReal) -> (ε' : NNReal) -> NNReal), ∀ (δ : NNReal) (_ : 0 < δ) (ε' : NNReal), - (prop m (degrade δ ε') -> ApproximateDP m ε' δ) + of_app_dp : (δ : NNReal) -> (ε' : NNReal) -> NNReal /-- - DP is monotonic + For any ε', this definition of DP implies (ε', δ)-approximate-DP for all δ -/ - prop_mono {m : Mechanism T Z} {ε₁ ε₂: NNReal} (Hε : ε₁ ≤ ε₂) (H : prop m ε₁) : prop m ε₂ + prop_adp [DiscProbSpace Z] {m : Mechanism T Z} : ∀ (δ : NNReal) (_ : 0 < δ) (ε' : NNReal), + (prop m (of_app_dp δ ε') -> ApproximateDP m ε' δ) /-- - A noise mechanism (eg. Laplace, Discrete Gaussian, etc) - Paramaterized by a query, sensitivity, and a (rational) security paramater. - -/ - noise : Query T ℤ → (sensitivity : ℕ+) → (num : ℕ+) → (den : ℕ+) → Mechanism T ℤ - /-- - Adding noise to a query makes it private. + DP is monotonic -/ - noise_prop : ∀ q : List T → ℤ, ∀ Δ εn εd : ℕ+, sensitivity q Δ → prop (noise q Δ εn εd) (εn / εd) + prop_mono {m : Mechanism T Z} {ε₁ ε₂: NNReal} : + ε₁ ≤ ε₂ -> prop m ε₁ -> prop m ε₂ /-- Privacy adaptively composes by addition. -/ - adaptive_compose_prop : {U V : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] → [Inhabited U] → [MeasurableSpace V] → [Countable V] → [DiscreteMeasurableSpace V] → [Inhabited V] → ∀ m₁ : Mechanism T U, ∀ m₂ : U -> Mechanism T V, - ∀ ε₁ ε₂ : NNReal, - prop m₁ ε₁ → (∀ u, prop (m₂ u) ε₂) -> prop (privComposeAdaptive m₁ m₂) (ε₁ + ε₂) + adaptive_compose_prop {U V : Type} [DiscProbSpace U] [DiscProbSpace V] + {m₁ : Mechanism T U} {m₂ : U -> Mechanism T V} {ε₁ ε₂ ε : NNReal} : + prop m₁ ε₁ → (∀ u, prop (m₂ u) ε₂) -> + ε₁ + ε₂ = ε -> + prop (privComposeAdaptive m₁ m₂) ε /-- Privacy is invariant under post-processing. -/ - postprocess_prop : {U : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] → [Inhabited U] → { pp : U → V } → - ∀ m : Mechanism T U, ∀ ε : NNReal, - prop m ε → prop (privPostProcess m pp) ε + postprocess_prop {U : Type} [DiscProbSpace U] + { pp : U → V } {m : Mechanism T U} {ε : NNReal} : + prop m ε → prop (privPostProcess m pp) ε /-- Constant query is 0-DP -/ - const_prop : {U : Type} → [MeasurableSpace U] → [Countable U] → [DiscreteMeasurableSpace U] -> (u : U) -> prop (privConst u) (0 : NNReal) - + const_prop {U : Type} [DiscProbSpace U] {u : U} {ε : NNReal} : + ε = (0 : NNReal) -> prop (privConst u) ε namespace DPSystem /- Non-adaptive privacy composes by addition. -/ -lemma compose_prop {U V : Type} [dps : DPSystem T] [MeasurableSpace U] [Countable U] [DiscreteMeasurableSpace U] [Inhabited U] [MeasurableSpace V] [Countable V] [DiscreteMeasurableSpace V] [Inhabited V] : - ∀ m₁ : Mechanism T U, ∀ m₂ : Mechanism T V, ∀ ε₁ ε₂ : NNReal, - dps.prop m₁ ε₁ → dps.prop m₂ ε₂ → dps.prop (privCompose m₁ m₂) (ε₁ + ε₂) := by - intros m₁ m₂ ε₁ ε₂ p1 p2 +lemma compose_prop {U V : Type} [dps : DPSystem T] [DiscProbSpace U] [DiscProbSpace V] : + {m₁ : Mechanism T U} -> {m₂ : Mechanism T V} -> {ε₁ ε₂ ε : NNReal} -> + (ε₁ + ε₂ = ε) -> + dps.prop m₁ ε₁ → dps.prop m₂ ε₂ → dps.prop (privCompose m₁ m₂) ε := by + intros _ _ _ _ _ _ p1 p2 unfold privCompose - exact adaptive_compose_prop m₁ (fun _ => m₂) ε₁ ε₂ p1 fun _ => p2 + apply adaptive_compose_prop p1 (fun _ => p2) + trivial end DPSystem @@ -96,4 +113,30 @@ lemma bind_bind_indep (p : Mechanism T U) (q : Mechanism T V) (h : U → V → P ext l x simp [privCompose, privComposeAdaptive, tsum_prod'] +/-- +A noise function for a differential privacy system +-/ +class DPNoise (dps : DPSystem T) where + /-- + A noise mechanism (eg. Laplace, Discrete Gaussian, etc) + Paramaterized by a query, sensitivity, and a (rational) security paramater. + -/ + noise : Query T ℤ → (sensitivity : ℕ+) → (num : ℕ+) → (den : ℕ+) → Mechanism T ℤ + /-- + Relationship between arguments to noise and resulting privacy amount + -/ + noise_priv : (num : ℕ+) → (den : ℕ+) → (priv : NNReal) -> Prop + /-- + Adding noise to a query makes it private + -/ + noise_prop {q : List T → ℤ} {Δ εn εd : ℕ+} {ε : NNReal} : + noise_priv εn εd ε -> + sensitivity q Δ → + dps.prop (noise q Δ εn εd) ε + + +class DPPar (T : Type) extends (DPSystem T) where + prop_par {m1 : Mechanism T U} {m2 : Mechanism T V} {ε₁ ε₂ ε : NNReal} : + ε = max ε₁ ε₂ -> ∀f, prop m1 ε₁ -> prop m2 ε₂ -> prop (privParCompose m1 m2 f) ε + end SLang diff --git a/SampCert/DifferentialPrivacy/Approximate/DP.lean b/SampCert/DifferentialPrivacy/Approximate/DP.lean index 534248ee..47f93574 100644 --- a/SampCert/DifferentialPrivacy/Approximate/DP.lean +++ b/SampCert/DifferentialPrivacy/Approximate/DP.lean @@ -6,9 +6,6 @@ Authors: Jean-Baptiste Tristan import SampCert.SLang import SampCert.DifferentialPrivacy.Generic import SampCert.DifferentialPrivacy.Neighbours --- import SampCert.DifferentialPrivacy.Pure.DP --- import SampCert.DifferentialPrivacy.ZeroConcentrated.DP --- import SampCert.Util.Log noncomputable section diff --git a/SampCert/DifferentialPrivacy/Generic.lean b/SampCert/DifferentialPrivacy/Generic.lean index be8e333c..364a01c9 100644 --- a/SampCert/DifferentialPrivacy/Generic.lean +++ b/SampCert/DifferentialPrivacy/Generic.lean @@ -13,25 +13,112 @@ import SampCert.Foundations.Basic This file defines an abstract system of differentially private operators. -/ -noncomputable section - -open Classical Nat Int Real ENNReal namespace SLang + abbrev Query (T U : Type) := List T → U -abbrev Mechanism (T U : Type) := List T → PMF U +abbrev Mechanism (T U : Type) := List T → SPMF U /-- General (value-dependent) composition of mechanisms -/ -def privComposeAdaptive (nq1 : Mechanism T U) (nq2 : U -> Mechanism T V) (l : List T) : PMF (U × V) := do +def privComposeAdaptive (nq1 : Mechanism T U) (nq2 : U -> Mechanism T V) (l : List T) : SPMF (U × V) := do let A <- nq1 l let B <- nq2 A l return (A, B) -lemma compose_sum_rw_adaptive (nq1 : List T → PMF U) (nq2 : U -> List T → PMF V) (u : U) (v : V) (l : List T) : +/-- +Product of mechanisms. +-/ +def privCompose (nq1 : Mechanism T U) (nq2 : Mechanism T V) (l : List T) : SPMF (U × V) := + privComposeAdaptive nq1 (fun _ => nq2) l + +/-- +Mechanism obtained by applying a post-processing function to a mechanism. +-/ +def privPostProcess (nq : Mechanism T U) (pp : U → V) (l : List T) : SPMF V := do + let A ← nq l + return pp A + +/-- +Constant mechanism +-/ +def privConst (u : U) : Mechanism T U := fun _ => SPMF_pure u + +/-- +Parallel composition mechanism +-/ +def privParCompose (m1 : Mechanism T U) (m2 : Mechanism T V) (f : T -> Bool) : Mechanism T (U × V) := + fun l => do + let v1 <- m1 <| List.filter f l + let v2 <- m2 <| List.filter ((! ·) ∘ f) l + return (v1, v2) + + +section privParComp +variable (m1 : Mechanism T U) (m2 : Mechanism T V) (f : T -> Bool) +variable (l l₁ l₂ : List T) +variable (v₁ v₂ : T) + +open Classical + +/- +lemma privParComp_eval xu xv : + ((privParComp m1 m2 f l) : SLang (U × V)) (xu, xv) = + ∑'(u : U), ∑'(v : V), + (if ((u, v) = (xu, xv)) + then (m1 (List.filter f l) : SLang U) xu * (m2 (List.filter ((! ·) ∘ f) l) : SLang V) xv + else 0) := by + simp [privParComp] + simp_rw [← ENNReal.tsum_mul_left] + apply tsum_congr; intro u + apply tsum_congr; intro v + simp_rw [mul_ite_zero] + split <;> split <;> simp_all +-/ + +-- FIXME: Cleanup +lemma privParCompose_eval xu xv : + ((privParCompose m1 m2 f l) : SLang (U × V)) (xu, xv) = + (m1 (List.filter f l) : SLang U) xu * (m2 (List.filter ((! ·) ∘ f) l) : SLang V) xv := by + simp [privParCompose] + simp_rw [← ENNReal.tsum_mul_left] + conv=> + lhs + enter [1, a, 1, b] + rw [mul_ite] + rw [ENNReal.tsum_eq_add_tsum_ite xu, ENNReal.tsum_eq_add_tsum_ite xv] + simp + conv=> + rhs + rw [<- add_zero (_ * _)] + congr + · conv=> + rhs + rw [<- add_zero (_ * _)] + congr + simp + intro _ H1 H2 + exfalso; apply H2 ▸ H1; rfl + · simp + intro _ H1 H2 + exfalso; apply H2 ▸ H1; rfl + + +end privParComp + +noncomputable section + +open Classical Nat Int Real ENNReal + +instance SPMF.instFunLike : FunLike (SPMF α) α ℝ≥0∞ where + coe p a := p.1 a + coe_injective' _ _ h := Subtype.eq h + + +lemma compose_sum_rw_adaptive (nq1 : List T → SPMF U) (nq2 : U -> List T → SPMF V) (u : U) (v : V) (l : List T) : (∑' (a : U), nq1 l a * ∑' (a_1 : V), if u = a ∧ v = a_1 then nq2 a l a_1 else 0) = nq1 l u * nq2 u l v := by have hrw1 : ∀ (a : U), nq1 l a * (∑' (a_1 : V), if u = a ∧ v = a_1 then nq2 a l a_1 else 0) = if (u = a) then (nq1 l a * ∑' (a_1 : V), if u = a ∧ v = a_1 then nq2 a l a_1 else 0) else 0 := by intro a @@ -70,24 +157,8 @@ lemma privComposeChainRule (nq1 : Mechanism T U) (nq2 : U -> Mechanism T V) (l : intros u v rw [<- compose_sum_rw_adaptive] simp [privComposeAdaptive] + simp [SPMF.instFunLike] -/-- -Product of mechanisms. --/ -def privCompose (nq1 : Mechanism T U) (nq2 : Mechanism T V) (l : List T) : PMF (U × V) := - privComposeAdaptive nq1 (fun _ => nq2) l - -/-- -Mechanism obtained by applying a post-processing function to a mechanism. --/ -def privPostProcess (nq : Mechanism T U) (pp : U → V) (l : List T) : PMF V := do - let A ← nq l - return pp A - -/-- -Constant mechanism --/ -def privConst (u : U) : Mechanism T U := fun _ => PMF.pure u -- @[simp] @@ -218,4 +289,6 @@ lemma condition_to_subset (f : U → V) (g : U → ENNReal) (x : V) : simp rw [B] +end + end SLang diff --git a/SampCert/DifferentialPrivacy/Neighbours.lean b/SampCert/DifferentialPrivacy/Neighbours.lean index 1d556e5f..b8758b5b 100644 --- a/SampCert/DifferentialPrivacy/Neighbours.lean +++ b/SampCert/DifferentialPrivacy/Neighbours.lean @@ -14,14 +14,13 @@ This file defines neighbouring lists. variable {T : Type} /-- -Lists which differ by the inclusion or modification of an element. +Lists which differ by the inclusion or exclusion of an element. This is SampCert's private property. -/ inductive Neighbour (l₁ l₂ : List T) : Prop where | Addition : l₁ = a ++ b → l₂ = a ++ [n] ++ b → Neighbour l₁ l₂ | Deletion : l₁ = a ++ [n] ++ b → l₂ = a ++ b → Neighbour l₁ l₂ - | Update : l₁ = a ++ [n] ++ b → l₂ = a ++ [m] ++ b -> Neighbour l₁ l₂ /-- @@ -33,5 +32,3 @@ def Neighbour_symm (l₁ l₂ : List T) (H : Neighbour l₁ l₂) : Neighbour l exact Neighbour.Deletion Hl2 Hl1 · rename_i _ _ _ Hl1 Hl2 exact Neighbour.Addition Hl2 Hl1 - · rename_i _ _ _ _ Hl1 Hl2 - exact Neighbour.Update Hl2 Hl1 diff --git a/SampCert/DifferentialPrivacy/Pure/AdaptiveComposition.lean b/SampCert/DifferentialPrivacy/Pure/AdaptiveComposition.lean index 1cc2ca32..5da3c698 100644 --- a/SampCert/DifferentialPrivacy/Pure/AdaptiveComposition.lean +++ b/SampCert/DifferentialPrivacy/Pure/AdaptiveComposition.lean @@ -18,8 +18,10 @@ variable [Hu : Nonempty U] namespace SLang -- Better proof for Pure DP adaptive composition -theorem PureDP_ComposeAdaptive' (nq1 : List T → PMF U) (nq2 : U -> List T → PMF V) (ε₁ ε₂ : NNReal) (h1 : PureDP nq1 ε₁) (h2 : ∀ u : U, PureDP (nq2 u) ε₂) : - PureDP (privComposeAdaptive nq1 nq2) (ε₁ + ε₂) := by +theorem PureDP_ComposeAdaptive' (nq1 : List T → PMF U) (nq2 : U -> List T → PMF V) (ε₁ ε₂ ε : NNReal) (h1 : PureDP nq1 ε₁) (h2 : ∀ u : U, PureDP (nq2 u) ε₂) (Hε : ε₁ + ε₂ = ε ) : + PureDP (privComposeAdaptive nq1 nq2) ε := by + rw [<- Hε] + clear ε Hε simp [PureDP] at * rw [event_eq_singleton] at * simp [DP_singleton] at * diff --git a/SampCert/DifferentialPrivacy/Pure/Const.lean b/SampCert/DifferentialPrivacy/Pure/Const.lean index be3152ba..44c549dc 100644 --- a/SampCert/DifferentialPrivacy/Pure/Const.lean +++ b/SampCert/DifferentialPrivacy/Pure/Const.lean @@ -32,7 +32,7 @@ theorem privConst_DP_Bound {u : U} : DP (privConst u : Mechanism T U) 0 := by /-- ``privComposeAdaptive`` satisfies zCDP -/ -theorem PureDP_privConst : ∀ (u : U), PureDP (privConst u : Mechanism T U) (0 : NNReal) := by +theorem PureDP_privConst : ∀ (u : U) (ε : NNReal), (ε = 0) -> PureDP (privConst u : Mechanism T U) ε := by simp [PureDP] at * apply privConst_DP_Bound diff --git a/SampCert/DifferentialPrivacy/Pure/DP.lean b/SampCert/DifferentialPrivacy/Pure/DP.lean index 40c33529..2a5497cf 100644 --- a/SampCert/DifferentialPrivacy/Pure/DP.lean +++ b/SampCert/DifferentialPrivacy/Pure/DP.lean @@ -111,19 +111,21 @@ theorem ApproximateDP_of_DP (m : Mechanism T U) (ε : ℝ) (h : DP m ε) : apply le_trans h simp +/-- +Pure privacy bound required to obtain (ε, δ)-approximate DP +-/ +def pure_of_adp (_ : NNReal) (ε : NNReal) : NNReal := ε + /-- Pure DP is no weaker than approximate DP, up to a loss of parameters -/ lemma pure_ApproximateDP [Countable U] {m : Mechanism T U} : - ∃ (degrade : (δ : NNReal) -> (ε' : NNReal) -> NNReal), ∀ (δ : NNReal) (_ : 0 < δ) (ε' : NNReal), - (DP m (degrade δ ε') -> ApproximateDP m ε' δ) := by - let degrade (_ : NNReal) (ε' : NNReal) : NNReal := ε' - exists degrade - intro δ _ ε' HDP + ∀ (δ : NNReal) (_ : 0 < δ) (ε' : NNReal), + DP m (pure_of_adp δ ε') -> ApproximateDP m ε' δ := by + intro _ _ _ HDP rw [ApproximateDP] apply ApproximateDP_of_DP - have R1 : degrade δ ε' = ε' := by simp - rw [R1] at HDP + simp [pure_of_adp] at HDP trivial end SLang diff --git a/SampCert/DifferentialPrivacy/Pure/Mechanism/Properties.lean b/SampCert/DifferentialPrivacy/Pure/Mechanism/Properties.lean index addf1b2f..950208a4 100644 --- a/SampCert/DifferentialPrivacy/Pure/Mechanism/Properties.lean +++ b/SampCert/DifferentialPrivacy/Pure/Mechanism/Properties.lean @@ -114,12 +114,15 @@ theorem privNoisedQueryPure_DP_bound (query : List T → ℤ) (Δ ε₁ ε₂ : exact (add_lt_add_iff_right 1).mpr A · apply exp_pos +def laplace_pureDP_noise_priv (ε₁ ε₂ : ℕ+) (ε : NNReal) : Prop := (ε₁ : NNReal) / ε₂ = ε /-- Laplace noising mechanism ``privNoisedQueryPure`` produces a pure ``ε₁/ε₂``-DP mechanism from a Δ-sensitive query. -/ -theorem privNoisedQueryPure_DP (query : List T → ℤ) (Δ ε₁ ε₂ : ℕ+) (bounded_sensitivity : sensitivity query Δ) : - PureDP (privNoisedQueryPure query Δ ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by +theorem privNoisedQueryPure_DP (query : List T → ℤ) (Δ ε₁ ε₂ : ℕ+) (ε : NNReal) (HN : laplace_pureDP_noise_priv ε₁ ε₂ ε) (bounded_sensitivity : sensitivity query Δ) : + PureDP (privNoisedQueryPure query Δ ε₁ ε₂) ε := by + unfold laplace_pureDP_noise_priv at HN + rw [<- HN] simp [PureDP] apply privNoisedQueryPure_DP_bound apply bounded_sensitivity diff --git a/SampCert/DifferentialPrivacy/Pure/Par.lean b/SampCert/DifferentialPrivacy/Pure/Par.lean new file mode 100644 index 00000000..2973c6df --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Par.lean @@ -0,0 +1,139 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ +import SampCert.DifferentialPrivacy.Generic +import SampCert.DifferentialPrivacy.Pure.DP + +/-! +# Pure DP parallel composition + +This file proves a DP bound for parallel composition +-/ + +noncomputable section + +open Classical Nat Int Real ENNReal MeasureTheory Measure + +namespace SLang + +-- FIXME: Cleanup! +theorem privParCompose_DP_Bound {m1 : Mechanism T U} {m2 : Mechanism T V} {ε₁ ε₂ : NNReal} (f) + (H1 : DP m1 ε₁) (H2 : DP m2 ε₂) : DP (privParCompose m1 m2 f) (max ε₁ ε₂) := by + + apply singleton_to_event + apply event_to_singleton at H1 + apply event_to_singleton at H2 + + -- Simplify the evaluation into a product + rintro l₁ l₂ HN ⟨u, v⟩ + simp [DFunLike.coe] + rw [privParCompose_eval, privParCompose_eval] + rw [ENNReal.div_eq_inv_mul] + rw [ENNReal.mul_inv ?G1 ?G2] + case G1 => + right + apply PMF.apply_ne_top + case G2 => + left + apply PMF.apply_ne_top + rw [mul_left_comm] + repeat rw [<- mul_assoc] + conv => + lhs + enter [1, 1] + rw [mul_comm] + rw [<- ENNReal.div_eq_inv_mul] + rw [mul_assoc] + rw [<- ENNReal.div_eq_inv_mul] + + -- Now, use the neighbouring condition to turn one of the two terms into 1 + cases HN + · rename_i l₁' l₂' t Hl₁ Hl₂ + simp only [Hl₁, Hl₂, List.filter_append, List.filter_singleton, Function.comp_apply] + cases f t + -- Addition + · simp + apply (le_trans ?G1) + case G1 => + apply mul_le_mul + · apply ENNReal.div_self_le_one + · rfl + · simp + · simp + simp + transitivity + · apply H2 + apply Neighbour.Addition + · rfl + · simp; rfl + · apply ENNReal.ofReal_le_ofReal + apply Real.exp_monotone + apply le_trans (le_max_right ε₁.toReal ε₂.toReal) + conv => + lhs + rw [<- one_mul (max _ _)] + simp + · simp + rw [mul_comm] + apply (le_trans ?G1) + case G1 => + apply mul_le_mul + · apply ENNReal.div_self_le_one + · rfl + · simp + · simp + simp + transitivity + · apply H1 + apply Neighbour.Addition + · rfl + · simp; rfl + · apply ENNReal.ofReal_le_ofReal + apply Real.exp_monotone + simp + · rename_i l₁' t l₂' Hl₁ Hl₂ + simp only [Hl₁, Hl₂, List.filter_append, List.filter_singleton, Function.comp_apply] + cases f t + · simp + apply (le_trans ?G1) + case G1 => + apply mul_le_mul + · apply ENNReal.div_self_le_one + · rfl + · simp + · simp + simp + transitivity + · apply H2 + apply Neighbour.Deletion + · simp + rfl + · rfl + · apply ENNReal.ofReal_le_ofReal + apply Real.exp_monotone + simp + · simp + rw [mul_comm] + apply (le_trans ?G1) + case G1 => + apply mul_le_mul + · apply ENNReal.div_self_le_one + · rfl + · simp + · simp + simp + transitivity + · apply H1 + apply Neighbour.Deletion + · simp; rfl + · rfl + · apply ENNReal.ofReal_le_ofReal + simp + +theorem pureDP_priv_Par {m1 : Mechanism T U} {m2 : Mechanism T V} {ε₁ ε₂ ε: NNReal} : + ε = max ε₁ ε₂ -> ∀f, DP m1 ε₁ -> DP m2 ε₂ -> DP (privParCompose m1 m2 f) ε := + (· ▸ privParCompose_DP_Bound) + +end SLang diff --git a/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean b/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean index 4e57529a..8621724a 100644 --- a/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean +++ b/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean @@ -29,6 +29,7 @@ lemma privPostProcess_DP_bound {nq : Mechanism T U} {ε : NNReal} (h : PureDP nq replace h := h l₁ l₂ neighbours simp [privPostProcess] apply ENNReal.div_le_of_le_mul + simp [SPMF.instFunLike] rw [← ENNReal.tsum_mul_left] apply tsum_le_tsum _ ENNReal.summable (by aesop) intro i diff --git a/SampCert/DifferentialPrivacy/Pure/System.lean b/SampCert/DifferentialPrivacy/Pure/System.lean index 00e16cd0..0dd3d1d9 100644 --- a/SampCert/DifferentialPrivacy/Pure/System.lean +++ b/SampCert/DifferentialPrivacy/Pure/System.lean @@ -9,6 +9,7 @@ import SampCert.DifferentialPrivacy.Pure.Mechanism.Basic import SampCert.DifferentialPrivacy.Pure.AdaptiveComposition import SampCert.DifferentialPrivacy.Pure.Postprocessing import SampCert.DifferentialPrivacy.Pure.Const +import SampCert.DifferentialPrivacy.Pure.Par /-! # Pure DP system @@ -21,14 +22,25 @@ variable { T : Type } /-- Pure ε-DP with noise drawn from the discrete Laplace distribution. -/ -noncomputable instance PureDPSystem : DPSystem T where +instance PureDPSystem : DPSystem T where prop := PureDP + of_app_dp := pure_of_adp prop_adp := pure_ApproximateDP prop_mono := PureDP_mono + adaptive_compose_prop := by + intros; apply PureDP_ComposeAdaptive' <;> trivial + postprocess_prop := by + intros; apply PureDP_PostProcess; trivial + const_prop := by + intros; apply PureDP_privConst; trivial + +instance laplace_pureDPSystem : DPNoise (@PureDPSystem T) where noise := privNoisedQueryPure - noise_prop := privNoisedQueryPure_DP - adaptive_compose_prop := PureDP_ComposeAdaptive' - postprocess_prop := PureDP_PostProcess - const_prop := PureDP_privConst + noise_priv := laplace_pureDP_noise_priv + noise_prop := by + intros; apply privNoisedQueryPure_DP <;> trivial + +instance PureDPParSystem : DPPar T where + prop_par := pureDP_priv_Par end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/AboveThresh/Basic.lean b/SampCert/DifferentialPrivacy/Queries/AboveThresh/Basic.lean new file mode 100644 index 00000000..926595af --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/AboveThresh/Basic.lean @@ -0,0 +1,9 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Properties +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Privacy +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Code diff --git a/SampCert/DifferentialPrivacy/Queries/AboveThresh/Code.lean b/SampCert/DifferentialPrivacy/Queries/AboveThresh/Code.lean new file mode 100644 index 00000000..9572977a --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/AboveThresh/Code.lean @@ -0,0 +1,69 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Abstract +import SampCert.DifferentialPrivacy.Pure.System + +namespace SLang + +variable (ε₁ ε₂ : ℕ+) + +variable [dps : DPSystem ℕ] +variable [dpn : DPNoise dps] + +/-- +Sensitivity bound for the τ threshold +-/ +def sens_cov_τ : ℕ+ := 1 + +/-- +Sensitivity bound for each upper bound attempt +-/ +def sens_cov_vk : ℕ+ := sens_cov_τ + 1 + +/-- +Noise the constant 0 value using the abstract noise function. + +This looks strange, but will specialize to Lap(ε₁/ε₂, 0) in the pure DP case. +-/ +def privNoiseZero (ε₁ ε₂ : ℕ+) : SPMF ℤ := dpn.noise (fun _ => 0) 1 ε₁ ε₂ [] + +def privNoiseGuess (ε₁ ε₂ : ℕ+) : SPMF ℤ := privNoiseZero ε₁ (2 * sens_cov_vk * ε₂) + +def privNoiseThresh (ε₁ ε₂ : ℕ+) : SPMF ℤ := privNoiseZero ε₁ (2 * sens_cov_τ * ε₂) + +/- +## Program version 1 + - Executable + - Tracks history of samples +-/ + + + + +def sv_query (sv_T : Type) : Type := ℕ -> Query sv_T ℤ + +def sv1_state : Type := List ℤ × ℤ + +def sv1_threshold (s : sv1_state) : ℕ := List.length s.1 + +def sv1_noise (s : sv1_state) : ℤ := s.2 + +def sv1_aboveThreshC (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (s : sv1_state) : Bool := + decide (qs (sv1_threshold s) l + (sv1_noise s) < τ + T) + -- decide (exactDiffSum (sv1_threshold s) l + (sv1_noise s) < τ) + +def sv1_aboveThreshF (ε₁ ε₂ : ℕ+) (s : sv1_state) : SLang sv1_state := do + let vn <- privNoiseGuess ε₁ ε₂ + return (s.1 ++ [s.2], vn) + +def sv1_aboveThresh {sv_T : Type} (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + let sk <- probWhile (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) ([], v0) + return (sv1_threshold sk) + +end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/AboveThresh/Privacy.lean b/SampCert/DifferentialPrivacy/Queries/AboveThresh/Privacy.lean new file mode 100644 index 00000000..864699b9 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/AboveThresh/Privacy.lean @@ -0,0 +1,473 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Abstract +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Properties +import SampCert.DifferentialPrivacy.Pure.System + +noncomputable section + +open Classical + +namespace SLang + +variable {sv_Ta: Type} (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) +variable (Hqs_sens : ∀ i, sensitivity (qs i) 1) + +def cov_τ_def (v0 : ℤ) (vs : List ℤ) (l₁ l₂ : List sv_T) : ℤ := (sv8_G qs l₁ [] v0 vs) - (sv8_G qs l₂ [] v0 vs) + +lemma cov_τ_def_neg (v0 : ℤ) (vs : List ℤ) (l₁ l₂ : List sv_T) : cov_τ_def qs v0 vs l₁ l₂ = -cov_τ_def qs v0 vs l₂ l₁ := by + simp [cov_τ_def] + +def cov_vk_def (v0 : ℤ) (vs : List ℤ) (l₁ l₂ : List sv_T) (point : ℕ) : ℤ := + -- exactDiffSum (point + 1) l₂ - exactDiffSum (point + 1) l₁ + cov_τ_def qs v0 vs l₁ l₂ + qs (point + 1) l₂ - qs (point + 1) l₁ + cov_τ_def qs v0 vs l₁ l₂ + +lemma cov_vk_def_neg (v0 : ℤ) (vs : List ℤ) (l₁ l₂ : List sv_T) : cov_vk_def qs v0 vs l₁ l₂ point = -cov_vk_def qs v0 vs l₂ l₁ point := by + simp [cov_τ_def, cov_vk_def] + linarith + +-- FIXME: Move +lemma tsum_shift (Δ : ℤ) (f : ℤ → ENNReal) : (∑'(x : ℤ), f x = ∑'(x : ℤ), f (x + Δ)) := by + apply @tsum_eq_tsum_of_ne_zero_bij + case i => exact (fun x => x.1 + Δ) + · simp [Function.Injective] + · simp + intro x Hx + exists (x - Δ) + simp + trivial + · intro + rfl + +lemma laplace_inequality' (τ τ' : ℤ) (Δ : ℕ+) : + ((ENNReal.ofReal (Real.exp (-abs τ' / (Δ * ε₂ / ε₁)))) * ((DiscreteLaplaceGenSamplePMF (Δ * ε₂) ε₁ 0 τ))) ≤ + (DiscreteLaplaceGenSamplePMF (Δ * ε₂) ε₁ 0) (τ + τ') := by + simp [DiscreteLaplaceGenSamplePMF, PMF.instFunLike] + generalize HA : (Real.exp (↑↑ε₁ / (↑↑Δ * ↑↑ε₂)) - 1) = A + generalize HB : (Real.exp (↑↑ε₁ / (↑↑Δ * ↑↑ε₂)) + 1) = B + generalize HC : ((Δ : Real) * ε₂ / ε₁) = C + rw [<- ENNReal.ofReal_mul ?G1] + case G1 => apply Real.exp_nonneg + apply ENNReal.ofReal_le_ofReal + conv => + lhs + rw [mul_comm] + repeat rw [mul_assoc] + apply mul_le_mul_of_nonneg + · rfl + · rw [← Real.exp_add] + apply Real.exp_monotone + repeat rw [<- neg_div] + rw [div_add_div_same] + apply div_le_div_of_nonneg_right + · simp + have H := @abs_add_le ℝ _ _ _ τ τ' + linarith + · rw [<- HC] + simp [div_nonneg_iff] + · rw [<- HA] + rw [<- HB] + simp [div_nonneg_iff] + left + conv => + lhs + rw [<- add_zero 0] + apply add_le_add + · apply Real.exp_nonneg + · simp + · apply Real.exp_nonneg + +lemma laplace_inequality (τ τ' : ℤ) (Δ : ℕ+) : + ((DiscreteLaplaceGenSamplePMF (Δ * ε₂) ε₁ 0 τ)) ≤ + ((ENNReal.ofReal (Real.exp (abs τ' / (Δ * ε₂ / ε₁)))) * + (DiscreteLaplaceGenSamplePMF (Δ * ε₂) ε₁ 0) (τ + τ')) := by + apply le_trans _ ?G1 + case G1 => + apply ENNReal.mul_left_mono + apply laplace_inequality' + simp only [] + repeat rw [<- mul_assoc] + conv => + lhs + rw [<- one_mul (DiscreteLaplaceGenSamplePMF _ _ _ _)] + apply mul_le_mul + · apply Eq.le + rw [<- ENNReal.ofReal_mul ?G1] + case G1 => apply Real.exp_nonneg + rw [<- Real.exp_add] + symm + simp + rw [div_add_div_same] + rw [div_eq_zero_iff] + left + simp + · rfl + · apply zero_le + · apply zero_le + + +lemma laplace_inequality_sub (τ τ' : ℤ) (Δ : ℕ+) : + ((DiscreteLaplaceGenSamplePMF (Δ * ε₂) ε₁ 0 (τ + τ'))) ≤ + ((ENNReal.ofReal (Real.exp (abs τ' / (Δ * ε₂ / ε₁)))) * + (DiscreteLaplaceGenSamplePMF (Δ * ε₂) ε₁ 0) τ) := by + apply le_trans + · apply laplace_inequality ε₁ ε₂ (τ + τ') (-τ') Δ + apply Eq.le + simp + +lemma DSN (N : ℕ) (H : Neighbour L1 L2) : ((qs N L1) : ℝ) - (qs N L2) ≤ 1 := by + let Hqs_sens' := Hqs_sens N L1 L2 H + rw [← Int.cast_sub] + rw [<- Int.cast_one] + apply Int.cast_le.mpr + let X1 := Int.ofNat_le_ofNat_of_le Hqs_sens' + simp only [Int.natCast_natAbs, Nat.cast_one] at X1 + apply le_trans _ X1 + apply le_abs_self + +lemma Hsens_cov_τ_lemma (HN : Neighbour l₁ l₂) : sv8_sum qs l₁ H v0 - sv8_sum qs l₂ H v0 ≤ OfNat.ofNat 1 := by + simp only [sv8_sum] + rw [add_tsub_add_eq_tsub_right] + have X := @DSN sv_T qs Hqs_sens l₁ l₂ H.length HN + rw [← Int.cast_sub] at X + have Y : (@OfNat.ofNat.{0} Real 1 (@One.toOfNat1.{0} Real Real.instOne)) = (@OfNat.ofNat.{0} Int (@OfNat.ofNat.{0} Nat 1 (instOfNatNat 1)) (@instOfNat (@OfNat.ofNat.{0} Nat 1 (instOfNatNat 1)))) := + by simp + rw [Y] at X + apply Int.cast_le.mp at X + apply le_trans X + simp + +lemma Hsens_cov_τ (v0 : ℤ) (vs : List ℤ) (l₁ l₂ : List sv_T) (Hneighbour : Neighbour l₁ l₂) : cov_τ_def qs v0 vs l₁ l₂ ≤ sens_cov_τ := by + dsimp [cov_τ_def, sens_cov_τ] + + suffices (∀ H v0, sv8_G qs l₁ H v0 vs - sv8_G qs l₂ H v0 vs ≤ sens_cov_τ.val.cast) by + apply this + + induction vs + · intro H v0 + dsimp [sens_cov_τ] + simp only [sv8_G] + apply Hsens_cov_τ_lemma <;> trivial + · rename_i next fut IH + intro H v0 + simp only [sv8_G] + apply le_trans (max_sub_max_le_max _ _ _ _) + -- Do both cases separately + apply Int.max_le.mpr + apply And.intro + · apply Hsens_cov_τ_lemma <;> trivial + · apply IH + +-- Prove sensitivity bound +lemma Hsens_cov_vk (v0 : ℤ) (vs : List ℤ) (l₁ l₂ : List sv_T) (point : ℕ) (Hneighbour : Neighbour l₁ l₂) : cov_vk_def qs v0 vs l₁ l₂ point ≤ sens_cov_vk := by + dsimp [cov_vk_def] + have X := Hsens_cov_τ qs Hqs_sens v0 vs l₁ l₂ Hneighbour + simp_all [sens_cov_vk, sens_cov_τ] + rw [<- one_add_one_eq_two] + apply Int.add_le_add _ X + + let Hqs_sens' := Hqs_sens (point + 1) l₂ l₁ (Neighbour_symm _ _ Hneighbour) + let X1 := Int.ofNat_le_ofNat_of_le Hqs_sens' + simp only [Int.natCast_natAbs, Nat.cast_one] at X1 + apply le_trans _ X1 + apply le_abs_self + +lemma sv9_aboveThresh_pmf_DP HL (ε : NNReal) (Hε : ε = ε₁ / ε₂) : + PureDPSystem.prop (@sv9_aboveThresh_SPMF sv_T qs T HL ε₁ ε₂) ε := by + -- Unfold DP definitions + simp [DPSystem.prop] + apply singleton_to_event + unfold DP_singleton + intro l₁ l₂ Hneighbour point + + apply ENNReal.div_le_of_le_mul + simp [sv9_aboveThresh_SPMF, DFunLike.coe, PMF.instFunLike] + + cases point + · -- point = 0 + simp [sv9_aboveThresh] + + -- Put sums on outside + conv => + enter [2] + rw [← ENNReal.tsum_mul_left] + enter [1, i] + rw [← ENNReal.tsum_mul_left] + rw [← ENNReal.tsum_mul_left] + enter [1, i_1] + repeat rw [<- mul_assoc] + enter [1] + rw [mul_comm (ENNReal.ofReal _)] + repeat rw [mul_assoc] + rw [mul_comm (ENNReal.ofReal _)] + conv => + enter [2, 1, i, 1, i_1] + repeat rw [mul_assoc] + conv => + enter [1, 1, i] + rw [← ENNReal.tsum_mul_left] + + -- Change of variables + let cov_τ : ℤ := 0 + -- let cov_vk : ℤ := exactDiffSum 0 l₂ - exactDiffSum 0 l₁ + cov_τ + let cov_vk : ℤ := qs 0 l₂ - qs 0 l₁ + cov_τ + conv => + lhs + rw [tsum_shift cov_τ] + enter [1, τ] + rw [tsum_shift cov_vk] + enter [1, vk] + conv => + rhs + enter [1, τ, 1, vk] + apply ENNReal.tsum_le_tsum + intro τ + apply ENNReal.tsum_le_tsum + intro vk + + rw [<- mul_assoc] + rw [<- mul_assoc] + rw [<- mul_assoc] + apply mul_le_mul + · -- Laplace bound + simp [cov_τ] + rw [mul_assoc] + apply ENNReal.mul_left_mono + simp [privNoiseGuess, privNoiseZero, DPNoise.noise, privNoisedQueryPure] + apply le_trans + · apply laplace_inequality_sub + rw [mul_comm] + apply ENNReal.mul_left_mono + rw [Hε] + apply ENNReal.ofReal_le_ofReal + apply Real.exp_monotone + simp [sens_cov_vk, sens_cov_τ] + + have X : |((qs 0 l₂) : ℝ) - (qs 0 l₁)| ≤ 1 := by + rw [abs_le] + apply And.intro + · apply neg_le.mp + simp only [neg_sub] + apply DSN <;> trivial + · apply DSN + · trivial + · apply Neighbour_symm + trivial + + ring_nf + rw [InvolutiveInv.inv_inv] + conv => + lhs + rw [mul_comm] + rw [<- mul_assoc] + rw [<- mul_assoc] + rw [mul_comm] + enter [2] + rw [mul_comm] + simp + apply le_trans _ X + conv => + rhs + rw [<- one_mul (abs _)] + apply mul_le_mul + · apply inv_le_one + simp + · rfl + · apply abs_nonneg + · simp + + + · -- Conditionals should be equal + suffices (τ + T + cov_τ ≤ sv8_sum qs l₁ [] (vk + cov_vk)) = (τ + T ≤ sv8_sum qs l₂ [] vk) by + split <;> simp_all + split <;> simp_all + linarith + apply propext + simp [sv8_sum, cov_vk] + apply Iff.intro + · intro _ + linarith + · intro _ + linarith + + · apply zero_le + · apply zero_le + + + · rename_i point + -- point > 0 + -- proceed with the proof in the paper + simp [sv9_aboveThresh] + + -- Cancel out the deterministic parts + conv => + enter [2] + rw [← ENNReal.tsum_mul_left] + enter [1, i] + rw [← ENNReal.tsum_mul_left] + rw [← ENNReal.tsum_mul_left] + enter [1, i_1] + repeat rw [<- mul_assoc] + enter [1] + rw [mul_comm (ENNReal.ofReal _)] + repeat rw [mul_assoc] + rw [mul_comm (ENNReal.ofReal _)] + conv => + enter [2, 1, i, 1, i_1] + repeat rw [mul_assoc] + conv => + enter [1, 1, i] + rw [← ENNReal.tsum_mul_left] + apply ENNReal.tsum_le_tsum + intro v0 + apply ENNReal.tsum_le_tsum + intro vs + apply ENNReal.mul_left_mono + apply ENNReal.mul_left_mono + + -- Rearrange to put sums on the outside + conv => + lhs + enter [1, τ] + rw [← ENNReal.tsum_mul_left] + enter [1, vk] + conv => + rhs + rw [← ENNReal.tsum_mul_left] + enter [1, τ] + rw [← ENNReal.tsum_mul_left] + rw [← ENNReal.tsum_mul_left] + enter [1, vk] + + simp [sv8_cond, sv8_sum] + + -- Perform the changes of variables, so that the sums are pointwise le + let cov_τ : ℤ := cov_τ_def qs v0 vs l₁ l₂ + let cov_vk : ℤ := cov_vk_def qs v0 vs l₁ l₂ point + + conv => + lhs + rw [tsum_shift cov_τ] + enter [1, τ] + rw [tsum_shift cov_vk] + enter [1, vk] + apply ENNReal.tsum_le_tsum + intro τ + apply ENNReal.tsum_le_tsum + intro vk + + -- The change of variables make the conditional equal + repeat rw [<- mul_assoc] + apply mul_le_mul' _ ?G2 + case G2 => + apply Eq.le + suffices ((sv8_G qs l₁ [] v0 ↑vs < τ + cov_τ + T) = (sv8_G qs l₂ [] v0 ↑vs < τ + T)) ∧ + ((τ + cov_τ + T ≤ qs (point + 1) l₁ + (vk + cov_vk)) = (τ + T ≤ qs (point + 1) l₂ + vk)) by + simp_all + apply And.intro + · -- cov_τ + apply propext + dsimp [cov_τ, cov_τ_def] + apply Iff.intro <;> intro _ <;> linarith + · -- cov_vk + apply propext + dsimp [cov_vk, cov_vk_def] + apply Iff.intro <;> intro _ <;> linarith + + simp [privNoiseThresh, privNoiseGuess, privNoiseZero, DPNoise.noise, privNoisedQueryPure] + + -- Apply the Laplace inequalities + apply le_trans + · apply mul_le_mul + · apply laplace_inequality_sub + · apply laplace_inequality_sub + · apply zero_le + · apply zero_le + + -- Cancel the Laplace samples + conv => + lhs + rw [mul_assoc] + rw [mul_comm] + conv => + rhs + rw [mul_assoc] + rw [mul_comm] + repeat rw [mul_assoc] + apply ENNReal.mul_left_mono + conv => + lhs + rw [mul_comm] + repeat rw [mul_assoc] + apply ENNReal.mul_left_mono + + -- Apply the ineuqalities + rw [<- ENNReal.ofReal_mul ?G1] + case G1 => apply Real.exp_nonneg + apply ENNReal.ofReal_le_ofReal + rw [← Real.exp_add] + apply Real.exp_monotone + apply @le_trans _ _ _ ((|sens_cov_τ| : ℝ) / (↑↑(2 * sens_cov_τ) * ↑↑ε₂ / ↑↑ε₁) + (|sens_cov_vk| : ℝ) / (↑↑(2 * sens_cov_vk) * ↑↑ε₂ / ↑↑ε₁)) + · have W : |cov_τ.cast| ≤ (sens_cov_τ.val : ℝ) := by + apply abs_le'.mpr + apply And.intro + · dsimp only [cov_τ] + apply Int.cast_le.mpr + apply Hsens_cov_τ <;> trivial + · dsimp only [cov_τ] + rw [cov_τ_def_neg] + simp + apply Int.cast_le.mpr + apply Hsens_cov_τ + · trivial + apply Neighbour_symm + apply Hneighbour + + have X : |cov_vk.cast| ≤ (sens_cov_vk.val : ℝ) := by + apply abs_le'.mpr + apply And.intro + · dsimp only [cov_vk] + apply Int.cast_le.mpr + apply Hsens_cov_vk _ Hqs_sens + trivial + · dsimp only [cov_vk] + rw [cov_vk_def_neg] + simp + apply Int.cast_le.mpr + apply Hsens_cov_vk _ Hqs_sens + apply Neighbour_symm + trivial + + apply add_le_add + · simp + apply div_le_div_of_nonneg_right + · apply W + · apply mul_nonneg <;> simp + · apply div_le_div_of_nonneg_right + · simp + apply X + · apply div_nonneg <;> simp + simp [sens_cov_τ, sens_cov_vk] + ring_nf + rw [InvolutiveInv.inv_inv] + rw [Hε] + conv => + lhs + enter [2, 1] + rw [mul_comm] + rw [<- add_mul] + rw [<- add_mul] + conv => + lhs + enter [1, 1] + rw [<- (one_mul (ε₁.val.cast))] + rw [<- add_mul] + repeat rw [div_eq_mul_inv] + simp + rw [one_add_one_eq_two] + ring_nf + rfl diff --git a/SampCert/DifferentialPrivacy/Queries/AboveThresh/Properties.lean b/SampCert/DifferentialPrivacy/Queries/AboveThresh/Properties.lean new file mode 100644 index 00000000..26573637 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/AboveThresh/Properties.lean @@ -0,0 +1,2479 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Abstract +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Code + +noncomputable section +open Classical + +namespace SLang + +section equiv + +variable [dps : DPSystem ℕ] +variable [dpn : DPNoise dps] +variable {sv_T : Type} + +/-- +Stronger congruence rule for probBind: The bound-to functions have to be equal only on the support of +the bound-from function. +-/ +lemma probBind_congr_strong (p : SLang T) (f : T -> SLang U) (g : T -> SLang U) (Hcong : ∀ t : T, p t ≠ 0 -> f t = g t) : + p >>= f = p >>= g := by + simp + unfold probBind + apply SLang.ext + intro u + apply Equiv.tsum_eq_tsum_of_support ?G1 + case G1 => + apply Set.BijOn.equiv (fun x => x) + simp [Function.support] + have Heq : {x | ¬p x = 0 ∧ ¬f x u = 0} = {x | ¬p x = 0 ∧ ¬g x u = 0} := by + apply Set.sep_ext_iff.mpr + intro t Ht + rw [Hcong] + apply Ht + rw [Heq] + apply Set.bijOn_id + simp [Function.support] + intro t ⟨ Hp, _ ⟩ + simp [Set.BijOn.equiv] + rw [Hcong] + apply Hp + + + + +lemma ENNReal.tsum_iSup_comm (f : T -> U -> ENNReal) : ∑' x, ⨆ y, f x y ≥ ⨆ y, ∑' x, f x y := by + apply LE.le.ge + rw [iSup_le_iff] + intro i + apply ENNReal.tsum_le_tsum + intro a + apply le_iSup + +lemma ENNReal.tsum_iSup_comm' (f : T -> U -> ENNReal) : ⨆ y, ∑' x, f x y ≤ ∑' x, ⨆ y, f x y := by + rw [iSup_le_iff] + intro i + apply ENNReal.tsum_le_tsum + intro a + apply le_iSup + +lemma iSup_comm_lemma (qs : sv_query sv_T) T (ε₁ ε₂ : ℕ+) (l : List sv_T) (τ v0 : ℤ): + ∑' b, ⨆ i, probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) i ([], v0) b = + ⨆ i, ∑' b, probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) i ([], v0) b := by + rw [ENNReal.tsum_eq_iSup_sum] + conv => + rhs + enter [1, y] + rw [ENNReal.tsum_eq_iSup_sum] + rw [iSup_comm] + apply iSup_congr + intro s + apply ENNReal.finsetSum_iSup_of_monotone + intro a + apply probWhileCut_monotonic + +lemma sv1_loop_ub (qs : sv_query sv_T) T ε₁ ε₂ l : ∀ L : List ℤ, ∀ (v0 : ℤ), (∑' (x : sv1_state), probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) cut (L, v0) x ≤ 1) := by + induction cut + · simp [probWhileCut] + · rename_i cut' IH + intro L v0 + simp [probWhileCut, probWhileFunctional] + split + · simp + simp [sv1_aboveThreshF] + conv => + enter [1, 1, x] + conv => + enter [1, a] + rw [<- ENNReal.tsum_mul_right] + simp + rw [ENNReal.tsum_comm] + enter [1, b] + + apply + @le_trans _ _ _ + (∑' (x : sv1_state) (b : ℤ), (privNoiseGuess ε₁ ε₂) b * probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) cut' (L ++ [v0], b) x) + _ ?G5 ?G6 + case G5 => + apply ENNReal.tsum_le_tsum + intro a + apply ENNReal.tsum_le_tsum + intro b + unfold sv1_state + rw [ENNReal.tsum_eq_add_tsum_ite (L ++ [v0], b)] + simp + conv => + rhs + rw [<- add_zero (_ * _)] + apply add_le_add + · simp + · simp + intros + aesop + case G6 => + rw [ENNReal.tsum_comm] + conv => + enter [1, 1, b] + rw [ENNReal.tsum_mul_left] + apply @le_trans _ _ _ (∑' (b : ℤ), (privNoiseGuess ε₁ ε₂) b * 1) + · apply ENNReal.tsum_le_tsum + intro a + apply ENNReal.mul_left_mono + apply IH + · simp + apply Eq.le + rw [<- Summable.hasSum_iff ENNReal.summable] + cases (privNoiseGuess ε₁ ε₂) + simp [DFunLike.coe, SPMF.instFunLike] + trivial + · simp + + +lemma sv1_ub (qs : sv_query sv_T) T ε₁ ε₂ l : ∑'s, sv1_aboveThresh qs T ε₁ ε₂ l s ≤ 1 := by + unfold sv1_aboveThresh + unfold sv1_threshold + simp + -- Push the sum over s inwards + conv => + rw [ENNReal.tsum_comm] + enter [1, 1, b] + rw [ENNReal.tsum_mul_left] + enter [2] + rw [ENNReal.tsum_comm] + enter [1, i] + rw [ENNReal.tsum_mul_left] + enter [2] + rw [ENNReal.tsum_comm] + apply + @le_trans _ _ _ + (∑' (a : ℤ), (privNoiseThresh ε₁ ε₂) a * ∑' (a_1 : ℤ), (privNoiseGuess ε₁ ε₂) a_1 * 1) + _ ?G2 ?G1 + case G1 => + apply Eq.le + simp + rw [ENNReal.tsum_mul_right] + have R1 : ∑' (a : ℤ), (privNoiseThresh ε₁ ε₂) a = 1 := by + rw [<- Summable.hasSum_iff ENNReal.summable] + cases (privNoiseThresh ε₁ ε₂) + simp [DFunLike.coe, SPMF.instFunLike] + trivial + have R2 : ∑' (a : ℤ), (privNoiseGuess ε₁ ε₂) a = 1 := by + rw [<- Summable.hasSum_iff ENNReal.summable] + cases (privNoiseGuess ε₁ ε₂) + simp [DFunLike.coe, SPMF.instFunLike] + trivial + simp_all + case G2 => + apply ENNReal.tsum_le_tsum + intro τ + apply ENNReal.mul_left_mono + apply ENNReal.tsum_le_tsum + intro v0 + apply ENNReal.mul_left_mono + + apply + @le_trans _ _ _ + (∑' (b : sv1_state), probWhile (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) ([], v0) b ) + _ ?G3 ?G4 + case G3 => + apply ENNReal.tsum_le_tsum + intro a + rw [tsum_ite_eq] + case G4 => + unfold probWhile + rw [iSup_comm_lemma] + apply iSup_le_iff.mpr + intro cut + apply sv1_loop_ub + +/- +## Program version 2 + - Only moves the loop into a non-executable form, ie. explicitly defines the PMF +-/ + +def sv2_aboveThresh (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := + fun (point : ℕ) => + let computation : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + let sk <- probWhile (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) ([], v0) + return (sv1_threshold sk) + computation point + +lemma sv1_sv2_eq ε₁ ε₂ l : sv1_aboveThresh qs T ε₁ ε₂ l = sv2_aboveThresh qs T ε₁ ε₂ l := by + apply SLang.ext + intro result + simp [sv1_aboveThresh, sv2_aboveThresh] + + + +/- +## Program version 3 + - Truncates the loop +-/ + +def sv3_loop (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (τ : ℤ) (l : List sv_T) (point : ℕ) (init : sv1_state) : SLang sv1_state := do + probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) (point + 1) init + +def sv3_aboveThresh (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := + fun (point : ℕ) => + let computation : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + let sk <- sv3_loop qs T ε₁ ε₂ τ l point ([], v0) + return (sv1_threshold sk) + computation point + +def cone_of_possibility (cut : ℕ) (initial hist : List ℤ) : Prop := + (hist.length < cut + initial.length) ∧ (initial.length ≤ hist.length) + +def constancy_at {qs : sv_query sv_T} {T : ℤ} {ε₁ ε₂ : ℕ+} {τ : ℤ} {data : List sv_T} {v0 vk : ℤ} (cut : ℕ) (initial hist : List ℤ) : Prop := + probWhileCut (sv1_aboveThreshC qs T τ data) (sv1_aboveThreshF ε₁ ε₂) (1 + cut) (initial, v0) (hist, vk) = + probWhileCut (sv1_aboveThreshC qs T τ data) (sv1_aboveThreshF ε₁ ε₂) cut (initial, v0) (hist, vk) + + +-- All points outside of the cone are zero +lemma external_to_cone_zero : + (¬ cone_of_possibility cut initial hist) -> + probWhileCut (sv1_aboveThreshC qs T τ data) (sv1_aboveThreshF ε₁ ε₂) cut (initial, v0) (hist, vk) = 0 := by + revert initial v0 vk + induction cut + · simp [probWhileCut, probWhileFunctional] + · rename_i cut' IH + intro intial v0 vk Hcut' + unfold probWhileCut + unfold probWhileFunctional + split <;> simp + · intro h + rcases h with ⟨ initial', vk' ⟩ + cases Classical.em (¬ ∃ v', initial' = intial ++ [v']) + · left + simp [sv1_aboveThreshF] + intro i Hi + exfalso + cases Hi + rename_i h + apply h + exists v0 + rename_i h + simp at h + rcases h with ⟨ v', Hinitial' ⟩ + right + apply IH + simp_all [cone_of_possibility] + intro + have Hcut'' := Hcut' (by linarith) + linarith + · intro H + cases H + simp_all [cone_of_possibility] + +-- Base case: left edge of the cone satisfies constancy +lemma cone_left_edge_constancy {qs : sv_query sv_T} {T : ℤ} {ε₁ ε₂ : ℕ+} {τ : ℤ} {data : List sv_T} {v0 vk : ℤ} (cut : ℕ) (initial hist : List ℤ) : + hist.length = initial.length -> + cone_of_possibility cut initial hist -> + @constancy_at _ _ _ qs T ε₁ ε₂ τ data v0 vk cut initial hist := by + intro Hlen Hcone + -- cut > 0 due to cone + cases cut + · exfalso + simp [cone_of_possibility] at Hcone + simp_all only [lt_self_iff_false, le_refl, and_true] + rename_i cut' + + -- Unfold the first iterate + unfold constancy_at + unfold probWhileCut + unfold probWhileFunctional + + cases (sv1_aboveThreshC qs T τ data (initial, v0)) + · -- False case: both programs terminate with initial state + simp + · -- True case: both programs step to a point outside of the cone, so are zero + simp + apply tsum_congr + intro ⟨ initial', v1 ⟩ + + simp [sv1_aboveThreshF] + rw [← ENNReal.tsum_mul_right] + rw [← ENNReal.tsum_mul_right] + + -- Ignore the cases when hist' is not a legal step + cases Classical.em (¬ ∃ v', initial' = initial ++ [v']) + · rename_i h + apply tsum_congr + intro z + split + · exfalso + apply h + exists v0 + rename_i h' + cases h' + rfl + · simp + rename_i h + simp at h + rcases h with ⟨ _, Hv1' ⟩ + simp [Hv1'] + apply tsum_congr + intro _ + + -- Both branches are outside of the cone now + rw [external_to_cone_zero (by simp_all [cone_of_possibility])] + rw [external_to_cone_zero (by simp_all [cone_of_possibility])] + +lemma cone_constancy {qs} {T : ℤ} {ε₁ ε₂ : ℕ+} {τ : ℤ} {data : List sv_T} {v0 vk : ℤ} (cut : ℕ) (initial hist : List ℤ) : + cone_of_possibility cut initial hist -> + @constancy_at _ _ _ qs T ε₁ ε₂ τ data v0 vk cut initial hist := by + -- Need theorem to be true for all initial states, since this will increase during the induction + -- v0 and vk will also change in ways which don't matter + revert initial v0 vk + + induction cut + · -- Not true base case (cut 0 is always outside of the cone) + -- Mercifully it is easy to prove + intro v0 vk initial Hcone + unfold constancy_at + simp [probWhileCut, probWhileFunctional] + cases (sv1_aboveThreshC qs T τ data (initial, v0)) <;> simp + unfold cone_of_possibility at Hcone + linarith + + rename_i n IH + intro v0 vk initial Hcone + -- True base case: are we on the left-hand edge of the cone + cases Classical.em (hist.length = initial.length) + · apply cone_left_edge_constancy <;> assumption + + -- If not, unfold the first (and only the first) level of the induction + unfold constancy_at + unfold probWhileCut + unfold probWhileFunctional + + -- If the conditional is false, we are done + cases (sv1_aboveThreshC qs T τ data (initial, v0)) + · simp + + + -- If the conditional is true, we decrement N by one and add a sample to the history + rename_i Hcone_interior + unfold constancy_at at IH + simp + apply tsum_congr + intro ⟨ initial', vk' ⟩ + + -- We only need to consider the cases where sv1_aboveThreshF is nonzero + -- And this is exactly the case where initial' is initial plus one element + simp [sv1_aboveThreshF] + rw [← ENNReal.tsum_mul_right] + rw [← ENNReal.tsum_mul_right] + apply tsum_congr + intro z + cases Classical.em (¬ ∃ v', initial' = initial ++ [v']) + · split + · exfalso + rename_i h1 h2 + apply h1 + exists v0 + cases h2 + trivial + · simp + rename_i h + simp at h + rcases h with ⟨ v', Hinitial' ⟩ + split <;> try simp + rename_i h + cases h + congr 1 + + -- Apply the IH + apply IH + + -- Prove that the new value is in the new cone of possibility + unfold cone_of_possibility at Hcone + rcases Hcone with ⟨ Hcone1, Hcone2 ⟩ + unfold cone_of_possibility + apply And.intro + · simp + linarith + · simp + apply Nat.lt_iff_add_one_le.mp + apply LE.le.eq_or_lt at Hcone2 + cases Hcone2 + · exfalso + apply Hcone_interior + symm + trivial + · trivial + + +lemma sv2_sv3_eq (qs : sv_query sv_T) (T : ℤ) ε₁ ε₂ l : sv2_aboveThresh qs T ε₁ ε₂ l = sv3_aboveThresh qs T ε₁ ε₂ l := by + apply SLang.ext + + -- Step through equal headers + intro point + unfold sv2_aboveThresh + unfold sv3_aboveThresh + unfold sv3_loop + simp + apply tsum_congr + intro τ + congr 1 + apply tsum_congr + intro v0 + congr 1 + apply tsum_congr + intro final_state + rcases final_state with ⟨ hist, vk ⟩ + split <;> try rfl + rename_i H + simp [H, sv1_threshold] + clear H + + -- Apply a lemma about eventual constancy + apply probWhile_apply + apply @tendsto_atTop_of_eventually_const _ _ _ _ _ _ _ (hist.length + 1) + intro i H + + -- i is in the cone, reduce by induction + induction i + · -- Fake base case + simp at H + · rename_i i IH + -- Real base case + cases Classical.em (i = hist.length) + · simp_all + + -- Inductive case: use constancy + rw [<- IH ?G1] + case G1 => + apply LE.le.ge + apply GE.ge.le at H + apply LE.le.lt_or_eq at H + cases H + · apply Nat.le_of_lt_succ + trivial + · exfalso + rename_i Hcont _ + apply Hcont + linarith + have HK := @cone_constancy _ _ _ qs T ε₁ ε₂ τ l v0 vk i [] hist + unfold constancy_at at HK + conv => + enter [1, 3] + rw [add_comm] + apply HK + unfold cone_of_possibility + simp + apply GE.ge.le at H + apply LE.le.lt_or_eq at H + cases H + · linarith + · exfalso + rename_i h _ + apply h + linarith + + + +-- Commute out a single sample from the loop +lemma sv3_loop_unroll_1 (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (ε₁ ε₂ : ℕ+) l point L vk : + sv3_loop qs T ε₁ ε₂ τ l (point + 1) (L, vk) = + (do + let vk1 <- privNoiseGuess ε₁ ε₂ + if (sv1_aboveThreshC qs T τ l (L, vk)) + then (sv3_loop qs T ε₁ ε₂ τ l point (L ++ [vk], vk1)) + else probPure (L, vk)) := by + conv => + lhs + unfold sv3_loop + simp [probWhileCut, probWhileFunctional] + split + · apply SLang.ext + intro ⟨ HF, vkf ⟩ + simp [probBind] + unfold sv3_loop + conv => + enter [1, 1, a, 1] + unfold sv1_aboveThreshF + simp + conv => + enter [1, 1, a] + rw [← ENNReal.tsum_mul_right] + rw [ENNReal.tsum_comm] + apply tsum_congr + intro a + simp + rw [ENNReal.tsum_eq_add_tsum_ite ?G1] + case G1 => apply (L ++ [vk], a) + split + · conv => + rhs + rw [<- add_zero (_ * _)] + congr 1 + simp + intro i HK1 HK2 + exfalso + apply HK1 + apply HK2 + · exfalso + rename_i hk + apply hk + rfl + · simp + apply SLang.ext + intro ⟨ HF, vkf ⟩ + simp [probBind] + split <;> try simp + unfold privNoiseGuess + unfold privNoiseZero + symm + apply PMF.tsum_coe + +/- +## Program version 4 + - Executable + - Presamples at each point, and then executes a deterministic loop +-/ + +-- An sv4 state is an sv1 state plus a list of presamples +def sv4_state : Type := sv1_state × List ℤ + +def sv4_presample (ε₁ ε₂ : ℕ+) (n : ℕ) : SLang { l : List ℤ // List.length l = n } := + match n with + | Nat.zero => return ⟨ [], by simp ⟩ + | Nat.succ n' => do + let vk1 <- privNoiseGuess ε₁ ε₂ + let vks <- sv4_presample ε₁ ε₂ n' + return ⟨ vks ++ [vk1], by simp ⟩ + + +def sv4_aboveThreshF (s : sv4_state) : SLang sv4_state := + match s.2 with + | [] => probZero + | (p :: ps) => return ((s.1.1 ++ [s.1.2], p), ps) + +def sv4_aboveThreshC (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (st : sv4_state) : Bool := + sv1_aboveThreshC qs T τ l st.1 + +def sv4_loop (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (τ : ℤ) (l : List sv_T) (point : ℕ) (init : sv1_state) : SLang sv1_state := do + let presamples <- sv4_presample ε₁ ε₂ point + let v <- probWhileCut (sv4_aboveThreshC qs T τ l) sv4_aboveThreshF (point + 1) (init, presamples) + return v.1 + +lemma sv3_loop_unroll_1_alt (qs : sv_query sv_T) (τ : ℤ) (ε₁ ε₂ : ℕ+) l point (initial_state : sv1_state) : + sv3_loop qs T ε₁ ε₂ τ l (point + 1) initial_state = + (do + let vk1 <- privNoiseGuess ε₁ ε₂ + if (sv1_aboveThreshC qs T τ l initial_state) + then (sv3_loop qs T ε₁ ε₂ τ l point (initial_state.1 ++ [initial_state.2], vk1)) + else probPure initial_state) := by + rcases initial_state with ⟨ _ , _ ⟩ + rw [sv3_loop_unroll_1] + +def len_list_append_rev {m n : ℕ} (x : { l : List ℤ // l.length = m }) (y: { l : List ℤ // l.length = n }) : { l : List ℤ // l.length = n + m } := + ⟨ x.1 ++ y.1 , by simp [add_comm] ⟩ + +lemma vector_sum_singleton (f : { l : List ℤ // l.length = 1 } -> ENNReal) (P : (x : ℤ) -> ([x].length = 1)) : + (∑'(x : { l // l.length = 1 }), f x) = (∑' (x : ℤ), f ⟨ [x], P x⟩) := by + apply @tsum_eq_tsum_of_ne_zero_bij + case i => + simp [Function.support, DFunLike.coe] + exact fun x => ⟨ [x.1], by simp ⟩ + · simp [Function.Injective] + · simp [Function.support, Set.range] + intro L HL HN + cases L + · simp at HL + rename_i v R + cases R + · exists v + · simp at HL + · simp [Function.support, DFunLike.coe] + + +def vsm_0 (x : {l : List ℤ // l.length = n + 1}) : ℤ := + List.headI x.1 + +def vsm_rest (x : {l : List ℤ // l.length = n + 1}) : {l : List ℤ // l.length = n } := + ⟨ List.tail x.1, by simp ⟩ + +def vsm_last (x : {l : List ℤ // l.length = n + 1}) : ℤ := + List.getLastI x.1 + +def vsm_init (x : {l : List ℤ // l.length = n + 1}) : {l : List ℤ // l.length = n } := + ⟨ List.dropLast x.1, by simp ⟩ + + +lemma sv4_presample_eval (ε₁ ε₂ : ℕ+) (n : ℕ) (s : { l : List ℤ // List.length l = n }) : + sv4_presample ε₁ ε₂ n ⟨ List.reverse s, by simp ⟩ = List.foldl (fun acc b => acc * (privNoiseGuess ε₁ ε₂ b)) 1 s.1 := by + rcases s with ⟨ s, Hs ⟩ + simp + revert n + induction s + · intro n Hs + simp_all + simp at Hs + unfold sv4_presample + split + · simp + · exfalso + simp at Hs + · intro n Hs + rename_i s0 ss IH + simp at Hs + simp only [List.reverse_cons, List.foldl_cons] + unfold sv4_presample + cases n + · exfalso + simp at Hs + rename_i n' + generalize HF : (fun acc b => acc * (privNoiseGuess ε₁ ε₂) b) = F + simp + conv => + enter [1, 1, a] + rw [<- ENNReal.tsum_mul_left] + enter [1, i] + simp + rw [← ENNReal.tsum_prod] + rw [ENNReal.tsum_eq_add_tsum_ite (s0, ⟨ ss.reverse, by simp; linarith ⟩)] + conv => + rhs + rw [<- add_zero (List.foldl _ _ _ )] + rw [add_comm] + conv => + lhs + rw [add_comm] + congr 1 + · simp + intro _ _ _ _ E + exfalso + simp_all + apply congrArg List.reverse at E + simp at E + cases E + simp_all + simp + rw [IH _ ?G1] + case G1 => linarith + rw [HF] + suffices (F (List.foldl F 1 ss) s0 = List.foldl F (F 1 s0) ss) by + rw [<- HF] at this + simp at this + rw [<- HF] + rw [<- this] + rw [mul_comm] + rw [<- (@List.foldl_eq_of_comm' _ _ F ?G1 1 s0 ss)] + case G1 => + rw [<- HF] + intros + simp_all + repeat rw [ mul_assoc] + congr 1 + rw [mul_comm] + simp + +lemma sv4_presample_eval' (ε₁ ε₂ : ℕ+) (n : ℕ) (s : { l : List ℤ // List.length l = n }) : + sv4_presample ε₁ ε₂ n s = List.foldl (fun acc b => acc * (privNoiseGuess ε₁ ε₂ b)) 1 (List.reverse s) := by + have X := sv4_presample_eval ε₁ ε₂ n ⟨ List.reverse s, by simp ⟩ + simp only [List.reverse_reverse, Subtype.coe_eta] at X + trivial + + +lemma vector_sum_merge n (f : ℤ × { l : List ℤ // l.length = n } -> ENNReal) : + (∑'p, f p) = ∑'(p : {l : List ℤ // l.length = n + 1}), f (vsm_0 p, vsm_rest p) := by + apply @tsum_eq_tsum_of_ne_zero_bij + case i => + simp [Function.support, DFunLike.coe] + exact fun x => (vsm_0 x.1, vsm_rest x.1) + · simp [Function.Injective] + simp [vsm_0, vsm_rest] + intro L1 HL1 HL1f L2 HL2 HL2f Heq1 Heq2 + cases L1 + · simp at HL1 + cases L2 + · simp at HL2 + simp_all + · simp [Function.support, Set.range] + intro z L HL HF + exists (z :: L) + simp + exists HL + · simp [Function.support, DFunLike.coe] + + +-- Split in the other order, used as a helper function +-- REFACTOR: Get rid of this, use sv4_presample_split'' +lemma sv4_presample_split' (ε₁ ε₂ : ℕ+) (point : ℕ) (z : ℤ) (p : { l : List ℤ // List.length l = point }) : + privNoiseGuess ε₁ ε₂ z * sv4_presample ε₁ ε₂ point p = + sv4_presample ε₁ ε₂ (point + 1) ⟨ (p.1 ++ [z]), by simp ⟩ := by + rcases p with ⟨ L, HL ⟩ + revert HL + induction L + · intro HL + simp at HL + simp + conv => + rhs + unfold sv4_presample + unfold sv4_presample + split + · simp + rw [ENNReal.tsum_eq_add_tsum_ite z] + conv => + lhs + rw [<- (add_zero (privNoiseGuess _ _ _))] + congr 1 + · simp + · symm + simp + aesop + · exfalso + simp at HL + + · rename_i L0 LL _ + intro HL + simp + conv => + rhs + unfold sv4_presample + simp + conv => + enter [2, 1, a] + rw [← ENNReal.tsum_mul_left] + enter [1, b] + simp + rw [← ENNReal.tsum_prod] + rw [ENNReal.tsum_eq_add_tsum_ite (z, ⟨ L0 :: LL, HL ⟩)] + conv => + lhs + rw [<- (add_zero (_ * _))] + congr 1 + · simp + · symm + simp + intro A B C D E + exfalso + apply (congrArg List.reverse) at E + simp at E + cases E + apply D + · symm + trivial + rename_i E + apply (congrArg List.reverse) at E + simp at E + symm + trivial + + +lemma sv4_presample_split'' (ε₁ ε₂ : ℕ+) (point : ℕ) (z : ℤ) (p : { l : List ℤ // List.length l = point }) HP : + privNoiseGuess ε₁ ε₂ z * sv4_presample ε₁ ε₂ point p = + sv4_presample ε₁ ε₂ (point + 1) ⟨ (p.1 ++ [z]), HP ⟩ := by rw [sv4_presample_split'] + +lemma get_last_lemma (L : List ℤ) H : L.getLastI = L.getLast H := by + rw [List.getLastI_eq_getLast?] + rw [List.getLast?_eq_getLast_of_ne_nil H] + +lemma drop_init_lemma (L : List ℤ) (H : L ≠ []) : L.dropLast ++ [L.getLastI] = L := by + rw [get_last_lemma _ H] + apply List.dropLast_append_getLast H + +lemma list_lemma_1 {L : List ℤ} (H : L ≠ []) : List.headI L :: L.tail = L := by + apply List.cons_head?_tail + apply Option.mem_def.mpr + cases L + · exfalso + simp at H + simp + +lemma list_lemma_2 {L : List ℤ} (H : L ≠ []) : List.dropLast L ++ [L.getLastI] = L := by + rw [drop_init_lemma L H] + + +-- Splits and rearranges the functions +def sv4_presample_split (ε₁ ε₂ : ℕ+) (point : ℕ) : + sv4_presample ε₁ ε₂ (point + 1) = + (do + let presample_1 <- sv4_presample ε₁ ε₂ 1 + let presample_r <- sv4_presample ε₁ ε₂ point + return len_list_append_rev presample_1 presample_r) := by + apply SLang.ext + intro final_state + simp [sv4_presample] + conv => + enter [1, 1, a] + rw [<- ENNReal.tsum_mul_left] + rw [← ENNReal.tsum_prod] + rw [vector_sum_singleton _ (by simp)] + + have X (x : ℤ): (@tsum.{0, 0} ENNReal + (@NonUnitalNonAssocSemiring.toAddCommMonoid.{0} ENNReal + (@NonAssocSemiring.toNonUnitalNonAssocSemiring.{0} ENNReal + (@Semiring.toNonAssocSemiring.{0} ENNReal + (@OrderedSemiring.toSemiring.{0} ENNReal + (@OrderedCommSemiring.toOrderedSemiring.{0} ENNReal + (@CanonicallyOrderedCommSemiring.toOrderedCommSemiring.{0} ENNReal + ENNReal.instCanonicallyOrderedCommSemiring)))))) + ENNReal.instTopologicalSpace Int fun (x_1 : Int) => + @ite.{1} ENNReal (@Eq.{1} Int x_1 x) (Classical.propDecidable (@Eq.{1} Int x_1 x)) + (@OfNat.ofNat.{0} ENNReal 0 (@Zero.toOfNat0.{0} ENNReal instENNRealZero)) + (@ite.{1} ENNReal (@Eq.{1} Int x x_1) (Int.instDecidableEq x x_1) (@SLang.privNoiseGuess _ _ ε₁ ε₂ x_1) + 0)) = 0 := by simp; aesop + conv => + enter [2, 1, x, 1] + simp + rw [ENNReal.tsum_eq_add_tsum_ite x] + simp + enter [2] + rw [X] + clear X + simp + conv => + enter [2, 1, x] + rw [<- ENNReal.tsum_mul_left] + rw [← ENNReal.tsum_prod] + simp_all [len_list_append_rev] + + -- Join the sv4_presamples + conv => + enter [1, 1, p] + rw [sv4_presample_split'] + conv => + enter [2, 1, p] + rw [sv4_presample_split'] + rw [vector_sum_merge] + rw [vector_sum_merge] + + -- Both sums are singletons + simp [vsm_rest, vsm_0] + symm + rw [ENNReal.tsum_eq_add_tsum_ite final_state] + -- rcases final_state with ⟨ f, Hf ⟩ + -- simp + conv => + rhs + rw [<- (zero_add (tsum _))] + conv => + lhs + rw [add_comm] + congr 1 + · simp + intro A B C D + exfalso + rcases final_state with ⟨ f, Hf ⟩ + simp_all + apply C + rw [list_lemma_1 ?G1] + intro K + simp_all + + rw [ENNReal.tsum_eq_add_tsum_ite ⟨[vsm_last final_state] ++ (vsm_init final_state), by simp ⟩ ] + conv => + lhs + rw [<- (add_zero (@ite _ _ _ _ _))] + rw [add_comm] + conv => + rhs + rw [add_comm] + congr 1 + · symm + simp + intro A B C D + exfalso + rcases final_state with ⟨ f, Hf ⟩ + simp_all [vsm_rest, vsm_0, vsm_last, vsm_init] + apply C + rw [List.getLastI_eq_getLast?] + unfold Option.iget + simp + rw [list_lemma_1] + intro K + simp_all + + -- Apply the closed form to evaluate + rcases final_state with ⟨ f, Hf ⟩ + simp [vsm_rest, vsm_0, vsm_last, vsm_init] + rw [sv4_presample_eval'] + rw [sv4_presample_eval'] + simp only [] + + have Hfoldl_eq : + (List.foldl (fun acc b => acc * (privNoiseGuess ε₁ ε₂) b) 1 (f.tail ++ [f.headI]).reverse = + List.foldl (fun acc b => acc * (privNoiseGuess ε₁ ε₂) b) 1 (f.dropLast ++ [f.getLastI]).reverse):= by + apply List.Perm.foldl_eq + · intro A B C + simp + rw [mul_assoc] + rw [mul_assoc] + congr 1 + rw [mul_comm] + · conv => + lhs + simp + have H1 : (f.headI :: f.tail.reverse).Perm (f.headI :: f.tail) := by + apply List.Perm.cons f.headI + apply List.reverse_perm + trans + · apply H1 + rw [list_lemma_1 ?G2] + case G2 => intro _ ; simp_all + rw [list_lemma_2 ?G2] + case G2 => intro _ ; simp_all + apply List.Perm.symm + apply List.reverse_perm + rw [Hfoldl_eq] + clear Hfoldl_eq + generalize HX : List.foldl (fun acc b => acc * (privNoiseGuess ε₁ ε₂) b) 1 (f.dropLast ++ [f.getLastI]).reverse = X + clear HX + + -- Both of the conditionals are true + split + · split + · rfl + · exfalso + rename_i A B + apply B + rw [list_lemma_2] + intro K + simp [K] at Hf + · split + · exfalso + rename_i A B + apply A + rw [list_lemma_1] + intro K + simp [K] at Hf + · rfl + + + +def len_1_list_to_val (x : { l : List ℤ // l.length = 1 }) : ℤ := + let ⟨ xl, _ ⟩ := x + match xl with + | (v :: _) => v + +-- When we do induction on point, +-- We will want to generalize over all init +-- Unfolding this loop just moves the first presample into init +-- Which can apply the IH-- since it's some arbitrary init state and a presamples state generated by one fewer point + + +lemma presample_norm_lemma (point : ℕ) (ε₁ ε₂ : ℕ+) : + ∑' (a : { l // l.length = point }), sv4_presample ε₁ ε₂ point a = 1 := by + induction point + · simp [sv4_presample] + rw [ENNReal.tsum_eq_add_tsum_ite ⟨ [], sv4_presample.proof_3 ⟩] + conv => + rhs + rw [<- add_zero 1] + congr <;> simp + · rename_i n IH + + -- sv4_presample_split' + suffices (∑' (a : ℤ × { l : List ℤ // l.length = n }), privNoiseGuess ε₁ ε₂ a.1 * sv4_presample ε₁ ε₂ n a.2 = 1) by + conv at this => + enter [1, 1, a] + rw [sv4_presample_split'] + rw [<- this] + symm + rw [vector_sum_merge] + simp only [] + simp [vsm_0, vsm_rest] + symm + apply @tsum_eq_tsum_of_ne_zero_bij + case i => + simp [Function.support, DFunLike.coe] + exact fun x => ⟨ ↑(vsm_rest x.1) ++ [vsm_0 x.1], by simp ⟩ + · simp [Function.Injective] + simp [vsm_0, vsm_rest] + intro L1 HL1 HL1f L2 HL2 HL2f + cases L1 + · simp at HL1 + cases L2 + · simp at HL2 + simp_all + intro X + rename_i A B C D + have X1 : List.reverse (B ++ [A]) = List.reverse (D ++ [C]) := by exact congrArg List.reverse X + simp at X1 + apply X1 + · simp [Function.support, Set.range] + intro L1 HL1 Hf1 + exists ((vsm_last ⟨ L1, HL1 ⟩) :: (vsm_init ⟨ L1, HL1 ⟩)) + simp + apply And.intro + · simp [vsm_0, vsm_rest, vsm_init, vsm_last] + intro K + apply Hf1 + rw [<- K] + congr + symm + apply drop_init_lemma + intro K + simp [K] at HL1 + · simp [vsm_0, vsm_rest, vsm_init, vsm_last] + apply drop_init_lemma + intro K + simp [K] at HL1 + · simp [Function.support, DFunLike.coe] + intros + congr + rw [ENNReal.tsum_prod'] + conv => + enter [1, 1, a] + simp + rw [ENNReal.tsum_mul_left] + rw [IH] + simp + + -- Change noise to SPMF + have S := (privNoiseGuess ε₁ ε₂).2 + apply HasSum.tsum_eq S + + +def sv3_sv4_loop_eq qs (T : ℤ) (ε₁ ε₂ : ℕ+) (τ : ℤ) (l : List sv_T) (point : ℕ) (init : sv1_state) : + sv3_loop qs T ε₁ ε₂ τ l point init = sv4_loop qs T ε₁ ε₂ τ l point init := by + revert init + induction point + · -- It's a mess but it works + intro init + simp [sv3_loop, sv4_loop, probWhileCut, probWhileFunctional, sv4_presample, sv4_aboveThreshC] + split + · simp [sv4_aboveThreshF, sv1_aboveThreshF] + apply SLang.ext + intro final_state + simp + · apply SLang.ext + intro final_state + simp + split + · rename_i h + simp_all + symm + rw [condition_to_subset] + rw [ENNReal.tsum_eq_add_tsum_ite ⟨(init, []), rfl⟩] + simp_all + conv => + rhs + rw [<- add_zero 1] + congr + simp + intro a + rcases a + simp_all + · symm + simp + intro i H + rcases i + intro HK + rename_i hk2 _ _ + apply hk2 + cases HK + simp at H + trivial + · -- Inductive case + intro init + rename_i point IH + + -- Unfold sv3_loop on the left + rw [sv3_loop_unroll_1_alt] + + -- Apply the IH on the left + -- Doing it this way because I can't conv under the @ite? + + let ApplyIH : + ((do + let vk1 ← privNoiseGuess ε₁ ε₂ + if sv1_aboveThreshC qs T τ l init = true + then sv3_loop qs T ε₁ ε₂ τ l point (init.1 ++ [init.2], vk1) + else (SLang.probPure init) : SLang _) = + ((do + let vk1 ← privNoiseGuess ε₁ ε₂ + if sv1_aboveThreshC qs T τ l init = true + then sv4_loop qs T ε₁ ε₂ τ l point (init.1 ++ [init.2], vk1) + else probPure init) : SLang _)) := by + simp + apply SLang.ext + intro final_state + simp + apply tsum_congr + intro _ + congr + split + · rw [IH] + · rfl + + rw [ApplyIH] + clear ApplyIH IH + have ToPresample : + (do + let vk1 ← privNoiseGuess ε₁ ε₂ + if sv1_aboveThreshC qs T τ l init = true then sv4_loop qs T ε₁ ε₂ τ l point (init.1 ++ [init.2], vk1) else probPure init) = + (do + let vps ← sv4_presample ε₁ ε₂ 1 + let vk1 := len_1_list_to_val vps + if sv1_aboveThreshC qs T τ l init = true then sv4_loop qs T ε₁ ε₂ τ l point (init.1 ++ [init.2], vk1) else probPure init) := by + apply SLang.ext + intro final_state + simp + -- There is a bijection here + rw [vector_sum_singleton _ (by simp)] + apply tsum_congr + intro x + simp [len_1_list_to_val] + simp [sv4_presample] + rw [ENNReal.tsum_eq_add_tsum_ite x] + simp + have X : ( @tsum.{0, 0} ENNReal + (@NonUnitalNonAssocSemiring.toAddCommMonoid.{0} ENNReal + (@NonAssocSemiring.toNonUnitalNonAssocSemiring.{0} ENNReal + (@Semiring.toNonAssocSemiring.{0} ENNReal + (@OrderedSemiring.toSemiring.{0} ENNReal + (@OrderedCommSemiring.toOrderedSemiring.{0} ENNReal + (@CanonicallyOrderedCommSemiring.toOrderedCommSemiring.{0} ENNReal + ENNReal.instCanonicallyOrderedCommSemiring)))))) + ENNReal.instTopologicalSpace Int fun (x_1 : Int) => + @ite.{1} ENNReal (@Eq.{1} Int x_1 x) (Classical.propDecidable (@Eq.{1} Int x_1 x)) + (@OfNat.ofNat.{0} ENNReal 0 (@Zero.toOfNat0.{0} ENNReal instENNRealZero)) + (@ite.{1} ENNReal (@Eq.{1} Int x x_1) (Int.instDecidableEq x x_1) (@SLang.privNoiseGuess dps dpn ε₁ ε₂ x_1) + (@OfNat.ofNat.{0} ENNReal 0 + (@Zero.toOfNat0.{0} ENNReal + (@MulZeroClass.toZero.{0} ENNReal + (@NonUnitalNonAssocSemiring.toMulZeroClass.{0} ENNReal + (@NonAssocSemiring.toNonUnitalNonAssocSemiring.{0} ENNReal + (@Semiring.toNonAssocSemiring.{0} ENNReal + (@OrderedSemiring.toSemiring.{0} ENNReal + (@OrderedCommSemiring.toOrderedSemiring.{0} ENNReal + (@CanonicallyOrderedCommSemiring.toOrderedCommSemiring.{0} ENNReal + ENNReal.instCanonicallyOrderedCommSemiring))))))))))) = 0 := by simp; aesop + + conv => + enter [2, 1, 2] + skip + rw [X] + simp + + rw [ToPresample] + clear ToPresample + + -- Now, just need to prove this unfolding of sv4_loop + unfold sv4_loop + conv => + enter [2] + unfold probWhileCut + unfold probWhileFunctional + unfold sv4_aboveThreshC + + split + · conv => + enter [2] + rw [sv4_presample_split] + simp + + apply SLang.ext + intro final_state + simp + apply tsum_congr + intro vsample_1 + congr 1 + apply tsum_congr + intro vsample_rest + congr 1 + -- Seems that the RHS inner sum is an indicator on final_state, so I should + -- commute that out front + conv => + enter [2, 1, a] + rw [<- ENNReal.tsum_mul_left] + conv => + enter [2] + rw [ENNReal.tsum_comm] + apply tsum_congr + intro ⟨ ns_state, ns_presamples ⟩ -- Not sure what the variable ns represents? + simp + split <;> try simp + rename_i HF + + -- Investigate the RHS term for simplifications? + rcases vsample_1 with ⟨ vs1, Hvs1 ⟩ + rcases vsample_rest with ⟨ vsr, Hvsr ⟩ + cases vs1 + · simp at Hvs1 + rename_i vs1 vs_emp + conv => + enter [2, 1, a, 1] + unfold sv4_aboveThreshF + simp [len_list_append_rev] + have Hemp : vs_emp = [] := by cases vs_emp <;> simp_all + simp_all + congr + + · conv => + enter [2] + rw [sv4_presample_split] + simp + apply SLang.ext + intro final_state + simp + apply tsum_congr + intro v1 + split + · conv => + enter [1] + rw [<- mul_one (sv4_presample _ _ _ _)] + congr 1 + symm + apply presample_norm_lemma + · simp + +def sv4_aboveThresh (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := + fun (point : ℕ) => + let computation : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + let sk <- sv4_loop qs T ε₁ ε₂ τ l point ([], v0) + return (sv1_threshold sk) + computation point + +def sv3_sv4_eq qs (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : + sv3_aboveThresh qs T ε₁ ε₂ l = sv4_aboveThresh qs T ε₁ ε₂ l := by + unfold sv3_aboveThresh + unfold sv4_aboveThresh + simp + conv => + enter [1, x, 1, y, 2, 1, z] + rw [sv3_sv4_loop_eq] + +/- +## Program version 5 + - Executable + - Isolates the loop for the next step +-/ + +def sv5_loop (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (point : ℕ) (init : sv4_state) : SLang ℕ := do + let sk <- probWhileCut (sv4_aboveThreshC qs T τ l) sv4_aboveThreshF (point + 1) init + return (sv1_threshold sk.1) + +def sv5_aboveThresh (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := + fun (point : ℕ) => + let computation : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + let presamples <- sv4_presample ε₁ ε₂ point + @sv5_loop _ qs T τ l point (([], v0), presamples) + computation point + +def sv4_sv5_eq (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : + sv4_aboveThresh qs T ε₁ ε₂ l = sv5_aboveThresh qs T ε₁ ε₂ l := by + unfold sv4_aboveThresh + unfold sv5_aboveThresh + unfold sv4_loop + unfold sv5_loop + simp + + +/- +## Program version 6 + - Executable + - Changes the loop from a probWhileCut into a single, deterministic, check +-/ + +-- -- When you look at exactly n loops in the future, the check evaluates to true +-- def sv6_aboveThresh_check (τ : ℤ) (l : List ℕ) (s : sv4_state) (n : ℕ) : Bool := +-- match n with +-- | 0 => true +-- | Nat.succ n' => +-- match s with +-- -- If there is more samples on the tape, call recursively +-- | ((past, present), (future_next :: future_rest)) => +-- sv4_aboveThreshC τ l ((past, present), (future_next :: future_rest)) ∧ +-- sv6_aboveThresh_check τ l ((past ++ [present], future_next), future_rest) n' +-- | (_, []) => +-- -- Out of samples on the tape +-- false + +-- The state sp is a "past configuration" of sc, ie. one we already checked +def is_past_configuration (sp sc : sv4_state) : Prop := + (sp.1.1.length ≤ sc.1.1.length) ∧ sp.1.1 ++ [sp.1.2] ++ sp.2 = sc.1.1 ++ [sc.1.2] ++ sc.2 + +-- All past configurations had their loop check execute to True +def sv6_aboveThresh_hist (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (s : sv4_state) : Prop := + ∀ sp, (is_past_configuration sp s) -> sv4_aboveThreshC qs T τ l sp = true + + +-- If all past configurations of sp evaluate to True, +-- and the next one evaluates to true, +-- then all past configurations for the next one evaluate to True +lemma sv6_aboveThresh_hist_step (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (past fut_rest : List ℤ) (present fut : ℤ) : + sv6_aboveThresh_hist qs T τ l ((past, present), fut :: fut_rest) -> + sv4_aboveThreshC qs T τ l ((past ++ [present], fut), fut_rest) -> + sv6_aboveThresh_hist qs T τ l ((past ++ [present], fut), fut_rest) := by + intro H1 H2 + unfold sv6_aboveThresh_hist + intro s H3 + unfold is_past_configuration at H3 + rcases H3 with ⟨ H3, H4 ⟩ + simp_all + + apply Nat.lt_or_eq_of_le at H3 + cases H3 + · -- The length of s1.1.1 is less than or equal to past + apply H1 + apply And.intro + · linarith + · simp_all + · rename_i Hs11_len + -- The length of s.1.1 is equal to past.length + 1 + -- Now we can characterize s + have Hs11 : List.take (past.length + 1) (s.1.1 ++ s.1.2 :: s.2) = + List.take (past.length + 1) (past ++ present :: fut :: fut_rest) := by + rw [H4] + rw [List.take_left' Hs11_len] at Hs11 + simp [List.take_append] at Hs11 + simp_all + rcases H4 with ⟨ H5, H6 ⟩ + cases s + rename_i s1 _ + cases s1 + simp_all + + +def is_past_configuration_strict (sp sc : sv4_state) : Prop := + (sp.1.1.length < sc.1.1.length) ∧ sp.1.1 ++ [sp.1.2] ++ sp.2 = sc.1.1 ++ [sc.1.2] ++ sc.2 + +-- All strictly past configurations had their loop check execute to True +def sv6_aboveThresh_hist_strict (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (s : sv4_state) : Prop := + ∀ sp, (is_past_configuration_strict sp s) -> sv4_aboveThreshC qs T τ l sp = true + +lemma sv6_aboveThresh_hist_step_strict (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (past fut_rest : List ℤ) (present fut : ℤ) : + sv6_aboveThresh_hist_strict qs T τ l ((past, present), fut :: fut_rest) -> + sv4_aboveThreshC qs T τ l ((past, present), fut :: fut_rest) -> + sv6_aboveThresh_hist_strict qs T τ l ((past ++ [present], fut), fut_rest) := by + intro H1 H2 + unfold sv6_aboveThresh_hist_strict + intro s H3 + unfold is_past_configuration_strict at H3 + rcases H3 with ⟨ H3, H4 ⟩ + simp_all + + apply Nat.lt_or_eq_of_le at H3 + cases H3 + · -- The length of s1.1.1 is less than or equal to past + apply H1 + apply And.intro + · linarith + · simp_all + · rename_i Hs11_len + have Hs11 : List.take (past.length) (s.1.1 ++ s.1.2 :: s.2) = + List.take (past.length) (past ++ present :: fut :: fut_rest) := by + rw [H4] + simp at Hs11_len + rw [List.take_left] at Hs11 + rw [<- Hs11_len] at Hs11 + rw [List.take_left] at Hs11 + cases s + rename_i s1 s2 + cases s1 + rename_i s11 s12 + simp_all + +@[simp] +def sv6_cond_rec (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (past : List ℤ) (pres : ℤ) (future : List ℤ) : Bool := + match future with + | [] => ¬ (sv4_aboveThreshC qs T τ l ((past, pres), [])) + | (f :: ff) => (sv4_aboveThreshC qs T τ l ((past, pres), f :: ff) = true) && (sv6_cond_rec qs T τ l (past ++ [pres]) f ff) + +@[simp] +def sv6_cond (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (init : sv4_state) : Bool := + sv6_cond_rec qs T τ l init.1.1 init.1.2 init.2 + +def sv6_loop (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (point : ℕ) (init : sv4_state) : SLang ℕ := do + if (sv6_cond qs T τ l init) + then return point + else probZero + +-- QUESTION: What do we need for equality in the base case? +lemma sv5_sv6_loop_base_case (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (point eval : ℕ) (past future : List ℤ) (pres : ℤ) : + future = [] -> + List.length future = eval -> + List.length (past ++ [pres] ++ future) = point + 1 -> + (sv6_loop qs T τ l point ((past, pres), future)) point = (sv5_loop qs T τ l eval ((past, pres), future)) point := by + intro Hfuture Heval Hstate + rw [Hfuture] + simp_all + rw [<- Heval] + unfold sv5_loop + unfold probWhileCut + unfold probWhileFunctional + split + · rename_i h + simp [probWhileCut, sv6_loop] + rw [h] + simp + · rename_i h + simp at h + simp [sv6_loop] + simp [h] + unfold sv4_state + unfold sv1_state + rw [ENNReal.tsum_eq_add_tsum_ite ((past, pres), [])] + simp [sv1_threshold] + simp [Hstate] + conv => + lhs + rw [<- add_zero 1] + congr + symm + simp + aesop + +-- QUESTION: What do we need for sv6_loop to be equal to sv6_loop_cond (next) +lemma sv6_loop_ind (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (point : ℕ) (past ff: List ℤ) (pres f: ℤ) : + (sv4_aboveThreshC qs T τ l ((past, pres), f :: ff) = true) -> + List.length (past ++ [pres] ++ f :: ff) = point + 1 -> + (sv6_loop qs T τ l point ((past, pres), f :: ff)) point = (sv6_loop qs T τ l point ((past ++ [pres], f), ff)) point := by + intro Hcondition _ + unfold sv6_loop + suffices (sv6_cond qs T τ l ((past, pres), f :: ff) = sv6_cond qs T τ l ((past ++ [pres], f), ff)) by + split <;> split <;> try rfl + all_goals simp_all + conv => + lhs + unfold sv6_cond + simp + simp [Hcondition] + + +-- QUESTION: What do we need for sv5 to be equal to sv5_loop_cond (next) evaluated at point +lemma sv5_loop_ind (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (eval point : ℕ) (past ff: List ℤ) (pres f: ℤ) : + (sv4_aboveThreshC qs T τ l ((past, pres), f :: ff) = true) -> + (sv5_loop qs T τ l (eval + 1) ((past, pres), f :: ff)) point = (sv5_loop qs T τ l eval ((past ++ [pres], f), ff)) point := by + intro Hcondition + conv => + lhs + unfold sv5_loop + unfold probWhileCut + unfold probWhileFunctional + split + · simp only [bind, pure, sv4_aboveThreshF, pure_bind] + unfold sv5_loop + rfl + · exfalso + trivial + +def sv6_aboveThresh (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := + fun (point : ℕ) => + let computation : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + let presamples <- sv4_presample ε₁ ε₂ point + @sv6_loop _ qs T τ l point (([], v0), presamples) + computation point + + +-- sv6_loop and sv5_loop are equal at point (under some conditions) +def sv5_sv6_loop_eq_point (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (point eval : ℕ) (past future : List ℤ) (pres : ℤ) : + List.length (past ++ [pres] ++ future) = point + 1 -> + List.length future = eval -> + -- sv6_aboveThresh_hist_strict τ l ((past, pres), future) -> + @sv5_loop _ qs T τ l eval ((past, pres), future) point = @sv6_loop _ qs T τ l point ((past, pres), future) point := by + revert past pres eval + induction future + · intro eval past pres H1 H2 + symm + simp at H1 + apply (sv5_sv6_loop_base_case _ _ _ _ _ _ _ _ _ (by rfl) H2 ?G2) + case G2 => + simp + trivial + · rename_i f ff IH + intro eval past pres Hstate Heval + cases eval + · simp at Heval + + rename_i eval + cases (Classical.em (sv4_aboveThreshC qs T τ l ((past, pres), f :: ff) = true)) + · rename_i Hcondition + rw [sv5_loop_ind _ _ _ _ _ _ _ _ _ _ Hcondition] + rw [sv6_loop_ind _ _ _ _ _ _ _ _ _ Hcondition Hstate] + apply (IH eval (past ++ [pres]) f ?G1 ?G2) + case G1 => simp_all + case G2 => simp_all + + rename_i Hcondition + simp at Hcondition + simp [sv6_loop, Hcondition] + unfold sv5_loop + unfold probWhileCut + unfold probWhileFunctional + rw [Hcondition] + simp + intro i Hi Hk + simp_all [sv1_threshold] + + +def sv5_sv6_eq (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : + sv5_aboveThresh qs T ε₁ ε₂ l = sv6_aboveThresh qs T ε₁ ε₂ l := by + unfold sv5_aboveThresh + unfold sv6_aboveThresh + apply SLang.ext + intro eval_point + simp + apply tsum_congr + intro τ + congr + apply funext + intro v0 + congr + apply funext + intro future + congr + rw [sv5_sv6_loop_eq_point] + · simp + · exact Mathlib.Vector.length_val future + + +/- +## Program v8 +Not executable +Separates out the zero case +-/ + +def sv7_aboveThresh (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := + fun (point : ℕ) => + let computation : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + match point with + | 0 => + if (¬ (sv4_aboveThreshC qs T τ l (([], v0), []))) + then probPure point + else probZero + | (Nat.succ point') => do + let presamples <- sv4_presample ε₁ ε₂ point' + let vk <- privNoiseGuess ε₁ ε₂ + if (sv6_cond qs T τ l (([], v0), presamples ++ [vk])) + then probPure point + else probZero + computation point + +def sv6_sv7_eq (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : + sv6_aboveThresh qs T ε₁ ε₂ l = sv7_aboveThresh qs T ε₁ ε₂ l := by + apply SLang.ext + intro point + unfold sv6_aboveThresh + unfold sv7_aboveThresh + cases point + · simp [sv6_loop, sv6_cond] + apply tsum_congr + intro τ + congr 1 + apply tsum_congr + intro v0 + congr 1 + simp [sv4_presample] + rw [ENNReal.tsum_eq_add_tsum_ite ⟨[], rfl⟩] + simp + conv => + rhs + rw [<- (add_zero (@ite _ _ _ _ _))] + congr 1 + simp + · rename_i point' + simp only [] + apply tsum_congr + intro τ + congr 1 + apply tsum_congr + intro v0 + congr 1 + simp + conv => + enter [2, 1, a] + rw [<- ENNReal.tsum_mul_left] + conv => + lhs + unfold sv6_loop + simp + rw [ENNReal.tsum_comm] + rw [← ENNReal.tsum_prod] + conv => + rhs + enter [1, a] + rw [<- mul_assoc] + enter [1] + rw [mul_comm] + rw [sv4_presample_split'] + apply @tsum_eq_tsum_of_ne_zero_bij + case i => + exact fun x => ⟨ x.1.2 ++ [x.1.1], by simp ⟩ + · simp [Function.Injective] + intros + rename_i H + apply congrArg List.reverse at H + simp at H + apply H + · simp [Function.support, Set.range] + intros L HL Hf1 Hf2 + exists (vsm_last ⟨ L, HL ⟩) + exists (vsm_init ⟨ L, HL ⟩) + simp + apply And.intro + · apply And.intro + · intro K + apply Hf1 + rw [<- K] + congr + simp [vsm_init, vsm_last] + symm + apply drop_init_lemma + intro K + simp [K] at HL + · intro K + apply Hf2 + rw [<- K] + split <;> split + · rfl + · exfalso + rename_i h1 h2 + apply h2 + rw [<- h1] + congr + simp [vsm_init, vsm_last] + apply drop_init_lemma + intro K + simp [K] at HL + · exfalso + rename_i h2 h1 + apply h2 + rw [<- h1] + congr + simp [vsm_init, vsm_last] + symm + apply drop_init_lemma + intro K + simp [K] at HL + · rfl + · simp [vsm_init, vsm_last] + apply drop_init_lemma + intro K + simp [K] at HL + · simp + +/- +## Program v8 + +Not executable +Defines G from the paper +-/ + + + +def sv8_sum (qs : sv_query sv_T) (l : List sv_T) (past : List ℤ) (pres : ℤ) : ℤ := + qs (List.length past) l + pres + -- exactDiffSum (List.length past) l + pres + +-- G is the maximum value of sv8_sum over the tape +def sv8_G (qs : sv_query sv_T) (l : List sv_T) (past : List ℤ) (pres : ℤ) (future : List ℤ) : ℤ := + match future with + | [] => sv8_sum qs l past pres + | (f :: ff) => max (sv8_sum qs l past pres) (sv8_G qs l (past ++ [pres]) f ff) + + def sv8_cond (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (past : List ℤ) (pres : ℤ) (future : List ℤ) (last : ℤ) : Bool := + (sv8_G qs l past pres future < τ + T) ∧ (sv8_sum qs l (past ++ [pres] ++ future) last ≥ τ + T) + +def sv8_aboveThresh (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := + fun (point : ℕ) => + let computation : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + match point with + | 0 => + if (sv8_sum qs l [] v0 ≥ τ + T) + then probPure point + else probZero + | (Nat.succ point') => do + let presamples <- sv4_presample ε₁ ε₂ point' + let vk <- privNoiseGuess ε₁ ε₂ + if (sv8_cond qs T τ l [] v0 presamples vk) + then probPure point + else probZero + computation point + +lemma sv7_sv8_cond_eq (qs : sv_query sv_T) (T : ℤ) (τ : ℤ) (l : List sv_T) (v0 : ℤ) (vs : List ℤ) (vk : ℤ) : + sv8_cond qs T τ l [] v0 vs vk = sv6_cond qs T τ l (([], v0), vs ++ [vk]) := by + suffices (∀ init, sv8_cond qs T τ l init v0 vs vk = sv6_cond qs T τ l ((init, v0), vs ++ [vk])) by + apply this + revert v0 + unfold sv8_cond + simp + induction vs + · intro v0 init + simp [sv6_cond_rec, sv4_aboveThreshC, sv1_aboveThreshC, sv1_threshold, sv1_noise, List.length] + simp [sv8_G, sv8_sum] + · intro vi init + rename_i vi_1 rest IH + have IH' := IH vi_1 (init ++ [vi]) + simp at IH' + clear IH + conv => + rhs + simp [sv6_cond_rec] + rw [<- IH'] + clear IH' + cases (decide (τ + T ≤ sv8_sum qs l (init ++ vi :: vi_1 :: rest) vk)) <;> simp + conv => + lhs + unfold sv8_G + simp + cases (decide (sv8_G qs l (init ++ [vi]) vi_1 rest < τ + T)) <;> simp + simp [sv4_aboveThreshC, sv1_aboveThreshC, sv8_sum, sv1_threshold, sv1_noise] + + +def sv7_sv8_eq (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : + sv7_aboveThresh qs T ε₁ ε₂ l = sv8_aboveThresh qs T ε₁ ε₂ l := by + apply SLang.ext + intro point + unfold sv7_aboveThresh + unfold sv8_aboveThresh + cases point + · simp [sv4_aboveThreshC, sv1_aboveThreshC, sv8_sum, sv1_threshold, sv1_noise] + rename_i point' + simp only [] + repeat (apply tsum_congr; intro _; congr 1) + simp only [sv7_sv8_cond_eq, sv6_cond] + + +/- +## Program v9 + +Not executable +Rewritten so that the randomness we will cancel out is right at the front +-/ + + +def sv9_aboveThresh (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : SLang ℕ := + fun (point : ℕ) => + let computation : SLang ℕ := do + match point with + | 0 => do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + if (sv8_sum qs l [] v0 ≥ τ + T) + then probPure point + else probZero + | (Nat.succ point') => do + let v0 <- privNoiseGuess ε₁ ε₂ + let presamples <- sv4_presample ε₁ ε₂ point' + let τ <- privNoiseThresh ε₁ ε₂ + let vk <- privNoiseGuess ε₁ ε₂ + if (sv8_cond qs T τ l [] v0 presamples vk) + then probPure point + else probZero + computation point + +def sv8_sv9_eq (qs : sv_query sv_T) (T : ℤ) (ε₁ ε₂ : ℕ+) (l : List sv_T) : + sv8_aboveThresh qs T ε₁ ε₂ l = sv9_aboveThresh qs T ε₁ ε₂ l := by + apply SLang.ext + intro point + unfold sv8_aboveThresh + unfold sv9_aboveThresh + simp + split + · simp + · simp + conv => + lhs + conv => + enter [1, a] + rw [<- ENNReal.tsum_mul_left] + conv => + enter [1, b] + rw [<- mul_assoc] + conv => + enter [1] + rw [mul_comm] + rw [mul_assoc] + rw [ENNReal.tsum_comm] + conv => + enter [1, b] + rw [ENNReal.tsum_mul_left] + + apply tsum_congr + intro b + congr 1 + + conv => + lhs + conv => + enter [1, a] + rw [<- ENNReal.tsum_mul_left] + conv => + enter [1, b] + rw [<- mul_assoc] + conv => + enter [1] + rw [mul_comm] + rw [mul_assoc] + rw [ENNReal.tsum_comm] + conv => + enter [1, b] + rw [ENNReal.tsum_mul_left] + + +lemma ENNReal.tsum_lb_single (x : T) (f : T -> ENNReal) (l : ENNReal) : + l ≤ f x -> l ≤ ∑' (a : T), f a := by + intro H + apply le_trans H + apply ENNReal.le_tsum + +lemma ENNReal.tsum_lb_subset (P : T -> Prop) (f : T -> ENNReal) (l : ENNReal) : + l ≤ (∑'(a : {t : T // P t}), f a.1) -> l ≤ ∑' (a : T), f a := by + intro H + apply le_trans H + apply ENNReal.tsum_comp_le_tsum_of_injective + simp [Function.Injective] + + +lemma ENNReal.tsum_split (P : T -> Prop) (f : T -> ENNReal) : + ∑' (a : T), f a = (∑'(a : {t : T // P t}), f a.1) + (∑'(a : {t : T // ¬P t}), f a.1) := by + symm + apply tsum_add_tsum_compl <;> apply ENNReal.summable + +/- +def β_geo (β : ENNReal) : SLang ℕ := (probGeometric (fun x => if x then β else 1 - β)) + +def β_geo_recurrence (β : ENNReal) (n : ℕ) (H : n > 0) : β_geo β (n + 1) = β * β_geo β n := by + simp [β_geo, probGeometric_apply] + split + · exfalso + simp_all + · ring_nf + rename_i h + rw [mul_pow_sub_one h] + +def β_geo' (β : ℝ) : ℕ -> ℝ := + fun N => + match N with + | 0 => 0 + | Nat.succ N' => ENNReal.toReal (β_geo β N') +-/ + +def geo_cdf (β : ENNReal) (n : ℕ) : ENNReal := 1 - (1 - β)^n + + +-- set_option pp.coercions false +lemma geo_cdf_rec (β : ENNReal) (Hβ1: β ≤ 1) (n : ℕ) : + geo_cdf β (n + 1) = β + (1 - β) * geo_cdf β n := by + unfold geo_cdf + /- + suffices ENNReal.toEReal (1 - (1 - β) ^ (n + 1)) = ENNReal.toEReal (β + (1 - β) * (1 - (1 - β) ^ n)) by + apply EReal.coe_ennreal_injective + apply this + -/ + + suffices ENNReal.toReal (1 - (1 - β) ^ (n + 1)) = ENNReal.toReal (β + (1 - β) * (1 - (1 - β) ^ n)) by + apply (ENNReal.toReal_eq_toReal_iff _ _).mp at this + cases this + · trivial + rename_i this + cases this + · rename_i h + rcases h with ⟨ A, B ⟩ + simp_all + exfalso + cases B + · simp_all + · rename_i h + apply ENNReal.mul_eq_top.mp at h + simp_all + · rename_i h + rcases h with ⟨ A, _ ⟩ + simp_all + ring_nf + have C1 : β ≠ ⊤ := by + intro K + simp_all + have C3 : (1 - β) ^ (n + 1) ≤ 1 := by + apply pow_le_one' + apply tsub_le_self + have C4 : (1 - β) ^ n ≤ 1 := by + apply pow_le_one' + apply tsub_le_self + have C2 : (1 - β) * (1 - (1 - β) ^ n) ≠ ⊤ := by + apply ENNReal.mul_ne_top + · apply ENNReal.sub_ne_top + simp + · apply ENNReal.sub_ne_top + simp + rw [ENNReal.toReal_add C2 C1] + rw [ENNReal.toReal_mul] + rw [← pow_succ'] + rw [ENNReal.toReal_sub_of_le C3 (by simp)] + rw [ENNReal.toReal_sub_of_le Hβ1 (by simp)] + rw [ENNReal.toReal_sub_of_le C4 (by simp)] + rw [ENNReal.toReal_pow] + rw [ENNReal.toReal_pow] + rw [ENNReal.toReal_sub_of_le Hβ1 (by simp)] + rw [mul_sub] + simp + rw [pow_succ] + linarith + + + + +end equiv + + +abbrev has_lucky {sv_T : Type} (qs : sv_query sv_T) (T : ℤ) : Prop := + ∀ (τ : ℤ) (l : List sv_T), ∃ (K : ℤ), ∀ A, ∀ (K' : ℤ), K ≤ K' -> qs A l + K' ≥ τ + T + +section pmf + +lemma ite_conv_left {P : Prop} {D} {a b c : ENNReal} (H : a = c) : @ite _ P D a b = @ite _ P D c b := by + split <;> trivial + +lemma ite_mono_left {P : Prop} {D} {a b c : ENNReal} (H : a ≤ c) : @ite _ P D a b ≤ @ite _ P D c b := by + split <;> trivial + +lemma ite_lemma_1 {P : Prop} {D} {f : T -> ENNReal} : ∑'(a : T), @ite _ P D (f a) 0 = @ite _ P D (∑'(a : T), f a) 0 := by + split + · rfl + · simp + +variable (qs : sv_query sv_T) +variable (T : ℤ) +variable (lucky_guess : has_lucky qs T) + +lemma sv1_lb ε₁ ε₂ l : + 1 ≤ ∑'s, (@sv1_aboveThresh PureDPSystem laplace_pureDPSystem sv_T qs T ε₁ ε₂ l s) := by + simp only [sv1_aboveThresh, bind, pure, bind_apply] + -- Push the sum over s inwards + conv => + rhs + rw [ENNReal.tsum_comm] + enter [1, b] + rw [ENNReal.tsum_mul_left] + enter [2] + rw [ENNReal.tsum_comm] + enter [1, i] + rw [ENNReal.tsum_mul_left] + + -- Reduce to CDF problem + apply @le_trans _ _ _ (∑' (b : ℤ), (privNoiseThresh ε₁ ε₂) b * 1) _ ?G1 + case G1 => + apply Eq.le + rw [ENNReal.tsum_mul_right] + simp + have R1 : ∑' (a : ℤ), (privNoiseThresh ε₁ ε₂) a = 1 := by + rw [<- Summable.hasSum_iff ENNReal.summable] + cases (privNoiseThresh ε₁ ε₂) + simp [DFunLike.coe, SPMF.instFunLike] + trivial + simp_all + apply ENNReal.tsum_le_tsum + intro τ + apply ENNReal.mul_left_mono + + -- Turn it into a supremum + conv => + enter [2, 1, i_1, 2, 1, i ,1, b] + simp only [probWhile] + rw [ENNReal.iSup_mul] + conv => + enter [2, 1, v0, 2, 1, state_size, 1, state] + + -- Commute out the cut number first + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_le_tsum + intro v0 + apply ENNReal.mul_left_mono + apply ENNReal.tsum_le_tsum + intro state_size + apply ENNReal.tsum_iSup_comm' + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_le_tsum + intro v0 + apply ENNReal.mul_left_mono + apply ENNReal.tsum_iSup_comm' + simp + conv => + enter [2, 1, v0] + rw [ENNReal.mul_iSup] + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_iSup_comm' + + -- The lucky event: sampling above a value T, which forces the loop to terminate + rcases (lucky_guess τ l) with ⟨ K, HK ⟩ + let PLucky (K' : ℤ) : Prop := K ≤ K' + have HLucky : ∀ (K' : ℤ), ∀ A, PLucky K' → qs A l + K' ≥ τ + T := by + aesop + clear HK + + -- We will split the sum based on PLucky at each step + + -- ρ is the probability of the lucky event + let ρ : ENNReal := (∑'(a : {t : ℤ // PLucky t}), privNoiseGuess ε₁ ε₂ a.1) + have Hρ_1 : (∑'a, privNoiseGuess ε₁ ε₂ a) = 1 := by + cases (privNoiseGuess ε₁ ε₂) + simp [DFunLike.coe, SPMF.instFunLike] + rw [<- Summable.hasSum_iff ENNReal.summable] + trivial + have Hρ_lb : 0 < ρ := by + -- There is at least one lucky element + have HU : PLucky K := by simp [PLucky] + apply LT.lt.trans_le _ ?G2 + case G2 => apply ENNReal.le_tsum ⟨ _, HU ⟩ + simp [privNoiseGuess, privNoiseZero, DPNoise.noise, privNoisedQueryPure, DiscreteLaplaceGenSamplePMF] + simp [DFunLike.coe, PMF.instFunLike] + apply Real.mul_pos + · apply div_pos + · simp + · apply Right.add_pos' + · apply Real.exp_pos + · simp + · apply Real.exp_pos + have Hρ_nz : ρ ≠ 0 := by apply pos_iff_ne_zero.mp Hρ_lb + have Hρ_ub : ρ ≤ 1 := by + rw [<- Hρ_1] + rw [ENNReal.tsum_split PLucky] + simp_all only [ge_iff_le, self_le_add_right, PLucky, ρ] + have Hρ_ub_strict : ρ < 1 := by + rw [<- Hρ_1] + rw [ENNReal.tsum_split PLucky] + conv => + lhs + rw [<- add_zero ρ] + apply ENNReal.add_lt_add_of_le_of_lt + · intro X; simp_all + · rfl + · -- There is at least one unlucky element + have HU : ¬PLucky (K - 1) := by simp [PLucky] + apply LT.lt.trans_le _ ?G2 + case G2 => apply ENNReal.le_tsum ⟨ _, HU ⟩ + simp [privNoiseGuess, privNoiseZero, DPNoise.noise, privNoisedQueryPure, DiscreteLaplaceGenSamplePMF] + simp [DFunLike.coe, PMF.instFunLike] + apply Real.mul_pos + · apply div_pos + · simp + · apply Right.add_pos' + · apply Real.exp_pos + · simp + · apply Real.exp_pos + + -- Bound the CDF below by the geometric CDF + apply @le_trans _ _ _ (⨆(y : ℕ), geo_cdf ρ y) + · -- Math + apply le_iSup_iff.mpr + intro b H + apply Classical.by_contradiction + intro H1 + simp at H1 + have Hz : (∃ z, (1 - ρ)^z < 1 - b) := by + have W := @exists_pow_lt_of_lt_one NNReal _ _ _ (ENNReal.toNNReal (1 - b)) (ENNReal.toNNReal (1 - ρ)) ?G2 ?G1 + case G2 => + rw [ENNReal.toNNReal_pos_iff] + apply And.intro + · simp + trivial + · apply ENNReal.sub_lt_of_lt_add + · exact le_of_lt H1 + · simp + case G1 => + apply ENNReal.toNNReal_lt_of_lt_coe + simp + apply ENNReal.sub_lt_self + · simp + · simp + · trivial + rcases W with ⟨ N, HN ⟩ + exists N + rw [<- ENNReal.toNNReal_pow] at HN + apply (ENNReal.toNNReal_lt_toNNReal _ _).mp + · trivial + · apply ENNReal.pow_ne_top + apply ENNReal.sub_ne_top + simp + · apply ENNReal.sub_ne_top + simp + rcases Hz with ⟨ z, Hz ⟩ + have Hz' : 1 - (1 - ρ) ^ z > 1 - (1 - b) := by + apply LT.lt.gt + apply (ENNReal.sub_lt_iff_lt_right _ _).mpr + · apply ENNReal.lt_add_of_sub_lt_left + · left + simp + · apply Eq.trans_lt _ Hz + apply ENNReal.sub_sub_cancel + · simp + apply Right.pow_le_one_of_le + apply tsub_le_self + · apply ENNReal.sub_ne_top + simp + · apply tsub_le_self + have H' := H z + have X : 1 - (1 - b) = b := by + apply ENNReal.sub_sub_cancel + · simp + exact le_of_lt H1 + rw [X] at Hz' + apply LE.le.not_lt at H' + apply H' + apply GT.gt.lt + trivial + apply iSup_mono + intro cut + + + -- Because v0 is not in the loop, we need to do one of the unrollings first + -- Our IH is going to include a condition on "present" + cases cut + · -- cut=0 case is trivial + simp [probWhileCut, geo_cdf] + rename_i cut + + rw [geo_cdf_rec _ Hρ_ub] + rw [ENNReal.tsum_split PLucky] + apply add_le_add + · -- Lucky guess + simp [probWhileCut, probWhileFunctional, sv1_aboveThreshC, sv1_noise] + conv => + rhs + enter [1, a, 2, 1, x, 1, x1] + rw [ite_conv_left + (by + rw [ite_eq_right_iff.mpr] + intro i + exfalso + rcases a with ⟨ v, Hv ⟩ + simp [sv1_threshold] at i + have luck := HLucky v 0 Hv + apply (LT.lt.not_le i) + trivial)] + rfl + -- The rightmost sum is 1 + apply @le_trans _ _ _ (∑' (a : { t // PLucky t }), (privNoiseGuess ε₁ ε₂) ↑a * 1) + · simp + apply ENNReal.tsum_le_tsum + intro x + apply ENNReal.mul_left_mono + apply ENNReal.tsum_lb_single 0 + apply ENNReal.tsum_lb_single ([], x.1) + simp [sv1_threshold] + + + -- Unlucky + suffices (∀ H, ∀ a : {t : ℤ // ¬ PLucky t}, geo_cdf ρ cut ≤ + ∑' (x : ℕ) (x_1 : sv1_state), + if x = sv1_threshold x_1 then probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) (cut + 1) (H, ↑a) x_1 else 0) by + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_le_tsum + intro a + apply ENNReal.mul_left_mono + apply this + simp + rw [ENNReal.tsum_mul_right] + apply ENNReal.mul_right_mono + apply Eq.le + -- Math + clear this + rw [<- Hρ_1] + conv => + enter [1, 1] + rw [ENNReal.tsum_split PLucky] + apply ENNReal.add_sub_cancel_left + exact LT.lt.ne_top Hρ_ub_strict + + -- Now we have the right inductive structure + induction cut + · -- Base case is trivial? + simp [geo_cdf] + · rename_i cut IH + intro H a + rcases a with ⟨ v, Hv ⟩ + simp + + -- Because the first sample is not lucky, we can't say anything about the branch we end up in + -- It may terminate, or it may not. + have advance : + ((∑' (x1 : ℕ) (x2 : sv1_state), + if x1 = sv1_threshold x2 + then (sv1_aboveThreshF ε₁ ε₂ (H, v)).probBind (fun v => probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) (cut + 1) v) x2 + else 0) + ≤ (∑' (x : ℕ) (x_1 : sv1_state), if x = sv1_threshold x_1 then probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) (cut + 1 + 1) (H, v) x_1 else 0)) := by + conv => + rhs + enter [1, x1, 1, x2] + unfold probWhileCut + unfold probWhileFunctional + simp + split + · simp + · simp + -- RHS is 1 + apply ENNReal.tsum_lb_single (List.length H) + apply ENNReal.tsum_lb_single (H, v) + conv => + rhs + simp [sv1_threshold] + + -- LHS is bounded above by 1 by upper bound lemma + have X : + (∑' (x1 : ℕ) (x2 : sv1_state), + if x1 = sv1_threshold x2 then + ∑' (a : sv1_state), + sv1_aboveThreshF ε₁ ε₂ (H, v) a * probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) (cut + 1) a x2 + else 0) = + (∑' (x1 : ℕ) (x2 : sv1_state), + if x1 = sv1_threshold x2 then + ((sv1_aboveThreshF ε₁ ε₂ (H, v) >>= probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) (cut + 1)) x2) + else 0) := by + simp + rw [X] + clear X + rw [ENNReal.tsum_comm] + have X : ∀ b : sv1_state, + (∑' (a : ℕ), + if a = sv1_threshold b then + (sv1_aboveThreshF ε₁ ε₂ (H, v) >>= probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) (cut + 1)) b + else 0) = + ((sv1_aboveThreshF ε₁ ε₂ (H, v) >>= probWhileCut (sv1_aboveThreshC qs T τ l) (sv1_aboveThreshF ε₁ ε₂) (cut + 1)) b) := by + intro b + rw [tsum_ite_eq] + conv => + lhs + enter [1, b] + rw [X b] + clear X + simp [sv1_aboveThreshF] + apply le_trans + · apply ENNReal.tsum_le_tsum + intro a + simp + apply ENNReal.tsum_le_tsum + intro a1 + apply ENNReal.mul_left_mono + rfl + + rw [ENNReal.tsum_comm] + conv => + lhs + enter [1, b] + rw [ENNReal.tsum_mul_left] + apply le_trans + · apply ENNReal.tsum_le_tsum + intro x + apply ENNReal.mul_left_mono + apply sv1_loop_ub + simp + rcases (privNoiseGuess ε₁ ε₂) with ⟨ X, Y ⟩ + apply Eq.le + exact HasSum.tsum_eq Y + apply le_trans _ advance + simp + clear advance + + -- Now we want to commute out the randomness associate to that s1_aboveThreshF + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_le_tsum + intro x + apply ENNReal.tsum_le_tsum + intro x_1 + rw [<- ite_lemma_1] + conv => + enter [2] + conv => + enter [1, a] + rw [ENNReal.tsum_comm] + rw [ENNReal.tsum_comm] + + -- Split the sv1_state sum + conv => + enter [2] + unfold sv1_state + rw [ENNReal.tsum_prod'] + rw [ENNReal.tsum_comm] + + -- Now, condition on the luckiness of the next value + rw [ENNReal.tsum_split PLucky] + + + -- Split the sum and the recurrence relation + rw [geo_cdf_rec _ Hρ_ub] + apply add_le_add + · -- Guess is lucky + -- The loop will terminate and we can show it + + conv => + enter [2, 1, a, 1, b] + conv => + enter [1, c] + conv => + enter [1, d] + rw [<- mul_ite_zero] + rw [ENNReal.tsum_mul_left] + rw [ENNReal.tsum_mul_left] + + -- Conclude by simplification + simp only [sv1_aboveThreshF, bind, pure, bind_apply, pure_apply, mul_ite, mul_one, mul_zero] + unfold probWhileCut + unfold probWhileFunctional + -- Push the ite inside so that all of the sums are in a row + conv => + enter [2, 1, a, 1, b] + rw [<- ENNReal.tsum_mul_right] + simp + enter [1, i] + rw [<- mul_ite_zero] + enter [2] + rw [<- ite_lemma_1] + conv => + enter [1, x] + rw [<- ite_lemma_1] + conv => + enter [2, 1, a, 1, b, 1, i] + rw [<- ENNReal.tsum_mul_left] + enter [1, c] + rw [<- ENNReal.tsum_mul_left] + -- Move the lucky sample inwards + conv => + rhs + rw [ENNReal.tsum_comm] + enter [1, b] + rw [ENNReal.tsum_comm] + enter [1, c] + rw [ENNReal.tsum_comm] + enter [1, d] + rw [ENNReal.tsum_comm] + -- Pick elements for each of the other sums to make it terminate + apply ENNReal.tsum_lb_single (H ++ [v]) + rw [ENNReal.tsum_comm] + apply ENNReal.tsum_lb_single (List.length H + 1) + rw [ENNReal.tsum_comm] + rw [ENNReal.tsum_prod'] + apply ENNReal.tsum_lb_single (H ++ [v]) + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_le_tsum + intro b + apply ENNReal.tsum_le_tsum + intro a + apply ENNReal.tsum_le_tsum + intro c + apply ENNReal.mul_left_mono + simp [sv1_threshold] + simp [sv1_aboveThreshC] + apply ite_mono_left + rw [ite_eq_right_iff.mpr (by + intro K + exfalso + rcases c with ⟨ c, Hc ⟩ + simp [sv1_threshold, sv1_noise] at K + have HC' := HLucky c (H.length + 1) Hc + apply (LT.lt.not_le K) + apply GE.ge.le + apply HC')] + + -- Now move the lucky sum out to the front, so that we can constrain the other sum values to equal it + conv => + rhs + enter [1, a] + rw [ENNReal.tsum_comm] + rw [ENNReal.tsum_comm] + + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_le_tsum + intro a + apply ENNReal.tsum_lb_single a.1 + apply ENNReal.tsum_lb_single a.1 + rfl + simp + + · -- Guess is unlucky + -- Commute out the samples related to the first sample (which will evenetually become a (1- ρ) factor) + conv => + enter [2, 1, a, 1, b] + conv => + enter [1, c] + conv => + enter [1, d] + rw [<- mul_ite_zero] + rw [ENNReal.tsum_mul_left] + rw [ENNReal.tsum_mul_left] + unfold sv1_state at IH + + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_le_tsum + intro a + apply ENNReal.tsum_le_tsum + intro b + apply ENNReal.mul_left_mono + apply IH + simp + conv => + enter [2, 1, a] + rw [ENNReal.tsum_mul_right] + rw [ENNReal.tsum_mul_right] + apply ENNReal.mul_right_mono + + -- Conclude by simplification + simp only [sv1_aboveThreshF, bind, pure, bind_apply, pure_apply, mul_ite, mul_one, mul_zero] + apply le_trans _ ?G1 + case G1 => + apply ENNReal.tsum_le_tsum + intro a + apply ENNReal.tsum_lb_single (H ++ [v]) + apply ENNReal.tsum_lb_single a.1 + rfl + simp + rw [<- Hρ_1] + rw [ENNReal.tsum_split PLucky] + rw [add_comm] + + +def sv1_aboveThresh_PMF (ε₁ ε₂ : ℕ+) (l : List sv_T) : SPMF ℕ := + ⟨ sv1_aboveThresh qs T ε₁ ε₂ l, + by + rw [Summable.hasSum_iff ENNReal.summable] + apply LE.le.antisymm + · apply sv1_ub + · apply sv1_lb + apply lucky_guess ⟩ + +/-- +sv9 normalizes because sv1 normalizes +-/ +def sv9_aboveThresh_SPMF (ε₁ ε₂ : ℕ+) (l : List sv_T) : SPMF ℕ := + ⟨ @sv9_aboveThresh PureDPSystem laplace_pureDPSystem sv_T qs T ε₁ ε₂ l, + by + rw [<- @sv8_sv9_eq] + rw [<- @sv7_sv8_eq] + rw [<- @sv6_sv7_eq] + rw [<- @sv5_sv6_eq] + rw [<- @sv4_sv5_eq] + rw [<- @sv3_sv4_eq] + rw [<- @sv2_sv3_eq] + rw [<- @sv1_sv2_eq] + rw [Summable.hasSum_iff ENNReal.summable] + apply LE.le.antisymm + · apply sv1_ub + · apply sv1_lb + apply lucky_guess ⟩ + +end pmf + +end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/BoundedMean/Code.lean b/SampCert/DifferentialPrivacy/Queries/BoundedMean/Code.lean index 2cf226d8..dc94cd09 100644 --- a/SampCert/DifferentialPrivacy/Queries/BoundedMean/Code.lean +++ b/SampCert/DifferentialPrivacy/Queries/BoundedMean/Code.lean @@ -13,16 +13,15 @@ import SampCert.DifferentialPrivacy.Queries.BoundedSum.Code This file defines a differentially private bounded mean query. -/ -noncomputable section - namespace SLang variable [dps : DPSystem ℕ] +variable [dpn : DPNoise dps] /-- Compute a noised mean using a noised sum and noised count. -/ -def privNoisedBoundedMean (U : ℕ+) (ε₁ ε₂ : ℕ+) (l : List ℕ) : PMF ℚ := do +def privNoisedBoundedMean (U : ℕ+) (ε₁ ε₂ : ℕ+) (l : List ℕ) : SPMF ℚ := do let S ← privNoisedBoundedSum U ε₁ (2 * ε₂) l let C ← privNoisedCount ε₁ (2 * ε₂) l return S / C diff --git a/SampCert/DifferentialPrivacy/Queries/BoundedMean/Properties.lean b/SampCert/DifferentialPrivacy/Queries/BoundedMean/Properties.lean index 775a6435..abed30ab 100644 --- a/SampCert/DifferentialPrivacy/Queries/BoundedMean/Properties.lean +++ b/SampCert/DifferentialPrivacy/Queries/BoundedMean/Properties.lean @@ -21,6 +21,7 @@ noncomputable section namespace SLang variable [dps : DPSystem ℕ] +variable [dpn : DPNoise dps] lemma budget_split (ε₁ ε₂ : ℕ+) : (ε₁ : NNReal) / (ε₂ : NNReal) = (ε₁ : NNReal) / ((2 * ε₂) : ℕ+) + (ε₁ : NNReal) / ((2 * ε₂) : ℕ+) := by @@ -30,14 +31,20 @@ lemma budget_split (ε₁ ε₂ : ℕ+) : /-- DP bound for noised mean. -/ -theorem privNoisedBoundedMean_DP (U : ℕ+) (ε₁ ε₂ : ℕ+) : - dps.prop (privNoisedBoundedMean U ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by +theorem privNoisedBoundedMean_DP (U : ℕ+) (ε₁ ε₂ : ℕ+) (ε : NNReal) + (HP_half : dpn.noise_priv ε₁ (2 * ε₂) (ε / 2)) : + dps.prop (privNoisedBoundedMean U ε₁ ε₂) ε := by unfold privNoisedBoundedMean rw [bind_bind_indep] apply dps.postprocess_prop - rw [budget_split] - apply dps.compose_prop + apply dps.compose_prop ?SC1 · apply privNoisedBoundedSum_DP + apply HP_half · apply privNoisedCount_DP + apply HP_half + + case SC1 => + ring_nf + simp [div_mul] end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/BoundedSum/Code.lean b/SampCert/DifferentialPrivacy/Queries/BoundedSum/Code.lean index 12a37d7a..1396256d 100644 --- a/SampCert/DifferentialPrivacy/Queries/BoundedSum/Code.lean +++ b/SampCert/DifferentialPrivacy/Queries/BoundedSum/Code.lean @@ -14,11 +14,10 @@ import Init.Data.Int.Order This file defines a differentially private bounded sum query. -/ -noncomputable section - namespace SLang variable [dps : DPSystem ℕ] +variable [dpn : DPNoise dps] /-- Bounded sum query: computes a sum and truncates it at an upper bound. @@ -30,6 +29,6 @@ def exactBoundedSum (U : ℕ+) (l : List ℕ) : ℤ := Noised bounded sum query obtained by applying the DP noise mechanism to the bounded sum. -/ def privNoisedBoundedSum (U : ℕ+) (ε₁ ε₂ : ℕ+) (l : List ℕ) : PMF ℤ := do - dps.noise (exactBoundedSum U) U ε₁ ε₂ l + dpn.noise (exactBoundedSum U) U ε₁ ε₂ l end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/BoundedSum/Properties.lean b/SampCert/DifferentialPrivacy/Queries/BoundedSum/Properties.lean index 70e72bfd..1bf894d1 100644 --- a/SampCert/DifferentialPrivacy/Queries/BoundedSum/Properties.lean +++ b/SampCert/DifferentialPrivacy/Queries/BoundedSum/Properties.lean @@ -22,6 +22,7 @@ noncomputable section namespace SLang variable [dps : DPSystem ℕ] +variable [dpn : DPNoise dps] /-- Sensitivity of the bounded sum is equal to the bound. @@ -56,37 +57,14 @@ theorem exactBoundedSum_sensitivity (U : ℕ+) : sensitivity (exactBoundedSum U) · rename_i h rw [h] simp - · rename_i l n l' m h1 h2 - subst h1 h2 - simp - cases A n - · rename_i h - cases A m - · rename_i h' - rw [h, h'] - simp at * - apply Int.natAbs_coe_sub_coe_le_of_le h h' - · rename_i h' - rw [h, h'] - simp at * - apply Int.natAbs_coe_sub_coe_le_of_le h le_rfl - · rename_i h - cases A m - · rename_i h' - rw [h, h'] - simp at * - apply Int.natAbs_coe_sub_coe_le_of_le le_rfl h' - · rename_i h' - rw [h, h'] - simp at * /-- The noised bounded sum satisfies the DP property of the DP system. -/ @[simp] -theorem privNoisedBoundedSum_DP (U : ℕ+) (ε₁ ε₂ : ℕ+) : - dps.prop (privNoisedBoundedSum U ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by - apply dps.noise_prop +theorem privNoisedBoundedSum_DP (U : ℕ+) (ε₁ ε₂ : ℕ+) (ε : NNReal) (HP : dpn.noise_priv ε₁ ε₂ ε) : + dps.prop (privNoisedBoundedSum U ε₁ ε₂) ε := by + apply dpn.noise_prop HP apply exactBoundedSum_sensitivity end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/Count/Code.lean b/SampCert/DifferentialPrivacy/Queries/Count/Code.lean index a94db31e..f9bf360f 100644 --- a/SampCert/DifferentialPrivacy/Queries/Count/Code.lean +++ b/SampCert/DifferentialPrivacy/Queries/Count/Code.lean @@ -11,12 +11,11 @@ import SampCert.DifferentialPrivacy.Abstract This file defines a differentially private noising of an exact length query. -/ -noncomputable section - namespace SLang variable {T : Type} variable [dps : DPSystem T] +variable [dpn : DPNoise dps] /-- Query to count the size of the input list. @@ -27,6 +26,6 @@ def exactCount (l : List T) : ℤ := List.length l A noised counting mechanism. -/ def privNoisedCount (ε₁ ε₂ : ℕ+) (l : List T) : PMF ℤ := do - dps.noise exactCount 1 ε₁ ε₂ l + dpn.noise exactCount 1 ε₁ ε₂ l end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/Count/Properties.lean b/SampCert/DifferentialPrivacy/Queries/Count/Properties.lean index 8257d763..4e2f47fd 100644 --- a/SampCert/DifferentialPrivacy/Queries/Count/Properties.lean +++ b/SampCert/DifferentialPrivacy/Queries/Count/Properties.lean @@ -21,6 +21,7 @@ namespace SLang variable {T : Type} variable [dps : DPSystem T] +variable [dpn : DPNoise dps] /-- The counting query is 1-sensitive @@ -36,17 +37,15 @@ theorem exactCount_1_sensitive : · rename_i a b n h1 h2 subst h1 h2 simp - · rename_i a n b m h1 h2 - subst h1 h2 - simp /-- The noised counting query satisfies DP property -/ @[simp] -theorem privNoisedCount_DP (ε₁ ε₂ : ℕ+) : - dps.prop (privNoisedCount ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by - apply dps.noise_prop +theorem privNoisedCount_DP (ε₁ ε₂ : ℕ+) (ε : NNReal) (HP : dpn.noise_priv ε₁ ε₂ ε) : + dps.prop (privNoisedCount ε₁ ε₂) ε := by + unfold privNoisedCount + apply dpn.noise_prop HP apply exactCount_1_sensitive end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/Histogram/Code.lean b/SampCert/DifferentialPrivacy/Queries/Histogram/Code.lean index 553cdedc..eb6f25aa 100644 --- a/SampCert/DifferentialPrivacy/Queries/Histogram/Code.lean +++ b/SampCert/DifferentialPrivacy/Queries/Histogram/Code.lean @@ -24,8 +24,6 @@ structure Bins (T : Type) (num_bins : ℕ) where ## Private Histograms -/ -noncomputable section - variable (numBins : ℕ+) def predBins : ℕ := numBins.natPred @@ -87,6 +85,7 @@ instance : DiscreteMeasurableSpace (Histogram T numBins B) where namespace SLang variable [dps : DPSystem T] +variable [dpn : DPNoise dps] /-- Compute the exact number of elements inside a histogram bin @@ -98,7 +97,7 @@ def exactBinCount (b : Fin numBins) (l : List T) : ℤ := Compute a noised count of the number of list elements inside a particular histogram bin -/ def privNoisedBinCount (ε₁ ε₂ : ℕ+) (b : Fin numBins) : Mechanism T ℤ := - (dps.noise (exactBinCount numBins B b) 1 ε₁ (ε₂ * numBins)) + (dpn.noise (exactBinCount numBins B b) 1 ε₁ (ε₂ * numBins)) /-- Modify a count inside a Histogram diff --git a/SampCert/DifferentialPrivacy/Queries/Histogram/Properties.lean b/SampCert/DifferentialPrivacy/Queries/Histogram/Properties.lean index 155c66b8..9eeffd35 100644 --- a/SampCert/DifferentialPrivacy/Queries/Histogram/Properties.lean +++ b/SampCert/DifferentialPrivacy/Queries/Histogram/Properties.lean @@ -21,139 +21,71 @@ namespace SLang variable {T : Type} variable [dps : DPSystem T] +variable [dpn : DPNoise dps] variable [HT : Inhabited T] variable (numBins : ℕ+) variable (B : Bins T numBins) --- def exactBinCount (b : Fin num_bins) (l : List T) : ℤ := --- List.length (List.filter (fun v => B.bin v = b) l) +variable (ε₁ ε₂ : ℕ+) (ε : NNReal) (HN_bin : dpn.noise_priv ε₁ (ε₂ * numBins) (ε / numBins)) /- exactBinCount is 1-sensitive -/ theorem exactBinCount_sensitivity (b : Fin numBins) : sensitivity (exactBinCount numBins B b) 1 := by rw [sensitivity] - intros l₁ l₂ HN - cases HN - · rename_i l₁' l₂' v' Hl₁ Hl₂ - rw [ Hl₁, Hl₂ ] - rw [exactBinCount, exactBinCount] - simp [List.filter_cons] - aesop - · rename_i l₁' v' l₂' Hl₁ Hl₂ - rw [ Hl₁, Hl₂ ] - rw [exactBinCount, exactBinCount] - simp [List.filter_cons] - aesop - · rename_i l₁' v₁' l₂' v₂' Hl₁ Hl₂ - rw [ Hl₁, Hl₂ ] - rw [exactBinCount, exactBinCount] - simp [List.filter_cons] - -- There has to be a better way - cases (Classical.em (B.bin v₁' = b)) <;> cases (Classical.em (B.bin v₂' = b)) - all_goals (rename_i Hrw1 Hrw2) - all_goals (simp [Hrw1, Hrw2]) + intros _ _ H + cases H + all_goals simp_all [exactBinCount, exactBinCount, List.filter_cons] + all_goals aesop /-- DP bound for a noised bin count -/ -lemma privNoisedBinCount_DP (ε₁ ε₂ : ℕ+) (b : Fin numBins) : - dps.prop (privNoisedBinCount numBins B ε₁ ε₂ b) (ε₁ / ((ε₂ * numBins : PNat))) := by +lemma privNoisedBinCount_DP (b : Fin numBins) : + dps.prop (privNoisedBinCount numBins B ε₁ ε₂ b) (ε / numBins) := by unfold privNoisedBinCount - apply dps.noise_prop + apply dpn.noise_prop HN_bin apply exactBinCount_sensitivity --- MARKUSDE: Looking at this proof it is clear that we need better tactic support for the abstract DP operators --- MARKUSDE: - Lemmas with equality side conditions for the privacy cost /-- DP bound for intermediate steps in the histogram calculation. -/ -lemma privNoisedHistogramAux_DP (ε₁ ε₂ : ℕ+) (n : ℕ) (Hn : n < numBins) : - dps.prop (privNoisedHistogramAux numBins B ε₁ ε₂ n Hn) (n.succ * (ε₁ / (ε₂ * numBins : PNat))) := by +lemma privNoisedHistogramAux_DP (n : ℕ) (Hn : n < numBins) : + dps.prop (privNoisedHistogramAux numBins B ε₁ ε₂ n Hn) (n.succ * (ε / numBins)) := by induction n · unfold privNoisedHistogramAux - simp only [Nat.cast_zero, succ_eq_add_one, zero_add, Nat.cast_one, Nat.cast_mul, one_mul] - refine DPSystem.postprocess_prop - (privCompose (privNoisedBinCount numBins B ε₁ ε₂ 0) (privConst (emptyHistogram numBins B))) - (↑↑ε₁ / ↑↑(ε₂ * numBins)) ?G1 - apply (DPSystem_prop_ext _ ?HEq ?Hdp) - case Hdp => - apply (DPSystem.compose_prop - (privNoisedBinCount numBins B ε₁ ε₂ 0) - (privConst (emptyHistogram numBins B)) - (↑↑ε₁ / ↑↑(ε₂ * numBins)) - 0 - (privNoisedBinCount_DP numBins B ε₁ ε₂ 0) - (DPSystem.const_prop (emptyHistogram numBins B))) - case HEq => simp only [PNat.mul_coe, Nat.cast_mul, add_zero] - · rename_i n IH - unfold privNoisedHistogramAux - simp only [] - refine DPSystem.postprocess_prop - (privCompose (privNoisedBinCount numBins B ε₁ ε₂ ↑(n + 1)) - (privNoisedHistogramAux numBins B ε₁ ε₂ n (privNoisedHistogramAux.proof_2 numBins n Hn))) - (↑(n + 1).succ * (↑↑ε₁ / ↑↑(ε₂ * numBins))) ?succ.a - apply (@DPSystem_prop_ext _ _ _ (?C1 + ?C2) _ _ ?HCeq ?Hdp) - case Hdp => - refine - (DPSystem.compose_prop - (privNoisedBinCount numBins B ε₁ ε₂ ↑(n + 1)) - (privNoisedHistogramAux numBins B ε₁ ε₂ n _) (↑↑ε₁ / ↑↑(ε₂ * numBins)) (↑n.succ * (↑↑ε₁ / ↑↑(ε₂ * numBins))) ?X ?Y) - case X => exact privNoisedBinCount_DP numBins B ε₁ ε₂ ↑(n + 1) - case Y => apply IH - generalize (ε₁.val.cast / (ε₂ * numBins).val.cast : NNReal) = A - conv => - enter [1, 1] - rw [Eq.symm (one_mul A)] - rw [<- add_mul] - congr - simp only [succ_eq_add_one, Nat.cast_add, Nat.cast_one] - exact AddCommMagma.add_comm (OfNat.ofNat 1) (n.cast + OfNat.ofNat 1) + simp + apply dps.postprocess_prop + apply dps.compose_prop (AddLeftCancelMonoid.add_zero _) + · apply privNoisedBinCount_DP; apply HN_bin + · apply dps.const_prop; rfl + · rename_i _ IH + simp [privNoisedHistogramAux] + apply dps.postprocess_prop + apply dps.compose_prop ?arithmetic + · apply privNoisedBinCount_DP; apply HN_bin + · apply IH + case arithmetic => simp; ring_nf /-- DP bound for a noised histogram -/ -lemma privNoisedHistogram_DP (ε₁ ε₂ : ℕ+) : - dps.prop (privNoisedHistogram numBins B ε₁ ε₂) (ε₁ / ε₂) := by +lemma privNoisedHistogram_DP : + dps.prop (privNoisedHistogram numBins B ε₁ ε₂) ε := by unfold privNoisedHistogram apply (DPSystem_prop_ext _ ?HEq ?Hdp) - case Hdp => apply privNoisedHistogramAux_DP - case HEq => - simp - rw [division_def] - rw [division_def] - rw [mul_inv] - conv => - enter [1, 2] - rw [<- mul_assoc] - rw [mul_comm] - generalize (ε₁.val.cast * ε₂.val.cast⁻¹ : NNReal) = A - rw [<- mul_assoc] - conv => - enter [2] - rw [Eq.symm (one_mul A)] - congr - unfold predBins - cases numBins - rename_i n' Hn' - simp only [PNat.natPred_eq_pred, pred_eq_sub_one, cast_tsub, Nat.cast_one, PNat.mk_coe] - rw [tsub_add_eq_max] - rw [max_eq_left (one_le_cast.mpr Hn')] - apply mul_inv_cancel - intro HK - simp_all - rw [HK] at Hn' - cases Hn' - + case Hdp => apply privNoisedHistogramAux_DP; apply HN_bin + case HEq => simp [predBins, mul_div_left_comm] /-- DP bound for the thresholding maximum -/ -lemma privMaxBinAboveThreshold_DP (ε₁ ε₂ : ℕ+) (τ : ℤ) : - dps.prop (privMaxBinAboveThreshold numBins B ε₁ ε₂ τ) (ε₁ / ε₂) := by - rw [privMaxBinAboveThreshold] +lemma privMaxBinAboveThreshold_DP (τ : ℤ) : + dps.prop (privMaxBinAboveThreshold numBins B ε₁ ε₂ τ) ε := by + unfold privMaxBinAboveThreshold apply dps.postprocess_prop apply privNoisedHistogram_DP + apply HN_bin end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/HistogramMean/Code.lean b/SampCert/DifferentialPrivacy/Queries/HistogramMean/Code.lean index 4a91528c..51546d49 100644 --- a/SampCert/DifferentialPrivacy/Queries/HistogramMean/Code.lean +++ b/SampCert/DifferentialPrivacy/Queries/HistogramMean/Code.lean @@ -20,9 +20,8 @@ Implementation for ## Compute the mean using a histogram to compute the noisy max -/ -noncomputable section - variable (dps : SLang.DPSystem ℕ) +variable (dpn : SLang.DPNoise dps) variable (numBins : ℕ+) variable (B : Bins ℕ numBins) @@ -44,7 +43,7 @@ and the bounded mean with (ε₃/ε₄)-DP. def privMeanHistogram (ε₁ ε₂ : ℕ+) (τ : ℤ) (ε₃ ε₄ : ℕ+) : Mechanism ℕ (Option ℚ) := privPostProcess (privComposeAdaptive - (@privMaxBinAboveThreshold numBins _ B dps ε₁ ε₂ τ) + (@privMaxBinAboveThreshold numBins _ B dps dpn ε₁ ε₂ τ) (fun opt_max => match opt_max with | none => privConst none diff --git a/SampCert/DifferentialPrivacy/Queries/HistogramMean/Properties.lean b/SampCert/DifferentialPrivacy/Queries/HistogramMean/Properties.lean index 22892f3f..6d15c8fd 100644 --- a/SampCert/DifferentialPrivacy/Queries/HistogramMean/Properties.lean +++ b/SampCert/DifferentialPrivacy/Queries/HistogramMean/Properties.lean @@ -22,6 +22,7 @@ noncomputable section namespace SLang variable (dps : DPSystem ℕ) +variable (dpn : DPNoise dps) variable (numBins : ℕ+) variable (B : Bins ℕ numBins) variable (unbin : Fin numBins -> ℕ+) @@ -56,23 +57,28 @@ instance : DiscreteMeasurableSpace (Option (Fin ↑numBins)) where forall_measurableSet := by simp only [MeasurableSpace.measurableSet_top, implies_true] -/- +/-- DP bound for the adaptive mean -/ -lemma privMeanHistogram_DP (ε₁ ε₂ : ℕ+) (τ : ℤ) (ε₃ ε₄ : ℕ+) : - dps.prop (privMeanHistogram dps numBins B unbin ε₁ ε₂ τ ε₃ ε₄) (ε₁/ε₂ + ε₃/ε₄) := by +lemma privMeanHistogram_DP (ε₁ ε₂ : ℕ+) (τ : ℤ) (ε₃ ε₄ : ℕ+) (εA εB : NNReal) + (HN_A : dpn.noise_priv ε₁ (ε₂ * numBins) (εA / numBins)) + (HN_B : dpn.noise_priv ε₃ (2 * ε₄) (εB / 2)): + dps.prop (privMeanHistogram dps dpn numBins B unbin ε₁ ε₂ τ ε₃ ε₄) (εA + εB) := by rw [privMeanHistogram] apply dps.postprocess_prop apply dps.adaptive_compose_prop · apply privMaxBinAboveThreshold_DP - intro u - cases u - · simp only - apply dps.prop_mono ?G1 ?G2 - case G2 => apply dps.const_prop - simp only [_root_.zero_le] - · rename_i mx - simp only - apply dps.postprocess_prop - apply privNoisedBoundedMean_DP + apply HN_A + · intro u + cases u + · simp only [] + apply (@DPSystem.prop_mono _ _ _ _ 0 εB _) + · apply dps.const_prop + rfl + · apply _root_.zero_le + · simp only [] + apply dps.postprocess_prop + apply privNoisedBoundedMean_DP + apply HN_B + rfl end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/ParHistogram/Basic.lean b/SampCert/DifferentialPrivacy/Queries/ParHistogram/Basic.lean new file mode 100644 index 00000000..62d2f50c --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/ParHistogram/Basic.lean @@ -0,0 +1,8 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Queries.ParHistogram.Code +import SampCert.DifferentialPrivacy.Queries.ParHistogram.Properties diff --git a/SampCert/DifferentialPrivacy/Queries/ParHistogram/Code.lean b/SampCert/DifferentialPrivacy/Queries/ParHistogram/Code.lean new file mode 100644 index 00000000..318d562d --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/ParHistogram/Code.lean @@ -0,0 +1,48 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ +import SampCert.DifferentialPrivacy.Queries.Histogram.Code +import SampCert.DifferentialPrivacy.Abstract +import Init.Data.Fin.Basic +import Mathlib.Data.Vector.Basic + +/-! +# ``privParNoisedHistogram`` Implementation + +This file implements a histogram with noised bins, computed in parallel +-/ + + +namespace SLang + +variable [dps : DPPar T] +variable [dpn : DPNoise dps.toDPSystem] + +variable (numBins : ℕ+) +variable (B : Bins T numBins) + + +def privParNoisedBinCount (ε₁ ε₂ : ℕ+) (b : Fin numBins) : Mechanism T ℤ := + (dpn.noise (exactBinCount numBins B b) 1 ε₁ ε₂) + +def privParNoisedHistogramAux (ε₁ ε₂ : ℕ+) (n : ℕ) (Hn : n < numBins) : Mechanism T (Histogram T numBins B) := + let privParNoisedHistogramAux_rec := + match n with + | Nat.zero => privConst (emptyHistogram numBins B) + | Nat.succ n' => privParNoisedHistogramAux ε₁ ε₂ n' (Nat.lt_of_succ_lt Hn) + privPostProcess + (privParCompose + (privParNoisedBinCount numBins B ε₁ ε₂ n) + privParNoisedHistogramAux_rec + (B.bin · = n)) + (fun z => setCount numBins B z.2 n z.1) + +/-- +Histogram with noise added to each count +-/ +def privParNoisedHistogram (ε₁ ε₂ : ℕ+) : Mechanism T (Histogram T numBins B) := + privParNoisedHistogramAux numBins B ε₁ ε₂ (predBins numBins) (predBins_lt_numBins numBins) + +end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/ParHistogram/Properties.lean b/SampCert/DifferentialPrivacy/Queries/ParHistogram/Properties.lean new file mode 100644 index 00000000..0770d559 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/ParHistogram/Properties.lean @@ -0,0 +1,76 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Jean-Baptiste Tristan +-/ +import SampCert.DifferentialPrivacy.Queries.ParHistogram.Code +import SampCert.DifferentialPrivacy.Queries.Histogram.Properties +import SampCert.DifferentialPrivacy.Sensitivity +import SampCert.DifferentialPrivacy.Abstract + +/-! +# ``privParHistogram`` Properties + +This file proves abstract differential privacy for the parallel noised histogram. +-/ + +open Classical Nat Int Real + +noncomputable section + +namespace SLang + +variable {T : Type} +variable [dps : DPPar T] +variable [dpn : DPNoise dps.toDPSystem] +variable [HT : Inhabited T] + +variable (numBins : ℕ+) +variable (B : Bins T numBins) + +variable (ε₁ ε₂ : ℕ+) (ε : NNReal) (HN_bin : dpn.noise_priv ε₁ ε₂ ε) + +/-- +DP bound for a noised bin count +-/ +lemma privParNoisedBinCount_DP (b : Fin numBins) : + dps.prop (privParNoisedBinCount numBins B ε₁ ε₂ b) ε := by + unfold privParNoisedBinCount + apply dpn.noise_prop HN_bin + apply exactBinCount_sensitivity + + +lemma privParNoisedHistogramAux_DP (n : ℕ) (Hn : n < numBins) : + dps.prop (privParNoisedHistogramAux numBins B ε₁ ε₂ n Hn) ε := by + induction n + · unfold privParNoisedHistogramAux + simp + apply dps.postprocess_prop + apply (@DPPar.prop_par _ _ _ _ _ _ ε 0) + · symm + apply max_eq_left + apply _root_.zero_le + · simp [privParNoisedBinCount] + apply dpn.noise_prop HN_bin + apply exactBinCount_sensitivity + · apply dps.const_prop + rfl + · rename_i _ IH + simp [privParNoisedHistogramAux] + apply dps.postprocess_prop + apply DPPar.prop_par ?arithmetic + · apply privParNoisedBinCount_DP + apply HN_bin + · apply IH + case arithmetic => simp + + +/-- +DP bound for a noised histogram +-/ +lemma privParNoisedHistogram_DP : + dps.prop (privParNoisedHistogram numBins B ε₁ ε₂) ε := by + unfold privParNoisedHistogram + apply (DPSystem_prop_ext _ ?HEq ?Hdp) + case Hdp => apply privParNoisedHistogramAux_DP; apply HN_bin + case HEq => simp [predBins, mul_div_left_comm] diff --git a/SampCert/DifferentialPrivacy/Queries/Sparse/Basic.lean b/SampCert/DifferentialPrivacy/Queries/Sparse/Basic.lean new file mode 100644 index 00000000..f8ac9959 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/Sparse/Basic.lean @@ -0,0 +1,8 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Queries.Sparse.Privacy +import SampCert.DifferentialPrivacy.Queries.Sparse.Code diff --git a/SampCert/DifferentialPrivacy/Queries/Sparse/Code.lean b/SampCert/DifferentialPrivacy/Queries/Sparse/Code.lean new file mode 100644 index 00000000..6cff3fcb --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/Sparse/Code.lean @@ -0,0 +1,52 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Abstract +import SampCert.DifferentialPrivacy.Pure.System +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Code +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Properties + +namespace SLang + +variable (T : ℤ) (ε₁ ε₂ : ℕ+) {sv_T : Type} +variable (qs : sv_query sv_T) +variable (HL : has_lucky qs T) + +variable [dps : DPSystem ℕ] +variable [dpn : DPNoise dps] + + +def snoc {T : Type*} (x : T) (L : List T) := L ++ [x] + +-- "Sparse" algorithm as described in the proof of 3.25 of +-- Cynthia Dwork and Aaron Roth "The Algorithmic Foundations of Differential Privacy" (2014) + +def shift_qs (n : ℕ) (qs : sv_query sv_T) : sv_query sv_T := fun i => qs (i + n) + +lemma shift_qs_lucky n (qs' : sv_query sv_T) (H : has_lucky qs' T) : has_lucky (shift_qs n qs') T := by + intro τ l + rcases (H τ l) with ⟨ K, HK ⟩ + exists K + intro A K' HK' + simp only [shift_qs] + apply HK + trivial + + +def privSparseAux {sv_T : Type} (qs' : sv_query sv_T) (HL' : has_lucky qs' T) (c : ℕ) : Mechanism sv_T (List ℕ) := + match c with + | 0 => privConst [] + | Nat.succ c' => + privPostProcess + (privComposeAdaptive + (sv1_aboveThresh_PMF qs' T HL' ε₁ ε₂) + (fun n => privSparseAux (shift_qs n qs') (shift_qs_lucky T n qs' HL') c')) + (Function.uncurry snoc) + +def privSparse (c : ℕ) : Mechanism sv_T (List ℕ) := + privSparseAux T ε₁ ε₂ (shift_qs 0 qs) (shift_qs_lucky _ _ _ HL) c + +end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/Sparse/Privacy.lean b/SampCert/DifferentialPrivacy/Queries/Sparse/Privacy.lean new file mode 100644 index 00000000..47ce0364 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/Sparse/Privacy.lean @@ -0,0 +1,67 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Abstract +import SampCert.DifferentialPrivacy.Pure.System +import SampCert.DifferentialPrivacy.Queries.Sparse.Code +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Properties +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Privacy + +noncomputable section + +open Classical + +namespace SLang + +variable (T : ℤ) (ε₁ ε₂ : ℕ+) {sv_T : Type} + +variable [dps : DPSystem sv_T] +variable [dpn : DPNoise dps] + +variable (qs : sv_query sv_T) +-- variable (Hqs : ∀ N : ℕ, has_lucky (shift_qs N qs) T) +variable (Hqs : has_lucky qs T) +variable (HDP : ∀ N H, ∀ ε : NNReal, (ε = ε₁ / ε₂) -> dps.prop (sv1_aboveThresh_PMF (shift_qs N qs) T H ε₁ ε₂) ε) + +lemma shift_qs_add {T : Type} (qs' : sv_query T) (A B : ℕ) : (shift_qs A (shift_qs B qs')) = (shift_qs (A + B) qs') := by + apply funext; simp [shift_qs, add_assoc] + +local instance : MeasurableSpace (List ℕ) where + MeasurableSet' _ := True + measurableSet_empty := by simp only + measurableSet_compl := by simp only [imp_self, implies_true] + measurableSet_iUnion := by simp only [implies_true, imp_self] + +local instance : DiscreteMeasurableSpace (List ℕ) where + forall_measurableSet := by simp only [MeasurableSpace.measurableSet_top, implies_true] + +lemma privSparseAux_DP (ε : NNReal) (c : ℕ) (Hε : ε = ε₁ / ε₂) : + ∀ N : ℕ, ∀ H, dps.prop (privSparseAux T ε₁ ε₂ (shift_qs N qs) H c) (c * ε) := by + induction c + · intro _ _ + unfold privSparseAux + simp + apply dps.const_prop + simp_all + · rename_i c' IH + intro N HL + simp [privSparseAux] + apply dps.postprocess_prop + apply @DPSystem.adaptive_compose_prop _ _ _ _ _ _ _ _ ε (c' * ε) ((c' + 1) * ε) + · apply HDP + trivial + · intro u + let IH' := IH (u + N) + rw [<- shift_qs_add] at IH' + apply IH' + · ring_nf + +lemma privSparse_DP (ε : NNReal) (c : ℕ) (Hε : ε = ε₁ / ε₂) : + dps.prop (privSparse T ε₁ ε₂ qs Hqs c) (c * ε) := by + unfold privSparse + apply privSparseAux_DP + · apply HDP + · trivial diff --git a/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Basic.lean b/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Basic.lean new file mode 100644 index 00000000..ae7d5405 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Basic.lean @@ -0,0 +1,8 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Queries.UnboundedMax.Privacy +import SampCert.DifferentialPrivacy.Queries.UnboundedMax.Code diff --git a/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Code.lean b/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Code.lean new file mode 100644 index 00000000..8da6340b --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Code.lean @@ -0,0 +1,190 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Abstract +import SampCert.DifferentialPrivacy.Pure.System +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Code +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Properties + +namespace SLang + +variable (ε₁ ε₂ : ℕ+) + +/-- +Sum over a list, clipping each element to a maximum. + +Similar to exactBoundedSum, however exactClippedSum allows m = 0. +-/ +def exactClippedSum (m : ℕ) (l : List ℕ) : ℤ := + List.sum (List.map (fun n : ℕ => (Nat.min n m)) l) + +/-- +Rate at which a given clipping thresold is impacting the accuracy of the sum. + +Always negative or zero. +-/ +def exactDiffSum (m : ℕ) (l : List ℕ) : ℤ := exactClippedSum m l - exactClippedSum (m + 1) l + +/- +lemma exactDiffSum_nonpos : exactDiffSum point L ≤ 0 := by + simp [exactDiffSum, exactClippedSum] + induction L + · simp + · rename_i l ll IH + simp + apply le_trans + · apply add_le_add + · rfl + · apply IH + simp +-/ + +lemma exactDiffSum_singleton_le_1 : -1 ≤ exactDiffSum point [v] := by + simp [exactDiffSum, exactClippedSum] + cases (Classical.em (point ≤ v)) + · right + trivial + · left + rename_i h + simp at h + rw [Int.min_def] + simp + split + · linarith + · linarith + +lemma exactClippedSum_append : exactClippedSum i (A ++ B) = exactClippedSum i A + exactClippedSum i B := by + simp [exactClippedSum] + +lemma exactDiffSum_append : exactDiffSum i (A ++ B) = exactDiffSum i A + exactDiffSum i B := by + simp [exactDiffSum] + rw [exactClippedSum_append] + rw [exactClippedSum_append] + linarith + +-- There is a value such that sampling at least that value implies the loop definitely terminiates +lemma lucky_guess : has_lucky exactDiffSum 0 := by + intros τ l + exists (List.length l + τ) + intro A K' HK' + apply ge_iff_le.mpr + apply le_trans _ ?G1 + case G1 => + apply add_le_add + · rfl + · apply HK' + conv => + lhs + rw [<- zero_add τ] + rw [<- add_assoc] + simp + clear HK' + + induction l + · simp [exactDiffSum, exactClippedSum] + · rename_i l0 ll IH + simp + rw [<- List.singleton_append] + rw [exactDiffSum_append] + rw [add_comm] + repeat rw [<- add_assoc] + rw [add_comm] + repeat rw [<- add_assoc] + rw [add_assoc] + conv => + lhs + rw [<- add_zero 0] + apply add_le_add + · apply IH + · have H := @exactDiffSum_singleton_le_1 A l0 + linarith + +def privUnboundedMax (ε₁ ε₂ : ℕ+) : List ℕ -> SPMF ℕ := + sv1_aboveThresh_PMF exactDiffSum 0 lucky_guess ε₁ ε₂ + +/- +/- +## Program version 0 + - Executable + - Optimization of sv1: Tracks single state + (Needs sv0 sv1 equivalence proof) +-/ + +def sv0_state : Type := ℕ × ℤ + +def sv0_threshold (s : sv0_state) : ℕ := s.1 + +def sv0_noise (s : sv0_state) : ℤ := s.2 + +def sv0_aboveThreshC (τ : ℤ) (l : List ℕ) (s : sv0_state) : Bool := + decide (exactDiffSum (sv0_threshold s) l + (sv0_noise s) < τ) + +def sv0_aboveThreshF (ε₁ ε₂ : ℕ+) (s : sv0_state) : SLang sv0_state := do + let vn <- privNoiseGuess ε₁ ε₂ + let n := (sv0_threshold s) + 1 + return (n, vn) + +def sv0_aboveThresh (ε₁ ε₂ : ℕ+) (l : List ℕ) : SLang ℕ := do + let τ <- privNoiseThresh ε₁ ε₂ + let v0 <- privNoiseGuess ε₁ ε₂ + let sk <- probWhile (sv0_aboveThreshC τ l) (sv0_aboveThreshF ε₁ ε₂) (0, v0) + return (sv0_threshold sk) +-/ + + + + +-- Unused for now +lemma exactDiffSum_eventually_constant : ∃ K, ∀ K', K ≤ K' -> exactDiffSum K' l = 0 := by + cases l + · simp [exactDiffSum, exactClippedSum] + · rename_i l0 ll + let K := (@List.maximum_of_length_pos _ _ (l0 :: ll) (by simp)) + exists K + intro K' HK' + simp [exactDiffSum, exactClippedSum] + rw [min_eq_left_iff.mpr ?G1] + case G1 => + simp + apply le_trans _ HK' + apply List.le_maximum_of_length_pos_of_mem + apply List.mem_cons_self + rw [min_eq_left_iff.mpr ?G1] + case G1 => + apply (@le_trans _ _ _ (↑K') _ _ (by simp)) + simp + apply le_trans _ HK' + apply List.le_maximum_of_length_pos_of_mem + apply List.mem_cons_self + conv => + enter [1, 1, 2, 1] + rw [(@List.map_inj_left _ _ _ _ (fun n => ↑n)).mpr + (by + intro a Ha + rw [min_eq_left_iff.mpr _] + simp + apply le_trans _ HK' + apply List.le_maximum_of_length_pos_of_mem + apply List.mem_cons_of_mem + apply Ha)] + rfl + conv => + enter [1, 2, 2, 1] + rw [(@List.map_inj_left _ _ _ _ (fun n => ↑n)).mpr + (by + intro a Ha + rw [min_eq_left_iff.mpr _] + apply (@le_trans _ _ _ (↑K') _ _ (by simp)) + simp + apply le_trans _ HK' + apply List.le_maximum_of_length_pos_of_mem + apply List.mem_cons_of_mem + apply Ha)] + rfl + simp + + +end SLang diff --git a/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Privacy.lean b/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Privacy.lean new file mode 100644 index 00000000..eed984e9 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Queries/UnboundedMax/Privacy.lean @@ -0,0 +1,71 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Markus de Medeiros +-/ + +import SampCert.DifferentialPrivacy.Abstract +import SampCert.DifferentialPrivacy.Pure.System +import SampCert.DifferentialPrivacy.Queries.UnboundedMax.Code +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Properties +import SampCert.DifferentialPrivacy.Queries.AboveThresh.Privacy + +noncomputable section + +open Classical + +namespace SLang + +variable (ε₁ ε₂ : ℕ+) + +lemma exactDiffSum_sens i : sensitivity (exactDiffSum i) 1 := by + intro l₁ l₂ H + cases H + · rename_i A B C H1 H2 + rw [H1, H2] + repeat rw [exactDiffSum_append] + simp [exactDiffSum, exactClippedSum] + apply Int.le_of_ofNat_le_ofNat + simp + cases (Classical.em (C ≤ i)) + · rw [min_eq_left (by linarith)] + rw [min_eq_left (by linarith)] + simp + · cases (Classical.em (C ≤ i + 1)) + · have HC : C = i + 1 := by linarith + simp_all + · rw [min_eq_right (by linarith)] + rw [min_eq_right (by linarith)] + simp + · rename_i A C B H1 H2 + rw [H1, H2] + repeat rw [exactDiffSum_append] + simp [exactDiffSum, exactClippedSum] + apply Int.le_of_ofNat_le_ofNat + simp + cases (Classical.em (C ≤ i)) + · rw [min_eq_left (by linarith)] + rw [min_eq_left (by linarith)] + simp + · cases (Classical.em (C ≤ i + 1)) + · have HC : C = i + 1 := by linarith + simp_all + · rw [min_eq_right (by linarith)] + rw [min_eq_right (by linarith)] + simp + +lemma privUnboundedMax_DP (ε : NNReal) (Hε : ε = ε₁ / ε₂) : + PureDPSystem.prop (@privUnboundedMax ε₁ ε₂) ε := by + suffices H : (privUnboundedMax ε₁ ε₂) = (sv9_aboveThresh_SPMF exactDiffSum 0 lucky_guess ε₁ ε₂) by + rw [H] + apply sv9_aboveThresh_pmf_DP + · apply exactDiffSum_sens + · trivial + unfold privUnboundedMax + unfold sv1_aboveThresh_PMF + unfold sv9_aboveThresh_SPMF + apply funext + intro l + congr + rw [<- sv8_sv9_eq, <- sv7_sv8_eq, <- sv6_sv7_eq, <- sv5_sv6_eq, + <- sv4_sv5_eq, <- sv3_sv4_eq, <- sv2_sv3_eq, <- sv1_sv2_eq] diff --git a/SampCert/DifferentialPrivacy/ZeroConcentrated/AdaptiveComposition.lean b/SampCert/DifferentialPrivacy/ZeroConcentrated/AdaptiveComposition.lean index ac102d93..7358ef78 100644 --- a/SampCert/DifferentialPrivacy/ZeroConcentrated/AdaptiveComposition.lean +++ b/SampCert/DifferentialPrivacy/ZeroConcentrated/AdaptiveComposition.lean @@ -131,50 +131,21 @@ theorem privComposeAdaptive_zCDPBound {nq1 : List T → PMF U} {nq2 : U -> List zCDPBound (privComposeAdaptive nq1 nq2) (ε₁ + ε₂) := by rw [zCDPBound] intro α Hα l₁ l₂ Hneighbours - -- This step is loose - apply (@LE.le.trans _ _ _ (ENNReal.ofReal (1/2 * (ε₁)^2 * α + 1/2 * (ε₂)^2 * α : ℝ)) _ _ ?case_sq) - case case_sq => - apply ofReal_le_ofReal - -- Binomial bound - rw [add_sq] - rw [<- right_distrib] - apply (mul_le_mul_of_nonneg_right _ ?goal1) - case goal1 => linarith - rw [<- left_distrib] - apply (mul_le_mul_of_nonneg_left _ ?goal1) - case goal1 => linarith - apply add_le_add_right - have hrw : ε₁ ^ 2 = ε₁ ^ 2 + 0 := by linarith - conv => - lhs - rw [hrw] - clear hrw - apply add_le_add_left - refine mul_nonneg ?bc.bc.ha Hε₂ - refine mul_nonneg ?G Hε₁ - simp -- Rewrite the upper bounds in terms of Renyi divergences of nq1/nq2 rw [zCDPBound] at h1 -- have marginal_ub := h1 α Hα l₁ l₂ Hneighbours - have conditional_ub : (⨆ (u : U), RenyiDivergence (nq2 u l₁) (nq2 u l₂) α ≤ ENNReal.ofReal (1 / 2 * ε₂ ^ 2 * α)) := - ciSup_le fun x => h2 x α Hα l₁ l₂ Hneighbours + have conditional_ub : (⨆ (u : U), RenyiDivergence (nq2 u l₁) (nq2 u l₂) α) ≤ ENNReal.ofReal (ε₂ * α) := + by exact iSup_le fun i => h2 i α Hα l₁ l₂ Hneighbours apply (@LE.le.trans _ _ _ (RenyiDivergence (nq1 l₁) (nq1 l₂) α + ⨆ (u : U), RenyiDivergence (nq2 u l₁) (nq2 u l₂) α) _ _ ?case_alg) case case_alg => + rw [add_mul] rw [ENNReal.ofReal_add ?G1 ?G2] case G1 => - simp - apply mul_nonneg - · apply mul_nonneg - · simp - · exact sq_nonneg ε₁ - · linarith + apply Right.mul_nonneg Hε₁ + linarith case G2 => - simp - apply mul_nonneg - · apply mul_nonneg - · simp - · exact sq_nonneg ε₂ - · linarith + apply Right.mul_nonneg Hε₂ + linarith exact _root_.add_le_add (h1 α Hα l₁ l₂ Hneighbours) conditional_ub exact privComposeAdaptive_renyi_bound Hα Hneighbours HAC1 HAC2 diff --git a/SampCert/DifferentialPrivacy/ZeroConcentrated/DP.lean b/SampCert/DifferentialPrivacy/ZeroConcentrated/DP.lean index f51ad1aa..5edf10ea 100644 --- a/SampCert/DifferentialPrivacy/ZeroConcentrated/DP.lean +++ b/SampCert/DifferentialPrivacy/ZeroConcentrated/DP.lean @@ -44,7 +44,7 @@ satisfying this bound are ``ε``-DP). -/ def zCDPBound (q : List T → PMF U) (ε : ℝ) : Prop := ∀ α : ℝ, 1 < α → ∀ l₁ l₂ : List T, Neighbour l₁ l₂ → - RenyiDivergence (q l₁) (q l₂) α ≤ ENNReal.ofReal ((1/2) * ε ^ 2 * α) + RenyiDivergence (q l₁) (q l₂) α ≤ ENNReal.ofReal (ε * α) /-- All neighbouring queries are absolutely continuous @@ -63,24 +63,22 @@ lemma zCDP_mono {m : List T -> PMF U} {ε₁ ε₂ : NNReal} (H : ε₁ ≤ ε · assumption · rw [zCDPBound] at * intro α Hα l₁ l₂ N - apply (@le_trans _ _ _ (ENNReal.ofReal (1 / 2 * ↑ε₁ ^ 2 * α)) _ (Hε α Hα l₁ l₂ N)) + apply (@le_trans _ _ _ (ENNReal.ofReal (ε₁ * α)) _ ?G1) + case G1 => apply Hε <;> trivial apply ENNReal.coe_mono refine (Real.toNNReal_le_toNNReal_iff ?a.hp).mpr ?a.a · apply mul_nonneg - · apply mul_nonneg - · simp - · simp + · exact NNReal.zero_le_coe + · linarith + · apply mul_le_mul_of_nonneg_right + · exact H · linarith - · repeat rw [mul_assoc] - apply (mul_le_mul_iff_of_pos_left (by simp)).mpr - apply (mul_le_mul_iff_of_pos_right (by linarith)).mpr - apply pow_le_pow_left' H (OfNat.ofNat 2) /-- Obtain an approximate DP bound from a zCDP bound, when ε > 0 and δ < 1 -/ lemma ApproximateDP_of_zCDP_pos_lt_one [Countable U] (m : Mechanism T U) - (ε : ℝ) (Hε_pos : 0 < ε) (h : zCDPBound m ε) (Hm : ACNeighbour m) : + (ε : ℝ) (Hε_pos : 0 < ε) (h : zCDPBound m ((1/2) * ε^2)) (Hm : ACNeighbour m) : ∀ δ : NNReal, (0 < (δ : ℝ)) -> ((δ : ℝ) < 1) -> DP' m (ε^2/2 + ε * (2*Real.log (1/δ))^(1/2 : ℝ)) δ := by have Hε : 0 ≤ ε := by exact le_of_lt Hε_pos intro δ Hδ0 Hδ1 @@ -634,12 +632,11 @@ lemma ApproximateDP_of_zCDP_pos_lt_one [Countable U] (m : Mechanism T U) Obtain an approximate DP bound from a zCDP bound, when ε > 0 -/ lemma ApproximateDP_of_zCDP_pos [Countable U] (m : Mechanism T U) - (ε : ℝ) (Hε_pos : 0 < ε) (h : zCDPBound m ε) (Hm : ACNeighbour m) : + (ε : ℝ) (Hε_pos : 0 < ε) (h : zCDPBound m ((1/2) * ε^2)) (Hm : ACNeighbour m) : ∀ δ : NNReal, (0 < (δ : ℝ)) -> DP' m (ε^2/2 + ε * (2*Real.log (1/δ))^(1/2 : ℝ)) δ := by intro δ Hδ0 cases (Classical.em (δ < 1)) - · intro Hδ1 - apply ApproximateDP_of_zCDP_pos_lt_one m ε Hε_pos h Hm δ Hδ0 + · apply ApproximateDP_of_zCDP_pos_lt_one m ε Hε_pos h Hm δ Hδ0 trivial · apply ApproximateDP_gt1 apply le_of_not_lt @@ -649,7 +646,7 @@ lemma ApproximateDP_of_zCDP_pos [Countable U] (m : Mechanism T U) Obtain an approximate DP bound from a zCDP bound -/ theorem ApproximateDP_of_zCDP [Countable U] (m : Mechanism T U) - (ε : ℝ) (Hε : 0 ≤ ε) (h : zCDPBound m ε) (Hm : ACNeighbour m) : + (ε : ℝ) (Hε : 0 ≤ ε) (h : zCDPBound m ((1/2) * ε^2)) (Hm : ACNeighbour m) : ∀ δ : NNReal, (0 < (δ : ℝ)) -> DP' m (ε^2/2 + ε * (2*Real.log (1/δ))^(1/2 : ℝ)) δ := by cases LE.le.lt_or_eq Hε · rename_i Hε @@ -668,70 +665,81 @@ theorem ApproximateDP_of_zCDP [Countable U] (m : Mechanism T U) case G2 => exact Hm l₁ l₂ HN simp +namespace zCDP_of_adp_def + +def D (δ : NNReal) : Real := (2 * Real.log (1 / δ)) ^ ((1 : ℝ) / (2 : ℝ)) + +def ε (ε' : NNReal) (δ : NNReal) : NNReal := Real.toNNReal (-D δ + discrim (OfNat.ofNat 2)⁻¹ (D δ) (-↑ε') ^ (2 : ℝ)⁻¹) + +lemma eqε (ε' δ : NNReal) (H0 : 0 < δ) (H1 : δ < 1) : ε' = ((ε ε' δ)^2) / (2 : NNReal) + ε ε' δ * D δ := by + suffices (((1 : ℝ) / (2 : ℝ)) * (ε ε' δ) * (ε ε' δ) + D δ * ε ε' δ + (- ε')) = 0 by + simp_all + rw [add_neg_eq_zero] at this + rw [<- this] + ring_nf + + have Hle : 0 < D δ ^ 2 + 4 * (OfNat.ofNat 2)⁻¹ * ε' := by + apply Right.add_pos_of_pos_of_nonneg + · apply sq_pos_of_pos + unfold D + apply Real.rpow_pos_of_pos + apply mul_pos <;> simp + exact Real.log_neg H0 H1 + · simp + + apply (@quadratic_eq_zero_iff Real _ _ _ _ _ ?Ga ((discrim (OfNat.ofNat 1 / OfNat.ofNat 2) (D δ) (-ε'.toReal)) ^ ((1 : ℝ) / (2 : ℝ))) ?Gs ((ε ε' δ).toReal)).mpr + case Ga => simp + case Gs => + rw [<- Real.rpow_add ?Gadd] + case Gadd => + simp [discrim] + apply Hle + rw [add_halves] + simp + left + simp + unfold ε + apply Real.coe_toNNReal + simp [discrim] + apply nonneg_le_nonneg_of_sq_le_sq + · apply Real.rpow_nonneg + exact le_of_lt Hle + rw [<- Real.rpow_add Hle] + rw [<- one_div] + rw [add_halves] + simp + rw [<- sq] + simp + +end zCDP_of_adp_def + +/-- +Pure privacy bound required to obtain (ε, δ)-approximate DP +-/ +def zCDP_of_adp (δ : NNReal) (ε' : NNReal) : NNReal := (1/2) * ((zCDP_of_adp_def.ε ε' δ)^2) + /-- zCDP is no weaker than approximate DP, up to a loss of parameters. -/ lemma zCDP_ApproximateDP [Countable U] {m : Mechanism T U} : - ∃ (degrade : (δ : NNReal) -> (ε' : NNReal) -> NNReal), ∀ (δ : NNReal) (_ : 0 < δ) (ε' : NNReal), - (zCDP m (degrade δ ε') -> ApproximateDP m ε' δ) := by - let degrade (δ : NNReal) (ε' : NNReal) : NNReal := - (√(2 * Real.log (1/δ) + 2 * ε') - √(2 * Real.log (1/δ))).toNNReal - have HDdegrade δ ε' : degrade δ ε' = (√(2 * Real.log (1/δ) + 2 * ε') - √(2 * Real.log (1/δ))).toNNReal := by rfl - exists degrade - intro δ Hδ ε' ⟨ HN , HB ⟩ - + ∀ (δ : NNReal) (_ : 0 < δ) (ε' : NNReal), + (zCDP m (zCDP_of_adp δ ε') -> ApproximateDP m ε' δ) := by + intro δ Hδ0 ε' H cases Classical.em (1 ≤ δ) · rename_i Hδ1 exact ApproximateDP_gt1 m (↑ε') Hδ1 - rename_i Hδ1 - rw [ApproximateDP] - have R := ApproximateDP_of_zCDP m (degrade δ ε') (by simp) HB HN δ Hδ + simp at Hδ1 + unfold ApproximateDP + + unfold zCDP_of_adp at H + rcases H with ⟨ H1, H2 ⟩ + have X := ApproximateDP_of_zCDP m (zCDP_of_adp_def.ε ε' δ) ?G1 H2 H1 δ Hδ0 + case G1 => exact NNReal.zero_le_coe + rw [zCDP_of_adp_def.eqε ε' δ Hδ0 Hδ1] + unfold zCDP_of_adp_def.D + simp_all - have Hdegrade : ((degrade δ ε') ^ 2) / 2 + (degrade δ ε') * (2 * Real.log (1 / δ))^(1/2 : ℝ) = ε' := by - rw [HDdegrade] - generalize HD : Real.log (1 / δ) = D - have HDnn : 0 ≤ D := by - rw [<- HD] - apply Real.log_nonneg - apply one_le_one_div Hδ - exact le_of_not_ge Hδ1 - simp only [Real.coe_toNNReal'] - rw [max_eq_left ?G1] - case G1 => - apply sub_nonneg_of_le - apply Real.sqrt_le_sqrt - simp - rw [sub_sq'] - rw [Real.sq_sqrt ?G1] - case G1 => - apply add_nonneg - · simp - trivial - · simp - rw [Real.sq_sqrt ?G1] - case G1 => - simp - trivial - rw [← Real.sqrt_eq_rpow] - rw [mul_sub_right_distrib] - rw [<- sq] - rw [Real.sq_sqrt ?G1] - case G1 => - simp - trivial - generalize HW : √(2 * D + 2 * ↑ε') * √(2 * D) = W - conv => - enter [1, 1, 1, 2] - rw [mul_assoc] - rw [HW] - rw [sub_div] - rw [add_div] - rw [add_div] - simp - linarith - rw [Hdegrade] at R - trivial /-- @@ -1872,7 +1880,7 @@ Convert ε-DP bound to `(1/2)ε²`-zCDP bound Note that `zCDPBound _ ε` corresponds to `(1/2)ε²`-zCDP (not `ε`-zCDP). -/ -lemma ofDP_bound (ε : NNReal) (q' : List T -> PMF U) (H : SLang.PureDP q' ε) : zCDPBound q' ε := by +lemma ofDP_bound (ε : NNReal) (q' : List T -> PMF U) (H : SLang.PureDP q' ε) : zCDPBound q' ((1/2) * ε^2) := by rw [zCDPBound] intro α Hα l₁ l₂ HN -- Special case: (εα/2 > 1) @@ -2298,7 +2306,7 @@ Convert ε-DP to `(1/2)ε²`-zCDP. Note that `zCDPBound _ ε` corresponds to `(1/2)ε²`-zCDP (not `ε`-zCDP). -/ -lemma ofDP (ε : NNReal) (q : List T -> PMF U) (H : SLang.PureDP q ε) : zCDP q ε := by +lemma ofDP (ε : NNReal) (q : List T -> PMF U) (H : SLang.PureDP q ε) : zCDP q ((1/2) * ε^2) := by constructor · exact ACNeighbour_of_DP ε q H - · exact ofDP_bound ε q H \ No newline at end of file + · exact ofDP_bound ε q H diff --git a/SampCert/DifferentialPrivacy/ZeroConcentrated/Mechanism/Properties.lean b/SampCert/DifferentialPrivacy/ZeroConcentrated/Mechanism/Properties.lean index d3653614..8838f8db 100644 --- a/SampCert/DifferentialPrivacy/ZeroConcentrated/Mechanism/Properties.lean +++ b/SampCert/DifferentialPrivacy/ZeroConcentrated/Mechanism/Properties.lean @@ -23,7 +23,7 @@ namespace SLang The zCDP mechanism with bounded sensitivity satisfies the bound for ``(Δε₂/ε₁)^2``-zCDP. -/ theorem privNoisedQuery_zCDPBound (query : List T → ℤ) (Δ ε₁ ε₂ : ℕ+) (bounded_sensitivity : sensitivity query Δ) : - zCDPBound (privNoisedQuery query Δ ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by + zCDPBound (privNoisedQuery query Δ ε₁ ε₂) ((1/2) * ((ε₁ : NNReal) / ε₂) ^ 2) := by simp [zCDPBound, privNoisedQuery] intros α h1 l₁ l₂ h2 have A := @discrete_GaussianGenSample_ZeroConcentrated α h1 (Δ * ε₂) ε₁ (query l₁) (query l₂) @@ -221,12 +221,11 @@ def privNoisedQuery_AC (query : List T -> ℤ) (Δ ε₁ ε₂ : ℕ+) : ACNeigh The zCDP mechanism is ``(Δε₂/ε₁)^2``-zCDP. -/ theorem privNoisedQuery_zCDP (query : List T → ℤ) (Δ ε₁ ε₂ : ℕ+) (bounded_sensitivity : sensitivity query Δ) : - zCDP (privNoisedQuery query Δ ε₁ ε₂) ((ε₁ : NNReal) / ε₂) := by - simp [zCDP] + zCDP (privNoisedQuery query Δ ε₁ ε₂) ((1/2) * ((ε₁ : NNReal) / ε₂) ^ 2) := by + simp only [zCDP] apply And.intro · exact privNoisedQuery_AC query Δ ε₁ ε₂ · apply privNoisedQuery_zCDPBound exact bounded_sensitivity - end SLang diff --git a/SampCert/DifferentialPrivacy/ZeroConcentrated/Postprocessing.lean b/SampCert/DifferentialPrivacy/ZeroConcentrated/Postprocessing.lean index 6ebaacb2..a3b38813 100644 --- a/SampCert/DifferentialPrivacy/ZeroConcentrated/Postprocessing.lean +++ b/SampCert/DifferentialPrivacy/ZeroConcentrated/Postprocessing.lean @@ -37,6 +37,7 @@ def privPostProcess_AC {f : U -> V} (nq : Mechanism T U) (Hac : ACNeighbour nq) intro l₁ l₂ Hn v have Hac := Hac l₁ l₂ Hn simp [privPostProcess] + simp [DFunLike.coe, SPMF.instFunLike] intro Hppz i fi apply Hac apply Hppz @@ -365,10 +366,6 @@ theorem privPostPocess_DP_pre_reduct {U : Type} [m2 : MeasurableSpace U] [count rw [<- mul_assoc] at HJ -- Move constants to the left-hand side of HJ - -- Super haunted bug: When I apply this as normal to HJ (with placeholders) - -- Lean it lights up all of my "have" and "let" statements because it \"doesn't - -- know how to synthesize\" a placeholder. The placeholder it points me to is in - -- Pure/Postprocessing, where the same lemma is also applied with placeholders. have W := @ENNReal.div_le_iff_le_mul 1 @@ -575,8 +572,6 @@ theorem privPostProcess_zCDPBound {nq : Mechanism T U} {ε : ℝ} apply inv_nonneg_of_nonneg linarith apply elog_mono_le.mp - simp [PMF.bind, PMF.pure] - simp [PMF.instFunLike] apply privPostPocess_DP_pre · exact fun l => PMF.hasSum_coe_one (nq l) · exact h1 diff --git a/SampCert/DifferentialPrivacy/ZeroConcentrated/System.lean b/SampCert/DifferentialPrivacy/ZeroConcentrated/System.lean index de28f44d..88396bef 100644 --- a/SampCert/DifferentialPrivacy/ZeroConcentrated/System.lean +++ b/SampCert/DifferentialPrivacy/ZeroConcentrated/System.lean @@ -16,21 +16,52 @@ import SampCert.DifferentialPrivacy.ZeroConcentrated.Const This file contains an instance of an abstract DP system associated to the discrete gaussian mechanisms. -/ +noncomputable section + namespace SLang variable { T : Type } /-- -Instance of a DP system for zCDP, using the discrete Gaussian as a noising mechanism. +Instance of a DP system for zCDP -/ -noncomputable instance gaussian_zCDPSystem : DPSystem T where +instance zCDPSystem : DPSystem T where prop := zCDP - prop_adp := zCDP_ApproximateDP - prop_mono := zCDP_mono + of_app_dp := zCDP_of_adp + prop_adp := by + intros; apply zCDP_ApproximateDP <;> assumption + prop_mono := by + intro _ _ _ _ H HZ + apply zCDP_mono H HZ + adaptive_compose_prop := by + intros _ _ _ _ _ _ _ _ _ HZ HZ2 Hε + rw [<- Hε] + apply privComposeAdaptive_zCDP + apply HZ + apply HZ2 + postprocess_prop := by + intros _ _ _ _ _ _ HZ + apply privPostProcess_zCDP + apply HZ + const_prop := by + intros _ _ _ _ Hε + rw [Hε] + apply privConst_zCDP + +def gaussian_zCDP_noise_priv (ε₁ ε₂ : ℕ+) (ε : NNReal) : Prop := (1/2) * ((ε₁ : NNReal) / ε₂)^2 ≤ ε + +/-- +Gaussian noise for zCDP system +-/ +instance gaussian_zCDPSystem (T : Type) : DPNoise (@zCDPSystem T) where noise := privNoisedQuery - noise_prop := privNoisedQuery_zCDP - adaptive_compose_prop := privComposeAdaptive_zCDP - postprocess_prop := privPostProcess_zCDP - const_prop := privConst_zCDP + noise_priv := gaussian_zCDP_noise_priv + noise_prop := by + intros _ _ _ _ _ H HS + apply DPSystem.prop_mono ?G1 + · simp [DPSystem.prop] + apply privNoisedQuery_zCDP + apply HS + · apply H end SLang diff --git a/SampCert/SLang.lean b/SampCert/SLang.lean index 5566b5fb..74d3a631 100644 --- a/SampCert/SLang.lean +++ b/SampCert/SLang.lean @@ -144,11 +144,77 @@ def probUntil (body : SLang T) (cond : T → Bool) : SLang T := do let v ← body probWhile (λ v : T => ¬ cond v) (λ _ : T => body) v + +/- +A SPMF is a SLang program that is a also proper distribution +-/ +abbrev SPMF.{u} (α : Type u) : Type u := { f : SLang α // HasSum f 1 } + +instance : Coe (SPMF α) (SLang α) where + coe a := a.1 + +instance : Coe (SPMF α) (PMF α) where + coe a := ⟨ a.1, a.2 ⟩ + + +lemma probPure_norm (a : α) : ∑'s, probPure a s = 1 := by + simp [probPure] + + +@[simp] +def SPMF_pure (a : α) : SPMF α := + ⟨ probPure a, + by + rw [Summable.hasSum_iff ENNReal.summable] + apply probPure_norm ⟩ + + +lemma probBind_norm (p : SLang α) (q : α -> SLang β) : + ∑'s, p s = 1 -> + (∀ a, ∑'s, q a s = 1) -> + ∑'s, probBind p q s = 1 := by + intro H1 H2 + rw [<- Summable.hasSum_iff ENNReal.summable] at H1 + have H2' : ∀ a, HasSum (q a) 1 := by + intro a + have H2 := H2 a + rw [<- Summable.hasSum_iff ENNReal.summable] at H2 + trivial + let p' : PMF α := ⟨ p, H1 ⟩ + let q' : α -> PMF β := (fun a => ⟨q a, H2' a ⟩) + have H : (probBind p (fun x => q x)) = (PMF.bind p' q') := by + unfold probBind + simp [DFunLike.coe, PMF.instFunLike, PMF.bind] + rw [H] + cases (PMF.bind p' q') + simp [DFunLike.coe, PMF.instFunLike] + rw [<- Summable.hasSum_iff ENNReal.summable] + trivial + + +@[simp] +def SPMF_bind (p : SPMF α) (q : α -> SPMF β) : SPMF β := + ⟨ probBind p (fun x => q x), + by + rw [Summable.hasSum_iff ENNReal.summable] + apply probBind_norm + · rw [<- Summable.hasSum_iff ENNReal.summable] + cases p + trivial + · intro a + rw [<- Summable.hasSum_iff ENNReal.summable] + cases q a + trivial ⟩ + +instance : Monad SPMF where + pure := SPMF_pure + bind := SPMF_bind + end SLang namespace PMF @[extern "my_run"] -opaque run (c : PMF T) : IO T +opaque run (c : SLang.SPMF T) : IO T end PMF diff --git a/SampCert/Samplers/Laplace/Code.lean b/SampCert/Samplers/Laplace/Code.lean index 064a4adf..b4c7adc1 100644 --- a/SampCert/Samplers/Laplace/Code.lean +++ b/SampCert/Samplers/Laplace/Code.lean @@ -80,8 +80,6 @@ def DiscreteLaplaceSampleOptimized (num den : PNat) : SLang ℤ := do /-- ``SLang`` term to obtain a sample from the discrete Laplace distribution, optimized based on (num/den). - -FIXME: Extractor breaks when we move 50 to an external constant, even when we specify @[always_inline] -/ def DiscreteLaplaceSampleMixed (num den : PNat) (mix: ℕ) : SLang ℤ := do if num ≤ den * mix diff --git a/SampCert/Samplers/Uniform/Properties.lean b/SampCert/Samplers/Uniform/Properties.lean index fd7539bf..21a85d48 100644 --- a/SampCert/Samplers/Uniform/Properties.lean +++ b/SampCert/Samplers/Uniform/Properties.lean @@ -249,7 +249,7 @@ theorem UniformSample_HasSum_1 (n : PNat) : /-- Conversion of ``UniformSample`` from a ``SLang`` term to a ``PMF``. -/ -noncomputable def UniformSample_PMF (n : PNat) : PMF ℕ := ⟨ UniformSample n , UniformSample_HasSum_1 n⟩ +def UniformSample_PMF (n : PNat) : SPMF ℕ := ⟨ UniformSample n , UniformSample_HasSum_1 n⟩ /-- Evaluation of ``UniformSample`` on ``ℕ`` guarded by its support, when inside the support. diff --git a/Test.lean b/Test.lean index 930ddeb8..63dee216 100644 --- a/Test.lean +++ b/Test.lean @@ -6,6 +6,7 @@ Authors: Jean-Baptiste Tristan import SampCert import SampCert.SLang import SampCert.Samplers.Gaussian.Properties +import Init.Data.Float open SLang Std Int Array PMF @@ -173,7 +174,61 @@ def test (num den : ℕ+) (mix numSamples : ℕ) (threshold : Float) : IO Unit : IO.println s!"Kolmogorov distance = {D}" -def main : IO Unit := do +def query_tests : IO Unit := do + -- Generate list of 1000 numbers from 0 to 15 + let samples := 10000 + let unif_ub := 10 + let data : List ℕ <- List.mapM (fun _ => run <| (SLang.UniformSample_PMF unif_ub)) (List.replicate samples 0) + + let num : ℕ+ := 9 + let den : ℕ+ := 2 + let num_trials := 5 + + IO.println s!"[query] testing pure ({(num : ℕ)} / {(den : ℕ)})-DP queries" + IO.println s!"data := {samples} uniform samples of [0, {(unif_ub : ℕ)}): {(data.take 20)}..." + IO.println "" + + for i in [:num_trials] do + let ct <- run <| @privNoisedCount _ PureDPSystem laplace_pureDPSystem num den data + IO.println s!"#{i} count: {ct}" + IO.println "" + + let sum_bound : ℕ+ := 10 + for i in [:num_trials] do + let bs <- run <| @privNoisedBoundedSum PureDPSystem laplace_pureDPSystem sum_bound num den data + IO.println s!"#{i} bounded sum (bound = {(sum_bound : ℕ)}): {bs}" + IO.println "" + + for i in [:num_trials] do + let bs <- run <| @privNoisedBoundedMean PureDPSystem laplace_pureDPSystem sum_bound num den data + let float_bs := Float.div (Float.ofInt bs.1) (Float.ofInt bs.2) + IO.println s!"#{i} bounded mean (bound = {(sum_bound : ℕ)}): {bs} (~{float_bs})" + IO.println "" + + for i in [:num_trials] do + let h <- run <| @privNoisedHistogram numBins _ { bin := example_bin } PureDPSystem laplace_pureDPSystem num den data + let h' : List ℤ := h.count.toList.take 25 + IO.println s!"#{i} histogram : {h'}..." + IO.println "" + + let thresh := 100 + for i in [:num_trials] do + let m <- run <| @privMaxBinAboveThreshold numBins _ { bin := example_bin } PureDPSystem laplace_pureDPSystem num den thresh data + IO.println s!"#{i} max bin above threshold (threshold = {(thresh : ℤ)}): {m}" + IO.println "" + + let τ := 100 + for i in [:num_trials] do + let m <- run <| @privMeanHistogram PureDPSystem laplace_pureDPSystem numBins { bin := example_bin } unbin num den τ num den data + let m_float := + match m with + | none => none + | some bs => some (Float.div (Float.ofInt bs.1) (Float.ofInt bs.2)) + IO.println s!"#{i} (0.5x-privacy) histogram mean, τ = {τ}: {m} (~{m_float})" + IO.println "" + +def statistical_tests : IO Unit := do + IO.println s!"[samplers] statistical tests" let tests : List (ℕ+ × ℕ+ × ℕ) := [ -- (1,1,0), (1,1,7), @@ -188,3 +243,33 @@ def main : IO Unit := do for (num,den,mix) in tests do IO.println s!"num = {(num : ℕ)}, den = {(den : ℕ)}, mix = {mix}" test num den mix 100000 0.1 + +def sparseVector_tests : IO Unit := do + let samples := 10000 + let unif_ub := 100 + let data : List ℕ <- List.mapM (fun _ => run <| (SLang.UniformSample_PMF unif_ub)) (List.replicate samples 0) + + let num : ℕ+ := 1 + let den : ℕ+ := 4 + let num_trials := 5 + + IO.println s!"[query] testing sparse vector max, ({(num : ℕ)} / {(den : ℕ)})-DP" + IO.println s!"data := {samples} uniform samples of [0, {(unif_ub : ℕ)}): {(data.take 20)}..." + IO.println "" + + -- for i in [:num_trials] do + -- let ct <- run <| @sv0_privMax_PMF PureDPSystem laplace_pureDPSystem num den data + -- IO.println s!"#{i} sv0 max: {ct}" + -- IO.println "" + + for i in [:num_trials] do + let ct <- run <| privUnboundedMax num den data + IO.println s!"#{i} sv1 max: {ct}" + IO.println "" + + + +def main : IO Unit := do + sparseVector_tests + query_tests + statistical_tests diff --git a/Tests/Load.py b/Tests/Load.py index 8839dbae..48008712 100644 --- a/Tests/Load.py +++ b/Tests/Load.py @@ -16,20 +16,20 @@ # Loading Mathlib and its dependencies # To create the dynamic library, for each one of them, run lake build xxx:shared -CDLL("../.lake/packages/cli/.lake/build/lib/libCli.dylib", RTLD_GLOBAL) -CDLL("../.lake/packages/batteries/.lake/build/lib/libBatteries.dylib", RTLD_GLOBAL) -CDLL("../.lake/packages/aesop/.lake/build/lib/libAesop.dylib", RTLD_GLOBAL) -CDLL("../.lake/packages/proofwidgets/.lake/build/lib/libProofWidgets.dylib", RTLD_GLOBAL) -CDLL("../.lake/packages/qq/.lake/build/lib/libQq.dylib", RTLD_GLOBAL) -CDLL("../.lake/packages/importGraph/.lake/build/lib/libImportGraph.dylib", RTLD_GLOBAL) -CDLL("../.lake/packages/mathlib/.lake/build/lib/libMathlib.dylib", RTLD_GLOBAL) +CDLL(".lake/packages/cli/.lake/build/lib/libCli.dylib", RTLD_GLOBAL) +CDLL(".lake/packages/batteries/.lake/build/lib/libBatteries.dylib", RTLD_GLOBAL) +CDLL(".lake/packages/aesop/.lake/build/lib/libAesop.dylib", RTLD_GLOBAL) +CDLL(".lake/packages/proofwidgets/.lake/build/lib/libProofWidgets.dylib", RTLD_GLOBAL) +CDLL(".lake/packages/qq/.lake/build/lib/libQq.dylib", RTLD_GLOBAL) +CDLL(".lake/packages/importGraph/.lake/build/lib/libImportGraph.dylib", RTLD_GLOBAL) +CDLL(".lake/packages/mathlib/.lake/build/lib/libMathlib.dylib", RTLD_GLOBAL) # Loading SampCert's FFI -CDLL("../.lake/build/lib/libleanffi.dylib", RTLD_GLOBAL) +CDLL(".lake/build/lib/libleanffi.dylib", RTLD_GLOBAL) # # Loading SampCert -samplers = CDLL("../.lake/build/lib/libSampCert.dylib", RTLD_GLOBAL) +samplers = CDLL(".lake/build/lib/libSampCert.dylib", RTLD_GLOBAL) # samplers = CDLL("../.lake/build/lib/libSamplers.dylib", RTLD_GLOBAL) # Initialization @@ -44,8 +44,10 @@ lean.lean_io_mark_end_initialization() -# Initialization complete +def SampCertFII_dgs_get(num, denom, mix): + samplers.dgs_get(c_uint32(num),c_uint32(denom),c_uint32(mix)) -if __name__ == "__main__": - r = samplers.dgs_get(c_uint32(40),c_uint32(1),c_uint32(0)) - print(r) \ No newline at end of file +# Initialization complete +# if __name__ == "__main__": +# r = samplers.dgs_get(c_uint32(40),c_uint32(1),c_uint32(0)) +# print(r) diff --git a/Tests/benchmarks.py b/Tests/benchmarks.py index 1f067787..7c37aee5 100644 --- a/Tests/benchmarks.py +++ b/Tests/benchmarks.py @@ -2,31 +2,36 @@ # Benchmarking the discrete Gaussian sampler -import SampCert import matplotlib.pyplot as plt import timeit import secrets import numpy as np from datetime import datetime import tqdm as tqdm -from decimal import Decimal import argparse -from diffprivlib.mechanisms.base import bernoulli_neg_exp from diffprivlib.mechanisms import GaussianDiscrete +from discretegauss import sample_dgauss -from fractions import Fraction -from discretegauss import sample_dlaplace, sample_dgauss +import SampCert +from Load import SampCertFII_dgs_get +# from Load import samplers sampler = SampCert.SLang() rng = secrets.SystemRandom() -def gaussian_benchmarks(mix, warmup_attempts, measured_attempts, lb ,ub, quantity): +def gaussian_benchmarks(mix, warmup_attempts, measured_attempts, lb ,ub, quantity, inv): # Values of epsilon attempted sigmas = [] g = GaussianDiscrete(epsilon=0.01, delta=0.00001) + # Mixes to force the benchmark to work with algorithm 1 or algorithm 2 + mix = [0, 300, 7] + titles = ["DiscreteGaussianSample + Algorithm 1 ", + "DiscreteGaussianSample + Algorithm 2", + "DiscreteGaussianSample + Optimized "] + # SampCert means = [] @@ -35,6 +40,10 @@ def gaussian_benchmarks(mix, warmup_attempts, measured_attempts, lb ,ub, quantit means.append([]) stdevs.append([]) + # FFI SampCert + ffimeans = [] + ffistdevs = [] + # sample_dgauss ibm_dg_mean = [] ibm_dg_stdev = [] @@ -45,15 +54,24 @@ def gaussian_benchmarks(mix, warmup_attempts, measured_attempts, lb ,ub, quantit num_attempts = warmup_attempts + measured_attempts - for sigma in tqdm.tqdm(np.linspace(lb+0.001,ub,quantity)): + for sigma_ in tqdm.tqdm(range(lb,ub,quantity)): + if inv: + sigma = 1.0 / float(sigma_) + sigma_num = sigma_ + sigma_denom = ub + else: + sigma = sigma_ + sigma_num = sigma_ + sigma_denom = 1 + g._scale = sigma sigmas += [sigma] - sigma_num, sigma_denom = Decimal(sigma).as_integer_ratio() sigma_squared = sigma ** 2 times = [] + ffitimes = [] for i in mix: times.append([]) @@ -67,6 +85,12 @@ def gaussian_benchmarks(mix, warmup_attempts, measured_attempts, lb ,ub, quantit elapsed = timeit.default_timer() - start_time times[m].append(elapsed) + for i in range(num_attempts): + start_time = timeit.default_timer() + SampCertFII_dgs_get(sigma_num, sigma_denom, 7) + elapsed = timeit.default_timer() - start_time + ffitimes.append(elapsed) + for i in range(num_attempts): start_time = timeit.default_timer() sample_dgauss(sigma_squared, rng) @@ -85,6 +109,7 @@ def gaussian_benchmarks(mix, warmup_attempts, measured_attempts, lb ,ub, quantit measured.append(np.array(times[m][-measured_attempts:])) ibm_dg_measured = np.array(t_ibm_dg[-measured_attempts:]) ibm_dpl_measured = np.array(t_ibm_dpl[-measured_attempts:]) + ffi_measured = np.array(ffitimes[-measured_attempts:]) # Convert s to ms for m in range(len(mix)): @@ -94,41 +119,53 @@ def gaussian_benchmarks(mix, warmup_attempts, measured_attempts, lb ,ub, quantit ibm_dg_stdev.append(ibm_dg_measured.std() * 1000.0) ibm_dpl_mean.append(ibm_dpl_measured.mean() * 1000.0) ibm_dpl_stdev.append(ibm_dpl_measured.std() * 1000.0) + ffimeans.append(ffi_measured.mean() * 1000.0) + ffistdevs.append(ffi_measured.std() * 1000.0) - - fig,ax1 = plt.subplots() + fig,ax1 = plt.subplots(figsize=(7,5)) color = plt.cm.rainbow(np.linspace(0, 1, len(mix) + 2)) - for m in range(len(mix)): - ax1.plot(sigmas, means[m], color=color[2 + m], linewidth=1.0, label='DiscreteGaussianSample' + ' mix = ' + str(mix[m])) - ax1.fill_between(sigmas, np.array(means[m])-0.5*np.array(stdevs[m]), np.array(means[m])+0.5*np.array(stdevs[m]), - alpha=0.2, facecolor='k', linewidth=2, linestyle='dashdot', antialiased=True) + ibm_confidence = 1.96 * np.array(ibm_dg_mean) / np.sqrt(measured_attempts) + ax1.plot(sigmas, ibm_dg_mean, color=color[0], linewidth=1.0, label='sample_dgauss (Algorithm 1)', linestyle=(0, (3,1,1,1))) + ax1.fill_between(sigmas, np.array(ibm_dg_mean)-ibm_confidence, np.array(ibm_dg_mean)+ibm_confidence, + alpha=0.2, facecolor=color[0], linewidth=2, linestyle='solid', antialiased=True) + + ibm_dpl_confidence = 1.96 * np.array(ibm_dpl_mean) / np.sqrt(measured_attempts) + ax1.plot(sigmas, ibm_dpl_mean, color=color[1], linewidth=1.0, label='diffprivlib (Algorithm 2)', linestyle=(0, (5, 4))) + ax1.fill_between(sigmas, np.array(ibm_dpl_mean)-ibm_dpl_confidence, np.array(ibm_dpl_mean)+ibm_dpl_confidence, + alpha=0.2, facecolor=color[1], linewidth=2, linestyle='solid', antialiased=True) - ax1.plot(sigmas, ibm_dg_mean, color=color[0], linewidth=1.0, label='IBM sample_dgauss') - ax1.fill_between(sigmas, np.array(ibm_dg_mean)-0.5*np.array(ibm_dg_stdev), np.array(ibm_dg_mean)+0.5*np.array(ibm_dg_stdev), - alpha=0.2, facecolor='k', linewidth=2, linestyle='dashdot', antialiased=True) + linestyles = [(0, (5, 2)), 'dotted', 'solid'] + for m in range(len(mix)): + confidence = 1.96 * np.array(stdevs[m]) / np.sqrt(measured_attempts) + ax1.plot(sigmas, means[m], color=color[2 + m], linewidth=1.0, label=titles[m], linestyle=(linestyles[m])) + ax1.fill_between(sigmas, np.array(means[m])-confidence, np.array(means[m])+confidence, + alpha=0.2, facecolor=color[2 + m], linewidth=2, linestyle='solid', antialiased=True) - ax1.plot(sigmas, ibm_dpl_mean, color=color[1], linewidth=1.0, label='IBM diffprivlib') - ax1.fill_between(sigmas, np.array(ibm_dpl_mean)-0.5*np.array(ibm_dpl_stdev), np.array(ibm_dpl_mean)+0.5*np.array(ibm_dpl_stdev), - alpha=0.2, facecolor='k', linewidth=2, linestyle='dashdot', antialiased=True) + ffi_confidence = 1.96 * np.array(ffimeans) / np.sqrt(measured_attempts) + ax1.plot(sigmas, ffimeans, color='purple', linewidth=1.5, label='Compiled (Optimized)') + ax1.fill_between(sigmas, np.array(ffimeans)-ffi_confidence, np.array(ffimeans)+ffi_confidence, + alpha=0.2, facecolor='purple', linewidth=2, linestyle='solid', antialiased=True) ax1.set_xlabel("Sigma") ax1.set_ylabel("Sampling Time (ms)") plt.legend(loc = 'best') now = datetime.now() - filename = 'GaussianBenchmarks' + now.strftime("%H%M%S") + '.pdf' + filename = 'GaussianBenchmarks.pdf' + print(filename) plt.savefig(filename) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--mix", nargs="+", type=int, help="mix", default=[0]) + parser.add_argument("--mix", nargs="+", type=int, help="mix", default=[7]) parser.add_argument("--warmup", type=int, help="warmup", default=0) parser.add_argument("--trials", type=int, help="trials", default=1000) parser.add_argument("--min", type=int, help="min", default=1) parser.add_argument("--max", type=int, help="max", default=500) parser.add_argument("--quantity", type=int, help="step", default=10) + parser.add_argument("--inv", default=False, action=argparse.BooleanOptionalAction) args = parser.parse_args() - gaussian_benchmarks(args.mix,args.warmup,args.trials,args.min,args.max,args.quantity) + gaussian_benchmarks(args.mix,args.warmup,args.trials,args.min,args.max,args.quantity,args.inv) diff --git a/Tests/discretegauss.py b/Tests/discretegauss.py new file mode 100644 index 00000000..706dd0fa --- /dev/null +++ b/Tests/discretegauss.py @@ -0,0 +1,305 @@ +# Implementation of exact discrete gaussian distribution sampler +# See https://arxiv.org/abs/2004.00010 +# - Thomas Steinke dgauss@thomas-steinke.net 2020 + +import random #Default random number generator, +#random.SecureRandom() provides high-quality randomness from /dev/urandom or similar +from fractions import Fraction #we will work with rational numbers + +#sample uniformly from range(m) +#all randomness comes from calling this +def sample_uniform(m,rng): + assert isinstance(m,int) #python 3 + #assert isinstance(m,(int,long)) #python 2 + assert m>0 + return rng.randrange(m) + +#sample from a Bernoulli(p) distribution +#assumes p is a rational number in [0,1] +def sample_bernoulli(p,rng): + assert isinstance(p,Fraction) + assert 0 <= p <= 1 + m=sample_uniform(p.denominator,rng) + if m < p.numerator: + return 1 + else: + return 0 + +#sample from a Bernoulli(exp(-x)) distribution +#assumes x is a rational number in [0,1] +def sample_bernoulli_exp1(x,rng): + assert isinstance(x,Fraction) + assert 0 <= x <= 1 + k=1 + while True: + if sample_bernoulli(x/k,rng)==1: + k=k+1 + else: + break + return k%2 + +#sample from a Bernoulli(exp(-x)) distribution +#assumes x is a rational number >=0 +def sample_bernoulli_exp(x,rng): + assert isinstance(x,Fraction) + assert x >= 0 + #Sample floor(x) independent Bernoulli(exp(-1)) + #If all are 1, return Bernoulli(exp(-(x-floor(x)))) + while x>1: + if sample_bernoulli_exp1(Fraction(1,1),rng)==1: + x=x-1 + else: + return 0 + return sample_bernoulli_exp1(x,rng) + +#sample from a geometric(1-exp(-x)) distribution +#assumes x is a rational number >= 0 +def sample_geometric_exp_slow(x,rng): + assert isinstance(x,Fraction) + assert x >= 0 + k=0 + while True: + if sample_bernoulli_exp(x,rng)==1: + k=k+1 + else: + return k + +#sample from a geometric(1-exp(-x)) distribution +#assumes x >= 0 rational +def sample_geometric_exp_fast(x,rng): + assert isinstance(x,Fraction) + if x==0: return 0 #degenerate case + assert x>0 + + t=x.denominator + while True: + u=sample_uniform(t,rng) + b=sample_bernoulli_exp(Fraction(u,t),rng) + if b==1: + break + v=sample_geometric_exp_slow(Fraction(1,1),rng) + value = v*t+u + return value//x.numerator + +#sample from a discrete Laplace(scale) distribution +#Returns integer x with Pr[x] = exp(-abs(x)/scale)*(exp(1/scale)-1)/(exp(1/scale)+1) +#casts scale to Fraction +#assumes scale>=0 +def sample_dlaplace(scale,rng=None): + if rng is None: + rng = random.SystemRandom() + scale = Fraction(scale) + assert scale >= 0 + while True: + sign=sample_bernoulli(Fraction(1,2),rng) + magnitude=sample_geometric_exp_fast(1/scale,rng) + if sign==1 and magnitude==0: continue + return magnitude*(1-2*sign) + +#compute floor(sqrt(x)) exactly +#only requires comparisons between x and integer +def floorsqrt(x): + assert x >= 0 + #a,b integers + a=0 #maintain a^2<=x + b=1 #maintain b^2>x + while b*b <= x: + b=2*b #double to get upper bound + #now do binary search + while a+1=0 +def sample_dgauss(sigma2,rng=None): + if rng is None: + rng = random.SystemRandom() + sigma2=Fraction(sigma2) + if sigma2==0: return 0 #degenerate case + assert sigma2 > 0 + t = floorsqrt(sigma2)+1 + while True: + candidate = sample_dlaplace(t,rng=rng) + bias=((abs(candidate)-sigma2/t)**2)/(2*sigma2) + if sample_bernoulli_exp(bias,rng)==1: + return candidate + +######################################################################### +#DONE That's it! Now some utilities + +import math #need this, code below is no longer exact + +#Compute the normalizing constant of the discrete gaussian +#i.e. sum_{x in Z} exp(-x^2/2sigma2) +#By Poisson summation formula, this is equivalent to +# sqrt{2*pi*sigma2}*sum_{y in Z} exp(-2*pi^2*sigma2*y^2) +#For small sigma2 the former converges faster +#For large sigma2, the latter converges faster +#crossover at sigma2=1/2*pi +#For intermediate sigma2, this code will compute both and check +def normalizing_constant(sigma2): + original=None + poisson=None + if sigma2<=1: + original = 0 + x=1000 #summation stops at exp(-x^2/2sigma2)<=exp(-500,000) + while x>0: + original = original + math.exp(-x*x/(2.0*sigma2)) + x = x - 1 #sum from small to large for improved accuracy + original = 2*original + 1 #symmetrize and add x=0 + if sigma2*100 >= 1: + poisson = 0 + y = 1000 #summation stops at exp(-y^2*2*pi^2*sigma2)<=exp(-190,000) + while y>0: + poisson = poisson + math.exp(-math.pi*math.pi*sigma2*2*y*y) + y = y - 1 #sum from small to large + poisson = math.sqrt(2*math.pi*sigma2)*(1+2*poisson) + if poisson is None: return original + if original is None: return poisson + #if we have computed both, check equality + scale = max(1,math.sqrt(2*math.pi*sigma2)) #tight-ish lower bound on constant + assert -1e-15*scale <= original-poisson <= 1e-15*scale + #10^-15 is about as much precision as we can expect from double precision floating point numbers + #64-bit float has 56-bit mantissa 10^-15 ~= 2^-50 + return (original+poisson)/2 + +#compute the variance of discrete gaussian +#mean is zero, thus: +#var = sum_{x in Z} x^2*exp(-x^2/(2*sigma2)) / normalizing_constant(sigma2) +#By Poisson summation formula, we have equivalent expression: +# variance(sigma2) = sigma2 * (1 - 4*pi^2*sigma2*variance(1/(4*pi^2*sigma2)) ) +#See lemma 20 https://arxiv.org/pdf/2004.00010v3.pdf#page=17 +#alternative expression converges faster when sigma2 is large +#crossover point (in terms of convergence) is sigma2=1/(2*pi) +#for intermediate values of sigma2, we compute both expressions and check +def variance(sigma2): + original=None + poisson=None + if sigma2<=1: #compute primary expression + original=0 + x = 1000 #summation stops at exp(-x^2/2sigma2)<=exp(-500,000) + while x>0: #sum from small to large for improved accuracy + original = original + x*x*math.exp(-x*x/(2.0*sigma2)) + x=x-1 + original = 2*original/normalizing_constant(sigma2) + if sigma2*100>=1: + poisson=0 #we will compute sum_{y in Z} y^2 * exp(-2*pi^2*sigma2*y^2) + y=1000 #summation stops at exp(-y^2*2*pi^2*sigma2)<=exp(-190,000) + while y>0: #sum from small to large + poisson = poisson + y*y*math.exp(-y*y*2*sigma2*math.pi*math.pi) + y=y-1 + poisson = 2*poisson/normalizing_constant(1/(4*sigma2*math.pi*math.pi)) + #next convert from variance(1/(4*pi^2*sigma2)) to variance(sigma2) + poisson = sigma2*(1-4*sigma2*poisson*math.pi*math.pi) + if original is None: return poisson + if poisson is None: return original + #if we have computed both check equality + assert -1e-15*sigma2 <= original-poisson <= 1e-15*sigma2 + return (original+poisson)/2 + +######################################################################### +#DONE Now some basic testing code + +import matplotlib.pyplot as plt #only needed for testing +import time #only needed for testing + +#This generates n samples from sample_dgauss(sigma2) +#It times this and releases statistics +#produces a histogram plot if plot==True +#if plot==None it will only produce a histogram if it's not too large +#can save image instead of displaying by specifying a path e.g., save="plot.png" +def plot_histogram(sigma2,n,save=None,plot=None): + #generate samples + before=time.time() + samples = [sample_dgauss(sigma2) for i in range(n)] + after=time.time() + print("generated "+str(n)+" samples in "+str(after-before)+" seconds ("+str(n/(after-before))+" samples per second) for sigma^2="+str(sigma2)) + #now process + samples.sort() + values=[] + counts=[] + counter=None + prev=None + for sample in samples: + if prev is None: #initializing + prev=sample + counter=1 + elif sample==prev: #still same element + counter=counter+1 + else: + #add prev to histogram + values.append(prev) + counts.append(counter) + #start counting + prev=sample + counter=1 + #add final value + values.append(prev) + counts.append(counter) + + #print & sum + sum=0 + sumsquared=0 + kl=0 #compute KL divergence betwen empirical distribution and true distribution + norm_const=normalizing_constant(sigma2) + true_var=variance(sigma2) + for i in range(len(values)): + if len(values)<=100: #don't print too much + print(str(values[i])+":\t"+str(counts[i])) + sum = sum + values[i]*counts[i] + sumsquared = sumsquared + values[i]*values[i]*counts[i] + kl = kl + counts[i]*(math.log(counts[i]*norm_const/n)+values[i]*values[i]/(2.0*sigma2)) + mean = Fraction(sum,n) + var=Fraction(sumsquared,n) + kl=kl/n + print("mean="+str(float(mean))+" (true=0)") + print("variance="+str(float(var))+" (true="+str(true_var)+")") + print("KL(empirical||true)="+str(kl)) # https://en.wikipedia.org/wiki/G-test + assert kl>0 #kl divergence always >=0 and ==0 iff empirical==true, which is impossible + #now plot + if plot is None: + plot = (len(values)<=1000) #don't plot if huge + if not plot: return + ideal_counts = [n*math.exp(-x*x/(2.0*sigma2))/norm_const for x in values] + plt.bar(values, counts) + plt.plot(values, ideal_counts,'r') + plt.title("Histogram of samples from discrete Gaussian\nsigma^2="+str(sigma2)+" n="+str(n)) + if save is None: + plt.show() + else: + plt.savefig(save) + plt.clf() + +if __name__ == '__main__': + print("This is the discrete Gaussian sampler") + print("See the paper https://arxiv.org/abs/2004.00010") + print("Now running some basic testing code") + print("Start by calculating normalizing constant and variance for different values") + #some test code for normalizing_constant and variance functions + for sigma2 in [0.1**100,0.1**6,0.001,0.01,0.03,0.05,0.08,0.1,0.15,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,10,100,10**6,10**20,10**100]: + #internal asserts do some testing when 0.01<=sigma2<=1 + c=normalizing_constant(sigma2) + v=variance(sigma2) + #print + print("sigma^2="+str(sigma2) + ":\tnorm_const=" + str(c) + "=sqrt{2*pi}*sigma*" + str(c/math.sqrt(2*math.pi*sigma2)) + "\tvar=" + str(v)) + #print a few samples + #for i in range(20): print sample_dgauss(1) + #plot histogram and statistics + #includes timing + print("Now run the sampler") + print("Start with very large sigma^2=10^100 -- for timing purposes only") + plot_histogram(10**100,100000,plot=False) #large var, this will just be for timing + print("Now sigma^2=10 -- will display a histogram") + plot_histogram(10,100000) #small var, this will produce plot + diff --git a/Tests/run_benchmark.sh b/Tests/run_benchmark.sh new file mode 100644 index 00000000..41035760 --- /dev/null +++ b/Tests/run_benchmark.sh @@ -0,0 +1,3 @@ +#/bin/sh +python3 /home/lean/SampCert/Tests/SampCert-py/benchmarks.py --max=50 --quantity=1 --warmup=200 --trials=1000 +#python3 Tests/SampCert-py/benchmarks.py --max=50 --quantity=1 --warmup=200 --trials=1000 diff --git a/paper_mapping.md b/paper_mapping.md new file mode 100644 index 00000000..06b3f1e0 --- /dev/null +++ b/paper_mapping.md @@ -0,0 +1,75 @@ +## Reference from the paper to the code + +| § | Kind | Item from paper | File | Name | Note | +|----------|-------------|----------------------------------------|---------------------------------------------------------------------------|------------------------------------------|------------------------| +| 1 | Folder | Library of DP mechanisms | [SampCert/DifferentialPrivacy/Queries/] | | | +| | Folder | Sampling algorithms | [SampCert/Samplers/] | | | +| | Folder | Pure DP Instantiation | [SampCert/DifferentialPrivacy/Pure] | | | +| | Folder | zCDP Instantiation | [SampCert/DifferentialPrivacy/ZeroConcentrated] | | | +| 2 | Definition | Query | [SampCert/DifferentialPrivacy/Generic.lean] | `Query` | | +| | Definition | Mechanism | [SampCert/DifferentialPrivacy/Generic.lean] | `Mechanism` | Listing 1 | +| | Definition | Neighbour | [SampCert/DifferentialPrivacy/Neighbours.lean] | `Neighbour` | | +| | Definition | `AbstractDP` | [SampCert/DifferentialPrivacy/Abstract.lean] | `DPSystem` | Listing 2 | +| | Definition | `privComposeAdaptive` | [SampCert/DifferentialPrivacy/Generic.lean] | `privComposeAdaptive` | Listing 1 | +| | Definition | `privPostProcess` | [SampCert/DifferentialPrivacy/Generic.lean] | `privPostProcess` | Listing 1 | +| | Definition | `privConst` | [SampCert/DifferentialPrivacy/Generic.lean] | `privConst` | Listing 1 | +| | Definition | `DPNoise` | [SampCert/DifferentialPrivacy/Abstract.lean] | `DPNoise` | Listing 3 | +| | Definition | `ApproximateDP` | [SampCert/DifferentialPrivacy/Approximate/DP.lean] | `ApproximateDP` | | +| | Definition | Sensitivity | [SampCert/DifferentialPrivacy/Sensitivity.lean] | `sensitivity` | | +| | Example | Private histogram | [SampCert/DifferentialPrivacy/Queries/Histogram/] | | | +| | Definition | Private histogram implementation | [SampCert/DifferentialPrivacy/Queries/Histogram/Code.lean] | | Listing 4 | +| | Definition | Private histogram `Bins` | [SampCert/DifferentialPrivacy/Queries/Histogram/Code.lean] | `Bins` | | +| | Definition | Private histogram `Histogram` | [SampCert/DifferentialPrivacy/Queries/Histogram/Code.lean] | `Histogram` | | +| | Lemma | Noised count ADP | [SampCert/DifferentialPrivacy/Queries/Histogram/Properties.lean] | `privNoisedBinCount_DP` | Listing 5 | +| | Lemma | Noised histogram aux ADP | [SampCert/DifferentialPrivacy/Queries/Histogram/Properties.lean] | `privNoisedHistogramAux_DP` | Listing 6 | +| | Lemma | Private histogram ADP | [SampCert/DifferentialPrivacy/Queries/Histogram/Properties.lean] | `privNoisedHistogram_DP` | Listing 7 | +| | Definition | Pure DP | [SampCert/DifferentialPrivacy/Pure/DP.lean] | `pureDP` | | +| | Definition | Discrete Laplace mechanism | [SampCert/DifferentialPrivacy/Pure/Mechanism/Code.lean] | `privNoisedQueryPure` | | +| | Lemma | Pure DP of discrete Laplace mechanism | [SampCert/DifferentialPrivacy/Pure/Mechanism/Properties.lean] | `privNoisedQueryPure_DP` | | +| | Lemma | Pure DP adaptive composition bound | [SampCert/DifferentialPrivacy/Pure/AdaptiveComposition.lean] | `PureDP_ComposeAdaptive'` | | +| | Lemma | Pure DP postprocessing bound | [SampCert/DifferentialPrivacy/Pure/Postprocessing.lean] | `PureDP_PostProcess` | | +| | Lemma | Pure DP implies approximate DP | [SampCert/DifferentialPrivacy/Pure/DP.lean] | `pure_ApproximateDP` | | +| | Instance | `AbstractDP` instance for Pure DP | [SampCert/DifferentialPrivacy/Pure/System.lean] | `PureDPSystem` | | +| | Instance | `DPNoise` instance for Pure DP | [SampCert/DifferentialPrivacy/Pure/System.lean] | `laplace_pureDPSystem` | | +| | Definition | Zero-concentrated DP | [SampCert/DifferentialPrivacy/ZeroConcentrated/DP.lean] | `zCDP` | | +| | Definition | Discrete Gaussian mechanism | [SampCert/DifferentialPrivacy/ZeroConcentrated/Mechanism/Code.lean] | `privNoisedQuery` | | +| | Lemma | zCDP of discrete Gaussian mechanism | [SampCert/DifferentialPrivacy/ZeroConcentrated/Mechanism/Properties.lean] | `privNoisedQuery_zCDP` | | +| | Lemma | zCDP adaptive composition bound | [SampCert/DifferentialPrivacy/ZeroConcentrated/AdaptiveComposition.lean] | `privComposeAdaptive_zCDP ` | | +| | Lemma | zCDP postprocessing bound | [SampCert/DifferentialPrivacy/ZeroConcentrated/Postprocessing.lean] | `privPostProcess_zCDP` | | +| | Lemma | zCDP implies approximate DP | [SampCert/DifferentialPrivacy/ZeroConcentrated/DP.lean] | `zCDP_ApproximateDP` | | +| | Instance | `AbstractDP` instance for zCDP | [SampCert/DifferentialPrivacy/ZeroConcentrated/System.lean] | `zCDPSystem` | | +| | Instance | `DPNoise` instance for zCDP | [SampCert/DifferentialPrivacy/ZeroConcentrated/System.lean] | `gaussian_zCDPSystem` | | +| 3 | Definition | SLang primitives | [SampCert/SLang.lean] | | Figure 3 | +| | Definition | Geometric sampler | [SampCert/Samplers/Geometric/Code.lean] | `probGeometric` | Listing 8 | +| | Lemma | Geometric sampler correctness proof | [SampCert/Samplers/Geometric/Properties.lean] | `probGeometric_apply` | | +| | Lemma | Geometric sampler normalization proof | [SampCert/Samplers/Geometric/Properties.lean] | `probGeometric_normalizes` | | +| | Definition | Laplace sampler | [SampCert/Samplers/Laplace/Code.lean] | `DiscreteLaplaceSample` | Listing 9 | +| | Definition | Optimized Laplace sampler | [SampCert/Samplers/Laplace/Code.lean] | `DiscreteLaplaceOptimized` | Listing 10 | +| | Definition | Dynamically switching Laplace sampler | [SampCert/Samplers/Laplace/Code.lean] | `DiscreteLaplaceSampleMixed` | | +| | Lemma | Laplace sampler correctness proof | [SampCert/Samplers/Laplace/Properties.lean] | `DiscreteLaplaceSample_apply` | | +| | Lemma | Laplace sampler equivalences | [SampCert/Samplers/Laplace/Properties.lean] | `DiscreteLaplaceSample_equiv` | | +| | Lemma | Laplace sampler normalizes | [SampCert/Samplers/Laplace/Properties.lean] | `DiscreteLaplaceSampleMixed_normalizes ` | | +| | Definition | Discrete Gaussian sampler | [SampCert/Samplers/Gaussian/Code.lean] | `DiscreteGaussianSample` | Listing 11 | +| | Definition | Discrete Gaussian correctness | [SampCert/Samplers/Gaussian/Properties.lean] | `DiscreteGaussianSample_apply` | | +| | Definition | Discrete Gaussian normalizes | [SampCert/Samplers/Gaussian/Properties.lean] | `DiscreteGaussianSample_normalizes` | | +| 4 | C++ Program | SLang FFI implementations | [ffi.cpp] | | Listing 12, Listing 20 | +| Appendix | Definition | Generic parallel composition | [SampCert/DifferentialPrivacy/Generic.lean] | `privParComp` | | +| | Lemma | Parallel composition typeclass | [SampCert/DifferentialPrivacy/Abstract.lean] | `DPPar` | | +| | Definition | `DPPar` instance for Pure DP | [SampCert/DifferentialPrivacy/Pure/System.lean] | `PureDPParSystem` | | +| | Folder | AboveThresh | [SampCert/DifferentialPrivacy/AboveThresh/] | | | +| | Definition | AboveThresh, executable implementation | [SampCert/DifferentialPrivacy/AboveThresh/Code.lean] | `sv1_AboveThresh` | Listing 13 | +| | Definition | AboveThresh, private form | [SampCert/DifferentialPrivacy/AboveThresh/Properties.lean] | `sv9_AboveThresh` | Listing 14 | +| | Definition | AboveThresh, SPMF | [SampCert/DifferentialPrivacy/AboveThresh/Properties.lean] | `sv9_AboveThresh_SPMF` | | +| | Lemma | AboveThresh pure DP | [SampCert/DifferentialPrivacy/AboveThresh/Privacy.lean] | `sv9_AboveThresh_pmf_DP` | | +| | File | Sparse Vector Technique | [SampCert/DifferentialPrivacy/Sparse/Code.lean] | | Listing 15 | +| | File | Sparse Vector Technique DP | [SampCert/DifferentialPrivacy/Sparse/Privacy.lean] | | Listing 16 | +| | Lemma | DP implies zCDP | [SampCert/DifferentialPrivacy/ZeroConcentrated/DP.lean] | `ofDP` | | +| | Definition | Parallel Composition | [SampCert/DifferentialPrivacy/Generic.lean] | `privParComp` | Listing 17 | +| | Definition | Abstract Parallel Composition | [SampCert/DifferentialPrivacy/Abstract.lean] | `AbstractParDP` | Listing 18 | +| | Lemma | Parallel Histogram | [SampCert/DifferentialPrivacy/Queries/ParHistogram/Code.lean] | `privParNoisedHistogram` | Listing 19 | +| | File | Parallel Histogram DP | [SampCert/DifferentialPrivacy/Queries/ParHistogram/Properties.lean] | | | +| | File | | [SampCert/DifferentialPrivacy/Queries/ParHistogram/Properties.lean] | `Tests/SampCert.dfy` | Listing 20 (Note 1) | + +Note 1: Test code must be built beforehand (`lake build FastExtract`) + +