diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 00000000..13f68a1d --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,20 @@ +{ + "configurations": [ + { + "name": "Win32", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [ + "_DEBUG", + "UNICODE", + "_UNICODE" + ], + "compilerPath": "C:\\msys64\\ucrt64\\bin\\gcc.exe", + "cStandard": "c17", + "cppStandard": "gnu++17", + "intelliSenseMode": "windows-gcc-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..70e34ecb --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "C_Cpp.errorSquiggles": "disabled" +} \ No newline at end of file diff --git a/README.md b/README.md index f0c5ca2d..d8d89c28 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,72 @@ -# SampCert +# Verifying Differential Privacy in Lean -A verified implementation using [Lean](https://github.com/leanprover/lean4) and [Mathlib](https://github.com/leanprover-community/mathlib4) of randomized algorithms including [the discrete Gaussian sampler for differential privacy](https://arxiv.org/abs/2004.00010), key results in [zero concentrated differential privacy](https://arxiv.org/abs/1605.02065), and [some verified (unbounded) private queries](https://arxiv.org/pdf/1909.01917). +The [SampCert](https://github.com/leanprover/SampCert) project (de Medeiros et al. 2024) was created to implement and formalize differential privacy notions in Lean. It provided support for various notions of differential privacy, a framework for implementing differentially private mechanisms via verified sampling algorithms, and implemented several differentially private algorithms, including the Gaussian and Laplace mechanisms. -SampCert is deployed and used in the [AWS Clean Rooms Differential Privacy service](https://docs.aws.amazon.com/clean-rooms/latest/userguide/differential-privacy.html#dp-overview). SampCert proves deep properties about some of its randomized algorithm and makes heavy use of Mathlib. For example, we use theorems such as [the Poisson summation formula](https://leanprover-community.github.io/mathlib4_docs/Mathlib/Analysis/Fourier/PoissonSummation.html#Real.tsum_eq_tsum_fourierIntegral_of_rpow_decay). +We build upon SampCert, creating support for the local model using [Lean](https://lean-lang.org/) and the extensive [Mathlib](https://github.com/leanprover-community/mathlib4) library. We also implement the [randomized response](https://www.tandfonline.com/doi/abs/10.1080/01621459.1965.10480775) and [one-time basic RAPPOR](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/42852.pdf) mechanisms, as well as a more robust post-processing property for randomized mappings. We additionally move towards an implementation of the shuffle model. -The principal developer of SampCert is [Jean-Baptiste Tristan](https://jtristan.github.io/). It is also developed by [Markus de Medeiros](https://www.markusde.ca/). +Our implementations are in `SampCert/DifferentialPrivacy/Pure`. Our additions are in the Local folder, Shuffle Model folder, and a few separate files in the Pure folder. -Other people have contributed important ideas or tools for deployment including (in no particular order): Leo de Moura, Anjali Joshi, Joseph Tassarotti, Stefan Zetzsche, Aws Albharghouti, Muhammad Naveed, Tristan Ravitch, Fabian Zaiser, Tomas Skrivan. +### Local -To cite SampCert you can currently use the following reference: -``` -@software{Tristan_SampCert_Verified_2024, -author = {Tristan, Jean-Baptiste}, -doi = {10.5281/zenodo.11204806}, -month = may, -title = {{SampCert : Verified Differential Privacy}}, -url = {https://github.com/leanprover/SampCert}, -version = {1.0.0}, -year = {2024} -} -``` +#### BinomialSample + +We implement and show normalization for a sampler for the Binomial distribution. + +#### LocalDP + +The files in this folder provide a variety of definitions to support the local model of differential privacy. We create a definition of differential privacy that is parametrized by a neighbour relation, thus generalizing the SampCert definition. For the local model, we use the `DP_withUpdateNeighbour` definition of neighboring datasets, which considers two lists to be neighbouring if they differ in the update of a single entry. + +We also show that if an $\epsilon$-local randomizer is extended to a dataset via monadic map, the resulting algorithm is $\epsilon$-differentially private. This is proven in `LocalDP_toDataset.` + +#### MultiBernoulli + +The files here implement and show normalization for the "multi-Bernoulli" distribution, which we define to be the distribution associated with flipping each entry of a list independently and with the same probability. + +#### RAPPOR + +In this folder, we provide an implementation of basic one-time RAPPOR and give a proof of differential privacy. The file `Definitions.lean` provides our implementation of RAPPOR. The file `Properties/DPProof.lean` provides a proof that our implementation of RAPPOR is differentially private. + +#### RandomizedResponse + +In this folder, we provide an implementation of randomized response and give a proof of differential privacy. The file `Definitions.lean` provides our implementation of randomized response. The file `Properties/DPProof.lean` provides a proof that our implementation of randomized response is differentially private. + +#### ProbabilityProduct.lean + +We show that the probability of generating a list of outputs is equal to the product of the probabilities of generating each output independently. This is used to prove our `LocalDP_toDataset` lemma. +#### PushForward.lean + +We prove that the push-forward of a probability measure is a probability measure. A similar statement already exists in SampCert, but our rephrasing was slightly more convenient for our purposes. +#### Reduction.lean + +We prove a helper lemma that is used in proving a local algorithm is DP. It allows us to reduce the problem to just considering the local randomizer. + +### Shuffle model + +This folder contains the implementation and properties of the shuffle model. + +#### Definitions.lean + +This contains the definition of the shuffle model. The definition `Shuffler` is the randomized permutation. We implement the shuffle algorithm with randomized response in `RRShuffle`, and provide a more general definition in `ShuffleAlgorithm`. + +#### Properties.lean + +This instantiates the shuffle algorithm as a PMF, and show that our algorithm for a random permutation that is indeed uniformly random. In the theorem `ShuffleAlgorithm_is_DP`, we show that shuffling the output of a differentially-private algorithm does not worsen the differential privacy bound. + +#### ENNRealLemmaSuite.lean + +A significant challenge in our work was proving arithemetic statements over extended non-negative reals. Mathlib currently provides less support for arithmetic in the ENNReal type, so oftentimes we have to prove statements ourselves. These statements are in this file. + +#### LawfulMonadSLang.lean + +We instantiate SLang as a LawfulMonad. + +#### Normalization.lean + +We prove a lemma showing that the mass of a distribution is preserved under monadic map. This is used for both of the local algorithms that we implemented. + +### RandomizedPostProcess.lean + +This file provides the Lean proof about the post-processing property of differential privacy: If there is an $\epsilon$-DP mechanism $M: X \rightarrow W$ and a (possibly randomized) mapping $F: W \rightarrow Z$, then $F \circ M$ is $\epsilon$-DP. The case where $F$ is deterministic is implemented in SampCert. We implement the case where $F$ is random. The result is in the lemma +``randPostProcess_DP_bound_with_UpdateNeighbour``. + +We would like to thank SampCert for motivating and being the basis for our project. We would also like to thank Google for sponsoring and mentoring this project, and the Institute of Pure and Applied Mathematics (IPAM) for supporting and hosting our work. diff --git a/SampCert/DifferentialPrivacy/Neighbours.lean b/SampCert/DifferentialPrivacy/Neighbours.lean index 1d556e5f..8cfeee25 100644 --- a/SampCert/DifferentialPrivacy/Neighbours.lean +++ b/SampCert/DifferentialPrivacy/Neighbours.lean @@ -16,14 +16,13 @@ variable {T : Type} /-- Lists which differ by the inclusion or modification of an element. -This is SampCert's private property. +This is SampCert's private property. -/ inductive Neighbour (l₁ l₂ : List T) : Prop where | Addition : l₁ = a ++ b → l₂ = a ++ [n] ++ b → Neighbour l₁ l₂ | Deletion : l₁ = a ++ [n] ++ b → l₂ = a ++ b → Neighbour l₁ l₂ | Update : l₁ = a ++ [n] ++ b → l₂ = a ++ [m] ++ b -> Neighbour l₁ l₂ - /-- Neighbour relation is symmetric. -/ diff --git a/SampCert/DifferentialPrivacy/Pure/Local/BinomialSample/Binomial.lean b/SampCert/DifferentialPrivacy/Pure/Local/BinomialSample/Binomial.lean new file mode 100644 index 00000000..ac5cc1e7 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/BinomialSample/Binomial.lean @@ -0,0 +1,32 @@ +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.DPwithUpdateNeighbour +import SampCert.DifferentialPrivacy.Pure.Local.MultiBernoulli.Code +import SampCert.DifferentialPrivacy.Pure.Local.MultiBernoulli.Properties +import SampCert.DifferentialPrivacy.Pure.Local.PushForward + +namespace SLang + +def BinomialSample (seed: MultiBernoulli.SeedType)(n:PNat) := do + let seeds := List.replicate n seed + let list ← MultiBernoulli.MultiBernoulliSample (seeds) + let k := List.count true list + return k + +theorem BinomialSample_norms [LawfulMonad SLang] (seed : MultiBernoulli.SeedType) (n : PNat) : + HasSum (BinomialSample seed n) 1 := by + rw [BinomialSample] + simp + unfold probBind + simp [Summable.hasSum_iff ENNReal.summable] + have h: (push_forward (MultiBernoulli.MultiBernoulliSample (List.replicate (↑n) seed)) + (fun (a : List Bool) => (List.count true a))) = (fun (b : Nat) => + (∑' (a : List Bool), if b = List.count true a then MultiBernoulli.MultiBernoulliSample + (List.replicate (↑n) seed) a else 0)) := by + unfold push_forward + rfl + + rw [← h] + rw [push_forward_prob_is_prob] + simp [MultiBernoulli.MultiBernoulliSample_normalizes] + + def BinomialSample_PMF [LawfulMonad SLang] (seed : MultiBernoulli.SeedType) (n : PNat) : PMF ℕ := + ⟨BinomialSample seed n, BinomialSample_norms seed n⟩ diff --git a/SampCert/DifferentialPrivacy/Pure/Local/ENNRealLemmasSuite.lean b/SampCert/DifferentialPrivacy/Pure/Local/ENNRealLemmasSuite.lean new file mode 100644 index 00000000..40350945 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/ENNRealLemmasSuite.lean @@ -0,0 +1,375 @@ +import SampCert + +namespace ENNRealLemmas + +/- Lemmas mostly having to do with mathematically trivial arithmetic in the + extended non-negative reals. +-/ + +lemma pnat_zero_imp_false (den : PNat): (den : Nat) = 0 -> False := by aesop + +lemma tsum_equal_comp {α β: Type} [AddCommMonoid β] [TopologicalSpace β] (f g : α -> β) (h: ∀i : α, f i = g i ): + ∑' (i : α), f i = ∑' (i : α), g i := by simp_all + +lemma simplifier_3 {β : Type} [DecidableEq β] (f : T -> SLang β) (c : List β) (a b : β): +(∑' (a_1 : List β), if b = a ∧ c = a_1 then mapM f tl a_1 else 0) = if b = a then mapM f tl c else 0 := by +rw[tsum_eq_single c] +aesop +aesop + +lemma ennreal_mul_eq (a b c : ENNReal): a = b -> c * a = c * b := by + intro h + rw[h] + +lemma ennreal_div_one (a: ENNReal) : a / 1 = a := by simp_all only [div_one] + +lemma ennreal_mul_assoc (a b c : ENNReal): a * c + b * c = (a + b) * c := by ring + +lemma le_add_non_zero (a b :ENNReal)(h: b ≠ 0)(h2: a ≠ ⊤): a < a+b := by + exact ENNReal.lt_add_right h2 h + +lemma sub_le_add_ennreal (a b :ENNReal)(h1: b ≠ 0)(h3: b ≤ a)(h4: a ≠ ⊤): a -b < a +b := by + apply ENNReal.sub_lt_of_lt_add + exact h3 + rw [add_assoc] + apply le_add_non_zero + simp_all only [ne_eq, add_eq_zero, and_self, not_false_eq_true] + exact h4 + +lemma mult_ne_zero (a b : ENNReal) (h1 : a ≠ 0) (h2 : b ≠ 0): a * b ≠ 0 := by aesop + +lemma ineq_coercion (num : Nat) (den : PNat) (h : 2 * num < den): +2 * (@Nat.cast ENNReal NonAssocSemiring.toNatCast num) < @Nat.cast ENNReal CanonicallyOrderedCommSemiring.toNatCast ↑den := + by norm_cast + +lemma mult_ne_top (a b : ENNReal) (h1 : a ≠ ⊤) (h2 : b ≠ ⊤): a * b ≠ ⊤ := by + rw [@Ne.eq_def] + intro h + rw [ENNReal.mul_eq_top] at h + cases h with + | inl h => + apply And.right at h + contradiction + | inr h => + apply And.left at h + contradiction + +lemma Finset.prod_ne_top (n : Nat) (f : ℕ -> ENNReal) (h : ∀ i, f i ≠ ⊤): + ∏ (i : Fin n), f i.val ≠ ⊤ := by + induction n with + | zero => simp + | succ m ih => + rw [@Fin.prod_univ_add] + apply mult_ne_top + apply ih + rw [@Fin.prod_univ_one] + apply h + +lemma Finset.prod_ne_top_fin (n : Nat) (f : Fin n -> ENNReal) (h : ∀ i, f i ≠ ⊤): + ∏ (i : Fin n), f i ≠ ⊤ := by + cases hn : n == 0 with +| false => + have hn1: n > 0 := by + simp at hn + exact Nat.zero_lt_of_ne_zero hn + + let g : Nat -> ENNReal := fun i => f (Fin.ofNat' i hn1) + have mod: ∀ i : Fin n, i % n = i := by + intro i + rw [Nat.mod_eq] + simp + have h0: ∀ i : Fin n, f i = g i := by + intro i + simp_all [g] + have eq: Fin.ofNat' (i.val) hn1 = i := by + simp_all [Fin.ofNat'] + rw [eq] + have h1: ∏ i : Fin n, f i = ∏ i : Fin n, g i := by + conv => + enter [2, 2, i] + rw [←h0 i] + have h2: ∀ i : ℕ, g i ≠ ⊤ := by + intro i + apply h + rw [h1] + apply Finset.prod_ne_top n g + exact h2 + | true => + simp at hn + subst hn + simp + +lemma div_ne_top (a b : ENNReal) (h1 : a ≠ ⊤) (h2 : b ≠ 0): a / b ≠ ⊤ := by + simp + rw [Not] + intro a + rw [@ENNReal.div_eq_top] at a + rcases a with ⟨_,ar⟩ + subst ar + simp_all only [ne_eq, not_true_eq_false] + rename_i h3 + rcases h3 with ⟨hl,_⟩ + subst hl + simp_all only [ne_eq, not_true_eq_false] + +lemma div_div_cancel_rev (a b c : ENNReal) (h : c ≠ 0 ∧ c ≠ ⊤): a < b -> a / c < b / c := by + intro h1 + apply ENNReal.div_lt_of_lt_mul + rw [@ENNReal.mul_comm_div] + rw [ENNReal.div_self] + simp + exact h1 + exact h.left + exact h.right + +lemma quot_gt_one_rev (a b : ENNReal): b < a -> 1 < a/b := by + cases hb : b == 0 with + | true => simp at hb + subst hb + intro ha + have h1 : a / 0 = ⊤ := by + rw [@ENNReal.div_eq_top] + apply Or.inl + apply And.intro + aesop + trivial + rw [h1] + decide + | false => simp at hb + intro ha + have h1: b/b = 1 := by + rw [ENNReal.div_self] + aesop + aesop + cases hbT : b == ⊤ with + | true => + simp at hbT + aesop + | false => + simp at hbT + have h2: b/b < a/b := by + apply div_div_cancel_rev b a b + aesop + exact ha + rw[h1] at h2 + exact h2 + +lemma quot_gt_one (a b : ENNReal): 1 < a/b -> b < a := by + intro h + cases hb: b == 0 with + | true => simp at hb + cases ha: a == 0 with + | true => simp at ha + subst hb + subst ha + apply False.elim + have nh: ¬ (1 : ENNReal) < 0/0 := by simp + contradiction + | false => simp at ha + have ha1: a > 0 := pos_iff_ne_zero.mpr ha + rw[←hb] at ha1 + exact ha1 + | false => simp at hb + cases hbT : b == ⊤ with + | true => simp at hbT + subst hbT + have h1 : ¬ 1 < a / ⊤ := by simp_all only [ENNReal.div_top, not_lt_zero'] + apply False.elim + apply h1 + exact h + | false => + simp at hbT + have h1: 1 * b < (a / b) * b := ENNReal.mul_lt_mul_right' hb hbT h + rw [@ENNReal.mul_comm_div] at h1 + rw [ENNReal.div_self] at h1 + rw [@CanonicallyOrderedCommSemiring.one_mul] at h1 + rw [@CanonicallyOrderedCommSemiring.mul_one] at h1 + exact h1 + apply hb + apply hbT + +lemma quot_lt_one_rev (a b : ENNReal): b < a -> b/a < 1 := by + intro h + apply ENNReal.div_lt_of_lt_mul' + rw [mul_one] + exact h + +lemma tsum_func_zero_simp (f : List Bool -> ENNReal) (h : f [] = 0): + ∑' (x : List Bool), f x = (∑'(x : List Bool), if x = [] then 0 else f x) := by + rw [ENNReal.tsum_eq_add_tsum_ite []] + simp [h] + apply tsum_equal_comp + intro i + aesop + +/- The unused variable "assm" is here on purpose -/ +lemma tsum_ite_not (f : List Bool -> ENNReal): + ∑' (x : List Bool), (if x = [] then 0 else f x) = + ∑' (x : List Bool), if assm : x ≠ [] then f x else 0 := by simp_all [ite_not] + +lemma tsum_ite_mult (f : T -> ENNReal) (P : T -> Bool): + (∑' (x : T), f x * if (P x) then 1 else 0) = ∑' (x : T), if (P x) then f x else 0 := by simp_all only [mul_ite, + mul_one, mul_zero] + +lemma sub_add_eq_add_sub (a b :ENNReal)(h : b ≤ a)(h1 : b ≠ ⊤): a - b+ b = a + b-b := by + rw [@AddCommMonoidWithOne.add_comm] + rw [add_tsub_cancel_of_le h] + rw [@AddCommMonoidWithOne.add_comm] + symm + apply ENNReal.sub_eq_of_add_eq + exact h1 + rw [add_comm] + +lemma sub_add_cancel_ennreal (a b :ENNReal)(h:b≤ a)(h1 : b ≠ ⊤): a -b +b =a := by + rw [sub_add_eq_add_sub] + rw [ENNReal.add_sub_cancel_right h1] + exact h + exact h1 + + +lemma le_double (a b c : ENNReal)(h1 : a ≤ b)(h2 : c ≤ d): a * c ≤ b * d := by + apply mul_le_mul_of_nonneg + all_goals aesop + +lemma mult_div_comm_mult_div (a b c :ENNReal): a*(b/c) = b*(a/c):= by + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [mul_comm] + rw[← mul_assoc] + conv => + enter [1,1] + rw[mul_comm] + +lemma div_mult_eq_mult_div (a b c :ENNReal) : a/b*c = c*(a/b) := by rw [@CanonicallyOrderedCommSemiring.mul_comm] + +lemma lt_sub_left (a b : Nat):a-b>0 ↔ b< a := by aesop + +lemma lt_cancel (a b c : ENNReal) (h1: c ≠ 0) (h2 : c ≠ ⊤): c * a < c * b -> a < b := by + intro h + apply (ENNReal.mul_lt_mul_right h1 h2).mp + rw [mul_comm] + nth_rw 2 [mul_comm] + exact h + + +lemma exp_change_form (num : Nat) (den : PNat) (h: 2 * num < den) : ((2:ENNReal)⁻¹ + num / den) / (2⁻¹ - num / den) + = (↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num) := by + have h1: ENNReal.ofNNReal ((@Subtype.val ℕ (fun n => 0 < n) den) : NNReal) = (den : ENNReal) := by norm_cast + have h2: (2 : ENNReal) * num < ENNReal.ofNNReal ((@Subtype.val ℕ (fun n => 0 < n) den) : NNReal) := by norm_cast + simp + rw [ENNReal.div_eq_div_iff] + rw [mul_comm] + rw [← ennreal_mul_assoc] + conv => + enter [2] + rw [mul_comm] + rw [← ennreal_mul_assoc] + rw [ENNReal.mul_sub] + rw [ENNReal.mul_sub] + rw [ENNReal.mul_sub] + rw [ENNReal.mul_sub] + rw [← mul_assoc] + rw [@ENNReal.mul_comm_div] + rw [ENNReal.inv_mul_cancel] + rw [ENNReal.div_self] + rw [mul_one] + rw [one_mul] + conv => + enter [2,2,1] + rw[mul_comm] + rw[← mul_assoc] + rw [ENNReal.inv_mul_cancel] + rw [one_mul] + rw [mul_comm] + rw [mult_div_comm_mult_div] + rw [ENNReal.div_self] + rw [mul_one] + rw[div_mult_eq_mult_div] + + simp + apply pnat_zero_imp_false + + norm_cast + rw [Not] + intro B + contradiction + + simp + simp + + simp + apply pnat_zero_imp_false + + norm_cast + rw [Not] + intro B + contradiction + + simp + simp + + intro _ + intro _ + norm_cast + rw[Not] + intro D + contradiction + + intro _ + intro _ + norm_cast + rw [Not] + intro D + contradiction + + intro B + intro C + simp_all + rw [@ENNReal.div_eq_top] + rw[Not] + intro D + cases D with + | inl dl => + apply And.right at dl + have hh : ¬ den.val = 0 := by simp + norm_num at dl + contradiction + | inr dr => + apply And.left at dr + contradiction + + intro _ + intro _ + simp_all + rw [← lt_sub_left] at h + rw [@ne_iff_lt_or_gt] + right + simp_all only [gt_iff_lt, tsub_pos_iff_lt, ENNReal.coe_natCast, NNReal.ofPNat, Nonneg.mk_natCast] + norm_cast + aesop + rw [← @zero_lt_iff] + rw [@tsub_pos_iff_lt] + rw [h1] + have h3: ↑(2 * num) / ↑(NNReal.ofPNat den) < (1 : ENNReal) := by + apply ENNReal.div_lt_of_lt_mul' + rw [mul_one] + aesop + have h4: ↑(2 * num) / ↑(NNReal.ofPNat den) < 2 * (2⁻¹ : ENNReal) := by + have ha: 2 * (2⁻¹ : ENNReal) = 1 := by + rw [ENNReal.mul_inv_cancel] + aesop + decide + rw [ha] + exact h3 + set s : ENNReal := (num/ ↑(NNReal.ofPNat den)) + have h5: 2 * s < 2 * (2⁻¹ : ENNReal) := by + rw [mul_div] + convert h4 + norm_cast + apply lt_cancel _ _ 2 + aesop + decide + exact h5 + aesop + +end ENNRealLemmas diff --git a/SampCert/DifferentialPrivacy/Pure/Local/LawfulMonadSLang.lean b/SampCert/DifferentialPrivacy/Pure/Local/LawfulMonadSLang.lean new file mode 100644 index 00000000..92c61333 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/LawfulMonadSLang.lean @@ -0,0 +1,33 @@ +import SampCert + +open SLang + +/- Instantiation of SLang as a LawfulMonad. This makes simp much stronger.-/ + +instance SLang.LawfulMonad : LawfulMonad SLang where + map_const := by + intro a b + rfl + id_map := by + intro u hu + simp [Functor.map] + seqLeft_eq := by + intro α β x y + simp [SeqLeft.seqLeft, Seq.seq, Functor.map] + apply Eq.refl + seqRight_eq := by + intro α β x y + simp [SeqRight.seqRight, Seq.seq, Functor.map] + pure_seq := by + intro α β g x + simp [Seq.seq, Functor.map] + pure_bind := by simp + bind_pure_comp := by + intro α β f x + simp_all only [bind, pure] + apply Eq.refl + bind_assoc := by simp + bind_map := by + intro α β f x + simp_all only [bind] + apply Eq.refl diff --git a/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPNeighbour.lean b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPNeighbour.lean new file mode 100644 index 00000000..64ccb21e --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPNeighbour.lean @@ -0,0 +1,19 @@ +import SampCert.DifferentialPrivacy.Pure.DP +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.DPwithGeneralNeighbour +import SampCert.DifferentialPrivacy.Neighbours + +namespace SLang + +open SLang + +/- Abstraction of SampCert definition of DP. + ARU stands for Add-Remove-Update Neighbour +-/ +def DP_withARUNeighbour (m : Mechanism T U) (ε : ℝ) : Prop := + DP_withGeneralNeighbour m (Neighbour) ε + +/- Proof that our definitions is equivalent to SampCert's definition -/ +theorem DP_withARUNeighbour_isDP (m : Mechanism T U) (ε : ℝ) : + DP_withARUNeighbour m ε ↔ DP m ε := by rfl + +end SLang diff --git a/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPwithGeneralNeighbour.lean b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPwithGeneralNeighbour.lean new file mode 100644 index 00000000..dbac1ce8 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPwithGeneralNeighbour.lean @@ -0,0 +1,22 @@ +import SampCert.DifferentialPrivacy.Generic +namespace SLang + +open SLang +open Classical + +/- DP with a general neighbour relation. + The definition of the neighbour relation is passed in as a parameter.-/ +def DP_withGeneralNeighbour (m : Mechanism T U) (VariableNeighbour: List T -> List T -> Prop) (ε : ℝ): Prop := + ∀ l₁ l₂ : List T, VariableNeighbour l₁ l₂ → ∀ S : Set U, + (∑' x : U, if x ∈ S then m l₁ x else 0) / (∑' x : U, if x ∈ S then m l₂ x else 0) ≤ ENNReal.ofReal (Real.exp ε) + +/- Instantiation of PureDP with a general neighbour relation. -/ +def PureDP_withGeneralNeighbour (m : Mechanism T U) (VariableNeighbour : List T -> List T -> Prop) (ε : NNReal) : Prop := + DP_withGeneralNeighbour m VariableNeighbour ε + +/- Instantiation of DP_singleton with a general neighbour relation. -/ +def DP_singleton_withGeneralNeighbour (m : Mechanism T U) (VariableNeighbour: List T -> List T -> Prop) (ε : ℝ): Prop := + ∀ l₁ l₂ : List T, VariableNeighbour l₁ l₂ → ∀ x : U, + (m l₁ x) / (m l₂ x) ≤ ENNReal.ofReal (Real.exp ε) + +end SLang diff --git a/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPwithUpdateNeighbour.lean b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPwithUpdateNeighbour.lean new file mode 100644 index 00000000..7ea9e57a --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/DPwithUpdateNeighbour.lean @@ -0,0 +1,86 @@ +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.DPwithGeneralNeighbour +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.UpdateNeighbour + +open SLang +open Classical + +namespace SLang + +/- DP with the update neighbour relation. -/ +def DP_withUpdateNeighbour (m : Mechanism T U) (ε : ℝ) : Prop := + DP_withGeneralNeighbour m (UpdateNeighbour) ε + + +/- Instantiation of DP_singleton with the update neighbour relation-/ +def DP_singleton_withUpdateNeighbour (m : Mechanism T U) (ε : ℝ) : Prop := + DP_singleton_withGeneralNeighbour m (UpdateNeighbour) ε + +end SLang + +theorem singleton_to_event_update (m : Mechanism T U) (ε : ℝ) (h : DP_singleton_withUpdateNeighbour m ε) : + DP_withUpdateNeighbour m ε := by + simp [DP_withUpdateNeighbour] + simp [DP_singleton_withUpdateNeighbour] at h + intros l₁ l₂ h1 S + replace h1 := h l₁ l₂ h1 + have A : ∀ (x : U), (if x ∈ S then m l₁ x else 0) / (if x ∈ S then m l₂ x else 0) ≤ ENNReal.ofReal ε.exp := by + aesop + have B : ∀ b : ENNReal, b ≠ 0 ∨ ENNReal.ofReal ε.exp ≠ ⊤ := by + aesop + have C : ∀ b : ENNReal, b ≠ ⊤ ∨ ENNReal.ofReal ε.exp ≠ 0 := by + intro b + right + simp + exact Real.exp_pos ε + have D := fun { x : U } => @ENNReal.div_le_iff_le_mul (if x ∈ S then m l₁ x else 0) (if x ∈ S then m l₂ x else 0) (ENNReal.ofReal ε.exp) (B (if x ∈ S then m l₂ x else 0)) (C (if x ∈ S then m l₂ x else 0)) + have E := fun x : U => D.1 (A x) + have F := ENNReal.tsum_le_tsum E + rw [ENNReal.tsum_mul_left] at F + rw [← ENNReal.div_le_iff_le_mul] at F + · clear h1 A B C D + trivial + · aesop + · right + simp + exact Real.exp_pos ε + +theorem event_to_singleton_update (m : Mechanism T U) (ε : ℝ)(h: DP_withUpdateNeighbour m ε): + DP_singleton_withUpdateNeighbour m ε := by + simp [DP_singleton_withUpdateNeighbour] + intros l₁ l₂ h1 x + replace h1 := h l₁ l₂ h1 {x} + simp at h1 + rw [tsum_eq_single x] at h1 + · simp at h1 + rw [tsum_eq_single x] at h1 + · simp at h1 + trivial + · aesop + · aesop + +theorem event_eq_singleton_update (m : Mechanism T U) (ε : ℝ) : + DP_withUpdateNeighbour m ε ↔ DP_singleton_withUpdateNeighbour m ε := by + constructor + · apply event_to_singleton_update + · apply singleton_to_event_update + +lemma privPostProcess_DPwithUpdate_bound{nq : Mechanism T U} {ε : NNReal} (h : DP_withUpdateNeighbour nq ε) (f : U → V) : + DP_withUpdateNeighbour (privPostProcess nq f) ε := by + rw [event_eq_singleton_update] at * + simp [DP_singleton_withUpdateNeighbour] at * + intros l₁ l₂ neighbours x + replace h := h l₁ l₂ neighbours + simp [privPostProcess] + apply ENNReal.div_le_of_le_mul + rw [← ENNReal.tsum_mul_left] + apply tsum_le_tsum _ ENNReal.summable (by aesop) + intro i + split + · rename_i h + subst h + refine (ENNReal.div_le_iff_le_mul ?inl.hb0 ?inl.hbt).mp (h i) + · aesop + · right + simp + exact Real.exp_pos ε + · simp diff --git a/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/LocalDP.lean b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/LocalDP.lean new file mode 100644 index 00000000..ca0b28e3 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/LocalDP.lean @@ -0,0 +1,16 @@ +import SampCert + +namespace SLang + +/- We define local differential privacy. The definition is the same as + for central DP, but the mechanisms take in a user, not a dataset, as input. + as input. -/ + +open Classical + +abbrev LocalMechanism (T U : Type) := T → PMF U + +/-Local Differential Privacy-/ +def Local_DP (m : LocalMechanism T U) (ε : ℝ) : Prop := + ∀ u₁ u₂ : T, ∀ y : U, + m u₁ y / m u₂ y ≤ ENNReal.ofReal (Real.exp ε) diff --git a/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/LocalToDataset.lean b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/LocalToDataset.lean new file mode 100644 index 00000000..8d648aa5 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/LocalToDataset.lean @@ -0,0 +1,160 @@ +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.LocalDP +import SampCert.DifferentialPrivacy.Pure.Local.Normalization +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.DPwithUpdateNeighbour +import SampCert.DifferentialPrivacy.Pure.Local.ProbabilityProduct +import SampCert.DifferentialPrivacy.Pure.Local.Reduction + +namespace SLang + +open Classical + +/- We define a general way to transform local randomizers into algorithms on datasets, + and prove that the dataset-level algorithm satisfies the same DP bound as the local randomizer. -/ + +/- Transforms a local randomizer into a dataset-level algorithm. -/ +def local_to_dataset (m : LocalMechanism T U) (l : List T) : SLang (List U) := + (l.mapM (fun x => (m x).1)) + +/- Proof of normalization for local_to_dataset. This is necessary to instantiate + local_to_dataset as a PMF. -/ +lemma local_to_dataset_normalizes (m : LocalMechanism T U) (l : List T): +HasSum (local_to_dataset m l) 1 := by + rw [Summable.hasSum_iff ENNReal.summable] + rw [local_to_dataset] + apply norm_func_norm_on_list + intro x + rw [← Summable.hasSum_iff ENNReal.summable] + exact (m x).2 + +/- Instantiation of local_to_dataset as a PMF. -/ +def local_to_dataset_PMF (m : LocalMechanism T U) (l : List T) : PMF (List U) := + ⟨local_to_dataset m l, local_to_dataset_normalizes m l⟩ + +/- local_to_dataset outputs datasets of different length than the input dataset with probability zero. -/ +lemma local_to_dataset_diff_lengths (l₁ : List T) (x : List U) (hlen : l₁.length ≠ x.length): + local_to_dataset m l₁ x = 0 := by + induction l₁ generalizing x with + | nil => simp [local_to_dataset, -mapM] + aesop + | cons hd tl ih => + simp [local_to_dataset, -mapM] + simp [local_to_dataset, -mapM] at ih + intro b + apply Or.inr + intro y hy + subst hy + simp_all only [mapM, List.length_cons, ne_eq, add_left_inj, not_false_eq_true] + +/- Prob_of_ind_prob lemma for local_to_dataset. -/ +lemma local_to_dataset_prob_of_ind_prob_PMF (m: LocalMechanism T U) (l : List T) (a: List U) (k : l.length = a.length) : + local_to_dataset_PMF m l a = (∏'(i: Fin l.length), m (l.get i) (a.get (Fin.cast k i ))):= by apply prod_of_ind_prob + +/- This lemma states that, under a technical assumption, the algorithm + that applies the same local randomizer to each row of data satisfies the same + DP bound as the local randomizer. + The hypotheses "nonzero" and "equiv" should be regarded as technical conditions. + Essentially, they require the user to provide a precise characterization of which + outputs of the local randomizer occur with probability zero. + This is necessary in order to apply the reduction lemma. + This lemma is applied in the proof of DP for both Randomized Response and One-Time Basic RAPPOR. + -/ +lemma LocalDP_to_dataset (m : LocalMechanism T U) (ε : ℝ) + (equiv: ∀ l₁ l₂ : List T, (h_upd: UpdateNeighbour l₁ l₂) → ∀ b : List U, (blen1: l₁.length = b.length) → + (∀ i : Fin l₁.length, (m l₁[i]) (b[i]) = 0 ↔ (m (l₂[i]'(by + have h: l₂.length = b.length := by rw[←blen1]; apply (Eq.symm (UpdateNeighbour_length h_upd)) + omega))) b[i] = 0)) + (noninf: ∀ (k : T) (bo : U), (m k) bo ≠ ⊤): + Local_DP m ε → DP_withUpdateNeighbour (local_to_dataset_PMF m) ε := by + let P: T → U → Bool := fun k bo => (m k) bo ≠ 0 + intro hloc + apply singleton_to_event_update + intros l₁ l₂ h_adj x + cases xlen1 : l₁.length == x.length with + | true => simp at xlen1 + have xlen2: l₂.length = x.length := by + rw [←xlen1] + rw[←UpdateNeighbour_length h_adj] + rw[local_to_dataset_prob_of_ind_prob_PMF m l₁ x xlen1] + rw[local_to_dataset_prob_of_ind_prob_PMF m l₂ x xlen2] + cases h_adj with + | Update hl₁ hl₂ => + rename_i a y c z + simp + have valid_index9 (i : Fin (l₂.length - 1)): i.val + 1 < x.length := by + rw[←xlen2] + have h1: i.val < l₂.length - 1 := i.2 + omega + have valid_index10 (i : Fin (l₂.length - 1)): i.val < x.length := by + rw[←xlen2] + omega + cases P_true: ((∀ i : Fin (l₂.length - 1), P (l₂[(@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove i]'(by simp[Fin.succAbove]; aesop)) (x[(@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove i]'(by simp [Fin.succAbove]; aesop))) == true) with + | true => + rw [reduction_final l₁ l₂ a c y z x _ hl₁ hl₂ xlen1 xlen2 _ noninf] + simp[Local_DP] at hloc + apply hloc + intro i + simp[P] at P_true + apply P_true i + | false => + simp at P_true + have nonzero2: ∀ (k : T) (bo : U), P k bo = false ↔ (m k) bo = 0 := by + intro k bo + apply not_iff_not.mp + simp [P] + /- The next several "have" statements are just to prove index validity. + The proofs can undoubtedly be simplfied with some effort. -/ + have h1: a.length + (c.length + 1) = l₂.length := by + rw [hl₂] + simp_all only [ne_eq, Fin.getElem_fin, List.append_assoc, List.singleton_append, + List.length_append, List.length_cons] + have valid_index11 (t : Fin (l₂.length - 1)): t.val + 1 < a.length + (c.length + 1) := by + rw [h1] + omega + have valid_index12 (t : Fin (l₂.length - 1)): t.val < a.length + (c.length + 1) := by + rw [h1] + omega + have h1: ∏' (i : Fin l₂.length), (m l₂[i.val]) x[i.val] = 0 := by + rw [tprod_fintype] + rw [@Finset.prod_eq_zero_iff] + cases P_true with + | intro z hz => + have valid_index13: ((@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove z).val < l₂.length := by + simp[Fin.succAbove] + rename_i z_1 + simp_all only [ne_eq, Fin.getElem_fin, List.append_assoc, List.singleton_append, + List.length_append, List.length_cons] + split + next h => simp_all only [Fin.coe_castSucc] + next h => simp_all only [not_lt, Fin.val_succ] + use ⟨(@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove z, valid_index13⟩ + apply And.intro + simp + apply (nonzero2 _ _).mp + apply hz + have h2: ∏' (i : Fin l₁.length), (m l₁[i.val]) x[i.val] = 0 := by + rw [tprod_fintype] + rw [@Finset.prod_eq_zero_iff] + cases P_true with + | intro z hz => + have valid_index13: ((@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove z).val < l₁.length := by + simp[Fin.succAbove] + rename_i z_1 + simp_all only [ne_eq, Fin.getElem_fin, List.append_assoc, List.singleton_append, + List.length_append, List.length_cons] + split + next h => simp_all only [Fin.coe_castSucc] + next h => simp_all only [not_lt, Fin.val_succ] + use ⟨(@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove z, valid_index13⟩ + apply And.intro + simp + apply (equiv l₁ l₂ (UpdateNeighbour.Update hl₁ hl₂) x xlen1 _).mpr + simp_all [P] + rw [h2] + rw [@ENNReal.zero_div] + simp + | false => simp at xlen1 + rw [←Ne.eq_def] at xlen1 + have numerator_zero: local_to_dataset_PMF m l₁ x = 0 := local_to_dataset_diff_lengths l₁ x xlen1 + rw [numerator_zero] + rw [@ENNReal.zero_div] + simp diff --git a/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/UpdateNeighbour.lean b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/UpdateNeighbour.lean new file mode 100644 index 00000000..ca8c07ef --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/LocalDP/UpdateNeighbour.lean @@ -0,0 +1,23 @@ +import SampCert + +namespace SLang + +/- Lists which differ in the update of a single entry. -/ +inductive UpdateNeighbour {T : Type} (l₁ l₂ : List T) : Prop where + | Update: l₁ = a ++ [n] ++ b → l₂ = a ++ [m] ++ b -> UpdateNeighbour l₁ l₂ + +/- The UpdateNeighbour relation is symmetric. -/ +def UpdateNeighbour_symm (l₁ l₂ : List T) (H : UpdateNeighbour l₁ l₂) : UpdateNeighbour l₂ l₁ := by + cases H + · rename_i _ _ _ _ Hl1 Hl2 + exact UpdateNeighbour.Update Hl2 Hl1 + +/- Two lists satisfying the UpdateNeighbour relation have the same length.-/ +lemma UpdateNeighbour_length {T : Type} {l₁ l₂ : List T} (H : UpdateNeighbour l₁ l₂) : + l₁.length = l₂.length := by + cases H + rename_i _ _ _ _ Hl1 Hl2 + rw[Hl1, Hl2] + simp + +end SLang diff --git a/SampCert/DifferentialPrivacy/Pure/Local/MultiBernoulli/Code.lean b/SampCert/DifferentialPrivacy/Pure/Local/MultiBernoulli/Code.lean new file mode 100644 index 00000000..23836a7f --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/MultiBernoulli/Code.lean @@ -0,0 +1,23 @@ +import SampCert +open SLang + +namespace MultiBernoulli + +/- We define the "MultiBernoulli" distribution to be the distribution associated with + flipping each entry of a list with the same probability.-/ + +structure SeedType where + /- This is just handy shorthand for a rational parameter n/d in [0, 1]-/ + n : Nat + d : PNat + h : n ≤ d + +/- Obtains a Bernoulli sample taken from the seed.-/ +def bernoulli_mapper: SeedType -> SLang Bool := + fun s => SLang.BernoulliSample s.n s.d s.h + +/- Obtains a sample from the "MultiBernoulli" distribution. -/ +def MultiBernoulliSample (seeds: List SeedType): SLang (List Bool) := + seeds.mapM (fun s => bernoulli_mapper s) + +end MultiBernoulli diff --git a/SampCert/DifferentialPrivacy/Pure/Local/MultiBernoulli/Properties.lean b/SampCert/DifferentialPrivacy/Pure/Local/MultiBernoulli/Properties.lean new file mode 100644 index 00000000..aa22db82 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/MultiBernoulli/Properties.lean @@ -0,0 +1,28 @@ +import Mathlib.Probability.ProbabilityMassFunction.Basic +import Mathlib.Probability.Independence.Basic +import SampCert +import SampCert.SLang +import Mathlib.Data.Set.Basic +import SampCert.DifferentialPrivacy.Pure.Local.ENNRealLemmasSuite +import SampCert.DifferentialPrivacy.Pure.Local.MultiBernoulli.Code +import SampCert.DifferentialPrivacy.Pure.Local.Normalization + +namespace MultiBernoulli +open SLang +open ENNRealLemmas + +/- Normalization of the bernoulli_mapper. -/ +lemma bernoulli_mapper_sums_to_1 (s : SeedType): ∑' (b : Bool), bernoulli_mapper s b = 1 := by + rw[bernoulli_mapper] + exact SLang.BernoulliSample_normalizes s.n s.d s.h + +/- Normalization for MultiBernoulli, which is proven using our normaliation lemma. -/ +lemma MultiBernoulliSample_normalizes [LawfulMonad SLang] (seeds : List SeedType) : + ∑' (b: List Bool), MultiBernoulliSample seeds b = 1 := by + unfold MultiBernoulliSample + apply norm_func_norm_on_list + intro a + rw [bernoulli_mapper_sums_to_1] + + +end MultiBernoulli diff --git a/SampCert/DifferentialPrivacy/Pure/Local/Normalization.lean b/SampCert/DifferentialPrivacy/Pure/Local/Normalization.lean new file mode 100644 index 00000000..6f7b3150 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/Normalization.lean @@ -0,0 +1,140 @@ +import Mathlib.Probability.ProbabilityMassFunction.Basic +import Mathlib.Probability.Independence.Basic +import SampCert +import SampCert.SLang +import Mathlib.Data.Set.Basic +import Mathlib.Data.Set.Basic +import SampCert.DifferentialPrivacy.Pure.Local.ENNRealLemmasSuite + +namespace SLang + +/- In this file, we show that if a function f : α -> SLang β + normalizes, in the sense that ∑' (b : List β) f a b = 1 + for any fixed a : α, + then the function obtained by applying monadic map + to f and some list also normalizes. + This is valuable for local algorithms, where + a randomizer is first defined for a single user and then + applied to a dataset of users. + The proof is by induction and follows by a straightforward + application of the Fubini-Tonelli theorem. +-/ + +/- Helper lemma to simplify a if-then-else statement in a sum-/ +lemma simplifier1_gen (α β : Type) [DecidableEq β](f: α → SLang β) (a : List β) (b : β)(tl : List α): +(∑' (a_1 : List β), if a = b :: a_1 then mapM f tl a_1 else 0) = +(if a.head? = b then mapM f tl a.tail else 0) := by + cases a with + | nil => simp + | cons ah atl => + simp [-mapM] + split + next h => + subst h + simp_all only [true_and] + rw [tsum_eq_single] + split + rename_i h + on_goal 2 => rename_i h + apply Eq.refl + simp_all only [not_true_eq_false] + intro b' a + simp_all only [ne_eq, mapM, ite_eq_right_iff] + intro a_1 + subst a_1 + simp_all only [not_true_eq_false] + next h => simp_all only [false_and, ↓reduceIte, _root_.tsum_zero] + +open Set + +/- This lemma shows that summing a function over all possible lists is the same as summing a function + over all tails of lists with a specified head. +-/ +lemma list_beta_tsum_only_tl (β : Type) [DecidableEq β] (b : β) (f : List β -> ENNReal): +∑' (a : List β), f a = ∑' (a : List β), if a.head? = some b then f a.tail else 0 := by + apply Equiv.tsum_eq_tsum_of_support + intro x + case e => + let e₁ := Equiv.Set.image (List.cons b) (Function.support f) (fun _ _ h => List.cons_injective h) + have h_eq : (List.cons b) '' (Function.support f) = + Function.support (fun y => if y.head? = some b then f y.tail else 0) := by + rw [@Set.ext_iff] + intro L + apply Iff.intro + { intro hx + have lhead: L.head? = some b := by + simp_all only [mem_image, Function.mem_support, ne_eq] + obtain ⟨w, h⟩ := hx + obtain ⟨_, right⟩ := h + subst right + simp_all only [List.head?_cons] + rw[Function.support] + rw [@mem_setOf] + rw [lhead] + simp + simp_all only [mem_image, Function.mem_support, ne_eq] + obtain ⟨w, h⟩ := hx + obtain ⟨left, right⟩ := h + subst right + simp_all only [List.head?_cons, List.tail_cons, not_false_eq_true] + } + { intro hx + rw [Function.support] at hx + rw [@mem_setOf] at hx + have lhead: L.head? = some b := by simp_all only [ne_eq, ite_eq_right_iff, Classical.not_imp] + simp_all only [↓reduceIte, ne_eq, mem_image, Function.mem_support] + apply Exists.intro L.tail + apply And.intro + exact hx + rw [List.cons_head?_tail lhead] + } + let e₂ := Equiv.Set.ofEq h_eq + exact e₁.trans e₂ + case he => simp + +/- Another simplifying lemma for if-then-else statements within a sum.-/ +lemma simplifier2_gen (α β : Type) [DecidableEq β] (f: α → SLang β)(hd : α) (tl : List α) (b : β): +(∑' (a : List β), f hd b * if a.head? = some b then mapM f tl a.tail else 0) = + ∑' (a : List β), f hd b * mapM f tl a := by + simp_all only [mul_ite, mul_zero] + apply symm + apply list_beta_tsum_only_tl β b + +/- Simplifying lemma to pull a constant out of a sum. -/ +lemma simplifier3_gen [LawfulMonad SLang] (α β : Type)(f : α → SLang β)(hd : α)(tl : List α) (b : List β) (h : ∑' (b : β), f hd b = 1): ∑' (a : β), f hd a * mapM f tl b = mapM f tl b := by + have : ∑' (a : β), f hd a * mapM f tl b = (∑' (a : β), f hd a) * mapM f tl b := by + rw [ENNReal.tsum_mul_right] + rw [this, h, one_mul] + +/- The key normalization lemma, which shows that if a function f + normalizes, then the function obtained applying monadic map to f and some list al + also normalizes. +-/ +lemma norm_func_norm_on_list [LawfulMonad SLang] (α β : Type) [DecidableEq β] (f: α → SLang β) (al: List α): + (∀ a : α, ∑' (b : β), f a b = 1) → ∑' (b : List β), mapM f al b = 1 := by + intro h + induction al with + | nil => + rw [@List.mapM_nil] + simp[pure] + rw [ENNReal.tsum_eq_add_tsum_ite []] + simp_all only [↓reduceIte, ite_self, tsum_zero, add_zero] + | cons hd tl ih => + simp [List.mapM_cons, -mapM] + conv => + enter [1, 1, b, 1, a] + simp [-mapM] + rw [simplifier1_gen] + rw [@ENNReal.tsum_comm] + conv => + enter [1, 1, b] + rw [simplifier2_gen] + rw [@ENNReal.tsum_comm] + conv => + enter [1, 1, b] + rw [simplifier3_gen] + rfl + rw[h hd] + apply ih + +end SLang diff --git a/SampCert/DifferentialPrivacy/Pure/Local/ProbabilityProduct.lean b/SampCert/DifferentialPrivacy/Pure/Local/ProbabilityProduct.lean new file mode 100644 index 00000000..18017f18 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/ProbabilityProduct.lean @@ -0,0 +1,44 @@ +import SampCert.DifferentialPrivacy.Pure.Local.ENNRealLemmasSuite + +namespace SLang + +open ENNRealLemmas +/- Helper lemma about mapM to prove "prod_of_ind_prob"-/ +lemma mapM_dist_cons {β : Type} [LawfulMonad SLang] [DecidableEq β] (f: T → SLang β) (b: β)(c: List β)(hd: T)(tl: List T): +mapM f (hd :: tl) (b :: c) = f hd b * mapM f tl c := by + rw[List.mapM_cons] + simp[-mapM] + conv => + enter [1, 1, a, 2] + simp[-mapM] + rw [simplifier_3] + rw [tsum_eq_single b] + aesop + aesop + +/- Proof that using `mapM f l` is the same as sampling from f (l.get i) for all i independently -/ +lemma prod_of_ind_prob (β : Type) [LawfulMonad SLang] [DecidableEq β] (f : T -> SLang β) (a : List β) (l : List T) (k : l.length = a.length) : + mapM f l a = (∏' (i : Fin l.length), f (l.get i) (a.get (Fin.cast k i))) := by + induction l generalizing a with + | nil => + simp[-mapM] + rw[List.length_nil] at k + symm at k + apply List.eq_nil_of_length_eq_zero at k + rw[k] + | cons hd tl ih => + cases a with + | nil => + simp at k + | cons b c => + rw [mapM_dist_cons] + rw [ih c] + rw [@tprod_fintype] + rw [@tprod_fintype] + simp + rw[Fin.prod_univ_succ] + simp at k + apply Eq.refl + aesop + +end SLang diff --git a/SampCert/DifferentialPrivacy/Pure/Local/PushForward.lean b/SampCert/DifferentialPrivacy/Pure/Local/PushForward.lean new file mode 100644 index 00000000..90d738fb --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/PushForward.lean @@ -0,0 +1,27 @@ +import SampCert + +open SLang + +/- This proves that the push-forward of a probability measure is a probability measure. +-/ + +noncomputable def push_forward {T S: Type} [DecidableEq S] (p : SLang T) (f : T -> S) : SLang S := + fun s => ∑' (t : T), if s = f t then p t else 0 + +lemma push_forward_prob_is_prob_gen {T S : Type} [DecidableEq S] (p : SLang T) (f : T -> S) (h : ∑' (t : T), p t = n) : + ∑' (s : S), (push_forward p f) s = n := by + simp [push_forward] + rw [@ENNReal.tsum_comm] + have _: ∀b : T, ∑' (a : S), (if f b = a then p b else 0 : ENNReal) = p b := by + intro b + rw [tsum_eq_single (f b)] + simp + intro b' a + simp_all only [ne_eq, ite_eq_right_iff] + intro a_1 + subst a_1 + simp_all only [not_true_eq_false] + simp_all + +lemma push_forward_prob_is_prob {T S : Type} [DecidableEq S] (p : SLang T) (f : T -> S) (h : ∑' (t : T), p t = 1) : + ∑' (s : S), (push_forward p f) s = 1 := push_forward_prob_is_prob_gen p f h diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Basic.lean b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Basic.lean new file mode 100644 index 00000000..d6e67da4 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Basic.lean @@ -0,0 +1,7 @@ +import Mathlib.Probability.ProbabilityMassFunction.Basic +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Definitions +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Properties.BasicLemmas +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Properties.DPProof +import SampCert.DifferentialPrivacy.Pure.Local.Normalization +import SampCert.DifferentialPrivacy.Pure.Local.RAPPOR.Definitions +import SampCert.DifferentialPrivacy.Pure.Local.ProbabilityProduct diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Definitions.lean b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Definitions.lean new file mode 100644 index 00000000..623bd9a8 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Definitions.lean @@ -0,0 +1,30 @@ +import Mathlib.Probability.ProbabilityMassFunction.Basic +import SampCert +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Definitions +import SampCert.DifferentialPrivacy.Pure.Local.Normalization +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Properties.PMFProof + +namespace RAPPOR + +open RandomizedResponse SLang + +/- Definition of One-Time Basic RAPPOR. -/ + +/- One-hot encoding of a vector-/ +@[simp] +def one_hot {T : Type} (n : Nat) (query : T -> Fin n) (v : T) : List Bool := List.ofFn (fun i => query v = i) + +/- One-Time Basic RAPPOR for a single user. + We follow the description in "LDP Protocols for Frequency Estimation" + by Wang et al. + The rational privacy parameter lambda = num/den relates to the parameter + f in the paper via the equation lambda = 1/2 (1 - f). +-/ +def RAPPORSingleSample {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v : T) : SLang (List Bool) := do + RRSamplePushForward num den h (one_hot n query v) + +/- One-Time Basic RAPPOR for a dataset of users. -/ +def RAPPORSample {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v : List T) : SLang (List (List Bool)) := do + v.mapM (fun x => RAPPORSingleSample n query num den h x) + +end RAPPOR diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/Arithmetic.lean b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/Arithmetic.lean new file mode 100644 index 00000000..4b7f9788 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/Arithmetic.lean @@ -0,0 +1,176 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RAPPOR.Basic + +open SLang +open ENNRealLemmas +open RandomizedResponse + +namespace RAPPOR + +/- Mathematically trivial arithmetical steps for the Proof of DP, + together with the final bound for DP. +-/ + +lemma arith_1 (num : Nat) (den : PNat) (h : 2 * num < den): +(1 : ENNReal) ≤ ((2⁻¹ + ↑num / ↑(NNReal.ofPNat den)) / (2⁻¹ - ↑num / ↑(NNReal.ofPNat den))) ^ 2 := by + rw [@sq] + simp + cases frac_zero : num/den.val == (0:ENNReal) with + | true => + simp_all only [beq_iff_eq] + rw [@Decidable.le_iff_lt_or_eq] + right + simp_all only [beq_eq_false_iff_ne, ne_eq, ENNReal.div_eq_zero_iff, + Nat.cast_eq_zero, ENNReal.natCast_ne_top, or_false, Nat.cast_mul, Nat.cast_ofNat] + rw [← ENNReal.coe_two] + norm_cast + simp + rw [ENNReal.div_self] + simp + simp + | false => + rw [@Decidable.le_iff_lt_or_eq] + left + apply ENNRealLemmas.quot_gt_one_rev + apply ENNRealLemmas.sub_le_add_ennreal + aesop + rw [@ENNReal.le_inv_iff_mul_le] + rw [@ENNReal.div_eq_inv_mul] + rw [mul_assoc] + rw [mul_comm] + rw [← @ENNReal.le_inv_iff_mul_le] + simp + rw [@Decidable.le_iff_lt_or_eq] + left + rw [@Nat.cast_comm] + norm_cast + simp_all only [beq_eq_false_iff_ne, ne_eq, ENNReal.div_eq_zero_iff, + Nat.cast_eq_zero, ENNReal.natCast_ne_top, or_false, Nat.cast_mul, Nat.cast_ofNat] + rw [← ENNReal.coe_two] + norm_cast + simp + +lemma reindex (α β : Type) (l v : List α) (b : List β) (h1 : l.length = v.length) (h2 : l.length = b.length) + (f : α -> β -> ENNReal): + ∏ (i : Fin l.length), f l[i] b[i] = ∏ (i : Fin v.length), f l[i] b[i] := by + apply Fintype.prod_equiv (finCongr h1) + intro x + rfl + +lemma num_den_simper (num : Nat) (den : PNat) (h : 2 * num < den): + num / den < (2⁻¹ : ℝ) := by + rw [@div_lt_iff] + have h1 : 2 * (num : ℝ) < den.val := by exact_mod_cast h + have h2: 2 * (num : ℝ) < den := by aesop + have h3 : 2⁻¹ * (2 * (num : ℝ)) < 2⁻¹ * den := by + rw [@mul_lt_mul_left] + apply h2 + aesop + aesop + simp_all only [NNReal.ofPNat, Nonneg.mk_natCast, NNReal.coe_pos, Nat.cast_pos] + exact den.2 + +lemma log_rw (num : Nat) (den : PNat) (h: 2 * num < den): + 2 * Real.log ((2⁻¹ + ↑num / ↑(NNReal.ofPNat den)) / (2⁻¹ - ↑num / ↑(NNReal.ofPNat den))) = Real.log (((2⁻¹ + ↑num / ↑(NNReal.ofPNat den)) / (2⁻¹ - ↑num / ↑(NNReal.ofPNat den)))^2) := by + rw [←Real.log_rpow] + simp + rw [@div_pos_iff] + apply Or.inl + apply And.intro + norm_num + rw [@one_div] + positivity + norm_num + simp_all only [one_div] + convert num_den_simper num den h + +lemma exp_rw (num : Nat) (den : PNat) (h: 2 * num < den): + Real.exp (Real.log (((2⁻¹ + num / den) / (2⁻¹ - num / den))^2)) = ((2⁻¹ + num / den) / (2⁻¹ - num / den))^2 := by + rw [Real.exp_log] + rw [@sq_pos_iff] + rw [@div_ne_zero_iff] + apply And.intro + positivity + simp_all only [NNReal.ofPNat, Nonneg.mk_natCast, ne_eq] + rw [@sub_eq_zero] + have h1 : num / den < (2⁻¹ : ℝ) := num_den_simper num den h + aesop + +lemma arith_2_helper (num : Nat) (den : PNat) (h : 2 * num < den) : +(((2⁻¹ : ENNReal) + ↑num / den) / (2⁻¹ - ↑num / ↑↑↑den.val)) = + ENNReal.ofReal ((2⁻¹ + ↑num / ↑↑ den.val) / (2⁻¹ - ↑num / ↑↑↑den)) := by + have h1: ENNReal.ofReal 2⁻¹ = (2⁻¹ : ENNReal) := by + field_simp + rw [ENNReal.ofReal_div_of_pos] + simp + linarith + have h2: (0 : ℝ) ≤ num / den.val := by + rw [@div_nonneg_iff] + apply Or.inl + apply And.intro + aesop + aesop + rw [ENNReal.ofReal_div_of_pos] + congr + { + rw [ennreal_of_nat] + rw [ennreal_of_pnat] + rw [ENNReal.ofReal_add] + rw [ENNReal.ofReal_div_of_pos] + norm_cast + rw [h1] + aesop + aesop + aesop + simp [h2] + } + { rw [ENNReal.ofReal_sub] + rw [h1] + rw [ENNReal.ofReal_div_of_pos] + aesop + simp_all only [NNReal.ofPNat, Nonneg.mk_natCast, NNReal.coe_pos, Nat.cast_pos] + exact den.2 + simp_all only [NNReal.ofPNat, Nonneg.mk_natCast] + convert h2 + } + { rw [@sub_pos] + convert num_den_simper num den h + } + +lemma arith_2_mult_helper (num : Nat) (den : PNat) (h : 2 * num < den) : +(((2⁻¹ : ENNReal) + ↑num / den) / (2⁻¹ - ↑num / ↑↑↑den.val)) * (((2⁻¹ : ENNReal) + ↑num / den) / (2⁻¹ - ↑num / ↑↑↑den.val)) = +ENNReal.ofReal ((2⁻¹ + ↑num / ↑↑ den.val) / (2⁻¹ - ↑num / ↑↑↑den)) * ENNReal.ofReal ((2⁻¹ + ↑num / ↑↑ den.val) / (2⁻¹ - ↑num / ↑↑↑den)) := by +rw [arith_2_helper num den h] + +lemma arith_2 (num : Nat) (den : PNat) (h: 2 * num < den): + ((2⁻¹ + num / den) / (2⁻¹ - num / den))^2 = ENNReal.ofReal (Real.exp (2 * Real.log ((2⁻¹ + num / den) / (2⁻¹ - num / den)))) := by + conv => + enter [2, 1, 1] + rw [log_rw] + rfl + exact h + conv => + enter [2, 1] + rw [exp_rw num den h] + rw [@sq, @sq] + simp + rw [ENNReal.ofReal_mul] + convert arith_2_mult_helper num den h + { + rw [@div_nonneg_iff] + apply Or.inl + apply And.intro + {positivity} + { rw [@sub_nonneg] + rw [@Decidable.le_iff_lt_or_eq] + apply Or.inl + convert num_den_simper num den h + } + } + +lemma RRSamplePushForward_final_bound (num : Nat) (den : PNat) (h : 2 * num < den) (a a' : Bool) (b : Bool): + RRSinglePushForward num den h a b / RRSinglePushForward num den h a' b + ≤ (den + 2 * num) / (den - 2 * num) := by + rw [← RRSingleSample_is_RRSinglePushForward num den h] + apply final_bound + +end RAPPOR diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/BasicLemmas.lean b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/BasicLemmas.lean new file mode 100644 index 00000000..d707c2cc --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/BasicLemmas.lean @@ -0,0 +1,160 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RAPPOR.Basic + +namespace RAPPOR + +open RandomizedResponse +open SLang + +/- In the RAPPOR algorithm with n possible responses, the probability of an output of different length than n is zero.-/ +lemma RAPPORSingleSample_diff_lengths [LawfulMonad SLang] {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (l₁ : T) (l₂ : List Bool) (hlen : (one_hot n query l₁).length ≠ l₂.length): + RAPPORSingleSample n query num den h l₁ l₂= 0 := by + rw [RAPPORSingleSample] + apply RRSamplePushForward_diff_lengths num den h (one_hot n query l₁) l₂ hlen + + +/- The same "diff_lengths" theorem as above, but extended to the entire dataset. -/ +lemma RAPPORSample_diff_lengths [LawfulMonad SLang] {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (l₁ : List T) (x : List (List Bool)) (hlen : l₁.length ≠ x.length): + RAPPORSample n query num den h l₁ x = 0 := by + induction l₁ generalizing x with + | nil => simp [RAPPORSample, -mapM] + aesop + | cons hd tl ih => + simp [RAPPORSample, -mapM] + simp [RAPPORSample, -mapM] at ih + intro b + apply Or.inr + intro y hy + subst hy + simp_all only [mapM, List.length_cons, ne_eq, add_left_inj, not_false_eq_true] + +/- The next few lemmas are helper lemmas to simplify proofs involving one-hot encodings. +-/ +lemma one_hot_same_answer {T : Type} (n : Nat) (query: T -> Fin n) (v u : T) (h : query v = query u) : + one_hot n query v = one_hot n query u := by + simp + rw [h] + +lemma one_hot_same_answer_index {T : Type} (n : Nat) (query: T -> Fin n) (v : T) (j : Fin n) : + (one_hot n query v)[j]'(by simp) = true ↔ query v = j := by + simp [one_hot] + +lemma one_hot_different_answer {T : Type} (n : Nat) (query: T -> Fin n) (v u : T) (h : query u ≠ query v): + (one_hot n query v)[query v]'(by simp) ≠ (one_hot n query u)[query v]'(by simp) := by + simp + rw [← @Ne.eq_def] + exact h + +lemma one_hot_different_answer_ex_two {T : Type} (n : Nat) (query: T -> Fin n) (v u : T) (j : Fin n) (h: query v ≠ query u): + (one_hot n query v)[j]'(by simp) ≠ (one_hot n query u)[j]'(by simp) ↔ query v = j ∨ query u = j := by + simp [one_hot] + apply Iff.intro + { intro ha + by_contra hb -- actually aesop can take it from here + rw [Mathlib.Tactic.PushNeg.not_or_eq] at hb + apply ha + apply Iff.intro + intro hc + apply And.left at hb + contradiction + intro hc + apply And.right at hb + contradiction + } + { intro ha + cases ha with + | inl h1 => aesop + | inr h1 => aesop + } + +lemma one_hot_different_answer_ex_two_contrp {T : Type} (n : Nat) (query: T -> Fin n) (v u : T) (j : Fin n) (h: query v ≠ query u): + (one_hot n query v)[j]'(by simp) = (one_hot n query u)[j]'(by simp) ↔ query v ≠ j ∧ query u ≠ j := by + have h1: query v ≠ j ∧ query u ≠ j ↔ ¬ (query v = j ∨ query u = j) := by simp_all only [ne_eq, not_or] + rw [h1] + rw [←one_hot_different_answer_ex_two n query v u j h] + simp + +lemma one_hot_different_answer_ex_two_contrp' {T : Type} (n : Nat) (query: T -> Fin n) (v u : T) (j : Fin n) (h: query v ≠ query u): + (one_hot n query v)[j.val]'(by simp) = (one_hot n query u)[j.val]'(by simp) ↔ query v ≠ j ∧ query u ≠ j := by + have h1: (one_hot n query v)[j.val]'(by simp) = (one_hot n query v)[j]'(by simp) := by simp + have h2: (one_hot n query u)[j.val]'(by simp) = (one_hot n query u)[j]'(by simp) := by simp + rw [h1, h2] + rw [one_hot_different_answer_ex_two_contrp n query v u j h] + +/- RRSamplePushForward gives a non-zero probability for an output of the same length. + This is needed in the DP proof. +-/ +lemma RRSamplePushForward_non_zero (num : Nat) (den : PNat) (h: 2 * num < den) (l : List Bool) (b : List Bool) (k: l.length = b.length): + RRSamplePushForward num den h l b ≠ 0 := by + rw [RRSamplePushForward] + rw [prod_of_ind_prob _ _ _ _ k] + rw [@tprod_fintype] + rw [@Finset.prod_ne_zero_iff] + intro a _ + apply RRSinglePushForward_non_zero + +lemma RAPPORSingleSample_non_zero [LawfulMonad SLang] {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (l₁ : T) (l₂ : List Bool) (hlen : n = l₂.length): + RAPPORSingleSample n query num den h l₁ l₂ ≠ 0 := by + rw [RAPPORSingleSample] + apply RRSamplePushForward_non_zero num den h (one_hot n query l₁) l₂ (by simp[hlen]) + +lemma RAPPORSingleSample_zero_imp_diff_lengths [LawfulMonad SLang] {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (l₁ : T) (l₂ : List Bool) + (hlen: RAPPORSingleSample n query num den h l₁ l₂ = 0): n ≠ l₂.length := by + by_contra hx + have h_contr: RAPPORSingleSample n query num den h l₁ l₂ ≠ 0 := RAPPORSingleSample_non_zero n query num den h l₁ l₂ (by simp[hx]) + contradiction + +/- RRSamplePushForward is always finite. This is needed in the DP proof. -/ +lemma RRSamplePushForward_finite (num : Nat) (den : PNat) (h: 2 * num < den) (l : List Bool) (b : List Bool): + RRSamplePushForward num den h l b ≠ ⊤ := by + cases hlen: l.length == b.length with + | true => + simp at hlen + unfold RRSamplePushForward + rw [prod_of_ind_prob _ _ _ _ hlen] + rw [@tprod_fintype] + apply ENNRealLemmas.Finset.prod_ne_top_fin + intro i + apply RRSinglePushForward_finite + | false => + simp at hlen + have hzero: RRSamplePushForward num den h l b = 0 := RRSamplePushForward_diff_lengths num den h l b hlen + rw [hzero] + simp + +/- The quotient of two products is the product of the quotients, under the condition that the denominators are non-zero and non-infinite -/ + /- This is needed in the DP proof -/ + lemma prod_over_prod (n : Nat) (f : Fin n -> ENNReal) (g : Fin n -> ENNReal)(nonzero: ∀i, g i ≠ 0)(noninf: ∀i, g i ≠ ⊤): + (∏ i : Fin n, f i) / (∏ i : Fin n, g i) = ∏ i : Fin n, (f i / g i) := by + induction n with + | zero => simp + | succ m ih => + conv => + enter[2] + simp [@Fin.prod_univ_add] + rw[← ih] + simp [@Fin.prod_univ_add] + conv => + enter[1] + rw[div_eq_mul_inv] + rw[ENNReal.mul_inv] + rw[mul_assoc] + conv => + enter[2] + rw[mul_comm] + rw[← mul_assoc] + rw[← mul_assoc] + conv => + enter [1,1] + rw[mul_comm] + rw[← ENNReal.div_eq_inv_mul] + rw[mul_assoc] + rw[← ENNReal.div_eq_inv_mul] + rfl + apply Or.inr + apply noninf + apply Or.inr + apply nonzero + intro i + apply nonzero + intro i + apply noninf diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/DPProof.lean b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/DPProof.lean new file mode 100644 index 00000000..7f3041cb --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/DPProof.lean @@ -0,0 +1,224 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RAPPOR.Basic +import SampCert.DifferentialPrivacy.Pure.Local.RAPPOR.Properties.BasicLemmas +import SampCert.DifferentialPrivacy.Pure.Local.RAPPOR.Properties.Arithmetic +import SampCert.DifferentialPrivacy.Pure.Local.RAPPOR.Properties.PMFProof +namespace RAPPOR + +open RandomizedResponse +open SLang + +/- In this file, we show that the One-Time Basic RAPPOR algorithm + is ε-differentially private with ε = 2 ln (1/2 + λ)/(1/2 - λ). -/ + +/- This allows us to use prob_ind_prob in the RAPPOR DP proof -/ +lemma RAPPOR_prob_of_ind_prob_PMF {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v : List T) (a: List (List Bool)) (k : v.length = a.length) : + RAPPORSample_PMF n query num den h v a = (∏'(i: Fin v.length), RAPPORSingleSample n query num den h (v.get i) (a.get (Fin.cast k i ))):= by apply prod_of_ind_prob + +/- Uses the one-hot-encoding lemmas to rewrite a quotient of RRSinglePushForward applications into an if-then-else statement -/ +lemma reduction_helper1 {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v u : T) (b : List Bool) + (ohu_len : (one_hot n query u).length = b.length) (onhv_len : (one_hot n query v).length = b.length) (h_users: query u ≠ query v) (i : Fin (one_hot n query u).length): + RRSinglePushForward num den h (one_hot n query v)[i.val] b[i.val] / + RRSinglePushForward num den h (one_hot n query u)[i.val] b[i.val] = + if query v = (finCongr (by aesop) i) then RRSinglePushForward num den h (one_hot n query v)[query v] (b[query v]'(by aesop)) / RRSinglePushForward num den h (one_hot n query u)[query v] (b[query v]'(by aesop)) + else if query u = (finCongr (by aesop) i) then RRSinglePushForward num den h (one_hot n query v)[query u] (b[query u]'(by aesop)) / RRSinglePushForward num den h (one_hot n query u)[query u] (b[query u]'(by aesop)) + else 1 := by + cases hi : (finCongr (by aesop) i) == query v with + | true => simp at hi + have h1: i.val = (query v).val := by + rw [← hi] + simp + aesop + | false => simp at hi + cases hi2: (finCongr (by aesop) i) == query u with + | true => simp at hi2 + have h1: i.val = (query u).val := by + rw [← hi2] + simp + simp [h1, -one_hot] + simp_all only [not_false_eq_true, List.getElem_ofFn, Fin.eta, decide_True, + decide_False, ↓reduceIte] + split + next h_1 => + simp_all only [decide_True] + next h_1 => simp_all only [decide_False] + | false => simp at hi2 + simp_all only [List.getElem_ofFn, finCongr_apply, Fin.getElem_fin, Fin.coe_cast, + Fin.eta] + split + next h_1 => simp_all only [not_true_eq_false] + next h_1 => + split + next h_2 => simp_all only [not_true_eq_false] + next h_2 => + have h1: (one_hot n query v)[i.val]'(by omega) = (one_hot n query u)[i.val]'(by omega) := + by convert one_hot_different_answer_ex_two_contrp' n query v u (finCongr (by aesop) i) + aesop + rw [h1] + rw [ENNReal.div_self] + apply RRSinglePushForward_non_zero + apply RRSinglePushForward_finite + +/- Rewrites the if-then-else statement from above into a simpler form, with most terms cancelled. -/ +lemma reduction_helper2 {T : Type} (n : Nat) (query: T -> Fin n) (f : Bool -> SLang Bool) (v u : T) (b : List Bool) (h_users: query u ≠ query v) + (ohu_len : (one_hot n query u).length = b.length) (onhv_len : (one_hot n query v).length = b.length): + (∏ i : Fin (one_hot n query u).length, + if query v = (finCongr (by aesop) i) then f (one_hot n query v)[query v] (b[query v]'(by aesop)) / f (one_hot n query u)[query v] (b[query v]'(by aesop)) + else if query u = (finCongr (by aesop) i) then f (one_hot n query v)[query u] (b[query u]'(by aesop)) / f (one_hot n query u)[query u] (b[query u]'(by aesop)) + else 1) = + f (one_hot n query v)[(query v).val] (b[(query v).val]'(by aesop)) / f (one_hot n query u)[(query v).val] (b[(query v).val]'(by aesop)) + * f (one_hot n query v)[(query u).val] (b[(query u).val]'(by aesop)) / f (one_hot n query u)[(query u).val] (b[(query u).val]'(by aesop)) + := by + simp_all only [finCongr_apply, Fin.getElem_fin, Fin.coe_cast, List.getElem_ofFn, Fin.eta] + have _ (g : Fin b.length -> ENNReal) : ∏ i : Fin b.length, g i = ∏ (i ∈ Finset.univ), g i := by aesop + conv => + enter [1] + rw [@Finset.prod_ite] + simp [-one_hot] + rw [@Finset.prod_ite] + simp [-one_hot] + simp_all only [finCongr_apply, implies_true, List.getElem_ofFn, Fin.eta, decide_True] + have hblen : b.length = n := by aesop + have h5 (k : T): Finset.filter (fun x => query k = Fin.cast (by aesop) x) (Finset.univ : Finset (Fin (one_hot n query u).length)) = {finCongr (by aesop) (query k)} := by aesop + have h6: (Finset.filter (fun x => query u = Fin.cast (by aesop) x) (Finset.filter (fun x => ¬query v = Fin.cast (by aesop) x) (Finset.univ : Finset (Fin (one_hot n query u).length)))).card = 1 := by + rw [@Finset.card_eq_one] + use (finCongr (by aesop) (query u)) + aesop + have h8: ∏ x ∈ Finset.filter (fun x => query v = Fin.cast (by aesop) x) (Finset.univ : Finset (Fin (one_hot n query u).length)), + f (one_hot n query v)[(query v).val] (b[(query v).val]'(by aesop)) / f (one_hot n query u)[(query v).val] (b[(query v).val]'(by aesop)) + = f (one_hot n query v)[(query v).val] (b[(query v).val]'(by aesop)) / f (one_hot n query u)[(query v).val] (b[(query v).val]'(by aesop)) := by + subst hblen + simp_all only [List.getElem_ofFn, Fin.eta, Finset.prod_const] + conv => + enter [1, 2] + simp + simp + have h9: ∏ x ∈ Finset.filter (fun x => query u = Fin.cast (by aesop) x) (Finset.filter (fun x => ¬query v = Fin.cast (by aesop) x) Finset.univ : Finset (Fin (one_hot n query u).length)), + f (one_hot n query v)[(query u).val] (b[(query u).val]'(by + simp_all only [List.getElem_ofFn, Fin.eta, decide_True, decide_False, Fin.is_lt])) / f (one_hot n query u)[(query u).val] (b[(query u).val]'(by + simp_all only [one_hot, List.getElem_ofFn, Fin.eta, decide_True, decide_False, Fin.is_lt])) + = f (one_hot n query v)[(query u).val] (b[(query u).val]'(by aesop)) / f (one_hot n query u)[(query u).val] (b[(query u).val]'(by aesop)) := by + simp_all only [List.getElem_ofFn, Fin.eta, decide_True, Finset.prod_const] + simp + rw [h8] + rw [h9] + rw [@mul_div] + +/- Cancellation of terms for the single-user DP proof by using the above two lemmas.-/ +lemma single_DP_reduction {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v u : T) (b : List Bool) (h_users: query u ≠ query v) + (ohu_len : (one_hot n query u).length = b.length) (onhv_len : (one_hot n query v).length = b.length): +∏ i : Fin (one_hot n query u).length, RRSinglePushForward num den h (one_hot n query v)[i.val] b[i.val] / RRSinglePushForward num den h (one_hot n query u)[i.val] b[i.val] + = RRSinglePushForward num den h (one_hot n query v)[(query v).val] (b[(query v).val]'(by aesop)) / RRSinglePushForward num den h (one_hot n query u)[(query v).val] (b[(query v).val]'(by aesop)) + * RRSinglePushForward num den h (one_hot n query v)[(query u).val] (b[(query u).val]'(by aesop)) / RRSinglePushForward num den h (one_hot n query u)[(query u).val] (b[(query u).val]'(by aesop)) + := by + conv => + enter [1, 2, i] + rw [reduction_helper1 n query num den h v u b ohu_len onhv_len h_users i] + rw [reduction_helper2 _ _ _ _ _ _ h_users] + simp_all only [mul_one] + exact onhv_len + +/- This gives the RAPPOR DP bound when the algorithm is applied to a single user. -/ +lemma RAPPORSingle_DP {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v u : T) (b : List Bool): + (RAPPORSingleSample n query num den h v b) / (RAPPORSingleSample n query num den h u b) ≤ ((2⁻¹ + num / den) / (2⁻¹ - num / den))^2 := by + simp_all only [RAPPORSingleSample] + set ohv := one_hot n query v + set ohu := one_hot n query u + have oh_len: ohu.length = ohv.length := by simp[ohv, ohu] + cases hlen: ohv.length == b.length with + | true => + simp at hlen + cases h_eq: query v == query u with + | true => simp at h_eq + have same_answer: ohv = ohu := one_hot_same_answer n query v u h_eq + rw [same_answer] + rw [@ENNReal.div_self] + {apply arith_1 _ _ h} + { + apply RRSamplePushForward_non_zero + rw[←hlen] + exact oh_len + } + { apply RRSamplePushForward_finite + } + | false => + simp at h_eq + simp_all only [RRSamplePushForward] + rw [prod_of_ind_prob _ _ _ _ hlen] + rw [prod_of_ind_prob _ _ _ _ oh_len] + simp_all [@tprod_fintype] + have len_eq: ohu.length = ohv.length := by aesop + have index_1: ∏ i : Fin ohv.length, RRSinglePushForward num den h ohv[i.val] b[i.val] = + ∏ i : Fin ohu.length, RRSinglePushForward num den h ohv[i.val] b[i.val] := by + apply reindex + symm at len_eq + exact len_eq + exact hlen + rw [index_1] + rw [prod_over_prod] + simp_all only [ohv, ohu] + rw [single_DP_reduction n query num den h v u b (by aesop) oh_len hlen] + rw [@mul_div_assoc] + rw [@sq] + have exp_eq : ((2:ENNReal)⁻¹ + num / den) / (2⁻¹ - num / den) = (↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num) := by + apply ENNRealLemmas.exp_change_form + exact h + simp at exp_eq + rw [exp_eq] + apply ENNRealLemmas.le_double + apply RRSamplePushForward_final_bound + apply RRSamplePushForward_final_bound + intro i + apply RRSinglePushForward_non_zero + intro i + apply RRSinglePushForward_finite + | false => + simp at hlen + have h1: RRSamplePushForward num den h ohv b = 0 := RAPPORSingleSample_diff_lengths n query num den h v b hlen + rw [h1] + rw [@ENNReal.zero_div] + simp + +/- Instantiation of RAPPOR as a local randomizer -/ +def RAPPORSingle_Local (n : Nat) (query : T → Fin n) (num: Nat) (den : PNat) (h: 2 * num < den): LocalMechanism T (List Bool) := + fun l => ⟨RAPPORSingleSample n query num den h l, RAPPORSingleSample_PMF_helper query num den h l⟩ + +/- Proof of Local DP for RAPPOR, using the single-DP bound above. -/ +lemma RAPPOR_Local_DP {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den): + Local_DP (RAPPORSingle_Local n query num den h) (2 * Real.log ((2⁻¹ + num / den) / (2⁻¹ - num / den))) := by + rw [Local_DP] + intro u₁ u₂ y + simp [RAPPORSingle_Local] + calc + RAPPORSingleSample n query num den h u₁ y / + RAPPORSingleSample n query num den h u₂ y ≤ + ((2⁻¹ + num / den) / (2⁻¹ - num / den)) ^ 2 := RAPPORSingle_DP n query num den h _ _ _ + _ = ENNReal.ofReal (Real.exp (2 * Real.log ((2⁻¹ + num / den) / (2⁻¹ - num / den)))) := by rw[←arith_2 num den h] + +/- Proof of the differential privacy bound for our implementation of Basic One-Time RAPPOR, using + the lemma in "LocalToDataset."-/ +theorem RAPPORSample_DP {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (b : List Bool): + DP_withUpdateNeighbour (RAPPORSample_PMF n query num den h) (2 * Real.log ((2⁻¹ + num/den) / (2⁻¹ - num/den))) + := by + have h1: RAPPORSample_PMF n query num den h = local_to_dataset_PMF (RAPPORSingle_Local n query num den h) := rfl + rw [h1] + apply LocalDP_to_dataset _ _ + {intro l₁ l₂ b k bo i + apply Iff.intro + simp [RAPPORSingle_Local] + intro h0 + apply RAPPORSingleSample_diff_lengths + apply RAPPORSingleSample_zero_imp_diff_lengths at h0 + simp [one_hot, h0] + intro h0 + apply RAPPORSingleSample_diff_lengths + apply RAPPORSingleSample_zero_imp_diff_lengths at h0 + simp [←ne_eq] + apply h0 + } + { + intro k bo + exact PMF.apply_ne_top (RAPPORSingle_Local n query num den h k) bo + } + {apply RAPPOR_Local_DP n query num den h} + +end RAPPOR diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/PMFProof.lean b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/PMFProof.lean new file mode 100644 index 00000000..68e9e465 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RAPPOR/Properties/PMFProof.lean @@ -0,0 +1,29 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RAPPOR.Basic + +namespace RAPPOR + +open SLang +open RandomizedResponse +/- In this file, we show normalization for the One-Time Basic RAPPOR Algorithm. +-/ + +/- Normalization of the single-user RAPPOR, which essentially relies on the normalization property + of randomized response. -/ +lemma RAPPORSingleSample_PMF_helper [LawfulMonad SLang] {T : Type} (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v : T) : + HasSum (RAPPORSingleSample n query num den h v) 1 := by + rw [RAPPORSingleSample] + apply RRSamplePushForward_PMF_helper + +/- Extension to the multi-user RAPPOR, which follows from our normalization lemma. -/ +lemma RAPPORSample_PMF_helper [LawfulMonad SLang] {T : Type} (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v : List T) : + HasSum (RAPPORSample n query num den h v) 1 := by + rw [Summable.hasSum_iff ENNReal.summable] + unfold RAPPORSample + apply norm_func_norm_on_list + intro a + rw [← Summable.hasSum_iff ENNReal.summable] + apply RAPPORSingleSample_PMF_helper query num den h a + +/- Promotion of RAPPOR to a PMF-/ +def RAPPORSample_PMF [LawfulMonad SLang] {T : Type} (n : Nat) (query: T -> Fin n) (num : Nat) (den : PNat) (h: 2 * num < den) (v : List T) : PMF (List (List Bool)) := + ⟨RAPPORSample n query num den h v, RAPPORSample_PMF_helper query num den h v⟩ diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Basic.lean b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Basic.lean new file mode 100644 index 00000000..b77d5966 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Basic.lean @@ -0,0 +1,10 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Definitions +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Properties.Arithmetic +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Properties.PMFProof +import Mathlib.Probability.ProbabilityMassFunction.Basic +import SampCert.DifferentialPrivacy.Pure.DP +import SampCert.Samplers.Bernoulli.Properties +import SampCert.DifferentialPrivacy.Pure.Local.LawfulMonadSLang +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.DPwithUpdateNeighbour +import SampCert.DifferentialPrivacy.Pure.Local.ENNRealLemmasSuite +import SampCert.DifferentialPrivacy.Pure.Local.ProbabilityProduct diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Definitions.lean b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Definitions.lean new file mode 100644 index 00000000..103557a8 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Definitions.lean @@ -0,0 +1,33 @@ +import Mathlib.Probability.ProbabilityMassFunction.Basic +import SampCert + +namespace RandomizedResponse + +open SLang + +/- Arithmetic lemma for the next definition. -/ +lemma arith_0 (num : Nat) (den : PNat) (_ : 2 * num < den): den - 2*num ≤ 2 * den := by + simp_all only [tsub_le_iff_right] + linarith + +/- RRSinglePushForward performs the Randomized Response algorithm, but + associates each user with their private response. +-/ + def RRSinglePushForward (num : Nat) (den : PNat) (h: 2 * num < den) (l : Bool) : SLang Bool := do + let r ← SLang.BernoulliSample (den - 2*num) (2 * den) (arith_0 num den h) + return Bool.xor (l) r + +/- Single-user Randomized Response with a gven query. -/ +def RRSingleSample {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : T) : SLang Bool := do + RRSinglePushForward num den h (query l) + +/- Extension of Randomized Response to a dataset by use of monadic map. -/ + +def RRSample {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : List T) : SLang (List Bool) := do + l.mapM (fun x => RRSingleSample query num den h x) + +/- The next definition is used in RAPPOR. -/ +def RRSamplePushForward (num : Nat) (den : PNat) (h: 2 * num < den) (l : List Bool) : SLang (List Bool) := do + l.mapM (fun x => RRSinglePushForward num den h x) + +end RandomizedResponse diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/Arithmetic.lean b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/Arithmetic.lean new file mode 100644 index 00000000..983cc1f5 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/Arithmetic.lean @@ -0,0 +1,96 @@ +import SampCert.Util.Log +import Mathlib.Topology.Basic +import Mathlib.Probability.ProbabilityMassFunction.Basic +import Mathlib.Data.Real.Basic +import Mathlib.Data.Complex.Exponential +import Mathlib.Analysis.SpecialFunctions.Exp +import Mathlib.Analysis.SpecialFunctions.Log.Basic + +/- We prove the arithmetic bounds necessary for the proof of DP for Randomized Response. -/ + +lemma numerator_pos (num : ℕ) (den : ℕ+) : (0 : ℝ) < ↑↑den + 2 * num := by + have den_pos : 0 < (den : ℕ) := den.property + have den_real_pos : (0 : ℝ) < ↑(den : ℕ) := Nat.cast_pos.mpr den_pos + have two_num_nonneg : 0 ≤ (2 * num : ℝ) := by + refine mul_nonneg ?_ (Nat.cast_nonneg num) + norm_num + exact add_pos_of_pos_of_nonneg den_real_pos two_num_nonneg + +lemma denominator_pos (num : ℕ) (den : PNat) (h : 2 * num < den) : (0 : ℝ) < ↑↑den - 2 * ↑num := by + simp_all only [NNReal.ofPNat, Nonneg.mk_natCast, sub_pos] + norm_cast + exact Nat.cast_lt.mpr h + + +lemma step1 (num : Nat) (den : PNat) (h : 2 * num < den): +ENNReal.ofReal ((den + 2 * num) / (den - 2 * num)) = ENNReal.ofReal (Real.exp (Real.log ((den + 2 * num) / (den - 2 * num)))) := by + congr + rw [eq_comm] + apply Real.exp_log + apply div_pos + {exact numerator_pos num den} + {exact denominator_pos num den h} + +lemma ennreal_of_nat (n: Nat) : ↑n = ENNReal.ofReal (↑ n) := by simp_all only [ENNReal.ofReal_natCast] + +lemma ennreal_of_pnat (d : PNat) : ↑↑d = ENNReal.ofReal (↑↑d) := by simp_all only [NNReal.ofPNat, Nonneg.mk_natCast, + ENNReal.ofReal_coe_nnreal] + + +lemma step2 (num : Nat) (den : PNat) (h : 2 * num < den): + (↑↑den + 2 * ↑num) / (↑↑den - 2 * ↑num) = ENNReal.ofReal ((↑↑den + 2 * ↑num) / (↑↑den - 2 * ↑num)) := by + rw [ENNReal.ofReal_div_of_pos] + rw [ennreal_of_nat] + rw [ennreal_of_pnat] + have h1 : 2 = ENNReal.ofReal (2) := by simp + rw [h1] + rw [← ENNReal.ofReal_mul] + rw [← ENNReal.ofReal_add] + rw [← ENNReal.ofReal_sub] + simp + simp + simp + simp + simp_all only [sub_pos] + norm_cast + rw [Mathlib.Tactic.Zify.natCast_lt] at h + simp_all only [Nat.cast_mul, Nat.cast_ofNat, NNReal.ofPNat, Nonneg.mk_natCast] + norm_cast + norm_cast at h + +lemma final_step_combined (num : Nat) (den : PNat) (h : 2 * num < den) : +(den + (2: ENNReal) * num) / (den - (2 : ENNReal) * num) = ENNReal.ofReal (Real.exp (Real.log ((den + 2 * num) / (den - 2 * num)))) := by + rw [← step1 num den h] + exact step2 num den h + +lemma reduce (num : Nat) (den : PNat): +((↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num): ℝ) = (1 / 2 + ↑num / ↑(NNReal.ofPNat den)) / (1 / 2 - ↑num / ↑(NNReal.ofPNat den)) := by + simp + rw [inv_eq_one_div] + rw [div_add_div] + · rw [one_mul] + rw [div_sub_div] + rw [one_mul] + rw [div_div_div_eq] + nth_rewrite 2 [div_eq_mul_inv] + nth_rewrite 4 [mul_comm] + rw [← div_eq_mul_inv] + rw [mul_div_mul_left] + apply ne_of_gt + simp_all only [Nat.ofNat_pos, mul_pos_iff_of_pos_left, NNReal.coe_pos, Nat.cast_pos] + exact den.2 + aesop + apply ne_of_gt + simp_all only [NNReal.coe_pos, Nat.cast_pos] + exact den.2 + · aesop + · apply ne_of_gt + simp_all only [NNReal.coe_pos, Nat.cast_pos] + exact den.2 + + +lemma final_coercion (num : Nat) (den : PNat) (h : 2 * num < den): +(↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num) ≤ + ENNReal.ofReal (Real.exp (Real.log ((1 / 2 + ↑num / ↑(NNReal.ofPNat den)) / (1 / 2 - ↑num / ↑(NNReal.ofPNat den))))):= by + rw [final_step_combined num den h] + rw [reduce num den] diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/BasicLemmas.lean b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/BasicLemmas.lean new file mode 100644 index 00000000..9489559c --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/BasicLemmas.lean @@ -0,0 +1,293 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Basic + +namespace RandomizedResponse + +open SLang +open ENNRealLemmas + +/- Basic facts about Randomized Response, e.g., its distribution and finiteness. + We then use these facts to prove the final bound for the proof of DP. +-/ + + /- RRSinglePushForward is like RRSingleSample, but with "query" taken to be the identity map-/ +lemma RRSingleSample_is_RRSinglePushForward (num : Nat) (den : PNat) (h: 2 * num < den) (l : Bool): + RRSingleSample (fun x => x) num den h l = RRSinglePushForward num den h l := by + simp [RRSingleSample, RRSinglePushForward] + + /- RRSamplePushForward is like RRSample, but with "query" taken to be the identity map -/ +lemma RRSample_is_RRSamplePushForward (num : Nat) (den : PNat) (h: 2 * num < den) (l : List Bool): + RRSample (fun x => x) num den h l = RRSamplePushForward num den h l := by + simp [RRSample, RRSamplePushForward, -mapM] + rfl + +/- Probability of a person with private answer "true" giving randomized response "true."-/ +lemma RRSingleSample_true_true {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : T) (hq : query l = true): + RRSingleSample query num den h l true = (den + 2 * num) / (2 * den) := by + rw[RRSingleSample, RRSinglePushForward] + simp_all only [bind, pure, Bool.true_bne, bind_apply, BernoulliSample_apply, + pure_apply, Bool.true_eq, Bool.not_eq_true', mul_ite, + Bool.false_eq_true, reduceIte, mul_one, mul_zero, tsum_ite_eq] + simp + rw [ENNReal.sub_eq_of_add_eq] + simp + rw [@ENNReal.div_eq_top] + rw [Not] + intro A + rcases A with ⟨_,hb⟩ + simp at hb + rename_i h_1 + simp_all only [ENNReal.sub_eq_top_iff, ENNReal.natCast_ne_top, ne_eq, false_and] + have h_le : (2:ENNReal) *num ≤ den.val := by + rw [@Nat.lt_iff_le_and_ne] at h + rcases h with ⟨hl,_⟩ + exact mod_cast hl + have two_num_fin : (2:ENNReal)* num ≠ ⊤:= by + simp + rw [Not] + intro B + norm_cast + have hh : 1 = (den.val + (2:ENNReal) * num)/(2 *den) + (den-2*num)/(2*den):= by + simp + rw [@ENNReal.div_add_div_same] + rw [add_comm] + conv => + enter [2,1,2] + rw [add_comm] + rw [← add_assoc] + rw [sub_add_cancel_ennreal] + have den_den : 1 = ((den.val :ENNReal) + den.val)/(2*(den.val:ENNReal)) := by + rw[two_mul] + rw [ENNReal.div_self] + simp + simp + norm_cast + rw [@ENNReal.le_coe_iff] + simp_all only [ne_eq, Nat.cast_mul, Nat.cast_ofNat] + apply h_le + simp_all only [WithTop.coe_natCast, Nat.cast_inj] + apply Eq.refl + exact two_num_fin + symm + exact hh + +/- Probability of a person with private answer "true" giving randomized response "false."-/ +lemma RRSingleSample_true_false {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : T) (hq : query l = true): + RRSingleSample query num den h l false = (den - 2 * num) / (2 * den) := by + rw[RRSingleSample, RRSinglePushForward] + simp_all only [bind, pure, Bool.true_bne, bind_apply, BernoulliSample_apply, ENNReal.natCast_sub, Nat.cast_mul, + Nat.cast_ofNat, PNat.mul_coe, PNat.val_ofNat, pure_apply, Bool.false_eq, Bool.not_eq_false', mul_ite, ↓reduceIte, + mul_one, mul_zero, tsum_ite_eq, NNReal.ofPNat, Nonneg.mk_natCast] + apply Eq.refl + +/- Probability of a person with private answer "false" giving randomized response "true."-/ +lemma RRSingleSample_false_true {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : T) (hq : query l = false): + RRSingleSample query num den h l true = (den - 2 * num) / (2 * den) := by + rw[RRSingleSample, RRSinglePushForward] + simp_all only [bind, pure, Bool.false_bne, bind_apply, BernoulliSample_apply, ENNReal.natCast_sub, Nat.cast_mul, + Nat.cast_ofNat, PNat.mul_coe, PNat.val_ofNat, pure_apply, Bool.true_eq, Bool.not_eq_true', mul_ite, + Bool.false_eq_true, ↓reduceIte, mul_one, mul_zero, tsum_ite_eq, NNReal.ofPNat, Nonneg.mk_natCast] + apply Eq.refl + +/- Probability of a person with private answer "false" giving randomized response "false."-/ +lemma RRSingleSample_false_false {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : T) (hq : query l = false): + RRSingleSample query num den h l false = (den + 2 * num) / (2 * den) := by + rw[RRSingleSample, RRSinglePushForward] + simp_all only [bind, pure, Bool.false_bne, bind_apply, BernoulliSample_apply, ENNReal.natCast_sub, Nat.cast_mul, + Nat.cast_ofNat, PNat.mul_coe, PNat.val_ofNat, pure_apply, Bool.false_eq, mul_ite, Bool.false_eq_true, ↓reduceIte, + mul_one, mul_zero, tsum_ite_eq, NNReal.ofPNat, Nonneg.mk_natCast] + rw [ENNReal.sub_eq_of_add_eq] + simp + rw [@ENNReal.div_eq_top] + rw [Not] + intro A + rcases A with ⟨_,hb⟩ + simp at hb + rename_i h_1 + simp_all only [ENNReal.sub_eq_top_iff, ENNReal.natCast_ne_top, ne_eq, false_and] + have h_le : (2:ENNReal) *num ≤ den.val := by + rw [@Nat.lt_iff_le_and_ne] at h + rcases h with ⟨hl,_⟩ + exact mod_cast hl + have two_num_fin : (2:ENNReal)* num ≠ ⊤:= by + simp + rw [Not] + intro B + norm_cast + have hh : 1 = (den.val + (2:ENNReal) * num)/(2 *den) + (den-2*num)/(2*den):= by + simp + rw [@ENNReal.div_add_div_same] + rw [add_comm] + conv => + enter [2,1,2] + rw [add_comm] + rw [← add_assoc] + rw [sub_add_cancel_ennreal] + have den_den : 1 = ((den.val :ENNReal) + den.val)/(2*(den.val:ENNReal)) := by + rw[two_mul] + rw [ENNReal.div_self] + simp + simp + norm_cast + rw [@ENNReal.le_coe_iff] + simp_all only [ne_eq, Nat.cast_mul, Nat.cast_ofNat] + apply h_le + simp_all only [WithTop.coe_natCast, Nat.cast_inj] + apply Eq.refl + exact two_num_fin + symm + exact hh + +/- RRSinglePushForward always outputs non-zero probabilities. -/ +lemma RRSinglePushForward_non_zero {T : Type} (query : T -> Bool) (num : Nat) (den : PNat) (h : 2 * num < den) (l : T) (b : Bool): + RRSinglePushForward num den h (query l) b ≠ 0 := by + simp [RRSinglePushForward] + cases hb : b == query l with + | true => simp at hb + subst hb + simp + rw [@tsub_eq_zero_iff_le] + rw [@Mathlib.Tactic.PushNeg.not_le_eq] + apply quot_lt_one_rev + norm_cast + rw [PNat.mul_coe] + simp_all only [PNat.val_ofNat] + have hh : den.val - 2*num ≤ den.val:= by simp + have gg : den.val < 2 *den.val := by simp + rw [@Nat.le_iff_lt_or_eq] at hh + cases hh with + | inl hl => + apply LT.lt.trans hl gg + | inr hr => + rw [hr] + simp + | false => simp at hb + rw [← Bool.eq_not_of_ne hb] + intro + apply And.intro + trivial + apply And.intro + {norm_cast + rw [@Nat.sub_eq_zero_iff_le] + linarith + } + {exact ne_of_beq_false rfl} + +/- RRSingleSample always outputs non-zero probabilities. -/ +lemma RRSingleSample_non_zero {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : T) (b : Bool): + RRSingleSample query num den h l b ≠ 0 := by + rw [RRSingleSample] + apply RRSinglePushForward_non_zero + +/- RRSinglePushForward always outputs finite probabilities. -/ +lemma RRSingleSample_finite {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : T) (b : Bool): + RRSingleSample query num den h l b ≠ ⊤ := by + have hden: ↑(NNReal.ofPNat den) ≠ (0 : ENNReal) := by + rw [@ne_iff_lt_or_gt] + apply Or.inr + simp_all only [NNReal.ofPNat, Nonneg.mk_natCast, gt_iff_lt, ENNReal.coe_pos, Nat.cast_pos] + apply den.2 + cases hb: b with + | true => cases hq: query l with + | true => rw [RRSingleSample_true_true _ _ _ _ _ hq] + apply div_ne_top + exact Ne.symm (ne_of_beq_false rfl) + refine mult_ne_zero 2 ↑(NNReal.ofPNat den) ?true.true.h2.h1 ?true.true.h2.h2 + aesop + exact hden + | false => rw [RRSingleSample_false_true _ _ _ _ _ hq] + apply div_ne_top + aesop + refine mult_ne_zero 2 ↑(NNReal.ofPNat den) ?true.false.h2.h1 ?true.false.h2.h2 + aesop + exact hden + | false => cases hq: query l with + | true => rw [RRSingleSample_true_false _ _ _ _ _ hq] + apply div_ne_top + aesop + refine mult_ne_zero 2 ↑(NNReal.ofPNat den) ?false.true.h2.h1 ?false.true.h2.h2 + aesop + exact hden + | false => rw [RRSingleSample_false_false _ _ _ _ _ hq] + apply div_ne_top + rw [@ENNReal.add_ne_top] + apply And.intro + aesop + exact Ne.symm (ne_of_beq_false rfl) + refine mult_ne_zero 2 ↑(NNReal.ofPNat den) ?false.false.h2.h1 ?false.false.h2.h2 + aesop + exact hden + +/- RRSinglePushForward always outputs finite probabilities. + Given what was already proved, the simplest way to prove the next lemma + is to note that RRSinglePushForward and RRSample with the identity query are the same -/ +lemma RRSinglePushForward_finite (num : Nat) (den : PNat) (h: 2 * num < den) (l : Bool) (b : Bool): + RRSinglePushForward num den h l b ≠ ⊤ := by + rw [←RRSingleSample_is_RRSinglePushForward] + apply RRSingleSample_finite + +/- The next lemma is helpful for the DP Proof. -/ +lemma RRSinglePushForward_div_finite (num : Nat) (den : PNat) (h: 2 * num < den) (l₁ l₂ : Bool) (b : Bool): + RRSinglePushForward num den h l₁ b / RRSinglePushForward num den h l₂ b ≠ ⊤ := by + simp + rw [Not] + intro h1 + rw [@ENNReal.div_eq_top] at h1 + cases h1 with + | inl hl => + apply And.right at hl + have hcontr : RRSinglePushForward num den h l₂ b ≠ 0 := by apply RRSinglePushForward_non_zero (fun x : Bool => x) + contradiction + | inr hr => + apply And.left at hr + have hcontr: RRSinglePushForward num den h l₁ b ≠ ⊤ := by apply RRSinglePushForward_finite + contradiction + +/- The corresponding lemmas showing that RRPushForward is non-zero and finite are in a different file, + since we need our prod_of_ind_prob lemma for them. +-/ + +/- RRSamplePushForward assigns a zero probability of transition to a list of different length. -/ +lemma RRSamplePushForward_diff_lengths (num : Nat) (den : PNat) (h: 2 * num < den) (l₁ : List Bool) (l₂ : List Bool) (hlen : l₁.length ≠ l₂.length): + RRSamplePushForward num den h l₁ l₂ = 0 := by + induction l₁ generalizing l₂ with + | nil => simp [RRSamplePushForward, -mapM] + aesop + | cons hd tl ih => + simp [RRSamplePushForward, -mapM] + simp [RRSamplePushForward, -mapM] at ih + apply And.intro + apply Or.inr + intro b + intro a + subst a + simp_all only [mapM, List.length_cons, ne_eq, add_left_inj, not_false_eq_true] + apply Or.inr + intro b + intro a + subst a + simp_all only [mapM, List.length_cons, ne_eq, add_left_inj, not_false_eq_true] + +/- RRSample assigns a zero probability of transition to a list of different length. -/ +lemma RRSample_diff_lengths {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l₁ : List T) (l₂ : List Bool) (hlen : l₁.length ≠ l₂.length): + RRSample query num den h l₁ l₂= 0 := by + induction l₁ generalizing l₂ with + | nil => simp [RRSample, -mapM] + aesop + | cons hd tl ih => + simp [RRSample, -mapM] + simp [RRSample, -mapM] at ih + apply And.intro + apply Or.inr + intro b + intro a + subst a + simp_all only [mapM, List.length_cons, ne_eq, add_left_inj, not_false_eq_true] + apply Or.inr + intro b + intro a + subst a + simp_all only [mapM, List.length_cons, ne_eq, add_left_inj, not_false_eq_true] + +/- Applies the above to the PMF instantiation of RRSample. -/ +lemma RRSamplePMF_diff_lengths {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h : 2 * num < den) (l₁ : List T) (l₂ : List Bool) (hlen : l₁.length ≠ l₂.length): + RRSample_PMF query num den h l₁ l₂ = 0 := RRSample_diff_lengths query num den h l₁ l₂ hlen diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/DPProof.lean b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/DPProof.lean new file mode 100644 index 00000000..6989e7ce --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/DPProof.lean @@ -0,0 +1,483 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Basic +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Properties.BasicLemmas +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.LocalToDataset + +open SLang +open ENNRealLemmas +open RandomizedResponse + +namespace SLang +/- We prove the ln ((1/2 + λ)/(1/2 - λ)) differential privacy bound for our implementation of + randomized response. + We first prove the bound for the single-user (local randomizer) version, + and then apply the lemma in "LocalToDataset" to extend the result to a dataset.-/ + +/- Final arithmetic bound for the DP proof, by considering eight possible cases. -/ +lemma final_bound (query : T -> Bool) (num : Nat) (den : PNat) (h : 2 * num < den) (a a' : T) (b : Bool): + RRSingleSample query num den h a b / RRSingleSample query num den h a' b + ≤ (den + 2 * num) / (den - 2 * num) := by + cases b with + | true => + cases hqa : query a with + | true => + rw [RRSingleSample_true_true _ _ _ _ _ hqa] + cases hqa' : query a' with + | true => rw [RRSingleSample_true_true _ _ _ _ _ hqa'] + rw[ENNReal.div_self] + {rw [@Decidable.le_iff_lt_or_eq] + cases hnum : num == 0 with + | true => simp at hnum + apply Or.inr + subst hnum + simp + rw [ENNReal.div_self] + norm_num + apply pnat_zero_imp_false + simp + | false => simp at hnum + apply Or.inl + apply quot_gt_one_rev + simp + have h1: 0 < (2 : ENNReal) * num + 2 * num := by + simp + rw [@Nat.pos_iff_ne_zero] + simp + exact hnum + have h2: den < den + (2 : ENNReal) * num + 2 * num := by + simp + rw [add_assoc] + apply ENNReal.lt_add_right + simp + norm_num + exact hnum + + simp_all only [pos_add_self_iff, CanonicallyOrderedCommSemiring.mul_pos, Nat.ofNat_pos, + Nat.cast_pos, true_and, NNReal.ofPNat, Nonneg.mk_natCast, gt_iff_lt] + apply ENNReal.sub_lt_of_lt_add + rw [@Decidable.le_iff_eq_or_lt] + right + rw [← ENNReal.coe_two] + exact_mod_cast h + + simp_all only} + {rw [@ENNReal.div_ne_zero] + apply And.intro + simp + intro hd + apply False.elim + apply pnat_zero_imp_false den hd + simp + rw[Not] + intro b + norm_cast + } + { simp + rw [@ENNReal.div_eq_top] + rw[Not] + intro b + rcases b with ⟨_, br⟩ + have hh : ¬ den.val = 0 := by simp + simp at br + rw [Not] at hh + apply hh at br + exact br + rename_i h_1 + simp_all only [ENNReal.add_eq_top, ENNReal.coe_ne_top, false_or, ne_eq] + obtain ⟨left⟩ := h_1 + norm_cast + } + | false => rw [RRSingleSample_false_true _ _ _ _ _ hqa'] + simp_all only [NNReal.ofPNat, Nonneg.mk_natCast] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [ENNReal.mul_inv] + simp + rw [mul_assoc] + rw [mul_comm] + rw [mul_comm] + rw [← mul_assoc] + rw [← mul_assoc] + conv => + enter [1, 1] + rw [mul_comm] + rw [← mul_assoc] + enter [1] + rw [ENNReal.inv_mul_cancel] + rw [one_mul] + simp_all only [ne_eq, mul_eq_zero, OfNat.ofNat_ne_zero, ENNReal.coe_eq_zero, Nat.cast_eq_zero, + false_or] + apply pnat_zero_imp_false + simp_all only [ne_eq] + apply Aesop.BuiltinRules.not_intro + intro a_1 + norm_cast + left + simp + rw[Not] + intro b + norm_cast + left + simp + apply pnat_zero_imp_false + | false => + rw [RRSingleSample_false_true _ _ _ _ _ hqa] + cases hqa' : query a' with + | true => + rw [RRSingleSample_true_true _ _ _ _ _ hqa'] + simp_all only [NNReal.ofPNat, Nonneg.mk_natCast] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [ENNReal.mul_inv] + simp + rw [mul_assoc] + rw [mul_comm] + rw [mul_comm] + rw [← mul_assoc] + rw [← mul_assoc] + conv => + enter [1, 1] + rw [mul_comm] + rw [← mul_assoc] + enter [1] + rw [ENNReal.inv_mul_cancel] + rw [one_mul] + rw [← @ENNReal.div_eq_inv_mul] + rw [← @ENNReal.div_eq_inv_mul] + apply ENNReal.div_le_div + simp_all only [tsub_le_iff_right] + rw [add_assoc] + have hh (a b : ENNReal) : a ≤ a + b := by simp + apply hh + simp_all only [tsub_le_iff_right] + rw [add_assoc] + have hh (a b : ENNReal) : a ≤ a + b := by simp + apply hh + simp + apply pnat_zero_imp_false + simp + rw [Not] + intro a + norm_cast + left + simp + rw[Not] + intro b + norm_cast + left + simp + apply pnat_zero_imp_false + + | false => rw [RRSingleSample_false_true _ _ _ _ _ hqa'] + rw[ENNReal.div_self] + { rw [@Decidable.le_iff_lt_or_eq] + cases hnum : num == 0 with + | true => simp at hnum + apply Or.inr + subst hnum + simp + rw [ENNReal.div_self] + norm_num + apply pnat_zero_imp_false + simp + | false => simp at hnum + apply Or.inl + apply quot_gt_one_rev + simp + have h1: 0 < (2 : ENNReal) * num + 2 * num := by + simp + rw [@Nat.pos_iff_ne_zero] + simp + exact hnum + have h2: den < den + (2 : ENNReal) * num + 2 * num := by + simp + rw [add_assoc] + apply ENNReal.lt_add_right + simp + norm_num + exact hnum + + simp_all only [pos_add_self_iff, CanonicallyOrderedCommSemiring.mul_pos, Nat.ofNat_pos, + Nat.cast_pos, true_and, NNReal.ofPNat, Nonneg.mk_natCast, gt_iff_lt] + apply ENNReal.sub_lt_of_lt_add + rw [@Decidable.le_iff_eq_or_lt] + right + rw [← ENNReal.coe_two] + exact_mod_cast h + + simp_all only + } + {simp + apply And.intro + rw [Not] + intro b + rw [@Nat.lt_iff_le_and_not_ge] at h + rw [@tsub_eq_zero_iff_le] at b + rcases h with ⟨hl, hr⟩ + rw [← ENNReal.coe_two] at b + have hh : den.val ≤ 2 * num := by + exact_mod_cast b + linarith + + rw [Not] + intro a_1 + norm_cast + } + { simp + rw [@ENNReal.div_eq_top] + rw[Not] + intro b + rcases b with ⟨_, br⟩ + have hh : ¬ den.val = 0 := by simp + simp at br + rw [Not] at hh + apply hh at br + exact br + rename_i h_1 + simp_all only [ENNReal.add_eq_top, ENNReal.coe_ne_top, false_or, ne_eq] + obtain ⟨left⟩ := h_1 + norm_cast + } + + | false => + cases hqa : query a with + | true => + rw [RRSingleSample_true_false _ _ _ _ _ hqa] + cases hqa' : query a' with + | true => rw [RRSingleSample_true_false _ _ _ _ _ hqa'] + simp + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [ENNReal.mul_inv] + simp + rw [← mul_assoc] + conv => + enter [1,1] + rw [mul_comm] + rw [← mul_assoc] + rw [ENNReal.inv_mul_cancel] + rw [one_mul] + rw [ENNReal.inv_mul_cancel] + rw [ENNReal.le_div_iff_mul_le] + rw [one_mul] + simp_all only [tsub_le_iff_right] + rw [add_assoc] + have hh (a b : ENNReal) : a ≤ a + b := by simp + apply hh + + right + simp_all only [ne_eq, add_eq_zero, ENNReal.coe_eq_zero, Nat.cast_eq_zero, mul_eq_zero, + OfNat.ofNat_ne_zero, false_or, not_and] + intro a_1 + apply Aesop.BuiltinRules.not_intro + intro a_2 + subst a_2 + simp_all only [mul_zero, PNat.pos] + have h' : ¬ (den : Nat) = 0:= PNat.ne_zero den + contradiction + + left + norm_num + + simp + rw [Not] + intro b + rw [@Nat.lt_iff_le_and_not_ge] at h + rw [@tsub_eq_zero_iff_le] at b + rcases h with ⟨hl, hr⟩ + rw [← ENNReal.coe_two] at b + have hh : den.val ≤ 2 * num := by + exact_mod_cast b + linarith + norm_num + simp + apply pnat_zero_imp_false + simp + rw[Not] + intro b + norm_cast + left + simp + rw[Not] + intro b + norm_cast + left + simp + apply pnat_zero_imp_false + | false => rw [RRSingleSample_false_false _ _ _ _ _ hqa'] + simp + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [ENNReal.mul_inv] + simp + rw [mul_assoc] + rw [mul_comm] + rw [mul_comm] + rw [← mul_assoc] + rw [← mul_assoc] + conv => + enter [1, 1] + rw [mul_comm] + rw [← mul_assoc] + enter [1] + rw [ENNReal.inv_mul_cancel] + rw [one_mul] + rw [← @ENNReal.div_eq_inv_mul] + rw [← @ENNReal.div_eq_inv_mul] + apply ENNReal.div_le_div + simp_all only [tsub_le_iff_right] + rw [add_assoc] + have hh (a b : ENNReal) : a ≤ a + b := by simp + apply hh + simp_all only [tsub_le_iff_right] + rw [add_assoc] + have hh (a b : ENNReal) : a ≤ a + b := by simp + apply hh + simp + apply pnat_zero_imp_false + simp + rw [Not] + intro a + norm_cast + left + simp + rw[Not] + intro b + norm_cast + left + simp + apply pnat_zero_imp_false + | false => + rw [RRSingleSample_false_false _ _ _ _ _ hqa] + cases hqa' : query a' with + | true => rw [RRSingleSample_true_false _ _ _ _ _ hqa'] + simp + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [ENNReal.mul_inv] + simp + rw [mul_assoc] + rw [mul_comm] + rw [mul_comm] + rw [← mul_assoc] + rw [← mul_assoc] + conv => + enter [1, 1] + rw [mul_comm] + rw [← mul_assoc] + enter [1] + rw [ENNReal.inv_mul_cancel] + rw [one_mul] + simp + apply pnat_zero_imp_false + simp + rw [Not] + intro b + norm_cast + left + simp + rw[Not] + intro b + norm_cast + left + simp + apply pnat_zero_imp_false + | false => rw [RRSingleSample_false_false _ _ _ _ _ hqa'] + simp + rw [@ENNReal.div_eq_inv_mul] + rw [@ENNReal.div_eq_inv_mul] + rw [ENNReal.mul_inv] + simp + rw [← mul_assoc] + conv => + enter [1,1] + rw [mul_comm] + rw [← mul_assoc] + rw [ENNReal.inv_mul_cancel] + rw [one_mul] + rw [ENNReal.inv_mul_cancel] + rw [ENNReal.le_div_iff_mul_le] + rw [one_mul] + simp_all only [tsub_le_iff_right] + rw [add_assoc] + have hh (a b : ENNReal) : a ≤ a + b := by simp + apply hh + right + simp_all only [ne_eq, add_eq_zero, ENNReal.coe_eq_zero, Nat.cast_eq_zero, mul_eq_zero, + OfNat.ofNat_ne_zero, false_or, not_and] + intro a_1 + apply Aesop.BuiltinRules.not_intro + intro a_2 + subst a_2 + simp_all only [mul_zero, PNat.pos] + have h' : ¬ (den : Nat) = 0:= PNat.ne_zero den + contradiction + left + norm_num + simp + intro b + rw [Not] + intro + have h' : ¬ (den : Nat) = 0:= PNat.ne_zero den + contradiction + simp + rw[Not] + intro b + norm_cast + simp + apply pnat_zero_imp_false + simp + rw[Not] + intro b + norm_cast + left + simp + rw[Not] + intro b + norm_cast + left + simp + apply pnat_zero_imp_false + +/- Instantiation of RRSingleSample as a local randomizer -/ +def RRSingle_Local (query : T → Bool) (num: Nat) (den : PNat) (h: 2 * num < den): LocalMechanism T Bool := + fun l => ⟨RRSingleSample query num den h l, RRSingleSample_PMF_helper query num den h l⟩ + +/- Using the "final_bound" lemma above to show that RRSingle_Local is locally differentially private. -/ +lemma RR_Local_DP (query : T → Bool) (num : Nat) (den : PNat) (h : 2 * num < den): Local_DP (RRSingle_Local query num den h) (Real.log ((↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num))) := by + rw [Local_DP] + intro u₁ u₂ y + simp [RRSingle_Local] + have h1: RRSingleSample query num den h u₁ y / RRSingleSample query num den h u₂ y ≤ ENNReal.ofReal (Real.exp (Real.log ((↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num)))) := by + calc + RRSingleSample query num den h u₁ y / RRSingleSample query num den h u₂ y ≤ (↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num) := final_bound query num den h u₁ u₂ y + _ = ENNReal.ofReal (Real.exp (Real.log ((↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num)))) := by rw [final_step_combined num den h] + apply h1 + +/- Proof that our implementation of randomized response is ln ((1/2 + λ)/(1/2 - λ))-differentially private. + The proof is by applying the lemma proven in "LocalToDataset" -/ +theorem RRSample_DP (query : T → Bool) (num : Nat) (den : PNat) (h : 2 * num < den): DP_withUpdateNeighbour (RRSample_PMF query num den h) (Real.log ((↑(NNReal.ofPNat den) + 2 * ↑num) / (↑(NNReal.ofPNat den) - 2 * ↑num))) := by + have h1: RRSample_PMF query num den h = local_to_dataset_PMF (RRSingle_Local query num den h) := rfl + rw [h1] + apply LocalDP_to_dataset _ _ + simp [RRSingle_Local] + intro l₁ l₂ h_upd b blen1 i + have hlen: l₂.length = b.length := by rw[←blen1]; apply Eq.symm (UpdateNeighbour_length h_upd) + apply Iff.intro + intro hx + have h_contr: RRSingleSample query num den h l₁[i.val] b[i.val] = 0 := by aesop + have h_contr2: RRSingleSample query num den h l₁[i.val] b[i.val] ≠ 0 := RRSingleSample_non_zero query num den h l₁[i.val] b[i.val] + contradiction + intro hx + have h_contr: RRSingleSample query num den h l₂[i.val] b[i.val] = 0 := by aesop + have h_contr2: RRSingleSample query num den h l₂[i.val] b[i.val] ≠ 0 := RRSingleSample_non_zero query num den h l₂[i.val] b[i.val] + contradiction + apply RRSingleSample_finite query num den h + apply RR_Local_DP diff --git a/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/PMFProof.lean b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/PMFProof.lean new file mode 100644 index 00000000..b29daae8 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/RandomizedResponse/Properties/PMFProof.lean @@ -0,0 +1,58 @@ +import Mathlib.Probability.ProbabilityMassFunction.Basic +import SampCert +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Definitions +import SampCert.DifferentialPrivacy.Pure.Local.Normalization +import SampCert.Samplers.Bernoulli.Properties + +namespace RandomizedResponse +open SLang + +/- Instantiation of RRSinglePushForward as a PMF. -/ +lemma RRSinglePushForward_PMF (num : Nat) (den : PNat) (h: 2 * num < den) (l : Bool) : + HasSum (RRSinglePushForward num den h l) 1 := by + rw [Summable.hasSum_iff ENNReal.summable] + rw [@tsum_bool] + rw[RRSinglePushForward] + cases l + { + simp_all only [bind, pure, Bool.false_bne, SLang.bind_apply, ENNReal.natCast_sub, + Nat.cast_mul, Nat.cast_ofNat, PNat.mul_coe, PNat.val_ofNat, SLang.pure_apply, Bool.false_eq, mul_ite, + Bool.false_eq_true, ↓reduceIte, mul_one, mul_zero, tsum_ite_eq, Bool.true_eq] + rw[←SLang.BernoulliSample_normalizes (den - 2 * num) (2 * den) (arith_0 num den h)] + rw[tsum_bool] + } + { simp_all only [bind, pure, Bool.true_bne, SLang.bind_apply, ENNReal.natCast_sub, + Nat.cast_mul, Nat.cast_ofNat, PNat.mul_coe, PNat.val_ofNat, SLang.pure_apply, Bool.false_eq, Bool.not_eq_false', + mul_ite, ↓reduceIte, mul_one, mul_zero, tsum_ite_eq, Bool.true_eq, Bool.not_eq_true', Bool.false_eq_true] + rw[←SLang.BernoulliSample_normalizes (den - 2 * num) (2 * den) (arith_0 num den h)] + rw[tsum_bool] + rw [@AddCommMonoidWithOne.add_comm] + } + +/- The next lemmas lead towards the instantiation of RRSample as a PMF. -/ +lemma RRSingleSample_PMF_helper {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : T) : + HasSum (RRSingleSample query num den h l) 1 := by + rw [RRSingleSample] + apply RRSinglePushForward_PMF + +lemma RRSample_PMF_helper [LawfulMonad SLang] {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : List T) : + HasSum (RRSample query num den h l) 1 := by + rw [Summable.hasSum_iff ENNReal.summable] + unfold RRSample + apply norm_func_norm_on_list + intro a + rw [← Summable.hasSum_iff ENNReal.summable] + apply RRSingleSample_PMF_helper + +lemma RRSamplePushForward_PMF_helper [LawfulMonad SLang] (num : Nat) (den : PNat) (h: 2 * num < den) (l : List Bool) : + HasSum (RRSamplePushForward num den h l) 1 := by + rw [Summable.hasSum_iff ENNReal.summable] + unfold RRSamplePushForward + apply norm_func_norm_on_list + intro a + rw [← Summable.hasSum_iff ENNReal.summable] + apply RRSinglePushForward_PMF + +/- Instantiation of RRSample as a PMF. -/ +def RRSample_PMF [LawfulMonad SLang] {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : List T) : PMF (List Bool) := + ⟨RRSample query num den h l, RRSample_PMF_helper query num den h l⟩ diff --git a/SampCert/DifferentialPrivacy/Pure/Local/Reduction.lean b/SampCert/DifferentialPrivacy/Pure/Local/Reduction.lean new file mode 100644 index 00000000..72a6fe33 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/Local/Reduction.lean @@ -0,0 +1,160 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Basic + +/- Proof of the DP proof step that allows us to reduce from considering the dataset-based algorithm + to the considering just the local randomizer. + The proof is highly technical, but mathematically trivial. + -/ +lemma valid_index0 (l₁ : List T)(h1: l₁ = a++[n]++b) (i : Fin (l₁.length - 1)): (Fin.succAbove (a.length) i).val < l₁.length := by + have hl : l₁.length - 1 + 1 = l₁.length := by + rw [Nat.sub_add_cancel] + rw [h1] + simp + linarith + simp [Fin.succAbove] + split + simp [Fin.castSucc] + {calc + i.val < l₁.length - 1 := i.2 + _ < l₁.length := by aesop} + { + calc + i.succ.val = i.val + 1 := by simp + _ < l₁.length - 1 + 1 := by linarith[i.2] + _ = l₁.length := by rw [hl] + } + +lemma valid_index1 (l₁ l₂ : List T)(h1: l₁ = a++[n]++b) (h2: l₂ = a++[m]++b) (i : Fin ((l₁.length - 1))): (Fin.succAbove (a.length) i).val < l₂.length := by + have hl: l₁.length = l₂.length := by aesop + rw[←hl] + apply valid_index0 + exact h1 + +lemma mod_helper (a b: ℕ)(h1: b ≥ 1)(h2: a + unfold Fin.succAbove + have h' : i.castSucc < ↑a.length := by + rw [@Fin.castSucc_lt_iff_succ_le] + rw [@Fin.le_iff_val_le_val] + simp + rw[mod_helper (a.length) (l₁.length) (by rw[h1];simp;linarith) (by rw[h1]; simp)] + simp[Nat.succ_le_of_lt h] + + simp only[h'] + simp only [↓reduceIte, Fin.coe_castSucc,Fin.getElem_fin] + rw[List.getElem_append_left (a++[n]) b (by simp[h];linarith)] + rw[List.getElem_append_left a [n] h] + rw[List.getElem_append_left (a++[m]) b (by simp[h];linarith)] + rw[List.getElem_append_left] + + case neg h => + have iab: i.val - a.length < b.length := by + have ile : i < l₁.length - 1 := i.is_lt + simp[h1] at ile + rw[tsub_lt_iff_left] + exact ile + simp at h + exact h + unfold Fin.succAbove + have h' : ¬ i.castSucc < ↑a.length := by + simp at h + simp + rw [@Fin.le_castSucc_iff] + apply Nat.lt_succ_of_le + simp + rw[mod_helper (a.length) (l₁.length) (by rw[h1];simp;linarith) (by rw[h1]; simp)] + exact h + simp only[h'] + simp only [↓reduceIte, Fin.coe_castSucc,Fin.getElem_fin] + rw[List.getElem_append_right (a++[n]) b (by simp;linarith)] + rw[List.getElem_append_right (a++[m]) b (by simp;linarith)] + simp + simp + linarith + simp + linarith + + +lemma valid_index2 {l₁ : List T} (h1: l₁ = a++[n]++b) (i : Fin ((l₁.length - 1) + 1)): + i.val < l₁.length := by + have hl1: l₁.length - 1 + 1 = l₁.length := by + rw [Nat.sub_add_cancel] + rw[h1] + simp + linarith + exact Nat.lt_of_lt_of_eq i.2 hl1 + +lemma valid_index3 {β: Type}{l₁ : List T} {x : List β} (h1: l₁ = a++[n]++b) (hx: l₁.length = x.length) (i : Fin ((l₁.length - 1) + 1)): + i.val < x.length := by + rw[←hx] + apply valid_index2 h1 i + +lemma valid_index8 {β: Type}{l₁ : List T} {x : List β} (h1: l₁ = a++[n]++b) (hx: l₁.length = x.length) (i : Fin ((l₁.length - 1) + 1)): + i.val < x.length := by + rw[←hx] + apply valid_index2 h1 i + +lemma reduction2 {β: Type}(l₁ l₂: List T)(x: List β)(f: T → SLang β)(h1: l₁ = a++[n]++b)(h2: l₂ = a++[m]++b)(hx: l₁.length = x.length)(hy: l₂.length = x.length) + (nonzero: ∀ (i : Fin (l₂.length - 1)), f (l₂[↑((@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove i)]'(by apply valid_index8 h2; aesop)) (x[↑((@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove i)]'(by apply valid_index8 h2; aesop)) ≠ 0) + (noninf: ∀(k: T) (bo: β), f k bo ≠ ⊤):(∏' (i : Fin ((l₁.length-1)+1)), f (l₁[i.val]'(valid_index2 h1 i)) (x[i.val]'(valid_index3 h1 hx i))) / + (∏' (i : Fin ((l₂.length-1)+1)), f (l₂[i.val]'(valid_index2 h2 i)) (x[i.val]'(valid_index3 h2 hy i))) = f (l₁[(a.length)]'(by rw[h1]; simp)) (x[a.length]'(by rw[← hx]; rw[h1]; simp)) / f (l₂[a.length]'(by rw[h2];simp)) (x[a.length]'(by rw[← hx]; rw[h1]; simp)) := by + rw[tprod_fintype] + rw[tprod_fintype] + rw[Fin.prod_univ_succAbove (fun (b: Fin ((l₁.length-1)+1)) => f (l₁[b.val]'(valid_index2 h1 b)) (x[b.val]'(valid_index3 h1 hx b))) a.length] + + have ind: a.length < x.length := by + rw[← hx] + rw[h1] + simp + rw[Fin.prod_univ_succAbove (fun (b: Fin ((l₂.length-1)+1)) => f (l₂[b.val]'(valid_index2 h2 b)) (x[b.val]'(valid_index3 h2 hy b))) a.length] + have helper: l₁.length - 1 = l₂.length - 1 := by aesop + have hlp: (∏ i : Fin (l₁.length - 1), f l₁[(Fin.succAbove a.length i).val] x[↑(Fin.succAbove a.length i).val]) = ∏ i : Fin (l₂.length - 1), f l₂[(Fin.succAbove a.length i).val] x[(Fin.succAbove a.length i).val] := by + apply Fintype.prod_equiv (Equiv.cast (congr_arg Fin helper)) + simp[succHelp l₁ l₂ h1 h2] + intro i + congr + rw [← propext cast_eq_iff_heq] + rw [← propext cast_eq_iff_heq] + rw[hlp] + rw[ENNReal.mul_div_mul_right] + simp + + simp[mod_helper (a.length) (l₁.length) (by rw[h1];simp;linarith) (by rw[h1]; simp)] + simp[mod_helper (a.length) (l₂.length) (by rw[h2];simp;linarith) (by rw[h2]; simp)] + + rw[Finset.prod_ne_zero_iff] + intro i _ + apply nonzero i + rw[← lt_top_iff_ne_top] + apply ENNReal.prod_lt_top + intro i + simp[noninf] + +lemma fin_prod_cast {n m : ℕ} (h : n = m)(f : Fin n → ENNReal) : + ∏' i : Fin n, f i = ∏' i : Fin m, f (Fin.cast h.symm i) := by + subst h + simp + +lemma conversion {β: Type}(l: List T) (x: List β)(h1: l = a++[n]++b)(hl : l.length ≥ 1)(hx: l.length = x.length)(f: T → SLang β): (∏' (i : Fin (l.length)), f (l[i.val]'(by simp)) (x[i.val]'(by rw[← hx];simp))) = (∏' (i : Fin ((l.length-1)+1)), f (l[i.val]'(valid_index2 h1 i)) (x[i.val]'(valid_index3 h1 hx i))) := by + rw [fin_prod_cast (by rw [← Nat.sub_add_cancel hl])] + simp + +/- The statement we need. This is used to prove the main lemma in "LocalToDataset."-/ +theorem reduction_final {β: Type}(l₁ l₂ a b: List T)(n m : T)(x: List β)(f: T → SLang β)(h1: l₁ = a++[n]++b)(h2: l₂ = a++[m]++b)(hx: l₁.length = x.length)(hy: l₂.length = x.length) + (nonzero: ∀ (i : Fin (l₂.length - 1)), f (l₂[↑((@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove i)]'(by apply valid_index8 h2; aesop)) (x[↑((@Nat.cast (Fin (l₂.length - 1 + 1)) Fin.instNatCast a.length).succAbove i)]'(by apply valid_index8 h2; aesop)) ≠ 0) + (noninf: ∀(k: T) (bo: β), f k bo ≠ ⊤):(∏' (i : Fin (l₁.length)), f (l₁[i.val]'(by simp)) (x[i.val]'(by rw[← hx]; simp))) / + (∏' (i : Fin (l₂.length)), f (l₂[i.val]'(by simp)) (x[i.val]'(by rw[← hy];simp))) = f (l₁[(a.length)]'(by rw[h1]; simp)) (x[a.length]'(by rw[← hx];rw[h1];simp)) / f (l₂[a.length]'(by rw[h2];simp)) (x[a.length]'(by rw[← hx];rw[h1];simp)) := by + have hl2: l₂.length ≥ 1 := by rw[h2];simp; omega + have hl1: l₁.length ≥ 1 := by rw[h1];simp; omega + rw[conversion l₂ x h2 hl2 hy f] + rw[conversion l₁ x h1 hl1 hx f] + rw [reduction2 l₁ l₂ x f h1 h2 hx hy nonzero noninf] diff --git a/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean b/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean index 4e57529a..a5e732ae 100644 --- a/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean +++ b/SampCert/DifferentialPrivacy/Pure/Postprocessing.lean @@ -5,6 +5,8 @@ Authors: Jean-Baptiste Tristan -/ import SampCert.DifferentialPrivacy.Pure.DP import SampCert.DifferentialPrivacy.Generic +import Mathlib.Probability.ProbabilityMassFunction.Basic +import Mathlib.Topology.Algebra.InfiniteSum.Basic import Mathlib.Data.Set.Defs import Mathlib.Data.Set.Prod @@ -42,6 +44,7 @@ lemma privPostProcess_DP_bound {nq : Mechanism T U} {ε : NNReal} (h : PureDP nq exact Real.exp_pos ε · simp + /-- ``privPostProcess`` satisfies pure DP, for any surjective postprocessing function. -/ diff --git a/SampCert/DifferentialPrivacy/Pure/RandomizedPostProcessing.lean b/SampCert/DifferentialPrivacy/Pure/RandomizedPostProcessing.lean new file mode 100644 index 00000000..3a203f16 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/RandomizedPostProcessing.lean @@ -0,0 +1,237 @@ +import SampCert.DifferentialPrivacy.Pure.DP +import SampCert.DifferentialPrivacy.Generic +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.DPwithUpdateNeighbour +import Mathlib.Probability.ProbabilityMassFunction.Basic +import Mathlib.Topology.Algebra.InfiniteSum.Basic +import Mathlib.Data.Set.Defs +import Mathlib.Data.Set.Prod + +noncomputable section + +open Classical Set + +namespace SLang +/-This file establishes the post-processing property for randomized post-processing functions-/ + +/-Composes a mechanism with a randomized post-processing function-/ +def privPostProcessRand {T U V : Type} (nq : Mechanism T U) (g : U → PMF V) : Mechanism T V := + fun l => (nq l).bind g + +/-The following 3 lemmas are helper functions for the main theorems at the end-/ +lemma tsum_ite_eq_single {T U : Type} (m : Mechanism T U) (l₁ : List T)(u : U): + (∑' (x : U), if x = u then ((m l₁) x) else 0) = (m l₁) u := by + have hpoint : + (fun x : U => if x = u then (m l₁) x else 0) = + (fun x : U => if x = u then (m l₁) u else 0) := by + funext x + by_cases hx : x = u + · simp [hx] + · simp [hx] + have hcollapse : (∑' x : U, if x = u then (m l₁) u else 0) = (m l₁) u := by + simp [tsum_ite_eq (β := U) (α := ENNReal) u ((m l₁) u)] + simp [hpoint, hcollapse] + +lemma tsum_indicator_mul_left {U V : Type} (p : PMF U) (g : U → PMF V) (S : Set V) (u : U) (hsplit : (fun v => if v ∈ S then p u * g u v else 0) = fun v => p u * if v ∈ S then g u v else 0): +(∑' v : V, if v ∈ S then p u * g u v else 0) = p u * ∑' v : V, if v ∈ S then (g u) v else 0:= by + calc + (∑' v : V, if v ∈ S then p u * g u v else 0) + = ∑' v : V, p u * (if v ∈ S then g u v else 0) := by + simp [hsplit] + _ = p u * ∑' v : V, (if v ∈ S then g u v else 0) := by + simpa using + (ENNReal.tsum_mul_left + (a := p u) + (f := fun v : V => if v ∈ S then g u v else 0)) + +lemma tsum_bind_indicator {U V : Type} + (p : PMF U) (g : U → PMF V) (S : Set V) : + (∑' v : V, if v ∈ S then (p.bind g) v else 0) = (∑' u : U, p u * (∑' v : V, if v ∈ S then g u v else 0)) := by + have hbind : ∀ v, (p.bind g) v = ∑' u, p u * g u v := by + intro v; simp [PMF.bind_apply] + calc + (∑' v : V, if v ∈ S then (p.bind g) v else 0) + = ∑' v, if v ∈ S then (∑' u, p u * g u v) else 0 := by + simp [hbind] + _ = ∑' v, ∑' u, (if v ∈ S then p u * g u v else 0) := by + refine tsum_congr ?_ + intro v; by_cases hv : v ∈ S <;> simp [hv] + _ = ∑' u, ∑' v, (if v ∈ S then p u * g u v else 0) := by + simpa using + ENNReal.tsum_comm (f := fun v u => (if v ∈ S then p u * g u v else 0)) + _ = ∑' u, p u * (∑' v, if v ∈ S then g u v else 0) := by + refine tsum_congr ?_ + intro u + have hsplit :(fun v => if v ∈ S then p u * g u v else 0) = (fun v => p u * (if v ∈ S then g u v else 0)) := by + funext v; by_cases hv : v ∈ S <;> simp [hv, mul_comm, mul_left_comm, mul_assoc] + exact tsum_indicator_mul_left p g S u hsplit + +/-Provides a pointwise guarantee of a DP bound-/ +lemma DP.pointwise_ratio_bound {T U : Type} + {m : Mechanism T U} {ε : ℝ} + (h : DP m ε) {l₁ l₂ : List T} (hN : Neighbour l₁ l₂) : + ∀ u : U, m l₁ u ≤ ENNReal.ofReal (Real.exp ε) * m l₂ u := by + intro u + have hS := h l₁ l₂ hN ({u} : Set U) + have hnum : + (∑' x : U, (if x ∈ ({u} : Set U) then m l₁ x else 0)) = m l₁ u := by + classical + have : (fun x : U => if x ∈ ({u} : Set U) then m l₁ x else 0) + = (fun x : U => if x = u then m l₁ u else 0) := by + funext x + by_cases hx : x = u + · subst hx; simp + · simp [Set.mem_singleton_iff, hx] + simpa [this] using (tsum_ite_eq_single m l₁ u) + have hden : + (∑' x : U, (if x ∈ ({u} : Set U) then m l₂ x else 0)) = m l₂ u := by + classical + have : (fun x : U => if x ∈ ({u} : Set U) then m l₂ x else 0) + = (fun x : U => if x = u then m l₂ u else 0) := by + funext x + by_cases hx : x = u + · subst hx; simp + · simp [Set.mem_singleton_iff, hx] + simpa [this] using (tsum_ite_eq_single m l₂ u) + have hratio : m l₁ u / m l₂ u ≤ ENNReal.ofReal (Real.exp ε) := by + rw [hnum, hden] at hS + exact hS + by_cases hz : m l₂ u = 0 + · have hfin : ENNReal.ofReal (Real.exp ε) ≠ ⊤ := by aesop + have hzero : m l₁ u = 0 := by + by_contra hpos + have htop : m l₁ u / m l₂ u = ⊤ := by + exact ENNReal.div_eq_top.mpr (Or.inl ⟨by simp [hpos], hz⟩) + have : (⊤ : ENNReal) ≤ ENNReal.ofReal (Real.exp ε) := by + simp at hratio + aesop + have : ENNReal.ofReal (Real.exp ε) = ⊤ := top_le_iff.mp this + exact hfin this + simp [hz, hzero, zero_mul] + · have h_not_infty : ⊤ ≠ m l₂ u := by + have hle : m l₂ u ≤ 1 := by simpa using (m l₂).coe_le_one u + have hlt : m l₂ u < ⊤ := lt_of_le_of_lt hle (by simp) + aesop + have : m l₁ u ≤ ENNReal.ofReal (Real.exp ε) * m l₂ u := by + rw [← ENNReal.div_le_iff_le_mul] + · exact hratio + · aesop + · aesop + simpa using this + +/-Provides a pointwise guarantee of a DP bound but for our DP_withUpdateNeighbour-/ +lemma DP.pointwise_ratio_bound_for_UpdateNeighbour {T U : Type} + {m : Mechanism T U} {ε : Real} + (h : DP_withUpdateNeighbour m ε) {l₁ l₂ : List T} (hN : UpdateNeighbour l₁ l₂) : + ∀ u : U, m l₁ u ≤ ENNReal.ofReal (Real.exp ε) * m l₂ u := by + intro u + have hS := h l₁ l₂ hN ({u} : Set U) + have hnum : (∑' x : U, (if x ∈ ({u} : Set U) then m l₁ x else 0)) = m l₁ u := by + have : (fun x : U => if x ∈ ({u} : Set U) then m l₁ x else 0) + = (fun x : U => if x = u then m l₁ u else 0) := by + funext x + by_cases hx : x = u + · subst hx; simp + · simp [Set.mem_singleton_iff, hx] + simpa [this] using (tsum_ite_eq_single m l₁ u) + have hden : (∑' x : U, (if x ∈ ({u} : Set U) then m l₂ x else 0)) = m l₂ u := by + have : (fun x : U => if x ∈ ({u} : Set U) then m l₂ x else 0) + = (fun x : U => if x = u then m l₂ u else 0) := by + funext x + by_cases hx : x = u + · subst hx; simp + · simp [Set.mem_singleton_iff, hx] + simpa [this] using (tsum_ite_eq_single m l₂ u) + have hratio : m l₁ u / m l₂ u ≤ ENNReal.ofReal (Real.exp ε) := by + rw [hnum, hden] at hS + exact hS + by_cases hz : m l₂ u = 0 + · have hfin : ENNReal.ofReal (Real.exp ε) ≠ ⊤ := by aesop + have hzero : m l₁ u = 0 := by + by_contra hpos + have htop : m l₁ u / m l₂ u = ⊤ := by + exact ENNReal.div_eq_top.mpr (Or.inl ⟨by simp [hpos], hz⟩) + have : (⊤ : ENNReal) ≤ ENNReal.ofReal (Real.exp ε) := by + simp at hratio + aesop + have : ENNReal.ofReal (Real.exp ε) = ⊤ := top_le_iff.mp this + exact hfin this + simp [hz, hzero, zero_mul] + · have h_not_infty : ⊤ ≠ m l₂ u := by + have hle : m l₂ u ≤ 1 := by simpa using (m l₂).coe_le_one u + have hlt : m l₂ u < ⊤ := lt_of_le_of_lt hle (by simp) + aesop + have : m l₁ u ≤ ENNReal.ofReal (Real.exp ε) * m l₂ u := by + rw [← ENNReal.div_le_iff_le_mul] + exact hratio + all_goals aesop + simpa using this + +/-Proves that combing DP algorithm with post-processor does not worsen the DP bound-/ +theorem randPostProcess_DP_bound {T U V : Type} {nq : Mechanism T U} {ε : NNReal} (h : PureDP nq ε) (g : U → PMF V) : + DP (privPostProcessRand nq g) ε := by + intro l₁ l₂ hN S + let p₁ := nq l₁ + let p₂ := nq l₂ + let w : U → ENNReal := fun u => (∑' v : V, if v ∈ S then g u v else 0) + have hNum : (∑' v : V, if v ∈ S then (privPostProcessRand nq g l₁) v else 0) + = ∑' u : U, p₁ u * w u := by + simpa [privPostProcessRand, p₁] using tsum_bind_indicator (nq l₁) g S + have hDen : (∑' v : V, if v ∈ S then (privPostProcessRand nq g l₂) v else 0) + = ∑' u : U, p₂ u * w u := by + simpa [privPostProcessRand, p₂] using tsum_bind_indicator (nq l₂) g S + have hpt := DP.pointwise_ratio_bound (T:=T) (U:=U) (m:=nq) (ε:=ε) h hN + have hsum : + (∑' u : U, p₁ u * w u) + ≤ ENNReal.ofReal (Real.exp ε) * (∑' u : U, p₂ u * w u) := by + have hpt' : ∀ u, p₁ u * w u ≤ (ENNReal.ofReal (Real.exp ε) * p₂ u) * w u := by + intro u + have := hpt u + have hpt' : p₁ u ≤ ENNReal.ofReal (Real.exp ε) * p₂ u := by simpa [p₁, p₂] using hpt u + have hw0 : 0 ≤ w u := by aesop + have hmul : p₁ u * w u ≤ (ENNReal.ofReal (Real.exp ε) * p₂ u) * w u := mul_le_mul_of_nonneg_right hpt' hw0 + simpa [mul_left_comm, mul_comm, mul_assoc] using hmul + have := ENNReal.tsum_le_tsum hpt' + simpa [ENNReal.tsum_mul_left, mul_left_comm, mul_assoc] using this + by_cases hDen0 : (∑' u : U, p₂ u * w u) = 0 + · have hNum0 : (∑' u : U, p₁ u * w u) = 0 := by + have : (∑' u : U, p₁ u * w u) ≤ ENNReal.ofReal (Real.exp ε) * 0 := by simpa [hDen0] using hsum + exact le_antisymm (le_trans this (by aesop)) bot_le + simp [hNum, hDen, hNum0, hDen0] + · nth_rewrite 1 [mul_comm] at hsum + have : (∑' u : U, p₁ u * w u) / (∑' u : U, p₂ u * w u) ≤ ENNReal.ofReal (Real.exp ε) := ENNReal.div_le_of_le_mul' hsum + simpa [hNum, hDen] using this + +/-Same thing as randPostProcess_DP_bound, but uses our DP_withUpdateNeighbour-/ +theorem randPostProcess_DP_bound_with_UpdateNeighbour {T U V : Type} {nq : Mechanism T U} {ε : Real} (h : DP_withUpdateNeighbour nq ε) (g : U → PMF V) : + DP_withUpdateNeighbour (privPostProcessRand nq g) ε := by + intro l₁ l₂ hN S + let p₁ := nq l₁ + let p₂ := nq l₂ + let w : U → ENNReal := fun u => (∑' v : V, if v ∈ S then g u v else 0) + have hNum : (∑' v : V, if v ∈ S then (privPostProcessRand nq g l₁) v else 0) + = ∑' u : U, p₁ u * w u := by + simpa [privPostProcessRand, p₁] using tsum_bind_indicator (nq l₁) g S + have hDen : (∑' v : V, if v ∈ S then (privPostProcessRand nq g l₂) v else 0) + = ∑' u : U, p₂ u * w u := by + simpa [privPostProcessRand, p₂] using tsum_bind_indicator (nq l₂) g S + have hpt := DP.pointwise_ratio_bound_for_UpdateNeighbour (T:=T) (U:=U) (m:=nq) (ε:=ε) h hN + have hsum : + (∑' u : U, p₁ u * w u) + ≤ ENNReal.ofReal (Real.exp ε) * (∑' u : U, p₂ u * w u) := by + have hpt' : ∀ u, p₁ u * w u ≤ (ENNReal.ofReal (Real.exp ε) * p₂ u) * w u := by + intro u + have := hpt u + have hpt' : p₁ u ≤ ENNReal.ofReal (Real.exp ε) * p₂ u := by simpa [p₁, p₂] using hpt u + have hw0 : 0 ≤ w u := by aesop + have hmul : p₁ u * w u ≤ (ENNReal.ofReal (Real.exp ε) * p₂ u) * w u := mul_le_mul_of_nonneg_right hpt' hw0 + simpa [mul_left_comm, mul_comm, mul_assoc] using hmul + have := ENNReal.tsum_le_tsum hpt' + simpa [ENNReal.tsum_mul_left, mul_left_comm, mul_assoc] using this + by_cases hDen0 : (∑' u : U, p₂ u * w u) = 0 + · have hNum0 : (∑' u : U, p₁ u * w u) = 0 := by + have : (∑' u : U, p₁ u * w u) ≤ ENNReal.ofReal (Real.exp ε) * 0 := by simpa [hDen0] using hsum + exact le_antisymm (le_trans this (by aesop)) (bot_le) + simp [hNum, hDen, hNum0, hDen0] + · nth_rewrite 1 [mul_comm] at hsum + have : (∑' u : U, p₁ u * w u) / (∑' u : U, p₂ u * w u) ≤ ENNReal.ofReal (Real.exp ε) := ENNReal.div_le_of_le_mul' hsum + simpa [hNum, hDen] using this diff --git a/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Basic.lean b/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Basic.lean new file mode 100644 index 00000000..f00cedbc --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Basic.lean @@ -0,0 +1,12 @@ +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Definitions +import SampCert.Samplers.Uniform.Code +import SampCert.Samplers.Uniform.Properties +import SampCert.DifferentialPrivacy.Pure.Local.Normalization +import SampCert.DifferentialPrivacy.Pure.Local.PushForward +import SampCert.DifferentialPrivacy.Pure.Local.LocalDP.DPwithUpdateNeighbour +import SampCert.DifferentialPrivacy.Pure.Local.MultiBernoulli.Code +import SampCert.DifferentialPrivacy.Pure.Local.MultiBernoulli.Properties +import SampCert.DifferentialPrivacy.Pure.Postprocessing +import SampCert.DifferentialPrivacy.Generic +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Properties.PMFProof +import SampCert.DifferentialPrivacy.Pure.RandomizedPostProcessing diff --git a/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Definitions.lean b/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Definitions.lean new file mode 100644 index 00000000..c2d69128 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Definitions.lean @@ -0,0 +1,38 @@ +import SampCert.DifferentialPrivacy.Pure.ShuffleModel.Basic + +namespace SLang +open Classical + +/- Implementation of the Shuffler for the Shuffle Model. + Outputs a random permutation of the input list. + This is a bit unsatisfactory, + since we have not actually shown that the output distribution is uniform. -/ +def Shuffler {α: Type}(l:List α): SLang (List α) := do +match l with +| [] => return [] +| hd :: tl => + let len := (hd :: tl).length + let i : Nat ← UniformSample (Nat.toPNat' len) + let rest : List α ← Shuffler tl + return rest.insertNth i hd + +/- This is the implementation of the Shuffle algorithm using randomized response as the local randomizer. -/ +def RRShuffle(query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den)(l: List T) := do + let l ← RandomizedResponse.RRSample query num den h l + let b ← Shuffler l + return b + +/- Computes the number of permutations of a list, accounting + for the number of unique elements in the list. -/ + +def num_perms {α: Type} [DecidableEq α] (l: List α): ℕ := (l.permutations.toFinset).card + +/- Definition of a function that produces random permutations. -/ +def UniformShuffler {U: Type} (f: List U → SLang (List U)) : Prop := + ∀ l₁ l₂: List U, f l₁ l₂ = if List.isPerm l₁ l₂ then (num_perms l₁ : ENNReal)⁻¹ else (0: ENNReal) + +/- Generalized version of the shuffle algorithm that takes in any mechanism. -/ +def ShuffleAlgorithm (m : Mechanism T (List U))(f : List U → SLang (List U))(_: UniformShuffler f)(l: List T) := do + let x ← (m l).toSLang + let b ← f x + return b diff --git a/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Properties.lean b/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Properties.lean new file mode 100644 index 00000000..35a90444 --- /dev/null +++ b/SampCert/DifferentialPrivacy/Pure/ShuffleModel/Properties.lean @@ -0,0 +1,148 @@ +import SampCert.DifferentialPrivacy.Pure.ShuffleModel.Basic +import SampCert.DifferentialPrivacy.Pure.ShuffleModel.Definitions +import SampCert.DifferentialPrivacy.Pure.Local.RandomizedResponse.Properties.DPProof +import Mathlib.Data.Set.Card + +namespace SLang +open RandomizedResponse +open Classical + +/- Shuffler function normalizes-/ +lemma Shuffler_PMF_helper {α: Type} (l:List α): HasSum (Shuffler l) 1 := by + rw [Summable.hasSum_iff ENNReal.summable] + induction l with + | nil => + unfold Shuffler + simp [pure] + unfold probPure + rw [ENNReal.tsum_eq_add_tsum_ite []] + aesop + | cons h t ih => + unfold Shuffler + simp [pure] + rw [← Summable.hasSum_iff ENNReal.summable] + rw [Summable.hasSum_iff ENNReal.summable] + simp + rw [ENNReal.tsum_comm] + let hpf (b: Nat)(_: List α): (fun (i: List α) => ∑'(a : List α), (if i = List.insertNth b h a then Shuffler t a else 0)) = + (push_forward (Shuffler t) (fun (a: List α) => List.insertNth b h a)) := by + unfold push_forward + rfl + conv => + enter [1,1,b] + rw [ENNReal.tsum_mul_left] + rw [hpf b t] + enter [2] + apply push_forward_prob_is_prob_gen + rw [ih] + simp + +/- A restatement of the above lemma. -/ +lemma Shuffler_norms {α: Type} (l:List α): ∑' (b : List α), Shuffler l b = 1 := by + rw [← Summable.hasSum_iff ENNReal.summable] + apply Shuffler_PMF_helper + +/-Instantiation of Shuffler as a PMF. -/ +def Shuffler_PMF {α : Type} (l : List α) : PMF (List α) := + ⟨Shuffler l, Shuffler_PMF_helper l⟩ + +/- Helper lemma about List.isPerm-/ +lemma List.isPerm_iff_mem_permutations {U : Type} [DecidableEq U] [DecidableEq (List U)] (l : List U) (a : List U) : l.isPerm a ↔ (a ∈ l.permutations) := by + apply Iff.intro + simp + rw [@List.perm_comm] + intro hp + apply (@List.isPerm_iff U _ l a).mp hp + simp + intro hp + rw [@List.perm_comm] at hp + apply (@List.isPerm_iff U _ l a).mpr hp + +lemma UniformShuffler_norms' {U: Type} (f: List U → SLang (List U))(h: UniformShuffler f)(l: List U): + ∑' (b : List U), f l b = 1 := by + have h2: ∑' (b : List U), (if List.isPerm l b then ((num_perms l : ENNReal)⁻¹) else 0) = (num_perms l) * (num_perms l : ENNReal)⁻¹ := by + set b := (num_perms l : ENNReal)⁻¹ + simp [@tsum_split_ite] + conv => + enter [1, 1, b_1] + rw [←one_mul b] + rw [@ENNReal.tsum_mul_right] + congr + simp [num_perms] + have h0: {x // l.isPerm x} = {x // x ∈ l.permutations.toFinset} := by + congr + apply funext + intro x + rw [eq_iff_iff] + have h0a: x ∈ l.permutations.toFinset ↔ x ∈ l.permutations := by aesop + rw [h0a] + exact List.isPerm_iff_mem_permutations l x + rw [h0] + rw [tsum_fintype] + simp_all only [Finset.univ_eq_attach, Finset.sum_const, Finset.card_attach, nsmul_eq_mul, mul_one, Nat.cast_inj] + simp_all [UniformShuffler] + apply ENNReal.mul_inv_cancel + all_goals simp[num_perms, List.permutations] + +/- Alternate version -/ +lemma UniformShuffler_norms {U: Type} (f: List U → SLang (List U))(h: UniformShuffler f)(l: List U): + HasSum (f l) 1 := by + rw [Summable.hasSum_iff ENNReal.summable] + exact UniformShuffler_norms' f h l + +/- Any shuffle algorithm normalizes. -/ +lemma ShuffleAlgorithm_PMF_helper {U: Type} (m : Mechanism T (List U))(f : List U → SLang (List U))(h: UniformShuffler f) (l: List T): +HasSum (ShuffleAlgorithm m f h l) 1 := by + unfold ShuffleAlgorithm + simp_all only [bind, pure, bind_pure] + unfold probBind + simp [Summable.hasSum_iff ENNReal.summable] + rw [ENNReal.tsum_comm] + conv => + enter [1,1,b] + rw [ENNReal.tsum_mul_left] + enter [2] + apply UniformShuffler_norms' + exact h + simp + +/- Conversion of SLang output to PMF.-/ +def ShuffleAlgorithm_PMF {U: Type} [DecidableEq U] (m : Mechanism T (List U ))(f : List U → SLang (List U))(h: UniformShuffler f) (l: List T) : PMF (List U) := + ⟨ShuffleAlgorithm m f h l, ShuffleAlgorithm_PMF_helper m f h l⟩ + +/- RRShuffle normalizes. + This is a bit unsatisfactory: if we had that ''RandomShuffle'' is in fact + a UniformShuffler, then the prove would be trivial simply by instantiating RRShuffle as a UniformShuffler-/ +lemma RRShuffle_PMF_helper {T : Type}(query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den)(l : List T): HasSum (RRShuffle query num den h l) 1 := by + unfold RRShuffle + simp_all only [bind, pure, bind_pure] + unfold probBind + simp [Summable.hasSum_iff ENNReal.summable] + rw [ENNReal.tsum_comm] + conv => + enter [1,1,b] + rw [ENNReal.tsum_mul_left] + enter [2] + apply Shuffler_norms + simp + rw [← Summable.hasSum_iff ENNReal.summable] + apply RRSample_PMF_helper + +/- Instantiation of RRShuffle as a PMF. -/ +def RRShuffle_PMF {T : Type} (query: T -> Bool) (num : Nat) (den : PNat) (h: 2 * num < den) (l : List T) : PMF (List Bool) := + ⟨RRShuffle query num den h l, RRShuffle_PMF_helper query num den h l⟩ + +/- Shows that shuffling is a postprocessing function. -/ +lemma shuffling_is_postprocessing (m : Mechanism T (List U)) (f : List U → SLang (List U)) (h_uniform : UniformShuffler f): privPostProcessRand m (fun u => ⟨f u, UniformShuffler_norms f h_uniform u⟩) = ShuffleAlgorithm_PMF m f h_uniform := by + funext + simp [privPostProcessRand, ShuffleAlgorithm_PMF, ShuffleAlgorithm] + rfl + +/- Shuffle Algorithm is ε-differentially private, given that the local algorithm is ε-differentially private. -/ +theorem ShuffleAlgorithm_is_DP (m : Mechanism T (List U))(f : List U → SLang (List U))(ε : ℝ)(hdp: DP_withUpdateNeighbour m ε) +(h_uniform: UniformShuffler f): DP_withUpdateNeighbour (ShuffleAlgorithm_PMF m f h_uniform) ε := by +conv => + enter [1] + rw [←shuffling_is_postprocessing] +let g : List U → PMF (List U) := (fun u => ⟨f u, UniformShuffler_norms f h_uniform u⟩) +apply @randPostProcess_DP_bound_with_UpdateNeighbour T _ _ m ε hdp g diff --git a/SampCert/DifferentialPrivacy/RenyiDivergence.lean b/SampCert/DifferentialPrivacy/RenyiDivergence.lean index 9ff28400..da0f799d 100644 --- a/SampCert/DifferentialPrivacy/RenyiDivergence.lean +++ b/SampCert/DifferentialPrivacy/RenyiDivergence.lean @@ -26,7 +26,6 @@ Simplified consequence of absolute continuity between PMF's. -/ def AbsCts (p q : T -> ENNReal) : Prop := ∀ x : T, q x = 0 -> p x = 0 - /-- All PMF's are absolutely continuous with respect to themselves. -/ diff --git a/Tests/SampCert.dfy b/Tests/SampCert.dfy new file mode 100644 index 00000000..84dbf63e --- /dev/null +++ b/Tests/SampCert.dfy @@ -0,0 +1,288 @@ + +module {:extern} Random { + + class {:extern} Random { + + static method {:extern "UniformPowerOfTwoSample"} ExternUniformPowerOfTwoSample(n: nat) returns (u: nat) + + } + +} + +module {:extern} SampCert { + + import Random + + type pos = x: nat | x > 0 witness 1 + + class SLang { + + + method UniformPowerOfTwoSample(n: nat) returns (u: nat) + + requires n >= 1 + + { + + u := Random.Random.ExternUniformPowerOfTwoSample(n); + + } + method {:verify false} UniformSample (n: pos) + returns (o: nat) + modifies this + decreases * + { + var x := UniformPowerOfTwoSample(2 * n); + while ! (x < n) + decreases * + { + x := UniformPowerOfTwoSample(2 * n); + } + var r := x; + o := r; + } + + method {:verify false} BernoulliSample (num: nat, den: pos) + returns (o: bool) + requires num <= den + modifies this + decreases * + { + var d := UniformSample(den); + o := d < num; + } + + method {:verify false} BernoulliExpNegSampleUnitLoop (num: nat, den: pos, state: (bool,pos)) + returns (o: (bool,pos)) + requires num <= den + modifies this + decreases * + { + var A := BernoulliSample(num, state.1 * den); + o := (A,state.1 + 1); + } + + method {:verify false} BernoulliExpNegSampleUnitAux (num: nat, den: pos) + returns (o: nat) + requires num <= den + modifies this + decreases * + { + var state := (true,1); + while state.0 + decreases * + { + state := BernoulliExpNegSampleUnitLoop(num, den, state); + } + var r := state; + o := r.1; + } + + method {:verify false} BernoulliExpNegSampleUnit (num: nat, den: pos) + returns (o: bool) + requires num <= den + modifies this + decreases * + { + var K := BernoulliExpNegSampleUnitAux(num, den); + if K % 2 == 0 { + o := true; + } else { + o := false; + } + } + + method {:verify false} BernoulliExpNegSampleGenLoop (iter: nat) + returns (o: bool) + modifies this + decreases * + { + if iter == 0 { + o := true; + } else { + var B := BernoulliExpNegSampleUnit(1, 1); + if ! (B == true) { + o := B; + } else { + var R := BernoulliExpNegSampleGenLoop(iter - 1); + o := R; + } + } + } + + method {:verify false} BernoulliExpNegSample (num: nat, den: pos) + returns (o: bool) + modifies this + decreases * + { + if num <= den { + var X := BernoulliExpNegSampleUnit(num, den); + o := X; + } else { + var gamf := num / den; + var B := BernoulliExpNegSampleGenLoop(gamf); + if B == true { + var X := BernoulliExpNegSampleUnit(num % den, den); + o := X; + } else { + o := false; + } + } + } + + method {:verify false} DiscreteLaplaceSampleLoopIn1Aux (t: pos) + returns (o: (nat,bool)) + modifies this + decreases * + { + var U := UniformSample(t); + var D := BernoulliExpNegSample(U, t); + o := (U,D); + } + + method {:verify false} DiscreteLaplaceSampleLoopIn1 (t: pos) + returns (o: nat) + modifies this + decreases * + { + var x := DiscreteLaplaceSampleLoopIn1Aux(t); + while ! (x.1) + decreases * + { + x := DiscreteLaplaceSampleLoopIn1Aux(t); + } + var r1 := x; + o := r1.0; + } + + method {:verify false} DiscreteLaplaceSampleLoopIn2Aux (num: nat, den: pos, K: (bool,nat)) + returns (o: (bool,nat)) + modifies this + decreases * + { + var A := BernoulliExpNegSample(num, den); + o := (A,K.1 + 1); + } + + method {:verify false} DiscreteLaplaceSampleLoopIn2 (num: nat, den: pos) + returns (o: nat) + modifies this + decreases * + { + var K := (true,0); + while K.0 + decreases * + { + K := DiscreteLaplaceSampleLoopIn2Aux(num, den, K); + } + var r2 := K; + o := r2.1; + } + + method {:verify false} DiscreteLaplaceSampleLoop (num: pos, den: pos) + returns (o: (bool,nat)) + modifies this + decreases * + { + var v := DiscreteLaplaceSampleLoopIn2(den, num); + var V := v - 1; + var B := BernoulliSample(1, 2); + o := (B,V); + } + + method {:verify false} DiscreteLaplaceSampleLoop' (num: pos, den: pos) + returns (o: (bool,nat)) + modifies this + decreases * + { + var U := DiscreteLaplaceSampleLoopIn1(num); + var v := DiscreteLaplaceSampleLoopIn2(1, 1); + var V := v - 1; + var X := U + num * V; + var Y := X / den; + var B := BernoulliSample(1, 2); + o := (B,Y); + } + + method {:verify false} DiscreteLaplaceSample (num: pos, den: pos) + returns (o: int) + modifies this + decreases * + { + var x := DiscreteLaplaceSampleLoop(num, den); + while ! (! (x.0 == true && x.1 == 0)) + decreases * + { + x := DiscreteLaplaceSampleLoop(num, den); + } + var r := x; + var Z := if r.0 == true then - (r.1) else r.1; + o := Z; + } + + method {:verify false} DiscreteLaplaceSampleOptimized (num: pos, den: pos) + returns (o: int) + modifies this + decreases * + { + var x := DiscreteLaplaceSampleLoop'(num, den); + while ! (! (x.0 == true && x.1 == 0)) + decreases * + { + x := DiscreteLaplaceSampleLoop'(num, den); + } + var r := x; + var Z := if r.0 == true then - (r.1) else r.1; + o := Z; + } + + method {:verify false} DiscreteLaplaceSampleMixed (num: pos, den: pos, mix: nat) + returns (o: int) + modifies this + decreases * + { + if num <= den * mix { + var v := DiscreteLaplaceSample(num, den); + o := v; + } else { + var v := DiscreteLaplaceSampleOptimized(num, den); + o := v; + } + } + + method {:verify false} DiscreteGaussianSampleLoop (num: pos, den: pos, t: pos, mix: nat) + returns (o: (int,bool)) + modifies this + decreases * + { + var Y := DiscreteLaplaceSampleMixed(t, 1, mix); + var y := (if Y < 0 then -(Y) else (Y)); + var n := ((if y * t * den - num < 0 then -(y * t * den - num) else (y * t * den - num))) * ((if y * t * den - num < 0 then -(y * t * den - num) else (y * t * den - num))); + var d := 2 * num * (t) * (t) * den; + var C := BernoulliExpNegSample(n, d); + o := (Y,C); + } + + method {:verify false} DiscreteGaussianSample (num: pos, den: pos, mix: nat) + returns (o: int) + modifies this + decreases * + { + var ti := num / den; + var t := ti + 1; + var num := (num) * (num); + var den := (den) * (den); + var x := DiscreteGaussianSampleLoop(num, den, t, mix); + while ! (x.1) + decreases * + { + x := DiscreteGaussianSampleLoop(num, den, t, mix); + } + var r := x; + o := r.0; + } + + +} + +} diff --git a/ffi.cpp b/ffi.cpp index 8b3775b3..c17da02b 100644 --- a/ffi.cpp +++ b/ffi.cpp @@ -47,6 +47,7 @@ extern "C" lean_object * prob_While(lean_object * condition, lean_object * body, extern "C" lean_object * my_run(lean_object * a) { if (urandom == -1) { + // add O_CLOEXEC for Linux urandom = open("/dev/urandom", O_RDONLY | O_CLOEXEC); if (urandom == -1) { lean_internal_panic("prob_UniformByte: /dev/urandom cannot be opened"); diff --git a/lakefile.lean b/lakefile.lean index f918d059..8dac9cf5 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -3,10 +3,16 @@ open Lake DSL package «sampcert» where -- add any package configuration options here + /--/ moreLinkArgs := #[ + "-L./.lake/packages/LeanCopilot/.lake/build/lib", + "-lctranslate2" + ] -/ require mathlib from git "https://github.com/leanprover-community/mathlib4.git" @ "v4.10.0" +-- require LeanCopilot from git "https://github.com/lean-dojo/LeanCopilot.git" @ "49ca8d6" + @[default_target] lean_lib «SampCert» where extraDepTargets := #[`libleanffi,`libleanffidyn]