Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions mechanisms/aim.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ def __init__(
max_model_size=80,
max_iters=1000,
structural_zeros={},
bounded=False,
):
super(AIM, self).__init__(epsilon, delta, prng)
super(AIM, self).__init__(epsilon, delta, bounded=bounded, prng=prng)
self.rounds = rounds
self.max_iters = max_iters
self.max_model_size = max_model_size
Expand Down Expand Up @@ -110,7 +111,7 @@ def run(self, data, workload, num_synth_rows=None, initial_cliques=None):

oneway = [cl for cl in candidates if len(cl) == 1]

sigma = np.sqrt(rounds / (2 * 0.9 * self.rho))
sigma = self.gaussian_noise_scale_zcdp(1.0, 0.9 * self.rho / rounds)
epsilon = np.sqrt(8 * 0.1 * self.rho / rounds)

measurements = []
Expand All @@ -134,7 +135,7 @@ def run(self, data, workload, num_synth_rows=None, initial_cliques=None):
if self.rho - rho_used < 2 * (0.5 / sigma**2 + 1.0 / 8 * epsilon**2):
# Just use up whatever remaining budget there is for one last round
remaining = self.rho - rho_used
sigma = np.sqrt(1 / (2 * 0.9 * remaining))
sigma = self.gaussian_noise_scale_zcdp(1.0, 0.9 * remaining)
epsilon = np.sqrt(8 * 0.1 * remaining)
terminate = True

Expand Down Expand Up @@ -198,7 +199,6 @@ def default_params():


if __name__ == "__main__":

description = ""
formatter = argparse.ArgumentDefaultsHelpFormatter
parser = argparse.ArgumentParser(description=description, formatter_class=formatter)
Expand Down
1 change: 0 additions & 1 deletion mechanisms/cdp2adp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
# - Thomas Steinke dgauss@thomas-steinke.net 2020

import math
import matplotlib.pyplot as plt


# *********************************************************************
Expand Down
73 changes: 48 additions & 25 deletions mechanisms/mechanism.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import numpy as np
#from autodp import privacy_calibrator
from functools import partial
from cdp2adp import cdp_rho
from scipy.special import softmax
import opendp.prelude as dp

dp.enable_features("contrib", "floating-point")


def pareto_efficient(costs):
Expand All @@ -26,7 +27,7 @@ def generalized_em_scores(q, ds, t):


class Mechanism:
def __init__(self, epsilon, delta, bounded, prng=np.random):
def __init__(self, epsilon, delta, bounded, prng=None):
"""
Base class for a mechanism.
:param epsilon: privacy parameter
Expand All @@ -38,7 +39,7 @@ def __init__(self, epsilon, delta, bounded, prng=np.random):
self.delta = delta
self.rho = 0 if delta == 0 else cdp_rho(epsilon, delta)
self.bounded = bounded
self.prng = prng
self.prng = prng if prng is not None else np.random

def run(self, dataset, workload):
pass
Expand All @@ -63,19 +64,20 @@ def generalized_exponential_mechanism(
return keys[key]

def permute_and_flip(self, qualities, epsilon, sensitivity=1.0):
""" Sample a candidate from the permute-and-flip mechanism """
q = qualities - qualities.max()
p = np.exp(0.5 * epsilon / sensitivity * q)
for i in np.random.permutation(p.size):
if np.random.rand() <= p[i]:
return i
"""Sample a candidate from the permute-and-flip mechanism"""
qualities = np.array(qualities)
scale = 2.0 * sensitivity / epsilon
input_space = (
dp.vector_domain(dp.atom_domain(T=float, nan=False)),
dp.linf_distance(T=float),
)
noisy_max = dp.m.make_noisy_max(*input_space, dp.max_divergence(), scale=scale)
return noisy_max(qualities.tolist())

def exponential_mechanism(
self, qualities, epsilon, sensitivity=1.0, base_measure=None
):
if isinstance(qualities, dict):
# import pandas as pd
# print(pd.Series(list(qualities.values()), list(qualities.keys())).sort_values().tail())
keys = list(qualities.keys())
qualities = np.array([qualities[key] for key in keys])
if base_measure is not None:
Expand All @@ -85,31 +87,52 @@ def exponential_mechanism(
keys = np.arange(qualities.size)

""" Sample a candidate from the permute-and-flip mechanism """
q = qualities - qualities.max()
if base_measure is None:
p = softmax(0.5 * epsilon / sensitivity * q)
else:
p = softmax(0.5 * epsilon / sensitivity * q + base_measure)
scale = 2.0 * sensitivity / epsilon
input_space = (
dp.vector_domain(dp.atom_domain(T=float, nan=False)),
dp.linf_distance(T=float),
)
noisy_max = dp.m.make_noisy_max(*input_space, dp.max_divergence(), scale=scale)

if base_measure is not None:
qualities = qualities + base_measure

return keys[self.prng.choice(p.size, p=p)]
idx = noisy_max(qualities.tolist())
return keys[idx]

def gaussian_noise_scale(self, l2_sensitivity, epsilon, delta):
""" Return the Gaussian noise necessary to attain (epsilon, delta)-DP """
raise ValueError('Temporarily Deprecated due to broken dependency')
"""Return the Gaussian noise necessary to attain (epsilon, delta)-DP"""
if self.bounded:
l2_sensitivity *= 2.0
return l2_sensitivity * np.sqrt(2 * np.log(1.25 / delta)) / epsilon

def gaussian_noise_scale_zcdp(self, l2_sensitivity, rho):
"""Return the Gaussian noise necessary to attain zCDP with rho"""
if self.bounded:
l2_sensitivity *= 2.0
return l2_sensitivity / np.sqrt(2 * rho)

def laplace_noise_scale(self, l1_sensitivity, epsilon):
""" Return the Laplace noise necessary to attain epsilon-DP """
"""Return the Laplace noise necessary to attain epsilon-DP"""
if self.bounded:
l1_sensitivity *= 2.0
return l1_sensitivity / epsilon

def gaussian_noise(self, sigma, size):
""" Generate iid Gaussian noise of a given scale and size """
return self.prng.normal(0, sigma, size)
"""Generate iid Gaussian noise of a given scale and size"""
measure = dp.m.make_gaussian(
dp.atom_domain(T=float, nan=False),
dp.absolute_distance(T=float),
scale=sigma,
)
return np.array([measure(0.0) for _ in range(size)])

def laplace_noise(self, b, size):
""" Generate iid Laplace noise of a given scale and size """
return self.prng.laplace(0, b, size)
"""Generate iid Laplace noise of a given scale and size"""
measure = dp.m.make_laplace(
dp.atom_domain(T=float, nan=False), dp.absolute_distance(T=float), scale=b
)
return np.array([measure(0.0) for _ in range(size)])

def best_noise_distribution(self, l1_sensitivity, l2_sensitivity, epsilon, delta):
""" Adaptively determine if Laplace or Gaussian noise will be better, and
Expand Down
129 changes: 68 additions & 61 deletions mechanisms/mst.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from scipy.cluster.hierarchy import DisjointSet
import networkx as nx
import itertools
from mechanism import Mechanism
from cdp2adp import cdp_rho
from scipy.special import logsumexp
import argparse
Expand All @@ -19,30 +20,75 @@
and does not rely on public provisional data for measurement selection.
"""

# Iteration budgets for the mirror-descent model fits (PEP 8 spacing fixed).
ESTIMATION_ITERATIONS = 10_000  # final model fit before sampling synthetic data
SELECT_ITERATIONS = 2500  # cheaper provisional fit used during clique selection

class MSTMechanism(Mechanism):
    """Class-based MST synthetic-data mechanism.

    Pipeline (see run): measure all one-way marginals, compress the domain
    using those noisy counts, privately select pairwise cliques that connect
    the attributes, measure those cliques, then fit a model by mirror descent
    and sample synthetic records.
    """

    def __init__(self, epsilon, delta):
        """Initialize with an (epsilon, delta)-DP budget.

        The base class converts it to a zCDP budget self.rho.
        bounded=False selects unbounded-DP sensitivity accounting.
        """
        super().__init__(epsilon, delta, bounded=False)

    def run(self, data):
        """Run the full mechanism on `data` and return synthetic data.

        sigma = sqrt(3 / (2 * rho)) corresponds to spending rho/3 of the
        budget on each measurement pass, with another rho/3 on selection.
        """
        sigma = np.sqrt(3 / (2 * self.rho))
        # Start from every one-way marginal.
        cliques = [(col,) for col in data.domain]
        log1 = self.measure(data, cliques, sigma)
        # Compress attribute domains using the noisy one-way counts.
        data, log1, undo_compress_fn = compress_domain(data, log1)
        cliques = self.select(data, self.rho /3.0, log1)
        log2 = self.measure(data, cliques, sigma)
        est = estimation.mirror_descent(data.domain, log1+log2, iters=ESTIMATION_ITERATIONS)
        synth = est.synthetic_data()
        # Map synthetic records back to the original (uncompressed) domain.
        return undo_compress_fn(synth)

    def measure(self, data, cliques, sigma, weights=None):
        """Measure each clique's marginal with Gaussian noise.

        :param data: dataset providing .project(...).datavector()
        :param cliques: list of attribute tuples to measure
        :param sigma: base noise scale; clique i gets scale sigma / weights[i]
        :param weights: optional per-clique weights, normalized to unit L2 norm
        :return: list of LinearMeasurement objects
        """
        if weights is None:
            weights = np.ones(len(cliques))
        weights = np.array(weights) / np.linalg.norm(weights)
        measurements = []
        for proj, weight in zip(cliques, weights):
            x = data.project(proj).datavector()
            y = x + self.gaussian_noise(sigma / weight, x.size)
            measurements.append(LinearMeasurement(y, proj, sigma / weight))
        return measurements

    def select(self, data, rho, measurement_log, cliques=None):
        """Privately select pairwise cliques (graph edges) to measure.

        Fits a provisional model to `measurement_log`, scores every attribute
        pair by that model's L1 error, then picks r - 1 edges with the
        exponential mechanism so the result connects all attributes; the
        per-round epsilon is derived from rho and the number of rounds.

        :param cliques: optional edges forced into the graph up front
        :return: list of selected edges
        """
        if cliques is None:
            cliques = []

        # Provisional model fit on the measurements made so far.
        est = estimation.mirror_descent(data.domain, measurement_log, iters=SELECT_ITERATIONS)

        weights = {}
        candidates = list(itertools.combinations(data.domain.attrs, 2))
        for a, b in candidates:
            xhat = est.project([a, b]).datavector()
            x = data.project([a, b]).datavector()
            # Quality score: L1 error of the provisional model on this pair.
            weights[a, b] = np.linalg.norm(x - xhat, 1)

        T = nx.Graph()
        T.add_nodes_from(data.domain.attrs)
        ds = DisjointSet(data.domain.attrs)

        for e in cliques:
            T.add_edge(*e)
            ds.merge(*e)

        # r - 1 edges are needed to connect the r current components.
        r = len(list(nx.connected_components(T)))
        epsilon = np.sqrt((8 * rho) / (r - 1))

        for _ in range(r - 1):
            # Only keep edges that would join two distinct components.
            candidates = [e for e in candidates if not ds.connected(*e)]
            wgts = np.array([weights[e] for e in candidates])
            idx = self.exponential_mechanism(dict(zip(range(len(wgts)), wgts)), epsilon, sensitivity=1.0)
            e = candidates[idx]
            T.add_edge(*e)
            ds.merge(*e)

        return list(T.edges)



def MST(data, epsilon, delta):
    """Run the end-to-end MST mechanism and return synthetic data.

    Converts the (epsilon, delta)-DP budget to a zCDP budget rho and splits
    it across three stages: one-way measurement, clique selection, and
    pairwise measurement.
    """
    rho = cdp_rho(epsilon, delta)
    # sigma matches a rho/3 budget share per measurement stage.
    sigma = np.sqrt(3 / (2 * rho))
    one_way = [(col,) for col in data.domain]
    log1 = measure(data, one_way, sigma)
    data, log1, undo_compress_fn = compress_domain(data, log1)
    pairs = select(data, rho / 3.0, log1)
    log2 = measure(data, pairs, sigma)
    model = estimation.mirror_descent(data.domain, log1 + log2, iters=10000)
    return undo_compress_fn(model.synthetic_data())


def measure(data, cliques, sigma, weights=None):
    """Measure each clique's marginal with Gaussian noise.

    Weights are normalized to unit L2 norm; clique i receives noise of
    scale sigma / weights[i].

    :return: list of LinearMeasurement objects
    """
    if weights is None:
        weights = np.ones(len(cliques))
    weights = np.array(weights) / np.linalg.norm(weights)
    measurements = []
    for clique, w in zip(cliques, weights):
        counts = data.project(clique).datavector()
        scale = sigma / w
        noisy = counts + np.random.normal(loc=0, scale=scale, size=counts.size)
        measurements.append(LinearMeasurement(noisy, clique, scale))
    return measurements
mech = MSTMechanism(epsilon, delta)
return mech.run(data)


def compress_domain(data, measurements):
Expand All @@ -67,45 +113,6 @@ def compress_domain(data, measurements):
return transform_data(data, supports), new_measurements, undo_compress_fn


def exponential_mechanism(q, eps, sensitivity, prng=np.random, monotonic=False):
    """Sample an index of `q` via the exponential mechanism.

    :param q: 1-D array of quality scores (higher = more likely)
    :param eps: privacy parameter for this selection
    :param sensitivity: sensitivity of the quality scores
    :param prng: source of randomness (defaults to np.random)
    :param monotonic: if True, scores are monotone and the 1/2 factor drops
    :return: sampled index into q
    """
    if monotonic:
        scores = 1.0 * eps / sensitivity * q
    else:
        scores = 0.5 * eps / sensitivity * q
    # Normalize in log-space for numerical stability.
    log_probs = scores - logsumexp(scores)
    return prng.choice(q.size, p=np.exp(log_probs))


def select(data, rho, measurement_log, cliques=None):
    """Privately select pairwise marginals (graph edges) to measure.

    Fits a provisional model to `measurement_log`, scores every attribute
    pair by the model's L1 error, then chooses r - 1 edges with the
    exponential mechanism so that the chosen edges (plus any forced
    `cliques`) connect all attributes.

    :param data: sensitive dataset (provides .domain and .project)
    :param rho: zCDP budget for selection, split evenly over the rounds
    :param measurement_log: measurements used to fit the provisional model
    :param cliques: optional edges forced into the result (default: none)
    :return: list of selected edges
    """
    # Fix: the default was a mutable `[]`, shared across calls; use None.
    if cliques is None:
        cliques = []

    est = estimation.mirror_descent(data.domain, measurement_log, iters=2500)

    weights = {}
    candidates = list(itertools.combinations(data.domain.attrs, 2))
    for a, b in candidates:
        xhat = est.project([a, b]).datavector()
        x = data.project([a, b]).datavector()
        # Quality score: L1 error of the provisional model on this pair.
        weights[a, b] = np.linalg.norm(x - xhat, 1)

    T = nx.Graph()
    T.add_nodes_from(data.domain.attrs)
    ds = DisjointSet(data.domain.attrs)

    for e in cliques:
        T.add_edge(*e)
        ds.merge(*e)

    r = len(list(nx.connected_components(T)))
    if r <= 1:
        # Forced cliques already connect every attribute; avoid the
        # ZeroDivisionError in the per-round epsilon below.
        return list(T.edges)
    epsilon = np.sqrt(8 * rho / (r - 1))
    for i in range(r - 1):
        # Only keep edges that would join two distinct components.
        candidates = [e for e in candidates if not ds.connected(*e)]
        wgts = np.array([weights[e] for e in candidates])
        idx = exponential_mechanism(wgts, epsilon, sensitivity=1.0)
        e = candidates[idx]
        T.add_edge(*e)
        ds.merge(*e)

    return list(T.edges)


def transform_data(data, supports):
df = pd.DataFrame(data.to_dict(), columns=data.domain.attrs)
newdom = {}
Expand Down
Loading