Conference paper published in Proceedings of Machine Learning Research, volume 130 (AISTATS 2021).
The citation key is kept as exported so existing citations continue to resolve.
Percent signs in the abstract are escaped so styles that print the abstract do not emit a TeX comment.

@inproceedings{6997a5451e0f4a4fbccce27bd9570d53,
  author    = {Cohen, Edith and Geri, Ofir and Sarl{\'o}s, Tam{\'a}s and Stemmer, Uri},
  title     = {Differentially Private Weighted Sampling},
  editor    = {Banerjee, Arindam and Fukumizu, Kenji},
  booktitle = {The 24th International Conference on Artificial Intelligence and Statistics, {AISTATS} 2021, April 13-15, 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {130},
  pages     = {2404--2412},
  publisher = {PMLR},
  year      = {2021},
  language  = {English},
  abstract  = {Common datasets have the form of elements with keys (e.g., transactions and products) and the goal is to perform analytics on the aggregated form of key and frequency pairs. A weighted sample of keys by (a function of) frequency is a highly versatile summary that provides a sparse set of representative keys and supports approximate evaluations of query statistics. We propose private weighted sampling (PWS): A method that sanitizes a weighted sample as to ensure element-level differential privacy, while retaining its utility to the maximum extent possible. PWS maximizes the reporting probabilities of keys and estimation quality of a broad family of statistics. PWS improves over the state of the art even for the well-studied special case of private histograms, when no sampling is performed. We empirically observe significant performance gains of 20\%-300\% increase in key reporting for common Zipfian frequency distributions and accurate estimation with x2-8 lower frequencies. PWS is applied as a post-processing of a non-private sample, without requiring the original data. Therefore, it can be a seamless addition to existing implementations, such as those optimizes for distributed or streamed data. We believe that due to practicality and performance, PWS may become a method of choice in applications where privacy is desired.},
}