1b:["$","$L29",null,{"isWhiteLabelled":false,"children":["$","$Lb",null,{"pt":{"compact":0,"expanded":3},"children":[["$","$L2a",null,{"noStar":true,"publisher":true,"task":true,"params":true,"size":"xl","product":{"id":"eyJwYXBlcklEIjoiMjAwMy4wNDUwOSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","updated":"2020-05-13T03:50:06.000Z","paperID":"2003.04509","published":"2020-03-10T02:34:16.000Z","authors":"[\"Noga Alon\",\"Amos Beimel\",\"Shay Moran\",\"Uri Stemmer\"]","title":"Closure Properties for Private Classification and Online Prediction","scoreTrending":null,"summary":"$2b","lastCheckedForCode":"2022-09-02T09:03:45.281Z","links":[{"id":"eyJ1cmwiOiJodHRwczovL3BhcGVyc3dpdGhjb2RlLmNvbS9wYXBlci9jbG9zdXJlLXByb3BlcnRpZXMtZm9yLXByaXZhdGUtY2xhc3NpZmljYXRpb24ifQ==","type":"pwc","url":"https://paperswithcode.com/paper/closure-properties-for-private-classification","data":null}],"reposConnection":{"edges":[]},"models":[],"tags":[],"summaries":[],"emailsConnection":{"edges":[]},"__typename":"paper","authorArray":["Noga Alon","Amos Beimel","Shay Moran","Uri Stemmer"]}}],["$","$L18",null,{"container":true,"columns":100,"spacing":{"compact":0,"expanded":2,"large":3},"children":[["$","$L18",null,{"size":{"compact":100,"expanded":100,"large":68},"children":[["$","$7",null,{"children":["$","$L2c",null,{"publisher":"arxiv","paperID":"2003.04509","product":{"paper":"$1b:props:children:props:children:0:props:product","models":"$1b:props:children:props:children:0:props:product:models"},"isWhiteLabelled":false}]}],["$","$7",null,{"children":["$","$L2d",null,{"article":"$L2e","model":"$undefined"}]}]]}],["$","$L18",null,{"size":"grow","children":["$","$L2f",null,{}]}]]}],["$","$7",null,{"children":null}],[["$","audio",null,{"id":"tts"}],["$","$L30",null,{"paperID":"2003.04509","publisher":"arxiv","paperJSON":{"title":"Closure Properties for Private Classification and Online 
Prediction","paperID":"2003.04509","avgLineHeight":13.55,"imgScale":4,"sections":[{"heading":"Abstract","paragraphs":[[{"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"be a class of boolean functions and consider a ","element":"span"},{"style":{"fontStyle":"italic"},"text":"composed class ","element":"span"},{"style":{"height":12},"width":48.04,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/0-0.png","element":"img","alt":" H′ ","inline":true,"padRight":true},{"text":"that is derived from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"using some arbitrary aggregation rule (for example, ","element":"span"},{"style":{"height":12},"width":48.04,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/0-1.png","element":"img","alt":" H′ ","inline":true,"padRight":true},{"text":"may be the class of all 3-wise majority-votes of functions in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":"). We upper bound the Littlestone dimension of ","element":"span"},{"style":{"height":12},"width":48.04,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/0-2.png","element":"img","alt":" H′ ","inline":true,"padRight":true},{"text":"in terms of that of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". As a corollary, we derive closure properties for online learning and private PAC learning.","element":"span"}],[{"text":"The derived bounds on the Littlestone dimension exhibit an undesirable exponential dependence. For private learning, we prove close to optimal bounds that circumvents this suboptimal dependency. 
The improved bounds on the sample complexity of private learning are derived algorithmically via transforming a private learner for the original class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"to a private learner for the composed class ","element":"span"},{"style":{"height":12},"width":48.04,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/0-3.png","element":"img","alt":" H′","inline":true},{"text":". Using the same ideas we show that any (","element":"span"},{"style":{"fontStyle":"italic"},"text":"proper or improper","element":"span"},{"text":") private algorithm that learns a class of functions ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"in the realizable case (i.e., when the examples are labeled by some function in the class) can be transformed to a private algorithm that learns the class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"in the agnostic case.","element":"span"}]]},{"heading":"1 Introduction","paragraphs":[[{"text":"We study closure properties for learnability of binary-labeled hypothesis classes in two related settings: online learning and differentially private PAC learning.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Closure Properties for Online Learning. ","element":"span"},{"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"be a class of experts that can be online learned with vanishing regret. 
That is, there exists an algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"such that given any sequence of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"text":"prediction tasks, the number of false predictions made by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is larger by at most ","element":"span"},{"style":{"fontStyle":"italic"},"text":"R","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"T","element":"span"},{"text":") = ","element":"span"},{"style":{"fontStyle":"italic"},"text":"o","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"T","element":"span"},{"text":") ","element":"span"},{"text":"than the number of false predictions made by the best expert in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":".","element":"span"}],[{"text":"Consider a scenario where the sequence of tasks is such that every single expert in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"predicts poorly on it, however there is a small unknown set of experts ","element":"span"},{"style":{"height":15.6},"width":289.78,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/0-4.png","element":"img","alt":" h1, . . . , hk ∈ H","inline":true,"padRight":true},{"text":"that can predict well by collaborating. 
More formally, there is an aggregation rule ","element":"span"},{"style":{"height":19.53},"width":398.65,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/0-5.png","element":"img","alt":" G : {0, 1}k → {0, 1}","inline":true,"padRight":true},{"text":"such that the combined expert ","element":"span"},{"style":{"height":17.6},"width":254.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/0-6.png","element":"img","alt":"G(h1, . . . , hk)","inline":true,"padRight":true},{"text":"exhibits accurate predictions on a significant majority of the tasks. For example, a possible aggregation rule ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G ","element":"span"},{"text":"could be the majority-vote of the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"experts. Since we assume that the identities of the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"experts are not known, it is natural to consider the class ","element":"span"},{"style":{"height":17.6},"width":582.82,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-0.png","element":"img","alt":" H′ = {G(h1, . . . , hk) : hi ∈ H},","inline":true,"padRight":true},{"text":"which consists of all possible ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G","element":"span"},{"text":"-aggregations of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"experts from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". We study the following question:","element":"span"}],[{"id":"id-12","style":{"fontWeight":"bold"},"text":"Question 1.1. 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Can the optimal regret with respect to ","element":"span"},{"style":{"height":13.2},"width":53.27,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-1.png","element":"img","alt":" H′ ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be bounded in terms of that of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"style":{"fontStyle":"italic"},"text":"?","element":"span"}],[{"text":"The ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Littlestone dimension ","element":"span"},{"text":"is a combinatorial parameter that determines online learnability ","element":"span"},{"href":"#id-0","referenceIndex":33,"text":"[Littlestone, ","element":"a"},{"href":"#id-0","referenceIndex":33,"text":"1987, ","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"Ben-David et al., ","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"2009]","element":"a"},{"text":". 
In particular, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"is online learnable if and only if it has a finite Littlestone dimension ","element":"span"},{"style":{"height":13.2},"width":124.89,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-2.png","element":"img","alt":" d < ∞","inline":true},{"text":", and the best possible regret ","element":"span"},{"style":{"fontStyle":"italic"},"text":"R","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"T","element":"span"},{"text":") ","element":"span"},{"text":"for online learning ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"satisfies","element":"span"}],[{"id":"id-13","style":{"width":"67%"},"width":1259,"height":54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-3.png","element":"img"}],[{"text":"Furthermore, if it is known that if one of the experts never errs (a.k.a the realizable setting), then the optimal regret is exactly ","element":"span"},{"style":{"height":15.13},"width":49.62,"height":37.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-4.png","element":"img","alt":" d.1 ","inline":true,"padRight":true},{"text":"(The regret is called mistake-bound in this context.)","element":"span"}],[{"text":"Thus, the above question boils down to asking whether the Littlestone dimension of ","element":"span"},{"style":{"height":13.2},"width":53.27,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-5.png","element":"img","alt":" H′ ","inline":true,"padRight":true},{"text":"is bounded by a function of the Littlestone dimension of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". 
One of the two main results in this work provides an affirmative answer to this question (Theorem ","element":"span"},{"href":"#id-2","text":"2.1)","element":"a"},{"text":".","element":"span"}],[{"text":"We next discuss a variant of this question in the setting of Differentially Private (DP) learning. The two settings of online and DP-learning are intimately related (see, e.g., ","element":"span"},{"href":"#id-3","referenceIndex":15,"text":"Bun et al. ","element":"a"},{"href":"#id-3","referenceIndex":15,"text":"[2020]","element":"a"},{"text":", ","element":"span"},{"href":"#id-4","referenceIndex":1,"text":"Abernethy et al. ","element":"a"},{"href":"#id-4","referenceIndex":1,"text":"[2017]","element":"a"},{"text":", ","element":"span"},{"href":"#id-5","referenceIndex":28,"text":"Joseph et al. ","element":"a"},{"href":"#id-5","referenceIndex":28,"text":"[2019]","element":"a"},{"text":", ","element":"span"},{"href":"#id-6","referenceIndex":25,"text":"Gonen et al. ","element":"a"},{"href":"#id-6","referenceIndex":25,"text":"[2019]","element":"a"},{"text":"). 
In particular, both online learning and DP-learning are characterized by the finiteness of the Littlestone dimension ","element":"span"},{"href":"#id-0","referenceIndex":33,"text":"[Littlestone, ","element":"a"},{"href":"#id-0","referenceIndex":33,"text":"1987, ","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"Ben-David et al., ","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"2009, ","element":"a"},{"href":"#id-7","referenceIndex":14,"text":"Bun ","element":"a"},{"href":"#id-7","referenceIndex":14,"text":"et al., ","element":"a"},{"href":"#id-7","referenceIndex":14,"text":"2015, ","element":"a"},{"href":"#id-8","referenceIndex":2,"text":"Alon et al., ","element":"a"},{"href":"#id-8","referenceIndex":2,"text":"2019, ","element":"a"},{"href":"#id-3","referenceIndex":15,"text":"Bun et al., ","element":"a"},{"href":"#id-3","referenceIndex":15,"text":"2020]","element":"a"},{"text":".","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Closure Properties for Differentially Private Learning. ","element":"span"},{"text":"Imagine the following medical scenario: consider a family ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"of viruses for which there is an algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"that can learn to diagnose any specific virus ","element":"span"},{"style":{"height":13.6},"width":130.1,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-6.png","element":"img","alt":" h ∈ H","inline":true,"padRight":true},{"text":"given enough labeled medical data. 
Further assume that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"has the desired property of being differentially private learning algorithm as defined by ","element":"span"},{"href":"#id-9","referenceIndex":31,"text":"[Kasiviswanathan et al., ","element":"a"},{"href":"#id-9","referenceIndex":31,"text":"2011]","element":"a"},{"text":"; that is, it is a PAC learning algorithm in which the privacy of every patient whose data is used during training is guarded in the formal sense of differential privacy ","element":"span"},{"href":"#id-10","referenceIndex":21,"text":"[Dwork et al., ","element":"a"},{"href":"#id-10","referenceIndex":21,"text":"2006b]","element":"a"},{"text":".","element":"span"}],[{"text":"Assume that an outbreak of a deadly disease ","element":"span"},{"style":{"height":12.8},"width":41.14,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-7.png","element":"img","alt":" h′ ","inline":true,"padRight":true},{"text":"has occurred in several locations all over the world and that it is known that ","element":"span"},{"style":{"height":12.8},"width":41.14,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-8.png","element":"img","alt":" h′ ","inline":true,"padRight":true},{"text":"is caused by some relatively small, yet unknown group of viruses from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". That is, our prior information is that there are unknown viruses ","element":"span"},{"style":{"height":15.6},"width":287.08,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-9.png","element":"img","alt":" h1, . . . 
, hk ∈ H","inline":true,"padRight":true},{"text":"for a relatively small ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"such that ","element":"span"},{"style":{"height":17.6},"width":353.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-10.png","element":"img","alt":"h′ = G(h1, . . . , hk)","inline":true,"padRight":true},{"text":"for some rule ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G","element":"span"},{"text":". For example, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G ","element":"span"},{"text":"could be the OR function in which case ","element":"span"},{"style":{"height":12.8},"width":203.26,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-11.png","element":"img","alt":" h′ occurs if","inline":true,"padRight":true},{"text":"and only if the patient is infected with at least one of the viruses ","element":"span"},{"style":{"height":15.6},"width":197.66,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-12.png","element":"img","alt":" h1, . . . , hk.","inline":true}],[{"text":"It would be highly beneficial if one could use the algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"to diagnose ","element":"span"},{"style":{"height":12.8},"width":41.14,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-13.png","element":"img","alt":" h′ ","inline":true,"padRight":true},{"text":"in an automated fashion. Moreover, doing it in a private manner could encourage health institutions in the different locations to contribute their patients’ data. This inspires the following question:","element":"span"}],[{"id":"id-11","style":{"fontWeight":"bold"},"text":"Question 1.2. 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Can one use the algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"to ","element":"span"},{"text":"privately ","element":"span"},{"style":{"fontStyle":"italic"},"text":"learn to diagnose ","element":"span"},{"style":{"height":12.8},"width":41.14,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/1-14.png","element":"img","alt":" h′","inline":true},{"style":{"fontStyle":"italic"},"text":"? How does the sample complexity of this learning task scale as a function of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G","element":"span"},{"style":{"fontStyle":"italic"},"text":"?","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Differential Privacy, Online Learning, and the Littlestone Dimension. ","element":"span"},{"text":"Question ","element":"span"},{"href":"#id-11","text":"1.2 ","element":"a"},{"text":"and Question ","element":"span"},{"href":"#id-12","text":"1.1 ","element":"a"},{"text":"are equivalent in the sense that both online learning and DP-learning are characterized by the finiteness of the Littlestone dimension ","element":"span"},{"href":"#id-0","referenceIndex":33,"text":"[Littlestone, ","element":"a"},{"href":"#id-0","referenceIndex":33,"text":"1987, ","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"Ben-David et al., ","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"2009, ","element":"a"},{"href":"#id-7","referenceIndex":14,"text":"Bun et al., ","element":"a"},{"href":"#id-7","referenceIndex":14,"text":"2015, ","element":"a"},{"href":"#id-8","referenceIndex":2,"text":"Alon et al., ","element":"a"},{"href":"#id-8","referenceIndex":2,"text":"2019, ","element":"a"},{"href":"#id-3","referenceIndex":15,"text":"Bun ","element":"a"},{"href":"#id-3","referenceIndex":15,"text":"et al., 
","element":"a"},{"href":"#id-3","referenceIndex":15,"text":"2020]","element":"a"},{"text":".","element":"span"}],[{"text":"Note however that unlike the bounds relating the Littlestone dimension to online learning, which are tight up to logarithmic factors (see ","element":"span"},{"href":"#id-13","text":"(1)","element":"a"},{"text":"), the bounds relating the Littlestone dimension and DP-learning are ","element":"span"},{"style":{"fontStyle":"italic"},"text":"very far from each other","element":"span"},{"text":"; specifically, if ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d ","element":"span"},{"text":"denotes the Littlestone dimension of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"then the lower bound on the sample complexity of privately learning ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"scales with ","element":"span"},{"style":{"height":17.08},"width":105.57,"height":42.69,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-0.png","element":"img","alt":" log∗ d","inline":true,"padRight":true},{"href":"#id-7","referenceIndex":14,"text":"[Bun et al., ","element":"a"},{"href":"#id-7","referenceIndex":14,"text":"2015, ","element":"a"},{"href":"#id-8","referenceIndex":2,"text":"Alon et al., ","element":"a"},{"href":"#id-8","referenceIndex":2,"text":"2019]","element":"a"},{"text":", while the best known","element":"span"},{"text":"2 ","element":"span"},{"text":"upper bound scales with ","element":"span"},{"text":"exp(","element":"span"},{"style":{"fontStyle":"italic"},"text":"d","element":"span"},{"text":") ","element":"span"},{"href":"#id-3","referenceIndex":15,"text":"[Bun et al., ","element":"a"},{"href":"#id-3","referenceIndex":15,"text":"2020]","element":"a"},{"text":".","element":"span"}],[{"text":"Thus, while our solution to Question ","element":"span"},{"href":"#id-12","text":"1.1 ","element":"a"},{"text":"yields an affirmative 
answer to Question ","element":"span"},{"href":"#id-11","text":"1.2, ","element":"a"},{"text":"the implied quantitative bounds are far from being realistically satisfying. Specifically, every finite ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"is learnable with privacy using ","element":"span"},{"style":{"fontStyle":"italic"},"text":"O","element":"span"},{"text":"(log ","element":"span"},{"style":{"fontStyle":"italic"},"text":"|H|","element":"span"},{"text":") ","element":"span"},{"text":"samples ","element":"span"},{"href":"#id-9","referenceIndex":31,"text":"[Kasiviswanathan et al., ","element":"a"},{"href":"#id-9","referenceIndex":31,"text":"2011]","element":"a"},{"text":", and so if ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"is finite and not too large, the bounds implied by the Littlestone dimension are not meaningful. We therefore focus on deriving effective bounds for private learning, which is the content of Theorem ","element":"span"},{"href":"#id-14","text":"2.3 ","element":"a"},{"text":"(see Theorem ","element":"span"},{"href":"#id-15","text":"7.1 ","element":"a"},{"text":"for a precise statement).","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Littlestone Classes. ","element":"span"},{"text":"It is natural to ask which natural hypothesis classes have bounded Littlestone dimension. 
First, it holds that ","element":"span"},{"style":{"height":17.6},"width":589.29,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-1.png","element":"img","alt":" Ldim(H) ≤ log|H| for every H","inline":true},{"text":", so for finite classes the Littlestone dimension scales rather gracefully with their size.","element":"span"}],[{"text":"There are also natural infinite Littlestone classes: for example, let the domain ","element":"span"},{"style":{"height":12.8},"width":350.62,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-2.png","element":"img","alt":" X = Fn be an n-","inline":true,"padRight":true},{"text":"dimensional vector space over some field ","element":"span"},{"style":{"height":19.53},"width":420.59,"height":48.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-3.png","element":"img","alt":" F and let H ⊆ {0, 1}X ","inline":true,"padRight":true},{"text":"consist of all affine subspaces of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"V ","element":"span"},{"text":"of dimension ","element":"span"},{"style":{"height":14.8},"width":72.69,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-4.png","element":"img","alt":" ≤ d","inline":true},{"text":". It can be shown here that ","element":"span"},{"text":"Ldim(","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":") = ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d","element":"span"},{"text":". (For example, the class of all lines in ","element":"span"},{"style":{"height":15.13},"width":155.38,"height":37.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-5.png","element":"img","alt":" R100 has","inline":true,"padRight":true},{"text":"Littlestone dimension ","element":"span"},{"text":"1","element":"span"},{"text":".) 
A bit more generally, any class of hypotheses that can be described by polynomial ","element":"span"},{"style":{"fontStyle":"italic"},"text":"equalities ","element":"span"},{"text":"of a bounded degree has bounded Littlestone dimension. (Observe that if one replaces “equalities” with “inequalities” then the Littlestone dimension may become unbounded, however the VC dimension remains bounded (e.g. Halfspaces).) We note in passing that this can be further generalized to classes that are definable in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"stable theories","element":"span"},{"text":", which is a deep and well-explored notion in model theory. We refer the reader to ","element":"span"},{"href":"#id-16","referenceIndex":17,"text":"Chase and Freitag ","element":"a"},{"href":"#id-16","referenceIndex":17,"text":"[2019]","element":"a"},{"text":", Section 5.1 for such examples.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Organization. ","element":"span"},{"text":"Formal statement of our main results and description of our techniques appears in Section ","element":"span"},{"text":"2, ","element":"span"},{"text":"specifically, a short overview of the proofs is given in Section ","element":"span"},{"href":"#id-17","text":"2.1. ","element":"a"},{"text":"Definitions and background results are provided in Section ","element":"span"},{"text":"3. ","element":"span"},{"text":"The complete proofs appear in the rest of the paper. Closure properties for Littlestone classes is proved in Section ","element":"span"},{"text":"4. ","element":"span"},{"text":"The effective bounds for private learning are given in Section ","element":"span"},{"text":"5 ","element":"span"},{"text":"and Sections ","element":"span"},{"text":"6 ","element":"span"},{"text":"and ","element":"span"},{"text":"7. 
","element":"span"},{"text":"We note that each of these parts can be read independently of the other.","element":"span"}]]},{"heading":"2 Main Results and Techniques","paragraphs":[[{"text":"Let ","element":"span"},{"style":{"height":19.53},"width":407.27,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-6.png","element":"img","alt":" G : {0, 1}k → {0, 1}","inline":true,"padRight":true},{"text":"be a boolean function and let ","element":"span"},{"style":{"height":19.53},"width":421.3,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-7.png","element":"img","alt":" H1, . . . , Hk ⊆ {0, 1}X ","inline":true,"padRight":true},{"text":"be hypothesis classes. Denote by ","element":"span"},{"style":{"height":17.6},"width":278.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-8.png","element":"img","alt":" G(H1, . . . , Hk)","inline":true,"padRight":true},{"text":"the following class ","element":"span"},{"style":{"height":17.6},"width":825.8,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-9.png","element":"img","alt":" G(H1, . . . , Hk) = {G(h1, . . . 
, hk) : hi ∈ Hi}.","inline":true,"padRight":true},{"text":"For example, if ","element":"span"},{"style":{"height":17.6},"width":1358.19,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-10.png","element":"img","alt":" G(x1, x2) = x1 ∧ x2 then G(H1, H2) = H1 ∧ H2 = {h1 ∧ h2 : hi ∈ Hi}","inline":true,"padRight":true},{"text":"is the class of all pairwise intersections/conjunctions of a function from ","element":"span"},{"style":{"height":14.62},"width":53.85,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-11.png","element":"img","alt":" H1","inline":true,"padRight":true},{"text":"and a function from ","element":"span"},{"style":{"height":14.62},"width":66.78,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-12.png","element":"img","alt":" H2.","inline":true}],[{"id":"id-2","style":{"fontWeight":"bold"},"text":"Theorem 2.1 ","element":"span"},{"text":"(A Closure Theorem for the Littlestone Dimension)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":19.53},"width":379.53,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-13.png","element":"img","alt":" G : {0, 1}k → {0, 1}","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a boolean function, let ","element":"span"},{"style":{"height":19.53},"width":412.77,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-14.png","element":"img","alt":" H1, . . . 
, Hk ⊆ {0, 1}X ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be classes, and let ","element":"span"},{"style":{"height":17.6},"width":879.34,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/2-15.png","element":"img","alt":" d ∈ N such that Ldim(Hi) ≤ d for every i ≤ k.","inline":true}],[{"style":{"width":"67%"},"width":1269,"height":95,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-0.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"where ","element":"span"},{"style":{"height":16.41},"width":34,"height":41.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-1.png","element":"img","alt":"˜O","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"conceals polynomial factors in ","element":"span"},{"text":"log ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"text":"log ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d","element":"span"},{"style":{"fontStyle":"italic"},"text":".","element":"span"}],[{"text":"In particular, if ","element":"span"},{"style":{"height":17.6},"width":1127.83,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-2.png","element":"img","alt":" Ldim(Hi) < ∞ for all i ≤ d then Ldim(G(H1, . . . , Hk)) < ∞","inline":true},{"text":". Consequently, if each of the ","element":"span"},{"style":{"height":14.62},"width":48.85,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-3.png","element":"img","alt":" Hi","inline":true},{"text":"’s is online learnable then ","element":"span"},{"style":{"height":17.6},"width":278.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-4.png","element":"img","alt":" G(H1, . . . , Hk)","inline":true,"padRight":true},{"text":"is online learnable. 
We comment that if the aggregating function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G ","element":"span"},{"text":"is simple then one can obtain better bounds. For example, if ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G ","element":"span"},{"text":"is a majority-vote, a ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":"-wise OR, or a ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":"-wise AND function then a bound of ","element":"span"},{"style":{"height":20.41},"width":165.71,"height":51.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-5.png","element":"img","alt":"˜O(k2 · d)","inline":true,"padRight":true},{"text":"holds. (See Section ","element":"span"},{"href":"#id-18","text":"4.2.2.","element":"a"},{"text":")","element":"span"}],[{"text":"Another combinatorial parameter which arises in the relationship between online and DP learning is the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"threshold dimension","element":"span"},{"text":": a sequence ","element":"span"},{"style":{"height":15.2},"width":326.63,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-6.png","element":"img","alt":" x1, . . . , xk ∈ X is","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"threshold-shattered ","element":"span"},{"text":"by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"if there are ","element":"span"},{"style":{"height":15.6},"width":282.86,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-7.png","element":"img","alt":" h1, . . . 
, hk ∈ H","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":18.22},"width":198.87,"height":45.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-8.png","element":"img","alt":" hi(xj) = 1","inline":true,"padRight":true},{"text":"if and only if ","element":"span"},{"style":{"height":16.4},"width":464.09,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-9.png","element":"img","alt":" i ≤ j for all i, j ≤ k. The","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"threshold dimension","element":"span"},{"text":", ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":") ","element":"span"},{"text":"is the maximum size of a sequence that is threshold-shattered by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". The threshold dimension plays a key role in showing that DP learnable classes have a finite Littlestone dimension ","element":"span"},{"href":"#id-8","referenceIndex":2,"text":"[Alon et al., ","element":"a"},{"href":"#id-8","referenceIndex":2,"text":"2019]","element":"a"},{"text":". 
A classical theorem by ","element":"span"},{"href":"#id-19","referenceIndex":40,"text":"Shelah ","element":"a"},{"href":"#id-19","referenceIndex":40,"text":"[1978] ","element":"a"},{"text":"in model theory shows that the Littlestone and the threshold dimensions are exponentially related.","element":"span"},{"text":"3 ","element":"span"},{"text":"In particular ","element":"span"},{"style":{"height":17.6},"width":273.93,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-10.png","element":"img","alt":" Ldim(H) < ∞","inline":true,"padRight":true},{"text":"if and only if ","element":"span"},{"style":{"height":17.6},"width":205.49,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-11.png","element":"img","alt":" T(H) < ∞","inline":true},{"text":". (See Theorem ","element":"span"},{"href":"#id-20","text":"3.2 ","element":"a"},{"text":"in the preliminaries section.) We prove the following closure theorem in terms of the threshold dimension.","element":"span"}],[{"id":"id-21","style":{"fontWeight":"bold"},"text":"Theorem 2.2 ","element":"span"},{"text":"(A Closure Theorem for the Threshold Dimension)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":19.53},"width":383.13,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-12.png","element":"img","alt":" G : {0, 1}k → {0, 1}","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a boolean function, let ","element":"span"},{"style":{"height":19.53},"width":403.93,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-13.png","element":"img","alt":" H1, . . . 
, Hk ⊆ {0, 1}X ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be classes, and let ","element":"span"},{"style":{"height":17.6},"width":868.86,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-14.png","element":"img","alt":" t ∈ N such that T(Hi) < t for every i ≤ k. Then,","inline":true}],[{"style":{"width":"28%"},"width":532,"height":63,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-15.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Moreover, an exponential dependence in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is necessary: for every ","element":"span"},{"style":{"height":14},"width":108.07,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-16.png","element":"img","alt":" t ≥ 6","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"there exists a class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"such that","element":"span"}],[{"style":{"width":"67%"},"width":1266,"height":117,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-17.png","element":"img"}],[{"text":"Note that the bounds in Theorem ","element":"span"},{"href":"#id-2","text":"2.1 ","element":"a"},{"text":"and Theorem ","element":"span"},{"href":"#id-21","text":"2.2 ","element":"a"},{"text":"escalate rapidly with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"(the arity of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G","element":"span"},{"text":") and with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t","element":"span"},{"text":". It will be interesting to determine tight bounds.","element":"span"}],[{"text":"By ","element":"span"},{"href":"#id-8","referenceIndex":2,"text":"Alon et al. 
","element":"a"},{"href":"#id-8","referenceIndex":2,"text":"[2019]","element":"a"},{"text":", ","element":"span"},{"href":"#id-3","referenceIndex":15,"text":"Bun et al. ","element":"a"},{"href":"#id-3","referenceIndex":15,"text":"[2020]","element":"a"},{"text":", Theorem ","element":"span"},{"href":"#id-2","text":"2.1 ","element":"a"},{"text":"also implies closure properties for DP-learnable classes. However, the quantitative bounds are even worse: not only do the bounds on the Littlestone dimension of ","element":"span"},{"style":{"height":17.6},"width":278.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-18.png","element":"img","alt":" G(H1, . . . , Hk)","inline":true,"padRight":true},{"text":"escalate rapidly with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":", also the quantitative relationship between the Littlestone dimension and DP-learning sample complexity is very loose, and the best bounds exhibit a tower-like gap between the upper and lower bounds. For example, if the class of functions ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"is finite and its Littlestone dimension is ","element":"span"},{"style":{"height":17.6},"width":251.49,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-19.png","element":"img","alt":" ω(log log |H|)","inline":true},{"text":", then the bound of Theorem ","element":"span"},{"href":"#id-2","text":"2.1 ","element":"a"},{"text":"is most likely to be much worse than the generic application of the exponential mechanism, whose sample complexity is the logarithm of the size of the class. 
We therefore explore the closure properties of differentially-private learning algorithms directly and derive the following bound.","element":"span"}],[{"id":"id-14","style":{"fontWeight":"bold"},"text":"Theorem 2.3 ","element":"span"},{"text":"(A Closure Theorem for Private Learning (informal))","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":19.53},"width":528.4,"height":48.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-20.png","element":"img","alt":" G : {0, 1}k → {0, 1} be a","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"boolean function. Let ","element":"span"},{"style":{"height":19.53},"width":416.84,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-21.png","element":"img","alt":" H1, . . . , Hk ⊆ {0, 1}X ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be classes that are ","element":"span"},{"style":{"height":17.6},"width":94.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-22.png","element":"img","alt":" (ε, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private and ","element":"span"},{"style":{"height":17.6},"width":123.39,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-23.png","element":"img","alt":" (α, β)-","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"accurate learnable with sample complexity ","element":"span"},{"style":{"height":10.62},"width":50.32,"height":26.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-24.png","element":"img","alt":" mi","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"respectively. 
Then, ","element":"span"},{"style":{"height":17.6},"width":434.89,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-25.png","element":"img","alt":" G(H1, . . . , Hk) is (ε, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-private and ","element":"span"},{"style":{"height":17.6},"width":108.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-26.png","element":"img","alt":"(α, β)","inline":true},{"style":{"fontStyle":"italic"},"text":"-accurate learnable with sample complexity","element":"span"}],[{"style":{"width":"41%"},"width":768,"height":129,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/3-27.png","element":"img"}],[{"text":"The exact quantitative statement of the results appears in Theorem ","element":"span"},{"href":"#id-15","text":"7.1. ","element":"a"},{"text":"We remark that closure properties for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"pure ","element":"span"},{"text":"differentially-private learning algorithms (i.e., when ","element":"span"},{"style":{"height":12.8},"width":101.24,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-0.png","element":"img","alt":" δ = 0","inline":true},{"text":") are implied by the characterization of ","element":"span"},{"href":"#id-22","referenceIndex":8,"text":"[Beimel et al., ","element":"a"},{"href":"#id-22","referenceIndex":8,"text":"2019]","element":"a"},{"text":". Similarly, closure properties for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"non-private ","element":"span"},{"text":"PAC learning are implied by the characterization of their sample complexity in terms of the VC dimension and by the Sauer-Shelah-Perles Lemma ","element":"span"},{"href":"#id-23","referenceIndex":39,"text":"[Sauer, ","element":"a"},{"href":"#id-23","referenceIndex":39,"text":"1972]","element":"a"},{"text":". 
However, since there is no tight characterization of the sample complexity of approximate differentially-private learning algorithms (i.e., when ","element":"span"},{"style":{"height":13.2},"width":110.41,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-1.png","element":"img","alt":" δ > 0","inline":true},{"text":"), we prove Theorem ","element":"span"},{"href":"#id-14","text":"2.3 ","element":"a"},{"text":"algorithmically by constructing a (non-efficient) learning algorithm for ","element":"span"},{"style":{"height":17.6},"width":278.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-2.png","element":"img","alt":" G(H1, . . . , Hk)","inline":true,"padRight":true},{"text":"from private learning algorithms for ","element":"span"},{"style":{"height":15.2},"width":221.08,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-3.png","element":"img","alt":"H1, . . . , Hk.","inline":true}],[{"href":"#id-24","referenceIndex":7,"text":"Beimel et al. ","element":"a"},{"href":"#id-24","referenceIndex":7,"text":"[2015] ","element":"a"},{"text":"proved that any ","element":"span"},{"style":{"fontStyle":"italic"},"text":"proper ","element":"span"},{"text":"private learning algorithm in the realizable case","element":"span"},{"text":"4 ","element":"span"},{"text":"can be transformed into an agnostic","element":"span"},{"text":"5 ","element":"span"},{"text":"private learning algorithm, with only a mild increase in the sample complexity. 
We show that the same result holds even for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"improper ","element":"span"},{"text":"private learning (i.e., when the private learning algorithm can return an arbitrary hypothesis).","element":"span"}],[{"id":"id-68","style":{"fontWeight":"bold"},"text":"Theorem 2.4 ","element":"span"},{"text":"(Private Learning Implies Agnostic Private Learning)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"For every ","element":"span"},{"style":{"height":16.4},"width":440.02,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-4.png","element":"img","alt":" 0 < α, β, δ < 1, every","inline":true},{"style":{"height":12.8},"width":123.65,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-5.png","element":"img","alt":"m ∈ N","inline":true},{"style":{"fontStyle":"italic"},"text":", and every concept class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"style":{"fontStyle":"italic"},"text":", if there exists a ","element":"span"},{"style":{"height":17.6},"width":96.22,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-6.png","element":"img","alt":" (1, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private ","element":"span"},{"style":{"height":17.6},"width":108.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-7.png","element":"img","alt":" (α, β)","inline":true},{"style":{"fontStyle":"italic"},"text":"-accurate PAC learner for the hypothesis class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with sample complexity ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m","element":"span"},{"style":{"fontStyle":"italic"},"text":", then there exists an 
","element":"span"},{"style":{"height":17.6},"width":233.1,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-8.png","element":"img","alt":" (O(1), O(δ))","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private ","element":"span"},{"style":{"height":17.6},"width":669.37,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-9.png","element":"img","alt":"(O(α), O(β + δn))-accurate agnostic","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"learner for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with sample complexity","element":"span"}],[{"style":{"width":"37%"},"width":711,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-10.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Furthermore, if the original learner is proper, then the agnostic learner is proper.","element":"span"}],[{"text":"We obtain this result by showing that a variant of the transformation of ","element":"span"},{"href":"#id-24","referenceIndex":7,"text":"[Beimel et al., ","element":"a"},{"href":"#id-24","referenceIndex":7,"text":"2015] ","element":"a"},{"text":"also works for the improper case; we do not know if the original transformation of ","element":"span"},{"href":"#id-24","referenceIndex":7,"text":"[Beimel et al., ","element":"a"},{"href":"#id-24","referenceIndex":7,"text":"2015] ","element":"a"},{"text":"also works for the improper case. 
Our analysis of the transformation for the improper case is more involved than the analysis for the proper case.","element":"span"}],[{"id":"id-17","style":{"fontWeight":"bold"},"text":"2.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Technical Overview","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"2.1.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Closure for Littlestone Dimension","element":"span"}],[{"text":"Our proof of Theorem ","element":"span"},{"href":"#id-2","text":"2.1 ","element":"a"},{"text":"exploits tools from online learning. It may be instructive to compare Theorem ","element":"span"},{"href":"#id-2","text":"2.1 ","element":"a"},{"text":"with an analogous result for VC classes: a classical result by ","element":"span"},{"href":"#id-25","referenceIndex":19,"text":"Dudley ","element":"a"},{"href":"#id-25","referenceIndex":19,"text":"[1978] ","element":"a"},{"text":"upper bounds the VC dimension of ","element":"span"},{"style":{"height":20.41},"width":813.79,"height":51.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-11.png","element":"img","alt":" G(H1, . . . , Hk) by ˜O(d1 + · · · + dk), where di","inline":true,"padRight":true},{"text":"is the VC dimension of ","element":"span"},{"style":{"height":14.62},"width":48.85,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-12.png","element":"img","alt":" Hi","inline":true},{"text":". The argument uses the Sauer-Shelah-Perles Lemma ","element":"span"},{"href":"#id-23","referenceIndex":39,"text":"[Sauer, ","element":"a"},{"href":"#id-23","referenceIndex":39,"text":"1972] ","element":"a"},{"text":"to bound the growth-rate (a.k.a. shatter function) of ","element":"span"},{"style":{"height":17.6},"width":332.21,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-13.png","element":"img","alt":" G(H1, . . . 
, Hk) by","inline":true,"padRight":true},{"text":"some ","element":"span"},{"style":{"height":15.53},"width":174.58,"height":38.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-14.png","element":"img","alt":" nd1+···+dk","inline":true},{"text":": indeed, if we let ","element":"span"},{"style":{"height":17.6},"width":473.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-15.png","element":"img","alt":" n = VC(G(H1, . . . , Hk)),","inline":true,"padRight":true},{"text":"then by the definition of the shatter function, ","element":"span"},{"style":{"height":17.53},"width":287.98,"height":43.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-16.png","element":"img","alt":"2n ≤ nd1+···+dk","inline":true},{"text":", which implies that ","element":"span"},{"style":{"height":20.41},"width":414.8,"height":51.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-17.png","element":"img","alt":" n = ˜O(d1 + · · · + dk)","inline":true,"padRight":true},{"text":"as stated. It is worth noting that a notion of growth-rate as well as a corresponding variant of the Sauer-Shelah-Perles Lemma also exist for Littlestone classes ","element":"span"},{"href":"#id-26","referenceIndex":11,"text":"[Bhaskar, ","element":"a"},{"href":"#id-26","referenceIndex":11,"text":"2017, ","element":"a"},{"href":"#id-27","referenceIndex":16,"text":"Chase and Freitag, ","element":"a"},{"href":"#id-27","referenceIndex":16,"text":"2018]","element":"a"},{"text":". However we are not aware of a way of using it to prove Theorem ","element":"span"},{"href":"#id-2","text":"2.1.","element":"a"}],[{"text":"We take a different approach. We first focus on the case where ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G ","element":"span"},{"text":"is a majority-vote. 
That is, the class ","element":"span"},{"style":{"height":17.6},"width":383,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-18.png","element":"img","alt":"H = G(H1, . . . , Hk)","inline":true,"padRight":true},{"text":"consists of all ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":"-wise majority-votes of experts ","element":"span"},{"style":{"height":15.02},"width":150.04,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/4-19.png","element":"img","alt":" hi ∈ Hi","inline":true},{"text":". We bound the Littlestone dimension of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"by exhibiting an online learning algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"that learns ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"in the mistake-bound model with at most ","element":"span"},{"style":{"height":20.41},"width":162.71,"height":51.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-0.png","element":"img","alt":"˜O(k2 · d)","inline":true,"padRight":true},{"text":"mistakes. 
The derivation of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"exploits fundamental tools from online learning such as the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Weighted Majority Algorithm ","element":"span"},{"text":"by ","element":"span"},{"href":"#id-28","referenceIndex":32,"text":"Littlestone and Warmuth ","element":"a"},{"href":"#id-28","referenceIndex":32,"text":"[1989] ","element":"a"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Online Boosting ","element":"span"},{"href":"#id-29","referenceIndex":18,"text":"[Chen et al., ","element":"a"},{"href":"#id-29","referenceIndex":18,"text":"2012, ","element":"a"},{"href":"#id-30","referenceIndex":10,"text":"Beygelzimer et al., ","element":"a"},{"href":"#id-30","referenceIndex":10,"text":"2015, ","element":"a"},{"href":"#id-31","referenceIndex":13,"text":"Brukhim et al., ","element":"a"},{"href":"#id-31","referenceIndex":13,"text":"2020]","element":"a"},{"text":".","element":"span"}],[{"text":"Then, the bound for a general ","element":"span"},{"style":{"height":19.53},"width":393.06,"height":48.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-1.png","element":"img","alt":" G : {0, 1}k → {0, 1}","inline":true,"padRight":true},{"text":"is obtained by expressing ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G ","element":"span"},{"text":"as a formula which only uses majority-vote and negation gates. The exponential dependence in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"in the final bound is a consequence of the formula-size which can be exponential in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":". 
We do not know whether this exponential dependence is necessary.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"2.1.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Closure for Threshold Dimension","element":"span"}],[{"text":"Our proof of Theorem ","element":"span"},{"href":"#id-21","text":"2.2 ","element":"a"},{"text":"is combinatorial. First, note that an inferior bound follows from Theorem ","element":"span"},{"href":"#id-2","text":"2.1, ","element":"a"},{"text":"using the fact that the Littlestone and threshold dimensions are exponentially related (see Theorem ","element":"span"},{"href":"#id-20","text":"3.2)","element":"a"},{"text":". However this approach yields a super-exponential bound on ","element":"span"},{"style":{"height":17.6},"width":354.83,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-2.png","element":"img","alt":" T(G(H1, . . . , Hk)).","inline":true}],[{"text":"The bound in Theorem ","element":"span"},{"href":"#id-21","text":"2.2 ","element":"a"},{"text":"follows by arguing contra-positively that if ","element":"span"},{"style":{"height":17.6},"width":495.35,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-3.png","element":"img","alt":" T(G(H1, . . . , Hk)) is large","inline":true,"padRight":true},{"text":"then ","element":"span"},{"style":{"height":17.6},"width":115.91,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-4.png","element":"img","alt":" T(Hi)","inline":true,"padRight":true},{"text":"is also “largish” for some ","element":"span"},{"style":{"height":14.8},"width":111.41,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-5.png","element":"img","alt":" i ≤ k","inline":true},{"text":". 
Specifically, if ","element":"span"},{"style":{"height":17.6},"width":755.88,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-6.png","element":"img","alt":" T(G(H1, . . . , Hk)) ≥ exp(t exp(k)) then","inline":true},{"style":{"height":17.6},"width":473.61,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-7.png","element":"img","alt":"T(Hi) ≥ t for some i ≤ k","inline":true},{"text":". This is shown using a Ramsey argument that asserts that any large enough sequence ","element":"span"},{"style":{"height":11.2},"width":186.78,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-8.png","element":"img","alt":" x1, . . . , xn","inline":true,"padRight":true},{"text":"that is threshold-shattered by ","element":"span"},{"style":{"height":17.6},"width":246.84,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-9.png","element":"img","alt":" G(H1 . . . Hk)","inline":true,"padRight":true},{"text":"must contain a relatively large subsequence that is threshold-shattered by one of the ","element":"span"},{"style":{"height":14.62},"width":48.85,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-10.png","element":"img","alt":" Hi","inline":true},{"text":"’s. Quantitatively, if ","element":"span"},{"style":{"height":17.6},"width":344.55,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-11.png","element":"img","alt":" n ≥ exp(t exp(k))","inline":true,"padRight":true},{"text":"then there must be a subsequence ","element":"span"},{"style":{"height":13.02},"width":203.84,"height":32.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-12.png","element":"img","alt":" xj1, . . . 
, xjt","inline":true,"padRight":true},{"text":"that is threshold-shattered by one of the ","element":"span"},{"style":{"height":14.62},"width":90.48,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-13.png","element":"img","alt":" Hi’s.","inline":true}],[{"text":"This upper bounds ","element":"span"},{"style":{"height":17.6},"width":1230.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-14.png","element":"img","alt":" T(G(H1, . . . , Hk)) by some exp(t exp(k)), where t = maxi T(Hi)","inline":true},{"text":". It is worth noting that, in contrast with Theorem ","element":"span"},{"href":"#id-2","text":"2.1, ","element":"a"},{"text":"an exponential dependence here is inevitable: we prove in Theorem ","element":"span"},{"href":"#id-21","text":"2.2 ","element":"a"},{"text":"that for any ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"there exists a class ","element":"span"},{"style":{"height":17.6},"width":1156.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-15.png","element":"img","alt":" H with T(H) ≤ t such that T({h1 ∨ h2 : h1, h2 ∈ H}) ≥ exp(t).","inline":true,"padRight":true},{"text":"This lower bound is achieved by a randomized construction.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"2.1.3 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Private Learning Implies Agnostic Private Learning","element":"span"}],[{"text":"We start by describing the transformation of ","element":"span"},{"href":"#id-24","referenceIndex":7,"text":"[Beimel et al., ","element":"a"},{"href":"#id-24","referenceIndex":7,"text":"2015] ","element":"a"},{"text":"from a proper private learning algorithm of a class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"to an agnostic proper private learning algorithm for 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". Assume that there is a private learning algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"with sample complexity ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m","element":"span"},{"text":". The transformation takes a sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"of size ","element":"span"},{"style":{"fontStyle":"italic"},"text":"O","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"m","element":"span"},{"text":") ","element":"span"},{"text":"and constructs all possible behaviors ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"of functions in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"on the points of the sample (ignoring the labels). By the Sauer-Shelah-Perles Lemma, the number of such behaviors is at most","element":"span"},{"style":{"height":36.05},"width":269.73,"height":90.13,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-16.png","element":"img","alt":"(e|S|/VC(H))^VC(H)","inline":true},{"text":". Then, it finds using the exponential mechanism a behavior ","element":"span"},{"style":{"height":13.2},"width":128.66,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-17.png","element":"img","alt":" h′ ∈ H","inline":true,"padRight":true},{"text":"that minimizes the empirical error on the sample. (The exponential mechanism is guaranteed to identify a behavior with small empirical error because the number of possible behaviors is relatively small.) 
Finally, the transformation relabels the sample ","element":"span"},{"style":{"height":16.4},"width":264.46,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-18.png","element":"img","alt":" S using h′ and","inline":true,"padRight":true},{"text":"applies ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"on the relabeled sample. If ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is a proper learning algorithm then, by standard VC arguments, the resulting algorithm is an agnostic algorithm for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". The privacy guarantees of the resulting algorithm are more delicate, and it is only ","element":"span"},{"style":{"fontStyle":"italic"},"text":"O","element":"span"},{"text":"(1)","element":"span"},{"text":"-differentially private, even if ","element":"span"},{"style":{"height":14},"width":112.13,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-19.png","element":"img","alt":" A is ε","inline":true},{"text":"-differentially private for a small ","element":"span"},{"style":{"height":8.4},"width":31.35,"height":21,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/5-20.png","element":"img","alt":" ε.","inline":true,"padRight":true},{"text":"(The difficulty in the privacy analysis is that the set of behaviors ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"is ","element":"span"},{"style":{"fontStyle":"italic"},"text":"data-dependent","element":"span"},{"text":". 
Therefore, the privacy guarantees of the resulting algorithms ","element":"span"},{"style":{"fontStyle":"italic"},"text":"are not ","element":"span"},{"text":"directly implied by those of the exponential mechanism, which assume that the set of possible outcomes is fixed and data-independent.)","element":"span"}],[{"text":"When ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is improper, we cannot use VC arguments to argue that the resulting algorithm is an agnostic learner. We rather use the generalization properties of differential privacy (proved in ","element":"span"},{"href":"#id-32","referenceIndex":23,"text":"[Dwork et al., ","element":"a"},{"href":"#id-32","referenceIndex":23,"text":"2015, ","element":"a"},{"href":"#id-33","referenceIndex":5,"text":"Bassily et al., ","element":"a"},{"href":"#id-33","referenceIndex":5,"text":"2016, ","element":"a"},{"href":"#id-34","referenceIndex":37,"text":"Rogers et al., ","element":"a"},{"href":"#id-34","referenceIndex":37,"text":"2016, ","element":"a"},{"href":"#id-35","referenceIndex":24,"text":"Feldman and Steinke, ","element":"a"},{"href":"#id-35","referenceIndex":24,"text":"2017, ","element":"a"},{"href":"#id-36","referenceIndex":35,"text":"Nissim and Stemmer, ","element":"a"},{"href":"#id-36","referenceIndex":35,"text":"2017, ","element":"a"},{"href":"#id-37","referenceIndex":29,"text":"Jung et al., ","element":"a"},{"href":"#id-37","referenceIndex":29,"text":"2020]","element":"a"},{"text":"): if a differentially private algorithm has a small empirical error on a sample chosen i.i.d. from some distribution, then it also has a small generalization error on the underlying distribution (even if the labeling hypothesis is chosen after seeing the sample). 
There are technical issues in applying these results in our case that require some modifications in the transformation.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"2.1.4 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Closure for Differentially Private Learning","element":"span"}],[{"text":"We prove Theorem ","element":"span"},{"href":"#id-14","text":"2.3 ","element":"a"},{"text":"by constructing a private algorithm ","element":"span"},{"style":{"height":15.64},"width":231.38,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-0.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"for the class ","element":"span"},{"style":{"height":17.6},"width":383.23,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-1.png","element":"img","alt":" G(H1, . . . , Hk) using","inline":true,"padRight":true},{"text":"private learning algorithms for the classes ","element":"span"},{"style":{"height":15.2},"width":207.6,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-2.png","element":"img","alt":" H1, . . . , Hk","inline":true},{"text":". 
Algorithm ","element":"span"},{"style":{"height":15.64},"width":231.38,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-3.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"uses the relabeling procedure (the one that we use to transform a private PAC learner into a private agnostic learner) in a new setting.","element":"span"}],[{"text":"The input to ","element":"span"},{"style":{"height":15.64},"width":231.38,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-4.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"is a sample labeled by some function in ","element":"span"},{"style":{"height":17.6},"width":278.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-5.png","element":"img","alt":" G(H1, . . . , Hk)","inline":true},{"text":". The algorithm finds hypotheses ","element":"span"},{"style":{"height":15.6},"width":184.18,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-6.png","element":"img","alt":" h1, . . . , hk","inline":true,"padRight":true},{"text":"in steps, where in the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i","element":"span"},{"text":"’th step, the algorithm finds a hypothesis ","element":"span"},{"style":{"height":15.6},"width":387.55,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-7.png","element":"img","alt":" hi such that h1, . . . , hi","inline":true,"padRight":true},{"text":"have a completion ","element":"span"},{"style":{"height":11.82},"width":209.55,"height":29.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-8.png","element":"img","alt":" ci+1, . . . 
, ck","inline":true,"padRight":true},{"text":"to a hypothesis ","element":"span"},{"style":{"height":17.6},"width":479.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-9.png","element":"img","alt":" G(h1, . . . , hi, ci+1, . . . , ck)","inline":true,"padRight":true},{"text":"with small error (assuming that ","element":"span"},{"style":{"height":15.6},"width":221.06,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-10.png","element":"img","alt":"h1, . . . , hi−1","inline":true,"padRight":true},{"text":"have a good completion).","element":"span"}],[{"text":"Each step of ","element":"span"},{"style":{"height":15.64},"width":231.38,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-11.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"is similar to the algorithm for agnostic learning described above. That is, in the ","element":"span"},{"style":{"height":16.8},"width":403.94,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-12.png","element":"img","alt":" i’th step, AClosureLearn","inline":true,"padRight":true},{"text":"first relabels the input sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"using some ","element":"span"},{"style":{"height":15.02},"width":138.52,"height":37.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-13.png","element":"img","alt":" h ∈ Hi","inline":true,"padRight":true},{"text":"in a way that guarantees completion to a hypothesis with small empirical error. The relabeling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"is chosen using the exponential mechanism with an appropriate score function. 
The relabeled sample is then fed to the private algorithm for the class ","element":"span"},{"style":{"height":14.62},"width":48.85,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-14.png","element":"img","alt":" Hi","inline":true,"padRight":true},{"text":"to produce a hypothesis ","element":"span"},{"style":{"height":15.02},"width":37.14,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-15.png","element":"img","alt":" hi","inline":true,"padRight":true},{"text":"and then the algorithm proceeds to the next step ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i ","element":"span"},{"text":"+ 1","element":"span"},{"text":". As in the algorithm for agnostic learning, the proof that the hypothesis ","element":"span"},{"style":{"height":17.6},"width":254.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-16.png","element":"img","alt":" G(h1, . . . , hk)","inline":true,"padRight":true},{"text":"returned by the algorithm is easier when the private algorithms for ","element":"span"},{"style":{"height":15.2},"width":207.6,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-17.png","element":"img","alt":" H1, . . . 
, Hk","inline":true,"padRight":true},{"text":"are proper and it is more involved if they are improper.","element":"span"}]]},{"heading":"3 Preliminaries","paragraphs":[[{"text":"This section is organized as follows: Section ","element":"span"},{"href":"#id-38","text":"3.1 ","element":"a"},{"text":"contains basic definitions and tools related to the Littlestone dimension and Section ","element":"span"},{"href":"#id-39","text":"3.2 ","element":"a"},{"text":"contains basic definitions and tools related to private learning.","element":"span"}],[{"id":"id-38","style":{"fontWeight":"bold"},"text":"3.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Preliminaries on the Littlestone Dimension","element":"span"}],[{"text":"The Littlestone dimension is a combinatorial parameter that characterizes regret bounds in online learning ","element":"span"},{"href":"#id-0","referenceIndex":33,"text":"[Littlestone, ","element":"a"},{"href":"#id-0","referenceIndex":33,"text":"1987, ","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"Ben-David et al., ","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"2009]","element":"a"},{"text":". The definition of this parameter uses the notion of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"mistake-trees","element":"span"},{"text":": these are binary decision trees whose internal nodes are labeled by elements of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"text":". Any root-to-leaf path in a mistake tree can be described as a sequence of examples ","element":"span"},{"style":{"height":17.6},"width":547.71,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-18.png","element":"img","alt":" (x1, y1), . . . 
, (xd, yd), where xi","inline":true,"padRight":true},{"text":"is the label of the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i","element":"span"},{"text":"’th internal node in the path, and ","element":"span"},{"style":{"height":17.6},"width":304.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-19.png","element":"img","alt":" yi = 1 if the (i+1)","inline":true},{"text":"’th node in the path is the right child of the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i","element":"span"},{"text":"’th node, and otherwise ","element":"span"},{"style":{"height":15.6},"width":115.12,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-20.png","element":"img","alt":" yi = 0","inline":true},{"text":". We say that a tree ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"text":"is ","element":"span"},{"style":{"fontStyle":"italic"},"text":"shattered ","element":"span"},{"text":"by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"if for any root-to-leaf path ","element":"span"},{"style":{"height":17.6},"width":417.41,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-21.png","element":"img","alt":" (x1, y1), . . . , (xd, yd) in","inline":true},{"style":{"height":17.6},"width":892.44,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-22.png","element":"img","alt":"T there is h ∈ H such that h(xi) = yi, for all i ≤ d","inline":true},{"text":". 
The Littlestone dimension of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":", denoted by ","element":"span"},{"text":"Ldim(","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":")","element":"span"},{"text":", is the depth of the largest complete tree that is shattered by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":".","element":"span"}],[{"id":"id-53","style":{"fontWeight":"bold"},"text":"Definition 3.1 ","element":"span"},{"text":"(Subtree)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a labeled binary tree. We will use the following notion of a subtree ","element":"span"},{"style":{"height":15.6},"width":102.88,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-23.png","element":"img","alt":" T ′ of","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"depth ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"style":{"fontStyle":"italic"},"text":"of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"style":{"fontStyle":"italic"},"text":"by induction on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h","element":"span"},{"style":{"fontStyle":"italic"},"text":":","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"1. Any leaf of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is a subtree of height ","element":"span"},{"text":"0","element":"span"},{"style":{"fontStyle":"italic"},"text":".","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"2. 
For ","element":"span"},{"style":{"height":14.8},"width":114.3,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-24.png","element":"img","alt":" h ≥ 1","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"a subtree of height ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is obtained from an internal vertex of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"style":{"fontStyle":"italic"},"text":"together with a subtree of height ","element":"span"},{"style":{"height":12.8},"width":103.15,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-25.png","element":"img","alt":" h − 1","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"of the tree rooted at its left child and a subtree of height ","element":"span"},{"style":{"height":12.8},"width":103.15,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-26.png","element":"img","alt":" h − 1","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"of the tree rooted at its right child.","element":"span"}],[{"text":"Note that if ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"text":"is a labeled tree and it is shattered by the class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":", then any subtree ","element":"span"},{"style":{"height":12},"width":47.56,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/6-27.png","element":"img","alt":" T ′ ","inline":true,"padRight":true},{"text":"of it with the same labeling of its internal vertices is shattered by the class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":".","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Threshold Dimension. 
","element":"span"},{"text":"A classical theorem of Shelah in model-theory connects bounds on 2-rank (Littlestone dimension) to the concept of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"thresholds","element":"span"},{"text":": let ","element":"span"},{"style":{"height":19.53},"width":245.39,"height":48.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-0.png","element":"img","alt":" H ⊆ {0, 1}X ","inline":true,"padRight":true},{"text":"be a hypothesis class. We say that a sequence ","element":"span"},{"style":{"height":15.2},"width":325.24,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-1.png","element":"img","alt":" x1, . . . , xk ∈ X is","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"threshold-shattered ","element":"span"},{"text":"by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"if there are ","element":"span"},{"style":{"height":18.22},"width":705.17,"height":45.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-2.png","element":"img","alt":" h1, . . . , hk ∈ H such that hi(xj) = 1 if","inline":true,"padRight":true},{"text":"and only if ","element":"span"},{"style":{"height":16.4},"width":353.89,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-3.png","element":"img","alt":" i ≤ j for all i, j ≤ k","inline":true},{"text":". 
Define the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"threshold dimension","element":"span"},{"text":", ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":")","element":"span"},{"text":", as the maximum size of a sequence that is threshold-shattered by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":".","element":"span"}],[{"id":"id-20","style":{"fontWeight":"bold"},"text":"Theorem 3.2 ","element":"span"},{"text":"(Littlestone Dimension versus Threshold Dimension ","element":"span"},{"href":"#id-19","referenceIndex":40,"text":"[Shelah, ","element":"a"},{"href":"#id-19","referenceIndex":40,"text":"1978, ","element":"a"},{"href":"#id-40","referenceIndex":27,"text":"Hodges, ","element":"a"},{"href":"#id-40","referenceIndex":27,"text":"1997]","element":"a"},{"text":")","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a hypothesis class, then:","element":"span"}],[{"style":{"width":"53%"},"width":1008,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-4.png","element":"img"}],[{"id":"id-39","style":{"fontWeight":"bold"},"text":"3.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Preliminaries on Private Learning","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Differential Privacy. ","element":"span"},{"text":"Consider a database where each record contains information of an individual. An algorithm is said to preserve differential privacy if a change of a single record of the database (i.e., information of an individual) does not significantly change the output distribution of the algorithm. 
Intuitively, this means that the information inferred about an individual from the output of a differentially-private algorithm is similar to the information that would be inferred had the individual’s record been arbitrarily modified or removed. Formally:","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Definition 3.3 ","element":"span"},{"text":"(Differential privacy ","element":"span"},{"href":"#id-10","referenceIndex":21,"text":"[Dwork et al., ","element":"a"},{"href":"#id-10","referenceIndex":21,"text":"2006b,","element":"a"},{"href":"#id-41","referenceIndex":20,"text":"a]","element":"a"},{"text":")","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A randomized algorithm ","element":"span"},{"style":{"height":17.6},"width":175.29,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-5.png","element":"img","alt":" A is (ε, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private if for all neighboring databases ","element":"span"},{"style":{"height":15.6},"width":233.68,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-6.png","element":"img","alt":" S1, S2 ∈ Xm ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"(i.e., databases differing in one entry), and for all sets ","element":"span"},{"style":{"fontStyle":"italic"},"text":"F ","element":"span"},{"style":{"fontStyle":"italic"},"text":"of outputs,","element":"span"}],[{"style":{"width":"72%"},"width":1351,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-7.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"where the probability is taken over the random coins of ","element":"span"},{"style":{"height":14},"width":270.63,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-8.png","element":"img","alt":" A. 
When δ = 0","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"we omit it and say that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"preserves ","element":"span"},{"text":"pure ","element":"span"},{"style":{"height":8.4},"width":21,"height":21,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-9.png","element":"img","alt":" ε","inline":true},{"style":{"fontStyle":"italic"},"text":"-differential privacy. When ","element":"span"},{"style":{"height":13.2},"width":103.33,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-10.png","element":"img","alt":" δ > 0","inline":true},{"style":{"fontStyle":"italic"},"text":", we use the term ","element":"span"},{"text":"approximate ","element":"span"},{"style":{"fontStyle":"italic"},"text":"differential privacy, in which case ","element":"span"},{"style":{"height":12.8},"width":20,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-11.png","element":"img","alt":" δ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is typically a negligible function of the database size ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m","element":"span"},{"style":{"fontStyle":"italic"},"text":".","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"PAC Learning. ","element":"span"},{"text":"We next define the probably approximately correct (PAC) model of ","element":"span"},{"href":"#id-42","referenceIndex":41,"text":"Valiant ","element":"a"},{"href":"#id-42","referenceIndex":41,"text":"[1984]","element":"a"},{"text":". 
A hypothesis ","element":"span"},{"style":{"height":17.6},"width":269.55,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-12.png","element":"img","alt":" c : X → {0, 1}","inline":true,"padRight":true},{"text":"is a predicate that labels ","element":"span"},{"style":{"fontStyle":"italic"},"text":"examples ","element":"span"},{"text":"taken from the domain ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"text":"by either 0 or 1. We sometimes refer to a hypothesis as a concept. A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"hypothesis class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"text":"is a set of hypotheses (predicates) mapping ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"text":"to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"{","element":"span"},{"text":"0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1","element":"span"},{"style":{"fontStyle":"italic"},"text":"}","element":"span"},{"text":". A learning algorithm is given examples sampled according to an unknown probability distribution ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"text":"over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"text":", and labeled according to an unknown ","element":"span"},{"style":{"fontStyle":"italic"},"text":"target ","element":"span"},{"text":"concept ","element":"span"},{"style":{"height":13.2},"width":110.72,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-13.png","element":"img","alt":" c ∈ H","inline":true},{"text":". 
The learning algorithm is successful when it outputs a hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"that approximates the target concept over samples from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P","element":"span"},{"text":". More formally:","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Definition 3.4. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"The ","element":"span"},{"text":"generalization error ","element":"span"},{"style":{"fontStyle":"italic"},"text":"of a hypothesis ","element":"span"},{"style":{"height":17.6},"width":284.96,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-14.png","element":"img","alt":" h : X → {0, 1}","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"with respect to a concept ","element":"span"},{"style":{"fontStyle":"italic"},"text":"c ","element":"span"},{"style":{"fontStyle":"italic"},"text":"and a distribution ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"style":{"fontStyle":"italic"},"text":"over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is defined as ","element":"span"},{"style":{"height":17.6},"width":1005.89,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-15.png","element":"img","alt":" errorP(c, h) = Prx∼P[h(x) ̸= c(x)]. 
If errorP(c, h) ≤ α","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"we say that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is ","element":"span"},{"style":{"height":16.4},"width":353.72,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-16.png","element":"img","alt":" α-good for c and P.","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Definition 3.5 ","element":"span"},{"text":"(PAC Learning ","element":"span"},{"href":"#id-42","referenceIndex":41,"text":"[Valiant, ","element":"a"},{"href":"#id-42","referenceIndex":41,"text":"1984]","element":"a"},{"text":")","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"An algorithm ","element":"span"},{"style":{"height":17.6},"width":261.79,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-17.png","element":"img","alt":" A is an (α, β)","inline":true},{"text":"-accurate PAC learner ","element":"span"},{"style":{"fontStyle":"italic"},"text":"for a hypothesis class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"style":{"fontStyle":"italic"},"text":"if for all concepts ","element":"span"},{"style":{"height":13.2},"width":109.24,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-18.png","element":"img","alt":" c ∈ H","inline":true},{"style":{"fontStyle":"italic"},"text":", all distributions ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"style":{"fontStyle":"italic"},"text":"on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"style":{"fontStyle":"italic"},"text":", given an input of 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"m ","element":"span"},{"style":{"fontStyle":"italic"},"text":"samples ","element":"span"},{"style":{"height":17.6},"width":1015.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-19.png","element":"img","alt":"S = (z1, . . . , zm), where zi = (xi, c(xi)) and each xi","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is drawn i.i.d. from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P","element":"span"},{"style":{"fontStyle":"italic"},"text":", algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"outputs a hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"style":{"fontStyle":"italic"},"text":"satisfying","element":"span"}],[{"style":{"width":"29%"},"width":544,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-20.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"where the probability is taken over the random choice of the examples in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"style":{"fontStyle":"italic"},"text":"according to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"style":{"fontStyle":"italic"},"text":"and the random coins of the learner ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"style":{"fontStyle":"italic"},"text":". 
If the output hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"style":{"fontStyle":"italic"},"text":"always satisfies ","element":"span"},{"style":{"height":14},"width":265.75,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/7-21.png","element":"img","alt":" h ∈ H then A","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is called a ","element":"span"},{"text":"proper ","element":"span"},{"style":{"fontStyle":"italic"},"text":"PAC learner; otherwise, it is called an ","element":"span"},{"text":"improper ","element":"span"},{"style":{"fontStyle":"italic"},"text":"PAC learner.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Definition 3.6. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"For an unlabeled sample ","element":"span"},{"style":{"height":18.09},"width":290.34,"height":45.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-0.png","element":"img","alt":" S = (xi)mi=1, the","inline":true,"padRight":true},{"text":"empirical error ","element":"span"},{"style":{"fontStyle":"italic"},"text":"of two concepts ","element":"span"},{"style":{"height":17.6},"width":369.46,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-1.png","element":"img","alt":" c, h is errorS(c, h) =","inline":true}],[{"style":{"height":19.22},"width":440.46,"height":48.05,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-2.png","element":"img","alt":"1/m |{i : c(xi) ≠ h(xi)}|.","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"For a labeled sample ","element":"span"},{"style":{"height":18.09},"width":365.09,"height":45.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-3.png","element":"img","alt":" S = (xi, yi)mi=1, the","inline":true,"padRight":true},{"text":"empirical error ","element":"span"},{"style":{"fontStyle":"italic"},"text":"of 
","element":"span"},{"style":{"height":17.6},"width":312.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-4.png","element":"img","alt":" h is errorS(h) =","inline":true}],[{"style":{"height":19.22},"width":356.66,"height":48.05,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-5.png","element":"img","alt":"1/m |{i : h(xi) ≠ yi}|.","inline":true}],[{"text":"The previous definition of PAC learning captures the realizable case, that is, the examples are drawn from some distribution and labeled according to some concept ","element":"span"},{"style":{"height":13.2},"width":120.24,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-6.png","element":"img","alt":" c ∈ H","inline":true},{"text":". We next define agnostic learning, i.e., where there is a distribution over labeled examples and the goal is to find a hypothesis whose error is close to the error of the best hypothesis in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"with respect to the distribution. 
Formally, for a distribution ","element":"span"},{"style":{"height":12},"width":26,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-7.png","element":"img","alt":" µ","inline":true,"padRight":true},{"text":"on ","element":"span"},{"style":{"height":17.6},"width":199.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-8.png","element":"img","alt":" X × {0, 1}","inline":true,"padRight":true},{"text":"and a function ","element":"span"},{"style":{"height":19.95},"width":1069.58,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-9.png","element":"img","alt":" f : X → {0, 1} we define errorµ(f) = Pr(x,a)∼µ[f(x) ̸= a].","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Definition 3.7 ","element":"span"},{"text":"(Agnostic PAC Learning)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Algorithm ","element":"span"},{"style":{"height":17.6},"width":428.96,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-10.png","element":"img","alt":" A is an (α, β)-accurate","inline":true,"padRight":true},{"text":"agnostic PAC learner ","element":"span"},{"style":{"fontStyle":"italic"},"text":"for a hypothesis class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with sample complexity ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m ","element":"span"},{"style":{"fontStyle":"italic"},"text":"if for all distributions ","element":"span"},{"style":{"height":17.6},"width":304.37,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-11.png","element":"img","alt":" µ on X × {0, 1}","inline":true},{"style":{"fontStyle":"italic"},"text":", given an input of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"labeled samples ","element":"span"},{"style":{"height":17.6},"width":325.19,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-12.png","element":"img","alt":" S = (z1, . . . , zm)","inline":true},{"style":{"fontStyle":"italic"},"text":", where each labeled example ","element":"span"},{"style":{"height":17.6},"width":235.74,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-13.png","element":"img","alt":" zi = (xi, ai)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is drawn i.i.d. from ","element":"span"},{"style":{"height":12},"width":38.29,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-14.png","element":"img","alt":" µ,","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"outputs a hypothesis ","element":"span"},{"style":{"height":13.6},"width":115.5,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-15.png","element":"img","alt":" h ∈ H","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"satisfying","element":"span"}],[{"style":{"width":"47%"},"width":895,"height":108,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-16.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"where the probability is taken over the random choice of the examples in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"style":{"fontStyle":"italic"},"text":"according to ","element":"span"},{"style":{"height":12},"width":26,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-17.png","element":"img","alt":" µ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and the random coins of the learner 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"style":{"fontStyle":"italic"},"text":". If the output hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"style":{"fontStyle":"italic"},"text":"always satisfies ","element":"span"},{"style":{"height":14},"width":249.53,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-18.png","element":"img","alt":" h ∈ H then A","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is called a ","element":"span"},{"text":"proper ","element":"span"},{"style":{"fontStyle":"italic"},"text":"agnostic PAC learner; otherwise, it is called an ","element":"span"},{"text":"improper ","element":"span"},{"style":{"fontStyle":"italic"},"text":"agnostic PAC learner.","element":"span"}],[{"text":"The following bound is due to ","element":"span"},{"href":"#id-43","referenceIndex":42,"text":"[Vapnik and Chervonenkis, ","element":"a"},{"href":"#id-43","referenceIndex":42,"text":"1971, ","element":"a"},{"href":"#id-44","referenceIndex":12,"text":"Blumer et al., ","element":"a"},{"href":"#id-44","referenceIndex":12,"text":"1989]","element":"a"},{"text":".","element":"span"}],[{"id":"id-48","style":{"fontWeight":"bold"},"text":"Theorem 3.8 ","element":"span"},{"text":"(VC-Dimension Generalization Bound)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a concept class and a distribution over a domain ","element":"span"},{"style":{"height":16.4},"width":374.59,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-19.png","element":"img","alt":" X. 
Let α, β > 0, and","inline":true}],[{"style":{"width":"39%"},"width":737,"height":106,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-20.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Suppose that we draw an unlabeled sample ","element":"span"},{"style":{"height":18.09},"width":391.36,"height":45.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-21.png","element":"img","alt":" S = (xi)mi=1, where xi","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"are drawn i.i.d. from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P","element":"span"},{"style":{"fontStyle":"italic"},"text":". Then,","element":"span"}],[{"style":{"width":"58%"},"width":1100,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-22.png","element":"img"}],[{"text":"The next theorem, due to ","element":"span"},{"href":"#id-43","referenceIndex":42,"text":"[Vapnik and Chervonenkis, ","element":"a"},{"href":"#id-43","referenceIndex":42,"text":"1971, ","element":"a"},{"href":"#id-45","referenceIndex":4,"text":"Anthony and Bartlett, ","element":"a"},{"href":"#id-45","referenceIndex":4,"text":"2009, ","element":"a"},{"href":"#id-46","referenceIndex":3,"text":"Anthony and ","element":"a"},{"href":"#id-46","referenceIndex":3,"text":"Shawe-Taylor, ","element":"a"},{"href":"#id-46","referenceIndex":3,"text":"1993]","element":"a"},{"text":", handles (in particular) the agnostic case.","element":"span"}],[{"id":"id-47","style":{"fontWeight":"bold"},"text":"Theorem 3.9 ","element":"span"},{"text":"(VC-Dimension Agnostic Generalization Bound)","element":"span"},{"style":{"fontWeight":"bold"},"text":". 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"There exists a constant ","element":"span"},{"style":{"height":11.6},"width":24,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-23.png","element":"img","alt":" γ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"such that for every domain ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"style":{"fontStyle":"italic"},"text":", every concept class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"over the domain ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"style":{"fontStyle":"italic"},"text":", and every distribution ","element":"span"},{"style":{"height":12},"width":26,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-24.png","element":"img","alt":" µ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"over the domain ","element":"span"},{"style":{"height":17.6},"width":207,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-25.png","element":"img","alt":" X ×{0, 1}:","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"For a sample ","element":"span"},{"style":{"height":18.09},"width":387.54,"height":45.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-26.png","element":"img","alt":" S = (xi, yi)mi=1 where","inline":true}],[{"style":{"width":"22%"},"width":414,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-27.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"style":{"height":17.6},"width":170.54,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-28.png","element":"img","alt":" {(xi, yi)}","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"are drawn i.i.d. 
from ","element":"span"},{"style":{"height":12},"width":26,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-29.png","element":"img","alt":" µ","inline":true},{"style":{"fontStyle":"italic"},"text":", it holds that","element":"span"}],[{"style":{"width":"49%"},"width":934,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-30.png","element":"img"}],[{"text":"Notice that in Theorem ","element":"span"},{"href":"#id-47","text":"3.9 ","element":"a"},{"text":"the sample complexity is proportional to ","element":"span"},{"style":{"height":21.75},"width":36.75,"height":54.37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-31.png","element":"img","alt":"1α2","inline":true,"padRight":true},{"text":", as opposed to ","element":"span"},{"style":{"height":21.29},"width":22,"height":53.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/8-32.png","element":"img","alt":" 1α ","inline":true,"padRight":true},{"text":"in Theorem ","element":"span"},{"href":"#id-48","text":"3.8.","element":"a"}],[{"style":{"fontWeight":"bold"},"text":"Private Learning. ","element":"span"},{"text":"Consider an algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"in the probably approximately correct (PAC) model of ","element":"span"},{"href":"#id-42","referenceIndex":41,"text":"Valiant ","element":"a"},{"href":"#id-42","referenceIndex":41,"text":"[1984]","element":"a"},{"text":". We say that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is a ","element":"span"},{"style":{"fontStyle":"italic"},"text":"private ","element":"span"},{"text":"learner if it also satisfies differential privacy w.r.t. 
its training data.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Definition 3.10 ","element":"span"},{"text":"(Private PAC Learning ","element":"span"},{"href":"#id-9","referenceIndex":31,"text":"[Kasiviswanathan et al., ","element":"a"},{"href":"#id-9","referenceIndex":31,"text":"2011]","element":"a"},{"text":")","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be an algorithm that gets an input ","element":"span"},{"style":{"height":17.6},"width":311.95,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-0.png","element":"img","alt":" S = (z1, . . . , zm)","inline":true},{"style":{"fontStyle":"italic"},"text":", where each ","element":"span"},{"style":{"height":10.62},"width":32.29,"height":26.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-1.png","element":"img","alt":" zi","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is a labeled example. 
Algorithm ","element":"span"},{"style":{"height":17.6},"width":236.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-2.png","element":"img","alt":" A is an (ε, δ)","inline":true},{"text":"-differentially private ","element":"span"},{"style":{"height":17.6},"width":108.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-3.png","element":"img","alt":"(α, β)","inline":true},{"text":"-accurate PAC learner with sample complexity ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m ","element":"span"},{"style":{"fontStyle":"italic"},"text":"for a class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"style":{"fontStyle":"italic"},"text":"if","element":"span"}],[{"style":{"width":"99%"},"width":1868,"height":271,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-4.png","element":"img"}],[{"text":"Note that the utility requirement in the above definition is an average-case requirement, as the learner is only required to do well on typical samples. In contrast, the privacy requirement is a worst-case requirement that must hold for every pair of neighboring databases (no matter how they were generated).","element":"span"}],[{"text":"The following definition and lemma are taken from ","element":"span"},{"href":"#id-7","referenceIndex":14,"text":"Bun et al. ","element":"a"},{"href":"#id-7","referenceIndex":14,"text":"[2015]","element":"a"},{"text":".","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Definition 3.11 ","element":"span"},{"text":"(Empirical Learner)","element":"span"},{"style":{"fontWeight":"bold"},"text":". 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Algorithm ","element":"span"},{"style":{"height":17.6},"width":259.62,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-5.png","element":"img","alt":" A is an (α, β)","inline":true},{"style":{"fontStyle":"italic"},"text":"-accurate empirical learner for a class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with sample complexity ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m ","element":"span"},{"style":{"fontStyle":"italic"},"text":"if for every ","element":"span"},{"style":{"height":13.2},"width":111.28,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-6.png","element":"img","alt":" c ∈ H","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and for every sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"style":{"fontStyle":"italic"},"text":"of size ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m ","element":"span"},{"style":{"fontStyle":"italic"},"text":"that is labeled by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"c","element":"span"},{"style":{"fontStyle":"italic"},"text":", the algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"outputs a hypothesis ","element":"span"},{"style":{"height":13.2},"width":117.47,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-7.png","element":"img","alt":" h ∈ 
H","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"satisfying","element":"span"}],[{"style":{"width":"28%"},"width":540,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-8.png","element":"img"}],[{"id":"id-69","style":{"fontWeight":"bold"},"text":"Lemma 3.12 ","element":"span"},{"href":"#id-7","referenceIndex":14,"text":"(Bun et al. ","element":"a"},{"href":"#id-7","referenceIndex":14,"text":"[2015]","element":"a"},{"text":")","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Suppose ","element":"span"},{"style":{"height":17.6},"width":234.73,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-9.png","element":"img","alt":" A is an (ε, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private ","element":"span"},{"style":{"height":17.6},"width":108.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-10.png","element":"img","alt":" (α, β)","inline":true},{"style":{"fontStyle":"italic"},"text":"-accurate PAC learner for a concept class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with sample complexity ","element":"span"},{"style":{"height":14},"width":187.1,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-11.png","element":"img","alt":" m. Let A′ ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be an algorithm, whose input sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"style":{"fontStyle":"italic"},"text":"contains ","element":"span"},{"text":"9","element":"span"},{"style":{"fontStyle":"italic"},"text":"m ","element":"span"},{"style":{"fontStyle":"italic"},"text":"randomly labeled examples. 
Further assume that ","element":"span"},{"style":{"height":14},"width":50.84,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-12.png","element":"img","alt":" A′ ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"samples with repetitions ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m ","element":"span"},{"style":{"fontStyle":"italic"},"text":"labeled examples from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"style":{"fontStyle":"italic"},"text":"and returns the output of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"on these examples. Then, ","element":"span"},{"style":{"height":17.6},"width":243.96,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-13.png","element":"img","alt":" A′ is an (ε, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private ","element":"span"},{"style":{"height":17.6},"width":275.58,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-14.png","element":"img","alt":" (α, β)-accurate","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"empirical learner for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with sample complexity ","element":"span"},{"text":"9","element":"span"},{"style":{"fontStyle":"italic"},"text":"m","element":"span"},{"style":{"fontStyle":"italic"},"text":". 
Clearly, if ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is proper, then so is ","element":"span"},{"style":{"height":14},"width":57.02,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-15.png","element":"img","alt":" A′.","inline":true}],[{"style":{"fontWeight":"bold"},"text":"The Exponential Mechanism. ","element":"span"},{"text":"We next describe the exponential mechanism of ","element":"span"},{"href":"#id-49","referenceIndex":34,"text":"McSherry and Talwar ","element":"a"},{"href":"#id-49","referenceIndex":34,"text":"[2007]","element":"a"},{"text":". Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"text":"be a domain and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"a set of solutions. Given a score function ","element":"span"},{"style":{"height":16},"width":442.3,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-16.png","element":"img","alt":" q : X∗ × H → N, and","inline":true,"padRight":true},{"text":"a database ","element":"span"},{"style":{"height":13.2},"width":159.31,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-17.png","element":"img","alt":" S ∈ X∗","inline":true},{"text":", the goal is to chooses a solution ","element":"span"},{"style":{"height":13.2},"width":137.61,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-18.png","element":"img","alt":" h ∈ H","inline":true,"padRight":true},{"text":"approximately minimizing ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"S, h","element":"span"},{"text":")","element":"span"},{"text":". 
The mechanism chooses a solution probabilistically, where the probability mass that is assigned to each solution ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"decreases exponentially with its score ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"S, h","element":"span"},{"text":")","element":"span"},{"text":":","element":"span"}],[{"id":"id-64","style":{"width":"102%"},"width":1916,"height":648,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/9-19.png","element":"img"}],[{"href":"#id-9","referenceIndex":31,"text":"Kasiviswanathan et al. ","element":"a"},{"href":"#id-9","referenceIndex":31,"text":"[2011] ","element":"a"},{"text":"showed that the exponential mechanism can be used as a generic private learner – when used with the score function ","element":"span"},{"style":{"height":17.6},"width":812.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-0.png","element":"img","alt":" q(S, h) = |{i : h(xi) ̸= yi}| = m · errorS(h)","inline":true},{"text":", the probability that the exponential mechanism outputs a hypothesis ","element":"span"},{"style":{"height":18.44},"width":938.32,"height":46.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-1.png","element":"img","alt":" h such that errorS(h) > minf∈H{errorS(f)} + ∆ is","inline":true,"padRight":true},{"text":"at most ","element":"span"},{"style":{"height":17.6},"width":370.79,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-2.png","element":"img","alt":" |H| · exp(−ε∆m/2)","inline":true},{"text":". 
This results in a generic private proper-learner for every finite concept class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":", with sample complexity ","element":"span"},{"style":{"height":18.44},"width":282.23,"height":46.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-3.png","element":"img","alt":" Oα,β,ε(log |H|).","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Generalization Properties of Differentially Private Algorithms. ","element":"span"},{"text":"In this paper we use the fact that differential privacy implies generalization ","element":"span"},{"href":"#id-32","referenceIndex":23,"text":"[Dwork et al., ","element":"a"},{"href":"#id-32","referenceIndex":23,"text":"2015, ","element":"a"},{"href":"#id-33","referenceIndex":5,"text":"Bassily et al., ","element":"a"},{"href":"#id-33","referenceIndex":5,"text":"2016, ","element":"a"},{"href":"#id-34","referenceIndex":37,"text":"Rogers et al., ","element":"a"},{"href":"#id-34","referenceIndex":37,"text":"2016, ","element":"a"},{"href":"#id-35","referenceIndex":24,"text":"Feldman ","element":"a"},{"href":"#id-35","referenceIndex":24,"text":"and Steinke, ","element":"a"},{"href":"#id-35","referenceIndex":24,"text":"2017, ","element":"a"},{"href":"#id-36","referenceIndex":35,"text":"Nissim and Stemmer, ","element":"a"},{"href":"#id-36","referenceIndex":35,"text":"2017, ","element":"a"},{"href":"#id-37","referenceIndex":29,"text":"Jung et al., ","element":"a"},{"href":"#id-37","referenceIndex":29,"text":"2020]","element":"a"},{"text":": differentially private learning algorithms satisfy that their empirical loss is typically close to their population loss. 
We use the following variant of this result, which is a multiplicative version that applies also to the case that ","element":"span"},{"style":{"height":12.4},"width":100.53,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-4.png","element":"img","alt":" ε > 1","inline":true,"padRight":true},{"text":"(as needed in this paper).","element":"span"}],[{"id":"id-65","style":{"fontWeight":"bold"},"text":"Theorem 3.14 ","element":"span"},{"text":"(DP Generalization – Multiplicative version ","element":"span"},{"href":"#id-32","referenceIndex":23,"text":"[Dwork et al., ","element":"a"},{"href":"#id-32","referenceIndex":23,"text":"2015, ","element":"a"},{"href":"#id-33","referenceIndex":5,"text":"Bassily et al., ","element":"a"},{"href":"#id-33","referenceIndex":5,"text":"2016, ","element":"a"},{"href":"#id-35","referenceIndex":24,"text":"Feld- ","element":"a"},{"href":"#id-35","referenceIndex":24,"text":"man and Steinke, ","element":"a"},{"href":"#id-35","referenceIndex":24,"text":"2017, ","element":"a"},{"href":"#id-36","referenceIndex":35,"text":"Nissim and Stemmer, ","element":"a"},{"href":"#id-36","referenceIndex":35,"text":"2017]","element":"a"},{"text":")","element":"span"},{"style":{"fontWeight":"bold"},"text":". 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":17.6},"width":257.43,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-5.png","element":"img","alt":" A be an (ε, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private algorithm that operates on a database of ","element":"span"},{"style":{"height":13.2},"width":144.35,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-6.png","element":"img","alt":" S ∈ Xn ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and outputs a predicate ","element":"span"},{"style":{"height":17.6},"width":448.73,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-7.png","element":"img","alt":" test : X → {0, 1}. Let P","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a distribution over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a database containing ","element":"span"},{"style":{"fontStyle":"italic"},"text":"n ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i.i.d. elements from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P","element":"span"},{"style":{"fontStyle":"italic"},"text":". Then,","element":"span"}],[{"style":{"width":"85%"},"width":1604,"height":149,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-8.png","element":"img"}]]},{"heading":"4 Closure of Littlestone Classes","paragraphs":[[{"text":"In this section we study closure properties for Littlestone classes. 
We begin in Section ","element":"span"},{"href":"#id-50","text":"4.1 ","element":"a"},{"text":"with a rather simple (and tight) analysis of the behavior of the Littlestone and Threshold dimension under unions. Then, in Section ","element":"span"},{"href":"#id-51","text":"4.2 ","element":"a"},{"text":"we prove our main results in this part (Theorems ","element":"span"},{"href":"#id-2","text":"2.1 ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-21","text":"2.2) ","element":"a"},{"text":"which bound the variability of the Littlestone and Thresholds dimension under arbitrary compositions.","element":"span"}],[{"id":"id-50","style":{"fontWeight":"bold"},"text":"4.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Closure Under Unions","element":"span"}],[{"text":"We begin with two basic bounds on the variability of the Littlestone/Threshold dimension under union. Note that here ","element":"span"},{"style":{"height":14.62},"width":158.34,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-9.png","element":"img","alt":" H1 ∪ H2","inline":true,"padRight":true},{"text":"denotes the usual union: ","element":"span"},{"style":{"height":17.6},"width":652.66,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-10.png","element":"img","alt":" H1 ∪ H2 = {h : h ∈ H1 or h ∈ H2}","inline":true},{"text":". 
These bounds are useful as they allows us to reduce a bound on the dimension of ","element":"span"},{"style":{"height":17.6},"width":199.23,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-11.png","element":"img","alt":" G(H1, H2)","inline":true,"padRight":true},{"text":"for arbitrary ","element":"span"},{"style":{"height":15.2},"width":129.03,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-12.png","element":"img","alt":" H1, H2","inline":true,"padRight":true},{"text":"to the case where ","element":"span"},{"style":{"height":17.6},"width":1117.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-13.png","element":"img","alt":"H1 = H2 (because G(H1, H2) ⊆ G(H, H) for H = H1 ∪ H2).","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Observation 4.1. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"[Threshold Dimension Under Union] Let ","element":"span"},{"style":{"height":19.53},"width":330.24,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-14.png","element":"img","alt":" H1, H2 ⊆ {0, 1}X ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be hypothesis classes with ","element":"span"},{"style":{"height":17.6},"width":327.05,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-15.png","element":"img","alt":"T(Hi) = ti. 
Then,","inline":true}],[{"style":{"width":"22%"},"width":415,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-16.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Moreover, this bound is tight: for every ","element":"span"},{"style":{"height":14.4},"width":86.84,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-17.png","element":"img","alt":" t1, t2","inline":true},{"style":{"fontStyle":"italic"},"text":", there are classes ","element":"span"},{"style":{"height":15.2},"width":129.03,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-18.png","element":"img","alt":" H1, H2","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"with Threshold dimension ","element":"span"},{"style":{"height":14.4},"width":86.84,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-19.png","element":"img","alt":" t1, t2","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"respectively such that ","element":"span"},{"style":{"height":17.6},"width":417.43,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-20.png","element":"img","alt":" T(H1 ∪ H2) = t1 + t2.","inline":true}],[{"style":{"fontStyle":"italic"},"text":"Proof. ","element":"span"},{"text":"For the upper bound, observe that if ","element":"span"},{"style":{"height":15.02},"width":374.94,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-21.png","element":"img","alt":" h1 . . . hm ∈ H1 ∪ H2","inline":true,"padRight":true},{"text":"threshold-shatters the sequence ","element":"span"},{"style":{"height":10.62},"width":164.26,"height":26.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-22.png","element":"img","alt":" x1 . . . 
xm","inline":true,"padRight":true},{"text":"then ","element":"span"},{"style":{"height":18.22},"width":265.23,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-23.png","element":"img","alt":" {hi : hi ∈ Hj}","inline":true,"padRight":true},{"text":"threshold-shatters ","element":"span"},{"style":{"height":18.22},"width":513.2,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-24.png","element":"img","alt":" {xi : hi ∈ Hj} for j ∈ {1, 2}","inline":true},{"text":". For the lower bound, set ","element":"span"},{"style":{"height":17.6},"width":237.21,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-25.png","element":"img","alt":" X = [t1+t2],","inline":true},{"id":"id-52","style":{"height":17.6},"width":1270.26,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-26.png","element":"img","alt":"H1 = {hi : i ≤ t1}, and H2 = {hi : t1 < i ≤ t1 + t2}, where hi(j) = 1","inline":true,"padRight":true},{"text":"if and only if ","element":"span"},{"style":{"height":16},"width":104.68,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/10-27.png","element":"img","alt":" i ≤ j.","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Proposition 4.2 ","element":"span"},{"text":"(Littlestone Dimension Under Union)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":19.53},"width":332.31,"height":48.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-0.png","element":"img","alt":" H1, H2 ⊆ {0, 1}X ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be hypothesis classes with ","element":"span"},{"style":{"height":17.6},"width":402.45,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-1.png","element":"img","alt":"Ldim(Hi) = di. 
Then,","inline":true}],[{"style":{"width":"30%"},"width":572,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-2.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Moreover, this bound is tight: for every ","element":"span"},{"style":{"height":15.6},"width":100.74,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-3.png","element":"img","alt":" d1, d2","inline":true},{"style":{"fontStyle":"italic"},"text":", there are classes ","element":"span"},{"style":{"height":15.2},"width":129.03,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-4.png","element":"img","alt":" H1, H2","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"with Littlestone dimension ","element":"span"},{"style":{"height":15.6},"width":100.75,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-5.png","element":"img","alt":" d1, d2","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"respectively such that ","element":"span"},{"style":{"height":17.6},"width":574.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-6.png","element":"img","alt":" Ldim(H1 ∪ H2) = d1 + d2 + 1.","inline":true}],[{"style":{"fontStyle":"italic"},"text":"Proof of Proposition ","element":"span"},{"href":"#id-52","style":{"fontStyle":"italic"},"text":"4.2. ","element":"a"},{"text":"There are several ways to prove this statement. 
One possibility is to use the realizable online mistake-bound setting ","element":"span"},{"href":"#id-0","referenceIndex":33,"text":"[Littlestone, ","element":"a"},{"href":"#id-0","referenceIndex":33,"text":"1987] ","element":"a"},{"text":"and argue that ","element":"span"},{"style":{"height":14.62},"width":163.99,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-7.png","element":"img","alt":" H1 ∪ H2","inline":true,"padRight":true},{"text":"can be learned with at most ","element":"span"},{"style":{"height":15.02},"width":207.92,"height":37.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-8.png","element":"img","alt":" d1 + d2 + 1","inline":true,"padRight":true},{"text":"mistakes in this setting. We present here an alternative inductive argument, which may be of independent interest. Towards this end, it is convenient to define the depth of the empty tree as ","element":"span"},{"style":{"height":15.2},"width":142.2,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-9.png","element":"img","alt":" −1, and","inline":true,"padRight":true},{"text":"that of a tree consisting of one vertex (leaf) as ","element":"span"},{"text":"0","element":"span"},{"text":".","element":"span"}],[{"text":"Consider a shattered tree ","element":"span"},{"style":{"height":17.6},"width":558.03,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-10.png","element":"img","alt":" T of depth d = Ldim(H1 ∪H2)","inline":true,"padRight":true},{"text":"with leaves labelled ","element":"span"},{"style":{"height":15.02},"width":191.68,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-11.png","element":"img","alt":" H1 and H2","inline":true,"padRight":true},{"text":"in the obvious way. 
Recall the notion of a subtree in Definition ","element":"span"},{"href":"#id-53","text":"3.1, ","element":"a"},{"text":"and let ","element":"span"},{"style":{"height":17.6},"width":281.3,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-12.png","element":"img","alt":" x ≤ Ldim(H1)","inline":true,"padRight":true},{"text":"be the maximum depth of a complete binary subtree all whose leaves are ","element":"span"},{"style":{"height":14.62},"width":53.85,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-13.png","element":"img","alt":" H1","inline":true,"padRight":true},{"text":"leaves, and ","element":"span"},{"style":{"height":17.6},"width":291.19,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-14.png","element":"img","alt":" y ≤ Ldim(H2)","inline":true,"padRight":true},{"text":"the maximum depth of a subtree all whose leaves are ","element":"span"},{"style":{"height":14.62},"width":53.85,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-15.png","element":"img","alt":" H2","inline":true},{"text":"-leaves. 
Similarly, let ","element":"span"},{"style":{"height":12},"width":113.76,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-16.png","element":"img","alt":" xL, yL","inline":true,"padRight":true},{"text":"denote the maximum depth of a ","element":"span"},{"style":{"height":15.02},"width":196.96,"height":37.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-17.png","element":"img","alt":" H1-subtree","inline":true,"padRight":true},{"text":"and a ","element":"span"},{"style":{"height":14.62},"width":53.85,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-18.png","element":"img","alt":" H2","inline":true},{"text":"-subtree in the tree rooted at the left child of the root of ","element":"span"},{"style":{"height":16.4},"width":303.5,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-19.png","element":"img","alt":" T, and let xR, yR","inline":true,"padRight":true},{"text":"be the same for the tree rooted at the right child.","element":"span"}],[{"text":"It suffices to show that ","element":"span"},{"style":{"height":17.6},"width":1394.82,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-20.png","element":"img","alt":" x + y ≥ d − 1: clearly x ≥ max(xL, xR) and also x ≥ min(xL, xR) + 1 thus","inline":true},{"style":{"height":17.6},"width":435.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-21.png","element":"img","alt":"x ≥ (xL + xR)/2 + 1/2","inline":true},{"text":". 
Similarly ","element":"span"},{"style":{"height":17.6},"width":550.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-22.png","element":"img","alt":" y ≥ (yL + yR)/2 + 1/2, hence","inline":true}],[{"style":{"width":"32%"},"width":608,"height":85,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-23.png","element":"img"}],[{"text":"and this gives by induction on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d ","element":"span"},{"text":"(starting with ","element":"span"},{"style":{"height":16.4},"width":541.96,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-24.png","element":"img","alt":" d = 0 or 1) that x + y ≥ d − 1","inline":true,"padRight":true},{"text":"as required. To see that this bound is tight, pick ","element":"span"},{"style":{"height":15.02},"width":430.32,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-25.png","element":"img","alt":" n ≥ d1 + d2 + 1 and set","inline":true}],[{"style":{"width":"87%"},"width":1631,"height":104,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-26.png","element":"img"}],[{"text":"One can verify that ","element":"span"},{"style":{"height":17.6},"width":1245.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-27.png","element":"img","alt":" Ldim(Hi) = di, for i = 1, 2 and that Ldim(H1 ∪ H2) = d1 + d2 + 1","inline":true},{"text":", as required (in fact, even the VC dimension of ","element":"span"},{"style":{"height":15.6},"width":448.26,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-28.png","element":"img","alt":" H1 ∪ H2 is d1 + d2 + 1).","inline":true}],[{"text":"Proposition ","element":"span"},{"href":"#id-52","text":"4.2 ","element":"a"},{"text":"implies that 
","element":"span"},{"style":{"height":20.02},"width":481.69,"height":50.06,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-29.png","element":"img","alt":" Ldim(∪ki=1Hi) = O(k · d)","inline":true,"padRight":true},{"text":"provided that ","element":"span"},{"style":{"height":17.6},"width":568.66,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-30.png","element":"img","alt":" Ldim(Hi) ≤ d for all i, and that","inline":true,"padRight":true},{"text":"this inequality can be tight when ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"= 2","element":"span"},{"text":". The following proposition shows that for a larger ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"this bound can be significantly improved:","element":"span"}],[{"id":"id-54","style":{"fontWeight":"bold"},"text":"Proposition 4.3 ","element":"span"},{"text":"(Littlestone Dimension Under Multiple Unions)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":19.53},"width":459.26,"height":48.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-31.png","element":"img","alt":" H1, . . . , Hk ⊆ {0, 1}X be","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"classes with ","element":"span"},{"style":{"height":17.6},"width":265.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-32.png","element":"img","alt":" Ldim(Hi) ≤ d","inline":true},{"style":{"fontStyle":"italic"},"text":". 
Then, for every ","element":"span"},{"style":{"height":17.6},"width":235.98,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-33.png","element":"img","alt":" 0 < ε < 1/2,","inline":true}],[{"style":{"width":"28%"},"width":527,"height":131,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-34.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Moreover, this bound is tight up to a constant factor: for every ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"style":{"fontStyle":"italic"},"text":", there are classes ","element":"span"},{"style":{"height":15.6},"width":303.24,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-35.png","element":"img","alt":" H1, . . . , Hk with","inline":true},{"style":{"height":17.6},"width":943.84,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-36.png","element":"img","alt":"Ldim(Hi) ≤ d such that Ldim(∪iHi) ≥ d + ⌊log k⌋.","inline":true}],[{"text":"Proposition ","element":"span"},{"href":"#id-54","text":"4.3 ","element":"a"},{"text":"demonstrates a difference with the threshold dimension. 
Indeed, while the bound above scales logarithmically with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":", in the case of the threshold dimension a linear dependence in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"is necessary: indeed, set ","element":"span"},{"style":{"height":18.22},"width":1189.9,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-37.png","element":"img","alt":" X = [k · t], Hi = {hj : (i − 1) · t < ji ≤ i · t}, where hi(j) = 1","inline":true,"padRight":true},{"text":"if and only if ","element":"span"},{"style":{"height":16.4},"width":225.98,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-38.png","element":"img","alt":" i ≤ j. Thus,","inline":true},{"style":{"height":20.02},"width":1145.06,"height":50.06,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/11-39.png","element":"img","alt":"Ldim(Hi) = t for all i and Ldim(∪ki=1Hi) = k · t >> t + log k.","inline":true}],[{"id":"id-55","style":{"width":"48%"},"width":913,"height":534,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-0.png","element":"img"}],[{"text":"Figure 1: ","element":"figcaption","subtype":"caption"},{"text":"An illustration of the tree shattered by ","element":"figcaption","subtype":"caption"},{"style":{"height":12},"width":48.04,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-1.png","element":"img","alt":" H′ ","inline":true,"padRight":true},{"text":"in the construction in Proposition ","element":"figcaption","subtype":"caption"},{"href":"#id-54","text":"4.3. 
","element":"a","subtype":"caption"},{"text":"In this illustration ","element":"figcaption","subtype":"caption"},{"style":{"height":16},"width":115.83,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-2.png","element":"img","alt":" ⌊log k⌋","inline":true,"padRight":true},{"text":"equals 3.","element":"figcaption","subtype":"caption"}],[{"style":{"fontStyle":"italic"},"text":"Proof of Proposition ","element":"span"},{"href":"#id-54","style":{"fontStyle":"italic"},"text":"4.3. ","element":"a"},{"text":"We begin with the lower bound: pick any class ","element":"span"},{"style":{"height":19.53},"width":247.41,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-3.png","element":"img","alt":" H ⊆ {0, 1}X ","inline":true,"padRight":true},{"text":"with Littlestone dimension ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d","element":"span"},{"text":", and let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"text":"be a tree of depth ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d ","element":"span"},{"text":"which is shattered by ","element":"span"},{"style":{"height":17.6},"width":273.86,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-4.png","element":"img","alt":" H. Pick ⌊log k⌋","inline":true,"padRight":true},{"text":"new points ","element":"span"},{"style":{"height":19.95},"width":297.59,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-5.png","element":"img","alt":" z1, . . . , z⌊log k⌋ /∈","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"text":", and extend the domain ","element":"span"},{"style":{"height":21.89},"width":1044.22,"height":54.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-6.png","element":"img","alt":" X to X′ = X ∪ {z1 . . . , z⌊log k⌋}. 
Define H′ ⊆ {0, 1}X′ ","inline":true,"padRight":true},{"text":"by extending each ","element":"span"},{"style":{"height":15.02},"width":289.77,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-7.png","element":"img","alt":"h ∈ H to the zi","inline":true},{"text":"’s in each of the ","element":"span"},{"style":{"height":16.33},"width":225.22,"height":40.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-8.png","element":"img","alt":" k′ = 2⌊log k⌋ ","inline":true,"padRight":true},{"text":"possible ways. (So, each ","element":"span"},{"style":{"height":16.4},"width":575.35,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-9.png","element":"img","alt":" h ∈ H has k′ copies in H′, one","inline":true,"padRight":true},{"text":"for each possible pattern on the ","element":"span"},{"style":{"height":15.6},"width":278.98,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-10.png","element":"img","alt":" zi’s.) Thus, H′ ","inline":true,"padRight":true},{"text":"is a union of ","element":"span"},{"style":{"height":16.4},"width":263.23,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-11.png","element":"img","alt":" k′ copies of H","inline":true},{"text":", one copy for each boolean pattern on the ","element":"span"},{"style":{"height":10.62},"width":32.29,"height":26.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-12.png","element":"img","alt":" zi","inline":true},{"text":"’s. 
In particular, ","element":"span"},{"style":{"height":13.2},"width":53.27,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-13.png","element":"img","alt":" H′ ","inline":true,"padRight":true},{"text":"is the union of ","element":"span"},{"style":{"height":12.8},"width":40.09,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-14.png","element":"img","alt":" k′ ","inline":true,"padRight":true},{"text":"classes with Littlestone dimension ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d","element":"span"},{"text":". Also note that ","element":"span"},{"style":{"height":17.6},"width":443.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-15.png","element":"img","alt":"Ldim(H′) ≥ ⌊log k⌋ + d","inline":true},{"text":", as witnessed by the tree which is illustrated in Figure ","element":"span"},{"href":"#id-55","text":"1.","element":"a"}],[{"text":"The upper bound is based on a multiplicative-weights argument. Recall that the Littlestone dimension equals the optimal number of mistakes performed by a deterministic online learner in the mistake-bound model (i.e. online learning when the sequence of input examples is labelled by some ","element":"span"},{"style":{"height":13.6},"width":132.54,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-16.png","element":"img","alt":" h ∈ H","inline":true},{"text":"). 
Thus, it suffices to demonstrate an online learner for ","element":"span"},{"style":{"height":20.02},"width":134.75,"height":50.06,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-17.png","element":"img","alt":" ∪ki=1Hi","inline":true,"padRight":true},{"text":"which makes at most ","element":"span"},{"text":"3","element":"span"},{"style":{"fontStyle":"italic"},"text":"d ","element":"span"},{"text":"+ 3 log ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"mistakes. Pick for ","element":"span"},{"text":"every ","element":"span"},{"style":{"height":14.62},"width":48.85,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-18.png","element":"img","alt":" Hi","inline":true,"padRight":true},{"text":"an online learner ","element":"span"},{"style":{"height":15.42},"width":44.73,"height":38.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-19.png","element":"img","alt":" Ai","inline":true,"padRight":true},{"text":"which makes at most ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d ","element":"span"},{"text":"mistakes on input sequences consistent with ","element":"span"},{"style":{"height":14.62},"width":135.52,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-20.png","element":"img","alt":" Hi. 
We","inline":true,"padRight":true},{"text":"set the online learning algorithm ","element":"span"},{"style":{"height":20.02},"width":438.05,"height":50.06,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-21.png","element":"img","alt":" A for H = ∪ki=1Hi to be","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"The Weighted Majority Algorithm ","element":"span"},{"text":"by ","element":"span"},{"href":"#id-28","referenceIndex":32,"text":"Littlestone ","element":"a"},{"href":"#id-28","referenceIndex":32,"text":"and Warmuth ","element":"a"},{"href":"#id-28","referenceIndex":32,"text":"[1989] ","element":"a"},{"text":"with the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"experts being the algorithms ","element":"span"},{"style":{"height":16},"width":199.35,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-22.png","element":"img","alt":" A1, . . . , Ak","inline":true},{"text":". Now, consider an input sequence ","element":"span"},{"style":{"height":17.6},"width":461.58,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-23.png","element":"img","alt":"S = (x1, y1), . . . (xT , yT )","inline":true,"padRight":true},{"text":"consistent with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". 
Thus, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"is consistent with ","element":"span"},{"style":{"height":15.02},"width":334.06,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-24.png","element":"img","alt":" Hi for some i ≤ k","inline":true,"padRight":true},{"text":"and therefore ","element":"span"},{"style":{"height":15.42},"width":44.73,"height":38.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-25.png","element":"img","alt":"Ai","inline":true,"padRight":true},{"text":"makes at most ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d ","element":"span"},{"text":"mistakes on it. Thus, by the multiplicative weights analysis (see e.g. Corollary 2.1 in ","element":"span"},{"href":"#id-28","referenceIndex":32,"text":"Littlestone and Warmuth ","element":"a"},{"href":"#id-28","referenceIndex":32,"text":"[1989]","element":"a"},{"text":"), the number of mistakes ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"makes on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"is at most","element":"span"}],[{"style":{"width":"14%"},"width":279,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-26.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":16.4},"width":196.96,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-27.png","element":"img","alt":" 0 ≤ β < 1","inline":true,"padRight":true},{"text":"is a multiplicative factor which discounts the weights of wrong experts. 
The upper bound follows by setting ","element":"span"},{"style":{"height":17.6},"width":161.62,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-28.png","element":"img","alt":" β = 1/2.","inline":true}],[{"style":{"width":"1%"},"width":30,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-29.png","element":"img"}],[{"id":"id-51","style":{"fontWeight":"bold"},"text":"4.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Closure Under Composition","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"4.2.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Threshold Dimension","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Proof of Theorem ","element":"span"},{"href":"#id-2","style":{"fontWeight":"bold"},"text":"2.1. ","element":"a"},{"text":"We begin with the upper bound. Let ","element":"span"},{"style":{"height":17.6},"width":429.98,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-30.png","element":"img","alt":" T(G(H1, . . . , Hk)) = n","inline":true},{"text":". It suffices to show that if ","element":"span"},{"style":{"height":20.04},"width":198.38,"height":50.09,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-31.png","element":"img","alt":" n ≥ 24k4k·t ","inline":true,"padRight":true},{"text":"then there is ","element":"span"},{"style":{"height":17.6},"width":465.88,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-32.png","element":"img","alt":" i ≤ k such that T(Hi) ≥ t","inline":true},{"text":". By assumption, there are ","element":"span"},{"style":{"height":15.6},"width":387.24,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/12-33.png","element":"img","alt":" x1, x2 . . . 
xn ∈ X and","inline":true}],[{"text":"functions ","element":"span"},{"style":{"height":17.42},"width":795.19,"height":43.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-0.png","element":"img","alt":" hij ∈ Hj, for 1 ≤ i ≤ n, 1 ≤ j ≤ k such that","inline":true}],[{"style":{"width":"51%"},"width":968,"height":46,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-1.png","element":"img"}],[{"text":"Construct a coloring of the edges of the complete graph on ","element":"span"},{"style":{"height":19.53},"width":161.03,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-2.png","element":"img","alt":" [n] by 4k ","inline":true,"padRight":true},{"text":"colors as follows: for each ","element":"span"},{"style":{"height":14},"width":72.87,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-3.png","element":"img","alt":" 1 ≤","inline":true},{"style":{"height":14.8},"width":185.36,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-4.png","element":"img","alt":"p < q ≤ n","inline":true},{"text":", the color of the edge ","element":"span"},{"style":{"fontStyle":"italic"},"text":"{","element":"span"},{"style":{"fontStyle":"italic"},"text":"p, q","element":"span"},{"style":{"fontStyle":"italic"},"text":"} ","element":"span"},{"text":"is given by the following ordered sequence of ","element":"span"},{"text":"2","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"text":"bits:","element":"span"}],[{"style":{"width":"32%"},"width":603,"height":119,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-5.png","element":"img"}],[{"text":"By Ramsey Theorem ","element":"span"},{"href":"#id-56","referenceIndex":36,"text":"[Ramsey, ","element":"a"},{"href":"#id-56","referenceIndex":36,"text":"1930]","element":"a"},{"text":", if 
","element":"span"},{"style":{"height":22.04},"width":408.47,"height":55.09,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-6.png","element":"img","alt":" n ≥ (4k)2t·4k = 24k4k·t ","inline":true,"padRight":true},{"text":"then there is a monochromatic set ","element":"span"},{"style":{"height":17.6},"width":141.22,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-7.png","element":"img","alt":" A ⊆ [n]","inline":true,"padRight":true},{"text":"of size ","element":"span"},{"style":{"height":19.93},"width":179.64,"height":49.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-8.png","element":"img","alt":" |A| = 2t.6 ","inline":true,"padRight":true},{"text":"Denote the elements of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"by","element":"span"}],[{"style":{"width":"39%"},"width":746,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-9.png","element":"img"}],[{"text":"and let ","element":"span"},{"style":{"height":17.6},"width":555.78,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-10.png","element":"img","alt":" u = (u1 . . . uk), v = (v1 . . . 
vk)","inline":true,"padRight":true},{"text":"such that the color of every pair in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is","element":"span"}],[{"style":{"width":"15%"},"width":288,"height":111,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-11.png","element":"img"}],[{"text":"Thus, for every pair ","element":"span"},{"style":{"height":16.4},"width":449.37,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-12.png","element":"img","alt":" p, q ≤ d and every r ≤ k:","inline":true}],[{"style":{"width":"24%"},"width":461,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-13.png","element":"img"}],[{"text":"We claim that ","element":"span"},{"style":{"height":17.82},"width":626.78,"height":44.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-14.png","element":"img","alt":" u ̸= v: indeed, xj1, xj2, xj3, . . . , xjt","inline":true,"padRight":true},{"text":"is threshold-shattered by the functions","element":"span"}],[{"style":{"width":"74%"},"width":1395,"height":47,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-15.png","element":"img"}],[{"text":"Thus,","element":"span"}],[{"style":{"width":"44%"},"width":839,"height":115,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-16.png","element":"img"}],[{"text":"Therefore, ","element":"span"},{"style":{"height":19.13},"width":506.4,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-17.png","element":"img","alt":" v ∈ G−1(0) and u ∈ G−1(1)","inline":true,"padRight":true},{"text":"and in particular ","element":"span"},{"style":{"height":16.8},"width":104.16,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-18.png","element":"img","alt":" u ̸= v","inline":true},{"text":". 
Pick an index ","element":"span"},{"style":{"height":16.8},"width":290.14,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-19.png","element":"img","alt":" r so that ur ≠ vr","inline":true},{"text":". Therefore, for every ","element":"span"},{"style":{"height":14.8},"width":148.33,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-20.png","element":"img","alt":" p, q ≤ t:","inline":true}],[{"style":{"width":"38%"},"width":722,"height":113,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-21.png","element":"img"}],[{"text":"This shows that either ","element":"span"},{"style":{"height":10.62},"width":146.26,"height":26.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-22.png","element":"img","alt":" x1 . . . xt","inline":true,"padRight":true},{"text":"is threshold-shattered by ","element":"span"},{"style":{"height":15.2},"width":630.69,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-23.png","element":"img","alt":" Hr (if vr = 1, ur = 0), or xt . . . x1","inline":true,"padRight":true},{"text":"is threshold-shattered by ","element":"span"},{"style":{"height":15.2},"width":432.35,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-24.png","element":"img","alt":" Hr (if vr = 0, ur = 1","inline":true},{"text":"); either way, the threshold dimension of ","element":"span"},{"style":{"height":14.62},"width":51.85,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/13-25.png","element":"img","alt":" Hr","inline":true,"padRight":true},{"text":"is at least ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t","element":"span"},{"text":". This completes the proof of the upper bound.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Lower Bound. ","element":"span"},{"text":"We next prove the lower bound. 
Let ","element":"span"},{"style":{"height":16.33},"width":194.66,"height":40.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-0.png","element":"img","alt":" m = 2⌊t/5⌋","inline":true},{"text":", and construct ","element":"span"},{"style":{"height":17.6},"width":232.3,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-1.png","element":"img","alt":" H ⊆ {0, 1}m ","inline":true,"padRight":true},{"text":"randomly as follows: ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"consists of ","element":"span"},{"text":"2","element":"span"},{"style":{"fontStyle":"italic"},"text":"m ","element":"span"},{"text":"random functions","element":"span"}],[{"id":"id-57","style":{"width":"25%"},"width":485,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-2.png","element":"img"}],[{"text":"where for each ","element":"span"},{"style":{"height":18.22},"width":835.38,"height":45.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-3.png","element":"img","alt":" i set fi(j) = gj(j) = 0 for j > i, and for j ≤ i","inline":true},{"text":", pick uniformly at random one of ","element":"span"},{"style":{"height":16.4},"width":160.49,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-4.png","element":"img","alt":" fi, gi, set","inline":true,"padRight":true},{"text":"it to be ","element":"span"},{"text":"1 ","element":"span"},{"text":"in position ","element":"span"},{"style":{"fontStyle":"italic"},"text":"j ","element":"span"},{"text":"and set the other to be ","element":"span"},{"text":"0 ","element":"span"},{"text":"in position ","element":"span"},{"style":{"fontStyle":"italic"},"text":"j","element":"span"},{"text":". 
All of the above","element":"span"},{"style":{"height":22.16},"width":113.24,"height":55.41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-5.png","element":"img","alt":" (m−1 choose 2) ","inline":true},{"text":"random choices are done independently. By construction, ","element":"span"},{"style":{"height":17.6},"width":433.54,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-6.png","element":"img","alt":" {h1 ∨ h2 : h1, h2 ∈ H}","inline":true,"padRight":true},{"text":"threshold-shatters the sequence ","element":"span"},{"text":"1","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"2 ","element":"span"},{"style":{"fontStyle":"italic"},"text":". . . , m ","element":"span"},{"text":"with probability ","element":"span"},{"text":"1 ","element":"span"},{"text":"and hence has threshold dimension at least ","element":"span"},{"style":{"fontStyle":"italic"},"text":"m","element":"span"},{"text":". It suffices to show that with a positive probability it holds that","element":"span"}],[{"style":{"width":"99%"},"width":1870,"height":135,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-7.png","element":"img"}],[{"text":"We set out to prove ","element":"span"},{"href":"#id-57","text":"(3)","element":"a"},{"text":". 
Consider the following event:","element":"span"}],[{"style":{"width":"84%"},"width":1583,"height":46,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-8.png","element":"img"}],[{"text":"Note that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"E ","element":"span"},{"text":"implies that ","element":"span"},{"style":{"height":17.6},"width":208.19,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-9.png","element":"img","alt":" T(H) ≤ 2k","inline":true,"padRight":true},{"text":"and therefore it suffices to show that ","element":"span"},{"text":"Pr[","element":"span"},{"style":{"fontStyle":"italic"},"text":"E","element":"span"},{"text":"] ","element":"span"},{"style":{"fontStyle":"italic"},"text":"> ","element":"span"},{"text":"0","element":"span"},{"text":". Towards this end we use a union bound: we define a family of “bad” events whose total sum of probabilities is less than one with the property that if none of the bad events occurs then ","element":"span"},{"style":{"fontStyle":"italic"},"text":"E ","element":"span"},{"text":"occurs. 
The bad events are defined as follows: for any pair of subsets ","element":"span"},{"style":{"height":18.3},"width":767.84,"height":45.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-10.png","element":"img","alt":" A, B ⊆ [m] of size |A| = |B| = k, let BA,B","inline":true,"padRight":true},{"text":"denote the event","element":"span"}],[{"style":{"width":"80%"},"width":1512,"height":46,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-11.png","element":"img"}],[{"text":"Note that indeed ","element":"span"},{"style":{"height":17.9},"width":556.31,"height":44.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-12.png","element":"img","alt":" ¬E implies BA,B for some A, B","inline":true,"padRight":true},{"text":"and thus it suffices to show that with a positive probability none of the ","element":"span"},{"style":{"height":15.1},"width":89.44,"height":37.75,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-13.png","element":"img","alt":" BA,B","inline":true,"padRight":true},{"text":"occurs. We claim that","element":"span"}],[{"style":{"width":"20%"},"width":386,"height":56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-14.png","element":"img"}],[{"text":"Indeed, for a fixed ","element":"span"},{"style":{"height":13.6},"width":105.48,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-15.png","element":"img","alt":" i ∈ A","inline":true},{"text":", the probability that one of ","element":"span"},{"style":{"height":16.4},"width":527.17,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-16.png","element":"img","alt":" fi, gi equals to 1 on all j ∈ B","inline":true,"padRight":true},{"text":"is at most ","element":"span"},{"style":{"height":19.94},"width":217.13,"height":49.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-17.png","element":"img","alt":" 2−(k−1). 
By","inline":true,"padRight":true},{"text":"independence, the probability that the latter simultaneously holds for every ","element":"span"},{"style":{"height":13.6},"width":101.37,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-18.png","element":"img","alt":" i ∈ A","inline":true,"padRight":true},{"text":"is at most ","element":"span"},{"style":{"height":18.74},"width":279.12,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-19.png","element":"img","alt":" 2−k(k−1). Thus,","inline":true,"padRight":true},{"text":"the probability that ","element":"span"},{"style":{"height":17.5},"width":89.44,"height":43.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-20.png","element":"img","alt":" BA,B","inline":true,"padRight":true},{"text":"occurs for at least one pair ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A, B ","element":"span"},{"text":"is at most","element":"span"}],[{"style":{"width":"37%"},"width":694,"height":114,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-21.png","element":"img"}],[{"text":"where the last inequality holds because ","element":"span"},{"style":{"height":23.69},"width":400.51,"height":59.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-22.png","element":"img","alt":" k = (2 + 1/log m) log m.","inline":true}],[{"style":{"width":"1%"},"width":30,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-23.png","element":"img"}],[{"id":"id-18","style":{"fontWeight":"bold"},"text":"4.2.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Littlestone Dimension","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Proof of Theorem ","element":"span"},{"href":"#id-2","style":{"fontWeight":"bold"},"text":"2.1. 
","element":"a"},{"text":"We will first show that for an odd ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":", the majority-vote ","element":"span"},{"style":{"height":15.24},"width":349.98,"height":38.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-24.png","element":"img","alt":" G = MAJk satisfies","inline":true}],[{"id":"id-58","style":{"width":"68%"},"width":1279,"height":52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-25.png","element":"img"}],[{"text":"(Recall that ","element":"span"},{"style":{"height":17.6},"width":367.25,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-26.png","element":"img","alt":" d = maxi Ldim(Hi)","inline":true},{"text":".) Then, we use this to argue that for any ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G","element":"span"},{"text":",","element":"span"}],[{"id":"id-61","style":{"width":"67%"},"width":1255,"height":52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/14-27.png","element":"img"}],[{"text":"We start with proving ","element":"span"},{"href":"#id-58","text":"(4)","element":"a"},{"text":". Let ","element":"span"},{"style":{"height":20.02},"width":1274.65,"height":50.05,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-0.png","element":"img","alt":" H = ∪ki=1Hi and Hk = MAJk(H, . . . , H). Since MAJk(H1, . . . , Hk) ⊆","inline":true},{"style":{"height":14.84},"width":54.85,"height":37.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-1.png","element":"img","alt":"Hk","inline":true},{"text":", it suffices to show that ","element":"span"},{"style":{"height":20.41},"width":383.65,"height":51.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-2.png","element":"img","alt":" Ldim(Hk) ≤ ˜O(k2d)","inline":true},{"text":". 
We use online boosting towards this end.","element":"span"}],[{"text":"Online boosting (in the realizable setting) is an algorithmic framework which allows to transform a weak online learner for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"with a non-trivial mistake-bound of ","element":"span"},{"style":{"height":17.6},"width":851.31,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-3.png","element":"img","alt":" (1/2 − γ)T + R(T), where R(T) = o(T) is a","inline":true,"padRight":true},{"text":"sublinear regret function, to a strong online learner with a vanishing mistake-bound of ","element":"span"},{"style":{"height":17.6},"width":358.43,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-4.png","element":"img","alt":" O(R(T)/γ). Online","inline":true,"padRight":true},{"text":"boosting has been studied by several works (e.g. ","element":"span"},{"href":"#id-29","referenceIndex":18,"text":"Chen et al. ","element":"a"},{"href":"#id-29","referenceIndex":18,"text":"[2012]","element":"a"},{"text":", ","element":"span"},{"href":"#id-30","referenceIndex":10,"text":"Beygelzimer et al. ","element":"a"},{"href":"#id-30","referenceIndex":10,"text":"[2015]","element":"a"},{"text":", ","element":"span"},{"href":"#id-31","referenceIndex":13,"text":"Brukhim ","element":"a"},{"href":"#id-31","referenceIndex":13,"text":"et al. ","element":"a"},{"href":"#id-31","referenceIndex":13,"text":"[2020]","element":"a"},{"text":"). We use here the variant given by ","element":"span"},{"href":"#id-31","referenceIndex":13,"text":"Brukhim et al. ","element":"a"},{"href":"#id-31","referenceIndex":13,"text":"[2020] ","element":"a"},{"text":"(see Theorem 2 there)","element":"span"},{"href":"#id-59","text":"7","element":"a"},{"text":".","element":"span"}],[{"text":"Which weak learner to use? Recall that by ","element":"span"},{"href":"#id-1","referenceIndex":9,"text":"Ben-David et al. 
","element":"a"},{"href":"#id-1","referenceIndex":9,"text":"[2009] ","element":"a"},{"text":"(see Equation ","element":"span"},{"href":"#id-13","text":"(1)","element":"a"},{"text":") there exists an agnostic online learning algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"W ","element":"span"},{"text":"for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"whose (expected) regret bound is","element":"span"}],[{"style":{"width":"31%"},"width":582,"height":54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-5.png","element":"img"}],[{"text":"We claim that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"W ","element":"span"},{"text":"is a weak learner for ","element":"span"},{"style":{"height":14.84},"width":54.85,"height":37.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-6.png","element":"img","alt":" Hk","inline":true,"padRight":true},{"text":"with mistake-bound","element":"span"}],[{"id":"id-60","style":{"width":"61%"},"width":1154,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-7.png","element":"img"}],[{"text":"To prove this, it suffices to show that for every sequence of examples ","element":"span"},{"style":{"height":17.6},"width":355.51,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-8.png","element":"img","alt":" (x1, y1) . . . 
(xT , yT )","inline":true,"padRight":true},{"text":"which is consistent with ","element":"span"},{"style":{"height":14.84},"width":54.85,"height":37.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-9.png","element":"img","alt":" Hk","inline":true,"padRight":true},{"text":"there exists ","element":"span"},{"style":{"height":13.6},"width":115.66,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-10.png","element":"img","alt":" h ∈ H","inline":true,"padRight":true},{"text":"which makes at most ","element":"span"},{"style":{"height":17.6},"width":283.1,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-11.png","element":"img","alt":" (1/2 − 1/k) · T","inline":true,"padRight":true},{"text":"mistakes on it. Indeed, let ","element":"span"},{"style":{"height":15.24},"width":246.27,"height":38.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-12.png","element":"img","alt":" h1 . . . hk such","inline":true,"padRight":true},{"text":"that ","element":"span"},{"style":{"height":17.6},"width":722.16,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-13.png","element":"img","alt":" yt = MAJk(h1(xt) . . . hk(xt)) for t ≤ T","inline":true},{"text":". Thus, on every example ","element":"span"},{"style":{"height":17.6},"width":614.66,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-14.png","element":"img","alt":" (xt, yt) at most 1/2 − 1/k fraction","inline":true,"padRight":true},{"text":"of the ","element":"span"},{"style":{"height":15.02},"width":37.14,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-15.png","element":"img","alt":" hi","inline":true},{"text":"’s make a mistake on it. 
By averaging, this implies that one of the ","element":"span"},{"style":{"height":15.02},"width":37.14,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-16.png","element":"img","alt":" hi","inline":true,"padRight":true},{"text":"makes at most ","element":"span"},{"style":{"height":17.6},"width":251.83,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-17.png","element":"img","alt":" (1/2 − 1/k)T","inline":true,"padRight":true},{"text":"mistakes in total, and ","element":"span"},{"href":"#id-60","text":"(6) ","element":"a"},{"text":"follows.","element":"span"}],[{"text":"Now, by applying online boosting with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"W ","element":"span"},{"text":"as a weak learner, we obtain an algorithm with a mistake-bound of at most","element":"span"}],[{"style":{"width":"38%"},"width":720,"height":90,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-18.png","element":"img"}],[{"text":"Thus, since the Littlestone dimension characterizes the optimal mistake-bound, letting ","element":"span"},{"style":{"height":17.6},"width":359.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-19.png","element":"img","alt":" D = Ldim(Hk), we","inline":true,"padRight":true},{"text":"get that","element":"span"}],[{"style":{"width":"41%"},"width":783,"height":74,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-20.png","element":"img"}],[{"text":"and in particular ","element":"span"},{"style":{"height":31.6},"width":566.6,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-21.png","element":"img","alt":" D ≤ O(k√(Ldim(H) D log D))","inline":true},{"text":", which implies that","element":"span"}],[{"style":{"width":"60%"},"width":1135,"height":196,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-22.png","element":"img"}],[{"text":"and finishes the 
proof of ","element":"span"},{"href":"#id-58","text":"(4)","element":"a"},{"text":".","element":"span"}],[{"text":"We next set out to prove ","element":"span"},{"href":"#id-61","text":"(5)","element":"a"},{"text":". The idea is to represent an arbitrary ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G ","element":"span"},{"text":"using a formula which only uses majority-votes and negations. Let ","element":"span"},{"style":{"height":19.53},"width":382.06,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-23.png","element":"img","alt":" G : {0, 1}k → {0, 1}","inline":true,"padRight":true},{"text":"be an arbitrary boolean function. It is a basic fact that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"G ","element":"span"},{"text":"can be represented by a Disjunctive Normal Form (DNF) as follows:","element":"span"}],[{"id":"id-59","style":{"width":"58%"},"width":1101,"height":170,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/15-24.png","element":"img"}],[{"text":"where each ","element":"span"},{"style":{"height":20.15},"width":521.21,"height":50.38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-0.png","element":"img","alt":" zi,j ∈ {xj, ¬xj}, and m ≤ 2k","inline":true},{"text":". Now, note that","element":"span"}],[{"style":{"width":"78%"},"width":1467,"height":382,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-1.png","element":"img"}],[{"text":"Thus, ","element":"span"},{"style":{"height":17.6},"width":278.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-2.png","element":"img","alt":" G(H1, . . . , Hk)","inline":true,"padRight":true},{"text":"can be written as ","element":"span"},{"style":{"height":17.88},"width":476.4,"height":44.7,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-3.png","element":"img","alt":" MAJ2m−1(H′1, . . . 
H′2m−1)","inline":true},{"text":", where for ","element":"span"},{"style":{"height":18.09},"width":464.61,"height":45.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-4.png","element":"img","alt":" i > m, H′i = {h0} is the","inline":true,"padRight":true},{"text":"class which contains the all-zero function ","element":"span"},{"style":{"height":15.2},"width":324.12,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-5.png","element":"img","alt":" h0, and for i ≤ m,","inline":true}],[{"style":{"width":"33%"},"width":619,"height":54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-6.png","element":"img"}],[{"text":"such that each class ","element":"span"},{"style":{"height":19.69},"width":290.83,"height":49.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-7.png","element":"img","alt":" H′′i,j is either Ht","inline":true,"padRight":true},{"text":"or its negation ","element":"span"},{"style":{"height":19.69},"width":512.11,"height":49.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-8.png","element":"img","alt":" ¬Ht for some t ≤ k, or H′′i,j ","inline":true,"padRight":true},{"text":"is the class ","element":"span"},{"style":{"height":17.6},"width":207.27,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-9.png","element":"img","alt":" {h1} which","inline":true,"padRight":true},{"text":"contains the all-one function. 
We now apply ","element":"span"},{"href":"#id-58","text":"(4) ","element":"a"},{"text":"to conclude that ","element":"span"},{"style":{"height":20.9},"width":717.92,"height":52.25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-10.png","element":"img","alt":" Ldim(H′i) = ˜O(k2d) for all i ≤ m, and","inline":true,"padRight":true},{"text":"that","element":"span"}],[{"style":{"width":"48%"},"width":917,"height":53,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-11.png","element":"img"}],[{"text":"as required.","element":"span"}]]},{"heading":"5 Private Agnostic Learning and Closure of Private Learning","paragraphs":[[{"text":"In this section we describe our private learning algorithm. We start by discussing a relabeling procedure (discussed in ","element":"span"},{"text":"2)","element":"span"},{"text":", explaining the difficulties in designing the procedure and how we overcome them. We then provide a formal description of the relabeling procedure in ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-12.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"and prove that it can be used to construct a private algorithm that produces hypothesis that has good generalization properties; this is done by presenting an algorithm ","element":"span"},{"style":{"height":15.64},"width":306.55,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-13.png","element":"img","alt":" ARelabelAndLearn.","inline":true}],[{"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"be a hypothesis class, and suppose that we have a differentially private learning algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H 
","element":"span"},{"text":"for the realizable setting. That is, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is guaranteed to succeed in its learning task whenever it is given a labeled database that is consistent with some hypothesis in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":". Now suppose that we are given a database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"sampled from some distribution ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"text":"on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"text":"and labeled by some concept ","element":"span"},{"style":{"height":12.73},"width":35.88,"height":31.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/16-14.png","element":"img","alt":" c∗ ","inline":true,"padRight":true},{"text":"(not necessarily in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":"). So, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"might ","element":"span"},{"style":{"fontStyle":"italic"},"text":"not ","element":"span"},{"text":"be consistent with any hypothesis in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":", and we cannot directly apply ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S","element":"span"},{"text":". 
Heuristically, one might first ","element":"span"},{"style":{"fontStyle":"italic"},"text":"relabel ","element":"span"},{"text":"the database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"using some function from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":", and then apply ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"on the relabeled database. Can we argue that such a paradigm would satisfy differential privacy, or is it the case that the relabeling process “vaporises” the privacy guarantees of algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"text":"?","element":"span"}],[{"text":"Building on a result of ","element":"span"},{"href":"#id-24","referenceIndex":7,"text":"Beimel et al. ","element":"a"},{"href":"#id-24","referenceIndex":7,"text":"[2015]","element":"a"},{"text":", we show that it ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is ","element":"span"},{"text":"possible to relabel the database before applying algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"while maintaining differential privacy. As we mentioned in the introduction, the relabeling procedure of ","element":"span"},{"href":"#id-24","referenceIndex":7,"text":"Beimel et al. 
","element":"a"},{"href":"#id-24","referenceIndex":7,"text":"[2015] ","element":"a"},{"text":"instantiates the exponential mechanism in order to choose a hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"that is (almost) as close as possible to the original labels in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S","element":"span"},{"text":", uses this hypothesis to relabel the database, and applies the given differentially private algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"on the relabeled database to obtain an outcome ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"text":".","element":"span"}],[{"text":"Now we want to argue that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"has low generalization error. We know (by the guarantees of the exponential mechanism) that the hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"with which we relabeled ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"has a relatively small empirical error on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"(close to the lowest possible error). Via standard VC arguments, we also know that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"has a relatively small generalization error. 
Therefore, in order to show that the returned hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"has low generalization","element":"span"}],[{"text":"error, it suffices to show that ","element":"span"},{"style":{"height":17.6},"width":222.97,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-0.png","element":"img","alt":" errorP(f, h)","inline":true,"padRight":true},{"text":"is small. This might seem trivial at first sight: Since ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is a PAC learner, and since it is applied on a database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"labeled by the hypothesis ","element":"span"},{"style":{"height":13.6},"width":115.5,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-1.png","element":"img","alt":" h ∈ H","inline":true},{"text":", it must (w.h.p.) return a hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"with small error w.r.t. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h","element":"span"},{"text":". Is that really the case? The difficulty with formalizing this argument is that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is only guaranteed to succeed in identifying a good hypothesis when it is applied on an ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i.i.d. ","element":"span"},{"text":"sample from some underlying distribution. This is ","element":"span"},{"style":{"fontStyle":"italic"},"text":"not ","element":"span"},{"text":"true in our case. 
Specifically, we first sampled the database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"from the underlying distribution, then ","element":"span"},{"style":{"fontStyle":"italic"},"text":"based on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S","element":"span"},{"text":", we identified the hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"and relabeled ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"using ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h","element":"span"},{"text":". For all we know, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"might completely fail when executed on such a database (not sampled in an i.i.d. manner).","element":"span"},{"text":"8 ","element":"span"},{"text":"Therefore, before applying ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"on the relabeled database, we subsample i.i.d. elements from it, and apply ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"on this newly sampled database. Now we know that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is applied on an i.i.d. sampled database, and so, by the utility guarantees of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"text":", the hypotheses ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"are close w.r.t. the underlying distribution. However, this subsampling step changes the distribution from which the inputs of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"are coming. 
This distribution is no longer ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"text":"(the original distribution from which ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"was sampled), rather it is the uniform distribution on the empirical sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S","element":"span"},{"text":". This means that what we get from the utility guarantees of ","element":"span"},{"style":{"height":17.6},"width":380.34,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-2.png","element":"img","alt":" A is that errorS(f, h)","inline":true,"padRight":true},{"text":"is small. We need to show that ","element":"span"},{"style":{"height":17.6},"width":379.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-3.png","element":"img","alt":" errorP(f, h) is small.","inline":true,"padRight":true},{"text":"If ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is a ","element":"span"},{"style":{"fontStyle":"italic"},"text":"proper ","element":"span"},{"text":"learner, then ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"is itself in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":", and hence, using standard VC arguments, the fact that ","element":"span"},{"style":{"height":17.6},"width":219,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-4.png","element":"img","alt":"errorS(f, h)","inline":true,"padRight":true},{"text":"is small implies that ","element":"span"},{"style":{"height":17.6},"width":222.96,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-5.png","element":"img","alt":" errorP(f, h)","inline":true,"padRight":true},{"text":"is small. 
However, if ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is an improper learner, then this argument breaks because ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"might come from a different hypothesis class with a much larger VC dimension. To overcome this difficulty, we will instead relate ","element":"span"},{"style":{"height":17.6},"width":523.23,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-6.png","element":"img","alt":" errorS(f, h) and errorP(f, h)","inline":true,"padRight":true},{"text":"using the generalization properties of differential privacy. These generalization properties state that if a predicate ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"was identified using a differentially private algorithm, then (w.h.p.) the empirical average of this predicate and its expectation over the underlying distribution are close. More formally, we would like to consider the predicate ","element":"span"},{"style":{"height":17.6},"width":463.9,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-7.png","element":"img","alt":"(h⊕f)(x) = h(x) ⊕ f(x)","inline":true},{"text":", which would complete our mission because the empirical average of that predicate on ","element":"span"},{"style":{"height":17.6},"width":309.84,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-8.png","element":"img","alt":" S is errorS(f, h)","inline":true},{"text":", and its expectation over ","element":"span"},{"style":{"height":17.6},"width":318.47,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-9.png","element":"img","alt":" P is errorP(f, h)","inline":true},{"text":". 
However, while ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"is indeed the outcome of a differentially private computation, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is not","element":"span"},{"text":", and we cannot directly apply the generalization properties of differential privacy to this predicate. Specifically, our relabeling procedure does not reveal the chosen hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h","element":"span"},{"text":". We overcome this issue by introducing the following conceptual modification to the relabeling procedure. Let us think about the input database ","element":"span"},{"style":{"height":12.8},"width":550.69,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-10.png","element":"img","alt":" S as two databases S = D◦T","inline":true},{"text":". In the relabeling procedure we still relabel all of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"using ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h","element":"span"},{"text":". We show that (a small variant of) this relabeling procedure still satisfies differential privacy w.r.t. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"D ","element":"span"},{"style":{"fontStyle":"italic"},"text":"even if the algorithm publicly releases the relabeled database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T","element":"span"},{"text":". 
This works in our favour because given the relabeled database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"text":"we can identify a hypothesis ","element":"span"},{"style":{"height":13.6},"width":130.25,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-11.png","element":"img","alt":" h′ ∈ H","inline":true,"padRight":true},{"text":"that agrees with it, and by standard VC arguments we know that ","element":"span"},{"style":{"height":17.6},"width":235.65,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-12.png","element":"img","alt":" errorP(h, h′)","inline":true,"padRight":true},{"text":"is small (since both ","element":"span"},{"style":{"height":15.6},"width":414.35,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-13.png","element":"img","alt":" h, h′ come from H). In","inline":true,"padRight":true},{"text":"addition, ","element":"span"},{"style":{"height":12.8},"width":41.14,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-14.png","element":"img","alt":" h′ ","inline":true,"padRight":true},{"text":"is computed by post-processing the relabeled database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"text":"which we can view as the result of a private computation w.r.t. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"D","element":"span"},{"text":". Therefore, we can now use the generalization properties of differential privacy to argue that ","element":"span"},{"style":{"height":17.6},"width":516.68,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-15.png","element":"img","alt":" errorD(f, h) ≈ errorP(f, h)","inline":true},{"text":", which would allow us to complete the proof. 
We remark that the conceptual modification of treating ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"as two databases ","element":"span"},{"style":{"height":12.8},"width":190.77,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-16.png","element":"img","alt":" S = D◦T","inline":true,"padRight":true},{"text":"is crucial for our analysis. We do not know if the original relabeling procedure of ","element":"span"},{"href":"#id-24","referenceIndex":7,"text":"Beimel et al. ","element":"a"},{"href":"#id-24","referenceIndex":7,"text":"[2015] ","element":"a"},{"text":"can be applied when ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is an improper learner. In Algorithm ","element":"span"},{"href":"#id-62","text":"2 ","element":"a"},{"text":"we formally describe ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/17-17.png","element":"img","alt":" ARelabel","inline":true},{"text":". We next provide an informal description of the algorithm.","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"be a hypothesis class, and let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q ","element":"span"},{"text":"be a score function. 
Algorithm ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-0.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"takes two input databases ","element":"span"},{"style":{"height":17.6},"width":402.69,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-1.png","element":"img","alt":"D, T ∈ (X × {0, 1})∗","inline":true},{"text":", where the labels in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"D ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"text":"are arbitrary. The algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"relabels ","element":"span"},{"style":{"fontStyle":"italic"},"text":"D ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"T ","element":"span"},{"text":"using a hypothesis ","element":"span"},{"style":{"height":13.6},"width":131.12,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-2.png","element":"img","alt":" h ∈ H","inline":true,"padRight":true},{"text":"with near optimal score ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"D, h","element":"span"},{"text":")","element":"span"},{"text":". The output of this algorithm is the two relabeled databases ","element":"span"},{"style":{"height":16.41},"width":160.8,"height":41.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-3.png","element":"img","alt":"˜D and ˜T","inline":true},{"text":". 
Observe that algorithm ","element":"span"},{"style":{"height":15.64},"width":144.96,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-4.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"is clearly ","element":"span"},{"style":{"fontStyle":"italic"},"text":"not ","element":"span"},{"text":"differentially private, since it outputs its input database (with different labels). Before formally presenting algorithm ","element":"span"},{"style":{"height":15.64},"width":144.96,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-5.png","element":"img","alt":" ARelabel","inline":true},{"text":", we introduce the following definition.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Definition 5.1. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a domain and let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a class of functions over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"style":{"fontStyle":"italic"},"text":". 
A function ","element":"span"},{"style":{"height":17.6},"width":343.43,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-6.png","element":"img","alt":" q : (X ×{0, 1})∗ ×","inline":true},{"style":{"height":13.6},"width":215.48,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-7.png","element":"img","alt":"H → R has","inline":true,"padRight":true},{"text":"matched-sensitivity ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"style":{"fontStyle":"italic"},"text":"if for every ","element":"span"},{"style":{"height":17.6},"width":1050.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-8.png","element":"img","alt":" S ∈ (X × {0, 1})∗, every (x, y), (x′y′) ∈ X × {0, 1}, and","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"every ","element":"span"},{"style":{"height":15.6},"width":171.22,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-9.png","element":"img","alt":" h, h′ ∈ H","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"that agree on every element of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"style":{"fontStyle":"italic"},"text":"we have that","element":"span"}],[{"style":{"width":"46%"},"width":866,"height":56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-10.png","element":"img"}],[{"text":"In words, a score function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q ","element":"span"},{"text":"has low ","element":"span"},{"style":{"fontStyle":"italic"},"text":"matched-sensitivity ","element":"span"},{"text":"if given “similar” databases it assigns “similar” scores to “similar” solutions. 
Note that if a function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q ","element":"span"},{"text":"has matched-sensitivity 1, then in particular, it has (standard) sensitivity (at most) 1.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Example 5.2. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a concept class over ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"style":{"fontStyle":"italic"},"text":". Then, the score function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"S, h","element":"span"},{"text":") ","element":"span"},{"style":{"fontStyle":"italic"},"text":"that takes a labeled database ","element":"span"},{"style":{"height":17.6},"width":349.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-11.png","element":"img","alt":" S ∈ (X × {0, 1})∗ ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and a concept ","element":"span"},{"style":{"height":13.2},"width":129.25,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-12.png","element":"img","alt":" h ∈ H","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and returns the number of errors ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"style":{"fontStyle":"italic"},"text":"makes on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"style":{"fontStyle":"italic"},"text":"has matched-sensitivity at most 1.","element":"span"}],[{"id":"id-62","style":{"width":"100%"},"width":1872,"height":995,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-13.png","element":"img"}],[{"text":"We next present an algorithm 
","element":"span"},{"style":{"height":15.64},"width":293.71,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-14.png","element":"img","alt":" ARelabelAndLearn","inline":true,"padRight":true},{"text":"and analyze its properties. This algorithm is an abstraction of parts of ","element":"span"},{"style":{"height":17.9},"width":596.12,"height":44.75,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-15.png","element":"img","alt":" APrivateAgnostic and AClosureLearn","inline":true,"padRight":true},{"text":"and is used for unifying the proofs of privacy and correctness of these algorithms. We start with an informal description of algorithm ","element":"span"},{"style":{"height":15.64},"width":393.66,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-16.png","element":"img","alt":" ARelabelAndLearn. The","inline":true,"padRight":true},{"text":"algorithm first applies the relabeling algorithm ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-17.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"and then applies a private algorithm to the relabeled database. For the analysis of our algorithms in the sequence, ","element":"span"},{"style":{"height":15.64},"width":293.71,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-18.png","element":"img","alt":" ARelabelAndLearn","inline":true,"padRight":true},{"text":"also publishes part of the relabeled database. We prove that ","element":"span"},{"style":{"height":15.64},"width":293.71,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-19.png","element":"img","alt":" ARelabelAndLearn","inline":true,"padRight":true},{"text":"guarantees differential privacy w.r.t. 
the part of the database that it did not publish.","element":"span"}],[{"id":"id-63","text":"In Lemma ","element":"span"},{"href":"#id-63","text":"5.3, ","element":"a"},{"text":"we analyze the privacy properties of algorithm ","element":"span"},{"style":{"height":15.64},"width":306.55,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/18-20.png","element":"img","alt":" ARelabelAndLearn.","inline":true}],[{"style":{"width":"100%"},"width":1872,"height":826,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-0.png","element":"img"}],[{"style":{"fontWeight":"bold"},"text":"Lemma 5.3. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":17.6},"width":257.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-1.png","element":"img","alt":" A be an (ε, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private algorithm and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a score function with matched-sensitivity 1. 
Then, for every ","element":"span"},{"style":{"fontStyle":"italic"},"text":"V ","element":"span"},{"style":{"fontStyle":"italic"},"text":", algorithm","element":"span"}],[{"style":{"width":"100%"},"width":1884,"height":717,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-2.png","element":"img"}],[{"text":"Thus, ","element":"span"},{"style":{"height":17.6},"width":238.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-3.png","element":"img","alt":" |H1| ≤ 2|H2|","inline":true,"padRight":true},{"text":"and similarly ","element":"span"},{"style":{"height":17.6},"width":249.89,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-4.png","element":"img","alt":" |H2| ≤ 2|H1|.","inline":true}],[{"text":"More specifically, for every ","element":"span"},{"style":{"height":17.6},"width":175.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-5.png","element":"img","alt":" t ∈ {1, 2}","inline":true,"padRight":true},{"text":"and every pattern ","element":"span"},{"style":{"height":17.6},"width":213.74,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-6.png","element":"img","alt":" h ∈ ΠC(K)","inline":true,"padRight":true},{"text":"there are either one or two (but not more) patterns in ","element":"span"},{"style":{"height":14.62},"width":48.27,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-7.png","element":"img","alt":" Ht","inline":true,"padRight":true},{"text":"that agree with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"K","element":"span"},{"text":". 
We denote these one or two patterns by ","element":"span"},{"style":{"height":23.43},"width":357.85,"height":58.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-8.png","element":"img","alt":" h(0)t and h(1)t , which","inline":true,"padRight":true},{"text":"may be identical if only one unique pattern exists. By the fact that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q ","element":"span"},{"text":"has matched-sensitivity at most 1, for every ","element":"span"},{"style":{"height":17.6},"width":694.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-9.png","element":"img","alt":" t1, t2 ∈ {1, 2} and every b1, b2 ∈ {0, 1}","inline":true,"padRight":true},{"text":"we have that","element":"span"}],[{"style":{"width":"79%"},"width":1492,"height":64,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-10.png","element":"img"}],[{"text":"where the last inequality is because ","element":"span"},{"style":{"height":25.2},"width":255.31,"height":62.99,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/19-11.png","element":"img","alt":" h(b1)t1 and h(b2)t2","inline":true,"padRight":true},{"text":"agree on every point in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"D ","element":"span"},{"text":"and because ","element":"span"},{"style":{"fontStyle":"italic"},"text":"q ","element":"span"},{"text":"has matched-sensitivity at most 1.","element":"span"}],[{"text":"For every ","element":"span"},{"style":{"height":18.44},"width":691.99,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-0.png","element":"img","alt":" h ∈ ΠH(K) and t ∈ {1, 2}, let wt,h","inline":true,"padRight":true},{"text":"be the probability that the exponential mechanism chooses either 
","element":"span"},{"style":{"height":23.42},"width":195.89,"height":58.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-1.png","element":"img","alt":" h(0)t or h(1)t","inline":true,"padRight":true},{"text":"in Step ","element":"span"},{"href":"#id-62","text":"(3) ","element":"a"},{"text":"of the execution of ","element":"span"},{"style":{"height":15.64},"width":250.66,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-2.png","element":"img","alt":" ARelabel on Si","inline":true},{"text":". We get that for every ","element":"span"},{"style":{"height":17.6},"width":224.71,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-3.png","element":"img","alt":" h ∈ ΠC(K),","inline":true}],[{"style":{"width":"72%"},"width":1349,"height":692,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-4.png","element":"img"}],[{"text":"We are now ready to conclude the proof. For every ","element":"span"},{"style":{"height":17.6},"width":325.08,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-5.png","element":"img","alt":" h ∈ ΠH(K), let It","inline":true,"padRight":true},{"text":"be the event that the exponential mechanism chooses in Step ","element":"span"},{"href":"#id-62","text":"(3) ","element":"a"},{"text":"of the execution on ","element":"span"},{"style":{"height":23.42},"width":507.3,"height":58.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-6.png","element":"img","alt":" St either h(0)t or h(1)t and ht","inline":true,"padRight":true},{"text":"be the random variable denoting the pattern that the exponential mechanism chooses in Step ","element":"span"},{"href":"#id-62","text":"(3) ","element":"a"},{"text":"of the execution on ","element":"span"},{"style":{"height":15.02},"width":38.76,"height":37.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-7.png","element":"img","alt":" 
St","inline":true,"padRight":true},{"text":"conditioned on the event ","element":"span"},{"style":{"height":14.62},"width":31.18,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-8.png","element":"img","alt":" It","inline":true},{"text":". Observe that ","element":"span"},{"style":{"height":15.53},"width":222.26,"height":38.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-9.png","element":"img","alt":" Sh0 and Sh1 ","inline":true,"padRight":true},{"text":"are distributions on neighboring databases; thus, applying the differentially private ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"on them satisfies differential privacy, i.e., for every possible set of outputs ","element":"span"},{"style":{"fontStyle":"italic"},"text":"F ","element":"span"},{"text":"of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"text":":","element":"span"}],[{"style":{"width":"46%"},"width":878,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-10.png","element":"img"}],[{"text":"Recall that algorithm ","element":"span"},{"style":{"height":15.64},"width":532.67,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-11.png","element":"img","alt":" ARelabelAndLearn returns three","inline":true,"padRight":true},{"text":"outcomes: the relabeled database ","element":"span"},{"style":{"height":15.53},"width":55.15,"height":38.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-12.png","element":"img","alt":" V h","inline":true},{"text":", hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"that is consistent with ","element":"span"},{"style":{"height":15.54},"width":55.15,"height":38.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-13.png","element":"img","alt":" V h","inline":true},{"text":", 
and the output of algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"text":". As ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"is computed from ","element":"span"},{"style":{"height":15.54},"width":55.15,"height":38.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-14.png","element":"img","alt":" V h","inline":true},{"text":", we can consider it as post-processing and ignore it, and assume for the privacy analysis that ","element":"span"},{"style":{"height":16.8},"width":457.94,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-15.png","element":"img","alt":" ARelabelAndLearn only has","inline":true,"padRight":true},{"text":"two outputs: ","element":"span"},{"style":{"height":15.53},"width":55.15,"height":38.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-16.png","element":"img","alt":" V h ","inline":true,"padRight":true},{"text":"and the output of algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"text":". Also recall that the database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"V ","element":"span"},{"text":"is fixed, and observe that once the hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"is fixed (in Step ","element":"span"},{"href":"#id-62","text":"(3) ","element":"a"},{"text":"of algorithm ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-17.png","element":"img","alt":" ARelabel","inline":true},{"text":"), the relabeled database ","element":"span"},{"style":{"height":15.53},"width":55.15,"height":38.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-18.png","element":"img","alt":" V h ","inline":true,"padRight":true},{"text":"is also fixed. 
Furthermore, for every ","element":"span"},{"style":{"height":17.6},"width":216.21,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-19.png","element":"img","alt":" h ∈ ΠH(K)","inline":true,"padRight":true},{"text":"we have that ","element":"span"},{"style":{"height":24.94},"width":596.65,"height":62.35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-20.png","element":"img","alt":" V h(0)t = V h(1)t , since h(0)t and h(1)t","inline":true,"padRight":true},{"text":"agree on all of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"V ","element":"span"},{"text":".","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":17.6},"width":439.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-21.png","element":"img","alt":" F ⊆ (X × {0, 1})∗ × R","inline":true,"padRight":true},{"text":"be a set of possible outcomes for algorithm ","element":"span"},{"style":{"height":15.64},"width":511.78,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-22.png","element":"img","alt":" ARelabelAndLearn, where R is","inline":true,"padRight":true},{"text":"the range of algorithm ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"text":". 
For every ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"we denote","element":"span"}],[{"style":{"width":"29%"},"width":544,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-23.png","element":"img"}],[{"text":"Observe that for every ","element":"span"},{"style":{"height":17.6},"width":213.74,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-24.png","element":"img","alt":" h ∈ ΠC(K)","inline":true,"padRight":true},{"text":"we have that","element":"span"}],[{"style":{"width":"34%"},"width":647,"height":61,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/20-25.png","element":"img"}],[{"text":"because ","element":"span"},{"style":{"height":23.8},"width":337.67,"height":59.5,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-0.png","element":"img","alt":" h(0)1 , h(1)1 , h(0)2 , h(1)2","inline":true,"padRight":true},{"text":"agree on all points in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"V ","element":"span"},{"text":". We calculate,","element":"span"}],[{"style":{"width":"89%"},"width":1674,"height":527,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-1.png","element":"img"}],[{"text":"The next claim proves that ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-2.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"returns a hypothesis whose score is close to the hypothesis with smallest score in the class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":".","element":"span"}],[{"id":"id-71","style":{"fontWeight":"bold"},"text":"Claim 5.4. 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Fix ","element":"span"},{"style":{"height":17.6},"width":777.08,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-3.png","element":"img","alt":" α and β, and let S = D◦T ∈ (X × {0, 1})∗ ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a labeled database such that","element":"span"}],[{"style":{"width":"43%"},"width":805,"height":106,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-4.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Consider the execution of ","element":"span"},{"style":{"height":15.64},"width":410.58,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-5.png","element":"img","alt":" ARelabel on S, and let h","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"denote the hypothesis chosen on Step ","element":"span"},{"href":"#id-62","style":{"fontStyle":"italic"},"text":"(3)","element":"a"},{"style":{"fontStyle":"italic"},"text":". With probability at least ","element":"span"},{"style":{"height":17.6},"width":122.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-6.png","element":"img","alt":" (1−β)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"we have that ","element":"span"},{"style":{"height":17.6},"width":639.38,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-7.png","element":"img","alt":" q(D, h) ≤ minc∈H{q(D, c)}+α|D|","inline":true},{"style":{"fontStyle":"italic"},"text":". 
In particular, assuming that ","element":"span"},{"style":{"height":17.6},"width":228.92,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-8.png","element":"img","alt":" |D| ≥ |S|/2,","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"it suffices that","element":"span"}],[{"style":{"width":"40%"},"width":757,"height":101,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-9.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Proof. ","element":"span"},{"text":"Note that by Sauer-Shelah-Perles lemma,","element":"span"}],[{"style":{"width":"58%"},"width":1089,"height":117,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-10.png","element":"img"}],[{"text":"As ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"contains all patterns of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"restricted to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S","element":"span"},{"text":", the set ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"contains a pattern ","element":"span"},{"style":{"height":17.6},"width":350.45,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-11.png","element":"img","alt":" f∗ s.t. q(D, f∗) =","inline":true},{"style":{"height":17.6},"width":315.26,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-12.png","element":"img","alt":"minc∈H{q(D, c)}","inline":true},{"text":". 
Hence, Proposition ","element":"span"},{"href":"#id-64","text":"3.13 ","element":"a"},{"text":"(properties of the exponential mechanism) ensures that the probability of the exponential mechanism choosing an ","element":"span"},{"style":{"height":17.6},"width":689.33,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-13.png","element":"img","alt":" h s.t. q(D, h) > minc∈H{q(D, c)} + α","inline":true,"padRight":true},{"text":"is at most","element":"span"}],[{"style":{"width":"51%"},"width":972,"height":117,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-14.png","element":"img"}],[{"text":"which is at most ","element":"span"},{"style":{"height":31.6},"width":892.82,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-15.png","element":"img","alt":" β whenever |D| ≥ (2/α) ln(1/β) + (2 VC(H)/α) ln(e|S|/VC(H)).","inline":true}],[{"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"denote the hypothesis returned by ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"and let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"be a hypothesis consistent with the pattern chosen on Step ","element":"span"},{"href":"#id-62","text":"(3) ","element":"a"},{"text":"of ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-16.png","element":"img","alt":" ARelabel","inline":true},{"text":". 
The next lemma relates the generalization error ","element":"span"},{"style":{"height":17.6},"width":222.97,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-17.png","element":"img","alt":" errorP(f, h)","inline":true,"padRight":true},{"text":"to the empirical error ","element":"span"},{"style":{"height":17.6},"width":236.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-18.png","element":"img","alt":"errorD(f, h).","inline":true}],[{"id":"id-72","style":{"fontWeight":"bold"},"text":"Lemma 5.5. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Fix ","element":"span"},{"style":{"height":16.8},"width":322.42,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-19.png","element":"img","alt":" α and β, and let µ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a distribution on ","element":"span"},{"style":{"height":17.6},"width":320.75,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-20.png","element":"img","alt":" X × {0, 1} and P","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be the marginal distribution on unlabeled examples from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"style":{"fontStyle":"italic"},"text":". Furthermore, let ","element":"span"},{"style":{"height":17.6},"width":570.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-21.png","element":"img","alt":" S = D◦V ◦W ∈ (X × {0, 1})∗ ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a database sampled i.i.d. 
from ","element":"span"},{"style":{"height":16.4},"width":197.21,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-22.png","element":"img","alt":" µ such that","inline":true}],[{"style":{"width":"36%"},"width":692,"height":158,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/21-23.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"and","element":"span"}],[{"style":{"width":"30%"},"width":572,"height":151,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-0.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Consider the execution of ","element":"span"},{"style":{"height":15.64},"width":615.54,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-1.png","element":"img","alt":" ARelabelAndLearn on S, let h ∈ H","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a hypothesis consistent with the pattern chosen on Step ","element":"span"},{"href":"#id-62","style":{"fontStyle":"italic"},"text":"(3) ","element":"a"},{"style":{"fontStyle":"italic"},"text":"of ","element":"span"},{"style":{"height":15.64},"width":144.96,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-2.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and assume that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"style":{"fontStyle":"italic"},"text":"outputs some hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"style":{"fontStyle":"italic"},"text":". 
With probability at least ","element":"span"},{"style":{"height":17.6},"width":306.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-3.png","element":"img","alt":"1 − O(β + δ|D|)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"we have that","element":"span"}],[{"style":{"width":"35%"},"width":664,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-4.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Proof. ","element":"span"},{"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"be the third output of ","element":"span"},{"style":{"height":15.64},"width":293.72,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-5.png","element":"img","alt":" ARelabelAndLearn","inline":true},{"text":", i.e., a hypothesis from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"that is consistent with ","element":"span"},{"style":{"height":15.53},"width":67.67,"height":38.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-6.png","element":"img","alt":" V h.","inline":true,"padRight":true},{"text":"Since ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"agree on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"V ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"|","element":"span"},{"style":{"fontStyle":"italic"},"text":"V ","element":"span"},{"style":{"fontStyle":"italic"},"text":"| ","element":"span"},{"text":"is big enough, by Theorem ","element":"span"},{"href":"#id-48","text":"3.8, ","element":"a"},{"text":"with probability at least 
","element":"span"},{"style":{"height":16.4},"width":213.71,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-7.png","element":"img","alt":" 1 − β (over","inline":true,"padRight":true},{"text":"sampling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"V ","element":"span"},{"text":"),","element":"span"}],[{"id":"id-66","style":{"width":"58%"},"width":1095,"height":49,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-8.png","element":"img"}],[{"text":"Since ","element":"span"},{"style":{"fontStyle":"italic"},"text":"|","element":"span"},{"style":{"fontStyle":"italic"},"text":"D","element":"span"},{"style":{"fontStyle":"italic"},"text":"| ","element":"span"},{"text":"is big enough, by Theorem ","element":"span"},{"href":"#id-47","text":"3.9 ","element":"a"},{"text":"(applied to ","element":"span"},{"style":{"height":13.6},"width":123.62,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-9.png","element":"img","alt":" H ⊕ H","inline":true,"padRight":true},{"text":"and the distribution ","element":"span"},{"style":{"height":12},"width":26,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-10.png","element":"img","alt":" µ","inline":true,"padRight":true},{"text":"that samples ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"according to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"text":"and labels it with ","element":"span"},{"text":"0","element":"span"},{"text":"), with probability at least ","element":"span"},{"style":{"height":16.4},"width":113.14,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-11.png","element":"img","alt":" 1 − β,","inline":true}],[{"style":{"width":"68%"},"width":1289,"height":49,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-12.png","element":"img"}],[{"text":"We will now use 
the generalization properties of differential privacy to argue that ","element":"span"},{"style":{"height":17.6},"width":379.17,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-13.png","element":"img","alt":" errorP(f, h) is small.","inline":true,"padRight":true},{"text":"By Lemma ","element":"span"},{"href":"#id-63","text":"5.3, ","element":"a"},{"text":"algorithm ","element":"span"},{"style":{"height":17.6},"width":583.41,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-14.png","element":"img","alt":" ARelabelAndLearn is (O(1), O(δ))","inline":true},{"text":"-differentially private w.r.t. the database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"D","element":"span"},{"text":". In addition, by post-processing the outcomes of ","element":"span"},{"style":{"height":15.64},"width":293.71,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-15.png","element":"img","alt":" ARelabelAndLearn","inline":true,"padRight":true},{"text":"(the hypotheses ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h","element":"span"},{"text":") we can define the following predicate ","element":"span"},{"style":{"height":17.6},"width":1519.18,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-16.png","element":"img","alt":" test : X × {0, 1} → {0, 1} where test(x, y) = 1 if h(x) ̸= f(x), and test(x, y) = 0","inline":true,"padRight":true},{"text":"otherwise. 
Now observe that","element":"span"}],[{"style":{"width":"88%"},"width":1654,"height":77,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-17.png","element":"img"}],[{"text":"Similarly,","element":"span"}],[{"style":{"width":"83%"},"width":1568,"height":123,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-18.png","element":"img"}],[{"text":"Recall that ","element":"span"},{"text":"test ","element":"span"},{"text":"is the result of a private computation on the database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"D ","element":"span"},{"text":"(obtained as a post-processing of the outcomes of ","element":"span"},{"style":{"height":15.64},"width":293.71,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-19.png","element":"img","alt":" ARelabelAndLearn","inline":true},{"text":"). Also observe that since ","element":"span"},{"style":{"height":17.6},"width":581.37,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-20.png","element":"img","alt":" ARelabelAndLearn is (O(1), O(δ))","inline":true},{"text":"-differentially private, it is in particular,","element":"span"},{"style":{"height":31.6},"width":387.08,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-21.png","element":"img","alt":"(O(1), O(δ + β/|D|))","inline":true},{"text":"-differentially private for every choice of ","element":"span"},{"style":{"height":17.6},"width":319.3,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-22.png","element":"img","alt":" β and |D|. 
Hence,","inline":true,"padRight":true},{"text":"assuming ","element":"span"},{"style":{"height":31.6},"width":359.02,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-23.png","element":"img","alt":" |D| ≥ O((1/α) log(1/β))","inline":true},{"text":", Theorem ","element":"span"},{"href":"#id-65","text":"3.14 ","element":"a"},{"text":"(the generalization properties of differential privacy) states that with probability at least ","element":"span"},{"style":{"height":17.6},"width":317.53,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-24.png","element":"img","alt":" 1 − O(δ|D| + β),","inline":true}],[{"id":"id-67","style":{"width":"84%"},"width":1584,"height":504,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/22-25.png","element":"img"}],[{"text":"So, by ","element":"span"},{"href":"#id-66","text":"(9)","element":"a"},{"text":", ","element":"span"},{"href":"#id-66","text":"(10)","element":"a"},{"text":", and ","element":"span"},{"href":"#id-67","text":"(11)","element":"a"},{"text":", with probability at least ","element":"span"},{"style":{"height":17.6},"width":306.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-0.png","element":"img","alt":" 1 − O(β + δ|D|)","inline":true}],[{"style":{"width":"67%"},"width":1267,"height":49,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-1.png","element":"img"}],[{"text":"Thus, the next inequality, which concludes the proof, holds with probability ","element":"span"},{"style":{"height":17.6},"width":317.53,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-2.png","element":"img","alt":" 1 − O(β + δ|D|).","inline":true}],[{"style":{"width":"88%"},"width":1655,"height":345,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-3.png","element":"img"}]]},{"heading":"6 Private PAC Implies Private Agnostic PAC","paragraphs":[[{"text":"In this 
section we show that private learning implies private agnostic learning (with essentially the same sample complexity) even for improper learning algorithms. Algorithm ","element":"span"},{"style":{"height":17.9},"width":270.61,"height":44.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-4.png","element":"img","alt":" APrivateAgnostic","inline":true},{"text":", the agnostic algorithm for a class ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H","element":"span"},{"text":", first applies algorithm ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-5.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"on the data and relabels the sample using a hypothesis in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"text":"that has close to minimal empirical error, and then uses the private learning algorithm (after sub-sampling) to learn the relabeled database.","element":"span"}],[{"id":"id-70","style":{"width":"100%"},"width":1872,"height":632,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-6.png","element":"img"}],[{"style":{"fontWeight":"bold"},"text":"Theorem 6.1 ","element":"span"},{"text":"(Theorem ","element":"span"},{"href":"#id-68","text":"2.4 ","element":"a"},{"text":"Restated)","element":"span"},{"style":{"fontWeight":"bold"},"text":". 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":17.6},"width":744.46,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-7.png","element":"img","alt":" 0 < α, β, δ < 1, m ∈ N, and A be a (1, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private ","element":"span"},{"style":{"height":17.6},"width":108.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-8.png","element":"img","alt":"(α, β)","inline":true},{"style":{"fontStyle":"italic"},"text":"-accurate PAC learner for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with sample complexity ","element":"span"},{"style":{"height":18.3},"width":812.98,"height":45.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-9.png","element":"img","alt":" m. Then, APrivateAgnostic is an (O(1), O(δ))-","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"differentially private ","element":"span"},{"style":{"height":17.6},"width":669.37,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-10.png","element":"img","alt":" (O(α), O(β + δn))-accurate agnostic","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"learner for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"H ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with sample complexity","element":"span"}],[{"style":{"width":"38%"},"width":713,"height":105,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-11.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Proof. ","element":"span"},{"text":"The privacy properties of the algorithm are straightforward. 
Specifically, by Lemma ","element":"span"},{"href":"#id-69","text":"3.12, ","element":"a"},{"text":"Step ","element":"span"},{"href":"#id-70","text":"(3) ","element":"a"},{"text":"of the algorithm (applying ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"on a subsample from ","element":"span"},{"style":{"height":20.41},"width":449.69,"height":51.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-12.png","element":"img","alt":"˜D) satisfies (O(1), O(δ))","inline":true},{"text":"-differential privacy. Algorithm ","element":"span"},{"style":{"height":17.9},"width":270.61,"height":44.75,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-13.png","element":"img","alt":"APrivateAgnostic","inline":true,"padRight":true},{"text":"is, therefore, ","element":"span"},{"style":{"height":17.6},"width":233.1,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-14.png","element":"img","alt":" (O(1), O(δ))","inline":true},{"text":"-differentially private by Lemma ","element":"span"},{"href":"#id-63","text":"5.3. 
","element":"a"},{"text":"In particular, if ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is ","element":"span"},{"text":"(1","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"0)","element":"span"},{"text":"-differentially private then ","element":"span"},{"style":{"height":18.3},"width":489.06,"height":45.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/23-15.png","element":"img","alt":" APrivateAgnostic is (O(1), 0)","inline":true},{"text":"-differentially private.","element":"span"}],[{"text":"As for the utility analysis, fix a target distribution ","element":"span"},{"style":{"height":17.6},"width":324.1,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-0.png","element":"img","alt":" µ over X × {0, 1}","inline":true},{"text":", and denote","element":"span"}],[{"style":{"width":"20%"},"width":386,"height":65,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-1.png","element":"img"}],[{"text":"Also let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P ","element":"span"},{"text":"denote the marginal distribution on unlabeled examples from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"X","element":"span"},{"text":". Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"be a sample containing ","element":"span"},{"style":{"fontStyle":"italic"},"text":"n ","element":"span"},{"text":"i.i.d. samples from ","element":"span"},{"style":{"height":12},"width":26,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-2.png","element":"img","alt":" µ","inline":true},{"text":", and denote ","element":"span"},{"style":{"height":17.6},"width":653.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-3.png","element":"img","alt":" S = D◦T where |D| = |T| = |S|/2","inline":true},{"text":". 
By Theorem ","element":"span"},{"href":"#id-47","text":"3.9 ","element":"a"},{"text":"(the agnostic VC generalization bound), assuming that ","element":"span"},{"style":{"height":31.6},"width":574.26,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-4.png","element":"img","alt":" |S| ≥ O((1/α²)(VC(H) + ln(1/β)))","inline":true},{"text":", with probability at least ","element":"span"},{"style":{"height":16.4},"width":200.4,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-5.png","element":"img","alt":" 1 − β (over","inline":true,"padRight":true},{"text":"sampling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S","element":"span"},{"text":"), the following event occurs.","element":"span"}],[{"style":{"width":"55%"},"width":1047,"height":47,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-6.png","element":"img"}],[{"text":"We continue with the analysis assuming that this event occurs, and show that (w.h.p.) the hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"returned by the algorithm has low generalization error. Consider the execution of ","element":"span"},{"style":{"height":17.9},"width":433.56,"height":44.75,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-7.png","element":"img","alt":" APrivateAgnostic on S. In","inline":true,"padRight":true},{"text":"Step ","element":"span"},{"href":"#id-70","text":"(2) ","element":"a"},{"text":"we apply algorithm ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-8.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"to obtain the relabeled databases ","element":"span"},{"style":{"height":19.21},"width":317.83,"height":48.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-9.png","element":"img","alt":"˜D, ˜T. 
Let h ∈ H","inline":true,"padRight":true},{"text":"be a hypothesis extending the pattern used by algorithm ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-10.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"to relabel these databases. By Claim ","element":"span"},{"href":"#id-71","text":"5.4, ","element":"a"},{"text":"assuming that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"|","element":"span"},{"style":{"fontStyle":"italic"},"text":"D","element":"span"},{"style":{"fontStyle":"italic"},"text":"| ","element":"span"},{"text":"is big enough, with probability at least ","element":"span"},{"style":{"height":16.4},"width":101.15,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-11.png","element":"img","alt":" 1 − β","inline":true,"padRight":true},{"text":"it holds that","element":"span"}],[{"style":{"width":"66%"},"width":1246,"height":65,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-12.png","element":"img"}],[{"text":"In this case, by Event ","element":"span"},{"style":{"height":14.62},"width":49.22,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-13.png","element":"img","alt":" E1","inline":true,"padRight":true},{"text":"we have that","element":"span"}],[{"style":{"width":"94%"},"width":1772,"height":65,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-14.png","element":"img"}],[{"text":"Recall that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A ","element":"span"},{"text":"is executed on the database ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Q ","element":"span"},{"text":"containing ","element":"span"},{"style":{"height":20.41},"width":105.4,"height":51.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-15.png","element":"img","alt":" | 
˜D|/9","inline":true,"padRight":true},{"text":"i.i.d. samples from ","element":"span"},{"style":{"height":16.01},"width":36,"height":40.03,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-16.png","element":"img","alt":"˜D","inline":true},{"text":". By Lemma ","element":"span"},{"href":"#id-69","text":"3.12, ","element":"a"},{"text":"with probability at least ","element":"span"},{"style":{"height":16.4},"width":101.15,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-17.png","element":"img","alt":" 1 − β","inline":true},{"text":", the hypothesis ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"chosen in Step ","element":"span"},{"href":"#id-70","text":"(3) ","element":"a"},{"text":"satisfies","element":"span"}],[{"id":"id-73","style":{"width":"64%"},"width":1216,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-18.png","element":"img"}],[{"text":"By Lemma ","element":"span"},{"href":"#id-72","text":"5.5 ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-73","text":"(15) ","element":"a"},{"text":"with probability at least ","element":"span"},{"style":{"height":17.6},"width":306.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-19.png","element":"img","alt":" 1 − O(β + |D|δ)","inline":true}],[{"style":{"width":"99%"},"width":1870,"height":289,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-20.png","element":"img"}]]},{"heading":"7 Closure of Private Learning","paragraphs":[[{"text":"In this section we prove Theorem ","element":"span"},{"href":"#id-15","text":"7.1 ","element":"a"},{"text":"– if ","element":"span"},{"style":{"height":15.2},"width":207.6,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-21.png","element":"img","alt":" H1, . . . 
, Hk","inline":true,"padRight":true},{"text":"are privately learnable, then ","element":"span"},{"style":{"height":17.6},"width":396.68,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-22.png","element":"img","alt":" G(H1, . . . , Hk) is pri-","inline":true,"padRight":true},{"text":"vately learnable.","element":"span"}],[{"id":"id-15","style":{"fontWeight":"bold"},"text":"Theorem 7.1 ","element":"span"},{"text":"(Closure Theorem for Private Learning)","element":"span"},{"style":{"fontWeight":"bold"},"text":". ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":19.53},"width":420.91,"height":48.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-23.png","element":"img","alt":" G : {0, 1}k → {0, 1}","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a boolean function and ","element":"span"},{"style":{"height":19.53},"width":408.74,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-24.png","element":"img","alt":" H1, . . . 
, Hk ⊆ {0, 1}X ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be classes that are ","element":"span"},{"style":{"height":17.6},"width":94.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-25.png","element":"img","alt":" (ε, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private and ","element":"span"},{"style":{"height":17.6},"width":108.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-26.png","element":"img","alt":" (α, β)","inline":true},{"style":{"fontStyle":"italic"},"text":"-accurate learnable by possibly ","element":"span"},{"text":"improper ","element":"span"},{"style":{"fontStyle":"italic"},"text":"learning algorithms with sample complexity ","element":"span"},{"style":{"height":17.6},"width":240.44,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-27.png","element":"img","alt":" mi(α, β, ε, δ)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"respectively. Then, ","element":"span"},{"style":{"height":17.6},"width":561.18,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-28.png","element":"img","alt":"G(H1, . . . 
, Hk) is (O(1), O(δ))","inline":true},{"style":{"fontStyle":"italic"},"text":"-private and ","element":"span"},{"style":{"height":17.6},"width":353.66,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-29.png","element":"img","alt":" (O(α), O(β + δm))","inline":true},{"style":{"fontStyle":"italic"},"text":"-accurate learnable with sample complexity","element":"span"}],[{"style":{"width":"69%"},"width":1297,"height":159,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/24-30.png","element":"img"}],[{"text":"To prove Theorem ","element":"span"},{"href":"#id-15","text":"7.1, ","element":"a"},{"text":"we present ","element":"span"},{"style":{"height":15.64},"width":231.39,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-0.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"– a generic transformation of private learning algorithms ","element":"span"},{"style":{"height":16},"width":210.38,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-1.png","element":"img","alt":" A1, . . . , Ak","inline":true,"padRight":true},{"text":"for the classes ","element":"span"},{"style":{"height":15.2},"width":207.6,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-2.png","element":"img","alt":" H1, . . . , Hk","inline":true,"padRight":true},{"text":"respectively to a private learner for ","element":"span"},{"style":{"height":17.6},"width":389.85,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-3.png","element":"img","alt":" G(H1, . . . , Hk). This","inline":true,"padRight":true},{"text":"transformation could be applied to proper as well as improper learners, and to learners that preserve pure or approximate privacy. 
Given a labeled sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"of size ","element":"span"},{"style":{"fontStyle":"italic"},"text":"N","element":"span"},{"text":", algorithm ","element":"span"},{"style":{"height":15.64},"width":231.38,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-4.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"finds hypotheses ","element":"span"},{"style":{"height":15.6},"width":184.19,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-5.png","element":"img","alt":" h1, . . . , hk","inline":true,"padRight":true},{"text":"in steps, where in the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i","element":"span"},{"text":"’th step, the algorithm finds a hypothesis ","element":"span"},{"style":{"height":15.6},"width":412,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-6.png","element":"img","alt":" hi such that h1, . . . , hi","inline":true,"padRight":true},{"text":"have a completion ","element":"span"},{"style":{"height":11.82},"width":209.54,"height":29.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-7.png","element":"img","alt":" ci+1, . . . , ck","inline":true,"padRight":true},{"text":"to a hypothesis ","element":"span"},{"style":{"height":17.6},"width":482.69,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-8.png","element":"img","alt":" G(h1, . . . , hi, ci+1, . . . , ck)","inline":true,"padRight":true},{"text":"with small error (assuming that ","element":"span"},{"style":{"height":15.6},"width":221.06,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-9.png","element":"img","alt":"h1, . . . , hi−1","inline":true,"padRight":true},{"text":"have a good completion). 
In the ","element":"span"},{"style":{"height":16.8},"width":396.64,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-10.png","element":"img","alt":" i’th step, AClosureLearn","inline":true,"padRight":true},{"text":"relabels the input sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S ","element":"span"},{"text":"so that the relabeled sample is realizable by ","element":"span"},{"style":{"height":14.62},"width":48.85,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-11.png","element":"img","alt":" Hi","inline":true},{"text":". The relabeling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"is chosen using ","element":"span"},{"style":{"height":15.64},"width":144.97,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-12.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"in a way that guarantees completion to a hypothesis with small empirical error. That is, using an appropriate score-function in ","element":"span"},{"style":{"height":15.64},"width":144.96,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-13.png","element":"img","alt":" ARelabel","inline":true,"padRight":true},{"text":"(i.e., in the exponential mechanism), it is guaranteed that for the hypotheses ","element":"span"},{"style":{"height":15.6},"width":221.06,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-14.png","element":"img","alt":" h1, . . . , hi−1","inline":true,"padRight":true},{"text":"computed in the previous steps there are some ","element":"span"},{"style":{"height":15.82},"width":423.92,"height":39.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-15.png","element":"img","alt":" ci+1 ∈ Hi, . . . 
, ck ∈ Hk","inline":true,"padRight":true},{"text":"such that the function ","element":"span"},{"style":{"height":17.6},"width":567.22,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-16.png","element":"img","alt":" G(h1, . . . , hi−1, h, ci+1, . . . , ck)","inline":true,"padRight":true},{"text":"has a small loss with respect to the original sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S","element":"span"},{"text":". The relabeled sample is fed (after subsampling) to the private algorithm ","element":"span"},{"style":{"height":15.42},"width":46.84,"height":38.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-17.png","element":"img","alt":" Ai","inline":true,"padRight":true},{"text":"to produce a hypothesis ","element":"span"},{"style":{"height":15.02},"width":37.14,"height":37.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-18.png","element":"img","alt":" hi","inline":true,"padRight":true},{"text":"and then the algorithm proceeds to the next step ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i ","element":"span"},{"text":"+ 1","element":"span"},{"text":".","element":"span"}],[{"id":"id-75","style":{"width":"100%"},"width":1872,"height":1046,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-19.png","element":"img"}],[{"text":"In Lemma ","element":"span"},{"href":"#id-74","text":"7.2, ","element":"a"},{"text":"we analyze the privacy guarantees of ","element":"span"},{"style":{"height":15.64},"width":244.24,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-20.png","element":"img","alt":" AClosureLearn.","inline":true}],[{"id":"id-74","style":{"fontWeight":"bold"},"text":"Lemma 7.2. 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":12.4},"width":110.48,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-21.png","element":"img","alt":" ε < 1","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and assume the algorithms ","element":"span"},{"style":{"height":17.6},"width":386.03,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-22.png","element":"img","alt":" A1, . . . , Ak are (1, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-private. Then, ","element":"span"},{"style":{"height":15.64},"width":275.97,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-23.png","element":"img","alt":" AClosureLearn is","inline":true},{"style":{"height":17.6},"width":163.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-24.png","element":"img","alt":"(ε, O(δ))","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private.","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"Proof. ","element":"span"},{"text":"Fix ","element":"span"},{"style":{"height":17.6},"width":146.43,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-25.png","element":"img","alt":" i ∈ [k]","inline":true,"padRight":true},{"text":"and consider the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i","element":"span"},{"text":"’th step of the algorithm. 
","element":"span"},{"text":"By Lemma ","element":"span"},{"href":"#id-69","text":"3.12, ","element":"a"},{"text":"Step ","element":"span"},{"href":"#id-75","text":"(2c) ","element":"a"},{"text":"of algorithm ","element":"span"},{"style":{"height":15.64},"width":231.39,"height":39.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-26.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"(i.e., sub-sampling with replacement and executing a ","element":"span"},{"style":{"height":17.6},"width":96.23,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-27.png","element":"img","alt":" (1, δ)","inline":true},{"text":"-private algorithm) is ","element":"span"},{"style":{"height":17.6},"width":111.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-28.png","element":"img","alt":" (1, δ)-","inline":true,"padRight":true},{"text":"differentially private. Thus, by Lemma ","element":"span"},{"href":"#id-63","text":"5.3, ","element":"a"},{"text":"Steps ","element":"span"},{"href":"#id-75","text":"(2b)","element":"a"},{"text":"–","element":"span"},{"href":"#id-75","text":"(2c) ","element":"a"},{"text":"of algorithm ","element":"span"},{"style":{"height":17.6},"width":562.18,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-29.png","element":"img","alt":" AClosureLearn are (O(1), O(δ))-","inline":true,"padRight":true},{"text":"differentially private. 
Since each step is executed on a disjoint set of examples, ","element":"span"},{"style":{"height":17.6},"width":527,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-30.png","element":"img","alt":" AClosureLearn is (O(1), O(δ))-","inline":true,"padRight":true},{"text":"differentially private.","element":"span"}],[{"id":"id-80","text":"In the next lemma we prove that ","element":"span"},{"style":{"height":15.64},"width":231.39,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-31.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"is an accurate learner for the class ","element":"span"},{"style":{"height":17.6},"width":292.64,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/25-32.png","element":"img","alt":" G(H1, . . . , Hk).","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Lemma 7.3. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Assume that ","element":"span"},{"style":{"height":17.6},"width":384.39,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-0.png","element":"img","alt":" A1, . . . , At are (1, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":"-differentially private ","element":"span"},{"style":{"height":17.6},"width":200.24,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-1.png","element":"img","alt":" (α/k, β/k)","inline":true},{"style":{"fontStyle":"italic"},"text":"-accurate (possibly improper) learning algorithms for ","element":"span"},{"style":{"height":15.2},"width":207.6,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-2.png","element":"img","alt":" H1, . . . 
, Hk","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"with sample complexity ","element":"span"},{"style":{"height":17.6},"width":333.73,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-3.png","element":"img","alt":" mi(α/k, β/k, 1, δ)","inline":true},{"style":{"fontStyle":"italic"},"text":". If at each iteration","element":"span"}],[{"style":{"width":"82%"},"width":1551,"height":188,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-4.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"then with probability at least ","element":"span"},{"style":{"height":17.6},"width":332.78,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-5.png","element":"img","alt":" 1 − O(β + kδ|Si|)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"we have that ","element":"span"},{"style":{"height":17.6},"width":485,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-6.png","element":"img","alt":" errorP(c) ≤ O(α), where c","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is the hypothesis returned by ","element":"span"},{"style":{"height":15.64},"width":338.97,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-7.png","element":"img","alt":" AClosureLearn on S.","inline":true}],[{"style":{"fontStyle":"italic"},"text":"Proof. ","element":"span"},{"text":"Let ","element":"span"},{"style":{"height":15.6},"width":184.18,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-8.png","element":"img","alt":" h1, . . . 
, hk","inline":true,"padRight":true},{"text":"be the hypotheses that ","element":"span"},{"style":{"height":15.64},"width":231.38,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-9.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"computes in Step ","element":"span"},{"href":"#id-75","text":"(2c)","element":"a"},{"text":". We prove by induction that for every ","element":"span"},{"style":{"height":17.6},"width":116.58,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-10.png","element":"img","alt":" i ∈ [k]","inline":true,"padRight":true},{"text":"with probability at least ","element":"span"},{"style":{"height":24.22},"width":414.04,"height":60.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-11.png","element":"img","alt":" 1− O(i)·βk +O(i·δ|Si|)","inline":true,"padRight":true},{"text":"there exist ","element":"span"},{"style":{"height":15.82},"width":467.2,"height":39.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-12.png","element":"img","alt":" ci+1 ∈ Hi+1, . . . , ck ∈ Hk","inline":true,"padRight":true},{"text":"such that","element":"span"}],[{"style":{"width":"72%"},"width":1353,"height":80,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-13.png","element":"img"}],[{"text":"The induction basis for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i ","element":"span"},{"text":"= 0 ","element":"span"},{"text":"is implied by the fact that the examples are labeled by some ","element":"span"},{"style":{"height":17.6},"width":242.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-14.png","element":"img","alt":" G(c1, . . . 
, ck)","inline":true,"padRight":true},{"text":"from ","element":"span"},{"style":{"height":17.6},"width":278.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-15.png","element":"img","alt":" G(H1, . . . , Hk)","inline":true},{"text":". For the induction step, assume that there are ","element":"span"},{"style":{"height":15.6},"width":551.24,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-16.png","element":"img","alt":" ci ∈ Hi, . . . , ck ∈ Hk such that","inline":true}],[{"style":{"width":"55%"},"width":1041,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-17.png","element":"img"}],[{"text":"We need to prove that with probability at least ","element":"span"},{"style":{"height":24.29},"width":1045.24,"height":60.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-18.png","element":"img","alt":" 1 − O(1)·βk − O(δ|Si|) there are c′i+1 ∈ Hi+1, . . . , c′k ∈ Hk","inline":true,"padRight":true},{"text":"such that","element":"span"}],[{"style":{"width":"50%"},"width":954,"height":80,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-19.png","element":"img"}],[{"text":"Recall that each example in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"S","element":"span"},{"text":", and hence in ","element":"span"},{"style":{"height":15.02},"width":38.76,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-20.png","element":"img","alt":" Si","inline":true},{"text":", is chosen i.i.d. from the distribution in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P","element":"span"},{"text":". 
Since","element":"span"}],[{"id":"id-78","style":{"width":"73%"},"width":1367,"height":164,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-21.png","element":"img"}],[{"text":"by Theorem ","element":"span"},{"href":"#id-47","text":"3.9 ","element":"a"},{"text":"applied to ","element":"span"},{"style":{"height":17.6},"width":617.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-22.png","element":"img","alt":" G(H1, . . . , Hk) ⊕ G(H1, . . . , Hk)","inline":true},{"text":", with probability at least ","element":"span"},{"style":{"height":23.11},"width":288.4,"height":57.78,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-23.png","element":"img","alt":" 1 − βk (over the","inline":true,"padRight":true},{"text":"sampling of ","element":"span"},{"style":{"height":15.02},"width":38.76,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-24.png","element":"img","alt":" Si","inline":true},{"text":") the following event occurs:","element":"span"}],[{"style":{"width":"69%"},"width":1302,"height":49,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-25.png","element":"img"}],[{"text":"We continue proving the induction step assuming that ","element":"span"},{"style":{"height":14.62},"width":49.22,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-26.png","element":"img","alt":" E1","inline":true,"padRight":true},{"text":"occurs. 
The proof of the induction step is as follows: Since ","element":"span"},{"style":{"height":14.62},"width":187.94,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-27.png","element":"img","alt":" E1 occurs:","inline":true}],[{"style":{"width":"76%"},"width":1436,"height":253,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-28.png","element":"img"}],[{"text":"By the definition of ","element":"span"},{"style":{"height":17.42},"width":451.51,"height":43.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-29.png","element":"img","alt":" H, there is h = hopt ∈ H","inline":true,"padRight":true},{"text":"that agrees with ","element":"span"},{"style":{"height":15.02},"width":136.62,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-30.png","element":"img","alt":" ci on Si","inline":true},{"text":", and therefore","element":"span"}],[{"style":{"width":"33%"},"width":626,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/26-31.png","element":"img"}],[{"text":"By Claim ","element":"span"},{"href":"#id-71","text":"5.4, ","element":"a"},{"text":"if","element":"span"}],[{"style":{"width":"99%"},"width":1870,"height":327,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-0.png","element":"img"}],[{"text":"We assume that the above event occurs, thus, the latter implies that there are ","element":"span"},{"style":{"height":18.49},"width":380.15,"height":46.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-1.png","element":"img","alt":" c′i+1, . . . 
, c′k such that","inline":true}],[{"style":{"width":"79%"},"width":1486,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-2.png","element":"img"}],[{"text":"Since ","element":"span"},{"style":{"height":14.62},"width":49.21,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-3.png","element":"img","alt":" E1","inline":true,"padRight":true},{"text":"occurs, by ","element":"span"},{"href":"#id-76","text":"(22)","element":"a"},{"text":",","element":"span"}],[{"id":"id-77","style":{"width":"92%"},"width":1733,"height":188,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-4.png","element":"img"}],[{"text":"Since","element":"span"}],[{"id":"id-76","style":{"width":"61%"},"width":1156,"height":92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-5.png","element":"img"}],[{"text":"Lemma ","element":"span"},{"href":"#id-69","text":"3.12 ","element":"a"},{"text":"implies that Step ","element":"span"},{"href":"#id-75","text":"(2c) ","element":"a"},{"text":"of ","element":"span"},{"style":{"height":23.11},"width":469.81,"height":57.78,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-6.png","element":"img","alt":" AClosureLearn is an ( αk , βk )","inline":true,"padRight":true},{"text":"empirical learner and, therefore, with ","element":"span"},{"text":"probability at least ","element":"span"},{"style":{"height":23.11},"width":99.94,"height":57.77,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-7.png","element":"img","alt":" 1 − βk","inline":true}],[{"style":{"width":"66%"},"width":1250,"height":80,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-8.png","element":"img"}],[{"text":"Again, we assume in the rest of the proof that the above event occurs. 
By Lemma ","element":"span"},{"href":"#id-72","text":"5.5, ","element":"a"},{"text":"since","element":"span"}],[{"id":"id-79","style":{"width":"70%"},"width":1327,"height":164,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-9.png","element":"img"}],[{"text":"with probability at least ","element":"span"},{"style":{"height":24.22},"width":375.09,"height":60.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-10.png","element":"img","alt":" 1 − O(β)/k − O(δ|Di|)","inline":true}],[{"style":{"width":"39%"},"width":748,"height":81,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-11.png","element":"img"}],[{"text":"Thus, by ","element":"span"},{"href":"#id-76","text":"(25)","element":"a"},{"text":", with probability at least ","element":"span"},{"style":{"height":24.22},"width":153.4,"height":60.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-12.png","element":"img","alt":" 1 − O(β)/k","inline":true}],[{"style":{"width":"71%"},"width":1331,"height":105,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-13.png","element":"img"}],[{"text":"The latter, combined with ","element":"span"},{"href":"#id-77","text":"(23)","element":"a"},{"text":", implies the induction step: with probability at least ","element":"span"},{"style":{"height":24.22},"width":375.09,"height":60.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-14.png","element":"img","alt":" 1 − O(β)/k − O(δ|Di|)","inline":true}],[{"style":{"width":"63%"},"width":1187,"height":227,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/27-15.png","element":"img"}],[{"text":"By ","element":"span"},{"href":"#id-78","text":"(19)","element":"a"},{"text":", ","element":"span"},{"href":"#id-76","text":"(21)","element":"a"},{"text":", ","element":"span"},{"href":"#id-76","text":"(24)","element":"a"},{"text":", and 
","element":"span"},{"href":"#id-79","text":"(26)","element":"a"},{"text":", the sample complexity ","element":"span"},{"style":{"height":17.6},"width":154.67,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-0.png","element":"img","alt":" |Si| the i","inline":true},{"text":"’th step is","element":"span"}],[{"style":{"width":"94%"},"width":1760,"height":343,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-1.png","element":"img"}],[{"text":"To conclude, by a union bound, ","element":"span"},{"style":{"height":15.64},"width":231.38,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-2.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"returns, with probability at least ","element":"span"},{"style":{"height":22},"width":434.38,"height":55.01,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-3.png","element":"img","alt":" 1 − O(β + δ ∑_{i=1}^{k} |Si|),","inline":true,"padRight":true},{"text":"a hypothesis ","element":"span"},{"style":{"height":17.6},"width":254.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-4.png","element":"img","alt":" G(h1, . . . , hk)","inline":true,"padRight":true},{"text":"with error less than ","element":"span"},{"style":{"height":17.6},"width":96.55,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-5.png","element":"img","alt":" O(α)","inline":true,"padRight":true},{"text":"with respect to the distribution ","element":"span"},{"style":{"fontStyle":"italic"},"text":"P","element":"span"},{"text":".","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Proof of Theorem ","element":"span"},{"href":"#id-15","style":{"fontWeight":"bold"},"text":"7.1.","element":"a"}],[{"style":{"fontStyle":"italic"},"text":"Proof. 
","element":"span"},{"text":"Theorem ","element":"span"},{"href":"#id-15","text":"7.1 ","element":"a"},{"text":"follows from Lemmas ","element":"span"},{"href":"#id-74","text":"7.2 ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-80","text":"7.3. ","element":"a"},{"text":"Specifically, by Lemma ","element":"span"},{"href":"#id-80","text":"7.3, ","element":"a"},{"text":"to prove that ","element":"span"},{"style":{"height":15.64},"width":231.39,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-6.png","element":"img","alt":" AClosureLearn","inline":true,"padRight":true},{"text":"is ","element":"span"},{"style":{"height":17.6},"width":358,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-7.png","element":"img","alt":" (O(α), O(β + δm))","inline":true},{"text":"-accurate it suffices that","element":"span"}],[{"style":{"width":"71%"},"width":1347,"height":134,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-8.png","element":"img"}],[{"text":"By Lemma ","element":"span"},{"href":"#id-74","text":"7.2, ","element":"a"},{"style":{"height":17.6},"width":517.28,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-9.png","element":"img","alt":" AClosureLearn is (O(1), O(δ))","inline":true},{"text":"-differentially private.","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"Remark ","element":"span"},{"text":"7.4","element":"span"},{"style":{"fontStyle":"italic"},"text":". 
","element":"span"},{"text":"Since each ","element":"span"},{"style":{"height":17.6},"width":259.82,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-10.png","element":"img","alt":" Ai is an (α, β)","inline":true},{"text":"-accurate learning algorithm for the class ","element":"span"},{"style":{"height":14.8},"width":66.78,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-11.png","element":"img","alt":" H1,","inline":true}],[{"style":{"width":"35%"},"width":663,"height":106,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-12.png","element":"img"}],[{"text":"Furthermore, by the Sauer-Shelah-Perles Lemma, ","element":"span"},{"style":{"height":22},"width":963.01,"height":55.01,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-13.png","element":"img","alt":" VC(G(H1, . . . , Hk) = ˜O(�ki=1 VC(Hi)). Thus, the","inline":true,"padRight":true},{"text":"sample complexity of ","element":"span"},{"style":{"height":15.64},"width":273.28,"height":39.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-14.png","element":"img","alt":" AClosureLearn is","inline":true}],[{"style":{"width":"49%"},"width":931,"height":133,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-15.png","element":"img"}],[{"text":"For constant ","element":"span"},{"style":{"height":16.4},"width":116.98,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-16.png","element":"img","alt":" k, α, β","inline":true,"padRight":true},{"text":"this is nearly tight. By using sub-sampling (see e.g., ","element":"span"},{"href":"#id-9","referenceIndex":31,"text":"Kasiviswanathan et al. ","element":"a"},{"href":"#id-9","referenceIndex":31,"text":"[2011]","element":"a"},{"text":", ","element":"span"},{"href":"#id-81","referenceIndex":6,"text":"Beimel et al. 
","element":"a"},{"href":"#id-81","referenceIndex":6,"text":"[2014]","element":"a"},{"text":"), we can achieve ","element":"span"},{"style":{"height":17.6},"width":163.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-17.png","element":"img","alt":" (ε, O(δ))","inline":true},{"text":"-differential privacy by increasing the sample complexity by a factor of ","element":"span"},{"style":{"height":17.6},"width":132.45,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-18.png","element":"img","alt":" O(1/ε)","inline":true},{"text":". Furthermore, by using private boosting ","element":"span"},{"href":"#id-82","referenceIndex":22,"text":"Dwork et al. ","element":"a"},{"href":"#id-82","referenceIndex":22,"text":"[2010]","element":"a"},{"text":", one can start with a private algorithm that is, for example, ","element":"span"},{"style":{"height":17.6},"width":145.8,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-19.png","element":"img","alt":" (1/4, β)","inline":true,"padRight":true},{"text":"accurate and get a private algorithm that is ","element":"span"},{"style":{"height":17.6},"width":108.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-20.png","element":"img","alt":" (α, β)","inline":true,"padRight":true},{"text":"by increasing the sample complexity by a factor of ","element":"span"},{"style":{"height":17.6},"width":140.18,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-21.png","element":"img","alt":" O(1/α)","inline":true},{"text":", and by simple technique, one can boost ","element":"span"},{"style":{"height":16.4},"width":26,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-22.png","element":"img","alt":" β","inline":true,"padRight":true},{"text":"by increasing the sample complexity by a factor of 
","element":"span"},{"style":{"height":17.6},"width":229.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-23.png","element":"img","alt":" O(log(1/β))","inline":true},{"text":". Thus, we get an ","element":"span"},{"style":{"height":17.6},"width":163.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-24.png","element":"img","alt":" (ε, O(δ))","inline":true},{"text":"-differentially private ","element":"span"},{"style":{"height":17.6},"width":123.39,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-25.png","element":"img","alt":" (α, β)-","inline":true,"padRight":true},{"text":"accurate learner for ","element":"span"},{"style":{"height":17.6},"width":278.35,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-26.png","element":"img","alt":" G(H1, . . . , Hk)","inline":true,"padRight":true},{"text":"whose sample complexity is","element":"span"}],[{"style":{"width":"52%"},"width":987,"height":127,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2003.04509/images/28-27.png","element":"img"}]]},{"heading":"Acknowledgements","paragraphs":[[{"text":"We thank Adam Klivans and Roi Livni for insightful discussions.","element":"span"}]]},{"heading":"References","paragraphs":[[{"id":"id-4","text":"Jacob D. Abernethy, Chansoo Lee, Audra McMillan, and Ambuj Tewari. Online learning via differential ","element":"span"},{"text":"privacy. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"CoRR","element":"span"},{"text":", abs/1711.10019, 2017. URL ","element":"span"},{"href":"http://arxiv.org/abs/1711.10019","style":{"fontFamily":"monospace"},"text":"http://arxiv.org/abs/1711.10019","element":"a"},{"text":".","element":"span"}],[{"id":"id-8","text":"Noga Alon, Roi Livni, Maryanthe Malliaris, and Shay Moran. Private PAC learning implies finite Littlestone ","element":"span"},{"text":"dimension. 
In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 51st Annual ACM Symposium on the Theory of Computing","element":"span"},{"text":", STOC ’19, New York, NY, USA, 2019. ACM.","element":"span"}],[{"id":"id-46","text":"Martin Anthony and John Shawe-Taylor. A result of Vapnik with applications. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Discrete Applied Mathematics","element":"span"},{"text":", 47(3):207–217, 1993.","element":"span"}],[{"id":"id-45","text":"Martin Anthony and Peter L. Bartlett. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Neural Network Learning: Theoretical Foundations","element":"span"},{"text":". Cambridge University Press, 2009. ","element":"span"},{"text":"ISBN 9780521118620. ","element":"span"},{"text":"URL ","element":"span"},{"href":"http://books.google.co.il/books?id=UH6XRoEQ4h8C","style":{"fontFamily":"monospace"},"text":"http://books.google.co.il/books?id= ","element":"a"},{"href":"http://books.google.co.il/books?id=UH6XRoEQ4h8C","style":{"fontFamily":"monospace"},"text":"UH6XRoEQ4h8C","element":"a"},{"text":".","element":"span"}],[{"id":"id-33","text":"Raef Bassily, Kobbi Nissim, Adam Smith, Thomas Steinke, Uri Stemmer, and Jonathan Ullman. Algo- ","element":"span"},{"text":"rithmic stability for adaptive data analysis. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 48th Annual ACM Symposium on the Theory of Computing","element":"span"},{"text":", STOC ’16, pages 1046–1059, New York, NY, USA, 2016. ACM.","element":"span"}],[{"id":"id-81","text":"Amos Beimel, Hai Brenner, Shiva Prasad Kasiviswanathan, and Kobbi Nissim. Bounds on the sample ","element":"span"},{"text":"complexity for private learning and private data release. 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Machine Learning","element":"span"},{"text":", 94(3):401–437, 2014.","element":"span"}],[{"id":"id-24","text":"Amos Beimel, Kobbi Nissim, and Uri Stemmer. 
Learning privately with labeled and unlabeled examples. ","element":"span"},{"text":"In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 26th Annual ACM-SIAM Symposium on Discrete Algorithms","element":"span"},{"text":", SODA ’15, pages 461–477, Philadelphia, PA, USA, 2015. SIAM.","element":"span"}],[{"id":"id-22","text":"Amos Beimel, Kobbi Nissim, and Uri Stemmer. Characterizing the sample complexity of pure private learn- ","element":"span"},{"text":"ers. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Journal of Machine Learning Research","element":"span"},{"text":", 20(146):1–33, 2019. URL ","element":"span"},{"href":"http://jmlr.org/papers/v20/18-269.html","style":{"fontFamily":"monospace"},"text":"http://jmlr.org/papers/ ","element":"a"},{"href":"http://jmlr.org/papers/v20/18-269.html","style":{"fontFamily":"monospace"},"text":"v20/18-269.html","element":"a"},{"text":".","element":"span"}],[{"id":"id-1","text":"Shai Ben-David, Dávid Pál, and Shai Shalev-Shwartz. Agnostic online learning. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"COLT 2009 – The 22nd Conference on Learning Theory","element":"span"},{"text":", 2009. URL ","element":"span"},{"href":"http://www.cs.mcgill.ca/%7Ecolt2009/papers/032.pdf#page=1","style":{"fontFamily":"monospace"},"text":"http://www.cs.mcgill.ca/%7Ecolt2009/papers/ ","element":"a"},{"href":"http://www.cs.mcgill.ca/%7Ecolt2009/papers/032.pdf#page=1","style":{"fontFamily":"monospace"},"text":"032.pdf#page=1","element":"a"},{"text":".","element":"span"}],[{"id":"id-30","text":"Alina Beygelzimer, Satyen Kale, and Haipeng Luo. Optimal and adaptive algorithms for online boosting. ","element":"span"},{"text":"In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"International Conference on Machine Learning","element":"span"},{"text":", pages 2323–2331, 2015.","element":"span"}],[{"id":"id-26","text":"Siddharth Bhaskar. Thicket density. 
Technical Report arXiv:1702.03956, arXiv, 2017.","element":"span"}],[{"id":"id-44","text":"Anselm Blumer, Andrzej Ehrenfeucht, David Haussler, and Manfred K. Warmuth. Learnability and the ","element":"span"},{"text":"Vapnik-Chervonenkis dimension. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Journal of the ACM","element":"span"},{"text":", 36(4):929–965, 1989.","element":"span"}],[{"id":"id-31","text":"Nataly Brukhim, Xinyi Chen, Elad Hazan, and Shay Moran. Online agnostic boosting via regret minimiza- ","element":"span"},{"text":"tion. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"CoRR","element":"span"},{"text":", abs/2003.01150, 2020.","element":"span"}],[{"id":"id-7","text":"Mark Bun, Kobbi Nissim, Uri Stemmer, and Salil Vadhan. Differentially private release and learning of ","element":"span"},{"text":"threshold functions. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 56th Annual IEEE Symposium on Foundations of Computer Science","element":"span"},{"text":", FOCS ’15, pages 634–649, Washington, DC, USA, 2015. IEEE Computer Society.","element":"span"}],[{"id":"id-3","text":"Mark Bun, Roi Livni, and Shay Moran. An equivalence between private classification and online prediction. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"CoRR","element":"span"},{"text":", abs/2003.00563, 2020. URL ","element":"span"},{"href":"https://arxiv.org/abs/2003.00563","style":{"fontFamily":"monospace"},"text":"https://arxiv.org/abs/2003.00563","element":"a"},{"text":".","element":"span"}],[{"id":"id-27","text":"Hunter Chase and James Freitag. Model theory and combinatorics of banned sequences, 2018.","element":"span"}],[{"id":"id-16","text":"Hunter Chase and James Freitag. Model theory and machine learning. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"The Bulletin of Symbolic Logic","element":"span"},{"text":", 25(03):319–332, Feb 2019. ISSN 1943-5894. doi: 10.1017/bsl.2018.71. 
URL ","element":"span"},{"href":"http://dx.doi.org/10.1017/bsl.2018.71","style":{"fontFamily":"monospace"},"text":"http://dx.doi.org/10. ","element":"a"},{"href":"http://dx.doi.org/10.1017/bsl.2018.71","style":{"fontFamily":"monospace"},"text":"1017/bsl.2018.71","element":"a"},{"text":".","element":"span"}],[{"id":"id-29","text":"Shang-Tse Chen, Hsuan-Tien Lin, and Chi-Jen Lu. An online boosting algorithm with theoretical justi- ","element":"span"},{"text":"fications. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 29th International Coference on International Conference on Machine Learning","element":"span"},{"text":", ICML ’12, pages 1873–1880, Madison, WI, USA, 2012. Omnipress. ISBN 9781450312851.","element":"span"}],[{"id":"id-25","text":"R. M. Dudley. Central limit theorems for empirical measures. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Ann. Probab.","element":"span"},{"text":", 6(6):899–929, 12 1978. doi: 10.1214/aop/1176995384. URL ","element":"span"},{"href":"https://doi.org/10.1214/aop/1176995384","style":{"fontFamily":"monospace"},"text":"https://doi.org/10.1214/aop/1176995384","element":"a"},{"text":".","element":"span"}],[{"id":"id-41","text":"Cynthia Dwork, Krishnaram Kenthapadi, Frank McSherry, Ilya Mironov, and Moni Naor. Our data, our- ","element":"span"},{"text":"selves: Privacy via distributed noise generation. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 24th Annual International Conference on the Theory and Applications of Cryptographic Techniques","element":"span"},{"text":", EUROCRYPT ’06, pages 486–503, Berlin, Heidelberg, 2006a. Springer.","element":"span"}],[{"id":"id-10","text":"Cynthia Dwork, Frank McSherry, Kobbi Nissim, and Adam Smith. Calibrating noise to sensitivity in private ","element":"span"},{"text":"data analysis. 
In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 3rd Conference on Theory of Cryptography","element":"span"},{"text":", TCC ’06, pages 265–284, Berlin, Heidelberg, 2006b. Springer.","element":"span"}],[{"id":"id-82","text":"Cynthia Dwork, Guy N. Rothblum, and Salil Vadhan. Boosting and differential privacy. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 51st Annual IEEE Symposium on Foundations of Computer Science","element":"span"},{"text":", FOCS ’10, pages 51–60, Washington, DC, USA, 2010. IEEE Computer Society.","element":"span"}],[{"id":"id-32","text":"Cynthia Dwork, Vitaly Feldman, Moritz Hardt, Toniann Pitassi, Omer Reingold, and Aaron Roth. The ","element":"span"},{"text":"reusable holdout: Preserving validity in adaptive data analysis. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Science","element":"span"},{"text":", 349(6248):636–638, 2015.","element":"span"}],[{"id":"id-35","text":"Vitaly Feldman and Thomas Steinke. Generalization for adaptively-chosen estimators via stable median. In ","element":"span"},{"text":"Satyen Kale and Ohad Shamir, editors, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 30th Conference on Learning Theory, COLT 2017, Amsterdam, The Netherlands, 7-10 July 2017","element":"span"},{"text":", volume 65 of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of Machine Learning Research","element":"span"},{"text":", pages 728–757. PMLR, 2017. URL ","element":"span"},{"href":"http://proceedings.mlr.press/v65/feldman17a.html","style":{"fontFamily":"monospace"},"text":"http://proceedings.mlr.press/v65/feldman17a. ","element":"a"},{"href":"http://proceedings.mlr.press/v65/feldman17a.html","style":{"fontFamily":"monospace"},"text":"html","element":"a"},{"text":".","element":"span"}],[{"id":"id-6","text":"Alon Gonen, Elad Hazan, and Shay Moran. 
Private learning implies online learning: An efficient reduction. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"NeurIPS","element":"span"},{"text":", 2019.","element":"span"}],[{"text":"R. E. Greenwood and A. M. Gleason. Combinatorial relations and chromatic graphs. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Canadian Journal of Mathematics","element":"span"},{"text":", 7:1–7, 1955. doi: 10.4153/CJM-1955-001-4.","element":"span"}],[{"id":"id-40","text":"Wilfrid Hodges. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A Shorter Model Theory","element":"span"},{"text":". Cambridge University Press, New York, NY, USA, 1997. ISBN 0-521-58713-1.","element":"span"}],[{"id":"id-5","text":"Matthew Joseph, Jieming Mao, Seth Neel, and Aaron Roth. The role of interactivity in local differential ","element":"span"},{"text":"privacy. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"FOCS","element":"span"},{"text":", 2019.","element":"span"}],[{"id":"id-37","text":"Christopher Jung, Katrina Ligett, Seth Neel, Aaron Roth, Saeed Sharifi-Malvajerdi, and Moshe Shenfeld. A ","element":"span"},{"text":"new analysis of differential privacy’s generalization guarantees. In Thomas Vidick, editor, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"11th Innovations in Theoretical Computer Science Conference, ITCS 2020, January 12-14, 2020, Seattle, Washington, USA","element":"span"},{"text":", volume 151 of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"LIPIcs","element":"span"},{"text":", pages 31:1–31:17. Schloss Dagstuhl - Leibniz-Zentrum für Informatik, 2020. doi: 10.4230/LIPIcs.ITCS.2020.31. URL ","element":"span"},{"href":"https://doi.org/10.4230/LIPIcs.ITCS.2020.31","style":{"fontFamily":"monospace"},"text":"https://doi.org/10.4230/LIPIcs.ITCS.2020.31","element":"a"},{"text":".","element":"span"}],[{"text":"Haim Kaplan, Katrina Ligett, Yishay Mansour, Moni Naor, and Uri Stemmer. 
Privately learning thresholds: ","element":"span"},{"text":"Closing the exponential gap, 2019.","element":"span"}],[{"id":"id-9","text":"Shiva Prasad Kasiviswanathan, Homin K. Lee, Kobbi Nissim, Sofya Raskhodnikova, and Adam Smith. ","element":"span"},{"text":"What can we learn privately? ","element":"span"},{"style":{"fontStyle":"italic"},"text":"SIAM Journal on Computing","element":"span"},{"text":", 40(3):793–826, 2011.","element":"span"}],[{"id":"id-28","text":"N. Littlestone and M. K. Warmuth. The weighted majority algorithm. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 30th Annual Symposium on Foundations of Computer Science","element":"span"},{"text":", SFCS ’89, pages 256–261, USA, 1989. IEEE Computer Society. ","element":"span"},{"text":"ISBN 0818619821. ","element":"span"},{"text":"doi: 10.1109/SFCS.1989.63487. ","element":"span"},{"text":"URL ","element":"span"},{"href":"https://doi.org/10.1109/SFCS.1989.63487","style":{"fontFamily":"monospace"},"text":"https://doi.org/10.1109/ ","element":"a"},{"href":"https://doi.org/10.1109/SFCS.1989.63487","style":{"fontFamily":"monospace"},"text":"SFCS.1989.63487","element":"a"},{"text":".","element":"span"}],[{"id":"id-0","text":"Nick Littlestone. Learning quickly when irrelevant attributes abound: A new linear-threshold algorithm. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Machine Learning","element":"span"},{"text":", 2(4):285–318, 1987.","element":"span"}],[{"id":"id-49","text":"Frank McSherry and Kunal Talwar. Mechanism design via differential privacy. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 48th Annual IEEE Symposium on Foundations of Computer Science","element":"span"},{"text":", FOCS ’07, pages 94–103, Washington, DC, USA, 2007. IEEE Computer Society.","element":"span"}],[{"id":"id-36","text":"Kobbi Nissim and Uri Stemmer. Personal communication, 2017.","element":"span"}],[{"id":"id-56","text":"F. P. Ramsey. 
On a problem of formal logic. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the London Mathematical Society","element":"span"},{"text":", s2-30 (1):264–286, 1930. doi: 10.1112/plms/s2-30.1.264. URL ","element":"span"},{"href":"https://londmathsoc.onlinelibrary.wiley.com/doi/abs/10.1112/plms/s2-30.1.264","style":{"fontFamily":"monospace"},"text":"https://londmathsoc.onlinelibrary. ","element":"a"},{"href":"https://londmathsoc.onlinelibrary.wiley.com/doi/abs/10.1112/plms/s2-30.1.264","style":{"fontFamily":"monospace"},"text":"wiley.com/doi/abs/10.1112/plms/s2-30.1.264","element":"a"},{"text":".","element":"span"}],[{"id":"id-34","text":"Ryan Rogers, Aaron Roth, Adam Smith, and Om Thakkar. Max-information, differential privacy, and post- ","element":"span"},{"text":"selection hypothesis testing. In ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Proceedings of the 57th Annual IEEE Symposium on Foundations of Computer Science","element":"span"},{"text":", FOCS ’16, pages 487–494, Washington, DC, USA, 2016. IEEE Computer Society.","element":"span"}],[{"text":"F. Rosenblatt. The perceptron: A probabilistic model for information storage and organization in the brain. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Psychological Review","element":"span"},{"text":", 65(6):386–408, 1958. ISSN 0033-295X. doi: 10.1037/h0042519. URL ","element":"span"},{"href":"http://dx.doi.org/10.1037/h0042519","style":{"fontFamily":"monospace"},"text":"http: ","element":"a"},{"href":"http://dx.doi.org/10.1037/h0042519","style":{"fontFamily":"monospace"},"text":"//dx.doi.org/10.1037/h0042519","element":"a"},{"text":".","element":"span"}],[{"id":"id-23","text":"N. Sauer. On the density of families of sets. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"J. Comb. Theory, Ser. A","element":"span"},{"text":", 13:145–147, 1972. ISSN 0097-3165. doi: 10.1016/0097-3165(72)90019-2.","element":"span"}],[{"id":"id-19","text":"Saharon 
Shelah. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Classification theory and the number of non-isomorphic models","element":"span"},{"text":". North-Holland Pub. Co. ; sole distributors for the U.S.A. and Canada, Elsevier/North-Holland Amsterdam ; New York : New York, 1978. ISBN 0720407575.","element":"span"}],[{"id":"id-42","text":"Leslie G. Valiant. A theory of the learnable. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Communications of the ACM","element":"span"},{"text":", 27(11):1134–1142, 1984.","element":"span"}],[{"id":"id-43","text":"V.N. Vapnik and A.Ya. Chervonenkis. On the uniform convergence of relative frequencies of events to ","element":"span"},{"text":"their probabilities. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Theory Probab. Appl.","element":"span"},{"text":", 16:264–280, 1971. ","element":"span"},{"text":"ISSN 0040-585X; 1095-7219/e. ","element":"span"},{"text":"doi: 10.1137/1116025.","element":"span"}]]}],"_version":"3.3.4"},"paperNode":"$1b:props:children:props:children:0:props:product"}]]]}]}]