1b:["$","$L29",null,{"isWhiteLabelled":false,"children":["$","$Lb",null,{"pt":{"compact":0,"expanded":3},"children":[["$","$L2a",null,{"noStar":true,"publisher":true,"task":true,"params":true,"size":"xl","product":{"id":"eyJwYXBlcklEIjoiMjAwMS4wNjQ4NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","updated":"2020-07-12T08:05:48.000Z","paperID":"2001.06485","published":"2020-01-17T10:44:36.000Z","authors":"[\"Boris Ndjia Njike\",\"Xavier Siebert\"]","title":"K-NN active learning under local smoothness assumption","scoreTrending":null,"summary":"There is a large body of work on convergence rates either in passive or\nactive learning. Here we first outline some of the main results that have been\nobtained, more specifically in a nonparametric setting under assumptions about\nthe smoothness of the regression function (or the boundary between classes) and\nthe margin noise. We discuss the relative merits of these underlying\nassumptions by putting active learning in perspective with recent work on\npassive learning. We design an active learning algorithm with a rate of\nconvergence better than in passive learning, using a particular smoothness\nassumption customized for k-nearest neighbors. Unlike previous active learning\nalgorithms, we use a smoothness assumption that provides a dependence on the\nmarginal distribution of the instance space. 
Additionally, our algorithm avoids\nthe strong density assumption that supposes the existence of the density\nfunction of the marginal distribution of the instance space and is therefore\nmore generally applicable.","lastCheckedForCode":"2022-09-04T04:12:20.071Z","links":[{"id":"eyJ1cmwiOiJodHRwczovL3BhcGVyc3dpdGhjb2RlLmNvbS9wYXBlci9rLW5uLWFjdGl2ZS1sZWFybmluZy11bmRlci1sb2NhbC1zbW9vdGhuZXNzLTEifQ==","type":"pwc","url":"https://paperswithcode.com/paper/k-nn-active-learning-under-local-smoothness-1","data":null}],"reposConnection":{"edges":[{"official":null,"node":{"id":"eyJyZXBvSUQiOiIzNzEyODQyODQiLCJzb3VyY2UiOiJnaXRodWIifQ==","source":"github","repoID":"371284284","url":"https://github.com/nouchem/KALLS","title":"KALLS","language":"matlab","stars":0,"forks":0,"framework":null,"scoreTrending":null,"updated":null,"created":null,"downloads":null,"likes":null,"owner":[{"username":"nouchem","avatar":"https://avatars.githubusercontent.com/u/25878787?v=4"}]}}]},"models":[],"tags":[{"id":"eyJuYW1lIjoiYWN0aXZlIGxlYXJuaW5nIiwidHlwZSI6InRhc2sifQ==","name":"active learning","description":"In active learning, the model queries the user for labels on specific data points it finds difficult to classify. 
This method is used when labeled data is scarce or expensive to obtain, allowing the model to learn effectively with fewer labeled examples.","scoreTrending":null,"count":{"stars":5418,"papers":2595,"models":1879},"__typename":"Tag"}],"summaries":[],"emailsConnection":{"edges":[{"author":"boris ndjia njike","node":{"id":"eyJhZGRyZXNzIjoiYm9yaXNlZGdhci5uZGppYW5qaWtlQHVtb25zLmFjLmJlIn0=","address":"borisedgar.ndjianjike@umons.ac.be","name":null,"avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[],"scholar":[],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiJmNWQ1ODYxNi05ODM1LTQ2MzMtYjFiMy04NWQxYWQ2MjJmM2IifQ==","name":"boris ndjia njike","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMjEwMi4xMTA3NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2102.11077"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wMzA1NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.03055"},{"id":"eyJwYXBlcklEIjoiMjAwMS4wNjQ4NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2001.06485"}]}]}},{"author":"xavier siebert","node":{"id":"eyJhZGRyZXNzIjoieGF2aWVyLnNpZWJlcnRAdW1vbnMuYWMuYmUifQ==","address":"xavier.siebert@umons.ac.be","name":"Xavier Siebert","avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[{"name":"UMONS"}],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[],"scholar":[{"thirdPartyID":"MF7sTY4AAAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiI1NDc1OTMwMC0xZTgxLTQzZGQtYTdiOC1hMWJjM2E4M2I5MWUifQ==","name":"Xavier 
Siebert","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMjEwMi4xMTA3NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2102.11077"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wMzA1NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.03055"},{"id":"eyJwYXBlcklEIjoiMjAwMS4wNjQ4NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2001.06485"}]}]}}]},"__typename":"paper","authorArray":["Boris Ndjia Njike","Xavier Siebert"]}}],["$","$L18",null,{"container":true,"columns":100,"spacing":{"compact":0,"expanded":2,"large":3},"children":[["$","$L18",null,{"size":{"compact":100,"expanded":100,"large":68},"children":[["$","$7",null,{"children":["$","$L2b",null,{"publisher":"arxiv","paperID":"2001.06485","product":{"paper":"$1b:props:children:props:children:0:props:product","models":"$1b:props:children:props:children:0:props:product:models"},"isWhiteLabelled":false}]}],["$","$7",null,{"children":["$","$L2c",null,{"article":"$L2d","model":"$undefined"}]}]]}],["$","$L18",null,{"size":"grow","children":["$","$L2e",null,{}]}]]}],["$","$7",null,{"children":null}],[["$","audio",null,{"id":"tts"}],["$","$L2f",null,{"paperID":"2001.06485","publisher":"arxiv","paperJSON":{"title":"K-NN active learning under local smoothness assumption","paperID":"2001.06485","avgLineHeight":12,"imgScale":4,"sections":[{"heading":"Abstract","paragraphs":[[{"text":"$30","element":"span"}],[{"text":"Keywords ","element":"span"},{"text":"Nonparametric learning ","element":"span"},{"style":{"height":4.8},"width":11,"height":12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/0-0.png","element":"img","alt":" ·","inline":true,"padRight":true},{"text":"active learning ","element":"span"},{"style":{"height":4.8},"width":11,"height":12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/0-1.png","element":"img","alt":" ·","inline":true,"padRight":true},{"text":"nearest-neighbors 
","element":"span"},{"style":{"height":4.8},"width":11,"height":12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/0-2.png","element":"img","alt":" ·","inline":true,"padRight":true},{"text":"smoothness condition.","element":"span"}]]},{"heading":"1 Introduction","paragraphs":[[{"text":"Active learning is a machine learning approach for reducing the data labeling effort. Given an instance space ","element":"span"},{"text":"X ","element":"span"},{"text":"or a pool of unlabeled data ","element":"span"},{"style":{"height":16},"width":238.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/1-0.png","element":"img","alt":" {X1, . . . , Xw}","inline":true,"padRight":true},{"text":"provided by a distribution ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/1-1.png","element":"img","alt":" PX","inline":true},{"text":", the learner focuses its labeling effort only on the most “informative” points so that a model built from them can achieve the best possible guarantees ","element":"span"},{"href":"#id-0","text":"(Dasgupta, ","element":"a"},{"href":"#id-0","text":"2011)","element":"a"},{"text":". Such guarantees are particularly interesting when they are significantly better than those obtained in passive learning ","element":"span"},{"href":"#id-1","text":"(Hanneke and Yang, ","element":"a"},{"href":"#id-1","text":"2015)","element":"a"},{"text":". In the context of this work, we consider binary classification (where the label ","element":"span"},{"text":"Y ","element":"span"},{"text":"of ","element":"span"},{"text":"X ","element":"span"},{"text":"takes its value in ","element":"span"},{"text":"{","element":"span"},{"text":"0","element":"span"},{"text":", ","element":"span"},{"text":"1","element":"span"},{"text":"}","element":"span"},{"text":") in a nonparametric setting. 
Extensions to multiclass classification and adaptive algorithms are discussed at the end of this paper (Section ","element":"span"},{"text":"6)","element":"span"},{"text":".","element":"span"}],[{"text":"The nonparametric setting has the advantage of providing guarantees with many informations such as the dependence on the dimensional and distributional parameters by using some hypotheses on the regularity of the decision boundary ","element":"span"},{"href":"#id-2","text":"(Castro and Nowak, ","element":"a"},{"href":"#id-2","text":"2008)","element":"a"},{"text":", on the regression function ","element":"span"},{"href":"#id-3","text":"(Minsker, ","element":"a"},{"href":"#id-3","text":"2012; ","element":"a"},{"href":"#id-4","text":"Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017)","element":"a"},{"text":", and on the geometry of instance space (called strong density assumption) ","element":"span"},{"href":"#id-5","text":"(Audibert and Tsybakov, ","element":"a"},{"href":"#id-5","text":"2007; ","element":"a"},{"href":"#id-4","text":"Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017; ","element":"a"},{"href":"#id-3","text":"Minsker, ","element":"a"},{"href":"#id-3","text":"2012)","element":"a"},{"text":". One of the initial works on nonparametric active learning ","element":"span"},{"href":"#id-2","text":"(Castro and Nowak, ","element":"a"},{"href":"#id-2","text":"2008) ","element":"a"},{"text":"assumed that the decision boundary is the graph of a smooth function, that a margin assumption very similar to Tsybakov’s noise assumption ","element":"span"},{"href":"#id-6","text":"(Mammen and Tsybakov, ","element":"a"},{"href":"#id-6","text":"1999) ","element":"a"},{"text":"holds, and that distribution ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/1-2.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"is uniform. 
This led to a better guarantee than in passive learning. Instead of the assumption on the decision boundary, other works ","element":"span"},{"href":"#id-3","text":"(Minsker, ","element":"a"},{"href":"#id-3","text":"2012; ","element":"a"},{"href":"#id-4","text":"Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017) ","element":"a"},{"text":"supposed rather that the regression function is smooth (in some sense). This assumption, along with Tsybakov’s noise assumption and the strong density assumption also gave a better guarantee than in passive learning. Moreover, unlike in ","element":"span"},{"href":"#id-2","text":"(Castro and Nowak, ","element":"a"},{"href":"#id-2","text":"2008)","element":"a"},{"text":", they provided algorithms that are adaptive with respect to the margin’s noise and to the smoothness par","element":"span"},{"href":"#id-7","text":"ameters.","element":"a"}],[{"text":"However, recent work ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014) ","element":"a"},{"text":"pointed out some disadvantages of the preceding smoothness assumption, and extended it in the context of passive learning with ","element":"span"},{"text":"k","element":"span"},{"text":"-nearest neighbors (","element":"span"},{"text":"k","element":"span"},{"text":"-NN) by using a more general smoothness assumption that is able to sharply characterize the rate of convergence for all probability distributio","element":"span"},{"href":"#id-7","text":"ns that satisfy it.","element":"a"}],[{"text":"In this paper, we thus extend the work of ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014) ","element":"a"},{"text":"to the active learning setting, and provide a novel algorithm that outputs a clas-sifier with the same rate of convergence as other recent algorithms with more restrictive hypotheses, as for example 
","element":"span"},{"href":"#id-3","text":"(Minsker, ","element":"a"},{"href":"#id-3","text":"2012; ","element":"a"},{"href":"#id-4","text":"Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017)","element":"a"},{"text":". Section ","element":"span"},{"text":"2 ","element":"span"},{"text":"introduces general definitions, Section ","element":"span"},{"text":"3 ","element":"span"},{"text":"presents previous work on convergence rates in active and passive non-parametric learning, with a special emphasis on the assumptions related to our work. Section ","element":"span"},{"href":"#id-8","text":"4 ","element":"a"},{"text":"provides an outline of our algorithm while Section ","element":"span"},{"text":"5 ","element":"span"},{"text":"describes its theoretical motivations and Section ","element":"span"},{"text":"6 ","element":"span"},{"text":"contains the conclusion and some perspectives for future work.","element":"span"}]]},{"heading":"2 Preliminaries","paragraphs":[[{"text":"We begin with some general definitions and notations about active learning in binary classification, then recall the concept of ","element":"span"},{"text":"k","element":"span"},{"text":"-NN classifiers. Finally, the main assumptions that are used in nonparametric active learning are explained.","element":"span"}],[{"id":"id-32","text":"2.1 Active learning setting","element":"span"}],[{"text":"Let (","element":"span"},{"style":{"height":14},"width":72.84,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-0.png","element":"img","alt":"X, ρ","inline":true},{"text":") be a metric space. 
In this paper we set ","element":"span"},{"style":{"height":14.16},"width":137.96,"height":35.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-1.png","element":"img","alt":" X ⊂ Rd","inline":true,"padRight":true},{"text":"and refer to it as the instance space, and take ","element":"span"},{"style":{"height":10},"width":21,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-2.png","element":"img","alt":" ρ","inline":true,"padRight":true},{"text":"as the Euclidean metric. Let ","element":"span"},{"text":"Y ","element":"span"},{"text":"= ","element":"span"},{"text":"{","element":"span"},{"text":"0","element":"span"},{"text":", ","element":"span"},{"text":"1","element":"span"},{"text":"} ","element":"span"},{"text":"the label space. We assume that the pairs (","element":"span"},{"text":"X, Y ","element":"span"},{"text":") are random variables distributed according to an unknown probability ","element":"span"},{"text":"P ","element":"span"},{"text":"over ","element":"span"},{"style":{"height":12.8},"width":116.36,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-3.png","element":"img","alt":" X × Y","inline":true},{"text":". Let us denote ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-4.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"the marginal distribution of ","element":"span"},{"text":"P ","element":"span"},{"text":"over ","element":"span"},{"text":"X","element":"span"},{"text":".","element":"span"}],[{"text":"Given ","element":"span"},{"style":{"height":11.6},"width":114.92,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-5.png","element":"img","alt":" w ∈ N","inline":true,"padRight":true},{"text":"and an i.i.d. 
sample (","element":"span"},{"style":{"height":16},"width":351.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-6.png","element":"img","alt":"X1, Y1), . . . , (Xw, Yw","inline":true},{"text":") drawn according to probability ","element":"span"},{"text":"P","element":"span"},{"text":", the learning problem consists in minimizing the risk ","element":"span"},{"text":"R","element":"span"},{"text":"(","element":"span"},{"text":"f","element":"span"},{"text":") = ","element":"span"},{"style":{"height":16},"width":205.88,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-7.png","element":"img","alt":"P(Y ̸= f(X","inline":true},{"text":")) over all measurable functions, called classifiers, ","element":"span"},{"style":{"height":14},"width":182.12,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-8.png","element":"img","alt":" f : X → Y","inline":true},{"text":".","element":"span"}],[{"text":"In active learning, the labels are not available from the beginning but we can request iteratively at a certain cost (to a so-called oracle) a given number ","element":"span"},{"text":"n ","element":"span"},{"text":"of samples, called the budget (","element":"span"},{"style":{"height":12.8},"width":118.76,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-9.png","element":"img","alt":"n ≤ w","inline":true},{"text":"). In passive learning, all labels are available and ","element":"span"},{"text":"n ","element":"span"},{"text":"= ","element":"span"},{"text":"w","element":"span"},{"text":". At any time, we choose to request the label of a point ","element":"span"},{"text":"X ","element":"span"},{"text":"according to the previous observations. 
The point ","element":"span"},{"text":"X ","element":"span"},{"text":"is chosen to be most “informative”, which amounts to belonging to a region where classification is difficult and requires more labeled data to be collected. Therefore, the goal of active learning is to design a sampling strategy that outputs a classifier ","element":"span"},{"style":{"height":15.5},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-10.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"whose excess risk (see below) is as small as possible with high probability over the requested samples, as reviewed in ","element":"span"},{"href":"#id-0","text":"(Dasgupta, ","element":"a"},{"href":"#id-0","text":"2011; ","element":"a"},{"href":"#id-1","text":"Hanneke and Yang, ","element":"a"},{"href":"#id-1","text":"2015; ","element":"a"},{"href":"#id-9","text":"Dasgupta, ","element":"a"},{"href":"#id-9","text":"2017)","element":"a"},{"text":".","element":"span"}],[{"text":"Given ","element":"span"},{"text":"x ","element":"span"},{"text":"in ","element":"span"},{"text":"X","element":"span"},{"text":", let us introduce the regression function ","element":"span"},{"style":{"height":16},"width":59.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-11.png","element":"img","alt":" η(x","inline":true},{"text":") = ","element":"span"},{"text":"E","element":"span"},{"text":"(","element":"span"},{"text":"Y ","element":"span"},{"text":"|","element":"span"},{"text":"X ","element":"span"},{"text":"= ","element":"span"},{"text":"x","element":"span"},{"text":") = ","element":"span"},{"text":"P","element":"span"},{"text":"(","element":"span"},{"text":"Y ","element":"span"},{"text":"= 1","element":"span"},{"text":"| ","element":"span"},{"text":"X ","element":"span"},{"text":"= ","element":"span"},{"text":"x","element":"span"},{"text":"). 
It is easy to show ","element":"span"},{"href":"#id-10","text":"(Lugosi, ","element":"a"},{"href":"#id-10","text":"2002) ","element":"a"},{"text":"that the function ","element":"span"},{"style":{"height":16},"width":80.12,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-12.png","element":"img","alt":" f ∗(x","inline":true},{"text":") = ","element":"span"},{"style":{"height":16.48},"width":158.08,"height":41.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-13.png","element":"img","alt":"1η(x)≥1/2","inline":true,"padRight":true},{"text":"achieves the minimum risk and that ","element":"span"},{"style":{"height":16},"width":88.96,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-14.png","element":"img","alt":" R(f ∗","inline":true},{"text":") = ","element":"span"},{"style":{"height":13.1},"width":53.4,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-15.png","element":"img","alt":" EX","inline":true},{"text":"(min(","element":"span"},{"style":{"height":16},"width":169.24,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-16.png","element":"img","alt":"η(X), 1 −","inline":true},{"style":{"height":16},"width":71.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-17.png","element":"img","alt":"η(X","inline":true},{"text":"))). 
Because ","element":"span"},{"text":"P ","element":"span"},{"text":"is unknown, the function ","element":"span"},{"style":{"height":14.16},"width":40,"height":35.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-18.png","element":"img","alt":" f ∗","inline":true,"padRight":true},{"text":"is unreachable and thus the aim of a learning algorithm is to return a classifier ","element":"span"},{"style":{"height":15.51},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-19.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"with minimum excess risk ","element":"span"},{"text":"R","element":"span"},{"text":"( ","element":"span"},{"style":{"height":16.7},"width":218.08,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-20.png","element":"img","alt":"�fn,w)−R(f ∗","inline":true},{"text":") with high probability over the sample (","element":"span"},{"style":{"height":16},"width":351.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-21.png","element":"img","alt":"X1, Y1), . . . 
, (Xw, Yw","inline":true},{"text":").","element":"span"}],[{"text":"2.2 ","element":"span"},{"text":"k","element":"span"},{"text":"-Nearest Neighbors (","element":"span"},{"text":"k","element":"span"},{"text":"-NN) classifier","element":"span"}],[{"text":"Given two integers ","element":"span"},{"text":"k, n ","element":"span"},{"text":"such that ","element":"span"},{"text":"k < n","element":"span"},{"text":", and a test point ","element":"span"},{"style":{"height":11.6},"width":123.72,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-22.png","element":"img","alt":" X ∈ X","inline":true},{"text":", the ","element":"span"},{"text":"k","element":"span"},{"text":"-NN classifier predicts the label of ","element":"span"},{"text":"X ","element":"span"},{"text":"by giving the majority vote of its ","element":"span"},{"text":"k ","element":"span"},{"text":"nearest neighbors amongst the sample ","element":"span"},{"style":{"height":14},"width":192.8,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-23.png","element":"img","alt":" X1, . . . , Xn","inline":true},{"text":". For ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1, the ","element":"span"},{"text":"k","element":"span"},{"text":"-NN classifier returns the label of the nearest neighbor of ","element":"span"},{"text":"X ","element":"span"},{"text":"amongst the sample ","element":"span"},{"style":{"height":14},"width":192.32,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-24.png","element":"img","alt":" X1, . . . , Xn","inline":true},{"text":". 
If ","element":"span"},{"text":"k ","element":"span"},{"text":"is allowed to grow with ","element":"span"},{"text":"n","element":"span"},{"text":", the method is called ","element":"span"},{"style":{"height":13.1},"width":40.64,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/2-25.png","element":"img","alt":" kn","inline":true},{"text":"-NN. For a complete discussion of nearest neighbors classification, see for example ","element":"span"},{"href":"#id-11","text":"(Biau and Devroye, ","element":"a"},{"href":"#id-11","text":"2015; ","element":"a"},{"href":"#id-12","text":"Shalev-Shwartz and Ben-David, ","element":"a"},{"href":"#id-12","text":"2014; ","element":"a"},{"href":"#id-7","text":"Chaudhuri and Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014)","element":"a"},{"text":".","element":"span"}],[{"id":"id-59","text":"2.3 Regularity, noise and strong density assumptions","element":"span"}],[{"text":"Let ","element":"span"},{"text":"B","element":"span"},{"text":"(","element":"span"},{"text":"x, r","element":"span"},{"text":") = ","element":"span"},{"style":{"height":16},"width":404,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-0.png","element":"img","alt":" {x′ ∈ X, ρ(x, x′) < r}","inline":true,"padRight":true},{"text":"and ","element":"span"},{"text":"¯","element":"span"},{"text":"B","element":"span"},{"text":"(","element":"span"},{"text":"x, r","element":"span"},{"text":") = ","element":"span"},{"style":{"height":16},"width":404.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-1.png","element":"img","alt":" {x′ ∈ X, ρ(x, x′) ≤ r}","inline":true,"padRight":true},{"text":"the open and closed balls (with respect to the Euclidean metric ","element":"span"},{"style":{"height":10},"width":21,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-2.png","element":"img","alt":" ρ","inline":true},{"text":"), respectively, centered 
at ","element":"span"},{"style":{"height":11.6},"width":111.72,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-3.png","element":"img","alt":" x ∈ X","inline":true,"padRight":true},{"text":"with radius ","element":"span"},{"text":"r > ","element":"span"},{"text":"0. Let supp(","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-4.png","element":"img","alt":"PX","inline":true},{"text":") = ","element":"span"},{"style":{"height":16},"width":254.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-5.png","element":"img","alt":" {x ∈ X, ∀r >","inline":true,"padRight":true},{"text":"0","element":"span"},{"style":{"height":16},"width":331.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-6.png","element":"img","alt":", PX(B(x, r)) > 0}","inline":true,"padRight":true},{"text":"the support of the marginal distribution ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-7.png","element":"img","alt":" PX","inline":true},{"text":".","element":"span"}],[{"style":{"width":"48%"},"width":658,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-8.png","element":"img"}],[{"text":"Let ","element":"span"},{"style":{"height":14.4},"width":145.12,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-9.png","element":"img","alt":" η : X →","inline":true,"padRight":true},{"text":"[0","element":"span"},{"text":", ","element":"span"},{"text":"1] ","element":"span"},{"text":"be the regression function defined as ","element":"span"},{"style":{"height":16},"width":59.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-10.png","element":"img","alt":" η(x","inline":true},{"text":") = 
","element":"span"},{"text":"P","element":"span"},{"text":"(","element":"span"},{"text":"Y ","element":"span"},{"text":"= 1","element":"span"},{"text":"|","element":"span"},{"text":"X ","element":"span"},{"text":"= ","element":"span"},{"text":"x","element":"span"},{"text":")","element":"span"},{"text":". We say that ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-11.png","element":"img","alt":" η","inline":true,"padRight":true},{"text":"is ","element":"span"},{"text":"(","element":"span"},{"style":{"height":14},"width":70.2,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-12.png","element":"img","alt":"α, L","inline":true},{"text":")","element":"span"},{"text":"-","element":"span"},{"style":{"height":11.2},"width":370.4,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-13.png","element":"img","alt":"H¨older continuous","inline":true,"padRight":true},{"text":"(0 ","element":"span"},{"style":{"height":14},"width":154.52,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-14.png","element":"img","alt":" < α ≤ 1,","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":13.2},"width":69.88,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-15.png","element":"img","alt":" L ≥","inline":true,"padRight":true},{"text":"1) ","element":"span"},{"text":"if ","element":"span"},{"style":{"height":10.8},"width":23,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-16.png","element":"img","alt":" ∀","inline":true},{"style":{"height":14},"width":156.36,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-17.png","element":"img","alt":"x, x′ ∈ 
X","inline":true},{"text":",","element":"span"}],[{"id":"id-15","style":{"width":"66%"},"width":901,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-18.png","element":"img"}],[{"text":"The notion of H¨older continuity ensures that the proximity between two closest (according to the metric ","element":"span"},{"style":{"height":10},"width":21,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-19.png","element":"img","alt":" ρ","inline":true},{"text":") points is reflected in a similar value of the conditional probability ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-20.png","element":"img","alt":" η","inline":true},{"text":".","element":"span"}],[{"text":"This definition remains true for a general metric space, but when ","element":"span"},{"style":{"height":10},"width":21,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-21.png","element":"img","alt":" ρ","inline":true,"padRight":true},{"text":"is the Euclidean metric, we should always have 0 ","element":"span"},{"style":{"height":12.8},"width":124.6,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-22.png","element":"img","alt":" < α ≤","inline":true,"padRight":true},{"text":"1, otherwise ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-23.png","element":"img","alt":" η","inline":true,"padRight":true},{"text":"becomes constant","element":"span"},{"href":"#id-13","referenceIndex":2,"text":"(Pugh, ","element":"a"},{"href":"#id-13","referenceIndex":2,"text":"2002)","element":"a"},{"text":".","element":"span"}],[{"text":"In most of the previous works (for example ","element":"span"},{"href":"#id-5","text":"(Audibert and Tsybakov, ","element":"a"},{"href":"#id-5","text":"2007; 
","element":"a"},{"href":"#id-3","text":"Minsker, ","element":"a"},{"href":"#id-3","text":"2012; ","element":"a"},{"href":"#id-14","text":"Ga¨ıffas, ","element":"a"},{"href":"#id-14","text":"2007)","element":"a"},{"text":"), the definition ","element":"span"},{"href":"#id-15","text":"H1a ","element":"a"},{"text":"is used along with the following notion ","element":"span"},{"href":"#id-16","text":"(H1b) ","element":"a"},{"text":"for technical reasons.","element":"span"}],[{"style":{"width":"44%"},"width":600,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-24.png","element":"img"}],[{"text":"Let ","element":"span"},{"text":"P ","element":"span"},{"text":"be the probability distribution defined over ","element":"span"},{"style":{"height":12.8},"width":111.56,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-25.png","element":"img","alt":" X × Y","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-26.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"the marginal distribution of ","element":"span"},{"text":"P ","element":"span"},{"text":"over ","element":"span"},{"text":"X","element":"span"},{"text":". 
We say that ","element":"span"},{"text":"P ","element":"span"},{"text":"satisfies the ","element":"span"},{"text":"strong density ","element":"span"},{"text":"assumption if there exists some constants ","element":"span"},{"style":{"height":11.1},"width":79.96,"height":27.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-27.png","element":"img","alt":" r0 >","inline":true,"padRight":true},{"text":"0","element":"span"},{"text":", ","element":"span"},{"style":{"height":11.1},"width":79,"height":27.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-28.png","element":"img","alt":" c0 >","inline":true,"padRight":true},{"text":"0","element":"span"},{"text":", ","element":"span"},{"style":{"height":12},"width":125.08,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-29.png","element":"img","alt":" pmin >","inline":true,"padRight":true},{"text":"0 ","element":"span"},{"text":"such that for all ","element":"span"},{"style":{"height":9.6},"width":63.96,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-30.png","element":"img","alt":"x ∈","inline":true,"padRight":true},{"text":"supp","element":"span"},{"text":"(","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-31.png","element":"img","alt":"PX","inline":true},{"text":")","element":"span"},{"text":":","element":"span"}],[{"id":"id-16","style":{"width":"79%"},"width":1074,"height":100,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-32.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":10},"width":47.16,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-33.png","element":"img","alt":" pX","inline":true,"padRight":true},{"text":"is the density function of the marginal distribution 
","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-34.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":10.8},"width":23,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-35.png","element":"img","alt":" λ","inline":true,"padRight":true},{"text":"is the Lebesgue measure.","element":"span"}],[{"text":"The strong density assumption ensures that, given a realisation ","element":"span"},{"text":"X ","element":"span"},{"text":"= ","element":"span"},{"text":"x ","element":"span"},{"text":"according to ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-36.png","element":"img","alt":" PX","inline":true},{"text":", there exists an infinite number of realisations ","element":"span"},{"style":{"height":14},"width":296.8,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-37.png","element":"img","alt":" X1 = x1, . . . , Xm","inline":true,"padRight":true},{"text":"= ","element":"span"},{"style":{"height":10},"width":117.56,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-38.png","element":"img","alt":"xm, . . 
.","inline":true,"padRight":true},{"text":"in a neighborhood of ","element":"span"},{"text":"x","element":"span"},{"text":".","element":"span"}],[{"text":"Sometimes, the notion of strong density is used to geometrically characterize the set where the classification is difficult ","element":"span"},{"href":"#id-4","text":"(Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017)","element":"a"},{"text":", and then combined with the following definition of ","element":"span"},{"text":"Margin noise","element":"span"},{"text":", allows to nicely control the error of classification beyond a given number of label requests.","element":"span"}],[{"style":{"width":"40%"},"width":549,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-39.png","element":"img"}],[{"text":"The probability distribution ","element":"span"},{"text":"P ","element":"span"},{"text":"satisfies the ","element":"span"},{"text":"margin noise ","element":"span"},{"text":"assumption (sometimes called ","element":"span"},{"style":{"height":14},"width":337.32,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-40.png","element":"img","alt":" Tsybakov’s noise","inline":true,"padRight":true},{"text":"assumption ","element":"span"},{"href":"#id-5","text":"(Audibert and Tsybakov, ","element":"a"},{"href":"#id-5","text":"2007)","element":"a"},{"text":") with parameter ","element":"span"},{"style":{"height":14.4},"width":68.44,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-41.png","element":"img","alt":" β ≥","inline":true,"padRight":true},{"text":"0 if for all 0 ","element":"span"},{"style":{"height":12.8},"width":103.96,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-42.png","element":"img","alt":" < ǫ ≤","inline":true,"padRight":true},{"text":"1, there is 
","element":"span"},{"style":{"height":16},"width":216.6,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-43.png","element":"img","alt":" C = C(β) ∈","inline":true,"padRight":true},{"text":"[1","element":"span"},{"style":{"height":12.4},"width":88.96,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-44.png","element":"img","alt":", +∞","inline":true},{"text":"[ such that","element":"span"}],[{"id":"id-26","style":{"width":"72%"},"width":983,"height":47,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/3-45.png","element":"img"}],[{"text":"The margin noise assumption gives a bound on the probability that the label of the points in the neigborhood of a point ","element":"span"},{"text":"x ","element":"span"},{"text":"differs from the label of ","element":"span"},{"text":"x ","element":"span"},{"text":"given by the conditional probability ","element":"span"},{"style":{"height":16},"width":59.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-0.png","element":"img","alt":" η(x","inline":true},{"text":"). It also describes the behavior of the regression function in the vicinity of the decision boundary ","element":"span"},{"style":{"height":16},"width":59.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-1.png","element":"img","alt":" η(x","inline":true},{"text":") = ","element":"span"},{"style":{"height":19.31},"width":16.01,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-2.png","element":"img","alt":"12","inline":true},{"text":". 
When ","element":"span"},{"style":{"height":14.4},"width":23,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-3.png","element":"img","alt":" β","inline":true,"padRight":true},{"text":"goes to ","element":"span"},{"text":"infinity, we observe a “jump” of ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-4.png","element":"img","alt":" η","inline":true,"padRight":true},{"text":"around the decision boundary, and then we obtain Massart’s noise condition ","element":"span"},{"href":"#id-17","text":"(Massart and N´ed´elec, ","element":"a"},{"href":"#id-17","text":"2006)","element":"a"},{"text":". Small values of ","element":"span"},{"style":{"height":14.4},"width":23,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-5.png","element":"img","alt":"β","inline":true,"padRight":true},{"text":"allow for ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-6.png","element":"img","alt":" η","inline":true,"padRight":true},{"text":"to “cuddle” ","element":"span"},{"style":{"height":19.31},"width":16,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-7.png","element":"img","alt":"12","inline":true,"padRight":true},{"text":"when we approach the decision boundary.","element":"span"}],[{"style":{"width":"40%"},"width":551,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-8.png","element":"img"}],[{"text":"Let 0 ","element":"span"},{"style":{"height":12.8},"width":111.16,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-9.png","element":"img","alt":" < α ≤","inline":true,"padRight":true},{"text":"1 and ","element":"span"},{"text":"L > ","element":"span"},{"text":"1. 
The regression function is (","element":"span"},{"style":{"height":16},"width":249.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-10.png","element":"img","alt":"α, L)-smooth","inline":true,"padRight":true},{"text":"if for all ","element":"span"},{"style":{"height":12},"width":100.92,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-11.png","element":"img","alt":"x, z ∈","inline":true,"padRight":true},{"text":"supp(","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-12.png","element":"img","alt":"PX","inline":true},{"text":") we have:","element":"span"}],[{"id":"id-18","style":{"width":"74%"},"width":1006,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-13.png","element":"img"}],[{"text":"where ","element":"span"},{"text":"d ","element":"span"},{"text":"is the dimension of the instance space.","element":"span"}],[{"text":"Equivalently, ","element":"span"},{"href":"#id-18","text":"(H3) ","element":"a"},{"text":"can be rewritten as:","element":"span"}],[{"style":{"width":"87%"},"width":1180,"height":73,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-14.png","element":"img"}],[{"text":"It is important to note that the (","element":"span"},{"style":{"height":14},"width":70.2,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-15.png","element":"img","alt":"α, L","inline":true},{"text":")-smooth assumption ","element":"span"},{"href":"#id-18","text":"(H3) ","element":"a"},{"text":"is more general ","element":"span"},{"id":"id-19","text":"than the H¨older continuity assumption ","element":"span"},{"href":"#id-15","text":"(H1a)","element":"a"},{"text":", as stated in Theorem ","element":"span"},{"href":"#id-19","text":"1 
","element":"a"},{"text":"below.","element":"span"}],[{"style":{"width":"60%"},"width":810,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-16.png","element":"img"}],[{"text":"Suppose that ","element":"span"},{"style":{"height":14.16},"width":139.88,"height":35.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-17.png","element":"img","alt":" X ⊂ Rd","inline":true},{"text":", that the regression function ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-18.png","element":"img","alt":" η","inline":true,"padRight":true},{"text":"is ","element":"span"},{"text":"(","element":"span"},{"style":{"height":14},"width":110.2,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-19.png","element":"img","alt":"αh, Lh","inline":true},{"text":")","element":"span"},{"text":"-H¨older continuous, and that ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-20.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"satisfies ","element":"span"},{"href":"#id-16","text":"H1b. 
","element":"a"},{"text":"Then there is a constant ","element":"span"},{"text":"L > ","element":"span"},{"text":"1 ","element":"span"},{"text":"such that for any ","element":"span"},{"style":{"height":12},"width":101.88,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-21.png","element":"img","alt":" x, z ∈","inline":true,"padRight":true},{"text":"supp(","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-22.png","element":"img","alt":"PX","inline":true},{"text":"), we have:","element":"span"}],[{"style":{"width":"75%"},"width":1019,"height":118,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-23.png","element":"img"}],[{"text":"The marginal distribution ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-24.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"is a ","element":"span"},{"text":"doubling-probability ","element":"span"},{"text":"if there exists a constant ","element":"span"},{"style":{"height":13.1},"width":103,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-25.png","element":"img","alt":" Cdb >","inline":true,"padRight":true},{"text":"0 such that for any ","element":"span"},{"style":{"height":11.6},"width":104.52,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-26.png","element":"img","alt":" x ∈ X","inline":true},{"text":", and ","element":"span"},{"text":"r > ","element":"span"},{"text":"0, we have:","element":"span"}],[{"id":"id-23","style":{"width":"71%"},"width":961,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-27.png","element":"img"}],[{"text":"This notion was initially introduced for geometric purposes in the setting of measure theory 
","element":"span"},{"href":"#id-20","text":"(Heinonen, ","element":"a"},{"href":"#id-20","text":"2012; ","element":"a"},{"href":"#id-21","referenceIndex":1,"text":"Federer, ","element":"a"},{"href":"#id-21","referenceIndex":1,"text":"2014)","element":"a"},{"text":". It helps for constructing a subcover of a metric space by also minimizing the overlap between the elements of the subcover. Doubling-probability has been used in a machine learning context, particularly ","element":"span"},{"text":"k","element":"span"},{"text":"-NN classification (or regression), where the constant ","element":"span"},{"style":{"height":13.1},"width":59.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-28.png","element":"img","alt":" Cdb","inline":true,"padRight":true},{"text":"is interpreted as the intrinsic dimension of the region where the data belong ","element":"span"},{"href":"#id-22","text":"(Kpotufe","element":"a"},{"text":", ","element":"span"},{"href":"#id-22","text":"2011)","element":"a"},{"text":". This allows to reduce considerably the complexity of the clas-sification problem and to bypass the so-called curse of dimension. Also, it is also proved ","element":"span"},{"href":"#id-22","text":"(Kpotufe, ","element":"a"},{"href":"#id-22","text":"2011) ","element":"a"},{"text":"that the notion of doubling-probability generalizes the strong density assumption ","element":"span"},{"href":"#id-16","text":"H1b. ","element":"a"},{"text":"It is thus more universal, and does not require a probability density.","element":"span"}],[{"text":"In this paper, doubling-probability is used only for geometrical purposes. 
It is later relaxed, so that it becomes sufficient to consider only balls ","element":"span"},{"text":"B","element":"span"},{"text":"(","element":"span"},{"text":"x, r","element":"span"},{"text":") with ","element":"span"},{"style":{"height":16},"width":177.36,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/4-29.png","element":"img","alt":" PX(B(x, r","inline":true},{"text":")) sufficiently large to satisfy the doubling-probability condition ","element":"span"},{"href":"#id-23","text":"(H4)","element":"a"},{"text":".","element":"span"}]]},{"heading":"3 Convergence rates in nonparametric active learning","paragraphs":[[{"text":"3.1 Previous work","element":"span"}],[{"text":"Active learning theory has been mostly studied during the last decades in a parametric setting, see for example ","element":"span"},{"href":"#id-24","text":"(Balcan et al., ","element":"a"},{"href":"#id-24","text":"2010; ","element":"a"},{"href":"#id-25","text":"Hanneke","element":"a"},{"text":", ","element":"span"},{"href":"#id-25","text":"2011; ","element":"a"},{"href":"#id-9","text":"Dasgupta, ","element":"a"},{"href":"#id-9","text":"2017) ","element":"a"},{"text":"and references therein. One of the pioneering works studying the achievable limits in active learning in a nonparametric setting ","element":"span"},{"href":"#id-2","text":"(Castro and Nowak, ","element":"a"},{"href":"#id-2","text":"2008) ","element":"a"},{"text":"required that the decision boundary is the graph of a H¨older continuous function with parameter ","element":"span"},{"style":{"height":6.8},"width":26,"height":17,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-0.png","element":"img","alt":" α","inline":true,"padRight":true},{"href":"#id-15","text":"(H1a)","element":"a"},{"text":". 
Using a notion of margin noise (with parameter ","element":"span"},{"style":{"height":14.4},"width":23,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-1.png","element":"img","alt":" β","inline":true},{"text":") very similar to ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":", the following minimax rate was obtained:","element":"span"}],[{"style":{"width":"59%"},"width":802,"height":73,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-2.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":10.4},"width":22,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-3.png","element":"img","alt":" γ","inline":true,"padRight":true},{"text":"= ","element":"span"},{"style":{"height":19.31},"width":57.76,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-4.png","element":"img","alt":"d−1α","inline":true,"padRight":true},{"text":"and ","element":"span"},{"text":"d ","element":"span"},{"text":"is the dimension of instance space (","element":"span"},{"style":{"height":13.36},"width":133.16,"height":33.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-5.png","element":"img","alt":"X = Rd","inline":true},{"text":").","element":"span"}],[{"text":"Note that this result assumes the knowledge of the smoothness and margin noise parameters, whereas an algorithm that achieves the same rate, but that adapts to these parameters was proposed recently in ","element":"span"},{"href":"#id-27","text":"(Locatelli et al., ","element":"a"},{"href":"#id-27","text":"2018)","element":"a"},{"text":".","element":"span"}],[{"text":"In passive learning, by assuming that the regression function is H¨older continuous ","element":"span"},{"href":"#id-15","text":"(H1a)","element":"a"},{"text":", along with ","element":"span"},{"href":"#id-16","text":"(H1b) ","element":"a"},{"text":"and 
","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":", the following minimax rate was established ","element":"span"},{"href":"#id-5","text":"(Audibert and Tsybakov, ","element":"a"},{"href":"#id-5","text":"2007)","element":"a"},{"text":":","element":"span"}],[{"id":"id-29","style":{"width":"59%"},"width":796,"height":74,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-6.png","element":"img"}],[{"text":"In active learning, using the same assumptions ","element":"span"},{"href":"#id-15","text":"(H1a)","element":"a"},{"text":", ","element":"span"},{"href":"#id-16","text":"(H1b) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":", with the additional condition ","element":"span"},{"style":{"height":14.4},"width":139.08,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-7.png","element":"img","alt":" αβ < d","inline":true},{"text":", the following minimax rate was obtained ","element":"span"},{"href":"#id-4","text":"(Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017)","element":"a"}],[{"id":"id-28","style":{"width":"60%"},"width":812,"height":72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-8.png","element":"img"}],[{"text":"where ","element":"span"},{"text":"˜","element":"span"},{"text":"O ","element":"span"},{"text":"indicates that there may be additional logarithmic factors. 
This active learning rate given by ","element":"span"},{"href":"#id-28","text":"(3) ","element":"a"},{"text":"thus represents an improvement over the passive learning rate ","element":"span"},{"href":"#id-29","text":"(2) ","element":"a"},{"text":"that uses the same hypotheses.","element":"span"}],[{"text":"With another assumption on the regression function relating the ","element":"span"},{"style":{"height":13.1},"width":43.36,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-9.png","element":"img","alt":" L2","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":13.1},"width":59.36,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-10.png","element":"img","alt":" L∞","inline":true,"padRight":true},{"text":"approximation losses of certain piecewise constant or polynomial approximations of ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-11.png","element":"img","alt":" η","inline":true,"padRight":true},{"text":"in the vicinity of the decision boundary, the same rate ","element":"span"},{"href":"#id-28","text":"(3) ","element":"a"},{"text":"was also obtained ","element":"span"},{"href":"#id-3","text":"(Minsker, ","element":"a"},{"href":"#id-3","text":"2012)","element":"a"},{"text":".","element":"span"}],[{"text":"3.2 Link with ","element":"span"},{"text":"k","element":"span"},{"text":"-NN classifiers","element":"span"}],[{"text":"For practicals applications, an interesting question is whether ","element":"span"},{"text":"k","element":"span"},{"text":"-NN classifiers attain the rate given by ","element":"span"},{"href":"#id-29","text":"(2) ","element":"a"},{"text":"in passive learning and by ","element":"span"},{"href":"#id-28","text":"(3) ","element":"a"},{"text":"in active learning. 
In passive learning, under assumptions ","element":"span"},{"href":"#id-15","text":"(H1a)","element":"a"},{"text":", ","element":"span"},{"href":"#id-16","text":"(H1b) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":", and for suitable ","element":"span"},{"style":{"height":13.1},"width":40.64,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-12.png","element":"img","alt":" kn","inline":true},{"text":", it was shown in ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014) ","element":"a"},{"text":"that ","element":"span"},{"style":{"height":13.1},"width":40.64,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/5-13.png","element":"img","alt":" kn","inline":true},{"text":"-NN indeed achieves the rate ","element":"span"},{"href":"#id-29","text":"(2)","element":"a"},{"text":".","element":"span"}],[{"text":"In active learning a pool-based algorithm that outputs a ","element":"span"},{"text":"k","element":"span"},{"text":"-NN classifier has been proposed in ","element":"span"},{"href":"#id-30","text":"(Kontorovich et al., ","element":"a"},{"href":"#id-30","text":"2016)","element":"a"},{"text":", but its assumptions differ from ours in terms of smoothness and noise. Similarly, the algorithm proposed in ","element":"span"},{"href":"#id-31","text":"(Hanneke) ","element":"a"},{"text":"outputs a 1-NN classifier based on a subsample of a given pool of data, such that the label of each instance of this subsample is determined with high probability by the labels of its neighbors within the pool. 
The number of neighbors is adaptively chosen for each instance in the subsample, leading to the minimax rate ","element":"span"},{"href":"#id-28","text":"(3) ","element":"a"},{"text":"under the same assumptions as in ","element":"span"},{"href":"#id-4","text":"(Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017)","element":"a"},{"text":".","element":"span"}],[{"text":"To obtain more general results on the rate of convergence for ","element":"span"},{"text":"k","element":"span"},{"text":"-NN classifiers in metric spaces under minimal assumptions, the more general smoothness assumption given by ","element":"span"},{"href":"#id-18","text":"(H3) ","element":"a"},{"text":"was used in ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014)","element":"a"},{"text":". By using a ","element":"span"},{"text":"k","element":"span"},{"text":"-NN algorithm, and under assumptions ","element":"span"},{"href":"#id-26","text":"(H2) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-18","text":"(H3)","element":"a"},{"text":", the rate of convergence obtained in ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014) ","element":"a"},{"text":"is also of the order of ","element":"span"},{"href":"#id-29","text":"(2)","element":"a"},{"text":". 
Additionally, using assumption ","element":"span"},{"href":"#id-18","text":"(H3) ","element":"a"},{"text":"instead of ","element":"span"},{"href":"#id-15","text":"(H1a) ","element":"a"},{"text":"removes the need for the strong density assumption ","element":"span"},{"href":"#id-16","text":"(H1b)","element":"a"},{"text":", which therefore allows for more probability classes.","element":"span"}],[{"text":"3.3 Contributions of the current work","element":"span"}],[{"text":"In this work, we will use the assumptions that were used in the context of passive learning in ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014)","element":"a"},{"text":", and show that is is possible to use them in active learning as well.","element":"span"}],[{"text":"For the sake of clarity, let us restate here these assumptions that will be used throughout this paper. We assume that the assumptions ","element":"span"},{"href":"#id-18","text":"(H3)","element":"a"},{"text":", ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":", ","element":"span"},{"href":"#id-23","text":"(H4) ","element":"a"},{"text":"simultaneously hold respectively with parameters (","element":"span"},{"style":{"height":14},"width":70.2,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/6-0.png","element":"img","alt":"α, L","inline":true},{"text":"), (","element":"span"},{"style":{"height":16},"width":172.88,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/6-1.png","element":"img","alt":"β, C), Cdb","inline":true},{"text":".","element":"span"}],[{"text":"In this paper, we provide an active learning algorithm under assumptions ","element":"span"},{"href":"#id-18","text":"(H3)","element":"a"},{"text":", ","element":"span"},{"href":"#id-26","text":"(H2) ","element":"a"},{"text":"that were used in passive learning in ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and 
Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014)","element":"a"},{"text":". We additionally assume that the underlying marginal probability ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/6-2.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"satisfies ","element":"span"},{"href":"#id-23","text":"(H4) ","element":"a"},{"text":"mostly for geometrical convenience. Our algorithm has several advantages:","element":"span"}],[{"style":{"width":"98%"},"width":1325,"height":564,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/6-3.png","element":"img"}],[{"text":"• ","element":"span"},{"text":"According to the assumption ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":", as we will see, our algorithm also (as in ","element":"span"},{"href":"#id-3","text":"(Minsker, ","element":"a"},{"href":"#id-3","text":"2012)","element":"a"},{"text":") covers the most interesting case where the regression function is allowed to cross the boundary decision ","element":"span"},{"style":{"height":16},"width":131,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-0.png","element":"img","alt":" {x, η(x","inline":true},{"text":") = ","element":"span"},{"style":{"height":7.6},"width":16,"height":19,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-1.png","element":"img","alt":"1","inline":true}],[{"id":"id-8","style":{"width":"99%"},"width":1346,"height":313,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-2.png","element":"img"}],[{"id":"id-34","text":"As explained in Section ","element":"span"},{"href":"#id-32","text":"2.1, ","element":"a"},{"text":"we consider an active learning setting with a pool of i.i.d. 
unlabeled examples ","element":"span"},{"style":{"height":16},"width":403.04,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-3.png","element":"img","alt":" K = {X1, X2, . . . , Xw}","inline":true},{"text":". Let ","element":"span"},{"style":{"height":12.8},"width":118.28,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-4.png","element":"img","alt":" n ≤ w","inline":true,"padRight":true},{"text":"the budget, that is the maximum number of points whose label we are allowed to query to the oracle. The objective of the algorithm is to build a 1-NN classifier, based on a labelled set ","element":"span"},{"style":{"height":13.1},"width":55.28,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-5.png","element":"img","alt":" Sac","inline":true,"padRight":true},{"text":"of carefully chosen points. This set contains a subset of most ","element":"span"},{"text":"informative ","element":"span"},{"text":"points in ","element":"span"},{"text":"K ","element":"span"},{"text":"and is called the ","element":"span"},{"text":"active set","element":"span"},{"text":". More precisely, a point ","element":"span"},{"style":{"height":13.1},"width":45.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-6.png","element":"img","alt":"Xt","inline":true,"padRight":true},{"text":"is considered ","element":"span"},{"text":"informative ","element":"span"},{"text":"if its label cannot be inferred (see below) from the previous observations ","element":"span"},{"style":{"height":13.1},"width":55.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-7.png","element":"img","alt":" Xt′","inline":true},{"text":"(with ","element":"span"},{"style":{"height":10.8},"width":95.12,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-8.png","element":"img","alt":" t′ < t","inline":true},{"text":"). 
The set ","element":"span"},{"style":{"height":13.1},"width":55.28,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-9.png","element":"img","alt":" Sac","inline":true,"padRight":true},{"text":"starts with ","element":"span"},{"style":{"height":14.64},"width":167.2,"height":36.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-10.png","element":"img","alt":" Xt1 = X1","inline":true,"padRight":true},{"text":"chosen arbitrarily in ","element":"span"},{"text":"K ","element":"span"},{"text":"and stops when the budget ","element":"span"},{"text":"n ","element":"span"},{"text":"is reached or when ","element":"span"},{"style":{"height":13.1},"width":56.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-11.png","element":"img","alt":" Xw","inline":true,"padRight":true},{"text":"is attained.","element":"span"}],[{"text":"When a point ","element":"span"},{"style":{"height":13.1},"width":45.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-12.png","element":"img","alt":" Xt","inline":true,"padRight":true},{"text":"is ","element":"span"},{"text":"informative","element":"span"},{"text":", instead of requesting directly its label to the (noisy) oracle, we infer it by requesting the labels of its nearest neighbors in ","element":"span"},{"text":"K","element":"span"},{"text":", as was done in ","element":"span"},{"href":"#id-31","text":"(Hanneke)","element":"a"},{"text":". This is reasonable for practical situations where the uncertainty about the label of ","element":"span"},{"style":{"height":13.1},"width":45.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-13.png","element":"img","alt":" Xt","inline":true,"padRight":true},{"text":"has to be overcome, and it is related to the assumption ","element":"span"},{"href":"#id-18","text":"(H3)","element":"a"},{"text":". 
Note that it differs from the setting of ","element":"span"},{"href":"#id-27","text":"(Locatelli et al., ","element":"a"},{"href":"#id-27","text":"2018)","element":"a"},{"text":", where the label of ","element":"span"},{"style":{"height":13.1},"width":45.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-14.png","element":"img","alt":" Xt","inline":true,"padRight":true},{"text":"is requested several times. The number of neighbors ","element":"span"},{"style":{"height":13.1},"width":32.64,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-15.png","element":"img","alt":" kt","inline":true,"padRight":true},{"text":"used for inferring that label of ","element":"span"},{"style":{"height":13.1},"width":45.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-16.png","element":"img","alt":" Xt","inline":true,"padRight":true},{"text":"is determined such that, while respecting the budget, we can predict with high confidence the true label as ","element":"span"},{"style":{"height":16},"width":102.72,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-17.png","element":"img","alt":" f ∗(Xt","inline":true},{"text":") of ","element":"span"},{"style":{"height":13.1},"width":45.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-18.png","element":"img","alt":"Xt","inline":true,"padRight":true},{"text":"by the empirical mean of the labels of its ","element":"span"},{"style":{"height":13.1},"width":32.64,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-19.png","element":"img","alt":" kt","inline":true,"padRight":true},{"text":"nearest neighbors.","element":"span"}],[{"text":"The labelled active set ","element":"span"},{"style":{"height":13.1},"width":55.28,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-20.png","element":"img","alt":" 
Sac","inline":true,"padRight":true},{"text":"output by the algorithm will comprise only the informative points on which we have sufficient guarantees when considering the inferred label as the right label. Finally, we show that the labelled active set ","element":"span"},{"style":{"height":13.1},"width":55.28,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-21.png","element":"img","alt":"Sac","inline":true,"padRight":true},{"text":"is sufficient to predict the label of any new point with a 1-NN classification rule ","element":"span"},{"style":{"height":15.5},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-22.png","element":"img","alt":"�fn,w","inline":true},{"text":".","element":"span"}],[{"text":"4.2 Algorithm","element":"span"}],[{"text":"The ","element":"span"},{"text":"KALLS ","element":"span"},{"text":"algorithm (Algorithm ","element":"span"},{"href":"#id-33","text":"1) ","element":"a"},{"text":"aims at determining the ","element":"span"},{"text":"active set ","element":"span"},{"text":"defined in Section ","element":"span"},{"href":"#id-34","text":"4.1 ","element":"a"},{"text":"and the related 1-NN classifier ","element":"span"},{"style":{"height":15.5},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/7-23.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"under the assumption ","element":"span"},{"href":"#id-18","text":"(H3) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":".","element":"span"}],[{"text":"Before beginning the description of ","element":"span"},{"text":"KALLS","element":"span"},{"text":", let us introduce some variables and notations, whose precise form will be justified in Section ","element":"span"},{"text":"5. 
","element":"span"},{"text":"The latter contains the proof sketch of the convergence of ","element":"span"},{"text":"KALLS","element":"span"},{"text":", while the complete proofs are in Appendix ","element":"span"},{"href":"#id-35","referenceIndex":3,"text":"A.","element":"a"}],[{"text":"For ","element":"span"},{"style":{"height":14.8},"width":93.24,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-0.png","element":"img","alt":" ǫ, δ ∈","inline":true,"padRight":true},{"text":"(0","element":"span"},{"text":", ","element":"span"},{"text":"1), ","element":"span"},{"style":{"height":13.2},"width":63.64,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-1.png","element":"img","alt":" k ≥","inline":true,"padRight":true},{"text":"1, set:","element":"span"}],[{"id":"id-62","style":{"width":"85%"},"width":1149,"height":259,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-2.png","element":"img"}],[{"text":"where","element":"span"}],[{"id":"id-48","style":{"width":"73%"},"width":988,"height":77,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-3.png","element":"img"}],[{"text":"Let","element":"span"}],[{"style":{"width":"73%"},"width":992,"height":110,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-4.png","element":"img"}],[{"text":"For ","element":"span"},{"style":{"height":16},"width":435.68,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-5.png","element":"img","alt":" Xs ∈ K = {X1, . . . 
, Xw}","inline":true},{"text":", we denote henceforth by ","element":"span"},{"style":{"height":18.58},"width":78.24,"height":46.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-6.png","element":"img","alt":" X(k)s","inline":true,"padRight":true},{"text":"its ","element":"span"},{"text":"k","element":"span"},{"text":"-th nearest neighbor in ","element":"span"},{"text":"K","element":"span"},{"text":", and ","element":"span"},{"style":{"height":18.58},"width":73.92,"height":46.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-7.png","element":"img","alt":" Y (k)s","inline":true,"padRight":true},{"text":"the corresponding label. For an integer ","element":"span"},{"style":{"height":13.2},"width":64.12,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-8.png","element":"img","alt":" k ≥","inline":true,"padRight":true},{"text":"1, let","element":"span"}],[{"style":{"width":"79%"},"width":1077,"height":118,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-9.png","element":"img"}],[{"text":"The inputs of ","element":"span"},{"text":"KALLS ","element":"span"},{"text":"are a pool ","element":"span"},{"text":"K ","element":"span"},{"text":"of unlabelled data of size ","element":"span"},{"text":"w","element":"span"},{"text":", the budget ","element":"span"},{"text":"n","element":"span"},{"text":", the smoothness parameters (","element":"span"},{"style":{"height":14},"width":75,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-10.png","element":"img","alt":"α, L","inline":true},{"text":") from ","element":"span"},{"href":"#id-18","text":"(H3)","element":"a"},{"text":", the margin noise parameters (","element":"span"},{"style":{"height":14.4},"width":23,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-11.png","element":"img","alt":"β","inline":true},{"text":", 
","element":"span"},{"text":"C","element":"span"},{"text":") from ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":", a confidence parameter ","element":"span"},{"style":{"height":12.4},"width":61.08,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-12.png","element":"img","alt":" δ ∈","inline":true,"padRight":true},{"text":"(0","element":"span"},{"text":", ","element":"span"},{"text":"1) and an accuracy parameter ","element":"span"},{"style":{"height":9.6},"width":61.08,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-13.png","element":"img","alt":"ǫ ∈","inline":true,"padRight":true},{"text":"(0","element":"span"},{"text":", ","element":"span"},{"text":"1). For the moment, these parameters are fixed from the beginning but adaptive algorithms such as ","element":"span"},{"href":"#id-4","text":"(Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017) ","element":"a"},{"text":"could be exploited, in particular for the ","element":"span"},{"style":{"height":6.8},"width":26,"height":17,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-14.png","element":"img","alt":" α","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":14.4},"width":23,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-15.png","element":"img","alt":" β","inline":true,"padRight":true},{"text":"parameters.","element":"span"}],[{"text":"At any given stage, the current version of the labelled active set ","element":"span"},{"style":{"height":13.1},"width":55.28,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-16.png","element":"img","alt":" Sac","inline":true,"padRight":true},{"text":"is denoted by 
","element":"span"},{"style":{"height":10.8},"width":27.68,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-17.png","element":"img","alt":"�S","inline":true},{"text":". Based on ","element":"span"},{"style":{"height":13.1},"width":55.28,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-18.png","element":"img","alt":" Sac","inline":true},{"text":", with high confidence, the 1-NN classifier ","element":"span"},{"style":{"height":15.5},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-19.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"agrees with the Bayes classifier at points that lie beyond some margin ","element":"span"},{"style":{"height":13.9},"width":102.52,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-20.png","element":"img","alt":" ∆o >","inline":true,"padRight":true},{"text":"0 of the decision boundary. Formally, given ","element":"span"},{"style":{"height":11.6},"width":115.56,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-21.png","element":"img","alt":" x ∈ X","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":16},"width":323.68,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-22.png","element":"img","alt":" |η(x) − 1/2| > ∆0","inline":true},{"text":", we have ","element":"span"},{"style":{"height":16.7},"width":112.76,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-23.png","element":"img","alt":"�fn,w(x","inline":true},{"text":") = ","element":"span"},{"style":{"height":16.48},"width":158.08,"height":41.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-24.png","element":"img","alt":" 1η(x)≥1/2","inline":true,"padRight":true},{"text":"with high confidence. 
We will show in Section ","element":"span"},{"text":"5 ","element":"span"},{"text":"that, with a suitable choice of ","element":"span"},{"style":{"height":13.91},"width":49.12,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-25.png","element":"img","alt":" ∆o","inline":true},{"text":", the assumption ","element":"span"},{"href":"#id-26","text":"(H2) ","element":"a"},{"text":"leads to the desired rate of convergence ","element":"span"},{"href":"#id-28","text":"(3)","element":"a"},{"text":".","element":"span"}],[{"id":"id-33","text":"KALLS ","element":"span"},{"text":"uses two main subroutines : ","element":"span"},{"text":"Reliable ","element":"span"},{"text":"and ","element":"span"},{"text":"ConfidentLabel","element":"span"},{"text":", which are detailed below in Sections ","element":"span"},{"href":"#id-36","text":"4.3 ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-37","text":"4.4, ","element":"a"},{"text":"respectively.","element":"span"}],[{"id":"id-36","text":"4.3 ","element":"span"},{"text":"Reliable ","element":"span"},{"text":"subroutine","element":"span"}],[{"text":"The ","element":"span"},{"text":"Reliable ","element":"span"},{"text":"subroutine is a binary test that checks if the label of a current point ","element":"span"},{"text":"X ","element":"span"},{"text":"can be inferred with high confidence from some previously informative points before reaching ","element":"span"},{"text":"X","element":"span"},{"text":". These points are obtained via a set ","element":"span"},{"style":{"height":10.8},"width":28.16,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-26.png","element":"img","alt":"�S","inline":true,"padRight":true},{"text":"called ","element":"span"},{"text":"current active set","element":"span"},{"text":". 
Each element of ","element":"span"},{"style":{"height":10.8},"width":28.16,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-27.png","element":"img","alt":"�S","inline":true,"padRight":true},{"text":"can be seen as a triplet (","element":"span"},{"style":{"height":14},"width":144.24,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-28.png","element":"img","alt":"X′, �Y ′, c","inline":true},{"text":") where ","element":"span"},{"style":{"height":10.8},"width":50.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-29.png","element":"img","alt":" X′","inline":true,"padRight":true},{"text":"is an informative point, ","element":"span"},{"style":{"height":10.8},"width":46.16,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/8-30.png","element":"img","alt":"�Y ′","inline":true,"padRight":true},{"text":"its inferred label, and ","element":"span"},{"text":"c > ","element":"span"},{"text":"0 can be thought as","element":"span"}],[{"style":{"width":"100%"},"width":1349,"height":1251,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-0.png","element":"img"}],[{"text":"a guarantee for predicting the right label ","element":"span"},{"text":"Y ","element":"span"},{"text":"of ","element":"span"},{"style":{"height":10.8},"width":50.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-1.png","element":"img","alt":" X′","inline":true,"padRight":true},{"text":"as ","element":"span"},{"style":{"height":10.8},"width":46.16,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-2.png","element":"img","alt":"�Y ′","inline":true},{"text":". 
Formally, we have ","element":"span"},{"style":{"height":19.31},"width":349.88,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-3.png","element":"img","alt":"O(c) ≤ |η(X′) − 1/2|","inline":true,"padRight":true},{"text":"when (","element":"span"},{"style":{"height":16},"width":248.96,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-4.png","element":"img","alt":"X′, Ŷ′, c) ∈ Ŝ","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":10.8},"width":50.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-5.png","element":"img","alt":" X′","inline":true,"padRight":true},{"text":"is relatively far from the ","element":"span"},{"text":"decision boundary. If ","element":"span"},{"style":{"height":16},"width":385.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-6.png","element":"img","alt":" Reliable(X, δ, α, L, Ŝ","inline":true},{"text":") outputs ","element":"span"},{"text":"True","element":"span"},{"text":", the point ","element":"span"},{"text":"X ","element":"span"},{"text":"is not considered to be informative, and ","element":"span"},{"style":{"height":10.8},"width":27.68,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-7.png","element":"img","alt":"Ŝ","inline":true,"padRight":true},{"text":"will not be updated. 
By convention, ","element":"span"},{"style":{"height":16.8},"width":379.52,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-8.png","element":"img","alt":"Reliable(X, δ, α, L, ∅","inline":true},{"text":") always returns ","element":"span"},{"text":"False","element":"span"},{"text":".","element":"span"}],[{"text":"The inputs are the current point ","element":"span"},{"text":"X","element":"span"},{"text":", a confidence parameter ","element":"span"},{"style":{"height":11.6},"width":19,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-9.png","element":"img","alt":" δ","inline":true},{"text":", the smoothness parameters (","element":"span"},{"style":{"height":14},"width":70.2,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-10.png","element":"img","alt":"α, L","inline":true},{"text":") from ","element":"span"},{"href":"#id-18","text":"(H3)","element":"a"},{"text":", and the set ","element":"span"},{"style":{"height":10.8},"width":28.16,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-11.png","element":"img","alt":"Ŝ","inline":true,"padRight":true},{"text":"before examining the point ","element":"span"},{"text":"X","element":"span"},{"text":".","element":"span"}],[{"text":"If ","element":"span"},{"style":{"height":19.31},"width":188.12,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-12.png","element":"img","alt":" |η(X) − 1/2|","inline":true,"padRight":true},{"text":"entails the same confidence lower bound ","element":"span"},{"text":"O","element":"span"},{"text":"(","element":"span"},{"text":"c","element":"span"},{"text":") as that of some ","element":"span"},{"text":"previous informative point ","element":"span"},{"style":{"height":10.8},"width":50.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-13.png","element":"img","alt":" 
X′","inline":true,"padRight":true},{"text":"(with (","element":"span"},{"style":{"height":16},"width":253.28,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-14.png","element":"img","alt":"X′, �Y ′, c) ∈ �S","inline":true},{"text":"), there is a low degree of uncertainty on the label of ","element":"span"},{"text":"X","element":"span"},{"text":", and ","element":"span"},{"text":"X ","element":"span"},{"text":"is considered to be uninformative.","element":"span"}],[{"text":"Using the assumption ","element":"span"},{"href":"#id-18","text":"(H3)","element":"a"},{"text":", it suffices to have","element":"span"}],[{"id":"id-38","style":{"width":"86%"},"width":1169,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-15.png","element":"img"}],[{"text":"Because the ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-16.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"appearing in ","element":"span"},{"href":"#id-38","text":"(9) ","element":"a"},{"text":"are unknown, it has to be replaced by an estimate. We will show that it can be estimated with arbitrary precision and ","element":"span"},{"id":"id-80","text":"confidence using only unlabelled data from ","element":"span"},{"text":"K","element":"span"},{"text":". 
The ","element":"span"},{"text":"Reliable ","element":"span"},{"text":"subroutine uses ","element":"span"},{"style":{"height":16},"width":292.76,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-17.png","element":"img","alt":" EstProb(X, r, ǫo,","inline":true,"padRight":true},{"text":"50","element":"span"},{"style":{"height":14.8},"width":36.76,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/9-18.png","element":"img","alt":", δ","inline":true},{"text":") (inspired from ","element":"span"},{"href":"#id-30","text":"(Kontorovich et al., ","element":"a"},{"href":"#id-30","text":"2016)","element":"a"},{"text":") as follows:","element":"span"}],[{"style":{"width":"100%"},"width":1349,"height":1744,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-0.png","element":"img"}],[{"text":"1. Call the subroutine ","element":"span"},{"style":{"height":16},"width":220.76,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-1.png","element":"img","alt":" BerEst(ǫo, δ,","inline":true,"padRight":true},{"text":"50). 2. 
To draw a single ","element":"span"},{"style":{"height":10},"width":31.16,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-2.png","element":"img","alt":" pi","inline":true,"padRight":true},{"text":"in ","element":"span"},{"style":{"height":16},"width":221.24,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-3.png","element":"img","alt":" BerEst(ǫo, δ,","inline":true,"padRight":true},{"text":"50), sample randomly an example ","element":"span"},{"style":{"height":13.1},"width":44.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-4.png","element":"img","alt":" Xi","inline":true,"padRight":true},{"text":"from ","element":"span"},{"text":"K","element":"span"},{"text":", and set ","element":"span"},{"style":{"height":16.48},"width":273.12,"height":41.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-5.png","element":"img","alt":" pi = 1Xi∈B(X,r)","inline":true},{"text":".","element":"span"}],[{"text":"The subroutine ","element":"span"},{"text":"BerEst ","element":"span"},{"text":"consists in estimating adaptively with high probability the expectation of a Bernoulli variable ","element":"span"},{"style":{"height":14},"width":112.72,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-6.png","element":"img","alt":" Z ∼ p","inline":true},{"text":". In our setting, we estimate a probability-ball, so that a realisation of ","element":"span"},{"text":"Z ","element":"span"},{"text":"can be set as ","element":"span"},{"style":{"height":10},"width":31.16,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-7.png","element":"img","alt":" pi","inline":true,"padRight":true},{"text":"= 1","element":"span"},{"style":{"height":11.2},"width":153.6,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-8.png","element":"img","alt":"Xi∈B(x,r)","inline":true},{"text":". 
The variables ","element":"span"},{"style":{"height":10},"width":162.88,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/10-9.png","element":"img","alt":" p1, . . . , p4","inline":true,"padRight":true},{"text":"are sampled at the beginning for theoretical analysis where we want a concentration inequality to hold for a number of samples greater than 4 (see ","element":"span"},{"href":"#id-30","text":"(Kontorovich et al., ","element":"a"},{"href":"#id-30","text":"2016; ","element":"a"},{"href":"#id-39","text":"Maurer and Pontil, ","element":"a"},{"href":"#id-39","text":"2009) ","element":"a"},{"text":"for more details).","element":"span"}],[{"text":"However, it is not dramatic if ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-0.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"is supposed to be known by the learner. This is not a limitation, since it can be assumed that the pool ","element":"span"},{"text":"K ","element":"span"},{"text":"of data is large enough such that ","element":"span"},{"style":{"height":13.1},"width":52.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-1.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"can be estimated to any desired accuracy.","element":"span"}],[{"id":"id-37","style":{"width":"41%"},"width":553,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-2.png","element":"img"}],[{"text":"If a point ","element":"span"},{"text":"X ","element":"span"},{"text":"is considered informative, it is introduced in the ","element":"span"},{"text":"ConfidentLabel ","element":"span"},{"text":"(Algorithm ","element":"span"},{"href":"#id-40","text":"(5)","element":"a"},{"text":"), along with an integer 
","element":"span"},{"style":{"height":10.8},"width":36.08,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-3.png","element":"img","alt":" k′","inline":true},{"text":", a budget parameter ","element":"span"},{"text":"t ","element":"span"},{"text":"and a confidence parameter ","element":"span"},{"style":{"height":11.6},"width":19,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-4.png","element":"img","alt":" δ","inline":true},{"text":". This subroutine infers with high confidence (at least 1","element":"span"},{"style":{"height":11.6},"width":55.48,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-5.png","element":"img","alt":"−δ","inline":true},{"text":") the label of ","element":"span"},{"text":"X","element":"span"},{"text":", by using the labels of its ","element":"span"},{"style":{"height":10.8},"width":36.08,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-6.png","element":"img","alt":" k′","inline":true,"padRight":true},{"text":"nearest neighbors, knowing that we can request at most ","element":"span"},{"text":"t ","element":"span"},{"text":"labels. 
The parameter ","element":"span"},{"style":{"height":10.8},"width":36.08,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-7.png","element":"img","alt":" k′","inline":true,"padRight":true},{"text":"is chosen such that, with high probability, the empirical majority of the ","element":"span"},{"style":{"height":10.8},"width":36.08,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-8.png","element":"img","alt":" k′","inline":true},{"text":"-NN labels differs from the majority in expectation by less than some margin, and all the ","element":"span"},{"style":{"height":10.8},"width":36.08,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-9.png","element":"img","alt":" k′","inline":true},{"text":"-NN are at most at some distance from ","element":"span"},{"text":"X","element":"span"},{"text":". The ","element":"span"},{"text":"ConfidentLabel ","element":"span"},{"text":"subroutine outputs ","element":"span"},{"style":{"height":10.8},"width":31,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-10.png","element":"img","alt":"�Y","inline":true,"padRight":true},{"text":", ","element":"span"},{"text":"Q ","element":"span"},{"text":"where ","element":"span"},{"text":"Q ","element":"span"},{"text":"represents the set of labeled nearest neighbors in the subroutine, and ","element":"span"},{"style":{"height":10.8},"width":31,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-11.png","element":"img","alt":"�Y","inline":true,"padRight":true},{"text":"represents the majority label in ","element":"span"},{"text":"Q","element":"span"},{"text":".","element":"span"}],[{"id":"id-40","style":{"width":"100%"},"width":1349,"height":743,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-12.png","element":"img"}]]},{"heading":"5 Theoretical motivations","paragraphs":[[{"text":"This Section provides the main 
results and theoretical motivations behind the ","element":"span"},{"text":"KALLS ","element":"span"},{"text":"algorithm. Let us recall ","element":"span"},{"style":{"height":16},"width":322.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/11-13.png","element":"img","alt":" K = {X1, . . . , Xw}","inline":true,"padRight":true},{"text":"is the pool of unlabeled data and ","element":"span"},{"text":"n ","element":"span"},{"text":"is the budget.","element":"span"}],[{"text":"Let us denote by ","element":"span"},{"style":{"height":16.3},"width":81.56,"height":40.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-0.png","element":"img","alt":" Aa,w","inline":true,"padRight":true},{"text":"the set of active learning algorithms on ","element":"span"},{"text":"K","element":"span"},{"text":", and ","element":"span"},{"style":{"height":16},"width":112.76,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-1.png","element":"img","alt":" P(α, β","inline":true},{"text":") the set of probabilities that satisfy assumption ","element":"span"},{"href":"#id-18","text":"(H3) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":".","element":"span"}],[{"text":"Additionally, let us introduce the set of probabilities ","element":"span"},{"style":{"height":16},"width":123.32,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-2.png","element":"img","alt":" P′(α, β","inline":true},{"text":") on ","element":"span"},{"style":{"height":12.8},"width":116.36,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-3.png","element":"img","alt":" X × Y","inline":true},{"text":". 
A probability ","element":"span"},{"style":{"height":16},"width":209.24,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-4.png","element":"img","alt":" P ∈ P′(α, β","inline":true},{"text":") if ","element":"span"},{"style":{"height":16},"width":198.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-5.png","element":"img","alt":" P ∈ P(α, β","inline":true},{"text":") and its marginal probability ","element":"span"},{"style":{"height":13.1},"width":52.45,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-6.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"is a doubling-probability. For ","element":"span"},{"style":{"height":16.3},"width":162.68,"height":40.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-7.png","element":"img","alt":" A ∈ Aa,w","inline":true},{"text":", we denote by ","element":"span"},{"style":{"height":15.5},"width":105.56,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-8.png","element":"img","alt":"�fA,n,w","inline":true,"padRight":true},{"text":":= ","element":"span"},{"style":{"height":15.5},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-9.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"the classifier that is provided by ","element":"span"},{"text":"A","element":"span"},{"text":".","element":"span"}],[{"text":"Theorem ","element":"span"},{"href":"#id-41","text":"2 ","element":"a"},{"text":"and its equivalent form in Theorem ","element":"span"},{"href":"#id-42","text":"3 ","element":"a"},{"text":"are the main results of this paper. 
They provide bounds on the excess risk for the ","element":"span"},{"text":"KALLS ","element":"span"},{"text":"algorithm in terms of the set ","element":"span"},{"style":{"height":16},"width":123.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-10.png","element":"img","alt":" P′(α, β","inline":true},{"text":"). The main idea of the proof is sketched in Section ","element":"span"},{"href":"#id-43","text":"5.2, ","element":"a"},{"text":"while a detailed proof can be found in Appendix ","element":"span"},{"href":"#id-35","referenceIndex":3,"text":"A.","element":"a"}],[{"id":"id-41","text":"5.1 Main results","element":"span"}],[{"style":{"width":"72%"},"width":982,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-11.png","element":"img"}],[{"text":"Let the set ","element":"span"},{"style":{"height":16},"width":123.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-12.png","element":"img","alt":" P′(α, β","inline":true},{"text":") ","element":"span"},{"text":"such that ","element":"span"},{"style":{"height":14.4},"width":130.92,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-13.png","element":"img","alt":" αβ < d","inline":true,"padRight":true},{"text":"where ","element":"span"},{"text":"d ","element":"span"},{"text":"is the dimension of the input space ","element":"span"},{"style":{"height":14.16},"width":133.16,"height":35.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-14.png","element":"img","alt":" X ⊂ Rd","inline":true},{"text":". 
For ","element":"span"},{"style":{"height":16},"width":210.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-15.png","element":"img","alt":" P ∈ P′(α, β","inline":true},{"text":")","element":"span"},{"text":", if ","element":"span"},{"style":{"height":15.5},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-16.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"is the ","element":"span"},{"text":"1 ","element":"span"},{"style":{"height":10.8},"width":112.32,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-17.png","element":"img","alt":" − NN","inline":true,"padRight":true},{"text":"classifier provided by ","element":"span"},{"text":"KALLS","element":"span"},{"text":", then we have:","element":"span"}],[{"id":"id-44","style":{"width":"99%"},"width":1341,"height":172,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-18.png","element":"img"}],[{"text":"The result ","element":"span"},{"href":"#id-44","text":"(10) ","element":"a"},{"text":"is also stated below (Theorem ","element":"span"},{"href":"#id-42","text":"3) ","element":"a"},{"id":"id-42","text":"in a more practical form ","element":"span"},{"text":"using label complexity. This latter form will be used in the proof.","element":"span"}],[{"id":"id-46","style":{"width":"100%"},"width":1347,"height":962,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/12-19.png","element":"img"}],[{"text":"Before proving this theorem, a couple of important remarks should be made:","element":"span"}],[{"text":"1. 
The rate of convergence ","element":"span"},{"href":"#id-44","text":"(10) ","element":"a"},{"text":"obtained in Theorem ","element":"span"},{"href":"#id-41","text":"2 ","element":"a"},{"text":"is an improvement over","element":"span"}],[{"style":{"width":"95%"},"width":1285,"height":422,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-0.png","element":"img"}],[{"text":"2. The rate ","element":"span"},{"href":"#id-44","text":"(10) ","element":"a"},{"text":"is also minimax. Indeed, let us introduce the set of probabilities ","element":"span"},{"text":"¯","element":"span"},{"style":{"height":16},"width":112.76,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-1.png","element":"img","alt":"P(α, β","inline":true},{"text":") that satisfy the Hölder continuous assumption ","element":"span"},{"href":"#id-15","text":"(H1a) ","element":"a"},{"text":"(with parameter ","element":"span"},{"style":{"height":6.8},"width":26,"height":17,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-2.png","element":"img","alt":" α","inline":true},{"text":"), strong density assumption ","element":"span"},{"href":"#id-16","text":"(H1b)","element":"a"},{"text":", and margin noise assumption ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":".","element":"span"}],[{"text":"Let us assume that ","element":"span"},{"style":{"height":14.4},"width":145.8,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-3.png","element":"img","alt":" αβ < d","inline":true},{"text":". 
It was proven in ","element":"span"},{"href":"#id-3","text":"(Minsker, ","element":"a"},{"href":"#id-3","text":"2012) ","element":"a"},{"text":"that if ","element":"span"},{"style":{"height":16},"width":211.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-4.png","element":"img","alt":"supp(PX) ⊂","inline":true,"padRight":true},{"text":"[0","element":"span"},{"text":", ","element":"span"},{"text":"1]","element":"span"},{"style":{"height":7.6},"width":17,"height":19,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-5.png","element":"img","alt":"d","inline":true},{"text":", there exists a constant ","element":"span"},{"style":{"height":12.4},"width":66.04,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-6.png","element":"img","alt":" γ >","inline":true,"padRight":true},{"text":"0 such that for all ","element":"span"},{"text":"n ","element":"span"},{"text":"large enough and for any active classifier ","element":"span"},{"style":{"height":14},"width":39.68,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-7.png","element":"img","alt":"�fn","inline":true},{"text":", we have:","element":"span"}],[{"id":"id-45","style":{"width":"74%"},"width":997,"height":92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-8.png","element":"img"}],[{"text":"Moreover, the strong density assumption implies the doubling-probability assumption ","element":"span"},{"href":"#id-22","text":"(Kpotufe","element":"a"},{"text":", ","element":"span"},{"href":"#id-22","text":"2011)","element":"a"},{"text":", and according to Theorem ","element":"span"},{"href":"#id-19","text":"1, ","element":"a"},{"text":"the lower bound obtained in ","element":"span"},{"href":"#id-45","text":"(17) ","element":"a"},{"text":"is also valid for the family of probabilities 
","element":"span"},{"style":{"height":16},"width":123.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-9.png","element":"img","alt":" P′(α, β","inline":true},{"text":").","element":"span"}],[{"id":"id-43","style":{"width":"41%"},"width":554,"height":91,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-10.png","element":"img"}],[{"text":"For a classifier ","element":"span"},{"style":{"height":15.51},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-11.png","element":"img","alt":"�fn,w","inline":true},{"text":", it is well known","element":"span"},{"href":"#id-10","text":"(Lugosi, ","element":"a"},{"href":"#id-10","text":"2002) ","element":"a"},{"text":"that the excess of risk is:","element":"span"}],[{"id":"id-47","style":{"width":"85%"},"width":1157,"height":99,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-12.png","element":"img"}],[{"text":"We thus aim to prove that ","element":"span"},{"href":"#id-46","text":"(11) ","element":"a"},{"text":"is a sufficient condition to guarantee (with probability ","element":"span"},{"style":{"height":14},"width":115,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-13.png","element":"img","alt":" ≥ 1−δ","inline":true},{"text":"), that ","element":"span"},{"style":{"height":15.5},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-14.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"agrees with ","element":"span"},{"style":{"height":14.16},"width":40,"height":35.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-15.png","element":"img","alt":" f ∗","inline":true,"padRight":true},{"text":"on the set ","element":"span"},{"style":{"height":16},"width":387.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-16.png","element":"img","alt":" 
{x, |η(x)−1/2| > ∆o}","inline":true},{"text":", for a suitable choice of ","element":"span"},{"style":{"height":13.9},"width":92.92,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-17.png","element":"img","alt":" ∆o >","inline":true,"padRight":true},{"text":"0.","element":"span"}],[{"text":"Introducing ","element":"span"},{"style":{"height":13.9},"width":49.12,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-18.png","element":"img","alt":" ∆o","inline":true,"padRight":true},{"text":"in ","element":"span"},{"href":"#id-47","text":"(18) ","element":"a"},{"text":"leads to:","element":"span"}],[{"style":{"width":"80%"},"width":1079,"height":52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-19.png","element":"img"}],[{"text":"Therefore, if ","element":"span"},{"style":{"height":16.98},"width":138.4,"height":42.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-20.png","element":"img","alt":" ∆o ≤ ǫ2","inline":true,"padRight":true},{"text":"then we have immediately, ","element":"span"},{"style":{"height":16.71},"width":380.48,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-21.png","element":"img","alt":" R( �fn,w) − R(f ∗) ≤ ǫ","inline":true},{"text":". 
On ","element":"span"},{"text":"the other hand, if ","element":"span"},{"style":{"height":17.17},"width":131.68,"height":42.92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-22.png","element":"img","alt":" ∆o > ǫ2","inline":true},{"text":", by hypothesis ","element":"span"},{"href":"#id-26","text":"(H2)","element":"a"},{"text":", we have ","element":"span"},{"style":{"height":16.7},"width":337.24,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-23.png","element":"img","alt":" R( �fn,w) − R(f ∗) ≤","inline":true,"padRight":true},{"text":"2","element":"span"},{"style":{"height":17.79},"width":137.24,"height":44.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-24.png","element":"img","alt":"C∆β+1o .","inline":true,"padRight":true},{"text":"In the latter case, setting ","element":"span"},{"style":{"height":24.99},"width":270.32,"height":62.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-25.png","element":"img","alt":" ∆o = � ǫ2C� 1β+1","inline":true,"padRight":true},{"text":"guarantees ","element":"span"},{"style":{"height":16.71},"width":180.76,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-26.png","element":"img","alt":" R( �fn,w) −","inline":true},{"style":{"height":16},"width":188.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-27.png","element":"img","alt":"R(f ∗) ≤ ǫ","inline":true},{"text":". 
Altogether, using for ","element":"span"},{"style":{"height":13.9},"width":49.12,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-28.png","element":"img","alt":" ∆o","inline":true,"padRight":true},{"text":"the value ","element":"span"},{"style":{"height":11.6},"width":33,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-29.png","element":"img","alt":" ∆","inline":true,"padRight":true},{"text":"= max( ","element":"span"},{"style":{"height":24.96},"width":181.52,"height":62.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-30.png","element":"img","alt":"ǫ2,� ǫ2C� 1β+1","inline":true,"padRight":true},{"text":") guarantees ","element":"span"},{"text":"R","element":"span"},{"text":"( ","element":"span"},{"style":{"height":16.7},"width":315.2,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/13-31.png","element":"img","alt":"�fn,w) − R(f ∗) ≤ ǫ","inline":true},{"text":". 
This explains the expression ","element":"span"},{"href":"#id-48","text":"(6)","element":"a"},{"text":".","element":"span"}],[{"text":"We present the proof sketch of Theorem ","element":"span"},{"href":"#id-42","text":"3 ","element":"a"},{"text":"in three main steps, and refer to the corresponding Lemmas and Theorems in the Appendix ","element":"span"},{"href":"#id-35","referenceIndex":3,"text":"A ","element":"a"},{"text":"for more details.","element":"span"}],[{"style":{"width":"99%"},"width":1335,"height":394,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-0.png","element":"img"}],[{"text":"will be violated after at most ","element":"span"},{"text":"˜","element":"span"},{"style":{"height":16},"width":104.28,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-1.png","element":"img","alt":"k(ǫ, δs","inline":true},{"text":") requests, with ","element":"span"},{"text":"˜","element":"span"},{"style":{"height":16},"width":280.92,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-2.png","element":"img","alt":"k(ǫ, δs) ≤ k(ǫ, δs","inline":true},{"text":"). Also, the label inferred after ","element":"span"},{"text":"˜","element":"span"},{"style":{"height":16},"width":104.28,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-3.png","element":"img","alt":"k(ǫ, δs","inline":true},{"text":") label requests corresponds to the true label ","element":"span"},{"style":{"height":16},"width":105.72,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-4.png","element":"img","alt":" f ∗(Xs","inline":true},{"text":"). 
The intuition behind is to adapt the number of labels requested with respect to the noise; i.e., fewer label requests on a less noisy point (i.e., ","element":"span"},{"style":{"height":19.31},"width":304.2,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-5.png","element":"img","alt":" |η(Xs)− 12| ≥ 12∆","inline":true},{"text":"), and more label requests on a noisy ","element":"span"},{"text":"point. This provides significant savings in the number of requests needed to predict with high probability the correct label.","element":"span"}],[{"text":"• ","element":"span"},{"text":"In the event ","element":"span"},{"style":{"height":13.9},"width":138.88,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-6.png","element":"img","alt":" A1 ∩ A2","inline":true},{"text":", any informative point ","element":"span"},{"style":{"height":13.1},"width":48.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-7.png","element":"img","alt":" Xs","inline":true,"padRight":true},{"text":"falls in a high density region such that all the ","element":"span"},{"style":{"height":16},"width":104.28,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-8.png","element":"img","alt":" k(ǫ, δs","inline":true},{"text":") nearest neighbors of ","element":"span"},{"style":{"height":13.1},"width":48.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-9.png","element":"img","alt":" Xs","inline":true,"padRight":true},{"text":"are within at most some distance to ","element":"span"},{"style":{"height":13.1},"width":48.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-10.png","element":"img","alt":" Xs","inline":true},{"text":", and the condition ","element":"span"},{"href":"#id-46","text":"(12) ","element":"a"},{"text":"is sufficient to have 
","element":"span"},{"style":{"height":16},"width":203.72,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-11.png","element":"img","alt":"k(ǫ, δs) ≤ w","inline":true},{"text":".","element":"span"}],[{"id":"id-49","style":{"width":"101%"},"width":1373,"height":781,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-12.png","element":"img"}],[{"text":"(21) In this case, let ","element":"span"},{"style":{"height":13.11},"width":58,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-13.png","element":"img","alt":" Xs′","inline":true,"padRight":true},{"text":"be such a point that satisfies ","element":"span"},{"href":"#id-49","text":"(20) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-49","text":"(21)","element":"a"},{"text":", we can easily prove that when ","element":"span"},{"style":{"height":13.11},"width":58,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-14.png","element":"img","alt":" Xs′","inline":true,"padRight":true},{"text":"is relatively far from the boundary, i.e., ","element":"span"},{"style":{"height":19.5},"width":241.72,"height":48.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-15.png","element":"img","alt":" |η(Xs′)− 12| ≥","inline":true},{"style":{"height":8},"width":27,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-16.png","element":"img","alt":"∆","inline":true}],[{"id":"id-50","style":{"width":"94%"},"width":1278,"height":133,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/14-17.png","element":"img"}],[{"text":"and easily deduce by using the smoothness assumption, ","element":"span"},{"href":"#id-49","text":"(21) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-50","text":"(22)","element":"a"},{"text":", that the points 
","element":"span"},{"style":{"height":13.1},"width":48.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-0.png","element":"img","alt":" Xs","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":13.1},"width":58,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-1.png","element":"img","alt":" Xs′","inline":true,"padRight":true},{"text":"have the same label, then we do not need to use ","element":"span"},{"style":{"height":13.1},"width":48.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-2.png","element":"img","alt":" Xs","inline":true,"padRight":true},{"text":"in the subroutine ","element":"span"},{"text":"ConfidentLabel ","element":"span"},{"text":"(Algorithm ","element":"span"},{"href":"#id-40","text":"5) ","element":"a"},{"text":"and ","element":"span"},{"style":{"height":13.1},"width":48.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-3.png","element":"img","alt":" Xs","inline":true,"padRight":true},{"text":"is an uninformative point. In addition, ","element":"span"},{"href":"#id-46","text":"(13) ","element":"a"},{"text":"is a sufficient condition such that the number of","element":"span"}],[{"style":{"width":"99%"},"width":1337,"height":237,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-4.png","element":"img"}],[{"text":"The set ","element":"span"},{"text":"I ","element":"span"},{"text":"is introduced in ","element":"span"},{"text":"KALLS ","element":"span"},{"text":"(Algorithm","element":"span"},{"href":"#id-33","text":"(1)","element":"a"},{"text":") as the set of informative points indexes. 
Let ","element":"span"},{"style":{"height":9.1},"width":33.72,"height":22.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-5.png","element":"img","alt":" sI","inline":true,"padRight":true},{"text":"= max ","element":"span"},{"text":"I","element":"span"},{"text":", the index of the last informative point.","element":"span"}],[{"text":"For ","element":"span"},{"style":{"height":14.8},"width":94.2,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-6.png","element":"img","alt":" ǫ, δ ∈","inline":true,"padRight":true},{"text":"(0","element":"span"},{"style":{"height":19.31},"width":116.52,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-7.png","element":"img","alt":", 12), ∆","inline":true,"padRight":true},{"text":"defined in ","element":"span"},{"href":"#id-48","text":"(6)","element":"a"},{"text":", and (","element":"span"},{"style":{"height":14},"width":70.2,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-8.png","element":"img","alt":"α, L","inline":true},{"text":") the smoothness parameters, ","element":"span"},{"text":"let us introduce","element":"span"}],[{"id":"id-51","style":{"width":"95%"},"width":1285,"height":533,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-9.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":18.39},"width":76.8,"height":45.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-10.png","element":"img","alt":" X(1)x","inline":true,"padRight":true},{"text":"is the nearest neighbor of ","element":"span"},{"text":"x ","element":"span"},{"text":"in ","element":"span"},{"style":{"height":13.1},"width":55.76,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-11.png","element":"img","alt":"�Sac","inline":true},{"text":", and 
","element":"span"},{"style":{"height":15.5},"width":71.96,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-12.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"the 1-NN classifier on ","element":"span"},{"style":{"height":13.1},"width":55.76,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-13.png","element":"img","alt":"�Sac","inline":true},{"text":". Additionally, on ","element":"span"},{"style":{"height":13.9},"width":437.92,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-14.png","element":"img","alt":" A1 ∩ A2 ∩ A3 ∩ A4 ∩ A5","inline":true,"padRight":true},{"text":"we prove that ","element":"span"},{"href":"#id-46","text":"(11) ","element":"a"},{"text":"is sufficient to obtain ","element":"span"},{"href":"#id-51","text":"(23)","element":"a"},{"text":".","element":"span"}],[{"style":{"width":"102%"},"width":1384,"height":189,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/15-15.png","element":"img"}]]},{"heading":"6 Conclusion and future work","paragraphs":[[{"text":"In this paper we first reviewed the main results for convergence rates in a non-parametric setting for active learning, with a special emphasis on the relative merits of the assumptions about the smoothness and the margin noise. Then, by putting active learning in perspective with recent work on passive learning, we provided a novel active learning algorithm with a particular smoothness assumption customized for ","element":"span"},{"text":"k","element":"span"},{"text":"-NN.","element":"span"}],[{"text":"We showed that our algorithm has a convergence rate comparable to state-of-the art active learning algorithms, but using less restrictive assumptions.","element":"span"}],[{"text":"This removes unnecessary restrictions on the distribution that would exclude important densities (e.g., Gaussian). 
Additionally, our algorithm can readily be extended to multi-class classification, and then compared to recent results obtained in passive learning by ","element":"span"},{"href":"#id-52","text":"(Reeve and Brown, ","element":"a"},{"href":"#id-52","text":"2017) ","element":"a"},{"text":"which extended the work of ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and Dasgupta, ","element":"a"},{"href":"#id-7","text":"2014) ","element":"a"},{"text":"to multi-class classification. Finally, an important direction for further work is to extend our results to the case where the key parameters of the problem (defining smoothness and noise) are unknown. Ongoing work in this direction builds upon previous results in an adaptive setting ","element":"span"},{"href":"#id-4","text":"(Locatelli et al., ","element":"a"},{"href":"#id-4","text":"2017)","element":"a"},{"text":", ","element":"span"},{"href":"#id-3","text":"(Minsker, ","element":"a"},{"href":"#id-3","text":"2012)","element":"a"},{"text":", ","element":"span"},{"href":"#id-53","text":"(Balcan and Hanneke, ","element":"a"},{"href":"#id-53","text":"2012)","element":"a"},{"text":", ","element":"span"},{"href":"#id-25","text":"(Hanneke, ","element":"a"},{"href":"#id-25","text":"2011)","element":"a"},{"text":".","element":"span"}]]},{"heading":"References","paragraphs":[[{"id":"id-5","text":"Jean-Yves Audibert and Alexandre B Tsybakov. Fast learning rates for plug-in ","element":"span"},{"text":"classifiers. ","element":"span"},{"text":"The Annals of statistics","element":"span"},{"text":", 35(2):608–633, 2007.","element":"span"}],[{"id":"id-53","text":"Maria Florina Balcan and Steve Hanneke. Robust interactive learning. In ","element":"span"},{"text":"Conference on Learning Theory","element":"span"},{"text":", pages 20–1, 2012.","element":"span"}],[{"id":"id-24","text":"Maria-Florina Balcan, Steve Hanneke, and Jennifer Wortman Vaughan. The ","element":"span"},{"text":"true sample complexity of active learning. 
","element":"span"},{"text":"Machine learning","element":"span"},{"text":", 80(2-3):111–139, 2010.","element":"span"}],[{"id":"id-11","text":"Gérard Biau and Luc Devroye. ","element":"span"},{"text":"Lectures on the nearest neighbor method","element":"span"},{"text":". Springer, 2015.","element":"span"}],[{"id":"id-2","text":"Rui M Castro and Robert D Nowak. Minimax bounds for active learning. ","element":"span"},{"text":"IEEE Transactions on Information Theory","element":"span"},{"text":", 54(5):2339–2353, 2008.","element":"span"}],[{"id":"id-7","text":"Kamalika Chaudhuri and Sanjoy Dasgupta. Rates of convergence for near","element":"span"},{"text":"est neighbor classification. In ","element":"span"},{"text":"Advances in Neural Information Processing Systems","element":"span"},{"text":", pages 3437–3445, 2014.","element":"span"}],[{"id":"id-0","text":"Sanjoy Dasgupta. Two faces of active learning. ","element":"span"},{"text":"Theoretical computer science","element":"span"},{"text":", 412(19):1767–1781, 2011.","element":"span"}],[{"id":"id-9","text":"Sanjoy Dasgupta. Active learning theory. ","element":"span"},{"text":"Encyclopedia of Machine Learning and Data Mining","element":"span"},{"text":", pages 14–19, 2017.","element":"span"}],[{"text":"Maik Döring, László Györfi, and Harro Walk. Rate of convergence of k-nearest-","element":"span"},{"text":"neighbor classification rule. ","element":"span"},{"text":"The Journal of Machine Learning Research","element":"span"},{"text":", 18(1):8485–8500, 2017.","element":"span"}],[{"id":"id-92","text":"Gerald A Edgar. Packing measure in general metric space. ","element":"span"},{"text":"Real Analysis Exchange","element":"span"},{"text":", 26(2):831–852, 2000.","element":"span"}],[{"id":"id-21","text":"Herbert Federer. ","element":"span"},{"text":"Geometric measure theory","element":"span"},{"text":". Springer, 2014.","element":"span"}],[{"id":"id-14","text":"Stéphane Gaïffas. 
Sharp estimation in sup norm with random design. ","element":"span"},{"text":"Statistics & probability letters","element":"span"},{"text":", 77(8):782–794, 2007.","element":"span"}],[{"id":"id-31","text":"Steve Hanneke. ","element":"span"},{"text":"Nonparametric active learning, part 1: Smooth regression functions.","element":"span"}],[{"id":"id-25","text":"Steve Hanneke. Rates of convergence in active learning. ","element":"span"},{"text":"The Annals of Statistics","element":"span"},{"text":", 39(1):333–361, 2011.","element":"span"}],[{"id":"id-1","text":"Steve Hanneke and Liu Yang. Minimax analysis of active learning. ","element":"span"},{"text":"The Journal of Machine Learning Research","element":"span"},{"text":", 16(1):3487–3602, 2015.","element":"span"}],[{"id":"id-20","text":"Juha Heinonen. ","element":"span"},{"text":"Lectures on analysis on metric spaces","element":"span"},{"text":". Springer Science & Business Media, 2012.","element":"span"}],[{"id":"id-69","text":"Wassily Hoeffding. Probability inequalities for sums of bounded random vari","element":"span"},{"text":"ables. ","element":"span"},{"text":"Journal of the American Statistical Association","element":"span"},{"text":", 58(301):13–30, 1963.","element":"span"}],[{"id":"id-70","text":"Emilie Kaufmann, Olivier Cappé, and Aurélien Garivier. On the complexity ","element":"span"},{"text":"of best-arm identification in multi-armed bandit models. ","element":"span"},{"text":"The Journal of Machine Learning Research","element":"span"},{"text":", 17(1):1–42, 2016.","element":"span"}],[{"id":"id-30","text":"Aryeh Kontorovich, Sivan Sabato, and Ruth Urner. Active nearest-neighbor ","element":"span"},{"text":"learning in metric spaces. In ","element":"span"},{"text":"Advances in Neural Information Processing Systems","element":"span"},{"text":", pages 856–864, 2016.","element":"span"}],[{"id":"id-22","text":"Samory Kpotufe. k-nn regression adapts to local intrinsic dimension. 
In ","element":"span"},{"text":"Advances in Neural Information Processing Systems","element":"span"},{"text":", pages 729–737, 2011.","element":"span"}],[{"id":"id-4","text":"Andrea Locatelli, Alexandra Carpentier, and Samory Kpotufe. Adaptivity to ","element":"span"},{"text":"noise parameters in nonparametric active learning. ","element":"span"},{"text":"Proceedings of Machine Learning Research vol","element":"span"},{"text":", 65:1–34, 2017.","element":"span"}],[{"id":"id-27","text":"Andrea Locatelli, Alexandra Carpentier, and Samory Kpotufe. An adaptive ","element":"span"},{"text":"strategy for active learning with smooth decision boundary. In ","element":"span"},{"text":"Algorithmic Learning Theory","element":"span"},{"text":", pages 547–571, 2018.","element":"span"}],[{"id":"id-10","text":"Gábor Lugosi. ","element":"span"},{"text":"Pattern classification and learning theory. ","element":"span"},{"text":"In ","element":"span"},{"text":"Principles of nonparametric learning","element":"span"},{"text":", pages 1–56. Springer, 2002.","element":"span"}],[{"id":"id-6","text":"Enno Mammen and Alexandre B Tsybakov. Smooth discrimination analysis. ","element":"span"},{"text":"The Annals of Statistics","element":"span"},{"text":", 27(6):1808–1829, 1999.","element":"span"}],[{"id":"id-17","text":"Pascal Massart and ","element":"span"},{"text":"É","element":"span"},{"text":"lodie Nédélec. Risk bounds for statistical learning. ","element":"span"},{"text":"The Annals of Statistics","element":"span"},{"text":", 34(5):2326–2366, 2006.","element":"span"}],[{"id":"id-39","text":"Andreas Maurer and Massimiliano Pontil. Empirical bernstein bounds and ","element":"span"},{"text":"sample variance penalization. ","element":"span"},{"text":"arXiv preprint arXiv:0907.3740","element":"span"},{"text":", 2009.","element":"span"}],[{"id":"id-3","text":"Stanislav Minsker. Plug-in approach to active learning. 
","element":"span"},{"text":"Journal of Machine Learning Research","element":"span"},{"text":", 13(Jan):67–90, 2012.","element":"span"}],[{"id":"id-60","text":"Wolfgang Mulzer. Five proofs of Chernoff’s bound with applications. ","element":"span"},{"text":"arXiv preprint arXiv:1801.03365","element":"span"},{"text":", 2018.","element":"span"}],[{"id":"id-13","text":"Charles Chapman Pugh. ","element":"span"},{"text":"Real mathematical analysis","element":"span"},{"text":". Springer, 2002.","element":"span"}],[{"id":"id-52","text":"Henry WJ Reeve and Gavin Brown. Minimax rates for cost-sensitive learn- ","element":"span"},{"text":"ing on manifolds with approximate nearest neighbours. ","element":"span"},{"text":"In ","element":"span"},{"text":"International Conference on Algorithmic Learning Theory","element":"span"},{"text":", pages 11–56, 2017.","element":"span"}],[{"id":"id-12","text":"Shai Shalev-Shwartz and Shai Ben-David. ","element":"span"},{"text":"Understanding machine learning: From theory to algorithms","element":"span"},{"text":". Cambridge university press, 2014.","element":"span"}],[{"id":"id-61","text":"Mathukumalli Vidyasagar. ","element":"span"},{"text":"Learning and generalisation: with applications to neural networks","element":"span"},{"text":". Springer Science & Business Media, 2013.","element":"span"}],[{"id":"id-35","text":"A Detailed proof of Theorem ","element":"span"},{"href":"#id-42","text":"3","element":"a"}],[{"text":"This Appendix is organized as follows: in Section ","element":"span"},{"href":"#id-54","referenceIndex":11,"text":"A.1, ","element":"a"},{"text":"we introduce some additional no-","element":"span"}],[{"text":"tations. 
In Section ","element":"span"},{"href":"#id-55","referenceIndex":21,"text":"A.2 ","element":"a"},{"text":"we adaptively determine the number of label requests needed to","element":"span"}],[{"text":"accurately predict the label of an informative point that is relatively far from the bound-","element":"span"}],[{"text":"ary decision. In Section ","element":"span"},{"href":"#id-56","referenceIndex":78,"text":"A.4, ","element":"a"},{"text":"we provide some lemmas that give a sufficient condition for","element":"span"}],[{"text":"a point to be informative. In Section ","element":"span"},{"href":"#id-57","referenceIndex":115,"text":"A.5, ","element":"a"},{"text":"we give theorems that allow us to classify each","element":"span"}],[{"text":"instance relatively far from the decision boundary. Finally in Section ","element":"span"},{"href":"#id-58","referenceIndex":122,"text":"A.6, ","element":"a"},{"text":"we provide the","element":"span"}],[{"text":"label complexity and establish Theorem ","element":"span"},{"href":"#id-42","text":"3.","element":"a"}],[{"id":"id-54","text":"A.1 Notations","element":"span"}],[{"text":"Some notations that will be used throughout the proofs are listed here for convenience.","element":"span"}],[{"text":"As defined in Section ","element":"span"},{"href":"#id-59","text":"2.3, ","element":"a"},{"text":"let ","element":"span"},{"style":{"height":14.06},"width":878.36,"height":35.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-0.png","element":"img","alt":" B(x, r) = {x′ ∈ X, ρ(x, x′) < r} and ¯B(x, r) = {x′ ∈","inline":true}],[{"style":{"height":12.8},"width":243.56,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-1.png","element":"img","alt":"X, ρ(x, x′) ≤ r}","inline":true,"padRight":true},{"text":"the open and closed balls with respect to the Euclidean metric 
","element":"span"},{"style":{"height":8.4},"width":141.08,"height":21,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-2.png","element":"img","alt":" ρ, respec-","inline":true}],[{"text":"tively, centered at ","element":"span"},{"style":{"height":9.6},"width":85.12,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-3.png","element":"img","alt":" x ∈ X","inline":true,"padRight":true},{"text":"with radius ","element":"span"},{"text":"r > ","element":"span"},{"text":"0. Let supp(","element":"span"},{"style":{"height":12.8},"width":580.4,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-4.png","element":"img","alt":"PX) = {x ∈ X, ∀r > 0, PX(B(x, r)) >","inline":true}],[{"text":"0","element":"span"},{"text":"} ","element":"span"},{"text":"the support of the marginal distribution ","element":"span"},{"style":{"height":10.91},"width":58.44,"height":27.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-5.png","element":"img","alt":" PX.","inline":true}],[{"text":"For ","element":"span"},{"style":{"height":12.8},"width":413.28,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-6.png","element":"img","alt":" p ∈ (0, 1], and x ∈ supp(PX","inline":true},{"text":"), let us define","element":"span"}],[{"id":"id-91","style":{"width":"69%"},"width":942,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-7.png","element":"img"}],[{"text":"Let us recall for ","element":"span"},{"style":{"height":12.8},"width":371.72,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-8.png","element":"img","alt":" Xs ∈ K = {X1, . . . 
, Xw}","inline":true},{"text":", we denote by ","element":"span"},{"style":{"height":16.03},"width":150.48,"height":40.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-9.png","element":"img","alt":" X(k)s its k","inline":true},{"text":"-th nearest neighbor in","element":"span"}],[{"style":{"height":16.32},"width":179.52,"height":40.8,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-10.png","element":"img","alt":"K, and Y (k)s","inline":true,"padRight":true},{"text":"the corresponding label.","element":"span"}],[{"text":"For an integer ","element":"span"},{"style":{"height":11.2},"width":138.76,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-11.png","element":"img","alt":" k ≥ 1, let","inline":true}],[{"id":"id-71","style":{"width":"76%"},"width":1025,"height":97,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-12.png","element":"img"}],[{"id":"id-55","text":"A.2 Adaptive label requests on informative points","element":"span"}],[{"id":"id-64","text":"Lemma 1 (Chernoff bounds, ","element":"span"},{"href":"#id-60","text":"(Mulzer, ","element":"a"},{"href":"#id-60","text":"2018)","element":"a"},{"text":")","element":"span"}],[{"text":"Suppose ","element":"span"},{"style":{"height":11.2},"width":173.84,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-13.png","element":"img","alt":" X1, . . . , Xm","inline":true,"padRight":true},{"text":"are independent random variables taking value in ","element":"span"},{"text":"{","element":"span"},{"text":"0","element":"span"},{"text":", ","element":"span"},{"text":"1","element":"span"},{"text":"}","element":"span"},{"text":". 
Let ","element":"span"},{"text":"X ","element":"span"},{"text":"denote","element":"span"}],[{"text":"their sum and ","element":"span"},{"style":{"height":12.8},"width":148.84,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-14.png","element":"img","alt":" µ = E(X)","inline":true,"padRight":true},{"text":"its expected value. Then,","element":"span"}],[{"id":"id-98","style":{"width":"98%"},"width":1324,"height":243,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-15.png","element":"img"}],[{"id":"id-67","text":"Lemma 2 (Logarithmic relationship, ","element":"span"},{"href":"#id-61","text":"(Vidyasagar, ","element":"a"},{"href":"#id-61","text":"2013)","element":"a"},{"text":")","element":"span"}],[{"style":{"height":15.07},"width":839,"height":37.68,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-16.png","element":"img","alt":"Suppose a, b, c > 0, abec/a > 4 log2(e), and u ≥ 1. Then:","inline":true}],[{"style":{"width":"44%"},"width":598,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-17.png","element":"img"}],[{"id":"id-65","text":"Lemma 3 ","element":"span"},{"href":"#id-7","text":"(Chaudhuri and Dasgupta","element":"a"},{"text":", ","element":"span"},{"href":"#id-7","text":"2014","element":"a"},{"text":")","element":"span"}],[{"text":"For ","element":"span"},{"style":{"height":12.8},"width":442.6,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-18.png","element":"img","alt":" p ∈ (0, 1], and x ∈ supp(PX)","inline":true},{"text":", let us define ","element":"span"},{"style":{"height":13.14},"width":622.28,"height":32.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-19.png","element":"img","alt":" rp(x) = inf{r > 0, PX(B(x, r)) ≥ p}. 
For","inline":true}],[{"text":"all ","element":"span"},{"style":{"height":12.8},"width":586.52,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-20.png","element":"img","alt":" p ∈ (0, 1], and x ∈ supp(PX), we have:","inline":true}],[{"style":{"width":"21%"},"width":296,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/19-21.png","element":"img"}],[{"text":"Theorem 4","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":26.33},"width":1287.56,"height":65.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-0.png","element":"img","alt":" ǫ, δ ∈ (0, 1). Set ∆ = max(ǫ,� ǫ2C� 1β+1 ), and pǫ =�31∆1024L�d/α, where α, L, β, C are","inline":true}],[{"text":"parameters used in ","element":"span"},{"href":"#id-26","text":"(H2) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-18","text":"(H3)","element":"a"},{"text":".","element":"span"}],[{"text":"For ","element":"span"},{"style":{"height":12.8},"width":428.2,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-1.png","element":"img","alt":" p ∈ (0, 1], and x ∈ supp(PX)","inline":true},{"text":", let us introduce ","element":"span"},{"style":{"height":13.14},"width":599.8,"height":32.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-2.png","element":"img","alt":" rp(x) = inf{r > 0, PX(B(x, r)) ≥ p} and","inline":true}],[{"href":"#id-62","style":{"height":17.55},"width":698.84,"height":43.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-3.png","element":"img","alt":"ks := k(ǫ, δs) defined in (5) (where δs = δ32s2 
).","inline":true}],[{"style":{"width":"99%"},"width":1346,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-4.png","element":"img"}],[{"style":{"height":16.98},"width":92.28,"height":42.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-5.png","element":"img","alt":"1 −δ16 ","inline":true,"padRight":true},{"text":", such that on ","element":"span"},{"style":{"height":11.6},"width":366.44,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-6.png","element":"img","alt":" A1, for all 1 ≤ s ≤ w, if","inline":true}],[{"id":"id-63","style":{"width":"63%"},"width":857,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-7.png","element":"img"}],[{"text":"then the ","element":"span"},{"style":{"height":10.34},"width":31.76,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-8.png","element":"img","alt":" ks","inline":true,"padRight":true},{"text":"nearest neighbors of ","element":"span"},{"style":{"height":10.34},"width":41.84,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-9.png","element":"img","alt":" Xs","inline":true,"padRight":true},{"text":"(in the pool ","element":"span"},{"text":"K","element":"span"},{"text":") belong to the ball ","element":"span"},{"style":{"height":13.14},"width":239.96,"height":32.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-10.png","element":"img","alt":" B(Xs, rpǫ(Xs)).","inline":true}],[{"text":"Additionally, the condition","element":"span"}],[{"id":"id-66","style":{"width":"62%"},"width":841,"height":116,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-11.png","element":"img"}],[{"text":"is sufficient to have ","element":"span"},{"href":"#id-63","text":"(29)","element":"a"},{"text":".","element":"span"}],[{"text":"Proof","element":"span"}],[{"text":"Fix 
","element":"span"},{"style":{"height":12.8},"width":371,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-12.png","element":"img","alt":" x ∈ supp(PX). For k ∈ N","inline":true},{"text":", let us denote ","element":"span"},{"style":{"height":16.32},"width":195.56,"height":40.8,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-13.png","element":"img","alt":" X(k)x , the kth ","inline":true,"padRight":true},{"text":"nearest neighbor of ","element":"span"},{"text":"x ","element":"span"},{"text":"in the pool.","element":"span"}],[{"text":"we have,","element":"span"}],[{"style":{"width":"64%"},"width":864,"height":90,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-14.png","element":"img"}],[{"text":"Then, by using Lemma ","element":"span"},{"href":"#id-64","referenceIndex":22,"text":"1 ","element":"a"},{"text":"and Lemma ","element":"span"},{"href":"#id-65","referenceIndex":27,"text":"3, ","element":"a"},{"text":"and if ","element":"span"},{"style":{"height":10.34},"width":31.76,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-15.png","element":"img","alt":" ks","inline":true,"padRight":true},{"text":"satisfies ","element":"span"},{"href":"#id-63","text":"(29)","element":"a"},{"text":", we have:","element":"span"}],[{"style":{"width":"101%"},"width":1370,"height":524,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-16.png","element":"img"}],[{"text":"Fix ","element":"span"},{"style":{"height":10.33},"width":274.16,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-17.png","element":"img","alt":" x = Xs. 
Given Xs","inline":true},{"text":", there exists an event ","element":"span"},{"style":{"height":12.93},"width":62.96,"height":32.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-18.png","element":"img","alt":" A1,s","inline":true},{"text":", such that ","element":"span"},{"style":{"height":14.73},"width":427.48,"height":36.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-19.png","element":"img","alt":" P (A1,s) ≥ 1 − δ/(32s2), and","inline":true}],[{"text":"on ","element":"span"},{"style":{"height":12.93},"width":108.52,"height":32.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-20.png","element":"img","alt":" A1,s, if","inline":true}],[{"style":{"width":"28%"},"width":382,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-21.png","element":"img"}],[{"text":"we have ","element":"span"},{"style":{"height":13.13},"width":541.04,"height":32.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-22.png","element":"img","alt":" B(Xs, rpǫ(Xs))∩{X1, . . . , Xw} ≥ ks","inline":true},{"text":". 
By setting ","element":"span"},{"style":{"height":13.9},"width":500.72,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-23.png","element":"img","alt":" A1 = ∩s≥1A1,s, we have P (A1) ≥","inline":true}],[{"text":"1 ","element":"span"},{"style":{"height":12.8},"width":273.24,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-24.png","element":"img","alt":" − δ/16, and on A1","inline":true},{"text":", for all 1 ","element":"span"},{"style":{"height":13.91},"width":903.32,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-25.png","element":"img","alt":" ≤ s ≤ w, if ks ≤ (1 − τks,s)pǫ(w − 1), then B(Xs, rpǫ(Xs)) ∩","inline":true}],[{"style":{"height":12.8},"width":293.16,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/20-26.png","element":"img","alt":"{X1, . . . , Xw} ≥ ks.","inline":true}],[{"text":"Now, let us prove that the condition ","element":"span"},{"href":"#id-66","text":"(30) ","element":"a"},{"text":"is sufficient to guarantee ","element":"span"},{"href":"#id-63","text":"(29)","element":"a"},{"text":".","element":"span"}],[{"text":"The relation ","element":"span"},{"href":"#id-63","text":"(29) ","element":"a"},{"text":"implies","element":"span"}],[{"text":"We can see by a bit of calculus, that ","element":"span"},{"style":{"height":16.58},"width":293.08,"height":41.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/21-0.png","element":"img","alt":" τks,s ≤ 12, and then","inline":true}],[{"style":{"width":"84%"},"width":1132,"height":366,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/21-1.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":23.2},"width":199.52,"height":58,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/21-2.png","element":"img","alt":" b = 
4c�1024L31","inline":true}],[{"id":"id-68","style":{"width":"95%"},"width":1281,"height":719,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/21-3.png","element":"img"}],[{"text":"Now, we are going to apply the Lemma ","element":"span"},{"href":"#id-67","referenceIndex":25,"text":"2. ","element":"a"},{"text":"If we set in Lemma ","element":"span"},{"href":"#id-67","referenceIndex":25,"text":"2","element":"a"}],[{"style":{"width":"74%"},"width":1004,"height":92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/21-4.png","element":"img"}],[{"text":"we can easily see that ","element":"span"},{"style":{"height":11.2},"width":151.76,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/21-5.png","element":"img","alt":" c ≥ a, a ≥","inline":true,"padRight":true},{"text":"4 and then","element":"span"}],[{"style":{"width":"24%"},"width":325,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/21-6.png","element":"img"}],[{"text":"Then, the relation","element":"span"}],[{"style":{"width":"71%"},"width":968,"height":116,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/21-7.png","element":"img"}],[{"text":"is sufficient to guarantee ","element":"span"},{"href":"#id-68","text":"(32)","element":"a"},{"text":".","element":"span"}],[{"text":"Let us note that the guarantee obtained in the preceding theorem corresponds to that","element":"span"}],[{"text":"obtained in passive setting (","element":"span"},{"text":"w ","element":"span"},{"text":"= ","element":"span"},{"text":"n","element":"span"},{"text":").","element":"span"}],[{"text":"A.3 Motivation for choosing ","element":"span"},{"style":{"height":13.1},"width":35.64,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-0.png","element":"img","alt":" ks","inline":true,"padRight":true},{"text":"for 
","element":"span"},{"style":{"height":13.1},"width":48.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-1.png","element":"img","alt":" Xs","inline":true}],[{"id":"id-73","text":"Lemma 4 (Hoeffding’s inequality,","element":"span"},{"href":"#id-69","text":"(Hoeffding, ","element":"a"},{"href":"#id-69","text":"1963)","element":"a"},{"text":")","element":"span"}],[{"style":{"width":"81%"},"width":1103,"height":152,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-2.png","element":"img"}],[{"text":"– ","element":"span"},{"text":"Second version","element":"span"},{"text":": Let ","element":"span"},{"style":{"height":11.2},"width":173.84,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-3.png","element":"img","alt":" X1, . . . , Xm","inline":true,"padRight":true},{"text":"be independent random variables such that ","element":"span"},{"style":{"height":12.8},"width":426.2,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-4.png","element":"img","alt":" −1 ≤ Xi ≤ 1, (i = 0, . . . 
, m).","inline":true,"padRight":true},{"text":"We define the empirical mean of these variables by","element":"span"}],[{"style":{"width":"66%"},"width":898,"height":184,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-5.png","element":"img"}],[{"id":"id-74","text":"Lemma 5 ","element":"span"},{"href":"#id-70","text":"(Kaufmann et al.","element":"a"},{"text":", ","element":"span"},{"href":"#id-70","text":"2016","element":"a"},{"text":")","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":18},"width":181.96,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-6.png","element":"img","alt":" ζ(u) = �","inline":true}],[{"style":{"width":"3%"},"width":51,"height":18,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-7.png","element":"img"}],[{"text":"tributed, such that, for all ","element":"span"},{"style":{"height":17.25},"width":417.72,"height":43.12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-8.png","element":"img","alt":" v > 0, E(evX1) ≤ ev2σ2/2","inline":true},{"text":". For every positive integer ","element":"span"},{"text":"t","element":"span"},{"text":", let","element":"span"}],[{"style":{"height":10.53},"width":284.6,"height":26.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-9.png","element":"img","alt":"St = X1 + . . . + Xt","inline":true},{"text":". 
Then, for all ","element":"span"},{"style":{"height":11.6},"width":211.76,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-10.png","element":"img","alt":" γ > 1 and r ≥","inline":true}],[{"style":{"width":"93%"},"width":1253,"height":168,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-11.png","element":"img"}],[{"id":"id-75","text":"Lemma 6","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":11.2},"width":269.48,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-12.png","element":"img","alt":" m ≥ 1 and u ≥ 20","inline":true},{"text":". Then we have:","element":"span"}],[{"style":{"width":"46%"},"width":627,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-13.png","element":"img"}],[{"text":"Proof","element":"span"}],[{"text":"Define ","element":"span"},{"style":{"height":12.8},"width":361.68,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-14.png","element":"img","alt":" φ(m) = m − u log(log(m","inline":true},{"text":")), and let ","element":"span"},{"style":{"height":12.8},"width":267.68,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-15.png","element":"img","alt":" m0 = 2u log(log(u","inline":true},{"text":")). 
We have:","element":"span"}],[{"style":{"width":"63%"},"width":850,"height":82,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-16.png","element":"img"}],[{"text":"It can be shown numerically that ","element":"span"},{"style":{"height":12.8},"width":325.8,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-17.png","element":"img","alt":" φ(m0) ≥ 0 for u ≥ 20.","inline":true}],[{"text":"Also, we have: ","element":"span"},{"style":{"height":21.12},"width":609.72,"height":52.8,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-18.png","element":"img","alt":" φ′(m) = m log(m)−um log(m) ≥ 0 for all m ≥ m0","inline":true,"padRight":true},{"text":"(notice that ","element":"span"},{"style":{"height":12.8},"width":306.12,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-19.png","element":"img","alt":" m0 ≥ u for u ≥ 20).","inline":true}],[{"text":"Then it is easy to see that ","element":"span"},{"style":{"height":12.8},"width":444.6,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-20.png","element":"img","alt":" φ(m) ≥ φ(m0) for all m ≥ m0","inline":true},{"text":". This establishes the lemma.","element":"span"}],[{"id":"id-72","text":"Theorem 5","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":12.8},"width":357.16,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-21.png","element":"img","alt":" δ ∈ (0, 1), and ǫ ∈ (0, 1)","inline":true},{"text":". Let us assume that ","element":"span"},{"href":"#id-46","style":{"height":15.05},"width":621.4,"height":37.64,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-22.png","element":"img","alt":" w satisfies (12). 
For Xs, set ˜k(ǫ, δs) (with","inline":true}],[{"style":{"height":17.55},"width":202.2,"height":43.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-23.png","element":"img","alt":"δs = δ32s2 ) as","inline":true}],[{"style":{"width":"87%"},"width":1181,"height":96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-24.png","element":"img"}],[{"text":"where ","element":"span"},{"href":"#id-62","style":{"height":21.34},"width":1201.88,"height":53.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-25.png","element":"img","alt":" c ≥ 7.106. For k ≥ 1, s ≤ w, let ∆ = max( ǫ2, � ǫ2C� 1β+1 ) and bδs,k defined in (4).","inline":true}],[{"text":"Then, there exists an event ","element":"span"},{"style":{"height":10.93},"width":40.44,"height":27.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-26.png","element":"img","alt":" A2","inline":true},{"text":", such that ","element":"span"},{"style":{"height":12.8},"width":658.52,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/22-27.png","element":"img","alt":" P (A2) ≥ 1 − δ/8, and on A1 ∩ A2, we have:","inline":true}],[{"text":"1. For ","element":"span"},{"href":"#id-71","style":{"height":12.8},"width":976.76,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-0.png","element":"img","alt":" k ≥ 1, �ηk(Xs) and ¯ηk(Xs) defined in (26), for all s ∈ {1, . . . 
, w},","inline":true}],[{"id":"id-78","style":{"width":"98%"},"width":1332,"height":426,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-1.png","element":"img"}],[{"text":"Where ","element":"span"},{"style":{"height":12.4},"width":31.76,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-2.png","element":"img","alt":"¯ks","inline":true,"padRight":true},{"text":"is the number of requests made in ","element":"span"},{"style":{"height":12.8},"width":315.32,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-3.png","element":"img","alt":" ConfidentLabel(Xs).","inline":true}],[{"text":"Proof","element":"span"}],[{"text":"1. Let us begin with the proof of the first part of Theorem ","element":"span"},{"href":"#id-72","referenceIndex":71,"text":"5. ","element":"a"},{"text":"Here, we follow the proof of Theorem 8 in ","element":"span"},{"href":"#id-70","text":"(Kaufmann et al., ","element":"a"},{"href":"#id-70","text":"2016","element":"a"},{"text":"), with a few additional modifications.","element":"span"}],[{"style":{"width":"95%"},"width":1283,"height":83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-4.png","element":"img"}],[{"style":{"height":17.12},"width":100.8,"height":42.8,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-5.png","element":"img","alt":"η(X(k)s","inline":true,"padRight":true},{"text":")) = 0, and the random variables","element":"span"},{"style":{"height":23.2},"width":452.12,"height":58,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-6.png","element":"img","alt":"�Y (i)s − η(X(i)s ), i = 1, . . . , k�","inline":true},{"text":"are independent. 
Then by Lemma ","element":"span"},{"href":"#id-73","referenceIndex":59,"text":"4, ","element":"a"},{"text":"given ","element":"span"},{"style":{"height":17.31},"width":480.48,"height":43.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-7.png","element":"img","alt":" {X1, . . . , Xw}, as Y (1)s − η(X(1)s","inline":true,"padRight":true},{"text":") takes values in [","element":"span"},{"style":{"height":12.8},"width":145.56,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-8.png","element":"img","alt":"−1, 1], we","inline":true,"padRight":true},{"text":"have ","element":"span"},{"style":{"height":18.99},"width":602.48,"height":47.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-9.png","element":"img","alt":" E(ev(Y (1)s −η(X(1)s ))) ≤ ev2/2 for all v >","inline":true,"padRight":true},{"text":"0. Furthermore, set ","element":"span"},{"style":{"height":19.81},"width":286.36,"height":49.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-10.png","element":"img","alt":" z = log( 32s2δ ), and","inline":true},{"style":{"height":19.55},"width":541.04,"height":48.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-11.png","element":"img","alt":"r = z + 3 log(z). 
We have r ≥ 8(e−1)2","inline":true,"padRight":true},{"text":", and by Lemma ","element":"span"},{"href":"#id-74","referenceIndex":60,"text":"5, ","element":"a"},{"text":"with ","element":"span"},{"style":{"height":12.8},"width":98.12,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-12.png","element":"img","alt":" γ = 3/","inline":true},{"text":"2, we have: ","element":"span"},{"text":"P","element":"span"}],[{"style":{"width":"90%"},"width":1224,"height":315,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-13.png","element":"img"}],[{"text":"Then, we have, given ","element":"span"},{"style":{"height":12.8},"width":217.16,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-14.png","element":"img","alt":" s ∈ {1, . . . , w}","inline":true},{"text":", there exists an event ","element":"span"},{"style":{"height":15.44},"width":389.36,"height":38.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-15.png","element":"img","alt":" A′2,s such that P (A′2,s) ≥","inline":true,"padRight":true},{"text":"1 ","element":"span"},{"style":{"height":14.21},"width":130.68,"height":35.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-16.png","element":"img","alt":" − δ/32s2","inline":true},{"text":", and simultaneously for all ","element":"span"},{"style":{"height":11.2},"width":53.84,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-17.png","element":"img","alt":" k ≥","inline":true,"padRight":true},{"text":"1, we have:","element":"span"}],[{"style":{"width":"98%"},"width":1331,"height":449,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/23-18.png","element":"img"}],[{"style":{"width":"105%"},"width":1420,"height":1399,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/24-0.png","element":"img"}],[{"text":"(39) On the other hand, the right-hand side is smaller 
than:","element":"span"}],[{"style":{"width":"103%"},"width":1391,"height":717,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/24-1.png","element":"img"}],[{"id":"id-76","style":{"width":"100%"},"width":1349,"height":416,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-0.png","element":"img"}],[{"text":"We can apply the Lemma ","element":"span"},{"href":"#id-75","referenceIndex":64,"text":"6 ","element":"a"},{"text":"in ","element":"span"},{"href":"#id-76","text":"(42) ","element":"a"},{"text":"by taking: ","element":"span"},{"style":{"height":22.33},"width":579.48,"height":55.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-1.png","element":"img","alt":" m = ek and u = 73728e|η(Xs)− 12 |2 . We have","inline":true},{"style":{"height":11.2},"width":224.24,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-2.png","element":"img","alt":"m ≥ 1 and u ≥","inline":true,"padRight":true},{"text":"20 and then, a sufficient condition to have ","element":"span"},{"href":"#id-76","text":"(42) ","element":"a"},{"text":"is:","element":"span"}],[{"id":"id-77","style":{"width":"95%"},"width":1284,"height":453,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-3.png","element":"img"}],[{"text":"As ","element":"span"},{"style":{"height":16.58},"width":265.12,"height":41.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-4.png","element":"img","alt":" |η(Xs) − 12| ≥ 12∆","inline":true},{"text":", we can easily see that ˜","element":"span"},{"style":{"height":12.8},"width":238.64,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-5.png","element":"img","alt":"k(ǫ, δs) ≤ k(ǫ, δs","inline":true},{"text":"). 
By taking the minimum ","element":"span"},{"text":"value ","element":"span"},{"text":"¯","element":"span"},{"style":{"height":14.06},"width":172.88,"height":35.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-6.png","element":"img","alt":"ks = ¯k(ǫ, δs","inline":true},{"text":") that satisfies ","element":"span"},{"href":"#id-77","text":"(44)","element":"a"},{"text":", we can see that when the budget allows us, the subroutine ","element":"span"},{"style":{"height":13.46},"width":411.44,"height":33.64,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-7.png","element":"img","alt":" ConfidentLabel requests ¯ks","inline":true,"padRight":true},{"text":"labels, and we have:","element":"span"}],[{"style":{"width":"61%"},"width":831,"height":102,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-8.png","element":"img"}],[{"text":"By setting ","element":"span"},{"style":{"height":14.67},"width":613.64,"height":36.68,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-9.png","element":"img","alt":" A′′2 = ∩s≥1A′′2,s, we have P (A′′2 ) ≥ 1 − δ/","inline":true},{"text":"16, and we can deduce ","element":"span"},{"href":"#id-78","text":"(34)","element":"a"},{"text":". ","element":"span"},{"text":"We have on ","element":"span"},{"style":{"height":13.63},"width":444.84,"height":34.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-10.png","element":"img","alt":" A′2, for all s ≤ w, k ≤ k(ǫ, δs),","inline":true}],[{"style":{"width":"95%"},"width":1283,"height":707,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/25-11.png","element":"img"}],[{"style":{"width":"100%"},"width":1349,"height":798,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-0.png","element":"img"}],[{"text":"Thus we can easily deduce ","element":"span"},{"href":"#id-78","text":"(35)","element":"a"},{"text":". 
By setting ","element":"span"},{"style":{"height":13.63},"width":874.68,"height":34.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-1.png","element":"img","alt":" A2 = A′2 ∩ A′′2, we have P (A2) ≥ 1 − δ/8 and on A1 ∩ A2","inline":true},{"text":", the item 1 and ","element":"span"},{"text":"item 2 hold simultaneously.","element":"span"}],[{"id":"id-56","text":"A.4 Sufficient condition to be an informative point","element":"span"}],[{"text":"As noticed in Section ","element":"span"},{"href":"#id-36","text":"4.3, ","element":"a"},{"text":"a sufficient condition for a point ","element":"span"},{"style":{"height":12.8},"width":221.76,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-2.png","element":"img","alt":" Xt (with t ≤ w","inline":true},{"text":") to be considered","element":"span"}],[{"text":"as ","element":"span"},{"text":"not ","element":"span"},{"text":"informative is:","element":"span"}],[{"id":"id-79","style":{"width":"87%"},"width":1172,"height":75,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-3.png","element":"img"}],[{"text":"for some previous informative point ","element":"span"},{"style":{"height":12.8},"width":808.84,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-4.png","element":"img","alt":" Xs (with (Xs, �Ys, �LBs) ∈ �S the current active set just","inline":true}],[{"text":"before attaining ","element":"span"},{"style":{"height":10.34},"width":179.24,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-5.png","element":"img","alt":" Xt in KALLS","inline":true},{"text":"(Algorithm","element":"span"},{"href":"#id-33","text":"(1)","element":"a"},{"text":")). 
Because ","element":"span"},{"style":{"height":10.91},"width":45.6,"height":27.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-6.png","element":"img","alt":" PX","inline":true,"padRight":true},{"text":"is unknown, we provide a com-","element":"span"}],[{"text":"putational scheme sufficient to obtain ","element":"span"},{"href":"#id-79","text":"(49)","element":"a"},{"text":".","element":"span"}],[{"text":"Firstly we follow the general procedure used in ","element":"span"},{"href":"#id-30","text":"(Kontorovich et al., ","element":"a"},{"href":"#id-30","text":"2016","element":"a"},{"text":") to estimate adap-","element":"span"}],[{"text":"tively the expectation of a Bernoulli random variable. And secondly, we apply it to the","element":"span"}],[{"text":"Bernoulli variable ","element":"span"},{"style":{"height":12.8},"width":807.24,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-7.png","element":"img","alt":" 1A where A = {x, x ∈ B(X, r)} for r > 0 and X ∈ X.","inline":true}],[{"text":"Lemma 7 ","element":"span"},{"href":"#id-30","text":"(Kontorovich et al.","element":"a"},{"text":", ","element":"span"},{"href":"#id-30","text":"2016","element":"a"},{"text":")","element":"span"}],[{"style":{"height":23.2},"width":852.4,"height":58,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-8.png","element":"img","alt":"Let δ′ ∈ (0, 1), ǫo > 0, t ≥ 7 and set g(t) = 1 + 83t +�","inline":true}],[{"text":"Bernoulli random variables with expectation ","element":"span"},{"style":{"height":11.2},"width":112.08,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-9.png","element":"img","alt":" p. Let �p","inline":true,"padRight":true},{"text":"be the output of ","element":"span"},{"style":{"height":12.8},"width":333.8,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-10.png","element":"img","alt":" BerEst(ǫo, δ′, t). 
There","inline":true}],[{"text":"exists an event ","element":"span"},{"style":{"height":9.2},"width":37.44,"height":23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-11.png","element":"img","alt":" A′","inline":true},{"text":", such that ","element":"span"},{"style":{"height":12.8},"width":544.28,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-12.png","element":"img","alt":" P (A′) ≥ 1 − δ′, and on A′, we have:","inline":true}],[{"text":"1. If ","element":"span"},{"style":{"height":16.19},"width":301.08,"height":40.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-13.png","element":"img","alt":" �p ≤ ǫog(t) then p ≤ ǫo","inline":true},{"text":", otherwise, we have ","element":"span"},{"style":{"height":20.93},"width":200.6,"height":52.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-14.png","element":"img","alt":" p ≥ 2−g(t)g(t) ǫ0.","inline":true}],[{"text":"2. The number of random draws in the ","element":"span"},{"text":"BerEst ","element":"span"},{"text":"subroutine (Algorithm ","element":"span"},{"href":"#id-80","text":"4) ","element":"a"},{"text":"is at most","element":"span"}],[{"text":"where ","element":"span"},{"style":{"height":17.74},"width":282.68,"height":44.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-15.png","element":"img","alt":" ψ := max(ǫo, pg(t) ).","inline":true}],[{"id":"id-95","text":"Lemma 8","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":12.8},"width":276.2,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-16.png","element":"img","alt":" ǫ, δ ∈ (0, 1), r > 0","inline":true},{"text":". 
Let us assume that ","element":"span"},{"text":"w ","element":"span"},{"text":"satisfies ","element":"span"},{"href":"#id-46","text":"(13)","element":"a"},{"text":".","element":"span"}],[{"text":"There exists an event ","element":"span"},{"style":{"height":10.93},"width":40.44,"height":27.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-17.png","element":"img","alt":" A3","inline":true},{"text":", such that ","element":"span"},{"style":{"height":12.8},"width":264.68,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-18.png","element":"img","alt":" P (A3) ≥ 1 − δ/16","inline":true},{"text":", we have, on ","element":"span"},{"style":{"height":11.6},"width":265.88,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-19.png","element":"img","alt":" A3, for all s ≤ w:","inline":true}],[{"text":"If there exists ","element":"span"},{"style":{"height":11.2},"width":152.8,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-20.png","element":"img","alt":" 1 ≤ s′ < s","inline":true},{"text":", such that ","element":"span"},{"style":{"height":11.49},"width":51.76,"height":28.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-21.png","element":"img","alt":" Xs′","inline":true,"padRight":true},{"text":"is an informative point, and ","element":"span"},{"style":{"height":12.8},"width":304.72,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-22.png","element":"img","alt":" (Xs′, �Ys′, �LBs′) ∈ �S","inline":true}],[{"text":"(the current active set just before attaining ","element":"span"},{"href":"#id-33","style":{"height":12.8},"width":685.96,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-23.png","element":"img","alt":" Xs defined in KALLS(Algorithm(1))), and 
that","inline":true}],[{"text":"satisfies:","element":"span"}],[{"id":"id-81","style":{"width":"82%"},"width":1114,"height":91,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/26-24.png","element":"img"}],[{"text":"where","element":"span"}],[{"style":{"width":"63%"},"width":853,"height":83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-0.png","element":"img"}],[{"text":"and","element":"span"}],[{"style":{"width":"61%"},"width":831,"height":82,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-1.png","element":"img"}],[{"text":"then","element":"span"}],[{"id":"id-82","style":{"width":"92%"},"width":1249,"height":86,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-2.png","element":"img"}],[{"text":"Otherwise, if ","element":"span"},{"href":"#id-81","text":"(50) ","element":"a"},{"text":"does not hold, i.e.:","element":"span"}],[{"style":{"width":"42%"},"width":577,"height":86,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-3.png","element":"img"}],[{"text":"then ","element":"span"},{"id":"id-83","text":"min(","element":"span"},{"style":{"height":26.83},"width":813.32,"height":67.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-4.png","element":"img","alt":"PX(B(Xs, ρ(Xs′, Xs))), PX(B(Xs′, ρ(Xs′, Xs)))) ≥ 2847","inline":true}],[{"text":"Proof","element":"span"}],[{"text":"By following the scheme of subroutine ","element":"span"},{"text":"Estprob","element":"span"},{"text":", this Lemma is a direct application of Lemma","element":"span"}],[{"href":"#id-79","text":"7 ","element":"a"},{"text":"by taking for all ","element":"span"},{"style":{"height":26.14},"width":1069.8,"height":65.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-5.png","element":"img","alt":" s ≤ w, t = 50, ǫo =� 164L �LBs′�d/α, δ′ = δs, r = ρ(Xs, Xs′), A3,s := A′.","inline":true}],[{"text":"And then, if we set 
","element":"span"},{"style":{"height":13.9},"width":621.8,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-6.png","element":"img","alt":" A3 = ∩s≥1A3,s, we have P (A3) ≥ 1 − δ/","inline":true},{"text":"16, and on the event ","element":"span"},{"style":{"height":11.6},"width":102.36,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-7.png","element":"img","alt":" A3, we","inline":true}],[{"text":"can easily deduce ","element":"span"},{"href":"#id-82","text":"(51) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-83","referenceIndex":103,"text":"(52) ","element":"a"},{"text":"in each cases.","element":"span"}],[{"text":"On the other hand, for all ","element":"span"},{"style":{"height":11.2},"width":84.96,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-8.png","element":"img","alt":" s ≤ w","inline":true},{"text":", the number of draws in ","element":"span"},{"style":{"height":26.15},"width":704.68,"height":65.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-9.png","element":"img","alt":" Estprob(Xs, ρ(Xs, Xs′),� 164L �LBs′�d/α, 50, δs)","inline":true}],[{"text":"(respectively ","element":"span"},{"style":{"height":26.14},"width":700.4,"height":65.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-10.png","element":"img","alt":" Estprob(Xs′ , ρ(Xs, Xs′),� 164L �LBs′�d/α, 50, δs","inline":true},{"text":")) is always lower than ","element":"span"},{"text":"w","element":"span"},{"text":". 
In-","element":"span"}],[{"text":"deed, by Lemma ","element":"span"},{"href":"#id-79","text":"7, ","element":"a"},{"text":"the number of draws is at most:","element":"span"}],[{"style":{"width":"94%"},"width":1275,"height":98,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-11.png","element":"img"}],[{"text":"Then we have:","element":"span"}],[{"id":"id-84","style":{"width":"93%"},"width":1261,"height":480,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-12.png","element":"img"}],[{"text":"In equation ","element":"span"},{"href":"#id-84","text":"(53)","element":"a"},{"text":", ","element":"span"},{"style":{"height":15.14},"width":130.92,"height":37.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-13.png","element":"img","alt":" bδs′ ,|Qs′ |","inline":true,"padRight":true},{"text":"is defined by ","element":"span"},{"href":"#id-62","text":"(4)","element":"a"},{"text":", and ","element":"span"},{"style":{"height":12.8},"width":71.88,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-14.png","element":"img","alt":" |Qs′|","inline":true,"padRight":true},{"text":"represents the number of label requests","element":"span"}],[{"text":"used in the subroutine ","element":"span"},{"text":"ConfidentLabel","element":"span"},{"text":"(Algorithm ","element":"span"},{"href":"#id-40","text":"(5)","element":"a"},{"text":") at the stage ","element":"span"},{"style":{"height":5.6},"width":35.4,"height":14,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/27-15.png","element":"img","alt":" s′.","inline":true}],[{"id":"id-57","text":"A.5 Label the instance space","element":"span"}],[{"id":"id-85","style":{"width":"99%"},"width":1343,"height":183,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-0.png","element":"img"}],[{"text":"Let ","element":"span"},{"text":"I ","element":"span"},{"text":"be the set of indexes of informative points used in 
","element":"span"},{"text":"KALLS ","element":"span"},{"text":"(Algorithm ","element":"span"},{"href":"#id-33","text":"1)","element":"a"},{"text":". Let us consider","element":"span"}],[{"text":"its last update in ","element":"span"},{"text":"KALLS ","element":"span"},{"text":"(Algorithm ","element":"span"},{"href":"#id-33","text":"1) ","element":"a"},{"text":"and also denoted it by ","element":"span"},{"text":"I","element":"span"},{"text":".","element":"span"}],[{"text":"Then, set ","element":"span"},{"style":{"height":10.91},"width":176.36,"height":27.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-1.png","element":"img","alt":" sI = max I","inline":true,"padRight":true},{"text":"the index of the last informative point. Let ","element":"span"},{"style":{"height":10.33},"width":49.96,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-2.png","element":"img","alt":"�Sac","inline":true,"padRight":true},{"text":"be the active set","element":"span"}],[{"text":"obtained in ","element":"span"},{"text":"KALLS ","element":"span"},{"text":"(Algorithm ","element":"span"},{"href":"#id-33","text":"1) ","element":"a"},{"text":"and denote by ","element":"span"},{"style":{"height":12.34},"width":64.68,"height":30.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-3.png","element":"img","alt":"�fn,w","inline":true,"padRight":true},{"text":"the output ","element":"span"},{"style":{"height":12.8},"width":128.68,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-4.png","element":"img","alt":" 1NN(�Sac)","inline":true},{"text":". 
There exists an","element":"span"}],[{"text":"event ","element":"span"},{"style":{"height":12.8},"width":1009.64,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-5.png","element":"img","alt":" A4 such that P (A4) ≥ 1 − δ/8, and on A1 ∩ A2 ∩ A3 ∩ A4, we have","inline":true}],[{"id":"id-86","style":{"width":"98%"},"width":1328,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-6.png","element":"img"}],[{"text":"2. If ","element":"span"},{"text":"w ","element":"span"},{"text":"satisfies ","element":"span"},{"href":"#id-46","text":"(12) ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-46","text":"(13) ","element":"a"},{"text":"and the following condition holds","element":"span"}],[{"id":"id-90","style":{"width":"52%"},"width":712,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-7.png","element":"img"}],[{"text":"then, for all ","element":"span"},{"style":{"height":16.58},"width":604,"height":41.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-8.png","element":"img","alt":" x ∈ supp(PX) such that |η(x) − 1/2| > ∆","inline":true},{"text":", there exists ","element":"span"},{"style":{"height":12.8},"width":284.44,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-9.png","element":"img","alt":" s := s(x) ∈ I such","inline":true,"padRight":true},{"text":"that:","element":"span"}],[{"style":{"width":"95%"},"width":1282,"height":276,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-10.png","element":"img"}],[{"text":"Proof ","element":"span"},{"text":"This proof is based on results from ","element":"span"},{"href":"#id-31","text":"(Hanneke","element":"a"},{"text":") with a few additional modifications. 1. Let us begin by proving the first part of Theorem ","element":"span"},{"href":"#id-85","text":"6. 
","element":"a"},{"text":"For ","element":"span"},{"style":{"height":12.8},"width":192.48,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-11.png","element":"img","alt":" x ∈ supp(PX","inline":true},{"text":"), let us introduce","element":"span"}],[{"style":{"width":"42%"},"width":578,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-12.png","element":"img"}],[{"text":"By Lemma ","element":"span"},{"href":"#id-65","referenceIndex":27,"text":"3, ","element":"a"},{"text":"we have ","element":"span"},{"style":{"height":13.71},"width":318.72,"height":34.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-13.png","element":"img","alt":" PX(B(x, r˜pǫ(x)) ≥ ˜pǫ","inline":true},{"text":". Then each ","element":"span"},{"text":"¯","element":"span"},{"style":{"height":15.61},"width":435.88,"height":39.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-14.png","element":"img","alt":"X ∈ {X1, . . . , XTǫ,δ} belongs","inline":true,"padRight":true},{"text":"to ","element":"span"},{"style":{"height":13.71},"width":153.4,"height":34.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-15.png","element":"img","alt":" B(x, r˜pǫ(x","inline":true},{"text":")) with probability at least ˜","element":"span"},{"style":{"height":8},"width":29.28,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-16.png","element":"img","alt":"pǫ","inline":true},{"text":". 
If we denote ","element":"span"},{"style":{"height":8.8},"width":26,"height":22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-17.png","element":"img","alt":"�P","inline":true,"padRight":true},{"text":"the probability over the data, we have:","element":"span"}],[{"style":{"width":"62%"},"width":836,"height":639,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/28-18.png","element":"img"}],[{"text":"Then, there exists an event ","element":"span"},{"style":{"height":10.93},"width":40.44,"height":27.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-0.png","element":"img","alt":" A4","inline":true},{"text":", such that ","element":"span"},{"style":{"height":12.8},"width":231.08,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-1.png","element":"img","alt":" P (A4) ≥ 1 − δ/","inline":true},{"text":"8 and ","element":"span"},{"href":"#id-86","text":"(55) ","element":"a"},{"text":"holds on ","element":"span"},{"style":{"height":10.93},"width":126.04,"height":27.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-2.png","element":"img","alt":" A4. And","inline":true,"padRight":true},{"text":"then, we can easily conclude the first part.","element":"span"}],[{"text":"2. For the second part of Theorem ","element":"span"},{"href":"#id-85","text":"6, ","element":"a"},{"text":"let ","element":"span"},{"style":{"height":12.8},"width":191.52,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-3.png","element":"img","alt":" x ∈ supp(PX","inline":true},{"text":"). 
By ","element":"span"},{"href":"#id-86","text":"(55)","element":"a"},{"text":", on ","element":"span"},{"style":{"height":10.93},"width":40.44,"height":27.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-4.png","element":"img","alt":" A4","inline":true,"padRight":true},{"text":"there exists ","element":"span"},{"style":{"height":10.34},"width":80.12,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-5.png","element":"img","alt":" Xx ∈","inline":true},{"style":{"height":15.62},"width":237.8,"height":39.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-6.png","element":"img","alt":"{X1, . . . , XTǫ,δ }","inline":true,"padRight":true},{"text":"such that:","element":"span"}],[{"id":"id-87","style":{"width":"95%"},"width":1283,"height":413,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-7.png","element":"img"}],[{"text":"As ","element":"span"},{"style":{"height":13.1},"width":143.12,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-8.png","element":"img","alt":" sI ≥ Tǫ,δ","inline":true},{"text":", then there exists ","element":"span"},{"style":{"height":11.49},"width":497.68,"height":28.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-9.png","element":"img","alt":" s′ such that Xx := Xs′ and Xs′","inline":true,"padRight":true},{"text":"passes through the subroutine ","element":"span"},{"text":"Reliable","element":"span"},{"text":". We have two cases: a) ","element":"span"},{"style":{"height":11.49},"width":351.12,"height":28.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-10.png","element":"img","alt":" Xs′ is uninformative","inline":true},{"text":". 
Then there exists ","element":"span"},{"style":{"height":8},"width":95.04,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-11.png","element":"img","alt":" s < s′","inline":true},{"text":", such that ","element":"span"},{"style":{"height":10.34},"width":41.84,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-12.png","element":"img","alt":" Xs","inline":true,"padRight":true},{"text":"is an informative point, and","element":"span"}],[{"id":"id-88","style":{"width":"91%"},"width":1228,"height":1288,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/29-13.png","element":"img"}],[{"style":{"width":"100%"},"width":1349,"height":456,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/30-0.png","element":"img"}],[{"text":"that contradicts ","element":"span"},{"href":"#id-87","text":"(62)","element":"a"},{"text":", then we have ","element":"span"},{"style":{"height":16.77},"width":293.44,"height":41.92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/30-1.png","element":"img","alt":" |η(Xs) − 12 | ≥ 132 ∆","inline":true},{"text":". Therefore, by ","element":"span"},{"href":"#id-88","text":"(63)","element":"a"},{"text":", ","element":"span"},{"href":"#id-88","text":"(64)","element":"a"},{"text":", ","element":"span"},{"text":"we have:","element":"span"}],[{"id":"id-89","style":{"width":"109%"},"width":1470,"height":1456,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/30-2.png","element":"img"}],[{"text":"b) ","element":"span"},{"style":{"height":11.49},"width":305.04,"height":28.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/30-3.png","element":"img","alt":" Xs′ is informative","inline":true},{"text":". 
In this case, ","element":"span"},{"style":{"height":5.6},"width":90.72,"height":14,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/30-4.png","element":"img","alt":" s = s′ ","inline":true,"padRight":true},{"text":"and then we always obtain the equation ","element":"span"},{"href":"#id-89","text":"(69)","element":"a"},{"text":", which becomes","element":"span"}],[{"style":{"width":"100%"},"width":1349,"height":501,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-0.png","element":"img"}],[{"text":"On ","element":"span"},{"style":{"height":10.93},"width":121.56,"height":27.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-1.png","element":"img","alt":" A1 ∩ A2","inline":true},{"text":", by Theorem ","element":"span"},{"href":"#id-72","referenceIndex":71,"text":"5, ","element":"a"},{"text":"the subroutine ","element":"span"},{"style":{"height":12.8},"width":291.92,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-2.png","element":"img","alt":" ConfidentLabel(Xs","inline":true},{"text":") uses at most ","element":"span"},{"text":"˜","element":"span"},{"style":{"height":12.8},"width":104.68,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-3.png","element":"img","alt":"k(ǫ, δs)","inline":true,"padRight":true},{"text":"label requests, and returns the correct label (with respect to the Bayes classifier) of ","element":"span"},{"style":{"height":10.34},"width":53.16,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-4.png","element":"img","alt":" Xs.","inline":true,"padRight":true},{"text":"Let us prove that ","element":"span"},{"style":{"height":12.8},"width":219.92,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-5.png","element":"img","alt":" f∗(x) = f∗(Xs","inline":true},{"text":"). 
Let us assume without loss of generality that ","element":"span"},{"style":{"height":12.8},"width":116.72,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-6.png","element":"img","alt":" η(Xs)−","inline":true},{"style":{"height":6.4},"width":15,"height":16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-7.png","element":"img","alt":"1","inline":true},{"style":{"height":13.79},"width":55.28,"height":34.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-8.png","element":"img","alt":"2 ≥","inline":true,"padRight":true},{"text":"0. We will show that ","element":"span"},{"style":{"height":16.58},"width":164.72,"height":41.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-9.png","element":"img","alt":" η(x) − 12 ≥","inline":true,"padRight":true},{"text":"0. We have:","element":"span"}],[{"style":{"width":"56%"},"width":767,"height":446,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-10.png","element":"img"}],[{"text":"Then ","element":"span"},{"style":{"height":12.8},"width":243.72,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-11.png","element":"img","alt":" f∗(x) = f∗(Xs).","inline":true,"padRight":true},{"text":"As ","element":"span"},{"style":{"height":16.77},"width":301.6,"height":41.92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-12.png","element":"img","alt":" |η(Xs) − 12| ≥ 12∆","inline":true},{"text":", by using Theorem ","element":"span"},{"href":"#id-72","referenceIndex":71,"text":"5 ","element":"a"},{"text":"(the second part), we can easily see ","element":"span"},{"text":"that (","element":"span"},{"style":{"height":12.8},"width":381.68,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-13.png","element":"img","alt":"Xs, �Ys) ∈ �Sac (where �Ys","inline":true,"padRight":true},{"text":"is the inferred label of 
","element":"span"},{"style":{"height":10.34},"width":41.84,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-14.png","element":"img","alt":" Xs","inline":true,"padRight":true},{"text":"provided by the subroutine ","element":"span"},{"text":"ConfidentLabel ","element":"span"},{"text":"in ","element":"span"},{"text":"KALLS","element":"span"},{"text":"(Algorithm ","element":"span"},{"href":"#id-33","text":"(1)","element":"a"},{"text":")). Let ","element":"span"},{"style":{"height":16.03},"width":68.16,"height":40.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-15.png","element":"img","alt":" X(1)x","inline":true,"padRight":true},{"text":"the nearest neighbor of ","element":"span"},{"style":{"height":10.33},"width":276.36,"height":25.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-16.png","element":"img","alt":" x in �Sac. We have:","inline":true}],[{"style":{"width":"89%"},"width":1210,"height":247,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-17.png","element":"img"}],[{"text":"Then, ","element":"span"},{"style":{"height":19.11},"width":658.24,"height":47.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-18.png","element":"img","alt":" |η(X(1)x ) − 12 | ≥ (1 − 791985 )|η(x) − 12| ≥ 12∆","inline":true,"padRight":true},{"text":"and by Theorem ","element":"span"},{"href":"#id-72","referenceIndex":71,"text":"5, ","element":"a"},{"text":"the subroutine ","element":"span"},{"text":"ConfidentLabel","element":"span"},{"style":{"height":17.31},"width":221.32,"height":43.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-19.png","element":"img","alt":"(X(1)x ) outputs","inline":true}],[{"style":{"width":"95%"},"width":1284,"height":386,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/31-20.png","element":"img"}],[{"id":"id-58","text":"A.6 Label complexity","element":"span"}],[{"text":"Lemma 
9","element":"span"}],[{"text":"Let us assume that ","element":"span"},{"href":"#id-46","style":{"height":13.9},"width":539.6,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-0.png","element":"img","alt":" w satisfies (12), (13), and w ≥ Tǫ,δ","inline":true},{"text":". Then, there exists an event ","element":"span"},{"style":{"height":10.93},"width":40.44,"height":27.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-1.png","element":"img","alt":" A5","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":12.8},"width":666.84,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-2.png","element":"img","alt":" P (A5) ≥ 1 − δ/8, and on A1 ∩ A2 ∩ A3 ∩ A5","inline":true},{"text":". The condition ","element":"span"},{"href":"#id-46","text":"(11) ","element":"a"},{"text":"is sufficient to guarantee ","element":"span"},{"href":"#id-90","text":"(56)","element":"a"},{"text":".","element":"span"}],[{"text":"Before beginning the proof, let us define a notion that will be used through the proof.","element":"span"}],[{"text":"Definition 5","element":"span"}],[{"text":"Let a set ","element":"span"},{"style":{"height":12.8},"width":688.68,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-3.png","element":"img","alt":" F ⊂ supp(PX). Let {x1, . . . , xm} ⊂ F and p (0,","inline":true,"padRight":true},{"text":"1]. We say that the set ","element":"span"},{"style":{"height":12.8},"width":227.6,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-4.png","element":"img","alt":" {x1, . . . 
, xm} ⊂","inline":true}],[{"text":"F ","element":"span"},{"text":"is a ","element":"span"},{"text":"p","element":"span"},{"text":"-probability-packing set of ","element":"span"},{"text":"F ","element":"span"},{"text":"if:","element":"span"}],[{"style":{"width":"77%"},"width":1050,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-5.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":9.14},"width":30.36,"height":22.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-6.png","element":"img","alt":" rp","inline":true,"padRight":true},{"text":"is defined by ","element":"span"},{"href":"#id-91","text":"(25)","element":"a"},{"text":", and ","element":"span"},{"style":{"height":12.8},"width":431,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-7.png","element":"img","alt":" a ∨ b = max(a, b) for a, b ∈ R","inline":true}],[{"text":"This notion of ","element":"span"},{"text":"p","element":"span"},{"text":"-probability-packing comes from the Definition 1.4 in ","element":"span"},{"href":"#id-92","text":"(Edgar","element":"a"},{"text":", ","element":"span"},{"href":"#id-92","text":"2000","element":"a"},{"text":"). It will","element":"span"}],[{"text":"be used on a particular set of the form ","element":"span"},{"style":{"height":16.58},"width":724.91,"height":41.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-8.png","element":"img","alt":" {x ∈ supp(PX), γ ≤ |η(x) − 12| ≤ γ′} (where","inline":true}],[{"text":"0 ","element":"span"},{"style":{"height":10},"width":131.52,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-9.png","element":"img","alt":" < γ < γ′","inline":true},{"text":"). 
This allows us to upper bound the number of informative points where we have","element":"span"}],[{"text":"a very high confidence for inferring their labels.","element":"span"}],[{"text":"Proof","element":"span"}],[{"text":"Let us consider the last update of ","element":"span"},{"text":"I","element":"span"},{"text":", the set of indexes of informative points used in","element":"span"}],[{"text":"KALLS","element":"span"},{"text":"(Algorithm ","element":"span"},{"href":"#id-33","text":"1)","element":"a"},{"text":".","element":"span"}],[{"text":"Set ","element":"span"},{"style":{"height":10.91},"width":163.4,"height":27.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-10.png","element":"img","alt":" sI = max I","inline":true},{"text":", the index of the last informative point. We consider two cases:","element":"span"}],[{"text":"1. ","element":"span"},{"style":{"height":10.91},"width":289.92,"height":27.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-11.png","element":"img","alt":" First case: sI = w","inline":true},{"text":": we can easily see that ","element":"span"},{"href":"#id-90","text":"(56) ","element":"a"},{"text":"is satisfied, and we have trivially that the condition ","element":"span"},{"href":"#id-46","text":"(11) ","element":"a"},{"text":"is sufficient to guarantee ","element":"span"},{"href":"#id-90","text":"(56)","element":"a"},{"text":". 2. 
","element":"span"},{"style":{"height":10.91},"width":327.84,"height":27.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-12.png","element":"img","alt":" Second case: sI < w","inline":true},{"text":": then the total number of label requests up to ","element":"span"},{"style":{"height":10.51},"width":75.72,"height":26.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-13.png","element":"img","alt":" sI is:","inline":true}],[{"style":{"width":"119%"},"width":1608,"height":265,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-14.png","element":"img"}],[{"style":{"height":12.8},"width":321.12,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-15.png","element":"img","alt":"ConfidentLabel(Xs, t","inline":true},{"text":") implicitly assumes that the process of label request do not takes into account the constraint related to the budget ","element":"span"},{"text":"n ","element":"span"},{"text":"(very large budget with respect to ","element":"span"},{"style":{"height":12.8},"width":88.88,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-16.png","element":"img","alt":"k(ǫ, δs","inline":true},{"text":")) such that ","element":"span"},{"style":{"height":12.8},"width":761.68,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-17.png","element":"img","alt":" ConfidentLabel(Xs, t)=ConfidentLabel(Xs, t = ∞","inline":true},{"text":") . Then we have:","element":"span"}],[{"id":"id-93","style":{"width":"54%"},"width":740,"height":97,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-18.png","element":"img"}],[{"text":"On the other hand, we want to guarantee the condition ","element":"span"},{"href":"#id-90","text":"(56)","element":"a"},{"text":". 
For this, necessary for all ","element":"span"},{"text":"s ","element":"span"},{"style":{"height":9.6},"width":48.68,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-19.png","element":"img","alt":"∈ I","inline":true},{"text":", such that ","element":"span"},{"style":{"height":13.1},"width":291.92,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-20.png","element":"img","alt":" s ≤ Tǫ,δ, and s < sI","inline":true},{"text":", at the end of the subroutine ","element":"span"},{"style":{"height":12.8},"width":343.08,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-21.png","element":"img","alt":" ConfidentLabel(Xs, t),","inline":true,"padRight":true},{"text":"the budget ","element":"span"},{"text":"n ","element":"span"},{"text":"is not yet reached and then we can replace the relation ","element":"span"},{"href":"#id-93","text":"(77) ","element":"a"},{"text":"by","element":"span"}],[{"id":"id-94","style":{"width":"55%"},"width":752,"height":127,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-22.png","element":"img"}],[{"text":"Then, necessarily, ","element":"span"},{"href":"#id-90","text":"(56) ","element":"a"},{"text":"holds when ","element":"span"},{"href":"#id-94","text":"(78) ","element":"a"},{"text":"holds. 
Also, for ","element":"span"},{"style":{"height":9.6},"width":76.04,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-23.png","element":"img","alt":" s ∈ I","inline":true},{"text":", by theore","element":"span"},{"href":"#id-72","referenceIndex":71,"text":"m5, ","element":"a"},{"text":"if we assume that ","element":"span"},{"style":{"height":16.58},"width":267.04,"height":41.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-24.png","element":"img","alt":" |η(Xs) − 12 | ≥ 12 ∆","inline":true},{"text":", we have that ","element":"span"},{"style":{"height":12.8},"width":96.56,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-25.png","element":"img","alt":" |Qs| ≤","inline":true,"padRight":true},{"text":"˜","element":"span"},{"style":{"height":12.8},"width":88.88,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-26.png","element":"img","alt":"k(ǫ, δs","inline":true},{"text":"), and the subroutine ","element":"span"},{"style":{"height":15.74},"width":854.92,"height":39.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/32-27.png","element":"img","alt":" ConfidentLabel(Xs, t), (with t = n − �si∈I,si ∆}.","inline":true}],[{"text":"Consequently, we have:","element":"span"}],[{"style":{"width":"100%"},"width":1349,"height":398,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/38-0.png","element":"img"}],[{"text":"Thus with probability at least 1 ","element":"span"},{"style":{"height":12.8},"width":246.4,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/2001.06485/images/38-1.png","element":"img","alt":" − P (Ac) ≥ 1 − δ","inline":true},{"text":", ","element":"span"},{"href":"#id-46","text":"(14) ","element":"a"},{"text":"holds.","element":"span"}]]}],"_version":"3.3.2"},"paperNode":"$1b:props:children:props:children:0:props:product"}]]]}]}]