1b:["$","$L29",null,{"isWhiteLabelled":false,"children":["$","$Lb",null,{"pt":{"compact":0,"expanded":3},"children":[["$","$L2a",null,{"noStar":true,"publisher":true,"task":true,"params":true,"size":"xl","product":{"id":"eyJwYXBlcklEIjoiMTgwOC4wNDUyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","updated":"2018-08-26T22:45:16.000Z","paperID":"1808.04523","published":"2018-08-14T05:01:55.000Z","authors":"[\"Max Simchowitz\",\"Kevin Jamieson\",\"Jordan W. Suchow\",\"Thomas L. Griffiths\"]","title":"Adaptive Sampling for Convex Regression","scoreTrending":null,"summary":"In this paper, we introduce the first principled adaptive-sampling procedure\nfor learning a convex function in the $L_\\infty$ norm, a problem that arises\noften in the behavioral and social sciences. We present a function-specific\nmeasure of complexity and use it to prove that, for each convex function\n$f_{\\star}$, our algorithm nearly attains the information-theoretically\noptimal, function-specific error rate. We also corroborate our theoretical\ncontributions with numerical experiments, finding that our method substantially\noutperforms passive, uniform sampling for favorable synthetic and data-derived\nfunctions in low-noise settings with large sampling budgets. 
Our results also\nsuggest an idealized \"oracle strategy\", which we use to gauge the potential\nadvance of any adaptive-sampling strategy over passive sampling, for any given\nconvex function.","lastCheckedForCode":"2022-09-01T21:02:07.554Z","links":[{"id":"eyJ1cmwiOiJodHRwczovL3BhcGVyc3dpdGhjb2RlLmNvbS9wYXBlci9hZGFwdGl2ZS1zYW1wbGluZy1mb3ItY29udmV4LXJlZ3Jlc3Npb24ifQ==","type":"pwc","url":"https://paperswithcode.com/paper/adaptive-sampling-for-convex-regression","data":null}],"reposConnection":{"edges":[]},"models":[],"tags":[{"id":"eyJuYW1lIjoicmVncmVzc2lvbiIsInR5cGUiOiJ0YXNrIn0=","name":"regression","description":"In regression, the input is a set of numerical or categorical variables, and the output is a continuous numerical value. This task is commonly used in forecasting, trend analysis, and determining relationships between variables, such as predicting house prices based on features like size and location.","scoreTrending":null,"count":{"stars":12479,"papers":7292,"models":4571},"__typename":"Tag"}],"summaries":[],"emailsConnection":{"edges":[{"author":"kevin jamieson","node":{"id":"eyJhZGRyZXNzIjoiamFtaWVzb25AY3Mud2FzaGluZ3Rvbi5lZHUifQ==","address":"jamieson@cs.washington.edu","name":"Jamieson","avatar":null,"linkedin":"https://www.linkedin.com/in/kevin-jamieson-977971139","bio":null,"site":null,"override":null,"membership":[{"name":"University of 
Washington"}],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[{"avatar":"https://avatars.githubusercontent.com/u/2220039?v=4","username":"kgjamieson"}],"scholar":[{"thirdPartyID":"dq3yXjkAAAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiIxZmM4ODBjMS0xNjY2LTRlYjctOTE5Mi04ZjdlMmU1ZTI3M2QifQ==","name":"kevin 
jamieson","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMTYwMy4wNjU2MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1603.06560"},{"id":"eyJwYXBlcklEIjoiMTkwNS4wMzgxNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1905.03814"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wODM5OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.08399"},{"id":"eyJwYXBlcklEIjoiMTcwMi4wNTE4NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1702.05186"},{"id":"eyJwYXBlcklEIjoiMjAwNi4xMTY4NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.11685"},{"id":"eyJwYXBlcklEIjoiMjEwOC4wMjcxNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2108.02717"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wNjU5NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.06594"},{"id":"eyJwYXBlcklEIjoiMjIwMS4xMTIwNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2201.11206"},{"id":"eyJwYXBlcklEIjoiMjEwNS4wNTgwNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2105.05806"},{"id":"eyJwYXBlcklEIjoiMjAwMi4wMDQ5NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2002.00495"},{"id":"eyJwYXBlcklEIjoiMjAxMS4wMDU3NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2011.00576"},{"id":"eyJwYXBlcklEIjoiMjEwNS4wNjQ5OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2105.06499"},{"id":"eyJwYXBlcklEIjoiMTYwMy4wMjc1MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1603.02752"},{"id":"eyJwYXBlcklEIjoiMTgwOS4wMjIzNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1809.02235"},{"id":"eyJwYXBlcklEIjoiMjIwNy4wMjU3NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2207.02575"},{"id":"eyJwYXBlcklEIjoiMjMwNi4wOTIxMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2306.09210"},{"id":"eyJwYXBlcklEIjoiMjExMS4xMjE1MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2111.12151"},{"id":"eyJwYXBlcklEIjoiMjMwNi4xMzI
zMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2306.13233"},{"id":"eyJwYXBlcklEIjoiMTgwOC4wNDUyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1808.04523"},{"id":"eyJwYXBlcklEIjoiMjExMi4wMzQzMiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2112.03432"},{"id":"eyJwYXBlcklEIjoiMjEwMi4wNTIxNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2102.05214"},{"id":"eyJwYXBlcklEIjoiMjMwNi4wOTkxMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2306.09910"},{"id":"eyJwYXBlcklEIjoiMjIwNi4xMTE4MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2206.11183"},{"id":"eyJwYXBlcklEIjoiMTcwNi4wMTU2NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1706.01566"},{"id":"eyJwYXBlcklEIjoiMjExMC4xNDg2NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2110.14864"},{"id":"eyJwYXBlcklEIjoiMjMxMC4wNjA2OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2310.06069"},{"id":"eyJwYXBlcklEIjoiMTYwMy4wODAzNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1603.08037"},{"id":"eyJwYXBlcklEIjoiMjMxMC4xODQ2NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2310.18465"},{"id":"eyJwYXBlcklEIjoiMjAxMC4xNTM4MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2010.15382"},{"id":"eyJwYXBlcklEIjoiMjMwNi4wODk0MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2306.08942"},{"id":"eyJwYXBlcklEIjoiMjEwNi4xMTIyMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2106.11220"},{"id":"eyJwYXBlcklEIjoiMjIwNy4wMjM1NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2207.02357"},{"id":"eyJwYXBlcklEIjoiMTkxMi4wNzc3NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1912.07777"},{"id":"eyJwYXBlcklEIjoiMTgxMS4wNjE0OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1811.06149"},{"id":"eyJwYXBlcklEIjoiMjAwOC4wNjU1NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2008.06555"},
{"id":"eyJwYXBlcklEIjoiMjExMS4wNDkxNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2111.04915"},{"id":"eyJwYXBlcklEIjoiMjMwMy4xMDU2NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2303.10565"},{"id":"eyJwYXBlcklEIjoiMjMwNy4xNTE1NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2307.15154"},{"id":"eyJwYXBlcklEIjoiNTI5MDEiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"52901"},{"id":"eyJwYXBlcklEIjoiNTM3NzQiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"53774"},{"id":"eyJwYXBlcklEIjoiNTMxOTUiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"53195"},{"id":"eyJwYXBlcklEIjoiMjMxMC4xNjI1MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2310.16252"},{"id":"eyJwYXBlcklEIjoiNzE2NDciLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"71647"},{"id":"eyJwYXBlcklEIjoiNzA0MjIiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"70422"},{"id":"eyJwYXBlcklEIjoiMjMxMi4wODU1OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2312.08559"}]}]}},{"author":null,"node":{"id":"eyJhZGRyZXNzIjoibXNpbWNob3dAYmVya2VsZXkuZWR1In0=","address":"msimchow@berkeley.edu","name":null,"avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github"
:[{"avatar":"https://avatars.githubusercontent.com/u/963314?v=4","username":"msimchowitz"}],"scholar":[{"thirdPartyID":"QhG_7egAAAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiI1MWFhYzQ4NC03OTAxLTQ5MzUtYjdiYy1mYmM1NTljYWExZTUifQ==","name":"max simchowitz","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMTcxMC4wNzQwNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1710.07406"},{"id":"eyJwYXBlcklEIjoiMjAwMS4wOTU3NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2001.09576"},{"id":"eyJwYXBlcklEIjoiMTkxMS4wMjIxMiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1911.02212"},{"id":"eyJwYXBlcklEIjoiMTkwNS4wMzgxNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1905.03814"},{"id":"eyJwYXBlcklEIjoiMjAwMi4wMjc5NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2002.02794"},{"id":"eyJwYXBlcklEIjoiMjAwMS4wOTI1NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2001.09254"},{"id":"eyJwYXBlcklEIjoiMTkxMS4wODY4OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1911.08689"},{"id":"eyJwYXBlcklEIjoiMTcwMi4wNTE4NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1702.05186"},{"id":"eyJwYXBlcklEIjoiMjAwMy4wMDE4OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2003.00189"},{"id":"eyJwYXBlcklEIjoiMjQwNy4wMTM5MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2407.01392"},{"id":"eyJwYXBlcklEIjoiMjEwOC4wMjcxNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2108.02717"},{"id":"eyJwYXBlcklEIjoiMjIwMS4xMTIwNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2201.11206"},{"id":"eyJwYXBlcklEIjoiMTgwNy4wOTM4NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1807.09386"},{"id":"eyJwYXBlcklEIjoiMjAwNi4wNTA1MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.05051"},{"id":"eyJwYXBlcklEIjoiMjAxMC4wMzc5OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2010.03799"},{
"id":"eyJwYXBlcklEIjoiMjAwNi4wNTkxMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.05910"},{"id":"eyJwYXBlcklEIjoiMTcwNC4wNDU0OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1704.04548"},{"id":"eyJwYXBlcklEIjoiMTYwMy4wMjc1MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1603.02752"},{"id":"eyJwYXBlcklEIjoiMTgwNC4wMTIyMSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1804.01221"},{"id":"eyJwYXBlcklEIjoiMjEwNy4wMTUwOSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2107.01509"},{"id":"eyJwYXBlcklEIjoiMTgwOC4wNDUyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1808.04523"},{"id":"eyJwYXBlcklEIjoiMjEwMy4wMDM2MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2103.00360"},{"id":"eyJwYXBlcklEIjoiMjExMi4wMzQzMiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2112.03432"},{"id":"eyJwYXBlcklEIjoiMjIwMi4wNzg5MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.07890"},{"id":"eyJwYXBlcklEIjoiMjIwMi4xMTY1OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.11659"},{"id":"eyJwYXBlcklEIjoiMjEwMi4wNTIxNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2102.05214"},{"id":"eyJwYXBlcklEIjoiMjMxMC4xMTQyOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2310.11428"},{"id":"eyJwYXBlcklEIjoiMjEwMy4xMDYyMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2103.10620"},{"id":"eyJwYXBlcklEIjoiMjMwMS4xMTE4NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2301.11187"},{"id":"eyJwYXBlcklEIjoiMjExMC4wNjQxOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2110.06418"},{"id":"eyJwYXBlcklEIjoiMjMwNy4xNDYxOSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2307.14619"},{"id":"eyJwYXBlcklEIjoiMjMwOS4wMDA4MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2309.00082"},{"id":"eyJwYXBlcklEIjoiMjMwMi4xMzkzNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher"
:"arxiv","paperID":"2302.13934"},{"id":"eyJwYXBlcklEIjoiMjMwNy4wNjQ1NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2307.06457"},{"id":"eyJwYXBlcklEIjoiNTM4NjUiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"53865"},{"id":"eyJwYXBlcklEIjoiNTQzOTIiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"54392"},{"id":"eyJwYXBlcklEIjoiNzIwODMiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"72083"},{"id":"eyJwYXBlcklEIjoiNzE4MjIiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"71822"},{"id":"eyJwYXBlcklEIjoiNzE3NTAiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"71750"}]}]}},{"author":"jordan w suchow","node":{"id":"eyJhZGRyZXNzIjoic3VjaG93QGJlcmtlbGV5LmVkdSJ9","address":"suchow@berkeley.edu","name":"Jordan Suchow","avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[{"avatar":"https://avatars2.githubusercontent.com/u/613981?v=4","username":"suchow"}],"scholar":[{"thirdPartyID":"S9xCl8EAAAAJ"}],"twitter":[],"location":[{"formatted":"Berkeley, CA, USA"}],"owner":[{"id":"eyJ1aWQiOiI0ZGU3OGQyMC0yODA1LTRmZWMtYWVmMC1lNjgzM2U5YzI5ZjYifQ==","name":"jordan w suchow","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMTgwOC4wNDUyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1808.04523"},{"id":"eyJwYXBlcklEIjoiMTgwNS4wNzY0NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1805.07644"},{"id":"eyJwYXBlcklEIjoiMTgwNS4wNzY1MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1805.07653"}]}]}},{"author":"thomas l griffiths","node":{"id":"eyJhZGRyZXNzIjoidG9tX2dyaWZmaXRoc0BiZXJrZWxleS5lZHUifQ==","address":"tom_griffiths@berkeley.edu","name":"Tom 
Griffiths","avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[],"scholar":[{"thirdPartyID":"UAwKvEsAAAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiIwZmJlYzk4OC05MDYyLTQ3NDItYjkyNS1jY2Q1ZmZlYWJjMmYifQ==","name":"thomas l griffiths","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMTgwMS4wODkzMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1801.08930"},{"id":"eyJwYXBlcklEIjoiMjMwOS4wMjQyNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2309.02427"},{"id":"eyJwYXBlcklEIjoiMTgwMi4wMTc4MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1802.01780"},{"id":"eyJwYXBlcklEIjoiMjEwNS4wNzE5NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2105.07197"},{"id":"eyJwYXBlcklEIjoiMTYwOC4wMjE2NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1608.02164"},{"id":"eyJwYXBlcklEIjoiMTgwNy4wNDY0MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1807.04640"},{"id":"eyJwYXBlcklEIjoiMjQwMi4wNDEwNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2402.04105"},{"id":"eyJwYXBlcklEIjoiMTcwNS4wNDQxNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1705.04416"},{"id":"eyJwYXBlcklEIjoiMTgxMi4wNjA4MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1812.06080"},{"id":"eyJwYXBlcklEIjoiMTgwOC4wOTM1MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1808.09352"},{"id":"eyJwYXBlcklEIjoiMjAwMi4wNTc2OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2002.05769"},{"id":"eyJwYXBlcklEIjoiMjMwMi4wMTMwOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2302.01308"},{"id":"eyJwYXBlcklEIjoiMTMwOS42ODU1IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1309.6855"},{"id":"eyJwYXBlcklEIjoiMjAwNi4xNjMyNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"20
06.16324"},{"id":"eyJwYXBlcklEIjoiMjIwNy4wMDc4NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2207.00787"},{"id":"eyJwYXBlcklEIjoiMjAwMi4wMTUxMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2002.01510"},{"id":"eyJwYXBlcklEIjoiMTcxMS4wNDg1NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1711.04855"},{"id":"eyJwYXBlcklEIjoiMjQwNi4wMTg2MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2406.01860"},{"id":"eyJwYXBlcklEIjoiMjMwMS4xMTk5MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2301.11990"},{"id":"eyJwYXBlcklEIjoiMjQwMS4wODY3MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2401.08672"},{"id":"eyJwYXBlcklEIjoiMjMxMS4wMDY4NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2311.00687"},{"id":"eyJwYXBlcklEIjoiMjIwMi4wNDcyOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.04728"},{"id":"eyJwYXBlcklEIjoiMjMxMi4xNDIyNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2312.14226"},{"id":"eyJwYXBlcklEIjoiMjAxMS4xMzc4MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2011.13782"},{"id":"eyJwYXBlcklEIjoiMTgwOC4wNDUyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1808.04523"},{"id":"eyJwYXBlcklEIjoiMjQwNS4xOTMxMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2405.19313"},{"id":"eyJwYXBlcklEIjoiMjAwNS4xNDM2MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2005.14363"},{"id":"eyJwYXBlcklEIjoiMjQwMS4xNjY0NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2401.16646"},{"id":"eyJwYXBlcklEIjoiMjMxMC4yMDA1OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2310.20059"},{"id":"eyJwYXBlcklEIjoiMjAwNy4xMzg2MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2007.13862"},{"id":"eyJwYXBlcklEIjoiMjQwMS4xNjY1NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2401.16657"},{"id":"eyJwYXBlcklEIjoiMjAwNy4wODcyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0="
,"publisher":"arxiv","paperID":"2007.08723"},{"id":"eyJwYXBlcklEIjoiMjAxMi4wOTAzNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2012.09035"},{"id":"eyJwYXBlcklEIjoiMjMwNS4xODIxMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2305.18213"},{"id":"eyJwYXBlcklEIjoiMjAxMC4wMjM3NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2010.02375"},{"id":"eyJwYXBlcklEIjoiMjAwOS4xNDA1MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2009.14050"},{"id":"eyJwYXBlcklEIjoiMjMxMS4xMDU4MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2311.10580"},{"id":"eyJwYXBlcklEIjoiMjIwNi4wNzg3MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2206.07870"},{"id":"eyJwYXBlcklEIjoiMTgwNS4wNzY0NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1805.07644"},{"id":"eyJwYXBlcklEIjoiMjAwOS4xNDcxNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2009.14715"},{"id":"eyJwYXBlcklEIjoiMjExMC4wNDMyOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2110.04328"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wNjc0NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.06744"},{"id":"eyJwYXBlcklEIjoiMjIwMi4wNDY3MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.04670"},{"id":"eyJwYXBlcklEIjoiMTgwNy4wNzEzNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1807.07134"},{"id":"eyJwYXBlcklEIjoiMjMxMi4xNDEwNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2312.14106"},{"id":"eyJwYXBlcklEIjoiMTcwNS4wMzI2MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1705.03260"},{"id":"eyJwYXBlcklEIjoiMjIwMi4xMjIyNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.12226"},{"id":"eyJwYXBlcklEIjoiMjMwNS4xNzI2MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2305.17262"},{"id":"eyJwYXBlcklEIjoiMjEwNS4xMTk1MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2105.11950"},{"id":"eyJwYXBlcklEIjoiMjQwMi4xNjY
2OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2402.16668"},{"id":"eyJwYXBlcklEIjoiMjQwNi4wMzcwNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2406.03707"},{"id":"eyJwYXBlcklEIjoiMjIwNy4wOTg0NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2207.09847"},{"id":"eyJwYXBlcklEIjoiMjQwMi4wNzAzNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2402.07035"},{"id":"eyJwYXBlcklEIjoiMjIwOC4wNjAyOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2208.06028"},{"id":"eyJwYXBlcklEIjoiNTMwOTMiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"53093"},{"id":"eyJwYXBlcklEIjoiNTQwMjIiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"54022"},{"id":"eyJwYXBlcklEIjoiMjQwMi4wNjk5MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2402.06992"},{"id":"eyJwYXBlcklEIjoiNzIxNTgiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"72158"},{"id":"eyJwYXBlcklEIjoiNzMwNzEiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"73071"},{"id":"eyJwYXBlcklEIjoiNzI5MTciLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"72917"},{"id":"eyJwYXBlcklEIjoiMjMxMS4xNDYwMSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2311.14601"},{"id":"eyJwYXBlcklEIjoiMjQwMy4xOTY2OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2403.19669"},{"id":"eyJwYXBlcklEIjoiMjQwNi4wMjI2OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2406.02268"},{"id":"eyJwYXBlcklEIjoiMTgwNS4wNzY0NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1805.07647"},{"id":"eyJwYXBlcklEIjoiMTgwNS4wNzY1MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1805.07653"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wMTk4MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.01983"},{"id":"eyJwYXBlcklEIjoiMTcwNS4wNDM1MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1705.04351"},{"id":"eyJwYXBlcklEIjoiMTgwOC4wMjk2
MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1808.02961"},{"id":"eyJwYXBlcklEIjoiMjAwNi4wNDE1NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.04156"}]}]}}]},"__typename":"paper","authorArray":["Max Simchowitz","Kevin Jamieson","Jordan W. Suchow","Thomas L. Griffiths"]}}],["$","$L18",null,{"container":true,"columns":100,"spacing":{"compact":0,"expanded":2,"large":3},"children":[["$","$L18",null,{"size":{"compact":100,"expanded":100,"large":68},"children":[["$","$7",null,{"children":["$","$L2b",null,{"publisher":"arxiv","paperID":"1808.04523","product":{"paper":"$1b:props:children:props:children:0:props:product","models":"$1b:props:children:props:children:0:props:product:models"},"isWhiteLabelled":false}]}],["$","$7",null,{"children":["$","$L2c",null,{"article":"$L2d","model":"$undefined"}]}]]}],["$","$L18",null,{"size":"grow","children":["$","$L2e",null,{}]}]]}],["$","$7",null,{"children":null}],[["$","audio",null,{"id":"tts"}],["$","$L2f",null,{"paperID":"1808.04523","publisher":"arxiv","paperJSON":{"title":"Adaptive Sampling for Convex Regression","paperID":"1808.04523","avgLineHeight":13.53,"imgScale":4,"sections":[{"heading":"Abstract","paragraphs":[[{"text":"In this paper, we introduce the first principled adaptive-sampling procedure for learning a convex function in the ","element":"span"},{"style":{"height":13.19},"width":59.12,"height":32.98,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/0-0.png","element":"img","alt":" L∞","inline":true,"padRight":true},{"text":"norm, a problem that arises often in the behavioral and social sciences. 
We present a function-specific measure of complexity and use it to prove that, for each convex function ","element":"span"},{"style":{"height":14},"width":33.51,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/0-1.png","element":"img","alt":" f⋆","inline":true},{"text":", our algorithm nearly attains the information-theoretically optimal, function-specific error rate. We also corroborate our theoretical contributions with numerical experiments, finding that our method substantially outperforms passive, uniform sampling for favorable synthetic and data-derived functions in low-noise settings with large sampling budgets. Our results also suggest an idealized “oracle strategy”, which we use to gauge the potential advance of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"any ","element":"span"},{"text":"adaptive-sampling strategy over passive sampling, for any given convex function.","element":"span"}]]},{"heading":"1 Introduction","paragraphs":[[{"text":"Many functions that model individual economic utility, the output of manufacturing processes, and natural phenomena in the social sciences are either convex or concave. For example, convex functions are used to model utility functions that exhibit ","element":"span"},{"style":{"fontStyle":"italic"},"text":"temporal discounting","element":"span"},{"text":", a classic effect in behavioral economics where people value immediate rewards over delayed rewards ","element":"span"},{"href":"#id-0","referenceIndex":9,"text":"[Frederick et al., ","element":"a"},{"href":"#id-0","referenceIndex":9,"text":"2002, ","element":"a"},{"href":"#id-1","referenceIndex":10,"text":"Green ","element":"a"},{"href":"#id-1","referenceIndex":10,"text":"and Myerson, ","element":"a"},{"href":"#id-1","referenceIndex":10,"text":"2004]","element":"a"},{"text":". 
To measure such curves, it is common practice to manipulate a variable (e.g., delay) over a fixed, uniformly spaced grid of ","element":"span"},{"style":{"height":16},"width":297.33,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/0-2.png","element":"img","alt":" ≈ design points","inline":true,"padRight":true},{"text":"5 points ","element":"span"},{"href":"#id-2","referenceIndex":8,"text":"[Fisher, ","element":"a"},{"href":"#id-2","referenceIndex":8,"text":"1937]","element":"a"},{"text":", collect many repeated trials of data, and fit a function of assumed parametric form (e.g., exponential or hyperbolic) using maximum likelihood estimation. This approach can be brittle to model mismatch when the true function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"lies outside the assumed class of functions. Moreover, non-linear parametric families can introduce challenges for constructing faithful and accurate confidence intervals when interpolating the estimator between measured design points.","element":"span"}],[{"text":"Non-parametric convex regression (c.f. ","element":"span"},{"href":"#id-3","referenceIndex":6,"text":"Dümbgen et al. ","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"[2004]","element":"a"},{"text":") corrects for the shortcomings of parametric methods by making no assumptions other than that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"is convex. In addition to faithfully modeling a large class of functions, non-parametric methods can also be employed to construct error bars at any ","element":"span"},{"style":{"height":17.6},"width":177.7,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/0-3.png","element":"img","alt":" x ∈ [0, 1]","inline":true,"padRight":true},{"text":"(see ","element":"span"},{"href":"#id-4","referenceIndex":3,"text":"Cai et al. 
","element":"a"},{"href":"#id-4","referenceIndex":3,"text":"[2013]","element":"a"},{"text":"). Unfortunately, even with shape restrictions, non-parametric methods may require prohibitively many samples for practical use.","element":"span"}],[{"text":"In this paper, we propose a more parsimonious approach to non-parametric curve estimation by allowing the design points to be chosen sequentially and adaptively. Formally, we consider the problem of estimating an unknown convex function ","element":"span"},{"style":{"height":17.2},"width":262.21,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/0-4.png","element":"img","alt":" f⋆ : [0, 1] → R","inline":true,"padRight":true},{"text":"with an estimator ","element":"span"},{"style":{"height":16.4},"width":30.18,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/0-5.png","element":"img","alt":"f̂","inline":true,"padRight":true},{"text":"which is close ","element":"span"},{"text":"in the ","element":"span"},{"style":{"height":14.62},"width":63.7,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-0.png","element":"img","alt":" L∞","inline":true,"padRight":true},{"text":"metric ","element":"span"},{"style":{"height":17.6},"width":618.69,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-1.png","element":"img","alt":" ∥f̂ − f⋆∥∞ = sup_x |f⋆(x) − f̂(x)|","inline":true,"padRight":true},{"text":". The estimator is constructed from sequential, noisy evaluations ","element":"span"},{"style":{"height":12},"width":179.68,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-2.png","element":"img","alt":" y1, . . . 
, yτ","inline":true,"padRight":true},{"text":"from an oracle ","element":"span"},{"style":{"fontStyle":"italic"},"text":"F ","element":"span"},{"text":"at design points ","element":"span"},{"style":{"height":11.2},"width":202.06,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-3.png","element":"img","alt":" x1, . . . , xτ,","inline":true}],[{"style":{"width":"69%"},"width":1306,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-4.png","element":"img"}],[{"text":"and where ","element":"span"},{"style":{"height":10.62},"width":43.24,"height":26.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-5.png","element":"img","alt":" wt","inline":true,"padRight":true},{"text":"represents zero-mean noise. We let ","element":"span"},{"style":{"height":15.02},"width":43.36,"height":37.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-6.png","element":"img","alt":" Ft","inline":true,"padRight":true},{"text":"denote the filtration generated by the design points and measurements ","element":"span"},{"style":{"height":19.01},"width":194.66,"height":47.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-7.png","element":"img","alt":" (xs, ys)ts=1","inline":true,"padRight":true},{"text":"up to time ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t","element":"span"},{"text":", and assume that the number of samples ","element":"span"},{"style":{"height":8},"width":27,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-8.png","element":"img","alt":" τ","inline":true,"padRight":true},{"text":"is ","element":"span"},{"text":"a stopping time with respect to ","element":"span"},{"style":{"height":17.6},"width":89.41,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-9.png","element":"img","alt":" {Ft}","inline":true},{"text":", where 
","element":"span"},{"style":{"height":22.07},"width":146.95,"height":55.18,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-10.png","element":"img","alt":" wt | Ft−1","inline":true,"padRight":true},{"text":"is zero mean, ","element":"span"},{"style":{"height":15.13},"width":43.5,"height":37.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-11.png","element":"img","alt":" σ²","inline":true},{"text":"-subgaussian. We refer to measurement allocation strategies for which ","element":"span"},{"style":{"height":11.82},"width":80.51,"height":29.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-12.png","element":"img","alt":" xt+1","inline":true,"padRight":true},{"text":"does not depend on ","element":"span"},{"style":{"height":19.01},"width":194.65,"height":47.53,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-13.png","element":"img","alt":" (xs, ys)ts=1","inline":true,"padRight":true},{"text":"as ","element":"span"},{"style":{"fontStyle":"italic"},"text":"passive","element":"span"},{"text":", and ","element":"span"},{"text":"adaptive sampling strategies for which ","element":"span"},{"style":{"height":11.82},"width":80.51,"height":29.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-14.png","element":"img","alt":" xt+1","inline":true,"padRight":true},{"text":"may depend on ","element":"span"},{"style":{"height":19.01},"width":385.21,"height":47.53,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-15.png","element":"img","alt":" (xs, ys)ts=1 as active.","inline":true}],[{"text":"Our main contributions are the following:","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"• ","element":"span"},{"text":"Inspired by ","element":"span"},{"href":"#id-4","referenceIndex":3,"text":"Cai et al. 
","element":"a"},{"href":"#id-4","referenceIndex":3,"text":"[2013]","element":"a"},{"text":", we introduce the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"local approximation modulus","element":"span"},{"text":", a new measure of local curvature for convex functions, ","element":"span"},{"style":{"height":17.6},"width":184.77,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-16.png","element":"img","alt":" ω(f⋆, x, ϵ)","inline":true},{"text":", and a function-specific complexity measure ","element":"span"},{"style":{"height":23.55},"width":588.54,"height":58.86,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-17.png","element":"img","alt":"Λavg(f⋆, ϵ) ≈ ∫₀¹ ω(f⋆, x, ϵ)⁻¹ dx","inline":true},{"text":", called the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"average ","element":"span"},{"text":"approximation modulus. ","element":"span"},{"style":{"height":18.22},"width":195.89,"height":45.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-18.png","element":"img","alt":" Λavg(f⋆, ϵ)","inline":true,"padRight":true},{"text":"coin","element":"span"},{"text":"cides with the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"average ","element":"span"},{"text":"curvature of ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-19.png","element":"img","alt":" f⋆","inline":true},{"text":", up to logarithmic factors and endpoint considerations. 
We prove a function-specific lower bound on the sample complexity of actively estimating any convex function ","element":"span"},{"style":{"height":16.4},"width":309.56,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-20.png","element":"img","alt":" f⋆ to L∞-error ϵ","inline":true,"padRight":true},{"text":"that scales at least as ","element":"span"},{"style":{"height":24.25},"width":362.93,"height":60.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-21.png","element":"img","alt":" (1 + σ2ϵ2 ) Λavg(f⋆, ϵ)","inline":true},{"text":", up to logarithmic ","element":"span"},{"text":"factors.","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"• ","element":"span"},{"text":"The packing argument for constructing our lower bound explicitly describes a near-optimal, clairvoyant sampling allocation tailored to ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-22.png","element":"img","alt":" f⋆","inline":true},{"text":"; we call this the “oracle allocation” (Proposition ","element":"span"},{"href":"#id-5","text":"3.4)","element":"a"},{"text":". 
This allocation is instructive as an experimental benchmark when ","element":"span"},{"style":{"height":16.4},"width":233.28,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-23.png","element":"img","alt":" f⋆ is known.","inline":true}],[{"id":"id-17","style":{"width":"0%"},"width":15,"height":4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-24.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"• ","element":"span"},{"text":"We introduce an active sampling procedure and an estimator ","element":"span"},{"style":{"height":16.4},"width":30.18,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-25.png","element":"img","alt":"f̂","inline":true,"padRight":true},{"text":"whose sample complexity for any ","element":"span"},{"style":{"height":24.25},"width":805.48,"height":60.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-26.png","element":"img","alt":" particular f∗ scales as (1 + σ2ϵ2 ) · Λavg(f⋆, ϵ)","inline":true,"padRight":true},{"text":"up to logarithmic factors, nearly matching our ","element":"span"},{"text":"lower bounds.","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"• ","element":"span"},{"text":"We show that for passive designs (e.g., sampled evenly on a grid), the sample complexity necessarily scales as ","element":"span"},{"style":{"height":24.25},"width":383.36,"height":60.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-27.png","element":"img","alt":" (1 + σ2ϵ2 ) · Λmax(f, ϵ)","inline":true},{"text":", where ","element":"span"},{"style":{"height":19.95},"width":632.6,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-28.png","element":"img","alt":" Λmax(f, ϵ) ≈ maxx∈[0,1] ω(f⋆, x, ϵ)","inline":true,"padRight":true},{"text":"coincides ","element":"span"},{"text":"with the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"maximum 
","element":"span"},{"text":"curvature. ","element":"span"},{"text":"We compare ","element":"span"},{"style":{"height":18.22},"width":195.89,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-29.png","element":"img","alt":" Λavg(f⋆, ϵ)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":17.6},"width":207.65,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-30.png","element":"img","alt":" Λmax(f⋆, ϵ)","inline":true,"padRight":true},{"text":"for many natural classes of functions, including quadratic functions, exponential curves, and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":"-piecewise linear functions. For ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":"-piecewise linear functions, the ","element":"span"},{"style":{"height":14.62},"width":63.7,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-31.png","element":"img","alt":" L∞","inline":true,"padRight":true},{"text":"error of our active algorithm scales no slower than ","element":"span"},{"style":{"height":19.93},"width":226.34,"height":49.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-32.png","element":"img","alt":" kn−1/2 log n","inline":true},{"text":", whereas passive designs scale no faster than ","element":"span"},{"style":{"height":16.33},"width":243.82,"height":40.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/1-33.png","element":"img","alt":" n−1/3 after n","inline":true,"padRight":true},{"text":"evaluations (see Remark ","element":"span"},{"href":"#id-6","text":"3.2)","element":"a"},{"text":".","element":"span"}],[{"text":"Finally, we validate our theoretical claims with an empirical study using both synthetic functions and those derived from real data. 
We observe that in low-noise settings or when the sampling budget is large, active sampling can substantially outperform passive uniform sampling. Moreover, our algorithm constitutes the first theoretically justified algorithm (passive or active) that guarantees uniform accuracy, even at the boundaries of the interval ","element":"span"},{"href":"#id-4","referenceIndex":3,"text":"[Cai et al., ","element":"a"},{"href":"#id-4","referenceIndex":3,"text":"2013, ","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"Dümbgen et al., ","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"2004]","element":"a"},{"text":". Even so, comparing the performance of our active algorithm to the oracle sampling strategy suggests room for modest but non-negligible improvements.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"1.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Related Work","element":"span"}],[{"href":"#id-7","referenceIndex":4,"text":"Castro et al. ","element":"a"},{"href":"#id-7","referenceIndex":4,"text":"[2005] ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-8","referenceIndex":13,"text":"Korostelev ","element":"a"},{"href":"#id-8","referenceIndex":13,"text":"[1999] ","element":"a"},{"text":"studied the minimax rates of active non-parametric regression, showing that active and passive learning attain the same minimax rates of convergence for Holder smooth classes, but that active learning achieves faster rates when the function is known to be well approximated by a piecewise-constant function.","element":"span"}],[{"text":"Prior literature on convex and concave regression consider the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"passive design ","element":"span"},{"text":"case, where the design points do not depend on measurements. 
Typically, the design points are chosen to be uniformly spaced on the unit interval, that is, ","element":"span"},{"style":{"height":21.29},"width":168.57,"height":53.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-0.png","element":"img","alt":" xi = 1n−1","inline":true,"padRight":true},{"text":"for ","element":"span"},{"style":{"height":15.2},"width":298.23,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-1.png","element":"img","alt":" i = 0, . . . , n − 1","inline":true,"padRight":true},{"href":"#id-3","referenceIndex":6,"text":"[Dümbgen et al., ","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"2004]","element":"a"},{"text":". If ","element":"span"},{"style":{"fontStyle":"italic"},"text":"F ","element":"span"},{"text":"is ","element":"span"},{"text":"the set of Lipschitz, convex functions, then the ","element":"span"},{"style":{"height":20.59},"width":981.99,"height":51.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-2.png","element":"img","alt":" L∞-norm ∥ �fLS − f⋆∥L∞ = supx∈[0,1] |f⋆(x) − �fLS(x)|","inline":true,"padRight":true},{"text":"of the least squares estimator ","element":"span"},{"style":{"height":16.4},"width":61.54,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-3.png","element":"img","alt":"�fLS","inline":true,"padRight":true},{"text":"decreases like ","element":"span"},{"style":{"height":20.33},"width":250.4,"height":50.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-4.png","element":"img","alt":" (log(n)/n)1/3","inline":true},{"text":", whereas if the convex function has Lipschitz gradients, the rate improves to ","element":"span"},{"style":{"height":20.33},"width":249.32,"height":50.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-5.png","element":"img","alt":" (log(n)/n)2/5 ","inline":true,"padRight":true},{"href":"#id-3","referenceIndex":6,"text":"[Dümbgen et al., 
","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"2004]","element":"a"},{"text":".","element":"span"}],[{"text":"Recent work by ","element":"span"},{"href":"#id-9","referenceIndex":11,"text":"Guntuboyina and Sen ","element":"a"},{"href":"#id-9","referenceIndex":11,"text":"[2015] ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-10","referenceIndex":5,"text":"Chatterjee ","element":"a"},{"href":"#id-10","referenceIndex":5,"text":"[2016] ","element":"a"},{"text":"has aimed at developing sharp errors bounds on the squared ","element":"span"},{"style":{"height":14.62},"width":46.7,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-6.png","element":"img","alt":" L2","inline":true},{"text":"-norm ","element":"span"},{"style":{"height":24.06},"width":738.34,"height":60.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-7.png","element":"img","alt":" ∥ �f − f⋆∥2L2 :=�x∈[0,1] | �f(x) − f⋆(x)|2dx","inline":true,"padRight":true},{"text":"of the least squares ","element":"span"},{"text":"estimator, when samples are uniformly spaced on a grid. They show that even with this uniform allocation, the error ","element":"span"},{"style":{"height":21.68},"width":339.08,"height":54.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-8.png","element":"img","alt":" ∥ �f − f⋆∥2L2 adapts","inline":true,"padRight":true},{"text":"to the true regression functions ","element":"span"},{"style":{"height":16.4},"width":37.37,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-9.png","element":"img","alt":" f⋆","inline":true},{"text":". For example, ","element":"span"},{"href":"#id-10","referenceIndex":5,"text":"Chatterjee ","element":"a"},{"href":"#id-10","referenceIndex":5,"text":"[2016] ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-11","referenceIndex":1,"text":"Bellec et al. 
","element":"a"},{"href":"#id-11","referenceIndex":1,"text":"[2018] ","element":"a"},{"text":"show that if ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-10.png","element":"img","alt":" f⋆","inline":true,"padRight":true},{"text":"is a ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"text":"-piecewise linear function, then ","element":"span"},{"style":{"height":16.4},"width":61.53,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-11.png","element":"img","alt":"�fLS","inline":true,"padRight":true},{"text":"obtains the parametric error rate of ","element":"span"},{"style":{"height":21.68},"width":419.17,"height":54.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-12.png","element":"img","alt":" ∥ �fLS − f⋆∥2L2 ≤ Ck/n","inline":true},{"text":". In a similar vein, ","element":"span"},{"href":"#id-4","referenceIndex":3,"text":"Cai et al. ","element":"a"},{"href":"#id-4","referenceIndex":3,"text":"[2013] ","element":"a"},{"text":"proves sharp ","element":"span"},{"text":"confidence intervals for ","element":"span"},{"style":{"height":17.6},"width":118.14,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-13.png","element":"img","alt":" f⋆(x0)","inline":true,"padRight":true},{"text":"for a fixed point ","element":"span"},{"style":{"height":17.6},"width":206.17,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-14.png","element":"img","alt":" x0 ∈ (0, 1).","inline":true}],[{"text":"Our work draws heavily upon ","element":"span"},{"href":"#id-4","referenceIndex":3,"text":"Cai et al. ","element":"a"},{"href":"#id-4","referenceIndex":3,"text":"[2013] ","element":"a"},{"text":"(who in turn build on ","element":"span"},{"href":"#id-12","referenceIndex":7,"text":"Dümbgen et al. 
","element":"a"},{"href":"#id-12","referenceIndex":7,"text":"[2003]","element":"a"},{"text":"), whose aim was to characterize the function-specific sample complexity of estimating a convex ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"at a given point in the interior of ","element":"span"},{"text":"[0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1]","element":"span"},{"text":", from uniform measurements. We extend these tools to characterize the complexity of estimating ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"with uniform accuracy over the interval ","element":"span"},{"text":"[0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1]","element":"span"},{"text":", from measurements which may be chosen in an adaptive, function-dependent manner. We are thus able to obtain exceptionally granular, instance-specific results similar to those in the multi-arm bandit literature ","element":"span"},{"href":"#id-13","referenceIndex":12,"text":"[Kaufmann et al., ","element":"a"},{"href":"#id-13","referenceIndex":12,"text":"2016]","element":"a"},{"text":", and in recent work studying the local minimax sample complexity of convex optimization ","element":"span"},{"href":"#id-14","referenceIndex":14,"text":"[Zhu et al., ","element":"a"},{"href":"#id-14","referenceIndex":14,"text":"2016]","element":"a"},{"text":".","element":"span"}]]},{"heading":"2 Eﬃciently Learning a Convex Function","paragraphs":[[{"text":"We begin by establishing preliminary notation. 
The class of convex functions is denoted as ","element":"span"},{"style":{"height":15.02},"width":158.69,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-15.png","element":"img","alt":" Fconv :=","inline":true},{"style":{"height":17.6},"width":1520.87,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-16.png","element":"img","alt":"{f : [0, 1] → R|f((1 − λ)x + λy) ≤ (1 − λ)f(x) + λf(y), ∀x, y, λ ∈ [0, 1]}","inline":true},{"text":". ","element":"span"},{"text":"For an interval ","element":"span"},{"style":{"height":17.6},"width":318.58,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-17.png","element":"img","alt":"I = [a, b] ⊆ [0, 1]","inline":true},{"text":", define the left-, middle- and right-endpoints as ","element":"span"},{"style":{"height":22.42},"width":600.99,"height":56.06,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-18.png","element":"img","alt":" xl(I) = a, xm(I) = (a+b)/2, xr(I) = b","inline":true},{"text":". 
","element":"span"},{"text":"We define the secant approximation of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"on an interval ","element":"span"},{"style":{"height":17.6},"width":221.22,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-19.png","element":"img","alt":" I ⊂ [0, 1] as","inline":true}],[{"style":{"width":"81%"},"width":1519,"height":101,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-20.png","element":"img"}],[{"text":"and note that for a convex function, this approximation never underestimates ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"text":"; that is, one has ","element":"span"},{"style":{"height":17.2},"width":353.56,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-21.png","element":"img","alt":"Sec[f, I](x) ≥ f(x)","inline":true},{"text":". We denote the error of the secant approximation to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"on ","element":"span"},{"style":{"fontStyle":"italic"},"text":"I ","element":"span"},{"text":"at the midpoint ","element":"span"},{"style":{"height":10.62},"width":54.94,"height":26.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-22.png","element":"img","alt":" xm","inline":true,"padRight":true},{"text":"as","element":"span"}],[{"id":"id-15","style":{"width":"72%"},"width":1351,"height":97,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-23.png","element":"img"}],[{"text":"In addition, we overload notation so that for any ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x, t ","element":"span"},{"text":"such that ","element":"span"},{"style":{"height":17.6},"width":228.86,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-24.png","element":"img","alt":" x ∈ [t, 1 − t]","inline":true},{"text":", we have 
","element":"span"},{"style":{"height":17.6},"width":232.45,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-25.png","element":"img","alt":" ∆(f, x, t) :=","inline":true},{"style":{"height":17.6},"width":357.02,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-26.png","element":"img","alt":"∆(f, [x − t, x + t]).","inline":true,"padRight":true},{"text":"We now state a remarkable fact about convex functions that is at the core of our analysis.","element":"span"}],[{"style":{"width":"76%"},"width":1438,"height":65,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/2-27.png","element":"img"}],[{"text":"Lemma ","element":"span"},{"href":"#id-15","text":"2.1 ","element":"a"},{"text":"is a special case of a more general lemma stated in Section ","element":"span"},{"text":"6 ","element":"span"},{"text":"that upper bounds the supremum of the secant approximation error by a constant using only a single point within the interval. Convexity is critical to the proof of this lemma and such a property does not hold, for instance, on merely monotonic functions. 
We remark that the first inequality is trivial, and the ","element":"span"},{"text":"second inequality is tight in the sense that it is achieved by ","element":"span"},{"style":{"height":17.2},"width":293.48,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-0.png","element":"img","alt":" f(x) = (1 − x)p ","inline":true,"padRight":true},{"text":"on interval ","element":"span"},{"style":{"fontStyle":"italic"},"text":"I ","element":"span"},{"text":"= [0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1] ","element":"span"},{"text":"as ","element":"span"},{"style":{"height":12.8},"width":145.46,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-1.png","element":"img","alt":"p → ∞.","inline":true}],[{"text":"The above observations motivate our strategy of approximating ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"with secant approximations on disjoint intervals whose union is ","element":"span"},{"text":"[0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1]","element":"span"},{"text":". 
The next definition relates the secant approximation error to the required sampling density.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Definition 1 (Local Approximation Modulus) ","element":"span"},{"style":{"fontStyle":"italic"},"text":"We define the ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-2.png","element":"img","alt":" ϵ","inline":true},{"style":{"fontStyle":"italic"},"text":"-approximation modulus of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"at a point ","element":"span"},{"style":{"height":17.2},"width":164.3,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-3.png","element":"img","alt":" x ∈ [0, 1]","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"as the least ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"style":{"fontStyle":"italic"},"text":"such that the midpoint secant approximation to ","element":"span"},{"style":{"height":17.2},"width":409.36,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-4.png","element":"img","alt":" f on [x − t, x + t] has","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"bias ","element":"span"},{"style":{"height":8.4},"width":31.72,"height":21,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-5.png","element":"img","alt":" ϵ:","inline":true}],[{"style":{"width":"78%"},"width":1470,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-6.png","element":"img"}],[{"text":"Note that ","element":"span"},{"style":{"height":17.6},"width":563.47,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-7.png","element":"img","alt":" ω(f, x, ϵ) > 0 for all x ∈ (0, 1)","inline":true},{"text":", because convex functions are 
continuous on their domain. Intuitively, ","element":"span"},{"style":{"height":17.2},"width":167.44,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-8.png","element":"img","alt":" ω(f, x, ϵ)","inline":true,"padRight":true},{"text":"is the scale at which ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"“looks” linear around some ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x","element":"span"},{"text":", up to a tolerance ","element":"span"},{"style":{"height":16},"width":149.67,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-9.png","element":"img","alt":" ϵ. Away","inline":true,"padRight":true},{"text":"from the endpoints ","element":"span"},{"style":{"fontStyle":"italic"},"text":"{","element":"span"},{"text":"0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1","element":"span"},{"style":{"fontStyle":"italic"},"text":"}","element":"span"},{"text":", smaller values of ","element":"span"},{"style":{"height":17.6},"width":168.12,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-10.png","element":"img","alt":" ω(f, x, ϵ)","inline":true,"padRight":true},{"text":"correspond to larger complexities, because they imply that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"can only be approximated by a linear function on a small interval. But if ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"is near ","element":"span"},{"style":{"height":17.6},"width":628.09,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-11.png","element":"img","alt":" {0, 1}, ω(f, x, ϵ) ≤ min{x, 1 − x}","inline":true,"padRight":true},{"text":"will take small values, potentially overestimating the local complexity. 
We remedy this issue by defining the following left- and right-approximation points:","element":"span"}],[{"style":{"width":"46%"},"width":866,"height":114,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-12.png","element":"img"}],[{"text":"Within ","element":"span"},{"style":{"height":18.44},"width":421.14,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-13.png","element":"img","alt":" [tleft(f, ϵ), 1 − tright(ϵ)]","inline":true},{"text":", we will show that the midpoint errors ","element":"span"},{"style":{"height":17.6},"width":174.52,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-14.png","element":"img","alt":" ∆(f, x, t)","inline":true,"padRight":true},{"text":"concisely describe how densely one would need to sample ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"in the neighborhood of ","element":"span"},{"style":{"height":17.2},"width":164.86,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-15.png","element":"img","alt":" x ∈ [0, 1]","inline":true,"padRight":true},{"text":"in order to estimate it to the desired accuracy ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-16.png","element":"img","alt":" ϵ","inline":true,"padRight":true},{"text":"in the ","element":"span"},{"style":{"height":14.62},"width":63.7,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-17.png","element":"img","alt":" L∞","inline":true},{"text":"-norm. 
Moreover, we show that it suffices to sample at a constant number of design points on the end-intervals ","element":"span"},{"style":{"height":17.6},"width":226.16,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-18.png","element":"img","alt":" [0, tleft(f, ϵ)]","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":18.44},"width":280.91,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-19.png","element":"img","alt":" [1 − tright(ϵ), 1]","inline":true},{"text":". At a high level, the main finding of this paper is as follows:","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"The sample complexity of learning a particular convex function ","element":"span"},{"style":{"height":16.4},"width":182.68,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-20.png","element":"img","alt":" f⋆ up to ϵ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"accuracy in ","element":"span"},{"style":{"height":14.62},"width":158.57,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-21.png","element":"img","alt":" L∞ with","inline":true,"padRight":true},{"style":{"fontStyle":"italic","fontWeight":"bold"},"text":"passive sampling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is parametrized by the ","element":"span"},{"style":{"fontStyle":"italic","fontWeight":"bold"},"text":"worst-case ","element":"span"},{"style":{"fontStyle":"italic"},"text":"approximation modulus","element":"span"}],[{"style":{"width":"75%"},"width":1412,"height":90,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-22.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"In contrast, the sample complexity of ","element":"span"},{"style":{"fontStyle":"italic","fontWeight":"bold"},"text":"active sampling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"algorithms is 
parametrized by the ","element":"span"},{"style":{"fontStyle":"italic","fontWeight":"bold"},"text":"average ","element":"span"},{"style":{"fontStyle":"italic"},"text":"approximation modulus","element":"span"}],[{"style":{"width":"70%"},"width":1324,"height":119,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-23.png","element":"img"}],[{"text":"We emphasize that the algorithm presented in this work guarantees accuracy on the whole interval ","element":"span"},{"text":"[0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1]","element":"span"},{"text":", whereas many passive algorithms pointwise and ","element":"span"},{"style":{"height":14.62},"width":63.7,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-24.png","element":"img","alt":" L∞","inline":true,"padRight":true},{"text":"risk bounds ","element":"span"},{"href":"#id-4","referenceIndex":3,"text":"[Cai et al., ","element":"a"},{"href":"#id-4","referenceIndex":3,"text":"2013, ","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"Dümbgen ","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"et al., ","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"2004] ","element":"a"},{"text":"only guarantee accuracy on a strictly smaller sub-interval.","element":"span"}],[{"id":"id-30","style":{"fontWeight":"bold"},"text":"2.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Examples","element":"span"}],[{"text":"Explicit parameterizations of ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-25.png","element":"img","alt":" f⋆","inline":true,"padRight":true},{"text":"provide intuition for when active sampling is advantageous. 
In this section, we describe different scalings of ","element":"span"},{"style":{"height":17.42},"width":274.09,"height":43.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-26.png","element":"img","alt":" Λmax and Λavg","inline":true,"padRight":true},{"text":"for various ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/3-27.png","element":"img","alt":" f⋆","inline":true},{"text":"; later, in Remark ","element":"span"},{"href":"#id-6","text":"3.2, ","element":"a"},{"text":"we explain how these scalings can imply substantial differences in sample complexity.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"2.1.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Piecewise Linear Functions","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-0.png","element":"img","alt":" f⋆","inline":true,"padRight":true},{"text":"be a Lipschitz, piecewise linear convex function with a constant number of pieces. It follows that ","element":"span"},{"style":{"height":24.72},"width":565.99,"height":61.8,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-1.png","element":"img","alt":" ω(f⋆, x, ϵ)−1 ≈ min{1ϵ, 1d(x,f⋆)}","inline":true,"padRight":true},{"text":"where ","element":"span"},{"style":{"height":17.6},"width":141.44,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-2.png","element":"img","alt":" d(x, f⋆)","inline":true,"padRight":true},{"text":"is the distance to the closest knot adjoining any ","element":"span"},{"text":"two linear pieces of ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-3.png","element":"img","alt":" f⋆","inline":true},{"text":". 
It follows that ","element":"span"},{"style":{"height":19.75},"width":889.91,"height":49.38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-4.png","element":"img","alt":" Λmax(f, ϵ) ≈ ϵ−1 whereas Λavg(f, ϵ) ≈ log(1/ϵ).","inline":true}],[{"style":{"fontWeight":"bold"},"text":"2.1.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Bounded third-derivative:","element":"span"}],[{"text":"Suppose ","element":"span"},{"style":{"height":20.59},"width":395.7,"height":51.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-5.png","element":"img","alt":" supx∈[0,1] f′′′⋆ (x) < ∞","inline":true},{"text":". We may apply a Taylor series to find ","element":"span"},{"style":{"height":20.8},"width":499.02,"height":52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-6.png","element":"img","alt":" ω(f⋆, x, ϵ)−1 = √(f′′⋆ (x)/2ϵ)","inline":true,"padRight":true},{"text":"as ","element":"span"},{"style":{"height":12},"width":107.59,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-7.png","element":"img","alt":"ϵ → 0","inline":true},{"text":", which makes an explicit connection between the curvature of the function and the differences between ","element":"span"},{"style":{"height":17.82},"width":493.69,"height":44.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-8.png","element":"img","alt":" Λavg(f⋆, ϵ) and Λmax(f⋆, ϵ)","inline":true},{"text":". 
This suggests that if the function has areas of high but localized curvature such as ","element":"span"},{"style":{"height":17.77},"width":307.62,"height":44.43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-9.png","element":"img","alt":" f⋆(x) = 1 − √x","inline":true,"padRight":true},{"text":"or ","element":"span"},{"style":{"height":21.29},"width":750.06,"height":53.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-10.png","element":"img","alt":" f⋆(x) = (1/100) log(1 + exp(−100(x − 1/2)))","inline":true,"padRight":true},{"text":"then the difference ","element":"span"},{"text":"between ","element":"span"},{"style":{"height":18.22},"width":500.46,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-11.png","element":"img","alt":" Λavg(f⋆, ϵ) and Λmax(f⋆, ϵ)","inline":true,"padRight":true},{"text":"can be as vast as ","element":"span"},{"style":{"height":17.6},"width":370.13,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-12.png","element":"img","alt":" log(1/ϵ) versus 1/ϵ.","inline":true}],[{"style":{"fontWeight":"bold"},"text":"2.1.3 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Quadratic Functions:","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":21.29},"width":462.74,"height":53.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-13.png","element":"img","alt":" f⋆(x) = (1/2)ax² + bx + c","inline":true,"padRight":true},{"text":"for some real coefficients ","element":"span"},{"style":{"fontStyle":"italic"},"text":"a, b, c","element":"span"},{"text":". 
","element":"span"},{"text":"Ignoring the effect of endpoints, ","element":"span"},{"style":{"height":21},"width":390.52,"height":52.5,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-14.png","element":"img","alt":"ω(f⋆, x, ϵ)−1 = √(a/(2ϵ))","inline":true,"padRight":true},{"text":"for all ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"due to the function having constant curvature, so ","element":"span"},{"style":{"height":18.22},"width":252.33,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-15.png","element":"img","alt":" Λavg(f⋆, ϵ) =","inline":true},{"style":{"height":17.6},"width":218.68,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-16.png","element":"img","alt":"Λmax(f⋆, ϵ).","inline":true}]]},{"heading":"3 Main Results","paragraphs":[[{"text":"In this section, we state a formal upper bound obtained by Algorithm ","element":"span"},{"href":"#id-16","text":"2, ","element":"a"},{"text":"described in Section ","element":"span"},{"text":"4. 
","element":"span"},{"text":"Algorithm ","element":"span"},{"href":"#id-16","text":"2 ","element":"a"},{"text":"takes in a confidence parameter ","element":"span"},{"style":{"height":17.2},"width":170.17,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-17.png","element":"img","alt":" δ ∈ (0, 1)","inline":true},{"text":", as well as a second parameter ","element":"span"},{"style":{"height":16.4},"width":296.67,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-18.png","element":"img","alt":" β > 0 governing","inline":true,"padRight":true},{"text":"the degree to which the active sampling algorithm is ‘aggressive’; from simulations, we recommend setting ","element":"span"},{"style":{"height":17.6},"width":161.95,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-19.png","element":"img","alt":" β = 1/2","inline":true},{"text":". Lastly, at each round, Algorithm ","element":"span"},{"href":"#id-16","text":"2 ","element":"a"},{"text":"maintains an estimator ","element":"span"},{"style":{"height":16.4},"width":198.15,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-20.png","element":"img","alt":"�ft ∈ Fconv","inline":true},{"text":", whose performance is characterized by the following theorem:","element":"span"}],[{"id":"id-25","style":{"fontWeight":"bold"},"text":"Theorem 3.1 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"C > ","element":"span"},{"text":"0 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a universal constant, and for ","element":"span"},{"style":{"height":17.6},"width":775.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-21.png","element":"img","alt":" f⋆ ∈ Fconv, δ ∈ (0, 1/2) and β > 0, 
define","inline":true}],[{"style":{"width":"94%"},"width":1776,"height":196,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-22.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Then, if Algorithm ","element":"span"},{"href":"#id-16","style":{"fontStyle":"italic"},"text":"2 ","element":"a"},{"style":{"fontStyle":"italic"},"text":"is run with parameters ","element":"span"},{"style":{"height":12.8},"width":20,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-23.png","element":"img","alt":" δ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"style":{"height":16.4},"width":26,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-24.png","element":"img","alt":" β","inline":true},{"style":{"fontStyle":"italic"},"text":", with access to an oracle ","element":"span"},{"href":"#id-17","text":"(1)","element":"a"},{"style":{"fontStyle":"italic"},"text":", the estimators ","element":"span"},{"style":{"height":16.4},"width":188.11,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-25.png","element":"img","alt":"f̂t ∈ Fconv","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and confidence estimates ","element":"span"},{"style":{"height":10.22},"width":29.71,"height":25.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-26.png","element":"img","alt":" ϵt","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"defined in Section ","element":"span"},{"href":"#id-18","style":{"fontStyle":"italic"},"text":"4.2 ","element":"a"},{"style":{"fontStyle":"italic"},"text":"satisfy the following any-time guarantee:","element":"span"}],[{"style":{"width":"66%"},"width":1251,"height":80,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-27.png","element":"img"}],[{"text":"In the case of the default parameter setting 
","element":"span"},{"style":{"height":17.6},"width":152.86,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-28.png","element":"img","alt":" β = 1/2","inline":true},{"text":", we find that, for a possibly larger universal constant ","element":"span"},{"style":{"height":15.6},"width":57.49,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-29.png","element":"img","alt":" C′,","inline":true}],[{"style":{"width":"83%"},"width":1570,"height":109,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-30.png","element":"img"}],[{"text":"Up to constants and logarithmic factors, the sample complexity is dominated by the term ","element":"span"},{"style":{"height":18.22},"width":253.56,"height":45.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-31.png","element":"img","alt":" Λavg(f, ϵ/10)·","inline":true},{"style":{"height":32},"width":407.44,"height":80,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-32.png","element":"img","alt":"max{1, σ²/ϵ²}. Here σ²/ϵ²","inline":true,"padRight":true},{"text":"corresponds to the standard rate for estimating a scalar. 
The dependence on ","element":"span"},{"style":{"height":18.44},"width":219.81,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/4-33.png","element":"img","alt":"Λavg(f, cβϵ)","inline":true,"padRight":true},{"text":"captures the number of points required to estimate ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"with a discretized proxy.","element":"span"}],[{"text":"To better understand why ","element":"span"},{"style":{"height":17.82},"width":81.77,"height":44.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-0.png","element":"img","alt":" Λavg","inline":true,"padRight":true},{"text":"is the appropriate quantity to consider, we now introduce a construction of local packings of ","element":"span"},{"style":{"height":15.02},"width":99.22,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-1.png","element":"img","alt":" Fconv","inline":true},{"text":", centered at a given ","element":"span"},{"style":{"height":16.4},"width":178.58,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-2.png","element":"img","alt":" f ∈ Fconv","inline":true},{"text":". 
Recall that ","element":"span"},{"style":{"height":17.6},"width":136.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-3.png","element":"img","alt":" ∆(f, I)","inline":true,"padRight":true},{"text":"denotes the error of the secant approximation to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"on the midpoint ","element":"span"},{"style":{"height":14.75},"width":98.25,"height":36.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-4.png","element":"img","alt":" xm(I)","inline":true,"padRight":true},{"text":"of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"I","element":"span"},{"text":", constructed using the endpoints ","element":"span"},{"style":{"height":14.75},"width":184.85,"height":36.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-5.png","element":"img","alt":" xl(I), xr(I)","inline":true},{"text":". We note that if any algorithm, even an active one, does not measure ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"on an interval ","element":"span"},{"style":{"height":17.2},"width":434.9,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-6.png","element":"img","alt":" I for which ∆(f, I) ≥ ϵ","inline":true},{"text":", then one cannot distinguish between ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"and the alternative function ","element":"span"},{"style":{"height":20.61},"width":786.27,"height":51.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-7.png","element":"img","alt":"˜fI := f(x) + I(x ∈ I)(Sec[f, I](x) − f(x))","inline":true},{"text":". 
Thus, a key step to showing that ","element":"span"},{"style":{"height":17.6},"width":125.25,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-8.png","element":"img","alt":" Λ(f, ϵ)","inline":true,"padRight":true},{"text":"approximately lower bounds the number of evaluations is to show that it approximately lower bounds the number of intervals ","element":"span"},{"style":{"height":17.2},"width":436.14,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-9.png","element":"img","alt":" I for which ∆(f, I) ≥ ϵ","inline":true},{"text":". This is achieved in the following theorem proved in Section ","element":"span"},{"href":"#id-19","text":"5.3:","element":"a"}],[{"style":{"height":17.6},"width":776.13,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-10.png","element":"img","alt":"Theorem 3.2 (Packing) Let f ∈ Fconv","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a convex function, ","element":"span"},{"style":{"height":12.4},"width":97.89,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-11.png","element":"img","alt":" ϵ > 0","inline":true},{"style":{"fontStyle":"italic"},"text":", and define","element":"span"}],[{"id":"id-21","style":{"width":"76%"},"width":1436,"height":104,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-12.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"where ","element":"span"},{"style":{"height":21},"width":1078.28,"height":52.5,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-13.png","element":"img","alt":" ωmax(f, ϵ) := maxx∈[tleft(f,ϵ),1−tright(f,ϵ)] ω(f, x, ϵ) and ωmin","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is defined analogously. 
Then, there is an ","element":"span"},{"style":{"height":18.44},"width":376.19,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-14.png","element":"img","alt":" Npck(f, ϵ) ≥ N(f, ϵ)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"such that the points ","element":"span"},{"style":{"height":24.93},"width":365.62,"height":62.34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-15.png","element":"img","alt":" {zi}Npck(f,ϵ)i=1 ⊂ [0, 1]","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"such that the intervals","element":"span"}],[{"style":{"width":"52%"},"width":985,"height":47,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-16.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"have disjoint interiors, are contained in ","element":"span"},{"style":{"height":18.04},"width":487.06,"height":45.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-17.png","element":"img","alt":" [2tleft(f, ϵ), 1−2tright(f, ϵ)]","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and satisfy ","element":"span"},{"style":{"height":17.69},"width":225.72,"height":44.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-18.png","element":"img","alt":" ∆(f, Iϵi ) = ϵ","inline":true},{"style":{"fontStyle":"italic"},"text":". 
Moreover, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"the interval endpoints overlap so that ","element":"span"},{"style":{"height":17.39},"width":303.78,"height":43.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-19.png","element":"img","alt":" xℓ(Iϵi+1) = xr(Iϵi ).","inline":true}],[{"text":"Note that ","element":"span"},{"style":{"height":18.04},"width":183.18,"height":45.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-20.png","element":"img","alt":" Npck(f, ϵ)","inline":true,"padRight":true},{"text":"corresponds to the actual size of the explicit packing, and ","element":"span"},{"style":{"height":17.2},"width":134.19,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-21.png","element":"img","alt":" N(f, ϵ)","inline":true,"padRight":true},{"text":"is a computable lower bound on ","element":"span"},{"style":{"height":18.44},"width":183.52,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-22.png","element":"img","alt":" Npck(f, ϵ)","inline":true},{"text":". 
We now consider the class ","element":"span"},{"style":{"height":17.6},"width":280.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-23.png","element":"img","alt":" G(f, ϵ) ⊂ Fconv","inline":true,"padRight":true},{"text":"of alternative functions","element":"span"}],[{"id":"id-22","style":{"width":"88%"},"width":1666,"height":138,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-24.png","element":"img"}],[{"text":"We observe that ","element":"span"},{"style":{"height":17.64},"width":305.73,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-25.png","element":"img","alt":" f ∈ Gf,ϵ ⊂ Fconv","inline":true},{"text":", and by definition, if ","element":"span"},{"style":{"height":17.64},"width":221.46,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-26.png","element":"img","alt":" g1, g2 ∈ Gf,ϵ","inline":true,"padRight":true},{"text":"are distinct, then ","element":"span"},{"style":{"height":17.6},"width":294.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-27.png","element":"img","alt":" ∥g1 − g2∥∞ ≥ ϵ.","inline":true,"padRight":true},{"text":"In particular, given any set of points ","element":"span"},{"style":{"height":18.44},"width":631.06,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-28.png","element":"img","alt":" {xi}ni=1 ⊂ [0, 1] for n < Npck(f, ϵ)","inline":true},{"text":", then there exist two convex ","element":"span"},{"text":"functions ","element":"span"},{"style":{"height":12},"width":96.95,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-29.png","element":"img","alt":" g1, g2","inline":true,"padRight":true},{"text":"in ","element":"span"},{"style":{"height":17.64},"width":69.25,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-30.png","element":"img","alt":" 
Gf,ϵ","inline":true},{"text":", such that ","element":"span"},{"style":{"height":17.6},"width":286.81,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-31.png","element":"img","alt":" g1(xi) = g2(xi)","inline":true,"padRight":true},{"text":"for all ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i ","element":"span"},{"text":"and ","element":"span"},{"style":{"height":17.6},"width":292.15,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-32.png","element":"img","alt":" ∥g1 − g2∥∞ ≥ ϵ","inline":true},{"text":". In Section ","element":"span"},{"href":"#id-20","text":"5.2, ","element":"a"},{"text":"we formalize this argument to yield the following theorem:","element":"span"}],[{"id":"id-24","style":{"height":17.6},"width":1335.64,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-33.png","element":"img","alt":"Theorem 3.3 Fix an f⋆ ∈ Fconv, ϵ > 0, and δ ∈ (0, 1/3). Let N(f⋆, ϵ)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be as in Theorem ","element":"span"},{"href":"#id-21","style":{"fontStyle":"italic"},"text":"3.2, ","element":"a"},{"style":{"fontStyle":"italic"},"text":"and let ","element":"span"},{"style":{"height":17.64},"width":69.24,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-34.png","element":"img","alt":" Gf,ϵ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be as given by Equation ","element":"span"},{"href":"#id-22","text":"(7)","element":"a"},{"style":{"fontStyle":"italic"},"text":". 
Let ","element":"span"},{"text":"Alg ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be any active algorithm that returns an estimator ","element":"span"},{"style":{"height":16.4},"width":79.29,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-35.png","element":"img","alt":"�f at","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"a stopping time ","element":"span"},{"style":{"height":8},"width":23,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-36.png","element":"img","alt":" τ","inline":true},{"style":{"fontStyle":"italic"},"text":", and satisfies the correctness guarantee","element":"span"}],[{"id":"id-48","style":{"width":"70%"},"width":1325,"height":59,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-37.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Then the stopping time ","element":"span"},{"style":{"height":8},"width":23,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-38.png","element":"img","alt":" τ","inline":true},{"style":{"fontStyle":"italic"},"text":", under observations from ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"style":{"fontStyle":"italic"},"text":", is lower bounded by","element":"span"}],[{"style":{"width":"47%"},"width":890,"height":109,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-39.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"and the average sample complexity over ","element":"span"},{"style":{"height":17.64},"width":86.21,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-40.png","element":"img","alt":" Gf,2ϵ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is at 
least","element":"span"}],[{"id":"id-23","style":{"width":"84%"},"width":1586,"height":131,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-41.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"The above bounds hold when ","element":"span"},{"style":{"height":17.6},"width":145.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-42.png","element":"img","alt":" N(f⋆, ·)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is replaced by ","element":"span"},{"style":{"height":17.24},"width":172.11,"height":43.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/5-43.png","element":"img","alt":" 1 ∨ Npck.","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Remark 3.1 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"The additional logarithmic factor that arises in ","element":"span"},{"href":"#id-23","text":"(9) ","element":"a"},{"style":{"fontStyle":"italic"},"text":"is due to the fact that estimating a function ","element":"span"},{"style":{"height":17.64},"width":429.13,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-0.png","element":"img","alt":" g ∈ Gf,2ϵ to L∞-error ϵ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"corresponds to correctly performing ","element":"span"},{"style":{"height":17.2},"width":155.58,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-1.png","element":"img","alt":" N(f, 2ϵ)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"simultaneous hypothesis tests, regarding the value of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"g ","element":"span"},{"style":{"fontStyle":"italic"},"text":"on each of the intervals ","element":"span"},{"style":{"height":16.89},"width":38.61,"height":42.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-2.png","element":"img","alt":" 
Iϵi","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":". However, for any fixed ","element":"span"},{"style":{"height":17.64},"width":167.25,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-3.png","element":"img","alt":" g ∈ Gf,2ϵ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"(and, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"in particular, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"= ","element":"span"},{"style":{"fontStyle":"italic"},"text":"g","element":"span"},{"style":{"fontStyle":"italic"},"text":"), one can devise an algorithm that does not suffer this logarithmic factor by ‘biasing’ the algorithm towards that function.","element":"span"}],[{"text":"In addition to providing a lower bound, the packing of Theorem ","element":"span"},{"href":"#id-21","text":"3.2 ","element":"a"},{"text":"defines a near-optimal covering as well, in the sense that it defines a sampling allocation that can be used to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"test the hypothesis ","element":"span"},{"text":"that, for a given ","element":"span"},{"style":{"height":17.6},"width":902.49,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-4.png","element":"img","alt":" f⋆, H0 : {f = f⋆} versus H1 : ∥f − f⋆∥∞ = Ω(ϵ)","inline":true},{"text":". 
Formally, we have the following:","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Proposition 3.4 ","element":"span"},{"id":"id-5","style":{"fontStyle":"italic"},"text":"For every function ","element":"span"},{"style":{"height":17.6},"width":437.74,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-5.png","element":"img","alt":" f⋆, ϵ > 0 and δ ∈ (0, 1)","inline":true},{"style":{"fontStyle":"italic"},"text":", there exists a","element":"span"}],[{"style":{"width":"56%"},"width":1050,"height":98,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-6.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"a deterministic sampling allocation ","element":"span"},{"style":{"height":24.29},"width":549.99,"height":60.73,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-7.png","element":"img","alt":" X (pck) := {x(pck)1 , . . . , x(pck)T }","inline":true},{"style":{"fontStyle":"italic"},"text":", and a test function ","element":"span"},{"style":{"height":17.6},"width":201.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-8.png","element":"img","alt":" ψ ∈ {0, 1}","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"constructed from the allocation ","element":"span"},{"style":{"height":16.34},"width":307.19,"height":40.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-9.png","element":"img","alt":" X (pck) such that","inline":true}],[{"style":{"width":"59%"},"width":1124,"height":47,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-10.png","element":"img"}],[{"text":"The design ","element":"span"},{"style":{"height":15.93},"width":115.43,"height":39.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-11.png","element":"img","alt":" X (pck) ","inline":true,"padRight":true},{"text":"is explicitly constructed in Section ","element":"span"},{"href":"#id-19","text":"5.3 
","element":"a"},{"text":"by augmenting the ","element":"span"},{"style":{"height":18.04},"width":200.02,"height":45.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-12.png","element":"img","alt":" Npck(f⋆, ϵ)","inline":true},{"text":"-intervals in Theorem ","element":"span"},{"href":"#id-21","text":"3.2 ","element":"a"},{"text":"with at most three additional intervals to ensure coverage of all of ","element":"span"},{"text":"[0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1]","element":"span"},{"text":". Crucially, we made use of the fact that intervals ","element":"span"},{"style":{"height":16.89},"width":38.6,"height":42.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-13.png","element":"img","alt":" Iϵi ","inline":true,"padRight":true},{"text":"share endpoints, and have secant error ","element":"span"},{"style":{"height":17.69},"width":184.89,"height":44.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-14.png","element":"img","alt":" Sec[f⋆, Iϵi ]","inline":true,"padRight":true},{"text":"exactly equal ","element":"span"},{"text":"to ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-15.png","element":"img","alt":" ϵ","inline":true},{"text":". 
In light of Theorem ","element":"span"},{"href":"#id-24","text":"3.3, ","element":"a"},{"text":"we see that the design ","element":"span"},{"style":{"height":15.94},"width":115.42,"height":39.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-16.png","element":"img","alt":" X (pck) ","inline":true,"padRight":true},{"text":"is optimal for verifying that ","element":"span"},{"style":{"height":16.4},"width":198.78,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-17.png","element":"img","alt":" f = f⋆, up","inline":true,"padRight":true},{"text":"to scaling ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-18.png","element":"img","alt":" ϵ","inline":true,"padRight":true},{"text":"by constant factors. For this reason, we refer to this construction as the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"oracle allocation ","element":"span"},{"text":"since it precisely characterizes the optimal sampling allocation taken if one ","element":"span"},{"style":{"height":16.4},"width":147.42,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-19.png","element":"img","alt":" knew f⋆","inline":true},{"text":". 
In general, this allocation may be too optimistic, since an algorithm which does not know the true ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-20.png","element":"img","alt":" f⋆","inline":true,"padRight":true},{"text":"cannot choose this allocation a fortiori.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"3.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Comparison between Upper and Lower Bounds","element":"span"}],[{"text":"For the purpose of comparing upper and lower bounds, we will consider running Algorithm ","element":"span"},{"href":"#id-16","text":"2 ","element":"a"},{"text":"with the setting ","element":"span"},{"style":{"height":16.4},"width":108.72,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-21.png","element":"img","alt":" β = 1","inline":true},{"text":"; any constant ","element":"span"},{"style":{"height":16.4},"width":26,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-22.png","element":"img","alt":" β","inline":true,"padRight":true},{"text":"bounded away from zero will yield qualitatively similar results. We find that the upper bound of Theorem ","element":"span"},{"href":"#id-25","text":"3.1 ","element":"a"},{"text":"and lower bound of Theorem ","element":"span"},{"href":"#id-24","text":"3.3 ","element":"a"},{"text":"nearly match, with the following exceptions:","element":"span"}],[{"text":"1. The upper bound involves a doubly logarithmic factor that depends on ","element":"span"},{"style":{"height":18.49},"width":106.38,"height":46.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-23.png","element":"img","alt":" 1 + σϵ","inline":true,"padRight":true},{"text":". 
This is a ","element":"span"},{"text":"consequence of the law of the iterated logarithm, which Algorithm ","element":"span"},{"href":"#id-16","text":"2 ","element":"a"},{"text":"uses to maintain uniform correctness of its confidence intervals over time.","element":"span"}],[{"text":"2. Theorem ","element":"span"},{"href":"#id-25","text":"3.1 ","element":"a"},{"text":"is given in terms of ","element":"span"},{"style":{"height":17.6},"width":170.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-24.png","element":"img","alt":" Λ(f, ϵ/6)","inline":true},{"text":", whereas our lower bound is stated in terms of ","element":"span"},{"style":{"height":17.2},"width":145.59,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-25.png","element":"img","alt":"Λ(f, 2ϵ)","inline":true},{"text":". The two quantities can be related by the following proposition, proved in Section ","element":"span"},{"href":"#id-26","text":"6.4.","element":"a"}],[{"style":{"height":17.6},"width":1762.97,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-26.png","element":"img","alt":"Proposition 3.5 For any 0 < c ≤ 1, ϵ > 0 and any convex f, ω(f, x, ϵ) ≥ ω(f, x, cϵ) ≥","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"cω","element":"span"},{"style":{"height":18.44},"width":1047.11,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-27.png","element":"img","alt":"(f, x, ϵ) for all x ∈ [tleft(f, ϵ), 1 − tright(f, ϵ)]. 
Moreover,","inline":true}],[{"id":"id-53","style":{"width":"89%"},"width":1679,"height":108,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/6-28.png","element":"img"}],[{"text":"Hence, ignoring the contributions of the endpoints ","element":"span"},{"style":{"height":17.24},"width":245.68,"height":43.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-0.png","element":"img","alt":" tleft and tright","inline":true},{"text":", rescaling ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-1.png","element":"img","alt":" ϵ","inline":true,"padRight":true},{"text":"by a multiplicative constant ","element":"span"},{"style":{"height":17.6},"width":318.91,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-2.png","element":"img","alt":" c changes Λ(f, ϵ)","inline":true,"padRight":true},{"text":"by at most ","element":"span"},{"style":{"fontStyle":"italic"},"text":"c","element":"span"},{"text":".","element":"span"}],[{"text":"3. Lastly, the upper and lower bounds differ in that ","element":"span"},{"style":{"height":17.6},"width":134.87,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-3.png","element":"img","alt":" N(f, ϵ)","inline":true,"padRight":true},{"text":"requires dividing through by ","element":"span"},{"style":{"height":17.6},"width":289.72,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-4.png","element":"img","alt":"log(ωmax/ωmin)","inline":true},{"text":". 
We conjecture that the lower bound more accurately reflects the true sample complexity; see Remark ","element":"span"},{"href":"#id-27","text":"A.1.","element":"a"}],[{"style":{"fontWeight":"bold"},"text":"3.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Sample Complexity for Passive Designs","element":"span"}],[{"text":"In this section, we show that the sample complexity for estimating a convex function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"with an approximately uniform passive design up to error ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-5.png","element":"img","alt":" ϵ","inline":true,"padRight":true},{"text":"is governed by the parameter ","element":"span"},{"style":{"height":17.6},"width":167.34,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-6.png","element":"img","alt":" Λmax(f).","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Theorem 3.6 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Consider a (possibly randomized) passive design ","element":"span"},{"style":{"height":18.09},"width":136.96,"height":45.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-7.png","element":"img","alt":" {xi}ni=1","inline":true},{"style":{"fontStyle":"italic"},"text":", which is uniform in the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"sense that, for some ","element":"span"},{"style":{"height":12.4},"width":115.64,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-8.png","element":"img","alt":" τ > 1","inline":true},{"style":{"fontStyle":"italic"},"text":", and any interval ","element":"span"},{"style":{"fontStyle":"italic"},"text":"I ","element":"span"},{"text":"= [","element":"span"},{"style":{"fontStyle":"italic"},"text":"a, b","element":"span"},{"text":"] 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"with ","element":"span"},{"style":{"height":17.6},"width":239.38,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-9.png","element":"img","alt":" b − a ≤ 1/n","inline":true},{"style":{"fontStyle":"italic"},"text":", one has that ","element":"span"},{"style":{"height":17.6},"width":143.69,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-10.png","element":"img","alt":" E[|{xi :","inline":true},{"style":{"height":17.6},"width":316.58,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-11.png","element":"img","alt":"xi ∈ [a, b]}|] ≤ τ","inline":true},{"style":{"fontStyle":"italic"},"text":". Then, for a universal constant ","element":"span"},{"style":{"fontStyle":"italic"},"text":"c","element":"span"},{"style":{"fontStyle":"italic"},"text":", any ","element":"span"},{"style":{"height":17.6},"width":222.35,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-12.png","element":"img","alt":" δ ∈ (0, 1/3)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and all ","element":"span"},{"style":{"height":16.4},"width":184.28,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-13.png","element":"img","alt":" f ∈ Fconv","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"such that ","element":"span"},{"style":{"height":31.6},"width":718.88,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-14.png","element":"img","alt":"�1 + σ2ϵ2�Λmax(f, 2ϵ) ≥ cn log(1/δ)/τ","inline":true},{"style":{"fontStyle":"italic"},"text":", there exists an alternative ","element":"span"},{"style":{"height":16.4},"width":370.55,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-15.png","element":"img","alt":"�f ∈ Fconv such 
that","inline":true}],[{"id":"id-28","style":{"width":"31%"},"width":599,"height":100,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-16.png","element":"img"}],[{"text":"The proof for the above theorem is as follows. Let","element":"span"}],[{"style":{"width":"59%"},"width":1112,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-17.png","element":"img"}],[{"text":"which intuitively corresponds to the point with the highest local curvature. Further, let ","element":"span"},{"style":{"height":14.62},"width":103.27,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-18.png","element":"img","alt":" I∗ :=","inline":true},{"style":{"height":17.6},"width":668.59,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-19.png","element":"img","alt":"[x∗ − ω(f, x∗, 2ϵ), x∗ + ω(f, x∗, 2ϵ)]","inline":true},{"text":", so that ","element":"span"},{"style":{"height":17.6},"width":391.02,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-20.png","element":"img","alt":" 1/|I∗| ≳ Λmax(f, 2ϵ)","inline":true},{"text":". 
If we consider the alternative function","element":"span"}],[{"style":{"width":"76%"},"width":1429,"height":97,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-21.png","element":"img"}],[{"text":"then by construction, ","element":"span"},{"style":{"height":16.4},"width":30.18,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-22.png","element":"img","alt":"�f","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"differ only on ","element":"span"},{"style":{"height":17.6},"width":131.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-23.png","element":"img","alt":" Int(I∗)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":17.6},"width":291.12,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-24.png","element":"img","alt":" ∥ �f − f∥∞ ≥ 2ϵ","inline":true},{"text":". 
So if ","element":"span"},{"text":"Alg ","element":"span"},{"text":"can estimate ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"up to ","element":"span"},{"style":{"height":14.62},"width":63.69,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-25.png","element":"img","alt":" L∞","inline":true},{"text":"-norm error ","element":"span"},{"style":{"height":10.4},"width":66.78,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-26.png","element":"img","alt":" < ϵ","inline":true},{"text":", then ","element":"span"},{"text":"Alg ","element":"span"},{"text":"can distinguish between ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"and ","element":"span"},{"style":{"height":16.4},"width":30.18,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-27.png","element":"img","alt":"�f","inline":true},{"text":". Consequently, standard information-theoretic arguments (Section ","element":"span"},{"href":"#id-20","text":"5.2) ","element":"a"},{"text":"imply that any sampling algorithm must collect ","element":"span"},{"style":{"height":24.25},"width":377.12,"height":60.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-28.png","element":"img","alt":"≳ (1 + σ2ϵ2 ) log(1/δ)","inline":true,"padRight":true},{"text":"samples within ","element":"span"},{"style":{"height":17.6},"width":128.18,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-29.png","element":"img","alt":" Int(I∗)","inline":true},{"text":". Theorem ","element":"span"},{"href":"#id-28","text":"3.6 ","element":"a"},{"text":"then follows by the uniformity of the ","element":"span"},{"text":"sampling procedure. 
In the case where the design is passive but not uniform, it is possible that the design performs well on particular functions ","element":"span"},{"style":{"height":16.4},"width":178.62,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-30.png","element":"img","alt":" f ∈ Fconv","inline":true},{"text":". In Remark ","element":"span"},{"href":"#id-29","text":"A.2, ","element":"a"},{"text":"we show that nevertheless, if the design is not uniform, it will underperform on a ‘translation’ of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"text":".","element":"span"}],[{"id":"id-6","style":{"fontWeight":"bold"},"text":"Remark 3.2 ","element":"span"},{"style":{"fontStyle":"italic","fontWeight":"bold"},"text":"(Piecewise linear) ","element":"span"},{"style":{"fontStyle":"italic"},"text":"If ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is Lipschitz and piecewise linear with a constant number of pieces, then from Section ","element":"span"},{"href":"#id-30","style":{"fontStyle":"italic"},"text":"2.1 ","element":"a"},{"style":{"fontStyle":"italic"},"text":"we have ","element":"span"},{"style":{"height":19.13},"width":309.39,"height":47.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-31.png","element":"img","alt":" Λmax(f, ϵ) ≈ ϵ−1","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"whereas ","element":"span"},{"style":{"height":18.22},"width":388.33,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-32.png","element":"img","alt":" Λavg(f, ϵ) ≈ log(1/ϵ)","inline":true},{"style":{"fontStyle":"italic"},"text":". 
Theorem ","element":"span"},{"href":"#id-28","style":{"fontStyle":"italic"},"text":"3.6 ","element":"a"},{"style":{"fontStyle":"italic"},"text":"implies that any ","element":"span"},{"style":{"height":17.2},"width":232.74,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-33.png","element":"img","alt":" (ϵ, δ)-correct","inline":true,"padRight":true},{"text":"passive sampling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"procedure requires ","element":"span"},{"style":{"height":19.13},"width":203.2,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-34.png","element":"img","alt":" ϵ−3 log(1/δ)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"measurements whereas Theorem ","element":"span"},{"href":"#id-25","style":{"fontStyle":"italic"},"text":"3.1 ","element":"a"},{"style":{"fontStyle":"italic"},"text":"says that our ","element":"span"},{"text":"active sampling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"procedure takes just ","element":"span"},{"style":{"height":19.14},"width":516.95,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-35.png","element":"img","alt":" ϵ−2 log(1/ϵ) log(log(ϵ−1)/δ)","inline":true},{"style":{"fontStyle":"italic"},"text":". 
Thus, after ","element":"span"},{"style":{"fontStyle":"italic"},"text":"n ","element":"span"},{"style":{"fontStyle":"italic"},"text":"total samples the ","element":"span"},{"style":{"height":14.62},"width":63.7,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-36.png","element":"img","alt":" L∞","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"of passive sampling decays no faster than ","element":"span"},{"style":{"height":21.95},"width":114.25,"height":54.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-37.png","element":"img","alt":" ( 1n)1/3","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"whereas active sampling ","element":"span"},{"style":{"fontStyle":"italic"},"text":"decays like ","element":"span"},{"style":{"height":24.22},"width":345.66,"height":60.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-38.png","element":"img","alt":" ( log(n) log log(n)n )1/2.","inline":true}]]},{"heading":"4 Recursive Secant Approximation","paragraphs":[[{"text":"We now introduce the recursive secant approximation algorithm for learning a convex function with noise. We begin by sampling each endpoint ","element":"span"},{"style":{"fontStyle":"italic"},"text":"{","element":"span"},{"text":"0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1","element":"span"},{"style":{"fontStyle":"italic"},"text":"} ","element":"span"},{"text":"once. Subsequently, let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"= 3","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"4","element":"span"},{"style":{"fontStyle":"italic"},"text":", . . . 
","element":"span"},{"text":"denote the number of samples taken, and let ","element":"span"},{"style":{"height":15.42},"width":37,"height":38.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/7-39.png","element":"img","alt":" Tt","inline":true,"padRight":true},{"text":"denote a binary tree of intervals contained in ","element":"span"},{"text":"[0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1]","element":"span"},{"text":", where","element":"span"}],[{"id":"id-32","style":{"width":"99%"},"width":1872,"height":471,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-0.png","element":"img"}],[{"text":"the children of an interval ","element":"span"},{"style":{"fontStyle":"italic"},"text":"I ","element":"span"},{"text":"are given by ","element":"span"},{"style":{"height":19.95},"width":225.12,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-1.png","element":"img","alt":" [xl(I), xm(I)]","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":19.95},"width":230.86,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-2.png","element":"img","alt":" [xm(I), xr(I)]","inline":true},{"text":". We let ","element":"span"},{"style":{"height":17.6},"width":102.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-3.png","element":"img","alt":" L(Tt)","inline":true,"padRight":true},{"text":"denote the set of leaves of ","element":"span"},{"id":"id-31","style":{"height":15.42},"width":37,"height":38.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-4.png","element":"img","alt":" Tt","inline":true},{"text":". 
By construction, ","element":"span"},{"style":{"height":15.42},"width":37,"height":38.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-5.png","element":"img","alt":" Tt","inline":true,"padRight":true},{"text":"immediately satisfies the following properties stipulated in Lemma ","element":"span"},{"href":"#id-31","text":"4.1:","element":"a"}],[{"style":{"fontWeight":"bold"},"text":"Lemma 4.1 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"For any ","element":"span"},{"style":{"height":14},"width":99.91,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-6.png","element":"img","alt":" t ≥ 1","inline":true},{"style":{"fontStyle":"italic"},"text":", we have ","element":"span"},{"style":{"height":17.6},"width":215.55,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-7.png","element":"img","alt":" |I ∩ I′| = 0","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"for any ","element":"span"},{"style":{"height":17.6},"width":278.92,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-8.png","element":"img","alt":" I ̸= I′ ∈ L(Tt)","inline":true},{"style":{"fontStyle":"italic"},"text":"; ","element":"span"},{"style":{"height":21.85},"width":340.82,"height":54.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-9.png","element":"img","alt":"⋃I∈L(Tt) I = [0, 1]","inline":true},{"style":{"fontStyle":"italic"},"text":"; and ","element":"span"},{"style":{"height":21.85},"width":1042.14,"height":54.63,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-10.png","element":"img","alt":"⋃I∈L(Tt){xm(I), xl(I), xr(I)} = ⋃I∈Tt{xm(I), xl(I), xr(I)}.","inline":true}],[{"text":"At each round ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t","element":"span"},{"text":", we maintain three estimates of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"text":". 
First, an estimator ","element":"span"},{"style":{"height":18.33},"width":166.35,"height":45.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-11.png","element":"img","alt":" fpnt of f","inline":true,"padRight":true},{"text":"defined only at the points ","element":"span"},{"style":{"height":21.85},"width":510.49,"height":54.63,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-12.png","element":"img","alt":"⋃I∈L(Tt){xm(I), xl(I), xr(I)}","inline":true},{"text":". Second, a secant-approximation estimator ","element":"span"},{"style":{"height":16.4},"width":69.47,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-13.png","element":"img","alt":" fsec","inline":true,"padRight":true},{"text":"which extends ","element":"span"},{"text":"the domain of ","element":"span"},{"style":{"height":18.73},"width":425.52,"height":46.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-14.png","element":"img","alt":" fpnt to all of [0, 1] via:","inline":true}],[{"style":{"width":"73%"},"width":1370,"height":50,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-15.png","element":"img"}],[{"text":"Note that ","element":"span"},{"style":{"height":16.4},"width":69.47,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-16.png","element":"img","alt":" fsec","inline":true,"padRight":true},{"text":"is well defined, since by Lemma ","element":"span"},{"href":"#id-31","text":"4.1, ","element":"a"},{"text":"for all ","element":"span"},{"style":{"height":17.6},"width":168.25,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-17.png","element":"img","alt":" x ∈ [0, 1]","inline":true},{"text":", (a) there exists an ","element":"span"},{"style":{"height":17.6},"width":180.05,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-18.png","element":"img","alt":" I ∈ 
L(Tt)","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":12.8},"width":100.29,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-19.png","element":"img","alt":" x ∈ I","inline":true,"padRight":true},{"text":"and (b) if ","element":"span"},{"style":{"height":14.62},"width":201.07,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-20.png","element":"img","alt":" x ∈ I1 ∩ I2","inline":true,"padRight":true},{"text":"for ","element":"span"},{"style":{"height":17.6},"width":251.1,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-21.png","element":"img","alt":" I1, I2 ∈ L(Tt)","inline":true},{"text":", then ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"is a common endpoint of ","element":"span"},{"style":{"height":14.62},"width":36.18,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-22.png","element":"img","alt":" I1","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":14.62},"width":36.18,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-23.png","element":"img","alt":" I2","inline":true},{"text":", and thus the secant approximations coincide at ","element":"span"},{"style":{"height":18.73},"width":996.82,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-24.png","element":"img","alt":" x so that fsec(x) = Sec[fpnt, I1](x) = Sec[fpnt, I2](x).","inline":true,"padRight":true},{"text":"Lastly, since ","element":"span"},{"style":{"height":16.4},"width":69.46,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-25.png","element":"img","alt":" fsec ","inline":true,"padRight":true},{"text":"is not guaranteed to be convex when measurements are noisy, we define an estimator 
","element":"span"},{"style":{"height":16.4},"width":235.61,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-26.png","element":"img","alt":"f̂ via an L∞","inline":true,"padRight":true},{"text":"projection onto ","element":"span"},{"style":{"height":15.6},"width":113.58,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-27.png","element":"img","alt":" Fconv,","inline":true}],[{"style":{"width":"64%"},"width":1200,"height":80,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-28.png","element":"img"}],[{"text":"By definition ","element":"span"},{"style":{"height":17.6},"width":557.13,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-29.png","element":"img","alt":" ∥fsec − f̂∥∞ ≤ ∥fsec − f⋆∥∞","inline":true,"padRight":true},{"text":"so that ","element":"span"},{"style":{"height":17.6},"width":548.15,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-30.png","element":"img","alt":" ∥ f̂ − f⋆∥∞ ≤ 2∥fsec − f⋆∥∞","inline":true,"padRight":true},{"text":"by the triangle inequality. 
When not clear from context, we employ the use of a subscript ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"on ","element":"span"},{"style":{"height":21.12},"width":221.6,"height":52.79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-31.png","element":"img","alt":" fpntt , fsect , f̂t","inline":true,"padRight":true},{"text":"to denote these functions once ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"samples have been taken.","element":"span"}],[{"id":"id-38","style":{"fontWeight":"bold"},"text":"4.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Recursive Secant Approximation without Noise","element":"span"}],[{"text":"To build intuition for Algorithm ","element":"span"},{"href":"#id-16","text":"2, ","element":"a"},{"text":"we consider the following noiseless variant of our main algorithm, Algorithm ","element":"span"},{"href":"#id-32","text":"1, ","element":"a"},{"text":"where the oracle returns noiseless queries ","element":"span"},{"style":{"fontStyle":"italic"},"text":"F","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"x","element":"span"},{"text":") = ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"x","element":"span"},{"text":")","element":"span"},{"text":". 
In this case, ","element":"span"},{"style":{"height":18.33},"width":75.75,"height":45.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-32.png","element":"img","alt":" fpnt","inline":true,"padRight":true},{"text":"is set to be equal to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"x","element":"span"},{"text":") ","element":"span"},{"text":"at each point ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"that is queried, and a placeholder value of ","element":"span"},{"style":{"height":8},"width":77.95,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-33.png","element":"img","alt":" −∞","inline":true,"padRight":true},{"text":"elsewhere. The algorithm maintains the invariant that, for all ","element":"span"},{"style":{"height":19.95},"width":286.33,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-34.png","element":"img","alt":" I ∈ L(Tt), xl(I)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":14.75},"width":84.51,"height":36.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-35.png","element":"img","alt":" xr(I)","inline":true,"padRight":true},{"text":"have been measured and recorded in ","element":"span"},{"style":{"height":18.33},"width":75.72,"height":45.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-36.png","element":"img","alt":" fpnt","inline":true},{"text":". 
Moreover, since the queries are noiseless, the secant approximation ","element":"span"},{"style":{"height":16.4},"width":257.12,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-37.png","element":"img","alt":" fsec is convex","inline":true,"padRight":true},{"text":"and no projection is required.","element":"span"}],[{"text":"At each round, Algorithm ","element":"span"},{"href":"#id-32","text":"1 ","element":"a"},{"text":"queries the interval ","element":"span"},{"style":{"height":17.2},"width":177.66,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-38.png","element":"img","alt":" I ∈ L(Tt)","inline":true,"padRight":true},{"text":"for which the secant bias ","element":"span"},{"style":{"height":18.74},"width":232.02,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-39.png","element":"img","alt":" ∆(fpnt, I) is","inline":true,"padRight":true},{"text":"largest; note that if there is an interval ","element":"span"},{"style":{"height":18.75},"width":323.41,"height":46.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-40.png","element":"img","alt":" I for which xm(I)","inline":true,"padRight":true},{"text":"has not been sampled, then ","element":"span"},{"style":{"height":21.08},"width":256.94,"height":52.71,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-41.png","element":"img","alt":" fpnt(xm(I)) =","inline":true},{"style":{"height":21.09},"width":675.68,"height":52.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-42.png","element":"img","alt":"−∞ and ∆(fpnt, I) = ∞, and xm(I)","inline":true,"padRight":true},{"text":"will be queried, with ties broken arbitrarily. 
In preparation for the analysis of the noise-tolerant algorithm, we shall analyze the stopping time:","element":"span"}],[{"id":"id-44","style":{"width":"73%"},"width":1370,"height":81,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/8-43.png","element":"img"}],[{"text":"We shall prove the following proposition:","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Proposition 4.2 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"For all ","element":"span"},{"style":{"height":17.6},"width":560.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-0.png","element":"img","alt":" t ≥ τ (ϵ), ∥fsect − f∥∞ ≤ 2ϵ","inline":true},{"style":{"fontStyle":"italic"},"text":". Moreover, for any ","element":"span"},{"style":{"height":17.6},"width":197.99,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-1.png","element":"img","alt":" α ∈ (0, 1)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"we have","element":"span"}],[{"style":{"width":"34%"},"width":640,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-2.png","element":"img"}],[{"style":{"fontWeight":"bold"},"text":"Proof ","element":"span"},{"text":"Since ","element":"span"},{"style":{"height":18.73},"width":287.28,"height":46.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-3.png","element":"img","alt":" fpnt(x) = f(x)","inline":true,"padRight":true},{"text":"for all queried points ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x","element":"span"},{"text":", we have that, for ","element":"span"},{"style":{"height":17.6},"width":469.05,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-4.png","element":"img","alt":" t = τ (ϵ), ∥fsect − f∥∞ 
≤","inline":true},{"style":{"height":23.24},"width":545.43,"height":58.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-5.png","element":"img","alt":"maxI∈L(Tt) 2∆(fpntt , I) ≤ 2ϵ","inline":true},{"text":". ","element":"span"},{"text":"Moreover, since for any ","element":"span"},{"style":{"height":17.8},"width":226.14,"height":44.51,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-6.png","element":"img","alt":" t′ > t, fsect′","inline":true,"padRight":true},{"text":"is constructed using secant approximations on a refinement of the intervals ","element":"span"},{"style":{"height":18.6},"width":658.89,"height":46.51,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-7.png","element":"img","alt":" L(Tt), ∥fsect′ −f⋆∥∞ ≤ ∥fsect −f⋆∥∞","inline":true,"padRight":true},{"text":"(see Lemma ","element":"span"},{"href":"#id-33","text":"6.1.","element":"a"},{"text":")","element":"span"}],[{"text":"It remains to bound ","element":"span"},{"style":{"height":17.6},"width":78.02,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-8.png","element":"img","alt":" τ (ϵ)","inline":true},{"text":". 
Let ","element":"span"},{"style":{"height":14.62},"width":43.12,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-9.png","element":"img","alt":" Xt","inline":true,"padRight":true},{"text":"denote the set of points sampled at the start of round ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t","element":"span"},{"text":"; in the noiseless setting, ","element":"span"},{"style":{"height":17.6},"width":146.33,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-10.png","element":"img","alt":" t = |Xt|","inline":true},{"text":", but bounding ","element":"span"},{"style":{"height":17.6},"width":69.48,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-11.png","element":"img","alt":" |Xt|","inline":true,"padRight":true},{"text":"will be of broader interest for the noise-tolerant algorithm. Since ","element":"span"},{"style":{"height":18.75},"width":91.7,"height":46.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-12.png","element":"img","alt":" Xτ (ϵ)","inline":true,"padRight":true},{"text":"are the endpoints of the intervals ","element":"span"},{"style":{"height":19.95},"width":226.61,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-13.png","element":"img","alt":" I ∈ L(Tτ (ϵ))","inline":true},{"text":", which are adjacent, we have ","element":"span"},{"style":{"height":19.95},"width":448.58,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-14.png","element":"img","alt":"|Xτ (ϵ)| ≤ 2|L(Tτ (ϵ))| + 1","inline":true},{"text":". 
Moreover, if ","element":"span"},{"style":{"height":19.95},"width":315.1,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-15.png","element":"img","alt":" parents(L(Tτ (ϵ)))","inline":true,"padRight":true},{"text":"denotes the parent-intervals of ","element":"span"},{"style":{"height":19.95},"width":150.62,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-16.png","element":"img","alt":" L(Tτ (ϵ))","inline":true},{"text":", we have ","element":"span"},{"style":{"height":19.95},"width":595.56,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-17.png","element":"img","alt":" |L(Tτ (ϵ))| ≤ 2|parents(L(Tτ (ϵ)))|","inline":true},{"text":". Thus, to bound ","element":"span"},{"style":{"height":17.6},"width":77.93,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-18.png","element":"img","alt":" τ (ϵ)","inline":true},{"text":", it suffices to bound ","element":"span"},{"style":{"height":19.95},"width":339.98,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-19.png","element":"img","alt":" |parents(L(Tτ (ϵ)))|","inline":true},{"text":". 
We adopt the shorthand ","element":"span"},{"style":{"height":19.95},"width":435.38,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-20.png","element":"img","alt":" I′ := parents(L(Tτ (ϵ))).","inline":true}],[{"text":"We now make a key observation about ","element":"span"},{"style":{"height":12},"width":42.98,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-21.png","element":"img","alt":" I′","inline":true},{"text":", which will allow us to relate ","element":"span"},{"style":{"height":17.6},"width":62.28,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-22.png","element":"img","alt":" |I′|","inline":true,"padRight":true},{"text":"to ","element":"span"},{"style":{"height":17.82},"width":81.76,"height":44.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-23.png","element":"img","alt":" Λavg","inline":true},{"text":": for every ","element":"span"},{"style":{"height":12.8},"width":127.04,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-24.png","element":"img","alt":"I ∈ I′","inline":true},{"text":", we have ","element":"span"},{"style":{"height":17.6},"width":221.65,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-25.png","element":"img","alt":" ∆(f, I) ≥ ϵ","inline":true},{"text":"; if not, then at the round ","element":"span"},{"style":{"height":17.6},"width":164.78,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-26.png","element":"img","alt":" s < τ (ϵ)","inline":true,"padRight":true},{"text":"at which ","element":"span"},{"style":{"fontStyle":"italic"},"text":"I ","element":"span"},{"text":"is bisected, we have ","element":"span"},{"style":{"height":19.95},"width":665.14,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-27.png","element":"img","alt":"maxI′∈L(Ts) ∆(f, I′) = ∆(f, I) < 
ϵ","inline":true},{"text":", which implies that ","element":"span"},{"style":{"height":17.6},"width":166.14,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-28.png","element":"img","alt":" s ≥ τ (ϵ)","inline":true},{"text":", a contradiction. The following lemma, proved in Section ","element":"span"},{"href":"#id-34","text":"4.4, ","element":"a"},{"text":"shows that the inequality ","element":"span"},{"style":{"height":17.2},"width":210.72,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-29.png","element":"img","alt":" ∆(f, I) ≥ ϵ","inline":true,"padRight":true},{"text":"implies that the average modulus on each ","element":"span"},{"style":{"fontStyle":"italic"},"text":"I ","element":"span"},{"text":"cannot be too small.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Lemma 4.3 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":17.6},"width":372.75,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-30.png","element":"img","alt":" [a, b] ⊂ [0, 1], ϵ > 0","inline":true},{"id":"id-39","style":{"fontStyle":"italic"},"text":", and suppose that ","element":"span"},{"style":{"height":17.6},"width":282.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-31.png","element":"img","alt":" ∆(f, [a, b]) ≥ ϵ","inline":true},{"style":{"fontStyle":"italic"},"text":". 
Then for any ","element":"span"},{"style":{"height":17.6},"width":185.21,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-32.png","element":"img","alt":" α ∈ (0, 1)","inline":true},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"style":{"height":25.03},"width":596.46,"height":62.58,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-33.png","element":"img","alt":"∫_a^b ω(f, x, (1 − α)ϵ)^{−1} dx ≥ 2α/(1+α).","inline":true}],[{"text":"As a consequence, for any ","element":"span"},{"style":{"height":31.3},"width":1854.1,"height":78.25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-34.png","element":"img","alt":" α ≥ 0 and I such that ∆(fpntt , I) ≥ ϵ, we have ∫_I ω(f, (1 − α)ϵ, x)^{−1} dx ≥ 2α","inline":true},{"style":{"height":9.6},"width":65.28,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-35.png","element":"img","alt":"1+α","inline":true},{"text":". To relate to the integral ","element":"span"},{"style":{"height":17.82},"width":80.98,"height":44.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-36.png","element":"img","alt":" Λavg","inline":true},{"text":", we observe that the intervals ","element":"span"},{"style":{"height":12.8},"width":118.95,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-37.png","element":"img","alt":" I ∈ I′","inline":true,"padRight":true},{"text":"are disjoint except at their ","element":"span"},{"text":"endpoints, which yields","element":"span"}],[{"style":{"width":"100%"},"width":1876,"height":1043,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-38.png","element":"img"}],[{"text":"We remark that our bound on ","element":"span"},{"style":{"height":17.6},"width":69.47,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-39.png","element":"img","alt":" |Xt|","inline":true,"padRight":true},{"text":"only used the fact 
that at time ","element":"span"},{"style":{"height":17.6},"width":151.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-40.png","element":"img","alt":" t ≤ τ (ϵ)","inline":true,"padRight":true},{"text":"we had ","element":"span"},{"style":{"height":17.6},"width":212.68,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-41.png","element":"img","alt":" ∆(f, I) ≥ ϵ","inline":true,"padRight":true},{"text":"for each ","element":"span"},{"style":{"height":17.2},"width":341.35,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/9-42.png","element":"img","alt":" I ∈ parents(L(Tt))","inline":true},{"text":". This observation will be essential in generalizing to the setting with a noise oracle.","element":"span"}],[{"id":"id-16","style":{"width":"99%"},"width":1872,"height":1147,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-0.png","element":"img"}],[{"id":"id-18","style":{"fontWeight":"bold"},"text":"4.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Recursive Secant Approximation with Noise","element":"span"}],[{"text":"We now describe how to generalize Algorithm ","element":"span"},{"href":"#id-32","text":"1 ","element":"a"},{"text":"to allow for noisy observations. Fix some time ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"and let ","element":"span"},{"style":{"height":19.01},"width":236.94,"height":47.53,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-1.png","element":"img","alt":" {(xs, ys)}ts=1 ","inline":true,"padRight":true},{"text":"be the collection of noisy function evaluation pairs. 
Recall that ","element":"span"},{"style":{"height":17.2},"width":404.88,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-2.png","element":"img","alt":" ys = f(xs)+ws where","inline":true},{"style":{"height":10.62},"width":47.24,"height":26.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-3.png","element":"img","alt":"ws","inline":true,"padRight":true},{"text":"is independent, mean-zero ","element":"span"},{"style":{"height":15.13},"width":43.5,"height":37.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-4.png","element":"img","alt":" σ²","inline":true},{"text":"-sub-Gaussian distributed noise, i.e. ","element":"span"},{"style":{"height":19.13},"width":524.29,"height":47.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-5.png","element":"img","alt":" E[exp(λws)] ≤ exp(λ²σ²/2)","inline":true},{"text":". In the algorithm, ","element":"span"},{"style":{"height":21.2},"width":474.74,"height":53.01,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-6.png","element":"img","alt":" Nt(x) = ∑_{s=1}^{t} 1{xs = x}","inline":true,"padRight":true},{"text":"will denote the number of times the point ","element":"span"},{"style":{"height":17.6},"width":242.18,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-7.png","element":"img","alt":" x ∈ [0, 1] has","inline":true,"padRight":true},{"text":"been sampled so that ","element":"span"},{"style":{"height":25.49},"width":660.04,"height":63.73,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-8.png","element":"img","alt":" fpnt(x) = (1/Nt(x)) ∑_{s=1}^{t} 1{xs = x} ys","inline":true,"padRight":true},{"text":"if ","element":"span"},{"style":{"height":17.6},"width":189,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-9.png","element":"img","alt":" Nt(x) ≥ 1","inline":true},{"text":", and 
","element":"span"},{"style":{"height":8},"width":77.95,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-10.png","element":"img","alt":" −∞","inline":true,"padRight":true},{"text":"otherwise. Lastly, ","element":"span"},{"text":"we let ","element":"span"},{"style":{"height":17.6},"width":116.16,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-11.png","element":"img","alt":" φ(t, δ)","inline":true,"padRight":true},{"text":"denote an anytime confidence interval such that","element":"span"}],[{"id":"id-36","style":{"width":"35%"},"width":661,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-12.png","element":"img"}],[{"text":"For example, ","element":"span"},{"style":{"height":20.8},"width":634.87,"height":52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-13.png","element":"img","alt":" φ(t, δ) = √(16σ² log(log₂(2t)/δ)/t)","inline":true,"padRight":true},{"text":"suffices but we recommend using ","element":"span"},{"href":"#id-13","referenceIndex":12,"text":"Kaufmann et al. ","element":"a"},{"href":"#id-13","referenceIndex":12,"text":"[2016, ","element":"a"},{"text":"Theorem 8]. In general ","element":"span"},{"style":{"height":17.2},"width":103.27,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-14.png","element":"img","alt":" φ(·, ·)","inline":true,"padRight":true},{"text":"can be chosen to be monotonically decreasing in the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t","element":"span"},{"text":"-argument, and increasing in the ","element":"span"},{"style":{"height":12.8},"width":20,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-15.png","element":"img","alt":" δ","inline":true},{"text":"-argument. 
In addition to ","element":"span"},{"style":{"height":18.33},"width":222.42,"height":45.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-16.png","element":"img","alt":" Nt and fpnt","inline":true},{"text":", we maintain a function ","element":"span"},{"style":{"height":18.73},"width":337.68,"height":46.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-17.png","element":"img","alt":" δpnt : [0, 1] → R>0","inline":true,"padRight":true},{"text":"such that","element":"span"}],[{"style":{"width":"81%"},"width":1529,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-18.png","element":"img"}],[{"text":"We shall let ","element":"span"},{"style":{"height":17.64},"width":94.7,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-19.png","element":"img","alt":" Egood","inline":true,"padRight":true},{"text":"denote the event inside the probability operator in the above display. Finally, define confidence bounds","element":"span"}],[{"style":{"width":"73%"},"width":1386,"height":159,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/10-20.png","element":"img"}],[{"text":"Crucially, our confidence bounds ensure the following sandwich relation, proved in Section ","element":"span"},{"href":"#id-35","text":"4.5:","element":"a"}],[{"style":{"fontWeight":"bold"},"text":"Lemma 4.4 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"On ","element":"span"},{"style":{"height":17.64},"width":94.7,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-0.png","element":"img","alt":" Egood","inline":true},{"style":{"fontStyle":"italic"},"text":", the following holds for all ","element":"span"},{"style":{"height":14},"width":103.21,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-1.png","element":"img","alt":" t ≥ 
1","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"style":{"height":21.48},"width":681.64,"height":53.71,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-2.png","element":"img","alt":" I ∈ L(Tt): supx∈I |Sec[fpntt , I](x) −","inline":true},{"style":{"height":31.6},"width":855.24,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-3.png","element":"img","alt":"f(x)| ≤ 2�max{0, ∆(fpntt , I)} + Bt(I, δpntt )�.","inline":true}],[{"text":"As a consequence, we find that","element":"span"}],[{"style":{"width":"96%"},"width":1804,"height":179,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-4.png","element":"img"}],[{"text":"using the while loop of Line ","element":"span"},{"href":"#id-16","text":"10. ","element":"a"},{"text":"This is to ensure that the stochastic variance always dominates the bias of the approximation. The parameter ","element":"span"},{"style":{"height":16.4},"width":107.16,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-5.png","element":"img","alt":" β > 0","inline":true,"padRight":true},{"text":"appears to have little effect on performance as long as it is smaller than ","element":"span"},{"text":"1","element":"span"},{"text":"; we recommend setting ","element":"span"},{"style":{"height":17.6},"width":150.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-6.png","element":"img","alt":" β = 1/2","inline":true},{"text":". The definition of ","element":"span"},{"style":{"height":12.33},"width":39.6,"height":30.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-7.png","element":"img","alt":" I∗ ","inline":true,"padRight":true},{"text":"in the algorithm is motivated by the sandwich relationship (Lemma ","element":"span"},{"href":"#id-36","text":"4.4) ","element":"a"},{"text":"noted above. 
And in each case, ","element":"span"},{"style":{"height":10.62},"width":36.94,"height":26.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-8.png","element":"img","alt":" xt","inline":true,"padRight":true},{"text":"is chosen in order to minimize the maximum confidence bound relevant to the interval ","element":"span"},{"style":{"height":12.33},"width":39.61,"height":30.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-9.png","element":"img","alt":" I∗","inline":true},{"text":". The values of ","element":"span"},{"style":{"height":23.63},"width":1302.92,"height":59.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-10.png","element":"img","alt":"δpnt(xm(Ij)) satisfy ∑_{x:T(x)>0} δpnt(x) ≤ δ since 3 · 1/6 + ∑_{k=2}^{∞} 1/(2k²) ≤ 1.","inline":true}],[{"style":{"fontWeight":"bold"},"text":"4.3 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Upper Bound, Theorem ","element":"span"},{"href":"#id-25","style":{"fontWeight":"bold"},"text":"3.1","element":"a"}],[{"text":"Recall the definition of the set ","element":"span"},{"style":{"height":21.85},"width":633.03,"height":54.63,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-11.png","element":"img","alt":" Xt := ⋃_{I∈L(Tt)} {xm(I), xr(I), xl(I)}","inline":true},{"text":", and we shall assume that ","element":"span"},{"style":{"height":17.64},"width":94.7,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-12.png","element":"img","alt":" Egood","inline":true,"padRight":true},{"text":"holds. 
","element":"span"},{"text":"Fix an ","element":"span"},{"style":{"height":15.6},"width":301.01,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-13.png","element":"img","alt":" ϵ > 0, and let ϵt","inline":true,"padRight":true},{"text":"be as in Algorithm ","element":"span"},{"href":"#id-16","text":"2 ","element":"a"},{"text":"Line ","element":"span"},{"href":"#id-16","text":"5, ","element":"a"},{"text":"and define the stopping time","element":"span"}],[{"id":"id-37","style":{"width":"65%"},"width":1227,"height":145,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-14.png","element":"img"}],[{"text":"The correctness guarantee is a direct consequence of ","element":"span"},{"href":"#id-37","text":"(14) ","element":"a"},{"text":"since on ","element":"span"},{"style":{"height":18.44},"width":652.03,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-15.png","element":"img","alt":" Egood, ∥ �ft −f∥∞ ≤ 2∥fsect −f∥∞ ≤","inline":true},{"style":{"height":17.6},"width":206,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-16.png","element":"img","alt":"2 · ϵt/2 ≤ ϵ","inline":true},{"text":". Because ","element":"span"},{"style":{"height":16.4},"width":33.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-17.png","element":"img","alt":"�ft","inline":true,"padRight":true},{"text":"is only updated using a decreasing sequence of values of ","element":"span"},{"style":{"height":10.22},"width":29.71,"height":25.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-18.png","element":"img","alt":" ϵt","inline":true},{"text":", the guarantee immediately holds for all ","element":"span"},{"style":{"height":17.6},"width":160.82,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-19.png","element":"img","alt":" t′ ≥ τ(ϵ)","inline":true},{"text":". 
In order to upper bound ","element":"span"},{"style":{"height":17.6},"width":75.7,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-20.png","element":"img","alt":" τ(ϵ)","inline":true},{"text":", we have the identity","element":"span"}],[{"id":"id-41","style":{"width":"66%"},"width":1252,"height":108,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-21.png","element":"img"}],[{"text":"Thus, a crucial part of bounding ","element":"span"},{"style":{"height":17.6},"width":76.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-22.png","element":"img","alt":" τ(ϵ)","inline":true,"padRight":true},{"text":"is showing that we do not ","element":"span"},{"style":{"height":16},"width":379.9,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-23.png","element":"img","alt":" oversample x ∈ Xt","inline":true},{"text":"; this is accomplished by relating the stopping condition to the sampling rule.","element":"span"}],[{"style":{"height":23.78},"width":1417.12,"height":59.46,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-24.png","element":"img","alt":"Lemma 4.5 ∀x ∈ Xτ(ϵ), Nτ(ϵ)−1(x) ≤ 1 ∨ maxs≥1{φ(s, δpnt(x)) ≥ ϵ6(2+β)}.","inline":true}],[{"text":"As a consequence,","element":"span"}],[{"style":{"width":"60%"},"width":1135,"height":237,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-25.png","element":"img"}],[{"text":"where the second line uses the fact that ","element":"span"},{"style":{"height":17.2},"width":103.27,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-26.png","element":"img","alt":" φ(·, ·)","inline":true,"padRight":true},{"text":"is monotone in its second argument, and ","element":"span"},{"style":{"height":18.73},"width":345.85,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-27.png","element":"img","alt":" maxx∈Xt δpnt(x) 
=","inline":true},{"style":{"height":19.13},"width":152.04,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-28.png","element":"img","alt":"1/2|Xt|2","inline":true},{"text":". We can upper bound the inversion of ","element":"span"},{"style":{"height":17.6},"width":103.61,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-29.png","element":"img","alt":" φ(·, ·)","inline":true,"padRight":true},{"text":"to yield (see e.g. ","element":"span"},{"href":"#id-13","referenceIndex":12,"text":"Kaufmann et al. ","element":"a"},{"href":"#id-13","referenceIndex":12,"text":"[2016]","element":"a"},{"text":")","element":"span"}],[{"style":{"width":"62%"},"width":1168,"height":57,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-30.png","element":"img"}],[{"text":"To wrap up, it suffices to prove that for some ","element":"span"},{"style":{"height":17.6},"width":178.41,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-31.png","element":"img","alt":" α ∈ (0, 1)","inline":true}],[{"style":{"width":"74%"},"width":1398,"height":94,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/11-32.png","element":"img"}],[{"text":"Recalling the argument from Section ","element":"span"},{"href":"#id-38","text":"4.1, ","element":"a"},{"text":"it suffices only to verify that, if ","element":"span"},{"style":{"height":17.2},"width":511.44,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-0.png","element":"img","alt":" I ∈ L(Tt) for t = τ(ϵ), then","inline":true,"padRight":true},{"text":"the secant approximation error of its parent ","element":"span"},{"style":{"height":12},"width":38.61,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-1.png","element":"img","alt":" I′ ","inline":true,"padRight":true},{"text":"is lower bounded by 
","element":"span"},{"style":{"height":26.54},"width":315.72,"height":66.35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-2.png","element":"img","alt":" ∆(f, I′) ≥ βϵ2(2+β)","inline":true},{"text":". We prove this ","element":"span"},{"text":"as follows: fix some ","element":"span"},{"style":{"height":17.6},"width":178.27,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-3.png","element":"img","alt":" I ∈ L(Tt)","inline":true,"padRight":true},{"text":"for ","element":"span"},{"style":{"height":17.6},"width":150.45,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-4.png","element":"img","alt":" t = τ(ϵ)","inline":true},{"text":". If ","element":"span"},{"style":{"height":12},"width":38.61,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-5.png","element":"img","alt":" I′","inline":true,"padRight":true},{"text":"is the parent of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"I ","element":"span"},{"text":"then there exists some previous time ","element":"span"},{"style":{"fontStyle":"italic"},"text":"s < t ","element":"span"},{"text":"such that","element":"span"}],[{"style":{"width":"60%"},"width":1143,"height":77,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-6.png","element":"img"}],[{"text":"that is, ","element":"span"},{"style":{"height":18.73},"width":443.1,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-7.png","element":"img","alt":" ∆(f, I′) ≥ βBs(I′, δpnt)","inline":true},{"text":". 
On the other hand, to split on ","element":"span"},{"style":{"height":17.6},"width":154.34,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-8.png","element":"img","alt":" s < τ(ϵ)","inline":true,"padRight":true},{"text":"we must also have that","element":"span"}],[{"style":{"width":"39%"},"width":747,"height":116,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-9.png","element":"img"}],[{"text":"Together, these two displays imply ","element":"span"},{"style":{"height":18.73},"width":866.58,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-10.png","element":"img","alt":" ∆(f, I′) ≥ βBs(I′, δpnt) ≥ β(ϵ/4 − ∆(f, I′))/2","inline":true},{"text":". Rearranging, we find ","element":"span"},{"style":{"height":26.54},"width":340.08,"height":66.34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-11.png","element":"img","alt":" ∆(f, I′) ≥ β2(2+β)ϵ","inline":true,"padRight":true},{"text":"which proves what we set out to verify.","element":"span"}],[{"id":"id-34","style":{"fontWeight":"bold"},"text":"4.4 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Lemma ","element":"span"},{"href":"#id-39","style":{"fontWeight":"bold"},"text":"4.3","element":"a"}],[{"text":"Fix ","element":"span"},{"style":{"height":17.6},"width":185.08,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-12.png","element":"img","alt":" α ∈ (0, 1)","inline":true},{"text":". 
This proof relies on the following upper-continuity property of ","element":"span"},{"style":{"height":8.4},"width":27,"height":21,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-13.png","element":"img","alt":" ω","inline":true},{"text":", whose proof is deferred to Section ","element":"span"},{"id":"id-51","href":"#id-40","text":"6.2:","element":"a"}],[{"style":{"height":17.2},"width":694.16,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-14.png","element":"img","alt":"Lemma 4.6 Let [x−t, x+t] ⊂ [0, 1]","inline":true},{"style":{"fontStyle":"italic"},"text":", and suppose that ","element":"span"},{"style":{"height":24.22},"width":830.76,"height":60.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-15.png","element":"img","alt":" ∆(f, x, t) ≥ ϵ. Then, ω(f, x+τ, ϵ(1− |τ|t )) ≤","inline":true},{"style":{"height":17.6},"width":130.34,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-16.png","element":"img","alt":"t + |τ|.","inline":true}],[{"style":{"width":"97%"},"width":1821,"height":642,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-17.png","element":"img"}],[{"text":"which proves one side of the integral. 
A similar argument holds for ","element":"span"},{"style":{"height":24.22},"width":667,"height":60.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-18.png","element":"img","alt":" u ∈ [x−αt, x] since 1−α ≤ 1− |u−x|t .","inline":true}],[{"id":"id-35","style":{"fontWeight":"bold"},"text":"4.5 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Lemma ","element":"span"},{"href":"#id-36","style":{"fontWeight":"bold"},"text":"4.4","element":"a"}],[{"text":"Define ","element":"span"},{"style":{"height":18.74},"width":877.58,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-19.png","element":"img","alt":" �r(x) = fpnt(x) − f(x) for all x ∈ supp(fpnt)","inline":true},{"text":". First note that","element":"span"}],[{"style":{"width":"74%"},"width":1392,"height":119,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-20.png","element":"img"}],[{"text":"using the fact that the secant approximations are affine on ","element":"span"},{"style":{"height":17.6},"width":747.02,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-21.png","element":"img","alt":" I and Sec[f, I](x) − f(x) ≤ 2∆(f, I) by","inline":true,"padRight":true},{"text":"Lemma ","element":"span"},{"href":"#id-15","text":"2.1. 
","element":"a"},{"text":"Adding and subtracting ","element":"span"},{"style":{"height":18.73},"width":224.05,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-22.png","element":"img","alt":" 2∆(fpnt, I),","inline":true}],[{"style":{"width":"74%"},"width":1396,"height":275,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/12-23.png","element":"img"}],[{"text":"whence we conclude ","element":"span"},{"style":{"height":21.29},"width":1059.57,"height":53.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-0.png","element":"img","alt":"12(Sec[fpnt, I](x) − f(x)) ≤ ∆(fpnt, I) + B(I, �δ) on Egood","inline":true},{"text":". For the lower bound, ","element":"span"},{"text":"we see","element":"span"}],[{"style":{"width":"74%"},"width":1392,"height":121,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-1.png","element":"img"}],[{"text":"so that ","element":"span"},{"style":{"height":25.52},"width":840.08,"height":63.8,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-2.png","element":"img","alt":" − Sec[fpnt,I](x)−f(x)2 ≤ B(I, �δ) on Egood. 
Thus,","inline":true}],[{"style":{"width":"66%"},"width":1254,"height":196,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-3.png","element":"img"}],[{"id":"id-63","style":{"fontWeight":"bold"},"text":"Remark 4.1 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"In the proof of Lemma ","element":"span"},{"href":"#id-36","style":{"fontStyle":"italic"},"text":"4.4 ","element":"a"},{"style":{"fontStyle":"italic"},"text":"we lower bound ","element":"span"},{"style":{"height":19.95},"width":807.54,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-4.png","element":"img","alt":" min{�r(xl(I)), �r(xr(I))} by −B(I, �δ) which is","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"quite loose since this quantity is also lower bounded by ","element":"span"},{"style":{"height":21.08},"width":1037.89,"height":52.71,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-5.png","element":"img","alt":" − max{φ(T(xl(I)), δpnt(xl(I))), φ(Nt(xr(I)), δpnt(xr(I)))}","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and can be at least a factor of two smaller. Nevertheless, using matching upper and lower bounds for ","element":"span"},{"style":{"height":18.73},"width":402.24,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-6.png","element":"img","alt":"Sec[fpnt, I](x) − f(x)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"substantially simplifies clutter in the algorithm. 
It is straightforward to modify the algorithm to use these non-matching upper and lower bounds for superior empirical performance, and, indeed, our experiments implement this modification.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"4.6 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Lemma ","element":"span"},{"href":"#id-41","style":{"fontWeight":"bold"},"text":"4.5","element":"a"}],[{"text":"Fix an ","element":"span"},{"style":{"height":19.09},"width":195.66,"height":47.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-7.png","element":"img","alt":" x∗ ∈ Xτ(ϵ)","inline":true},{"text":", and let ","element":"span"},{"style":{"height":17.6},"width":163.43,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-8.png","element":"img","alt":" s < τ(ϵ)","inline":true,"padRight":true},{"text":"be the last round at which ","element":"span"},{"style":{"height":12.74},"width":41.94,"height":31.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-9.png","element":"img","alt":" x∗","inline":true,"padRight":true},{"text":"was sampled; note then that ","element":"span"},{"style":{"height":16.65},"width":136.81,"height":41.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-10.png","element":"img","alt":"x∗ ∈ I∗s","inline":true,"padRight":true},{"text":". It suffices to bound ","element":"span"},{"style":{"height":17.6},"width":130.83,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-11.png","element":"img","alt":" Ns(x∗)","inline":true},{"text":". 
If ","element":"span"},{"style":{"height":16.65},"width":39.6,"height":41.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-12.png","element":"img","alt":" I∗s","inline":true,"padRight":true},{"text":"is a new, just-bisected interval then we must have that ","element":"span"},{"style":{"height":19.09},"width":217.56,"height":47.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-13.png","element":"img","alt":"x∗ = xm(I∗s )","inline":true,"padRight":true},{"text":"by the sampling rule (","element":"span"},{"style":{"height":17.6},"width":435.67,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-14.png","element":"img","alt":"φ(0, ·) = ∞) so that x∗ ","inline":true,"padRight":true},{"text":"was sampled only a single time.","element":"span"}],[{"text":"Otherwise, ","element":"span"},{"style":{"height":12.74},"width":41.94,"height":31.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-15.png","element":"img","alt":" x∗ ","inline":true,"padRight":true},{"text":"has been sampled more than once and ","element":"span"},{"style":{"height":20.89},"width":799.03,"height":52.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-16.png","element":"img","alt":" max{0, ∆(fpnts , I∗s )} ≤ (1 + β)Bs(I∗s , δpnt).","inline":true,"padRight":true},{"text":"This means that for ","element":"span"},{"style":{"height":16.65},"width":39.61,"height":41.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-17.png","element":"img","alt":" I∗s ","inline":true,"padRight":true},{"text":"one has that","element":"span"}],[{"style":{"width":"62%"},"width":1175,"height":292,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-18.png","element":"img"}],[{"text":"where the last line follows by the sampling rule. 
It suffices for the right-hand side to be less than ","element":"span"},{"style":{"height":17.6},"width":61.53,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-19.png","element":"img","alt":" ϵ/4","inline":true,"padRight":true},{"text":"to meet the stopping condition.","element":"span"}]]},{"heading":"5 Proof of Packing and Lower Bounds","paragraphs":[[{"style":{"fontWeight":"bold"},"text":"5.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Theorem ","element":"span"},{"href":"#id-21","style":{"fontWeight":"bold"},"text":"3.2","element":"a"}],[{"text":"We construct the packing by choosing a sequence of interval midpoints ","element":"span"},{"style":{"height":10.62},"width":50.32,"height":26.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-20.png","element":"img","alt":" mi","inline":true,"padRight":true},{"text":"and interval lengths ","element":"span"},{"style":{"height":13.82},"width":27.76,"height":34.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-21.png","element":"img","alt":" ti","inline":true},{"text":", such that the intervals ","element":"span"},{"style":{"height":17.2},"width":576.68,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-22.png","element":"img","alt":" Ii := [mi − ti, mi + ti] = [ai, bi]","inline":true,"padRight":true},{"text":"overlap only at their endpoints, and such that ","element":"span"},{"style":{"height":17.6},"width":292.26,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-23.png","element":"img","alt":"∆(f, mi, ti) = ϵ","inline":true},{"text":". To do this, we define ","element":"span"},{"style":{"height":17.6},"width":370.73,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-24.png","element":"img","alt":" t0 = m0 = tleft(f, ϵ)","inline":true},{"text":". 
By definition of ","element":"span"},{"style":{"height":17.6},"width":160.79,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-25.png","element":"img","alt":" tleft(f, ϵ)","inline":true},{"text":", we have the equality ","element":"span"},{"style":{"height":17.6},"width":756.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-26.png","element":"img","alt":" ∆(f, tleft(f, ϵ), tleft(f, ϵ)) = ϵ. Let b0 = 0","inline":true},{"text":", and for each ","element":"span"},{"style":{"height":15.6},"width":296,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-27.png","element":"img","alt":" i ≥ 1, we define","inline":true}],[{"style":{"width":"58%"},"width":1093,"height":170,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/13-28.png","element":"img"}],[{"text":"One can think of ","element":"span"},{"style":{"height":13.82},"width":27.76,"height":34.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-0.png","element":"img","alt":" ti","inline":true,"padRight":true},{"text":"as the equivalent of ","element":"span"},{"style":{"height":14.04},"width":63.57,"height":35.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-1.png","element":"img","alt":" tleft","inline":true},{"text":", but starting at ","element":"span"},{"style":{"height":15.02},"width":73.61,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-2.png","element":"img","alt":" bi−1","inline":true,"padRight":true},{"text":"rather than zero. 
Note that ","element":"span"},{"style":{"height":17.6},"width":300.42,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-3.png","element":"img","alt":"∆(f, bi−1 + t, t)","inline":true,"padRight":true},{"text":"is non-decreasing and continuous in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"(Lemma ","element":"span"},{"href":"#id-42","text":"6.3)","element":"a"},{"text":", and thus, if there exists a ","element":"span"},{"style":{"height":23.5},"width":264.29,"height":58.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-4.png","element":"img","alt":"t ∈ [0, 1−bi−12 ]","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":17.6},"width":387.58,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-5.png","element":"img","alt":" ∆(f, bi−1 + t, t) ≥ ϵ","inline":true},{"text":", then the supremum in the definition of ","element":"span"},{"style":{"height":13.82},"width":27.76,"height":34.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-6.png","element":"img","alt":" ti","inline":true,"padRight":true},{"text":"will be attained for a ","element":"span"},{"style":{"height":17.2},"width":842.46,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-7.png","element":"img","alt":" ti such that ∆(f, bi−1+t, t) = ∆(f, mi, ti) = ϵ","inline":true},{"text":". 
Thus, we will terminate the construction at ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i ","element":"span"},{"text":"= ","element":"span"},{"style":{"fontStyle":"italic"},"text":"n","element":"span"},{"text":", where ","element":"span"},{"style":{"fontStyle":"italic"},"text":"n ","element":"span"},{"text":"is the first number satisfying ","element":"span"},{"style":{"height":18.44},"width":384.64,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-8.png","element":"img","alt":" bn ≥ 1 − 2tright(f, ϵ)","inline":true},{"text":", or ","element":"span"},{"style":{"height":17.6},"width":472.72,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-9.png","element":"img","alt":" ∆(f, bn + tn+1, tn+1) < ϵ","inline":true},{"text":". Note that ","element":"span"},{"style":{"height":23.5},"width":676.78,"height":58.75,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-10.png","element":"img","alt":" bn = bi−1 + 2ti ≤ bi + 2( 1−bi−12 ) = 1","inline":true},{"text":". Collecting what we have established thus far,","element":"span"}],[{"text":"1. ","element":"span"},{"style":{"height":17.6},"width":668.37,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-11.png","element":"img","alt":" ∆(f, mi, ti) = ∆(f, bi−1 + ti, ti) = ϵ","inline":true},{"text":". By Lemma ","element":"span"},{"href":"#id-43","text":"6.4, ","element":"a"},{"text":"it follows that ","element":"span"},{"style":{"height":17.6},"width":294.1,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-12.png","element":"img","alt":" ti = ω(f, mi, ϵ).","inline":true}],[{"text":"2. By definition, ","element":"span"},{"style":{"height":17.6},"width":282.16,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-13.png","element":"img","alt":" a1 = 2tleft(f, ϵ)","inline":true},{"text":". 
And, by the stopping condition, ","element":"span"},{"style":{"height":18.44},"width":563.11,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-14.png","element":"img","alt":" an ≤ 1 − 2tright(f, ϵ) ≤ bn ≤ 1","inline":true}],[{"text":"3. Hence, since ","element":"span"},{"style":{"height":16.22},"width":168.6,"height":40.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-15.png","element":"img","alt":" bi = ai+1","inline":true},{"text":", we have that ","element":"span"},{"style":{"height":20.6},"width":818.54,"height":51.5,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-16.png","element":"img","alt":"�Ii = [a1, bn] ⊇ [2tleft(f, ϵ), 1 − 2tright(ϵ, f)]","inline":true},{"text":", and that ","element":"span"},{"style":{"height":14.62},"width":31.19,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-17.png","element":"img","alt":" Ii","inline":true,"padRight":true},{"text":"have disjoint interiors.","element":"span"}],[{"text":"To conclude, we adopt an argument similar to the proof of Proposition ","element":"span"},{"href":"#id-44","text":"4.2. ","element":"a"},{"text":"For ease of notation, define ","element":"span"},{"id":"id-45","style":{"height":18.44},"width":650.05,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-18.png","element":"img","alt":" I(f, ϵ) := [tleft(f, ϵ), 1 − tright(f, ϵ)]","inline":true},{"text":". 
We start off by showing that","element":"span"},{"style":{"height":20.87},"width":509.3,"height":52.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-19.png","element":"img","alt":"� mm−t ω(f, u, ϵ)−1du = �O(1)","inline":true,"padRight":true},{"text":"for ","element":"span"},{"style":{"height":17.6},"width":220.93,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-20.png","element":"img","alt":" m ∈ I(f, ϵ).","inline":true}],[{"style":{"height":19.55},"width":2029.49,"height":48.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-21.png","element":"img","alt":"Lemma 5.1 Let m ∈ I(f, ϵ) and t = ω(f, m, ϵ), so that ∆(f, m, t) = ϵ. Then if ω0 = infu∈[m−t,m+t] ω(f, u, ϵ),","inline":true}],[{"style":{"fontStyle":"italic"},"text":"one has","element":"span"}],[{"style":{"width":"88%"},"width":1655,"height":1330,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-22.png","element":"img"}],[{"text":"and thus the number of intervals satisfies","element":"span"}],[{"style":{"width":"52%"},"width":980,"height":121,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/14-23.png","element":"img"}],[{"text":"Finally, we remove the last interval ","element":"span"},{"style":{"height":14.62},"width":40.19,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-0.png","element":"img","alt":" In","inline":true},{"text":". Since the right endpoint ","element":"span"},{"style":{"height":18.04},"width":695.88,"height":45.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-1.png","element":"img","alt":" an of In satisfies an ≤ 1−2tright(f, ϵ),","inline":true,"padRight":true},{"text":"the intervals ","element":"span"},{"style":{"height":15.2},"width":218.16,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-2.png","element":"img","alt":" I1, . . . 
, In−1","inline":true,"padRight":true},{"text":"are contained within ","element":"span"},{"style":{"height":18.44},"width":528.37,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-3.png","element":"img","alt":" [2tright(f, ϵ), 1 − 2tright(f, ϵ)]","inline":true},{"text":", and we have","element":"span"}],[{"style":{"width":"69%"},"width":1308,"height":121,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-4.png","element":"img"}],[{"text":"Note then that we may take ","element":"span"},{"style":{"height":18.44},"width":355.01,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-5.png","element":"img","alt":" n − 1 = Npck(f, ϵ).","inline":true}],[{"style":{"fontWeight":"bold"},"text":"5.1.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Lemma ","element":"span"},{"href":"#id-45","style":{"fontWeight":"bold"},"text":"5.1","element":"a"}],[{"text":"We first need a technical lemma, which we prove in Section ","element":"span"},{"href":"#id-46","text":"6.5.","element":"a"}],[{"style":{"height":17.6},"width":1739.11,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-6.png","element":"img","alt":"Lemma 5.2 Let x ∈ I(f, ϵ), and τ ∈ [−1, 1], such that u := x + τω(f, x, ϵ) ∈ I(f, ϵ). 
Then,","inline":true}],[{"id":"id-47","style":{"width":"67%"},"width":1266,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-7.png","element":"img"}],[{"text":"We shall now establish","element":"span"},{"style":{"height":31.6},"width":691.88,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-8.png","element":"img","alt":"� m+tm ω(f, u, ϵ)−1du ≤ 2�1 + log tω0�","inline":true},{"text":"; the bound on the integral over ","element":"span"},{"style":{"height":17.2},"width":174.04,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-9.png","element":"img","alt":" [m−t, m]","inline":true,"padRight":true},{"text":"is analogous. We can write ","element":"span"},{"style":{"height":17.6},"width":267.66,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-10.png","element":"img","alt":" u ∈ [m, m + t]","inline":true,"padRight":true},{"text":"as ","element":"span"},{"style":{"height":12.8},"width":215.01,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-11.png","element":"img","alt":" u = m + τt","inline":true},{"text":", where ","element":"span"},{"style":{"height":17.6},"width":255.28,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-12.png","element":"img","alt":" t = ω(f, m, ϵ)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":17.6},"width":164.72,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-13.png","element":"img","alt":" τ ∈ [0, 1]","inline":true},{"text":". Now, set ","element":"span"},{"style":{"height":19.95},"width":531.23,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-14.png","element":"img","alt":" ωmin = minu∈[m,m+1](f, u, ϵ)","inline":true},{"text":". 
Using Lemma ","element":"span"},{"href":"#id-47","text":"5.2, ","element":"a"},{"text":"we can integrate","element":"span"}],[{"style":{"width":"58%"},"width":1102,"height":551,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-15.png","element":"img"}],[{"text":"where ","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"a","element":"span"},{"text":") ","element":"span"},{"text":"is precisely Lemma ","element":"span"},{"href":"#id-47","text":"5.2. ","element":"a"},{"text":"Lastly, we can bound ","element":"span"},{"style":{"height":17.6},"width":566.38,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-16.png","element":"img","alt":" log(1 ∧ t/(2ω0)) ≤ log(t/ω0)","inline":true},{"text":", since ","element":"span"},{"style":{"height":17.6},"width":371.33,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-17.png","element":"img","alt":"ω0 ≤ t = ω(f, m, ϵ).","inline":true}],[{"id":"id-20","style":{"fontWeight":"bold"},"text":"5.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Noisy Lower Bound, Theorem ","element":"span"},{"href":"#id-24","style":{"fontWeight":"bold"},"text":"3.3","element":"a"}],[{"text":"It suffices to prove the theorem with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"N ","element":"span"},{"text":"replaced by ","element":"span"},{"style":{"height":17.24},"width":86.92,"height":43.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-18.png","element":"img","alt":" Npck","inline":true},{"text":", since ","element":"span"},{"style":{"height":18.44},"width":422.9,"height":46.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-19.png","element":"img","alt":" Npck(f, ·) ≥ Npck(f, ·)","inline":true},{"text":"; the case where 
","element":"span"},{"style":{"height":18.04},"width":283.72,"height":45.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-20.png","element":"img","alt":" Npck(f, 2ϵ) = 0","inline":true,"padRight":true},{"text":"is addressed at the end of the section. Let ","element":"span"},{"text":"Alg ","element":"span"},{"text":"be any algorithm satisfying the correctness guarantee ","element":"span"},{"href":"#id-48","text":"(8) ","element":"a"},{"text":"for some ","element":"span"},{"style":{"height":18.22},"width":816.8,"height":45.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-21.png","element":"img","alt":" ϵ > 0 and δ ∈ (0, 1/3). For g ∈ Fconv, let Pg","inline":true,"padRight":true},{"text":"denote the law under ","element":"span"},{"style":{"fontStyle":"italic"},"text":"g ","element":"span"},{"text":"and ","element":"span"},{"text":"Alg","element":"span"},{"text":". Consider the local alternative class ","element":"span"},{"style":{"height":17.64},"width":258.91,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-22.png","element":"img","alt":" Gf⋆,2ϵ ⊂ Fconv","inline":true,"padRight":true},{"text":"and intervals ","element":"span"},{"style":{"height":24.96},"width":420.14,"height":62.41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-23.png","element":"img","alt":" If⋆,2ϵ := {Ii}Npck(f⋆,2ϵ)i=1","inline":true,"padRight":true},{"text":", where ","element":"span"},{"style":{"height":18.04},"width":378.23,"height":45.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-24.png","element":"img","alt":" Npck(f⋆, ·) and Gf⋆,·","inline":true,"padRight":true},{"text":"are defined in Theorem ","element":"span"},{"href":"#id-21","text":"3.2 ","element":"a"},{"text":"and ","element":"span"},{"href":"#id-22","text":"(7)","element":"a"},{"text":", respectively. 
Let ","element":"span"},{"style":{"height":10.22},"width":31.08,"height":25.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-25.png","element":"img","alt":" τi","inline":true,"padRight":true},{"text":"denote the random variable corresponding to the number of times ","element":"span"},{"text":"Alg ","element":"span"},{"text":"samples in the interior of ","element":"span"},{"style":{"height":14.62},"width":35.76,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-26.png","element":"img","alt":" Ii","inline":true},{"text":", and observe that since the intervals in ","element":"span"},{"style":{"fontStyle":"italic"},"text":"i ","element":"span"},{"text":"have disjoint interiors, the stopping time ","element":"span"},{"style":{"height":18.25},"width":517.83,"height":45.63,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-27.png","element":"img","alt":" τ of Alg satisfies �i τi ≤ τ.","inline":true}],[{"text":"We can reduce to a multiple hypothesis testing problem by recalling that, for ","element":"span"},{"style":{"height":18.04},"width":271.18,"height":45.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-28.png","element":"img","alt":" h ̸= g ∈ Gf⋆,2ϵ","inline":true},{"text":", ","element":"span"},{"style":{"height":17.6},"width":408.66,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-29.png","element":"img","alt":"∥h − g∥∞ ∈ [2ϵ, 2 · 2ϵ]","inline":true},{"text":". 
Hence, for ","element":"span"},{"style":{"height":17.64},"width":177.59,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-30.png","element":"img","alt":" g ∈ Gf⋆,2ϵ","inline":true},{"text":", the events ","element":"span"},{"style":{"height":18.22},"width":424.88,"height":45.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-31.png","element":"img","alt":" Ag := {∥ �f − g∥∞ < ϵ}","inline":true,"padRight":true},{"text":"are pairwise disjoint. Further, by ","element":"span"},{"href":"#id-48","text":"(8)","element":"a"},{"text":", one has ","element":"span"},{"style":{"height":18.45},"width":489.83,"height":46.12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/15-32.png","element":"img","alt":" Pg[Ag] ≥ 1 − δ, ∀g ∈ G2ϵ,f⋆","inline":true},{"text":". We also recall Birge’s inequality:","element":"span"}],[{"style":{"width":"103%"},"width":1931,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-0.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"denote a family of probability distributions on a space ","element":"span"},{"style":{"height":17.2},"width":568.35,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-1.png","element":"img","alt":" (Ω, F), and let A0, A1, . . . , An","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"denote pairwise disjoint events. 
If ","element":"span"},{"style":{"height":17.6},"width":654.45,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-2.png","element":"img","alt":" p := mini Pi(Ai) ≥ 1/(n + 1), then","inline":true}],[{"style":{"width":"99%"},"width":1868,"height":228,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-3.png","element":"img"}],[{"text":"To apply Birge’s inequality, we first compute ","element":"span"},{"style":{"height":18.22},"width":208.71,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-4.png","element":"img","alt":" KL(Pg, Ph)","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"style":{"height":17.64},"width":205.35,"height":44.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-5.png","element":"img","alt":" g, h ∈ G2ϵ,f","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"g","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"x","element":"span"},{"text":") = ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"x","element":"span"},{"text":") ","element":"span"},{"text":"for all ","element":"span"},{"style":{"height":18.44},"width":1055.64,"height":46.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-6.png","element":"img","alt":" x ∈ [0, 1] \\ Int(Ii), where Ii ∈ If⋆,2ϵ. 
Let KL(g(x), h(x))","inline":true,"padRight":true},{"text":"denote the ","element":"span"},{"style":{"height":19.13},"width":465.68,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-7.png","element":"img","alt":" KL between N(g(x), σ2)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":19.13},"width":225.05,"height":47.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-8.png","element":"img","alt":" N(h(x), σ2)","inline":true},{"text":", which is equal to ","element":"span"},{"style":{"height":19.13},"width":490.58,"height":47.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-9.png","element":"img","alt":" (g(x) − h(x))2/2σ2. Then","inline":true}],[{"style":{"width":"49%"},"width":928,"height":633,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-10.png","element":"img"}],[{"text":"where ","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"i","element":"span"},{"text":") ","element":"span"},{"text":"uses the fact that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"g ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"differ only on ","element":"span"},{"style":{"height":17.6},"width":123.52,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-11.png","element":"img","alt":" Ii, (ii)","inline":true,"padRight":true},{"text":"uses the fact that, on ","element":"span"},{"style":{"height":14.62},"width":31.18,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-12.png","element":"img","alt":" Ii","inline":true},{"text":", one of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"{","element":"span"},{"style":{"fontStyle":"italic"},"text":"g, h","element":"span"},{"style":{"fontStyle":"italic"},"text":"} ","element":"span"},{"text":"is 
equal to ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-13.png","element":"img","alt":" f⋆","inline":true},{"text":", one is equal to ","element":"span"},{"style":{"height":17.6},"width":238.57,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-14.png","element":"img","alt":" Sec[f⋆, Ii](x)","inline":true},{"text":", and thus by Lemma ","element":"span"},{"href":"#id-15","text":"2.1, ","element":"a"},{"text":"we have that","element":"span"}],[{"style":{"width":"49%"},"width":926,"height":108,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-15.png","element":"img"}],[{"style":{"fontWeight":"bold"},"text":"First part of Theorem ","element":"span"},{"href":"#id-24","style":{"fontWeight":"bold"},"text":"3.3: ","element":"a"},{"text":"For each ","element":"span"},{"style":{"height":18.44},"width":303.17,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-16.png","element":"img","alt":" i ∈ [Npck(f, 2ϵ)]","inline":true},{"text":", let ","element":"span"},{"style":{"height":19.94},"width":60.08,"height":49.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-17.png","element":"img","alt":" g(i)","inline":true,"padRight":true},{"text":"denote the alternative corresponding to the vector ","element":"span"},{"style":{"height":26.41},"width":277.34,"height":66.02,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-18.png","element":"img","alt":" b(i)j := I(i = j)","inline":true,"padRight":true},{"text":"in ","element":"span"},{"href":"#id-22","text":"(7)","element":"a"},{"text":". 
Hence, ","element":"span"},{"style":{"height":19.93},"width":199.21,"height":49.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-19.png","element":"img","alt":" f⋆ and g(i) ","inline":true,"padRight":true},{"text":"differ only on ","element":"span"},{"style":{"height":15.6},"width":224.38,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-20.png","element":"img","alt":" Ii, and thus","inline":true}],[{"style":{"width":"32%"},"width":603,"height":58,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-21.png","element":"img"}],[{"text":"Birge’s inequality with ","element":"span"},{"style":{"height":20.12},"width":1321.68,"height":50.31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-22.png","element":"img","alt":" n = 1, P1 = Pf⋆ and P0 = Pg(i), and A1 = Af⋆ and A0 = Ag(i) implies","inline":true}],[{"style":{"width":"59%"},"width":1111,"height":129,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-23.png","element":"img"}],[{"text":"We rearrange to get ","element":"span"},{"style":{"height":19.98},"width":509.76,"height":49.94,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-24.png","element":"img","alt":" Ef⋆[τi] ≥ σ2kl(1 − δ, δ)/8ϵ2","inline":true},{"text":", and sum over ","element":"span"},{"style":{"height":18.44},"width":294.27,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-25.png","element":"img","alt":" i ∈ [Npck(f⋆, ϵ)]","inline":true,"padRight":true},{"text":"to obtain ","element":"span"},{"style":{"height":18.44},"width":159.85,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-26.png","element":"img","alt":" Ef⋆[τ] ≳","inline":true}],[{"style":{"width":"100%"},"width":1877,"height":263,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/16-27.png","element":"img"}],[{"text":"Hence, applying Birge’s inequality with 
","element":"span"},{"style":{"height":18.07},"width":377.94,"height":45.17,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-0.png","element":"img","alt":" P0 = Pgb, Pi = Pgb⊕i","inline":true},{"text":", and the disjoint events ","element":"span"},{"style":{"height":17.82},"width":263.7,"height":44.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-1.png","element":"img","alt":" A0 = Agb and","inline":true},{"style":{"height":18.87},"width":204.65,"height":47.17,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-2.png","element":"img","alt":"Ai = Agb⊕i","inline":true},{"text":", we have that for any ","element":"span"},{"style":{"height":17.6},"width":217.09,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-3.png","element":"img","alt":" b ∈ {0, 1}n,","inline":true}],[{"style":{"width":"56%"},"width":1056,"height":123,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-4.png","element":"img"}],[{"text":"where the last inequality uses the ","element":"span"},{"text":"KL","element":"span"},{"text":"-computation above, and the fact that ","element":"span"},{"style":{"height":16.44},"width":215.76,"height":41.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-5.png","element":"img","alt":" gb⊕i and gb","inline":true,"padRight":true},{"text":"differ only on ","element":"span"},{"style":{"height":15.2},"width":190.5,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-6.png","element":"img","alt":" Ii. 
Hence,","inline":true}],[{"style":{"width":"99%"},"width":1872,"height":406,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-7.png","element":"img"}],[{"text":"small (even zero), we still have the bounds ","element":"span"},{"style":{"height":18.44},"width":385.74,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-8.png","element":"img","alt":" Eg[τ] ≳ Npck(f⋆, 2ϵ)","inline":true,"padRight":true},{"text":"for every ","element":"span"},{"style":{"height":17.6},"width":245.92,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-9.png","element":"img","alt":" g ∈ G(f⋆, 2ϵ)","inline":true},{"text":". To this end, fix ","element":"span"},{"style":{"height":17.64},"width":180.21,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-10.png","element":"img","alt":" g ∈ Gf⋆,2ϵ","inline":true},{"text":"; we show ","element":"span"},{"style":{"height":17.6},"width":213.33,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-11.png","element":"img","alt":" E[τi] ≥ 1/3","inline":true},{"text":". 
Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"h ","element":"span"},{"text":"be the alternative to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"g ","element":"span"},{"text":"in ","element":"span"},{"style":{"height":17.64},"width":101.84,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-12.png","element":"img","alt":" Gf⋆,2ϵ","inline":true,"padRight":true},{"text":"which differs only on ","element":"span"},{"style":{"height":14.62},"width":31.18,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-13.png","element":"img","alt":" Ii","inline":true},{"text":", and let ","element":"span"},{"style":{"height":15.02},"width":40.66,"height":37.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-14.png","element":"img","alt":" Bi","inline":true,"padRight":true},{"text":"denote the event that ","element":"span"},{"text":"Alg ","element":"span"},{"text":"never samples in ","element":"span"},{"style":{"height":14.62},"width":31.18,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-15.png","element":"img","alt":" Ii","inline":true},{"text":". Note then that for any event ","element":"span"},{"style":{"fontStyle":"italic"},"text":"A","element":"span"},{"text":",","element":"span"}],[{"style":{"width":"79%"},"width":1482,"height":198,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-16.png","element":"img"}],[{"text":"where we used that ","element":"span"},{"style":{"height":18.62},"width":1042.68,"height":46.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-17.png","element":"img","alt":" Ag ∩ Ah = ∅. 
Hence E[τi] ≥ 1 − Pg[Bi] ≥ 1 − 2δ ≥ 1/3.","inline":true}],[{"style":{"height":18.44},"width":757.61,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-18.png","element":"img","alt":"Lower bound when Npck(f, 2ϵ) = 0","inline":true},{"text":". ","element":"span"},{"text":"When ","element":"span"},{"style":{"height":18.44},"width":305.29,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-19.png","element":"img","alt":" Npck(f, 2ϵ) < 1","inline":true},{"text":", we can consider the single alternative function ","element":"span"},{"style":{"height":17.6},"width":1086.69,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-20.png","element":"img","alt":"�f(x) = f(x) + 2ϵ. Since | �f(x)−f(x)| = 2ϵ for all x ∈ [0, 1]","inline":true},{"text":", the above arguments show that one needs at least ","element":"span"},{"style":{"height":24.25},"width":421.23,"height":60.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-21.png","element":"img","alt":" ≳ max{1, σ2ϵ2 log(1/δ)}","inline":true,"padRight":true},{"text":"samples to distinguish between ","element":"span"},{"style":{"height":16.4},"width":162.96,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-22.png","element":"img","alt":" �f and f.","inline":true}],[{"style":{"fontWeight":"bold"},"text":"5.3 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Proposition ","element":"span"},{"href":"#id-5","style":{"fontWeight":"bold"},"text":"3.4","element":"a"}],[{"text":"Recall that the construction of the packing in Theorem ","element":"span"},{"href":"#id-21","text":"3.2 ","element":"a"},{"text":"in Section ","element":"span"},{"href":"#id-19","text":"5.3 ","element":"a"},{"text":"is constructed with ","element":"span"},{"style":{"fontStyle":"italic"},"text":"n ","element":"span"},{"text":"= 
","element":"span"},{"style":{"height":18.04},"width":256.12,"height":45.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-23.png","element":"img","alt":"Npck(f, ϵ) + 1","inline":true,"padRight":true},{"text":"intervals of the form ","element":"span"},{"style":{"height":18.09},"width":528.54,"height":45.22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-24.png","element":"img","alt":" {[ai, bi]}ni=1 with ∆(f, ϵ) = ϵ","inline":true},{"text":". Define the interval ","element":"span"},{"style":{"height":17.2},"width":298.83,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-25.png","element":"img","alt":" [a0, b0] = [0, a1],","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":17.6},"width":383.26,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-26.png","element":"img","alt":" [an+1, bn+1] = [bn, 1]","inline":true},{"text":". The following fact is straightforward to verify using the construction in Section ","element":"span"},{"href":"#id-19","text":"5.3:","element":"a"}],[{"style":{"fontWeight":"bold"},"text":"Fact 5.4 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"The intervals ","element":"span"},{"style":{"height":19.95},"width":564.32,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-27.png","element":"img","alt":" {[ai, bi]}i∈[n+1]∪{0} cover [0, 1]","inline":true},{"style":{"fontStyle":"italic"},"text":", and satisfy ","element":"span"},{"style":{"height":17.6},"width":314.71,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-28.png","element":"img","alt":" ∆(f, [ai, bi]) ≤ ϵ.","inline":true}],[{"text":"Let ","element":"span"},{"style":{"height":22.91},"width":526.98,"height":57.27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-29.png","element":"img","alt":" X := {ai, bi, ai+bi2 
}i∈[n+1]∪{0","inline":true},{"text":". We collect ","element":"span"},{"style":{"height":24.25},"width":472.72,"height":60.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-30.png","element":"img","alt":" max{1, 8σ2ϵ2 log(|X|/2δ))}","inline":true},{"text":"-samples at each ","element":"span"},{"style":{"height":12.8},"width":114.3,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-31.png","element":"img","alt":" x ∈ X","inline":true},{"text":", and ","element":"span"},{"text":"define ","element":"span"},{"style":{"height":17.6},"width":84.97,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-32.png","element":"img","alt":"�f(x)","inline":true,"padRight":true},{"text":"to denote the empirical mean of these samples. We then define our test function to be","element":"span"}],[{"id":"id-49","style":{"width":"33%"},"width":633,"height":84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-33.png","element":"img"}],[{"text":"It now suffices to show that ","element":"span"},{"style":{"height":18.44},"width":353.58,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-34.png","element":"img","alt":" Pf⋆[ψ ̸= 0] ≤ δ/2","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":18.44},"width":339.82,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-35.png","element":"img","alt":" Pf[ψ ̸= 1] ≤ δ/2","inline":true,"padRight":true},{"text":"for ","element":"span"},{"style":{"height":16.4},"width":194.84,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-36.png","element":"img","alt":" f ∈ Fconv","inline":true,"padRight":true},{"text":"satisfying ","element":"span"},{"style":{"height":17.6},"width":319.01,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-37.png","element":"img","alt":"∥f − f⋆∥∞ ≥ 
10ϵ","inline":true},{"text":". By standard sub-gaussian concentration,","element":"span"}],[{"id":"id-19","style":{"width":"68%"},"width":1292,"height":84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-38.png","element":"img"}],[{"text":"which immediately implies that ","element":"span"},{"style":{"height":18.44},"width":319.64,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/17-39.png","element":"img","alt":" Pf⋆[ψ ̸= 0] ≤ δ/2","inline":true},{"text":". To prove the other direction, it suffices to prove the following lemma:","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Lemma 5.5 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"If ","element":"span"},{"style":{"height":16.4},"width":181.82,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-0.png","element":"img","alt":" f ∈ Fconv","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"satisfies ","element":"span"},{"style":{"height":17.6},"width":302.14,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-1.png","element":"img","alt":" ∥f − f⋆∥∞ ≥ 9ϵ","inline":true},{"style":{"fontStyle":"italic"},"text":", then there exists an ","element":"span"},{"style":{"height":12.8},"width":117.52,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-2.png","element":"img","alt":" x ∈ X","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"such that ","element":"span"},{"style":{"height":17.6},"width":142.08,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-3.png","element":"img","alt":" |f(x) −","inline":true},{"style":{"height":17.6},"width":200.19,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-4.png","element":"img","alt":"f⋆(x)| > ϵ.","inline":true}],[{"text":"Indeed, by Lemma ","element":"span"},{"href":"#id-19","text":"5.5, 
","element":"a"},{"text":"the triangle inequality, and ","element":"span"},{"href":"#id-49","text":"(19) ","element":"a"},{"text":"we have","element":"span"}],[{"style":{"width":"79%"},"width":1493,"height":84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-5.png","element":"img"}],[{"style":{"fontWeight":"bold"},"text":"Proof ","element":"span"},{"text":"[Proof of Lemma ","element":"span"},{"href":"#id-19","text":"5.5] ","element":"a"},{"text":"We prove the contrapositive. Suppose that ","element":"span"},{"style":{"height":18.19},"width":495.02,"height":45.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-6.png","element":"img","alt":" supx∈X |f(x) − f⋆(x)| < ϵ,","inline":true,"padRight":true},{"text":"and let ","element":"span"},{"style":{"height":17.6},"width":1734.37,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-7.png","element":"img","alt":" z ∈ [0, 1]. Then z ∈ [ai, bi] for some i ∈ {0, . . . , n + 1}. Let I = [ai, bi] and mi = (bi + ai)/2.","inline":true,"padRight":true},{"text":"We then have that","element":"span"}],[{"style":{"width":"88%"},"width":1660,"height":278,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-8.png","element":"img"}],[{"text":"Lastly, we observe that ","element":"span"},{"style":{"height":17.6},"width":1414.62,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-9.png","element":"img","alt":" |∆(f, I) − ∆(f⋆, I)| ≤ |f(mi) − f⋆(mi)| + |Sec[f, I](mi) − Sec[f⋆, I](mi)| ≤","inline":true},{"style":{"height":17.6},"width":660.58,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-10.png","element":"img","alt":"ϵ + |Sec[f, I](mi) − Sec[f⋆, I](mi)|","inline":true},{"text":". 
","element":"span"},{"text":"Moreover, for all ","element":"span"},{"style":{"height":12.8},"width":109.9,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-11.png","element":"img","alt":" t ∈ I","inline":true,"padRight":true},{"text":"(in particular ","element":"span"},{"style":{"height":14.4},"width":185.34,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-12.png","element":"img","alt":" t = z, mi","inline":true},{"text":"), we have ","element":"span"},{"style":{"height":17.6},"width":1291.01,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-13.png","element":"img","alt":"|Sec[f, I](t) − Sec[f⋆, I](t)| ≤ max{|f(ai) − f⋆(ai)|, |f(bi) − f⋆(bi)|}","inline":true},{"text":", which is ","element":"span"},{"style":{"height":10.4},"width":69.23,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-14.png","element":"img","alt":" < ϵ","inline":true,"padRight":true},{"text":"by assumption. Thus, we can bound ","element":"span"},{"style":{"height":17.6},"width":1019.22,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-15.png","element":"img","alt":" |Sec[f, I](z)−Sec[f⋆, I](z)|+2|∆(f, I)−∆(f⋆, I)| < 5ϵ","inline":true},{"text":". Putting things together, ","element":"span"},{"style":{"height":17.6},"width":354.23,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-16.png","element":"img","alt":"|f(z) − f⋆(z)| < 9ϵ","inline":true},{"text":", as needed.","element":"span"}]]},{"heading":"6 Structural Results about Convex Functions","paragraphs":[[{"text":"In this section, we introduce structural tools regarding convex functions, and use these tools to conclude the proof of the technical lemmas used above. 
The first guarantee is that the error of secant approximation is ","element":"span"},{"style":{"fontStyle":"italic"},"text":"monotone ","element":"span"},{"text":"in the following sense:","element":"span"}],[{"style":{"width":"73%"},"width":1378,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-17.png","element":"img"}],[{"text":"Next, we state a generalization of Lemma ","element":"span"},{"href":"#id-15","text":"2.1:","element":"a"}],[{"style":{"height":17.6},"width":586.49,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-18.png","element":"img","alt":"Lemma 6.2 Let f : [0, 1] → R","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be convex. For any ","element":"span"},{"style":{"height":17.6},"width":387.92,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-19.png","element":"img","alt":" x, z ∈ (t1, t2) ⊂ [0, 1]","inline":true},{"style":{"fontStyle":"italic"},"text":", one has that","element":"span"}],[{"id":"id-33","style":{"width":"77%"},"width":1448,"height":105,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-20.png","element":"img"}],[{"text":"We observe that Lemma ","element":"span"},{"href":"#id-15","text":"2.1 ","element":"a"},{"text":"follows as a corollary by choosing ","element":"span"},{"style":{"height":19.15},"width":686.11,"height":47.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-21.png","element":"img","alt":" t1 = xl(I), t2 = xr(I), and x = xm(I),","inline":true,"padRight":true},{"text":"and considering the maximum over ","element":"span"},{"style":{"height":19.95},"width":298.8,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/18-22.png","element":"img","alt":" z ∈ [xl(I), xr(I)].","inline":true}],[{"text":"The remainder of the section is organized as follows. 
In Section ","element":"span"},{"href":"#id-50","text":"6.1, ","element":"a"},{"text":"we prove Lemma ","element":"span"},{"href":"#id-33","text":"6.2 ","element":"a"},{"text":"and in Section ","element":"span"},{"href":"#id-40","text":"6.2, ","element":"a"},{"text":"we prove Lemma ","element":"span"},{"href":"#id-51","text":"4.6. ","element":"a"},{"text":"We then introduce further technical lemmas in Section ","element":"span"},{"href":"#id-52","text":"6.3, ","element":"a"},{"text":"which we use to prove Proposition ","element":"span"},{"href":"#id-53","text":"3.5 ","element":"a"},{"text":"in Section ","element":"span"},{"href":"#id-26","text":"6.4, ","element":"a"},{"text":"and Lemma ","element":"span"},{"href":"#id-47","text":"5.2 ","element":"a"},{"text":"in ","element":"span"},{"href":"#id-46","text":"6.5. ","element":"a"},{"text":"The proof of Lemma ","element":"span"},{"href":"#id-33","text":"6.1 ","element":"a"},{"text":"is given in Section ","element":"span"},{"href":"#id-54","text":"6.6.","element":"a"}],[{"id":"id-50","style":{"fontWeight":"bold"},"text":"6.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Lemma ","element":"span"},{"href":"#id-33","style":{"fontWeight":"bold"},"text":"6.2","element":"a"}],[{"text":"Note that adding an affine function to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"does not change the value of ","element":"span"},{"style":{"height":17.2},"width":572.42,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-0.png","element":"img","alt":" Sec[f, [t1, t2]](x) − f(x). Thus,","inline":true,"padRight":true},{"text":"we may assume that ","element":"span"},{"style":{"height":17.6},"width":338.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-1.png","element":"img","alt":" f(t1) = f(t2) = 0","inline":true},{"text":". 
Without loss of generality, we may also take ","element":"span"},{"style":{"height":14.22},"width":119.63,"height":35.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-2.png","element":"img","alt":" t1 = 0","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":14.22},"width":114.27,"height":35.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-3.png","element":"img","alt":"t2 = 1","inline":true},{"text":". With these simplifications, ","element":"span"},{"style":{"height":17.2},"width":595.54,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-4.png","element":"img","alt":" Sec[f, [0, 1]] = Sec[f, [t1, t2]] = 0","inline":true},{"text":", and hence our goal is to show that","element":"span"}],[{"style":{"width":"43%"},"width":807,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-5.png","element":"img"}],[{"text":"or equivalently, that","element":"span"}],[{"style":{"width":"37%"},"width":706,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-6.png","element":"img"}],[{"text":"To this end, fix a subgradient ","element":"span"},{"style":{"height":17.6},"width":187.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-7.png","element":"img","alt":" g ∈ ∂f(x)","inline":true,"padRight":true},{"text":"and some ","element":"span"},{"style":{"height":13.6},"width":106.14,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-8.png","element":"img","alt":" z ≥ x","inline":true},{"text":". 
By the definition of the subgradient, it holds that","element":"span"}],[{"style":{"width":"35%"},"width":668,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-9.png","element":"img"}],[{"text":"By choosing ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"= 0 ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"text":"= ","element":"span"},{"style":{"fontStyle":"italic"},"text":"z ","element":"span"},{"text":"in the above display, we verify that (a) ","element":"span"},{"style":{"height":17.6},"width":378.54,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-10.png","element":"img","alt":" f(x) + g(0 − x) ≤ 0","inline":true},{"text":", and (b) ","element":"span"},{"style":{"height":17.6},"width":435.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-11.png","element":"img","alt":"f(x) + g(z − x) ≤ f(z)","inline":true,"padRight":true},{"text":". Combining ","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"a","element":"span"},{"text":") ","element":"span"},{"text":"and ","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"b","element":"span"},{"text":")","element":"span"},{"text":", and noting that ","element":"span"},{"style":{"height":15.6},"width":267.92,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-12.png","element":"img","alt":" z ≥ x, we find","inline":true}],[{"style":{"width":"73%"},"width":1385,"height":266,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-13.png","element":"img"}],[{"text":"as needed. 
On the other hand, suppose ","element":"span"},{"style":{"height":13.6},"width":104.13,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-14.png","element":"img","alt":" x ≥ z","inline":true},{"text":". Noting that the function ","element":"span"},{"style":{"height":17.6},"width":286.81,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-15.png","element":"img","alt":"f̃(t) = f(1 − t)","inline":true,"padRight":true},{"text":"is convex and satisfies ","element":"span"},{"style":{"height":17.6},"width":477.47,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-16.png","element":"img","alt":"f̃(0) = f̃(1) = 0, we have","inline":true}],[{"style":{"width":"60%"},"width":1137,"height":107,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-17.png","element":"img"}],[{"id":"id-40","style":{"fontWeight":"bold"},"text":"6.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Lemma ","element":"span"},{"href":"#id-51","style":{"fontWeight":"bold"},"text":"4.6","element":"a"}],[{"text":"For any ","element":"span"},{"style":{"height":17.6},"width":366.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-18.png","element":"img","alt":" τ ∈ [−t, t], we have","inline":true}],[{"style":{"width":"80%"},"width":1511,"height":273,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-19.png","element":"img"}],[{"text":"First suppose that ","element":"span"},{"style":{"height":15.6},"width":214.77,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-20.png","element":"img","alt":" τ ≥ 0, then","inline":true}],[{"style":{"width":"66%"},"width":1252,"height":266,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/19-21.png","element":"img"}],[{"text":"On the other hand, if 
","element":"span"},{"style":{"height":14},"width":104.2,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-0.png","element":"img","alt":" τ ≤ 0","inline":true},{"text":", then similarly we have","element":"span"}],[{"style":{"width":"78%"},"width":1468,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-1.png","element":"img"}],[{"text":"By definition, and combining the above pieces, we get","element":"span"}],[{"id":"id-42","style":{"width":"69%"},"width":1301,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-2.png","element":"img"}],[{"id":"id-52","style":{"fontWeight":"bold"},"text":"6.3 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Additional Structural Lemmas","element":"span"}],[{"text":"Before continuing, we state three additional structural results that we shall need throughout. First, we observe that the following secant approximation functions are monotone:","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Lemma 6.3 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"For any convex function ","element":"span"},{"style":{"height":17.2},"width":248.86,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-3.png","element":"img","alt":" f : [0, 1] → R","inline":true},{"style":{"fontStyle":"italic"},"text":", the functions ","element":"span"},{"style":{"height":17.2},"width":610.39,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-4.png","element":"img","alt":" t ↦ ∆(f, x, t), t ↦ ∆(f, x + t, t)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"style":{"height":17.6},"width":326.18,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-5.png","element":"img","alt":" t ↦ ∆(f, x − t, t)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"(defined on the appropriate 
domains) are all non-decreasing in ","element":"span"},{"style":{"height":14},"width":108.76,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-6.png","element":"img","alt":" t ≥ 0.","inline":true}],[{"style":{"fontWeight":"bold"},"text":"Proof ","element":"span"},{"text":"The monotonicity of ","element":"span"},{"style":{"height":17.2},"width":256.03,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-7.png","element":"img","alt":" t ↦ ∆(f, x, t)","inline":true,"padRight":true},{"text":"is a consequence of the monotonicity of secant approximations, Lemma ","element":"span"},{"href":"#id-33","text":"6.1. ","element":"a"},{"text":"Here, we will prove that ","element":"span"},{"style":{"height":17.2},"width":315.96,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-8.png","element":"img","alt":" t ↦ ∆(f, x+t, t)","inline":true,"padRight":true},{"text":"is non-decreasing; that ","element":"span"},{"style":{"height":17.2},"width":316.64,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-9.png","element":"img","alt":" t ↦ ∆(f, x−t, t)","inline":true,"padRight":true},{"text":"is non-decreasing will follow by a similar argument. Write ","element":"span"},{"style":{"height":24.22},"width":733.46,"height":60.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-10.png","element":"img","alt":" ∆(f, x + t, t) = (f(x)+f(x+2t))/2 − f(x + t)","inline":true},{"text":". Since continuously differentiable convex functions are dense in class, we may assume ","element":"span"},{"style":{"height":16.4},"width":42.06,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-11.png","element":"img","alt":" f′ ","inline":true,"padRight":true},{"text":"exists. 
Thus,","element":"span"}],[{"style":{"width":"99%"},"width":1868,"height":51,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-12.png","element":"img"}],[{"text":"The next result states that the continuity modulus can be regarded as the inverse function of the secant error function ","element":"span"},{"style":{"height":12.8},"width":37,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-13.png","element":"img","alt":" ∆","inline":true},{"id":"id-43","text":", in the following sense:","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Lemma 6.4 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"For any ","element":"span"},{"style":{"height":12.4},"width":110.07,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-14.png","element":"img","alt":" ϵ > 0","inline":true},{"style":{"fontStyle":"italic"},"text":", and ","element":"span"},{"style":{"height":18.44},"width":761.13,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-15.png","element":"img","alt":" x ∈ [tleft(f, ϵ), 1 − tright(f, ϵ)], ω(f, x, ϵ)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is equal to the unique ","element":"span"},{"style":{"height":17.6},"width":341.88,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-16.png","element":"img","alt":"t ∈ [0, x ∧ (1 − x)]","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"satisfying ","element":"span"},{"style":{"height":17.6},"width":262.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-17.png","element":"img","alt":" ∆(f, x, t) = ϵ.","inline":true}],[{"style":{"width":"115%"},"width":2162,"height":478,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-18.png","element":"img"}],[{"text":"Lemmas ","element":"span"},{"href":"#id-42","text":"6.3 ","element":"a"},{"text":"and 
","element":"span"},{"href":"#id-43","text":"6.4 ","element":"a"},{"text":"are used in Section ","element":"span"},{"href":"#id-19","text":"5.3, ","element":"a"},{"text":"which proves the packing given in Theorem ","element":"span"},{"href":"#id-21","text":"3.2. ","element":"a"},{"text":"Next, we have a ‘change-of-scale’ lemma, whose proof is at the heart of Proposition ","element":"span"},{"href":"#id-53","text":"3.5 ","element":"a"},{"text":"and Lemma ","element":"span"},{"href":"#id-47","text":"5.2:","element":"a"}],[{"style":{"height":18.44},"width":1428.9,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-19.png","element":"img","alt":"Lemma 6.5 For any 0 < ϵ′ ≤ ϵ and x ∈ [tleft(f, ϵ), 1 − tright(f, ϵ)], we have","inline":true}],[{"id":"id-56","style":{"width":"36%"},"width":689,"height":95,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-20.png","element":"img"}],[{"style":{"height":18.04},"width":971.88,"height":45.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-21.png","element":"img","alt":"Proof Fix 0 < ϵ′ ≤ ϵ, x ∈ [tleft(f, ϵ), 1 − tright(f, ϵ)]","inline":true},{"text":", which implies that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"ω","element":"span"},{"style":{"height":17.2},"width":490.49,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-22.png","element":"img","alt":"(f, x, ϵ) ≤ x ∧ (1 − x). 
Let","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"φ","element":"span"},{"style":{"height":17.6},"width":930.7,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-23.png","element":"img","alt":"(t) := ∆(f, x, t) = (f(x − t) + f(x + t))/2 − f(x)","inline":true},{"text":", which is defined for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"t ","element":"span"},{"style":{"height":17.6},"width":314.38,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/20-24.png","element":"img","alt":" ∈ [0, x ∧ (1 − x)]","inline":true},{"text":", convex, ","element":"span"},{"text":"and satisfies ","element":"span"},{"style":{"height":17.6},"width":166.17,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-0.png","element":"img","alt":" φ(0) = 0","inline":true},{"text":". A standard computation (Lemma ","element":"span"},{"href":"#id-55","text":"6.6) ","element":"a"},{"text":"shows that, for any ","element":"span"},{"style":{"height":13.6},"width":103.54,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-1.png","element":"img","alt":" t′ ≤ t","inline":true},{"text":", one has","element":"span"}],[{"style":{"width":"81%"},"width":1532,"height":196,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-2.png","element":"img"}],[{"text":"where ","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"i.a","element":"span"},{"text":") ","element":"span"},{"text":"and ","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"i.b","element":"span"},{"text":") ","element":"span"},{"text":"are by Lemma ","element":"span"},{"href":"#id-43","text":"6.4, ","element":"a"},{"text":"and ","element":"span"},{"text":"(","element":"span"},{"style":{"fontStyle":"italic"},"text":"ii","element":"span"},{"text":") ","element":"span"},{"text":"uses the fact that 
","element":"span"},{"style":{"height":17.6},"width":409.23,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-3.png","element":"img","alt":" ω(f, x, ϵ′) ≤ ω(f, x, ϵ)","inline":true,"padRight":true},{"text":"(this is immediate from the definition of ","element":"span"},{"style":{"height":17.6},"width":57.61,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-4.png","element":"img","alt":" ω).","inline":true}],[{"id":"id-26","style":{"fontWeight":"bold"},"text":"6.4 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Proposition ","element":"span"},{"href":"#id-53","style":{"fontWeight":"bold"},"text":"3.5","element":"a"}],[{"text":"Let ","element":"span"},{"style":{"height":17.6},"width":169.22,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-5.png","element":"img","alt":" c ∈ (0, 1)","inline":true},{"text":", and observe that ","element":"span"},{"style":{"height":17.6},"width":397.94,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-6.png","element":"img","alt":" tleft(f, cϵ) ≤ tleft(f, ϵ)","inline":true},{"text":", and similarly for ","element":"span"},{"style":{"height":17.64},"width":227.07,"height":44.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-7.png","element":"img","alt":" tright. 
Thus,","inline":true}],[{"style":{"width":"63%"},"width":1181,"height":390,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-8.png","element":"img"}],[{"text":"By Lemma ","element":"span"},{"href":"#id-56","text":"6.5, ","element":"a"},{"text":"we have","element":"span"}],[{"style":{"width":"75%"},"width":1419,"height":119,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-9.png","element":"img"}],[{"text":"Next, let ","element":"span"},{"style":{"height":17.6},"width":477.09,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-10.png","element":"img","alt":" x ∈ [tleft(f, cϵ), tleft(f, ϵ)]","inline":true},{"text":"; we show that ","element":"span"},{"style":{"height":17.6},"width":303.35,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-11.png","element":"img","alt":" ω(f, x, cϵ) ≥ cx","inline":true},{"text":". Indeed, let ","element":"span"},{"style":{"height":17.6},"width":305.55,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-12.png","element":"img","alt":" ϵ∗ := ∆(f, x, x)","inline":true},{"text":". Since ","element":"span"},{"style":{"height":17.6},"width":500.73,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-13.png","element":"img","alt":" tleft(f, ϵ∗) ≤ x ≤ tleft(f, ϵ)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":17.6},"width":155.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-14.png","element":"img","alt":" tleft(f, ·)","inline":true,"padRight":true},{"text":"is monotone, we have ","element":"span"},{"style":{"height":14.74},"width":122.13,"height":36.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-15.png","element":"img","alt":" ϵ∗ ≤ ϵ","inline":true},{"text":". 
By definition, both ","element":"span"},{"style":{"height":17.6},"width":638.43,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-16.png","element":"img","alt":"ω(f, x, cϵ) ≤ x and ω(f, x, ϵ∗) ≤ x","inline":true},{"text":". Hence, Lemma ","element":"span"},{"href":"#id-56","text":"6.5 ","element":"a"},{"text":"and the bound ","element":"span"},{"style":{"height":16},"width":234.3,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-17.png","element":"img","alt":" ϵ∗ ≤ ϵ imply","inline":true}],[{"style":{"width":"42%"},"width":795,"height":80,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-18.png","element":"img"}],[{"text":"as needed. Hence,","element":"span"}],[{"style":{"width":"69%"},"width":1308,"height":119,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-19.png","element":"img"}],[{"text":"The case ","element":"span"},{"style":{"height":18.44},"width":657.04,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-20.png","element":"img","alt":" x ∈ [1 − tright(f, ϵ), 1 − tright(f, cϵ)]","inline":true,"padRight":true},{"text":"similarly yields","element":"span"}],[{"style":{"width":"52%"},"width":987,"height":122,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-21.png","element":"img"}],[{"text":"Putting everything together,","element":"span"}],[{"style":{"width":"56%"},"width":1057,"height":108,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/21-22.png","element":"img"}],[{"id":"id-46","style":{"fontWeight":"bold"},"text":"6.5 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Lemma ","element":"span"},{"href":"#id-47","style":{"fontWeight":"bold"},"text":"5.2","element":"a"}],[{"text":"We may assume without loss of generality that 
","element":"span"},{"style":{"height":17.6},"width":164.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-0.png","element":"img","alt":" τ ∈ [0, 1]","inline":true},{"text":". For ease of notation, set","element":"span"}],[{"style":{"width":"54%"},"width":1022,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-1.png","element":"img"}],[{"text":"noting that ","element":"span"},{"style":{"height":8},"width":22,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-2.png","element":"img","alt":" �ϵ","inline":true,"padRight":true},{"text":"is the secant approximation bias on the interval ","element":"span"},{"style":{"height":17.6},"width":511.1,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-3.png","element":"img","alt":" [u − (1 − τ)t, u + (1 − τ)t]","inline":true},{"text":". Since ","element":"span"},{"style":{"height":17.6},"width":1265.97,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-4.png","element":"img","alt":"u + (1 − τ)t = x + t and [u − (1 − τ)t, u + (1 − τ)t] ⊆ [x − t, x + t]","inline":true},{"text":", we have that","element":"span"}],[{"style":{"width":"52%"},"width":979,"height":283,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-5.png","element":"img"}],[{"text":"First, if ","element":"span"},{"style":{"height":14.8},"width":193.18,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-6.png","element":"img","alt":" �ϵ ≤ ϵ then","inline":true}],[{"style":{"width":"76%"},"width":1438,"height":76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-7.png","element":"img"}],[{"text":"On the other hand, if ","element":"span"},{"style":{"height":14.8},"width":290.05,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-8.png","element":"img","alt":" ϵ < �ϵ ≤ 2ϵ 
then","inline":true}],[{"id":"id-55","style":{"width":"64%"},"width":1211,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-9.png","element":"img"}],[{"text":"In either case ","element":"span"},{"style":{"height":24.22},"width":331.09,"height":60.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-10.png","element":"img","alt":" ω(f, u, ϵ) ≥ (1−τ)t/2","inline":true,"padRight":true},{"text":", which concludes the proof.","element":"span"}],[{"id":"id-54","style":{"fontWeight":"bold"},"text":"6.6 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Proof of Lemma ","element":"span"},{"href":"#id-33","style":{"fontWeight":"bold"},"text":"6.1","element":"a"}],[{"text":"We begin with a simple technical lemma.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"Lemma 6.6 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"height":17.6},"width":253.35,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-11.png","element":"img","alt":" ϕ : [0, t] → ℝ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"be a convex function satisfying ","element":"span"},{"style":{"height":17.6},"width":174.37,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-12.png","element":"img","alt":" ϕ(0) = 0","inline":true},{"style":{"fontStyle":"italic"},"text":". 
Then for all ","element":"span"},{"style":{"height":17.6},"width":168.57,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-13.png","element":"img","alt":" c ∈ [0, 1]","inline":true},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"style":{"height":17.6},"width":265.43,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-14.png","element":"img","alt":"ϕ(ct) ≤ cϕ(t).","inline":true}],[{"style":{"height":17.6},"width":1129.46,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-15.png","element":"img","alt":"Proof cϕ(t) = cϕ(t) + (1 − c)ϕ(0) ≥ ϕ(ct + (1 − c)0) = ϕ(ct)","inline":true},{"text":", where the first equality uses ","element":"span"},{"style":{"height":17.6},"width":165.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-16.png","element":"img","alt":" ϕ(0) = 0","inline":true,"padRight":true},{"text":"and the second uses convexity.","element":"span"}],[{"text":"We are now ready to prove Lemma ","element":"span"},{"href":"#id-33","text":"6.1: ","element":"a"},{"style":{"fontWeight":"bold"},"text":"Proof ","element":"span"},{"text":"[Proof of Lemma ","element":"span"},{"href":"#id-33","text":"6.1] ","element":"a"},{"text":"It suffices to prove that this is the case when ","element":"span"},{"style":{"fontStyle":"italic"},"text":"a ","element":"span"},{"text":"= ","element":"span"},{"style":{"fontStyle":"italic"},"text":"c ","element":"span"},{"text":"or ","element":"span"},{"style":{"fontStyle":"italic"},"text":"d ","element":"span"},{"text":"= ","element":"span"},{"style":{"fontStyle":"italic"},"text":"b","element":"span"},{"text":", since then ","element":"span"},{"style":{"height":17.2},"width":933.37,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-17.png","element":"img","alt":"Sec[f, [a, b]](x) ≤ Sec[f, [c, b]](x) ≤ Sec[f, [c, d]](x)","inline":true},{"text":". 
We assume without loss of generality that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"a ","element":"span"},{"text":"= ","element":"span"},{"style":{"fontStyle":"italic"},"text":"c","element":"span"},{"text":". Then, it suffices to show that, for ","element":"span"},{"style":{"height":17.2},"width":720.06,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-18.png","element":"img","alt":" t ≥ x, the map t ↦ Sec[f, [a, a + t]](x)","inline":true,"padRight":true},{"text":"is non-decreasing. We have","element":"span"}],[{"style":{"width":"64%"},"width":1212,"height":94,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-19.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":17.2},"width":916.34,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-20.png","element":"img","alt":" ϕ(t) = f(a)(a + t − x) + (x − a)f(a + t). Since ϕ","inline":true,"padRight":true},{"text":"is convex (sum of affine function and convex function as ","element":"span"},{"style":{"height":17.6},"width":452.87,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-21.png","element":"img","alt":" x ≥ a, and t ↦ f(a + t)","inline":true,"padRight":true},{"text":"is convex), and ","element":"span"},{"style":{"height":17.6},"width":164.52,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-22.png","element":"img","alt":" ϕ(0) = 0","inline":true},{"text":", we conclude by Lemma ","element":"span"},{"href":"#id-55","text":"6.6 ","element":"a"},{"text":"that ","element":"span"},{"style":{"height":24.22},"width":60.48,"height":60.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/22-23.png","element":"img","alt":"ϕ(t)/t","inline":true,"padRight":true},{"text":"is non-decreasing, as needed.","element":"span"}]]},{"heading":"7 Empirical Results","paragraphs":[[{"text":"In this section, we validate our 
theoretical results through empirical comparisons of active and passive sampling using simulated data and data drawn from the behavioral literature. In all experiments, a query at ","element":"span"},{"style":{"height":17.2},"width":164.81,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-0.png","element":"img","alt":" x ∈ [0, 1]","inline":true,"padRight":true},{"text":"results in an observation ","element":"span"},{"style":{"height":23.09},"width":504.71,"height":57.74,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-1.png","element":"img","alt":" y i.i.d.∼ N(f(x), σ2) where σ","inline":true,"padRight":true},{"text":"depends on the experiments and is known to the algorithm. We construct our confidence intervals ","element":"span"},{"style":{"height":17.6},"width":116.08,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-2.png","element":"img","alt":" φ(t, δ)","inline":true,"padRight":true},{"text":"using ","element":"span"},{"href":"#id-13","referenceIndex":12,"text":"Kaufmann et al. ","element":"a"},{"href":"#id-13","referenceIndex":12,"text":"[2016, ","element":"a"},{"text":"Theorem 8], scaled by ","element":"span"},{"style":{"height":8},"width":25,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-3.png","element":"img","alt":" σ","inline":true},{"text":". 
Further implementation details are described in Section ","element":"span"},{"href":"#id-57","text":"7.3.","element":"a"}],[{"style":{"fontWeight":"bold"},"text":"7.1 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Piecewise Linear Function","element":"span"}],[{"text":"We begin by comparing the performance of the active and passive methods on a piecewise linear function, ","element":"span"},{"style":{"height":17.6},"width":433.98,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-4.png","element":"img","alt":" f(x) = max{1 − 5x, 0}","inline":true},{"text":", over the domain ","element":"span"},{"style":{"height":17.6},"width":166.58,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-5.png","element":"img","alt":" x ∈ [0, 1]","inline":true},{"text":". We consider the noise level ","element":"span"},{"style":{"height":15.13},"width":160.7,"height":37.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-6.png","element":"img","alt":" σ² = .01","inline":true},{"text":". As discussed in Remark ","element":"span"},{"href":"#id-6","text":"3.2, ","element":"a"},{"text":"theory predicts that, up to logarithmic factors, the error incurred by passive sampling scales as ","element":"span"},{"style":{"height":20.33},"width":371.64,"height":50.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-7.png","element":"img","alt":" ∥f̂ − f⋆∥∞ ∼ n^(−1/3)","inline":true},{"text":", whereas active sampling attains the parametric rate ","element":"span"},{"style":{"height":20.33},"width":360.66,"height":50.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-8.png","element":"img","alt":" ∥f̂ − f⋆∥∞ ∼ n^(−1/2)","inline":true},{"text":". As a benchmark, we plot a passive algorithm based on constrained least squares (see, e.g. ","element":"span"},{"href":"#id-3","referenceIndex":6,"text":"Dümbgen et al. 
","element":"a"},{"href":"#id-3","referenceIndex":6,"text":"[2004]","element":"a"},{"text":"), and also plot the error incurred by sampling according to an “oracle allocation”, which samples ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"at the endpoints ","element":"span"},{"style":{"fontStyle":"italic"},"text":"{","element":"span"},{"text":"0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1","element":"span"},{"style":{"fontStyle":"italic"},"text":"}","element":"span"},{"text":", as well as the inflection point ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"= 0","element":"span"},{"style":{"fontStyle":"italic"},"text":".","element":"span"},{"text":"2","element":"span"},{"text":". The implementation details are deferred to the end of the section.","element":"span"}],[{"id":"id-58","style":{"width":"34%"},"width":638,"height":162,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-9.png","element":"img"}],[{"text":"Figure 1: ","element":"figcaption","subtype":"caption"},{"text":"Comparison of active, passive and oracle performance on ","element":"figcaption","subtype":"caption"},{"style":{"height":17.6},"width":449.89,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-10.png","element":"img","alt":"f(x) = max{1 − 5x, 0}","inline":true},{"text":". 
The ","element":"figcaption","subtype":"caption"},{"style":{"fontStyle":"italic"},"text":"x","element":"figcaption","subtype":"caption"},{"text":"-axis is the number of samples taken, and the ","element":"figcaption","subtype":"caption"},{"style":{"fontStyle":"italic"},"text":"y","element":"figcaption","subtype":"caption"},{"text":"-axis is ","element":"figcaption","subtype":"caption"},{"style":{"height":14.62},"width":63.7,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-11.png","element":"img","alt":" L∞","inline":true,"padRight":true},{"text":"error. Dotted lines denote a least-squares trendline. A slope of ","element":"figcaption","subtype":"caption"},{"style":{"height":11.6},"width":57.94,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-12.png","element":"img","alt":" −p","inline":true,"padRight":true},{"text":"suggests a rate of ","element":"figcaption","subtype":"caption"},{"style":{"height":17.6},"width":276.05,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-13.png","element":"img","alt":" (#samples)−p.","inline":true}],[{"style":{"width":"40%"},"width":754,"height":163,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-14.png","element":"img"}],[{"text":"Figure ","element":"span"},{"href":"#id-58","text":"1 ","element":"a"},{"text":"corroborates our theoretical predictions. The dotted trend lines correspond to a least-squares fit to the logarithm of the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"y ","element":"span"},{"text":"coordinates, so that the displayed slopes approximate the exponent in the rate at which the errors decay. 
In particular, we see that the slope of the line corresponding to passive sampling is close to ","element":"span"},{"style":{"height":21.29},"width":55.72,"height":53.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-15.png","element":"img","alt":" −1/3","inline":true,"padRight":true},{"text":"indicating a rate approximately equal to ","element":"span"},{"style":{"height":16.34},"width":103.42,"height":40.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-16.png","element":"img","alt":" n^(−1/3)","inline":true},{"text":", ","element":"span"},{"text":"and the slopes for the active and oracle methods are close to ","element":"span"},{"style":{"height":21.29},"width":55.72,"height":53.23,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-17.png","element":"img","alt":" −1/2","inline":true},{"text":". Observe that the oracle method ","element":"span"},{"text":"still significantly outperforms the active sampling algorithm, perhaps explained by the additional ","element":"span"},{"style":{"height":17.6},"width":150.9,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-18.png","element":"img","alt":"log(1/ϵ)","inline":true,"padRight":true},{"text":"superfluous locations the active procedure samples at to achieve ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/23-19.png","element":"img","alt":" ϵ","inline":true},{"text":"-error relative to the oracle method.","element":"span"}],[{"style":{"fontWeight":"bold"},"text":"7.2 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Data Derived Function","element":"span"}],[{"text":"Next, we evaluate the performance of our active algorithm on a convex function derived from real data. 
250 participants were asked to choose between a hypothetical reward of $100 given immediately and a reward of $115 given at a time ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"= 0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"2","element":"span"},{"style":{"fontStyle":"italic"},"text":", . . . , ","element":"span"},{"text":"64 ","element":"span"},{"text":"days in the future (times were randomized and rescaled to be in ","element":"span"},{"text":"[0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1]","element":"span"},{"text":"). We fit a convex function to this data using least squares and sampled ","element":"span"},{"id":"id-59","text":"from it as above; the function is displayed in Figure ","element":"span"},{"href":"#id-59","text":"2.","element":"a"}],[{"style":{"width":"41%"},"width":780,"height":95,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/24-0.png","element":"img"}],[{"text":"Figure 2: Data-derived discount function. 
Here, the ","element":"figcaption","subtype":"caption"},{"style":{"fontStyle":"italic"},"text":"x ","element":"figcaption","subtype":"caption"},{"text":"value corresponds to the days for which the reward is delayed, and ","element":"figcaption","subtype":"caption"},{"style":{"fontStyle":"italic"},"text":"y ","element":"figcaption","subtype":"caption"},{"text":"is the fraction of the population who would accept the delay for a greater monetary reward.","element":"figcaption","subtype":"caption"}],[{"id":"id-60","style":{"width":"42%"},"width":791,"height":411,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/24-1.png","element":"img"}],[{"text":"Figure 3: Comparison of active and passive performance on the function depicted in Figure ","element":"figcaption","subtype":"caption"},{"href":"#id-59","text":"2. ","element":"a","subtype":"caption"},{"text":"The ","element":"figcaption","subtype":"caption"},{"style":{"fontStyle":"italic"},"text":"x","element":"figcaption","subtype":"caption"},{"text":"-axis is the number of samples taken, and the ","element":"figcaption","subtype":"caption"},{"style":{"fontStyle":"italic"},"text":"y","element":"figcaption","subtype":"caption"},{"text":"-axis is ","element":"figcaption","subtype":"caption"},{"style":{"height":14.62},"width":63.7,"height":36.55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/24-2.png","element":"img","alt":" L∞","inline":true,"padRight":true},{"text":"error. 
Dotted lines denote a least-squares trendline.","element":"figcaption","subtype":"caption"}],[{"style":{"width":"38%"},"width":724,"height":200,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/24-3.png","element":"img"}],[{"text":"In Figure ","element":"span"},{"href":"#id-60","text":"3, ","element":"a"},{"text":"we compare the performance of the active and passive algorithms for the function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"depicted in Figure ","element":"span"},{"href":"#id-59","text":"2. ","element":"a"},{"text":"Again, the dotted trend lines correspond to a least-squares fit to the logarithm of the ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"and ","element":"span"},{"style":{"fontStyle":"italic"},"text":"y ","element":"span"},{"text":"coordinates. We find that the passive algorithm appears to obtain a rate of ","element":"span"},{"style":{"height":16.33},"width":103.42,"height":40.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/24-4.png","element":"img","alt":" n^(−1/3)","inline":true},{"text":", and the error of the active algorithm has a scaling closer to that of the parametric rate.","element":"span"}],[{"text":"For insight into why the active algorithm fares better on this ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"text":", we can examine the oracle allocations, as constructed in Proposition ","element":"span"},{"href":"#id-5","text":"3.4. 
","element":"a"},{"text":"We find that at higher levels of granularity, ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"text":"is well ","element":"span"},{"text":"approximated by a piecewise linear function, and thus an oracle would only sample at a few, key ","element":"span"},{"id":"id-61","text":"design points (Figure ","element":"span"},{"href":"#id-61","text":"4)","element":"a"},{"text":":","element":"span"}],[{"style":{"width":"45%"},"width":854,"height":116,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-0.png","element":"img"}],[{"text":"Figure 4: A plot of the design points corresponding to the oracle allocation constructed in Proposition ","element":"figcaption","subtype":"caption"},{"href":"#id-5","text":"3.4, ","element":"a","subtype":"caption"},{"text":"for granularities ","element":"figcaption","subtype":"caption"},{"style":{"height":17.6},"width":270.41,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-1.png","element":"img","alt":" ϵ ∈ {.01, .001}","inline":true},{"text":". 
Bar height is equal to ","element":"figcaption","subtype":"caption"},{"text":"1000 ","element":"figcaption","subtype":"caption"},{"text":"divided by the number of design points.","element":"figcaption","subtype":"caption"}],[{"style":{"width":"43%"},"width":810,"height":106,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-2.png","element":"img"}],[{"text":"Notice that as the granularity decreases, the oracle allocation refines its design points, dividing the function into regions in which the piecewise-linear approximation holds to a higher accuracy.","element":"span"}],[{"id":"id-57","style":{"fontWeight":"bold"},"text":"7.3 ","element":"span"},{"style":{"fontWeight":"bold"},"text":"Implementation","element":"span"}],[{"text":"The passive algorithm samples at design points ","element":"span"},{"style":{"height":10.62},"width":36.94,"height":26.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-3.png","element":"img","alt":" xt","inline":true,"padRight":true},{"text":"along a dyadic sequence ","element":"span"},{"style":{"height":17.6},"width":600.47,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-4.png","element":"img","alt":" ⃗s = (0, 1, 1/2, 1/4, 3/4, 1/8, . . . 
).","inline":true,"padRight":true},{"text":"The algorithm then estimates ","element":"span"},{"style":{"height":16.4},"width":37.36,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-5.png","element":"img","alt":" f⋆","inline":true,"padRight":true},{"text":"using the following constrained least squares problem.","element":"span"}],[{"id":"id-62","style":{"width":"66%"},"width":1245,"height":129,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-6.png","element":"img"}],[{"text":"We found that this least squares minimization performs surprisingly well in practice, drastically outperforming projections in the ","element":"span"},{"style":{"height":8},"width":44,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-7.png","element":"img","alt":" ∞","inline":true},{"text":"-norm. We used this observation to modify the implementation of our active algorithm. At each round, the active algorithm alternates between sampling a design point ","element":"span"},{"style":{"height":19.95},"width":484.06,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-8.png","element":"img","alt":" x ∈ {xℓ(I∗), xm(I∗), xr(I∗)}","inline":true,"padRight":true},{"text":"as in Algorithm ","element":"span"},{"href":"#id-16","text":"2, ","element":"a"},{"text":"and sampling dyadic sequences supported on ","element":"span"},{"style":{"height":19.95},"width":451.08,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-9.png","element":"img","alt":" I∗ given by xℓ(I∗) +|I∗|⃗s","inline":true},{"text":". 
We then return ","element":"span"},{"style":{"height":16.4},"width":30.18,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/25-10.png","element":"img","alt":"f̂","inline":true,"padRight":true},{"text":"using the constrained least-squares problem in ","element":"span"},{"href":"#id-62","text":"(23)","element":"a"},{"text":". In addition, our implementation makes use of the sharper upper- and lower-confidence bounds described in Remark ","element":"span"},{"href":"#id-63","text":"4.1.","element":"a"}]]},{"heading":"References","paragraphs":[[{"id":"id-11","text":"Pierre C Bellec et al. Sharp oracle inequalities for least squares estimators in shape restricted ","element":"span"},{"text":"regression. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"The Annals of Statistics","element":"span"},{"text":", 46(2):745–780, 2018.","element":"span"}],[{"text":"Stéphane Boucheron, Gábor Lugosi, and Pascal Massart. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Concentration inequalities: A nonasymptotic theory of independence","element":"span"},{"text":". Oxford University Press, 2013.","element":"span"}],[{"id":"id-4","text":"T Tony Cai, Mark G Low, Yin Xia, et al. Adaptive confidence intervals for regression functions ","element":"span"},{"text":"under shape constraints. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"The Annals of Statistics","element":"span"},{"text":", 41(2):722–750, 2013.","element":"span"}],[{"id":"id-7","text":"Rui Castro, Rebecca Willett, and Robert Nowak. Faster rates in regression via active learning. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Advances in Neural Information Processing Systems","element":"span"},{"text":", 18, 2005.","element":"span"}],[{"id":"id-10","text":"Sabyasachi Chatterjee. An improved global risk bound in concave regression. 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Electronic Journal of Statistics","element":"span"},{"text":", 10(1):1608–1629, 2016.","element":"span"}],[{"id":"id-3","text":"Lutz Dümbgen, Sandra Freitag, and Geurt Jongbloed. Consistency of concave regression with an ","element":"span"},{"text":"application to current-status data. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Mathematical Methods of Statistics","element":"span"},{"text":", 13:69–81, 2004.","element":"span"}],[{"id":"id-12","text":"Lutz Dümbgen et al. Optimal confidence bands for shape-restricted curves. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Bernoulli","element":"span"},{"text":", 9(3):423–449, 2003.","element":"span"}],[{"id":"id-2","text":"Ronald Aylmer Fisher. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"The design of experiments","element":"span"},{"text":". Oliver And Boyd; Edinburgh; London, 1937.","element":"span"}],[{"id":"id-0","text":"Shane Frederick, George Loewenstein, and Ted O’donoghue. Time discounting and time preference: ","element":"span"},{"text":"A critical review. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Journal of economic literature","element":"span"},{"text":", 40(2):351–401, 2002.","element":"span"}],[{"id":"id-1","text":"Leonard Green and Joel Myerson. A discounting framework for choice with delayed and probabilistic ","element":"span"},{"text":"rewards. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Psychological Bulletin","element":"span"},{"text":", 130(5):769–792, 2004.","element":"span"}],[{"id":"id-9","text":"Adityanand Guntuboyina and Bodhisattva Sen. Global risk bounds and adaptation in univariate ","element":"span"},{"text":"convex regression. 
","element":"span"},{"style":{"fontStyle":"italic"},"text":"Probability Theory and Related Fields","element":"span"},{"text":", 163(1-2):379–411, 2015.","element":"span"}],[{"id":"id-13","text":"Emilie Kaufmann, Olivier Cappé, and Aurélien Garivier. On the complexity of best-arm identification ","element":"span"},{"text":"in multi-armed bandit models. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"The Journal of Machine Learning Research","element":"span"},{"text":", 17(1):1–42, 2016.","element":"span"}],[{"id":"id-8","text":"Alexander Korostelev. On minimax rates of convergence in image models under sequential design. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Statistics & Probability Letters","element":"span"},{"text":", 43(4):369–375, 1999.","element":"span"}],[{"id":"id-14","text":"Yuancheng Zhu, Sabyasachi Chatterjee, John Duchi, and John Lafferty. Local minimax complexity ","element":"span"},{"text":"of stochastic convex optimization. ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Advances in Neural Information Processing Systems","element":"span"},{"text":", 29, 2016.","element":"span"}]]},{"heading":"A Additional Remarks","paragraphs":[[{"id":"id-27","style":{"fontWeight":"bold"},"text":"Remark A.1 (Gap Between Upper and Lower Bounds) ","element":"span"},{"style":{"fontStyle":"italic"},"text":"Let ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"be a ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"style":{"fontStyle":"italic"},"text":"-piecewise linear function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"style":{"fontStyle":"italic"},"text":". 
Then there exists a set of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k ","element":"span"},{"style":{"fontStyle":"italic"},"text":"intervals ","element":"span"},{"style":{"height":18.44},"width":261.66,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-0.png","element":"img","alt":" I = {Ii}1≤i≤k","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"such that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is linear on each interval; measuring ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"at ","element":"span"},{"style":{"height":19.95},"width":413.21,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-1.png","element":"img","alt":" {xl(I), xr(I), xm(I)}I∈I","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"would be enough to estimate ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"with zero error over ","element":"span"},{"text":"[0","element":"span"},{"style":{"fontStyle":"italic"},"text":", ","element":"span"},{"text":"1]","element":"span"},{"style":{"fontStyle":"italic"},"text":". Hence, we must have ","element":"span"},{"style":{"height":17.6},"width":229.83,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-2.png","element":"img","alt":" N(f, ϵ) ≲ k","inline":true},{"style":{"fontStyle":"italic"},"text":". 
On the other hand, ","element":"span"},{"style":{"height":18.22},"width":435.02,"height":45.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-3.png","element":"img","alt":" Λavg(f, ϵ) ≈ k log(1/ϵ)","inline":true},{"style":{"fontStyle":"italic"},"text":", and indeed one can show that for a ","element":"span"},{"style":{"fontStyle":"italic"},"text":"k","element":"span"},{"style":{"fontStyle":"italic"},"text":"-piecewise linear function, ","element":"span"},{"style":{"height":17.6},"width":501.24,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-4.png","element":"img","alt":" log(ωmax/ωmin) ≈ log(1/ϵ)","inline":true},{"style":{"fontStyle":"italic"},"text":", yielding the necessary cancelation. As with the term ","element":"span"},{"style":{"height":29.62},"width":690.44,"height":74.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-5.png","element":"img","alt":" log tleft(f,ϵ)tright(f,ϵ)tleft(f,cϵ)tright(f,cϵ), log(ωmax/ωmin)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"scales at most as ","element":"span"},{"style":{"height":17.6},"width":152.46,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-6.png","element":"img","alt":" log(1/ϵ)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"for ","element":"span"},{"style":{"fontStyle":"italic"},"text":"most reasonable functions, and can be bounded by ","element":"span"},{"style":{"height":19.95},"width":711.71,"height":49.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-7.png","element":"img","alt":" log(maxx∈[0,1] f′′(x)/ minx∈[0,1] f′′(x))","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"for any twice-differentiable function ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f","element":"span"},{"style":{"fontStyle":"italic"},"text":". 
Overall, we conjecture that the true sample complexity lies closer to ","element":"span"},{"style":{"height":17.6},"width":134.78,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-8.png","element":"img","alt":" N(f, ϵ)","inline":true},{"style":{"fontStyle":"italic"},"text":", because in the noiseless setting, one can approximate left- and right-derivatives of ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"to arbitrary accuracy using just two points. This makes it possible to learn a 2-piecewise linear function with a constant number of function evaluations, rather than the ","element":"span"},{"style":{"height":17.6},"width":220.12,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-9.png","element":"img","alt":" O(log(1/ϵ))","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"implied by ","element":"span"},{"style":{"height":17.6},"width":137.98,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-10.png","element":"img","alt":" Λ(f, ϵ).","inline":true}],[{"id":"id-29","style":{"fontWeight":"bold"},"text":"Remark A.2 (Sub-Optimality of Non-Uniform Designs) ","element":"span"},{"style":{"fontStyle":"italic"},"text":"In this remark, we argue that although non-uniform designs can improve upon uniform designs for some functions of interest, in general they provide no benefit.","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"To present the argument, we begin by recalling the proof of Theorem ","element":"span"},{"href":"#id-28","style":{"fontStyle":"italic"},"text":"3.6. 
","element":"a"},{"style":{"fontStyle":"italic"},"text":"We argued that given ","element":"span"},{"style":{"height":16.4},"width":182.32,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-11.png","element":"img","alt":"f ∈ Fconv","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"style":{"height":18.44},"width":628.4,"height":46.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-12.png","element":"img","alt":" x0 ∈ [tleft(f, 2ϵ), 1 − tright(f, 2ϵ))]","inline":true},{"style":{"fontStyle":"italic"},"text":", then unless a design collects ","element":"span"},{"style":{"height":24.25},"width":370.03,"height":60.62,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-13.png","element":"img","alt":" ≳ (1 + σ2ϵ2 ) log(1/δ)","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"samples in the interior of the interval ","element":"span"},{"style":{"height":17.6},"width":792.68,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-14.png","element":"img","alt":" I0 := [x0 − ω(f, x0, 2ϵ), x0 + ω(f, x0, 2ϵ)]","inline":true},{"style":{"fontStyle":"italic"},"text":", the alternative function","element":"span"}],[{"style":{"width":"103%"},"width":1940,"height":459,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-15.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Without uniformity, we cannot rule out that the design concentrates its samples in ","element":"span"},{"style":{"height":14.62},"width":36.18,"height":36.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-16.png","element":"img","alt":" I∗","inline":true},{"style":{"fontStyle":"italic"},"text":". 
Nevertheless, we can show that there is a function “similar” to ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"which incurs the same lower bound. For ease, assume that ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"is right differentiable at ","element":"span"},{"style":{"fontStyle":"italic"},"text":"x ","element":"span"},{"text":"= 1 ","element":"span"},{"style":{"fontStyle":"italic"},"text":"and left differentiable at ","element":"span"},{"style":{"height":15.13},"width":139.15,"height":37.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-17.png","element":"img","alt":" x = 0.1","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"Letting ","element":"span"},{"style":{"height":15.42},"width":49.16,"height":38.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-18.png","element":"img","alt":" ∂−","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"and ","element":"span"},{"style":{"height":16.62},"width":49.16,"height":41.54,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-19.png","element":"img","alt":" ∂+","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"denote the right- and left-derivative, we can define the shift function as","element":"span"}],[{"style":{"width":"60%"},"width":1142,"height":184,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-20.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"We observe that ","element":"span"},{"style":{"height":16.4},"width":67.24,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-21.png","element":"img","alt":" f←t","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is convex, and if 
","element":"span"},{"style":{"height":12.73},"width":41.94,"height":31.83,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-22.png","element":"img","alt":" x∗","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is as above, then for any ","element":"span"},{"style":{"height":17.6},"width":582.06,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-23.png","element":"img","alt":" t ∈ (x∗ + ω(f, x∗, 2ϵ) − 1, x∗ −","inline":true},{"style":{"height":17.6},"width":225.5,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-24.png","element":"img","alt":"ω(f, x∗, 2ϵ))","inline":true},{"style":{"fontStyle":"italic"},"text":", one can verify that","element":"span"}],[{"style":{"width":"32%"},"width":614,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-25.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"Then, for any interval ","element":"span"},{"style":{"height":17.6},"width":366.02,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-26.png","element":"img","alt":" Ibad = [a, b] ⊂ [0, 1]","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"for which ","element":"span"},{"style":{"height":18.51},"width":219.88,"height":46.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-27.png","element":"img","alt":" |Ibad| ≥ 2I∗ℓ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":", there exists a ","element":"span"},{"style":{"height":14.84},"width":157.7,"height":37.1,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-28.png","element":"img","alt":" tbad ∈ R","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"such that 
","element":"span"},{"style":{"height":19.76},"width":558.91,"height":49.41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-29.png","element":"img","alt":"I∗←tbad := I∗ + tbad|I∗| ⊂ Ibad","inline":true},{"style":{"fontStyle":"italic"},"text":". Hence, if ","element":"span"},{"style":{"height":19.14},"width":605.62,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/27-30.png","element":"img","alt":" E[|xt : xt ∈ Ibad|] ≪ σ2 log(1/δ)","inline":true},{"style":{"fontStyle":"italic"},"text":", then even if the passive","element":"span"}],[{"style":{"fontStyle":"italic"},"text":"design can estimate ","element":"span"},{"style":{"fontStyle":"italic"},"text":"f ","element":"span"},{"style":{"fontStyle":"italic"},"text":"correctly, it will fail to distinguish between the shifted function ","element":"span"},{"style":{"height":17.27},"width":273.2,"height":43.17,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/28-0.png","element":"img","alt":" f←tbad and the","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"shifted alternative:","element":"span"}],[{"style":{"width":"74%"},"width":1395,"height":60,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/28-1.png","element":"img"}],[{"style":{"fontStyle":"italic"},"text":"As a consequence, any algorithm which is ","element":"span"},{"style":{"height":12.8},"width":20,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/28-2.png","element":"img","alt":" δ","inline":true},{"style":{"fontStyle":"italic"},"text":"-correct for all ","element":"span"},{"style":{"height":16.4},"width":217.88,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/28-3.png","element":"img","alt":" f shifts f←t","inline":true},{"style":{"fontStyle":"italic"},"text":", and alternatives defined above must collect at least 
","element":"span"},{"style":{"height":19.13},"width":534.62,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/28-4.png","element":"img","alt":" ≳ σ2 log(1/δ) · ω(f, x∗, 2ϵ)−1 ","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"samples. The above argument can also be extended to the case where the shift ","element":"span"},{"style":{"height":14.04},"width":70.5,"height":35.11,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1808.04523/images/28-5.png","element":"img","alt":" tbad","inline":true,"padRight":true},{"style":{"fontStyle":"italic"},"text":"is chosen at random (as opposed to depending on the design).","element":"span"}]]}],"_version":"3.3.2"},"paperNode":"$1b:props:children:props:children:0:props:product"}]]]}]}]