1b:["$","$L29",null,{"isWhiteLabelled":false,"children":["$","$Lb",null,{"pt":{"compact":0,"expanded":3},"children":[["$","$L2a",null,{"noStar":true,"publisher":true,"task":true,"params":true,"size":"xl","product":{"id":"eyJwYXBlcklEIjoiMTkwNi4wMDI2NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","updated":"2019-06-01T18:28:29.000Z","paperID":"1906.00264","published":"2019-06-01T18:28:29.000Z","authors":"[\"Roi Livni\",\"Yishay Mansour\"]","title":"Graph-based Discriminators: Sample Complexity and Expressiveness","scoreTrending":null,"summary":"$2b","lastCheckedForCode":"2022-09-03T16:19:56.911Z","links":[{"id":"eyJ1cmwiOiJodHRwczovL3BhcGVyc3dpdGhjb2RlLmNvbS9wYXBlci8xOTA2MDAyNjQifQ==","type":"pwc","url":"https://paperswithcode.com/paper/190600264","data":"{\"date\":\"2024-09-04T20:17:22.262Z\"}"}],"reposConnection":{"edges":[]},"models":[],"tags":[{"id":"eyJuYW1lIjoibGVhcm5pbmcgdGhlb3J5IiwidHlwZSI6InRhc2sifQ==","name":"learning theory","description":"Learning theory in machine learning involves understanding the principles and models that govern how algorithms learn from data. It's used to design and analyze machine learning algorithms, ensuring they can generalize well from training data to unseen data.","scoreTrending":null,"count":{"stars":955,"papers":502,"models":209},"__typename":"Tag"}],"summaries":[{"model":"gpt-4o-mini","header":"paper.summary.expertise.beginner","summary":"This paper discusses a new way of using graphs in machine learning to improve how models learn from data. It shows that these graph-based methods can understand and generate complicated patterns better than traditional approaches. The study also looks at how many examples these models need to learn effectively. 
Overall, it's about making machine learning smarter by using the structure of data shown in graphs."}],"emailsConnection":{"edges":[{"author":"yishay mansour","node":{"id":"eyJhZGRyZXNzIjoibWFuc291ci55aXNoYXlAZ21haWwuY29tIn0=","address":"mansour.yishay@gmail.com","name":"Yishay Mansour","avatar":null,"linkedin":"https://www.linkedin.com/in/yishay-mansour-a607a04","bio":null,"site":null,"override":null,"membership":[{"name":"TAU"}],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{
"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[],"scholar":[{"thirdPartyID":"OEJUgwkAAAAJ"}],"twitter":[],"location":[{"formatted":"Israel"}],"owner":[{"id":"eyJ1aWQiOiI5YjJiZTBlOS1jOTYxLTQ5YzQtOGFiZi00ZWU3OWJmYWFlYTYifQ==","name":"yishay mansour","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMDkwMi4zNDMwIiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"0902.3430"},{"id":"eyJwYXBlcklEIjoiMTYwMi4wNzU3MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1602.07570"},{"id":"eyJwYXBlcklEIjoiMTMxMS4wNDY2IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1311.0466"},{"id":"eyJwYXBlcklEIjoiMTQwOS44NDI4IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1409.8428"},{"id":"eyJwYXBlcklEIjoiMTMwNy40NTY0IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1307.4564"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wNjIyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.06223"},{"id":"eyJwYXBlcklEIjoiMTkwNS4wNzc3MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1905.07773"},{"id":"eyJwYXBlcklEIjoiMTgxMC4wMjE4MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1810.02180"},{"id":"eyJwYXBlcklEIjoiMTYwMy4wNjM1MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1603.06352"},{"id":"eyJwYXBlcklEIjoiMjAwNi4xMTU2MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.11561"},{"id":"eyJwYXBlcklEIjoiMjExMi4wMjg2NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"211
2.02866"},{"id":"eyJwYXBlcklEIjoiMTkwNS4xMjYyNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1905.12624"},{"id":"eyJwYXBlcklEIjoiMTcwMi4wNzQ0NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1702.07444"},{"id":"eyJwYXBlcklEIjoiMjAwNy4xMDE0NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2007.10144"},{"id":"eyJwYXBlcklEIjoiMTcwMi4wODUzMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1702.08533"},{"id":"eyJwYXBlcklEIjoiMTkxMS4xMDEzNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1911.10137"},{"id":"eyJwYXBlcklEIjoiMjAwMi4wOTg2OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2002.09869"},{"id":"eyJwYXBlcklEIjoiMjAwNS4wMTc1NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2005.01757"},{"id":"eyJwYXBlcklEIjoiMjEwNi4wMjkwMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2106.02900"},{"id":"eyJwYXBlcklEIjoiMjAwOC4wOTQ5MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2008.09490"},{"id":"eyJwYXBlcklEIjoiMjEwMy4xMzA1NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2103.13056"},{"id":"eyJwYXBlcklEIjoiMTgwNS4wMjM2MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1805.02363"},{"id":"eyJwYXBlcklEIjoiMTkwNy4wMzM0NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1907.03346"},{"id":"eyJwYXBlcklEIjoiMjAwMi4xMDI4NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2002.10286"},{"id":"eyJwYXBlcklEIjoiMTkwNS4xMTM2MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1905.11361"},{"id":"eyJwYXBlcklEIjoiMjMwOC4xNDY0MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2308.14642"},{"id":"eyJwYXBlcklEIjoiMjAxMi4xNDg0MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2012.14843"},{"id":"eyJwYXBlcklEIjoiMjExMC4xMDEzMiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2110.10132"},{"id":"eyJwYXBlcklEIjoiMTkwNS4wOTcwNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=",
"publisher":"arxiv","paperID":"1905.09704"},{"id":"eyJwYXBlcklEIjoiMjIwMi4wNTQyMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.05420"},{"id":"eyJwYXBlcklEIjoiMjMwMS4xMzA4NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2301.13087"},{"id":"eyJwYXBlcklEIjoiMTQxMS4xMTU4IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1411.1158"},{"id":"eyJwYXBlcklEIjoiMjQwMi4xOTMwMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2402.19303"},{"id":"eyJwYXBlcklEIjoiMjIwMy4wMDk5NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2203.00995"},{"id":"eyJwYXBlcklEIjoiMjIwMS4xMzE3MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2201.13172"},{"id":"eyJwYXBlcklEIjoiMTgxMC4wOTM0NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1810.09346"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wOTA1OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.09059"},{"id":"eyJwYXBlcklEIjoiMjAwOS4wNTk4NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2009.05986"},{"id":"eyJwYXBlcklEIjoiMjExMi4xNDQ0NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2112.14445"},{"id":"eyJwYXBlcklEIjoiMjAwNC4wNzgzOSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2004.07839"},{"id":"eyJwYXBlcklEIjoiMjQwNS4xNjg0MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2405.16843"},{"id":"eyJwYXBlcklEIjoiMjIxMS4xNDkzMiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2211.14932"},{"id":"eyJwYXBlcklEIjoiMjMwMi4xNDA5OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2302.14099"},{"id":"eyJwYXBlcklEIjoiMjIwNy4xMTEyNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2207.11126"},{"id":"eyJwYXBlcklEIjoiMjMwMi4wMzgwNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2302.03805"},{"id":"eyJwYXBlcklEIjoiMjIwMi4xMzM2MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.13361"},{"id":"eyJwYXBlcklEIjoiMjAxMC4xNDU2M
yIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2010.14563"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wMDI2NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.00264"},{"id":"eyJwYXBlcklEIjoiMTcwNS4wODQzMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1705.08430"},{"id":"eyJwYXBlcklEIjoiMjEwMi4wMDQ5MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2102.00490"},{"id":"eyJwYXBlcklEIjoiMjMwMi4wMDYxMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2302.00610"},{"id":"eyJwYXBlcklEIjoiMjQwNi4wNzU4NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2406.07585"},{"id":"eyJwYXBlcklEIjoiMjMwMS4xMjUzNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2301.12535"},{"id":"eyJwYXBlcklEIjoiMjQwMS4wMDI5OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2401.00298"},{"id":"eyJwYXBlcklEIjoiMjAxMC4wMDkxNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2010.00917"},{"id":"eyJwYXBlcklEIjoiMjIxMC4wMjU2MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2210.02562"},{"id":"eyJwYXBlcklEIjoiMjIwMi4xMTU5MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.11593"},{"id":"eyJwYXBlcklEIjoiMTUxMS4wMzIyNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1511.03225"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wNDc0MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.04741"},{"id":"eyJwYXBlcklEIjoiMTgwNi4wNjY1NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1806.06654"},{"id":"eyJwYXBlcklEIjoiMTcwMy4wNzQzMiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1703.07432"},{"id":"eyJwYXBlcklEIjoiMjAwNy4xMjcyOSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2007.12729"},{"id":"eyJwYXBlcklEIjoiMjEwNi4xNTIwNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2106.15207"},{"id":"eyJwYXBlcklEIjoiMjIxMi4wNDIxNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2212.04216"},{"
id":"eyJwYXBlcklEIjoiMjIwMi4wNjE0MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.06143"},{"id":"eyJwYXBlcklEIjoiNTQzNzEiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"54371"},{"id":"eyJwYXBlcklEIjoiNTQzMzEiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"54331"},{"id":"eyJwYXBlcklEIjoiNTMzODciLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"53387"},{"id":"eyJwYXBlcklEIjoiNTQxMDAiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"54100"},{"id":"eyJwYXBlcklEIjoiNzE3NjIiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"71762"},{"id":"eyJwYXBlcklEIjoiNzA5NDAiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"70940"}]}]}},{"author":"roi livni","node":{"id":"eyJhZGRyZXNzIjoicmxpdm5pQHRhdWV4LnRhdS5hYy5pbCJ9","address":"rlivni@tauex.tau.ac.il","name":null,"avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[],"scholar":[{"thirdPartyID":"xhU85M4AAAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiI4MzQwM2JiNi0wM2VlLTRhNDYtOTc0MS01MjJkNTIxZWYyODIifQ==","name":"roi 
livni","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMTQxMC4xMTQxIiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1410.1141"},{"id":"eyJwYXBlcklEIjoiMjAwMy4wMDU2MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2003.00563"},{"id":"eyJwYXBlcklEIjoiMTYwMy4wNjM1MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1603.06352"},{"id":"eyJwYXBlcklEIjoiMjEwMi4wMTExNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2102.01117"},{"id":"eyJwYXBlcklEIjoiMTcwMi4wNzQ0NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1702.07444"},{"id":"eyJwYXBlcklEIjoiMTcxMS4wNTg5MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1711.05893"},{"id":"eyJwYXBlcklEIjoiMTcxMC4wODk5NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1710.08997"},{"id":"eyJwYXBlcklEIjoiMTYwNi4wNTMxNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1606.05316"},{"id":"eyJwYXBlcklEIjoiMjAwMy4wNjE1MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2003.06152"},{"id":"eyJwYXBlcklEIjoiMjAwMi4xMDI4NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2002.10286"},{"id":"eyJwYXBlcklEIjoiMTMwNC43MDQ1IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1304.7045"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wMzQ2OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.03468"},{"id":"eyJwYXBlcklEIjoiMjMwMi4wNDkyNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2302.04925"},{"id":"eyJwYXBlcklEIjoiMjQwNC4wNDkzMSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2404.04931"},{"id":"eyJwYXBlcklEIjoiMjEwNy4wMDQ2OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2107.00469"},{"id":"eyJwYXBlcklEIjoiMjIwMi4xMzM2MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.13361"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wMDI2NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.00264"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wNDc4MiIs
InB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.04782"},{"id":"eyJwYXBlcklEIjoiMjIwNi4wMzA5OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2206.03098"},{"id":"eyJwYXBlcklEIjoiMjIwMi4xMzMyOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.13328"},{"id":"eyJwYXBlcklEIjoiMTcwOS4wMzg3MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1709.03871"},{"id":"eyJwYXBlcklEIjoiMjEwNi4xMzUxMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2106.13513"},{"id":"eyJwYXBlcklEIjoiMjIwNC4wODgwOSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2204.08809"},{"id":"eyJwYXBlcklEIjoiNTMyNTIiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"53252"},{"id":"eyJwYXBlcklEIjoiNTQzMzEiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"54331"},{"id":"eyJwYXBlcklEIjoiNTQzMDIiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"54302"},{"id":"eyJwYXBlcklEIjoiNzI3MDAiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"72700"},{"id":"eyJwYXBlcklEIjoiMjQwNi4xNTkxNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2406.15916"}]}]}}]},"__typename":"paper","authorArray":["Roi Livni","Yishay 
Mansour"]}}],["$","$L18",null,{"container":true,"columns":100,"spacing":{"compact":0,"expanded":2,"large":3},"children":[["$","$L18",null,{"size":{"compact":100,"expanded":100,"large":68},"children":[["$","$7",null,{"children":["$","$L2c",null,{"publisher":"arxiv","paperID":"1906.00264","product":{"paper":"$1b:props:children:props:children:0:props:product","models":"$1b:props:children:props:children:0:props:product:models"},"isWhiteLabelled":false}]}],["$","$7",null,{"children":["$","$L2d",null,{"article":"$L2e","model":"$undefined"}]}]]}],["$","$L18",null,{"size":"grow","children":["$","$L2f",null,{}]}]]}],["$","$7",null,{"children":null}],[["$","audio",null,{"id":"tts"}],["$","$L30",null,{"paperID":"1906.00264","publisher":"arxiv","paperJSON":{"title":"Graph-based Discriminators: Sample Complexity and Expressiveness","paperID":"1906.00264","avgLineHeight":13.56,"imgScale":4,"sections":[{"heading":"Abstract","paragraphs":[[{"text":"A basic question in learning theory is to identify if two distributions are identical when we have access only to examples sampled from the distributions. This basic task is considered, for example, in the context of Generative Adversarial Networks (GANs), where a discriminator is trained to distinguish between a real-life distribution and a synthetic distribution. Classically, we use a hypothesis class ","element":"span"},{"text":"H ","element":"span"},{"text":"and claim that the two distributions are distinct if for some ","element":"span"},{"style":{"height":11.6},"width":108,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/0-0.png","element":"img","alt":" h ∈ H","inline":true,"padRight":true},{"text":"the expected value on the two distributions is (significantly) different.","element":"span"}],[{"text":"Our starting point is the following fundamental problem: “is having the hypothesis dependent on more than a single random example beneficial”. 
To address this challenge we define ","element":"span"},{"text":"k","element":"span"},{"text":"-ary based discriminators, which have a family of Boolean ","element":"span"},{"text":"k","element":"span"},{"text":"-ary functions ","element":"span"},{"text":"G","element":"span"},{"text":". Each function ","element":"span"},{"style":{"height":14},"width":94.6,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/0-1.png","element":"img","alt":" g ∈ G","inline":true,"padRight":true},{"text":"naturally defines a hyper-graph, indicating whether a given hyper-edge exists. A function ","element":"span"},{"style":{"height":14},"width":94.12,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/0-2.png","element":"img","alt":" g ∈ G","inline":true,"padRight":true},{"text":"distinguishes between two distributions, if the expected value of ","element":"span"},{"text":"g","element":"span"},{"text":", on a ","element":"span"},{"text":"k","element":"span"},{"text":"-tuple of i.i.d examples, on the two distributions is (significantly) different.","element":"span"}],[{"text":"We study the expressiveness of families of ","element":"span"},{"text":"k","element":"span"},{"text":"-ary functions, compared to the classical hypothesis class ","element":"span"},{"text":"H","element":"span"},{"text":", which is ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1. We show a separation in expressiveness of ","element":"span"},{"text":"k ","element":"span"},{"text":"+ 1-ary versus ","element":"span"},{"text":"k","element":"span"},{"text":"-ary functions. 
This demonstrates the great benefit of having ","element":"span"},{"style":{"height":13.2},"width":64.12,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/0-3.png","element":"img","alt":" k ≥","inline":true,"padRight":true},{"text":"2 as distinguishers.","element":"span"}],[{"text":"For ","element":"span"},{"style":{"height":13.2},"width":64.12,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/0-4.png","element":"img","alt":" k ≥","inline":true,"padRight":true},{"text":"2 we introduce a notion similar to the VC-dimension, and show that it controls the sample complexity. We proceed and provide upper and lower bounds as a function of our extended notion of VC-dimension.","element":"span"}]]},{"heading":"1 Introduction","paragraphs":[[{"text":"The task of discrimination consists of a ","element":"span"},{"text":"discriminator ","element":"span"},{"text":"that receives finite samples from two distributions, say ","element":"span"},{"style":{"height":16},"width":177.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/0-5.png","element":"img","alt":" p1 and p2","inline":true},{"text":", and needs to certify whether the two distributions are distinct. 
Discrimination has a central role within the framework of Generative Adversarial Networks ","element":"span"},{"href":"#id-0","referenceIndex":12,"text":"[12]","element":"a"},{"text":", where a discriminator trains a neural net to distinguish between samples from a real-life distribution and samples generated synthetically by another neural network, called a ","element":"span"},{"text":"generator","element":"span"},{"text":".","element":"span"}],[{"text":"A possible formal setup for discrimination identifies the discriminator with some distinguishing class ","element":"span"},{"style":{"height":17.6},"width":855.84,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/1-0.png","element":"img","alt":" D = {f : X → R} of distinguishing functions.","inline":true,"padRight":true},{"text":"In turn, the discriminator wishes to find the best ","element":"span"},{"style":{"height":13.2},"width":114.64,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/1-1.png","element":"img","alt":" d ∈ D","inline":true,"padRight":true},{"text":"that distinguishes between the two distributions. Formally, she wishes to find ","element":"span"},{"style":{"height":15.53},"width":314.6,"height":38.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/1-2.png","element":"img","alt":" d ∈ D such that1","inline":true}],[{"id":"id-1","style":{"width":"90%"},"width":1292,"height":108,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/1-3.png","element":"img"}],[{"text":"For example, in GANs, the class of distinguishing functions we will consider could be the class of neural networks trained by the discriminator.","element":"span"}],[{"text":"The first term in the RHS of eq. 
","element":"span"},{"href":"#id-1","text":"(1) ","element":"a"},{"text":"is often referred to as the ","element":"span"},{"text":"Integral Probability Metric ","element":"span"},{"text":"(IPM distance) w.r.t a class ","element":"span"},{"text":"D ","element":"span"},{"href":"#id-2","referenceIndex":17,"text":"[17]","element":"a"},{"text":", denoted IPM","element":"span"},{"style":{"height":15.49},"width":115.4,"height":38.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/1-4.png","element":"img","alt":"D. As","inline":true,"padRight":true},{"text":"such, we can think of the discriminator as computing the IPM","element":"span"},{"style":{"height":15.09},"width":210.72,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/1-5.png","element":"img","alt":"D distance.","inline":true}],[{"text":"Whether two, given, distributions can be distinguished by the discriminator becomes, in the IPM setup, a property of the distinguishing class. Also, the number of examples needed to be observed will depend on the class in question. Thus, if we take a large expressive class of distinguishers, the discriminator can potentially distinguish between any two distributions that are far in total variation. In that extreme, though, the class of distinguishers would need to be very large and in turn, the number of samples needed to be observed scales accordingly. One could also choose a “small” class, but at a cost of smaller distinguishing power that yields smaller IPM distance.","element":"span"}],[{"text":"For example, consider two distributions over [","element":"span"},{"text":"n","element":"span"},{"text":"] to be distinguished. We could choose as a distinguishing class the class of ","element":"span"},{"text":"all ","element":"span"},{"text":"possible subsets over ","element":"span"},{"text":"n","element":"span"},{"text":". 
This distinguishing class gives rise to the total variation distance, but the sample complexity turns out to be ","element":"span"},{"text":"O","element":"span"},{"text":"(","element":"span"},{"text":"n","element":"span"},{"text":"). Alternatively we can consider the class of ","element":"span"},{"text":"singletons","element":"span"},{"text":": This class will induce a simple IPM distance, with graceful sample complexity, however in the worst case the IPM distance can be as small as ","element":"span"},{"text":"O","element":"span"},{"text":"(1","element":"span"},{"text":"/n","element":"span"},{"text":") even though the total variation distance is large.","element":"span"}],[{"text":"Thus, the IPM framework initiates a study of generalization complexity where we wish to understand what is the expressive power of each class and what is its sample complexity.","element":"span"}],[{"text":"For this special case that ","element":"span"},{"text":"D ","element":"span"},{"text":"consists of Boolean functions, the problem turns out to be closely related to the classical statistical learning setting and prediction ","element":"span"},{"href":"#id-3","referenceIndex":22,"text":"[22]","element":"a"},{"text":". The sample complexity (i.e., number of samples needed to be observed by the discriminator) is governed by a combinatorial measure termed ","element":"span"},{"text":"VC dimension","element":"span"},{"text":". Specifically, for the discriminator to be able to find a ","element":"span"},{"text":"d ","element":"span"},{"text":"as in eq. 
","element":"span"},{"href":"#id-1","text":"(1)","element":"a"},{"text":", she needs to observe order of Θ( ","element":"span"},{"style":{"height":20},"width":28.92,"height":50,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/2-0.png","element":"img","alt":"ρǫ2","inline":true},{"text":") examples, where ","element":"span"},{"style":{"height":15.6},"width":67.4,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/2-1.png","element":"img","alt":" ρ is","inline":true,"padRight":true},{"text":"the VC dimension of the class ","element":"span"},{"text":"D ","element":"span"},{"href":"#id-4","referenceIndex":5,"text":"[5, ","element":"a"},{"href":"#id-3","referenceIndex":22,"text":"22]","element":"a"},{"text":".","element":"span"}],[{"text":"In this work we consider a natural extension of this framework to more sophisticated discriminators: For example, consider a discriminator that observes pairs of points from the distribution and checks for collisions – such a distinguisher cannot apriori be modeled as a test of Boolean functions, as the tester measures a relation between two points and not a property of a single point. 
The collision test has indeed been used, in the context of synthetic data generation, to evaluate the ","element":"span"},{"text":"diversity ","element":"span"},{"text":"of the synthetic distribution ","element":"span"},{"href":"#id-5","referenceIndex":1,"text":"[2]","element":"a"},{"text":".","element":"span"}],[{"text":"More generally, suppose we have a class of 2-ary Boolean functions: ","element":"span"},{"text":"G ","element":"span"},{"text":"= ","element":"span"},{"style":{"height":17.6},"width":469.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/2-2.png","element":"img","alt":"{g : g(x1, x2) → {0, 1}}","inline":true,"padRight":true},{"text":"and the discriminator wishes to (approximately) compute","element":"span"}],[{"id":"id-6","style":{"width":"80%"},"width":1155,"height":133,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/2-3.png","element":"img"}],[{"text":"Here ","element":"span"},{"style":{"height":18.34},"width":39.08,"height":45.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/2-4.png","element":"img","alt":" p2 ","inline":true,"padRight":true},{"text":"denotes the product distribution over ","element":"span"},{"text":"p","element":"span"},{"text":". More generally, we may consider ","element":"span"},{"text":"k","element":"span"},{"text":"-ary mappings, but for the sake of clarity, we will restrict our attention in this introduction to ","element":"span"},{"text":"k ","element":"span"},{"text":"= 2. 
Such 2-ary Boolean mappings can be considered as graphs where ","element":"span"},{"style":{"height":17.6},"width":209.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/2-5.png","element":"img","alt":" g(x1, x2) =","inline":true,"padRight":true},{"text":"1 symbolizes that there exists an edge between ","element":"span"},{"style":{"height":15.09},"width":197.96,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/2-6.png","element":"img","alt":" x1 and x2","inline":true,"padRight":true},{"text":"and similarly ","element":"span"},{"style":{"height":17.6},"width":144.68,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/2-7.png","element":"img","alt":"g(x1, x2","inline":true},{"text":") = 0 denotes that there is no such edge. ","element":"span"},{"text":"The collision test, for example, is modelled by a graph that contains only self–loops. We thus call such multi-ary statistical tests ","element":"span"},{"text":"graph-based distinguishers","element":"span"},{"text":". Two natural questions then arise:","element":"span"}],[{"text":"1. Do graph–based discriminators have any added distinguishing power over classical discriminators?","element":"span"}],[{"text":"2. What is the sample complexity of graph–based discriminators?","element":"span"}],[{"text":"With respect to the first question we give an affirmative answer and we show a separation between the distinguishing power of graph–based discriminators and classical discriminators. ","element":"span"},{"text":"As to the second question, we introduce a new combinatorial measure (termed ","element":"span"},{"text":"graph VC dimension","element":"span"},{"text":") that governs the","element":"span"}],[{"text":"sample complexity of graph–based discriminators – analogously to the VC characterization of the sample complexity of classical discriminators. We next elaborate on each of these two results. 
As to the distinguishing power of graph–based discriminators, we give an affirmative answer in the following sense: We show that there exists a single graph ","element":"span"},{"text":"g ","element":"span"},{"text":"such that, for any distinguishing class ","element":"span"},{"text":"D ","element":"span"},{"text":"with bounded VC dimension, and ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/3-0.png","element":"img","alt":" ǫ","inline":true},{"text":", there are two distributions ","element":"span"},{"style":{"height":16},"width":445.36,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/3-1.png","element":"img","alt":" p1 and p2 that are D–","inline":true,"padRight":true},{"text":"indistinguishable but ","element":"span"},{"text":"g ","element":"span"},{"text":"certifies that ","element":"span"},{"style":{"height":16},"width":187.88,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/3-2.png","element":"img","alt":" p1 and p2","inline":true,"padRight":true},{"text":"are distinct. Namely, the quantity in eq. ","element":"span"},{"href":"#id-6","text":"(2) ","element":"a"},{"text":"is at least 1","element":"span"},{"text":"/","element":"span"},{"text":"4 for ","element":"span"},{"text":"G ","element":"span"},{"text":"= ","element":"span"},{"text":"{","element":"span"},{"text":"g","element":"span"},{"text":"}","element":"span"},{"text":". This result may be surprising. It is indeed known that for any two distributions that are ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/3-3.png","element":"img","alt":" ǫ","inline":true},{"text":"–far in total variation, there exists a boolean mapping ","element":"span"},{"text":"d ","element":"span"},{"text":"that distinguishes between the two distributions. 
In that sense, distinguishing classes are known to be universal. Thus, asymptotically, with enough samples any two distributions can ultimately be distinguished via a standard distinguishing function. Nevertheless, our result shows that, given finite data, the restriction to classes with finite capacity is limiting, and there could be graph-based distinguishing functions whose distinguishing power is not comparable to ","element":"span"},{"text":"any ","element":"span"},{"text":"class with finite capacity. We stress that the same graph competes with ","element":"span"},{"text":"all ","element":"span"},{"text":"finite–capacity classes, irrespective of their VC dimension. With respect to the second question, we introduce a new VC-like notion termed ","element":"span"},{"text":"graph VC dimension ","element":"span"},{"text":"that extends naturally to graphs (and hypergraphs). ","element":"span"},{"text":"At a high level, we show that for a class of graph-based distinguishers with graph VC dimension ","element":"span"},{"style":{"height":17.6},"width":123.8,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/3-4.png","element":"img","alt":" ρ, O(ρ","inline":true},{"text":") examples are sufficient for discrimination and that Ω(","element":"span"},{"style":{"height":17.6},"width":59.48,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/3-5.png","element":"img","alt":"√ρ","inline":true},{"text":") examples are necessary. This leaves a gap of factor ","element":"span"},{"style":{"height":17.6},"width":59,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/3-6.png","element":"img","alt":"√ρ","inline":true,"padRight":true},{"text":"which we leave as an open question. 
The notion we introduce is strictly weaker than the standard VC–dimension of families of multi-ary functions, and the proofs we provide do not follow directly from classical results on learnability of finite VC classes ","element":"span"},{"href":"#id-3","referenceIndex":22,"text":"[22, ","element":"a"},{"href":"#id-4","referenceIndex":5,"text":"5]","element":"a"},{"text":". In more details, a graph-based distinguishing class ","element":"span"},{"text":"G ","element":"span"},{"text":"is a family of Boolean functions over the product space of vertices ","element":"span"},{"style":{"height":21.55},"width":300.6,"height":53.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/3-7.png","element":"img","alt":" V: G ⊆ {0, 1}V2","inline":true},{"text":". As such it is equipped with a VC dimension, the largest set of pairs of vertices that is shattered by ","element":"span"},{"text":"G","element":"span"},{"text":". It is not hard to show that finite VC is sufficient to achieve finite sample complexity bounds over 2-ary functions ","element":"span"},{"href":"#id-7","referenceIndex":9,"text":"[9]","element":"a"},{"text":". It turns out, though, that it is not a necessary condition: For example, one can show that the class of ","element":"span"},{"text":"k-regular graphs ","element":"span"},{"text":"has finite graph VC dimension but infinite VC dimension. Thus, even though they are not learnable in the standard PAC setting, they have finite sample complexity within the framework of discrimination. 
The reason for this gap, between learnability and discriminability, is that","element":"span"}],[{"text":"learning requires uniform convergence with respect to any possible distribution over pairs, while discrimination requires uniform convergence only with respect to product distributions – formally then, it is a weaker task, and, potentially, can be performed even for classes with infinite VC dimension.","element":"span"}],[{"text":"1.1 ","element":"span"},{"text":"Related Work","element":"span"}],[{"text":"The task of discrimination has been considered as early as the work of Vapnik and Chervonenkis in ","element":"span"},{"href":"#id-3","referenceIndex":22,"text":"[22]","element":"a"},{"text":". In fact, even though Vapnik and Chervonenkis’s original work is often referred to in the context of prediction, the original work considered the question of when the empirical frequency of Boolean functions converges uniformly to the true probability over a class of functions. In that sense, this work can be considered as a natural extension to ","element":"span"},{"text":"k","element":"span"},{"text":"-ary functions and a generalization of the notion of VC dimension.","element":"span"}],[{"text":"The work of ","element":"span"},{"href":"#id-7","referenceIndex":9,"text":"[9, ","element":"a"},{"href":"#id-8","referenceIndex":8,"text":"8] ","element":"a"},{"text":"also studies a generalization of VC theory to multi-ary functions in the context of ranking tasks and U-statistics. They study the standard notion of VC dimension. Specifically they consider the function class as Boolean functions over multi-tuples and the VC dimension is defined by the largest set of multi-tuples that can be shattered. Their work provides several interesting fast-rate convergence guarantees. 
","element":"span"},{"text":"As discussed in the introduction, our notion of capacity is weaker, and in general the results are incomparable.","element":"span"}],[{"text":"GANs ","element":"span"},{"text":"A more recent interest in discrimination tasks is motivated by the framework of GANs, where a neural network is trained to distinguish between two sets of data – one is real and the other is generated by another neural network called ","element":"span"},{"text":"generator","element":"span"},{"text":". ","element":"span"},{"text":"Multi-ary tests have been proposed to assess the quality of GANs networks. ","element":"span"},{"href":"#id-5","referenceIndex":1,"text":"[2] ","element":"a"},{"text":"suggests birthday paradox to evaluate ","element":"span"},{"text":"diversity ","element":"span"},{"text":"in GANs. ","element":"span"},{"href":"#id-9","referenceIndex":19,"text":"[19] ","element":"a"},{"text":"uses Binning to assess the solution proposed by GANs.","element":"span"}],[{"text":"Closer to this work ","element":"span"},{"href":"#id-10","referenceIndex":15,"text":"[15] ","element":"a"},{"text":"suggests the use of a discriminator that observes samples from the ","element":"span"},{"text":"m","element":"span"},{"text":"-th product distribution. Motivated by the problem of ","element":"span"},{"text":"mode collapse ","element":"span"},{"text":"they suggest a theoretical framework in which they study the algorithmic benefits of such discriminators and observe that they can significantly reduce mode collapse. In contrast, our work is less concerned with the problem of mode collapse directly and we ask in general if we can boost the distinguishing power of discriminators via multi-ary discrimination. 
Moreover, we provide several novel sample complexity bounds.","element":"span"}],[{"text":"Property Testing ","element":"span"},{"text":"A related problem to ours is that of testing closeness of distributions ","element":"span"},{"href":"#id-11","referenceIndex":2,"text":"[3, ","element":"a"},{"href":"#id-12","referenceIndex":10,"text":"11]","element":"a"},{"text":". Traditionally, testing closeness of distribution is concerned with evaluating if two discrete distributions are close vs. far/identical in ","element":"span"},{"text":"total variation","element":"span"},{"text":". ","element":"span"},{"href":"#id-12","referenceIndex":10,"text":"[11]","element":"a"},{"text":", motivated by graph expansion test, propose a collision test to verify if a certain distribution is close to uniform. Interestingly, a collision test is a graph-based discriminator which turns out to be optimal for the setting","element":"span"},{"href":"#id-13","referenceIndex":18,"text":"[18]","element":"a"},{"text":". Our sample–complexity lower bounds are derived from these results. ","element":"span"},{"text":"Specifically we reduce discrimination to testing uniformity ","element":"span"},{"href":"#id-13","referenceIndex":18,"text":"[18]","element":"a"},{"text":". Other lower bounds in the literature can be similarly used to achieve alternative (yet incomparable bounds) (e.g. 
","element":"span"},{"href":"#id-14","referenceIndex":7,"text":"[7] ","element":"a"},{"text":"provides a Ω(","element":"span"},{"style":{"height":20.34},"width":297.32,"height":50.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/5-0.png","element":"img","alt":"n2/3/ǫ3/4) lower","inline":true,"padRight":true},{"text":"bounds for testing whether two distributions are far or close).","element":"span"}],[{"text":"In contrast with the aforementioned setup, here we do not measure distance between distributions in terms of total variation but in terms of an IPM distance induced by a class of distinguishers. The advantage of the IPM distance is that it sometimes can be estimated with limited amount of samples, while the total variation distance scales with the size of the support, which is often too large to allow estimation.","element":"span"}],[{"text":"Several works do study the question of distinguishing between two distributions w.r.t a finite capacity class of tests, Specifically the work of ","element":"span"},{"href":"#id-15","referenceIndex":14,"text":"[14] ","element":"a"},{"text":"studies refutation algorithms that distinguish between noisy labels and labels that correlate with a bounded hypothesis class. ","element":"span"},{"href":"#id-16","referenceIndex":20,"text":"[21] ","element":"a"},{"text":"studies a closely related question in the context of realizable PAC learning. A graph-based discriminator can be directly turned to a refutation algorithm, and both works of ","element":"span"},{"href":"#id-15","referenceIndex":14,"text":"[14, ","element":"a"},{"href":"#id-16","referenceIndex":20,"text":"21] ","element":"a"},{"text":"show reductions from refutation to learning. In turn, the agnostic bounds of ","element":"span"},{"href":"#id-15","referenceIndex":14,"text":"[14] ","element":"a"},{"text":"can be harnessed to achieve lower bounds for graph-based discrimination. Unfortunately this approach leads to suboptimal lower bounds. 
It would be interesting to see if one can improve the guarantees for such reductions, and in turn exploit them for our setting.","element":"span"}]]},{"heading":"2 Problem Setup","paragraphs":[[{"text":"2.1 ","element":"span"},{"text":"Basic Notations – Graphs and HyperGraphs","element":"span"}],[{"text":"Recall that a ","element":"span"},{"text":"k","element":"span"},{"text":"-hypergraph ","element":"span"},{"text":"g ","element":"span"},{"text":"consists of a set ","element":"span"},{"style":{"height":17.49},"width":246.04,"height":43.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/5-1.png","element":"img","alt":" Vg of vertices","inline":true,"padRight":true},{"text":"and a collection of non-empty ","element":"span"},{"text":"k","element":"span"},{"text":"–tuples over ","element":"span"},{"style":{"height":20.22},"width":218.64,"height":50.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/5-2.png","element":"img","alt":" V: Eg ⊆ Vk","inline":true},{"text":", which are referred to as ","element":"span"},{"text":"hyperedges","element":"span"},{"text":". If ","element":"span"},{"text":"k ","element":"span"},{"text":"= 2 then ","element":"span"},{"text":"g ","element":"span"},{"text":"is called a graph. 1–hypergraphs are simply identified as subsets over ","element":"span"},{"text":"V","element":"span"},{"text":". We will normally use ","element":"span"},{"text":"d ","element":"span"},{"text":"to denote such 1-hypergraphs and will refer to them as ","element":"span"},{"text":"distinguishers","element":"span"},{"text":". 
A distinguisher ","element":"span"},{"text":"d ","element":"span"},{"text":"can be identified with a Boolean function according to the rule: ","element":"span"},{"style":{"height":17.6},"width":370.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/5-3.png","element":"img","alt":" d(x) = 1 iff x ∈ Ed.","inline":true}],[{"text":"Similarly we can identify a ","element":"span"},{"text":"k","element":"span"},{"text":"-hypergraph with a function ","element":"span"},{"style":{"height":19.54},"width":295.2,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/5-4.png","element":"img","alt":" g : Vk → {0, 1}.","inline":true}],[{"text":"Namely, for any graph ","element":"span"},{"text":"g ","element":"span"},{"text":"we identify it with the Boolean function","element":"span"}],[{"style":{"width":"49%"},"width":707,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-0.png","element":"img"}],[{"text":"We will further simplify and assume that ","element":"span"},{"text":"g ","element":"span"},{"text":"is ","element":"span"},{"text":"undirected","element":"span"},{"text":", this means that for any permutation ","element":"span"},{"style":{"height":17.6},"width":214.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-1.png","element":"img","alt":" π : [k] → [k","inline":true},{"text":"], we have that","element":"span"}],[{"style":{"width":"51%"},"width":737,"height":51,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-2.png","element":"img"}],[{"text":"We will call undirected ","element":"span"},{"text":"k","element":"span"},{"text":"-hypergraphs, ","element":"span"},{"text":"k","element":"span"},{"text":"-distinguishers. 
","element":"span"},{"text":"A collection of ","element":"span"},{"text":"k","element":"span"},{"text":"-distinguishers over a common set of vertices ","element":"span"},{"text":"V ","element":"span"},{"text":"will be referred to as a ","element":"span"},{"text":"k","element":"span"},{"text":"-","element":"span"},{"text":"distinguishing class","element":"span"},{"text":". If ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1 we will simply call such a collection ","element":"span"},{"text":"a distinguishing class","element":"span"},{"text":". For ","element":"span"},{"text":"k > ","element":"span"},{"text":"1 we will normally denote such a collection with ","element":"span"},{"text":"G ","element":"span"},{"text":"and for ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1 we will often use the letter ","element":"span"},{"text":"D","element":"span"},{"text":".","element":"span"}],[{"text":"Next, given a distribution ","element":"span"},{"text":"P ","element":"span"},{"text":"over vertices and a ","element":"span"},{"text":"k","element":"span"},{"text":"–hypergraph ","element":"span"},{"text":"g ","element":"span"},{"text":"let us denote as follows the frequency of an edge w.r.t ","element":"span"},{"text":"P","element":"span"},{"text":":","element":"span"}],[{"style":{"width":"88%"},"width":1274,"height":84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-3.png","element":"img"}],[{"text":"where we use the notation ","element":"span"},{"style":{"height":10.69},"width":64.8,"height":26.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-4.png","element":"img","alt":" v1:t","inline":true,"padRight":true},{"text":"in shorthand for the sequence (","element":"span"},{"style":{"height":17.6},"width":241.64,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-5.png","element":"img","alt":"v1, . . . 
, vt) ∈","inline":true},{"style":{"height":18.34},"width":208.08,"height":45.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-6.png","element":"img","alt":"Vt, and P k ","inline":true,"padRight":true},{"text":"denotes the product distribution of ","element":"span"},{"text":"P k ","element":"span"},{"text":"times.","element":"span"}],[{"text":"Similarly, given a sample ","element":"span"},{"style":{"height":18},"width":220.52,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-7.png","element":"img","alt":" S = {vi}mi=1 ","inline":true,"padRight":true},{"text":"we denote the empirical frequency ","element":"span"},{"text":"of an edge:","element":"span"}],[{"style":{"width":"83%"},"width":1204,"height":127,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-8.png","element":"img"}],[{"text":"As a final set of notations: Given a ","element":"span"},{"text":"k","element":"span"},{"text":"-hypergraph ","element":"span"},{"text":"g ","element":"span"},{"text":"a sequence ","element":"span"},{"style":{"height":15.09},"width":199.04,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-9.png","element":"img","alt":" v1:n where","inline":true,"padRight":true},{"text":"n < k","element":"span"},{"text":", we define a ","element":"span"},{"style":{"height":12.8},"width":103.28,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-10.png","element":"img","alt":" k − n","inline":true},{"text":"–distinguisher ","element":"span"},{"style":{"height":12.41},"width":82.32,"height":31.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-11.png","element":"img","alt":" gv1:n","inline":true,"padRight":true},{"text":"as follows:","element":"span"}],[{"style":{"width":"54%"},"width":776,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-12.png","element":"img"}],[{"text":"In turn, we define the 
following distinguishing classes: For every sequence ","element":"span"},{"style":{"height":15.6},"width":209.72,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-13.png","element":"img","alt":"v1:n, n < k","inline":true},{"text":", the distinguishing class ","element":"span"},{"style":{"height":16.82},"width":87.6,"height":42.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-14.png","element":"img","alt":" Gv1:n","inline":true,"padRight":true},{"text":"is defined as follows:","element":"span"}],[{"style":{"width":"64%"},"width":923,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-15.png","element":"img"}],[{"text":"Finally, we point out that we will mainly be concerned with the case that ","element":"span"},{"style":{"height":17.6},"width":340.64,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/6-16.png","element":"img","alt":" |V| ≤ ∞ or V = N","inline":true},{"text":". 
However, all the results here can be easily extended to other domains as long as certain (natural) measurability assumptions are given to ensure that VC theory holds (see ","element":"span"},{"href":"#id-3","referenceIndex":22,"text":"[22, ","element":"a"},{"href":"#id-17","referenceIndex":4,"text":"4]","element":"a"},{"text":").","element":"span"}],[{"text":"2.2 ","element":"span"},{"text":"IPM distance","element":"span"}],[{"text":"Given a class of distinguishers ","element":"span"},{"text":"D ","element":"span"},{"text":"the induced IPM distance ","element":"span"},{"href":"#id-2","referenceIndex":17,"text":"[17]","element":"a"},{"text":", denoted by IPM","element":"span"},{"style":{"height":8.8},"width":27,"height":22,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-0.png","element":"img","alt":"D","inline":true},{"text":", is a (pseudo)–metric between distributions over ","element":"span"},{"text":"V ","element":"span"},{"text":"defined as follows","element":"span"}],[{"style":{"width":"90%"},"width":1296,"height":108,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-1.png","element":"img"}],[{"text":"The definition can naturally be extended to a general family of graphs, and we define:","element":"span"}],[{"style":{"width":"103%"},"width":1481,"height":134,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-2.png","element":"img"}],[{"text":"Another metric we would care about is the ","element":"span"},{"text":"total variation metric","element":"span"},{"text":". 
Given two distributions ","element":"span"},{"style":{"height":16},"width":179.24,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-3.png","element":"img","alt":" p1 and p2","inline":true,"padRight":true},{"text":"the total variation distance is defined as:","element":"span"}],[{"style":{"width":"43%"},"width":621,"height":71,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-4.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":18.34},"width":200.36,"height":45.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-5.png","element":"img","alt":" E ⊆ V{0,1} ","inline":true,"padRight":true},{"text":"goes over all measurable events.","element":"span"}],[{"text":"In contrast with an IPM distance, the total variation metric is indeed a","element":"span"}],[{"text":"metric and any two distributions ","element":"span"},{"style":{"height":16.8},"width":143.24,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-6.png","element":"img","alt":" p1 ̸= p2","inline":true,"padRight":true},{"text":"we have ","element":"span"},{"text":"t","element":"span"},{"text":"hat TV(","element":"span"},{"style":{"height":17.6},"width":279.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-7.png","element":"img","alt":"p1, p2) > 0. 
In","inline":true}],[{"text":"fact, for every distinguishing class ","element":"span"},{"style":{"height":17.94},"width":328.04,"height":44.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-8.png","element":"img","alt":" D, IPMD ⪯ TV.2","inline":true}],[{"text":"For finite classes of vertices ","element":"span"},{"text":"V","element":"span"},{"text":", it is known that the total variation metric","element":"span"}],[{"style":{"width":"72%"},"width":1039,"height":149,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-9.png","element":"img"}],[{"text":"Further, if we let ","element":"span"},{"text":"D ","element":"span"},{"text":"= ","element":"span"},{"text":"P","element":"span"},{"text":"(","element":"span"},{"text":"V","element":"span"},{"text":") the power set of ","element":"span"},{"text":"V ","element":"span"},{"text":"we obtain","element":"span"}],[{"style":{"width":"39%"},"width":565,"height":50,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-10.png","element":"img"}],[{"id":"id-23","text":"2.3 ","element":"span"},{"text":"Discriminating Algorithms","element":"span"}],[{"text":"Definition 1. 
","element":"span"},{"text":"Given a distinguishing class ","element":"span"},{"text":"G ","element":"span"},{"text":"a ","element":"span"},{"text":"G","element":"span"},{"text":"-discriminating algorithm ","element":"span"},{"text":"A ","element":"span"},{"text":"with sample complexity ","element":"span"},{"style":{"height":17.6},"width":129.8,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-11.png","element":"img","alt":" m(ǫ, δ)","inline":true,"padRight":true},{"text":"is an algorithm that receives as input two finite samples ","element":"span"},{"style":{"height":17.6},"width":245,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-12.png","element":"img","alt":" S = (S1, S2)","inline":true,"padRight":true},{"text":"of vertices and outputs a hyper-graph ","element":"span"},{"style":{"height":20.72},"width":143.16,"height":51.8,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-13.png","element":"img","alt":" gAS ∈ G","inline":true,"padRight":true},{"text":"such that:","element":"span"}],[{"text":"If ","element":"span"},{"style":{"height":15.6},"width":108.68,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-14.png","element":"img","alt":" S1, S2","inline":true,"padRight":true},{"text":"are drawn IID from some unknown distributions ","element":"span"},{"style":{"height":11.6},"width":245.44,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-15.png","element":"img","alt":" p1, p2 respec-","inline":true,"padRight":true},{"text":"tively and ","element":"span"},{"style":{"height":17.6},"width":684.68,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/7-16.png","element":"img","alt":" |S1|, |S2| > m(ǫ, δ) then w.p. 
(1 − δ)","inline":true,"padRight":true},{"text":"the algorithm’s output satisfies:","element":"span"}],[{"style":{"width":"52%"},"width":753,"height":55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-0.png","element":"img"}],[{"text":"The sample complexity of a class ","element":"span"},{"text":"G ","element":"span"},{"text":"is then given by the smallest possible sample complexity of a ","element":"span"},{"text":"G","element":"span"},{"text":"-discriminating algorithm ","element":"span"},{"text":"A","element":"span"},{"text":".","element":"span"}],[{"style":{"width":"94%"},"width":1361,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-1.png","element":"img"}],[{"text":"Namely, there exists a discriminating algorithm for ","element":"span"},{"text":"G ","element":"span"},{"text":"with sample complexity ","element":"span"},{"style":{"height":17.6},"width":245.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-2.png","element":"img","alt":"m(ǫ, δ) < ∞.","inline":true}],[{"text":"VC classes are discriminable ","element":"span"},{"text":"For the case ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1, discrimination is closely related to PAC learning. 
It is easy to see that a proper learning algorithm for a class ","element":"span"},{"text":"D ","element":"span"},{"text":"can be turned into a discriminator: Indeed, given access to samples from two distributions ","element":"span"},{"style":{"height":16},"width":177.32,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-3.png","element":"img","alt":" p1 and p2","inline":true,"padRight":true},{"text":"we can provide a learner with labelled examples from a distribution ","element":"span"},{"text":"p ","element":"span"},{"text":"defined as follows: ","element":"span"},{"text":"p","element":"span"},{"text":"(","element":"span"},{"text":"y ","element":"span"},{"text":"= 1) = ","element":"span"},{"style":{"height":21.26},"width":1149.8,"height":53.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-4.png","element":"img","alt":"p(y = −1) = 12 and p(·|y = 1) = p1, and p(·|y = −1) = p2","inline":true},{"text":". Given access ","element":"span"},{"text":"to samples from ","element":"span"},{"style":{"height":16},"width":187.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-5.png","element":"img","alt":" p1 and p2","inline":true,"padRight":true},{"text":"we can clearly generate IID samples from the distribution ","element":"span"},{"text":"p","element":"span"},{"text":". 
If, in turn, we provide a learner with samples from ","element":"span"},{"text":"p ","element":"span"},{"text":"and it outputs a hypothesis ","element":"span"},{"style":{"height":13.2},"width":109.84,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-6.png","element":"img","alt":" d ∈ D","inline":true,"padRight":true},{"text":"we have that (w.h.p):","element":"span"}],[{"style":{"width":"87%"},"width":1249,"height":621,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-7.png","element":"img"}],[{"text":"One can also see that a converse relation holds, if we restrict our attention to learning balanced labels (i.e., ","element":"span"},{"style":{"height":17.6},"width":371.44,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-8.png","element":"img","alt":" p(y = 1) = p(y = −","inline":true},{"text":"1)). Namely, given labelled examples from some balanced distribution, the output of a discriminator is a predictor that competes with the class of predictors induced by ","element":"span"},{"text":"D","element":"span"},{"text":".","element":"span"}],[{"text":"Overall, the above calculation, together with Vapnik and Chervonenkis’s classical result ","element":"span"},{"href":"#id-3","referenceIndex":22,"text":"[22] ","element":"a"},{"text":"shows that classes with finite VC dimension ","element":"span"},{"style":{"height":12},"width":23,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-9.png","element":"img","alt":" ρ","inline":true,"padRight":true},{"text":"are discriminable with sample complexity ","element":"span"},{"style":{"height":21.07},"width":137,"height":52.68,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/8-10.png","element":"img","alt":" O( ρǫ2).3","inline":true,"padRight":true},{"text":"The necessity of finite VC dimension ","element":"span"},{"text":"for agnostic PAC-learning was shown in 
","element":"span"},{"text":"[1]","element":"span"},{"text":". Basically the same argument shows that given a class ","element":"span"},{"style":{"height":22.54},"width":141.72,"height":56.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/9-0.png","element":"img","alt":" D, ˜Ω( ρǫ2","inline":true,"padRight":true},{"text":") examples are necessary for discrimination. ","element":"span"},{"text":"We next introduce a natural extension of VC dimension to hypergraphs, which will play a similar role.","element":"span"}],[{"text":"2.4 ","element":"span"},{"text":"VC Dimension of hypergraphs","element":"span"}],[{"text":"We next define the notion of graph VC dimension for hypergraphs, as we will later see this notion indeed characterizes the sample complexity of discriminating classes, and in that sense it is a natural extension of the notion of VC dimension for hypotheses classes:","element":"span"}],[{"text":"Definition 2. ","element":"span"},{"text":"Given a family of ","element":"span"},{"text":"k","element":"span"},{"text":"-hypergraphs, ","element":"span"},{"text":"G","element":"span"},{"text":": The graph VC dimension of the class ","element":"span"},{"text":"G","element":"span"},{"text":", denoted ","element":"span"},{"text":"gVC(","element":"span"},{"text":"G","element":"span"},{"text":")","element":"span"},{"text":", is defined inductively as follows: For ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1 gVC(","element":"span"},{"text":"G","element":"span"},{"text":") ","element":"span"},{"text":"is the standard notion of VC dimension, i.e., ","element":"span"},{"text":"gVC(","element":"span"},{"text":"G","element":"span"},{"text":") = VC(","element":"span"},{"text":"G","element":"span"},{"text":")","element":"span"},{"text":". 
For ","element":"span"},{"text":"k > ","element":"span"},{"text":"1","element":"span"},{"text":":","element":"span"}],[{"style":{"width":"34%"},"width":491,"height":65,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/9-1.png","element":"img"}],[{"text":"Roughly, the graph VC dimension of a hypergraph is given by the VC dimension of the induced classes of distinguishers via projections. Namely, we can think of the VC dimension of hypergraphs as the projected VC dimension when we fix all coordinates in an edge except for one.","element":"span"}]]},{"heading":"3 Main Results","paragraphs":[[{"text":"We next describe the main results of this work. ","element":"span"},{"text":"The results are divided into two sections: For the first part we characterize the sample complexity of graph–based distinguishing classes. The second part is concerned with the expressive/distinguishing power of graph–based discriminators. All proofs are provided in appendices ","element":"span"},{"text":"B ","element":"span"},{"text":"and ","element":"span"},{"text":"C ","element":"span"},{"text":"respectively.","element":"span"}],[{"text":"3.1 ","element":"span"},{"text":"The sample complexity of graph-based distinguishing class","element":"span"}],[{"text":"We begin by providing upper bounds to the sample complexity for discrim","element":"span"},{"id":"id-18","text":"ination","element":"span"}],[{"text":"Theorem 1 ","element":"span"},{"text":"(Sample Complexity – Upper Bound)","element":"span"},{"text":". 
","element":"span"},{"text":"Let ","element":"span"},{"text":"G ","element":"span"},{"text":"be a ","element":"span"},{"text":"k","element":"span"},{"text":"–distinguishing class with ","element":"span"},{"text":"gVC(","element":"span"},{"style":{"height":17.6},"width":266.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/9-2.png","element":"img","alt":"G) = ρ then G","inline":true,"padRight":true},{"text":"has sample complexity ","element":"span"},{"style":{"height":25.21},"width":277.97,"height":63.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/9-3.png","element":"img","alt":" O(ρk2ǫ2 log 1/δ).","inline":true}],[{"text":"Theorem ","element":"span"},{"href":"#id-18","text":"1 ","element":"a"},{"text":"is a corollary of the following uniform convergence upper ","element":"span"},{"id":"id-19","text":"bound for graph-based distinguishing classes.","element":"span"}],[{"text":"Theorem 2 ","element":"span"},{"text":"(uniform convergence)","element":"span"},{"text":". ","element":"span"},{"text":"Let ","element":"span"},{"text":"G ","element":"span"},{"text":"be a ","element":"span"},{"text":"k","element":"span"},{"text":"–distinguishing class with ","element":"span"},{"text":"gVC(","element":"span"},{"style":{"height":18},"width":452.84,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-0.png","element":"img","alt":"G) = ρ. Let S = {vi}mi=1 ","inline":true,"padRight":true},{"text":"be an IID sample of vertices drawn from some ","element":"span"},{"text":"unknown distribution ","element":"span"},{"style":{"height":25.22},"width":470.12,"height":63.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-1.png","element":"img","alt":" P. 
If m = Ω(ρk2ǫ2 log 1/δ)","inline":true,"padRight":true},{"text":"then with probability at least ","element":"span"},{"text":"(1 ","element":"span"},{"style":{"height":17.6},"width":81.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-2.png","element":"img","alt":" − δ)","inline":true,"padRight":true},{"text":"(over the randomness of ","element":"span"},{"text":"S","element":"span"},{"text":"):","element":"span"}],[{"style":{"width":"31%"},"width":457,"height":78,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-3.png","element":"img"}],[{"text":"The proof of theorem ","element":"span"},{"href":"#id-19","text":"2 ","element":"a"},{"text":"is given in appendix ","element":"span"},{"href":"#id-20","text":"B.1. ","element":"a"},{"text":"We next provide a lower bound for the sample complexity of discriminating algorithms in terms of the graph VC dimension of the class","element":"span"}],[{"id":"id-22","style":{"width":"102%"},"width":1467,"height":232,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-4.png","element":"img"}],[{"text":"We refer the reader to appendix ","element":"span"},{"href":"#id-21","text":"B.2 ","element":"a"},{"text":"for a proof of theorem ","element":"span"},{"href":"#id-22","text":"3. ","element":"a"},{"text":"Our upper bounds and lower bounds leave a gap of order ","element":"span"},{"style":{"height":19.25},"width":110.84,"height":48.12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-5.png","element":"img","alt":" O(√ρ","inline":true},{"text":"). 
As discussed in section ","element":"span"},{"href":"#id-23","text":"2.3, ","element":"a"},{"text":"for the case ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1 we can provide a tight ","element":"span"},{"style":{"height":20},"width":72.6,"height":50,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-6.png","element":"img","alt":" θ( ρǫ2","inline":true},{"text":") bound through a ","element":"span"},{"text":"reduction to agnostic PAC learning and the appropriate lower bounds","element":"span"},{"text":"[1]","element":"span"},{"text":". In general it would be interesting to improve the above bound both in terms of ","element":"span"},{"style":{"height":16.4},"width":157.92,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-7.png","element":"img","alt":" ρ and k.","inline":true}],[{"text":"3.2 ","element":"span"},{"text":"The expressive power of graph-based distinguishing class","element":"span"}],[{"text":"So far we have characterized the discriminability of graph-based distinguishing classes. It is natural though to ask if graph–based distinguishing classes add any advantage over standard 1-distinguishing classes. In this section we provide several results that show that indeed graphs provide extra expressive power over standard distinguishing classes.","element":"span"}],[{"text":"We begin by providing a result over infinite graphs. (proof is provided ","element":"span"},{"id":"id-24","text":"in appendix ","element":"span"},{"text":"C.1)","element":"span"}],[{"text":"Theorem 4. ","element":"span"},{"text":"Let ","element":"span"},{"text":"V ","element":"span"},{"text":"= ","element":"span"},{"text":"N","element":"span"},{"text":". 
There exists a distinguishing graph class ","element":"span"},{"text":"G","element":"span"},{"text":", with sample complexity ","element":"span"},{"style":{"height":24.77},"width":687.28,"height":61.92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-8.png","element":"img","alt":" m(ǫ, δ) = O(log 1/δǫ2 ) (in fact |G| = 1","inline":true},{"text":") such that: for any ","element":"span"},{"text":"1","element":"span"},{"text":"-distinguishing class ","element":"span"},{"text":"D ","element":"span"},{"text":"with finite VC dimension, and every ","element":"span"},{"style":{"height":13.2},"width":276.36,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-9.png","element":"img","alt":" ǫ > 0 there are","inline":true,"padRight":true},{"text":"two distributions ","element":"span"},{"style":{"height":17.6},"width":1087.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/10-10.png","element":"img","alt":" p1, p2 such that IPMD(p1, p2) < ǫ but IPMG(p1, p2) > 1/2","inline":true}],[{"text":"Theorem ","element":"span"},{"href":"#id-24","text":"4 ","element":"a"},{"text":"can be generalized to higher order distinguishing classes (see ","element":"span"},{"id":"id-43","text":"appendix ","element":"span"},{"href":"#id-25","text":"C.2 ","element":"a"},{"text":"for a proof):","element":"span"}],[{"text":"Theorem 5. ","element":"span"},{"text":"Let ","element":"span"},{"text":"V ","element":"span"},{"text":"= ","element":"span"},{"text":"N","element":"span"},{"text":". 
There exists a ","element":"span"},{"text":"k","element":"span"},{"text":"-distinguishing class ","element":"span"},{"style":{"height":15.28},"width":43.92,"height":38.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-0.png","element":"img","alt":" Gk","inline":true},{"text":", with sample complexity ","element":"span"},{"style":{"height":25.99},"width":427.4,"height":64.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-1.png","element":"img","alt":" m(ǫ, δ) = O(k2+log 1/δǫ2 )","inline":true,"padRight":true},{"text":"such that: For any ","element":"span"},{"style":{"height":12.8},"width":83.92,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-2.png","element":"img","alt":" k−1","inline":true},{"text":"-distinguishing class ","element":"span"},{"style":{"height":15.28},"width":88.04,"height":38.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-3.png","element":"img","alt":" Gk−1","inline":true,"padRight":true},{"text":"with bounded sample complexity, and every ","element":"span"},{"style":{"height":12.4},"width":105.04,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-4.png","element":"img","alt":" ǫ > 0","inline":true,"padRight":true},{"text":"there are two distributions ","element":"span"},{"style":{"height":18.19},"width":1177,"height":45.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-5.png","element":"img","alt":" p1, p2 such that IPMGk−1(p1, p2) < ǫ and IPMGk(p1, p2) > 1/4.","inline":true}],[{"text":"Finite Graphs ","element":"span"},{"text":"We next study the expressive power of distinguishing graphs over finite domains.","element":"span"}],[{"text":"It is known that, over a finite domain ","element":"span"},{"text":"V ","element":"span"},{"text":"= ","element":"span"},{"text":"{","element":"span"},{"text":"1","element":"span"},{"text":", . . . 
, n","element":"span"},{"text":"}","element":"span"},{"text":", we can learn with a sample complexity of ","element":"span"},{"style":{"height":19.73},"width":225.44,"height":49.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-6.png","element":"img","alt":" O( nǫ2 log 1/δ","inline":true},{"text":") any distinguishing class. In fact, we ","element":"span"},{"text":"can learn the total variation metric (indeed the sample complexity of ","element":"span"},{"text":"P","element":"span"},{"text":"(","element":"span"},{"text":"V","element":"span"},{"text":") is bounded by log ","element":"span"},{"text":"|P","element":"span"},{"text":"(","element":"span"},{"text":"V ","element":"span"},{"text":")","element":"span"},{"text":"| ","element":"span"},{"text":"= ","element":"span"},{"text":"n","element":"span"},{"text":").","element":"span"}],[{"text":"Therefore if we allow classes whose sample complexity scales linearly with ","element":"span"},{"text":"n ","element":"span"},{"text":"we cannot hope to show any advantage for distinguishing graphs. However, in most natural problems ","element":"span"},{"text":"n ","element":"span"},{"text":"is considered to be very large (for example, over the Boolean cube ","element":"span"},{"text":"n ","element":"span"},{"text":"is exponential in the dimension). We thus, in general, would like to study classes that have better complexity in terms of ","element":"span"},{"text":"n","element":"span"},{"text":". In that sense, we can show that indeed distinguishing graphs yield extra expressive power.","element":"span"}],[{"text":"In particular, we show that for classes with sublogarithmic sample complexity, we can construct graphs that are incomparable with a higher order distinguishing class.","element":"span"}],[{"id":"id-27","text":"Theorem 6. ","element":"span"},{"text":"Let ","element":"span"},{"text":"|V| ","element":"span"},{"text":"= ","element":"span"},{"text":"n","element":"span"},{"text":". 
There exists a ","element":"span"},{"text":"k","element":"span"},{"text":"-distinguishing class ","element":"span"},{"style":{"height":15.6},"width":160.32,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-7.png","element":"img","alt":" Gk, with","inline":true,"padRight":true},{"text":"sample complexity ","element":"span"},{"style":{"height":25.79},"width":756.4,"height":64.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-8.png","element":"img","alt":" m(ǫ, δ) = O(k2+log 1/δǫ2 ) (in fact |G| = 1","inline":true},{"text":") such that: For any ","element":"span"},{"style":{"height":16.4},"width":381.04,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-9.png","element":"img","alt":" ǫ > 0 and any k − 1","inline":true,"padRight":true},{"text":"distinguishing class ","element":"span"},{"style":{"height":16.4},"width":146.48,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-10.png","element":"img","alt":" Gk−1 if:","inline":true}],[{"style":{"width":"27%"},"width":399,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-11.png","element":"img"}],[{"style":{"height":24.25},"width":613,"height":60.64,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-12.png","element":"img","alt":"then gVC(Gk−1) = Ω( ǫ2k2√log n).","inline":true}],[{"text":"The proof is given in appendix ","element":"span"},{"href":"#id-26","text":"C.3. ","element":"a"},{"text":"We can improve the bound in ","element":"span"},{"text":"theorem ","element":"span"},{"href":"#id-27","text":"6 ","element":"a"},{"text":"for the case ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1 (see appendix ","element":"span"},{"href":"#id-28","text":"C.4 ","element":"a"},{"text":"for proof).","element":"span"}],[{"text":"Theorem 7. 
","element":"span"},{"text":"Let ","element":"span"},{"text":"|V| ","element":"span"},{"text":"= ","element":"span"},{"text":"n","element":"span"},{"text":". There exists a ","element":"span"},{"text":"2","element":"span"},{"text":"-distinguishing class ","element":"span"},{"text":"G","element":"span"},{"text":", with sample complexity ","element":"span"},{"style":{"height":24.58},"width":680.56,"height":61.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-13.png","element":"img","alt":" m(ǫ, δ) = O(log 1/δǫ2 ) (in fact |G| = 1","inline":true},{"text":") such that: For any ","element":"span"},{"style":{"height":12.4},"width":97.84,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-14.png","element":"img","alt":"ǫ > 0","inline":true,"padRight":true},{"text":"and any distinguishing class ","element":"span"},{"text":"D ","element":"span"},{"text":"if:","element":"span"}],[{"style":{"width":"22%"},"width":327,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-15.png","element":"img"}],[{"style":{"height":20.42},"width":521.32,"height":51.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/11-16.png","element":"img","alt":"then gVC(D) = ˜Ω(ǫ2 log n).","inline":true}]]},{"heading":"4 Discussion and open problems","paragraphs":[[{"text":"In this work we developed a generalization of the standard framework of discrimination to graph-based distinguishers that discriminate between two distributions by considering multi-ary tests. 
","element":"span"},{"text":"Several open question arise from our results:","element":"span"}],[{"text":"Improving Sample Complexity Bounds ","element":"span"},{"text":"In terms of sample complexity, while we give a natural upper bound of ","element":"span"},{"style":{"height":19.13},"width":114.92,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/12-0.png","element":"img","alt":" O(ρk2","inline":true},{"text":"), the lower bound we provide are not tight neither in ","element":"span"},{"text":"d ","element":"span"},{"text":"nor in ","element":"span"},{"text":"k ","element":"span"},{"text":"and we provide a lower bound of Ω(","element":"span"}],[{"text":"of ","element":"span"},{"text":"k","element":"span"},{"text":".","element":"span"}],[{"text":"Improving Expressiveness Bounds ","element":"span"},{"text":"We also showed that, over finite domains, we can construct a graph that is incomparable with any class with VC dimension Ω(","element":"span"},{"style":{"height":18.73},"width":133.52,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/12-1.png","element":"img","alt":"ǫ2 log n","inline":true},{"text":"). The best upper bound we can provide (the VC of a class that competes with any graph) is the naive ","element":"span"},{"text":"O","element":"span"},{"text":"(","element":"span"},{"text":"n","element":"span"},{"text":") which is the VC dimension of the total variation metric.","element":"span"}],[{"text":"Additionally, for the ","element":"span"},{"text":"k","element":"span"},{"text":"-hypergraph case, our bounds deteriorate to a Ω(","element":"span"},{"style":{"height":18.74},"width":162.8,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/12-2.png","element":"img","alt":"ǫ2√log n","inline":true},{"text":"). 
The improvement in the graph case follows from using an argument in the spirit of Boosting ","element":"span"},{"href":"#id-12","referenceIndex":10,"text":"[10] ","element":"a"},{"text":"and Hardcore Lemma ","element":"span"},{"href":"#id-29","referenceIndex":13,"text":"[13] ","element":"a"},{"text":"to construct two indistinguishable probabilities with distinct support over a small domain. It would be interesting to extend these techniques in order to achieve similar bounds for the ","element":"span"},{"text":"k > ","element":"span"},{"text":"2 case.","element":"span"}],[{"text":"Relation to GANs and Extension to Online Setting ","element":"span"},{"text":"Finally, a central motivation for learning the sample complexity of discriminators is in the context of GANs. It then raises interesting questions as to the ","element":"span"},{"text":"foolability ","element":"span"},{"text":"of graph-based distinguishers.","element":"span"}],[{"text":"The work of ","element":"span"},{"href":"#id-30","referenceIndex":6,"text":"[6] ","element":"a"},{"text":"suggests a framework for studying sequential games between generators and discriminators (","element":"span"},{"text":"GAM-Fooling","element":"span"},{"text":"). In a nutshell, the GAM setting considers a sequential game between a generator ","element":"span"},{"text":"G ","element":"span"},{"text":"that outputs distributions and a discriminator ","element":"span"},{"text":"D ","element":"span"},{"text":"that has access to data from some distribution ","element":"span"},{"style":{"height":15.94},"width":39.08,"height":39.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/12-3.png","element":"img","alt":" p∗ ","inline":true,"padRight":true},{"text":"(not known to ","element":"span"},{"text":"G","element":"span"},{"text":"). 
At each round of the game, the generator proposes a distribution and the discriminator outputs a ","element":"span"},{"style":{"height":13.2},"width":119.92,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/12-4.png","element":"img","alt":" d ∈ D","inline":true,"padRight":true},{"text":"which distinguishes between the distribution of ","element":"span"},{"text":"G ","element":"span"},{"text":"and the true distribution ","element":"span"},{"style":{"height":16},"width":151.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/12-5.png","element":"img","alt":" p∗. The","inline":true,"padRight":true},{"text":"class ","element":"span"},{"text":"D ","element":"span"},{"text":"is said to be GAM-Foolable if the generator outputs after finitely many rounds a distribution ","element":"span"},{"text":"p ","element":"span"},{"text":"that is ","element":"span"},{"text":"D","element":"span"},{"text":"–indistinguishable from ","element":"span"},{"style":{"height":15.94},"width":39.08,"height":39.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/12-6.png","element":"img","alt":" p∗","inline":true}],[{"href":"#id-30","referenceIndex":6,"text":"[6] ","element":"a"},{"text":"showed that a class ","element":"span"},{"text":"D ","element":"span"},{"text":"is GAM–foolable if and only if it has finite Littlestone dimension. We then ask, similarly, which classes of graph–based distinguishers are GAM-Foolable? 
","element":"span"},{"text":"A characterization of such classes can potentially lead to a natural extension of the Littlestone notion and on-line prediction, to graph-based classes analogously to this work w.r.t VC dimension","element":"span"}],[{"text":"Acknowledgements ","element":"span"},{"text":"The authors would like to thank Shay Moran for helpful discussions and suggesting simplifications for the proofs of theorems ","element":"span"},{"href":"#id-24","text":"4 ","element":"a"},{"text":"to ","element":"span"},{"href":"#id-27","text":"6.","element":"a"}]]},{"heading":"References","paragraphs":[[{"id":"id-5","text":"[1] Martin Anthony and Peter L Bartlett. ","element":"span"},{"text":"Neural network learning: Theoretical foundations","element":"span"},{"text":". cambridge university press, 2009.","element":"span"}],[{"id":"id-11","text":"[2] Sanjeev Arora and Yi Zhang. Do gans actually learn the distribution? ","element":"span"},{"text":"an empirical study. ","element":"span"},{"text":"arXiv preprint arXiv:1706.08224","element":"span"},{"text":", 2017.","element":"span"}],[{"text":"[3] Tugkan Batu, Lance Fortnow, Ronitt Rubinfeld, Warren D Smith, and Patrick White. ","element":"span"},{"text":"Testing that distributions are close. ","element":"span"},{"text":"In ","element":"span"},{"text":"Proceedings 41st Annual Symposium on Foundations of Computer Science","element":"span"},{"text":", pages 259–269. IEEE, 2000.","element":"span"}],[{"id":"id-17","text":"[4] Shai Ben-David. 2 notes on classes with vapnik-chervonenkis dimension ","element":"span"},{"text":"1. ","element":"span"},{"text":"arXiv preprint arXiv:1507.05307","element":"span"},{"text":", 2015.","element":"span"}],[{"id":"id-4","text":"[5] Anselm Blumer, Andrzej Ehrenfeucht, David Haussler, and Manfred K ","element":"span"},{"text":"Warmuth. Learnability and the vapnik-chervonenkis dimension. 
","element":"span"},{"text":"Journal of the ACM (JACM)","element":"span"},{"text":", 36(4):929–965, 1989.","element":"span"}],[{"id":"id-30","text":"[6] Olivier Bousquet, Roi Livni, and Shay Moran. Passing tests without ","element":"span"},{"text":"memorizing: Two models for fooling discriminators. ","element":"span"},{"text":"arXiv preprint arXiv:1902.03468","element":"span"},{"text":", 2019.","element":"span"}],[{"id":"id-14","text":"[7] Siu-On Chan, Ilias Diakonikolas, Paul Valiant, and Gregory Valiant. ","element":"span"},{"text":"Optimal algorithms for testing closeness of discrete distributions. In ","element":"span"},{"text":"Proceedings of the twenty-fifth annual ACM-SIAM symposium on Discrete algorithms","element":"span"},{"text":", pages 1193–1203. SIAM, 2014.","element":"span"}],[{"id":"id-8","text":"[8] St´ephan Cl´emen¸con, Igor Colin, and Aur´elien Bellet. Scaling-up em- ","element":"span"},{"text":"pirical risk minimization: optimization of incomplete u-statistics. ","element":"span"},{"text":"The Journal of Machine Learning Research","element":"span"},{"text":", 17(1):2682–2717, 2016.","element":"span"}],[{"id":"id-7","text":"[9] St´ephan Cl´emen¸con, G´abor Lugosi, Nicolas Vayatis, et al. ","element":"span"},{"text":"Ranking and empirical minimization of u-statistics. ","element":"span"},{"text":"The Annals of Statistics","element":"span"},{"text":", 36(2):844–874, 2008.","element":"span"}],[{"id":"id-12","text":"[10] Yoav Freund and Robert E Schapire. Game theory, on-line prediction ","element":"span"},{"text":"and boosting. In ","element":"span"},{"text":"COLT","element":"span"},{"text":", volume 96, pages 325–332. Citeseer, 1996.","element":"span"}],[{"text":"[11] Oded Goldreich and Dana Ron. ","element":"span"},{"text":"On testing expansion in boundeddegree graphs. In ","element":"span"},{"text":"Studies in Complexity and Cryptography. Miscellanea on the Interplay between Randomness and Computation","element":"span"},{"text":", pages 68–75. 
Springer, 2011.","element":"span"}],[{"id":"id-0","text":"[12] Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David ","element":"span"},{"text":"Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. Generative adversarial nets. In ","element":"span"},{"text":"Advances in neural information processing systems","element":"span"},{"text":", pages 2672–2680, 2014.","element":"span"}],[{"id":"id-29","text":"[13] Russell Impagliazzo. Hard-core distributions for somewhat hard prob- ","element":"span"},{"text":"lems. In ","element":"span"},{"text":"Proceedings of IEEE 36th Annual Foundations of Computer Science","element":"span"},{"text":", pages 538–545. IEEE, 1995.","element":"span"}],[{"id":"id-15","text":"[14] Pravesh K Kothari and Roi Livni. Agnostic learning by refuting. In ","element":"span"},{"text":"9th Innovations in Theoretical Computer Science Conference (ITCS 2018)","element":"span"},{"text":". Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik, 2018.","element":"span"}],[{"id":"id-10","text":"[15] Zinan Lin, Ashish Khetan, Giulia Fanti, and Sewoong Oh. Pacgan: The ","element":"span"},{"text":"power of two samples in generative adversarial networks. In ","element":"span"},{"text":"Advances in Neural Information Processing Systems","element":"span"},{"text":", pages 1498–1507, 2018.","element":"span"}],[{"id":"id-46","text":"[16] Richard J Lipton and Neal E Young. Simple strategies for large zero- ","element":"span"},{"text":"sum games with applications to complexity theory. In ","element":"span"},{"text":"Proceedings of the twenty-sixth annual ACM symposium on Theory of computing","element":"span"},{"text":", pages 734–740. ACM, 1994.","element":"span"}],[{"id":"id-2","text":"[17] Alfred Müller. Integral probability metrics and their generating classes ","element":"span"},{"text":"of functions. 
","element":"span"},{"text":"Advances in Applied Probability","element":"span"},{"text":", 29(2):429–443, 1997.","element":"span"}],[{"id":"id-13","text":"[18] Liam Paninski. ","element":"span"},{"text":"A coincidence-based test for uniformity given very sparsely sampled discrete data. ","element":"span"},{"text":"IEEE Transactions on Information Theory","element":"span"},{"text":", 54(10):4750–4755, 2008.","element":"span"}],[{"id":"id-9","text":"[19] Eitan Richardson and Yair Weiss. On gans and gmms. In ","element":"span"},{"text":"Advances in Neural Information Processing Systems","element":"span"},{"text":", pages 5847–5858, 2018.","element":"span"}],[{"id":"id-16","text":"[20] Shai Shalev-Shwartz and Shai Ben-David. ","element":"span"},{"text":"Understanding machine learning: From theory to algorithms","element":"span"},{"text":". Cambridge university press, 2014.","element":"span"}],[{"text":"[21] Salil P. Vadhan. ","element":"span"},{"text":"On learning vs. refutation. ","element":"span"},{"text":"In ","element":"span"},{"text":"Proceedings of the 30th Conference on Learning Theory, COLT 2017, Amsterdam, The Netherlands, 7-10 July 2017","element":"span"},{"text":", pages 1835–1848, 2017.","element":"span"}],[{"id":"id-3","text":"[22] Vladimir N Vapnik and Aleksei Yakovlevich Chervonenkis. The uni- ","element":"span"},{"text":"form convergence of frequencies of the appearance of events to their probabilities. In ","element":"span"},{"text":"Doklady Akademii Nauk","element":"span"},{"text":", volume 181, pages 781–783. Russian Academy of Sciences, 1968.","element":"span"}]]},{"heading":"A Preliminaries and Technical Background","paragraphs":[[{"style":{"width":"54%"},"width":781,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-0.png","element":"img"}],[{"text":"We begin with a brief overview of some classical results in Statistical Learning theory which characterize VC classes. 
Throughout we assume a domain ","element":"span"},{"text":"X ","element":"span"},{"text":"and a ","element":"span"},{"text":"hypothesis class ","element":"span"},{"text":"which is a family of Boolean functions over ","element":"span"},{"text":"X","element":"span"},{"text":": ","element":"span"},{"style":{"height":19.54},"width":245.28,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-1.png","element":"img","alt":"H ⊆ {0, 1}X .","inline":true}],[{"id":"id-31","text":"Theorem 8. ","element":"span"},{"text":"[Within proof of Thm. 6.11 in ","element":"span"},{"href":"#id-16","referenceIndex":20,"text":"[20]","element":"a"},{"text":"] Let ","element":"span"},{"text":"H ","element":"span"},{"text":"be a class with VC dimension ","element":"span"},{"style":{"height":16.4},"width":121.04,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-2.png","element":"img","alt":" ρ then","inline":true}],[{"style":{"width":"67%"},"width":967,"height":111,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-3.png","element":"img"}],[{"text":"Recall that a class ","element":"span"},{"text":"H ","element":"span"},{"text":"has the ","element":"span"},{"text":"uniform convergence property","element":"span"},{"text":", if for some ","element":"span"},{"style":{"height":19.14},"width":409.84,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-4.png","element":"img","alt":"m : (0, 1)2 → N if P","inline":true,"padRight":true},{"text":"is some unknown distribution and ","element":"span"},{"style":{"height":18},"width":326.32,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-5.png","element":"img","alt":" S = {xi}mi=1 is a","inline":true,"padRight":true},{"text":"sample drawn IID from 
","element":"span"},{"style":{"height":17.6},"width":477.92,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-6.png","element":"img","alt":" P such that |S| > m(ǫ, δ","inline":true},{"text":") then w.p. (1 ","element":"span"},{"style":{"height":17.6},"width":194.12,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-7.png","element":"img","alt":" − δ) (over","inline":true,"padRight":true},{"text":"the sample ","element":"span"},{"text":"S","element":"span"},{"text":"):","element":"span"}],[{"style":{"width":"38%"},"width":551,"height":122,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-8.png","element":"img"}],[{"text":"The following, high probability analogue of theorem ","element":"span"},{"href":"#id-31","text":"8, ","element":"a"},{"text":"is also an immediate corollary of Theorem 6.8 in ","element":"span"},{"href":"#id-16","referenceIndex":20,"text":"[20]","element":"a"},{"style":{"height":14.73},"width":30.72,"height":36.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-9.png","element":"img","alt":"4:","inline":true}],[{"text":"Corollary 1. ","element":"span"},{"text":"[Within Thm 6.8 ","element":"span"},{"href":"#id-16","referenceIndex":20,"text":"[20]","element":"a"},{"text":"] Let ","element":"span"},{"text":"D ","element":"span"},{"text":"be a class with VC dimension ","element":"span"},{"style":{"height":12},"width":23,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/15-10.png","element":"img","alt":"ρ","inline":true},{"text":". There exists a constant ","element":"span"},{"text":"C > ","element":"span"},{"text":"0","element":"span"},{"text":", such that: Let ","element":"span"},{"text":"p ","element":"span"},{"text":"be a distribution with finite support over ","element":"span"},{"text":"V","element":"span"},{"text":". 
Let ","element":"span"},{"text":"S ","element":"span"},{"text":"be an IID sequence","element":"span"}],[{"text":"of ","element":"span"},{"text":"m ","element":"span"},{"text":"elements drawn from ","element":"span"},{"text":"p","element":"span"},{"text":", and denote by ","element":"span"},{"style":{"height":11.6},"width":44.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-0.png","element":"img","alt":" pS","inline":true,"padRight":true},{"text":"the empirical distribution over ","element":"span"},{"style":{"height":24.77},"width":741.8,"height":61.92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-1.png","element":"img","alt":" S. If m ≥ C ρ+log 1/δǫ2 then w.p. (1 − δ)","inline":true,"padRight":true},{"text":"(over the random choice of ","element":"span"},{"text":"S","element":"span"},{"text":") we have that","element":"span"}],[{"style":{"width":"80%"},"width":1148,"height":186,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-2.png","element":"img"}],[{"text":"The problem of testing the closeness of two discrete distributions can be phrased as follows: Given samples from two distributions ","element":"span"},{"style":{"height":16},"width":274.88,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-3.png","element":"img","alt":" p1 and p2 the","inline":true,"padRight":true},{"text":"tester needs to distinguish between the case ","element":"span"},{"style":{"height":11.6},"width":160.52,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-4.png","element":"img","alt":" p1 = p2","inline":true,"padRight":true},{"text":"and the case that ","element":"span"},{"style":{"height":17.6},"width":256.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-5.png","element":"img","alt":"∥p1−p2∥1 ≥ ǫ","inline":true},{"text":". 
We will rely on the following result which follows immediately from a uniformity test lower bound due to ","element":"span"},{"href":"#id-13","referenceIndex":18,"text":"[18]","element":"a"},{"text":".","element":"span"}],[{"id":"id-36","style":{"height":13.2},"width":502.96,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-6.png","element":"img","alt":"Theorem 9. Given ǫ > 0","inline":true,"padRight":true},{"text":"and access to samples from distributions ","element":"span"},{"style":{"height":16},"width":128.68,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-7.png","element":"img","alt":" p1 and","inline":true},{"style":{"height":17.6},"width":197.28,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-8.png","element":"img","alt":"p2 over [n]","inline":true,"padRight":true},{"text":"any algorithm that returns with probability ","element":"span"},{"text":"2","element":"span"},{"text":"/","element":"span"},{"text":"3 ","element":"span"},{"text":"EQUIV ALENT","element":"span"}],[{"text":"if ","element":"span"},{"style":{"height":11.6},"width":137.96,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-9.png","element":"img","alt":" p1 = p2","inline":true,"padRight":true},{"text":"and returns ","element":"span"},{"style":{"height":17.6},"width":592.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-10.png","element":"img","alt":" DISTINCT if ∥p1 − p2∥1 > ǫ","inline":true,"padRight":true},{"text":"must observe at least ","element":"span"},{"text":"Ω","element":"span"},{"style":{"height":20.8},"width":358.12,"height":52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-11.png","element":"img","alt":"�√n/ǫ2}�samples.","inline":true}],[{"text":"We note that ","element":"span"},{"href":"#id-14","referenceIndex":7,"text":"[7] ","element":"a"},{"text":"gives a slightly better lower 
bound, of an order of Ω","element":"span"},{"style":{"height":21.73},"width":466.88,"height":54.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-12.png","element":"img","alt":"�max(n3/4/ǫ4/3, √n/ǫ2)�","inline":true},{"text":". However, to simplify we will focus on rates of ","element":"span"},{"style":{"height":19.13},"width":129.32,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-13.png","element":"img","alt":"O(1/ǫ2","inline":true},{"text":") that scale quadratically in ","element":"span"},{"style":{"height":8},"width":29.76,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-14.png","element":"img","alt":" ǫ.","inline":true}]]},{"heading":"B Sample Complexity –Proofs","paragraphs":[[{"id":"id-20","style":{"width":"40%"},"width":579,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-15.png","element":"img"}],[{"text":"Theorem 2 ","element":"span"},{"text":"(uniform convergence)","element":"span"},{"text":". ","element":"span"},{"text":"Let ","element":"span"},{"text":"G ","element":"span"},{"text":"be a ","element":"span"},{"text":"k","element":"span"},{"text":"–distinguishing class with ","element":"span"},{"text":"gVC(","element":"span"},{"style":{"height":18},"width":452.84,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-16.png","element":"img","alt":"G) = ρ. Let S = {vi}mi=1 ","inline":true,"padRight":true},{"text":"be an IID sample of vertices drawn from some ","element":"span"},{"text":"unknown distribution ","element":"span"},{"style":{"height":25.22},"width":470.12,"height":63.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-17.png","element":"img","alt":" P. 
If m = Ω(ρk2ǫ2 log 1/δ)","inline":true,"padRight":true},{"text":"then with probability at least ","element":"span"},{"text":"(1 ","element":"span"},{"style":{"height":17.6},"width":81.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-18.png","element":"img","alt":" − δ)","inline":true,"padRight":true},{"text":"(over the randomness of ","element":"span"},{"text":"S","element":"span"},{"text":"):","element":"span"}],[{"style":{"width":"31%"},"width":457,"height":78,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-19.png","element":"img"}],[{"text":"Fix a ","element":"span"},{"text":"k","element":"span"},{"text":"–distinguishing class ","element":"span"},{"text":"G ","element":"span"},{"text":"with graph VC dimension ","element":"span"},{"style":{"height":12},"width":23,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/16-20.png","element":"img","alt":" ρ","inline":true},{"text":". As in the standard proof of uniform convergence for VC classes, we first prove the statement in expectation and then apply Mcdiarmid’s inequality to prove the result w.h.p. Specifically, we will use the following Lemma (whose proof ","element":"span"},{"id":"id-33","text":"is given in appendix ","element":"span"},{"href":"#id-32","text":"B.1.1)","element":"a"},{"text":":","element":"span"}],[{"text":"Lemma 1 ","element":"span"},{"text":"(Uniform Convergence in Expectation)","element":"span"},{"text":". ","element":"span"},{"text":"Let ","element":"span"},{"text":"G ","element":"span"},{"text":"be a ","element":"span"},{"text":"k","element":"span"},{"text":"–distinguishing class with ","element":"span"},{"text":"gVC(","element":"span"},{"style":{"height":18},"width":454.76,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-0.png","element":"img","alt":"G) = ρ. 
Let S = {vi}mi=1 ","inline":true,"padRight":true},{"text":"be an IID sample of vertices drawn ","element":"span"},{"text":"from some unknown distribution ","element":"span"},{"text":"P","element":"span"},{"text":". Then,","element":"span"}],[{"style":{"width":"84%"},"width":1206,"height":132,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-1.png","element":"img"}],[{"text":"We next proceed with the proof of theorem ","element":"span"},{"href":"#id-19","text":"2, ","element":"a"},{"text":"assuming the correctness of lemma ","element":"span"},{"href":"#id-33","text":"1. ","element":"a"},{"text":"Define","element":"span"}],[{"style":{"width":"37%"},"width":537,"height":78,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-2.png","element":"img"}],[{"text":"Let ","element":"span"},{"style":{"height":17.6},"width":294,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-3.png","element":"img","alt":" S = (v1, . . . , vm","inline":true},{"text":") be a sample and ","element":"span"},{"style":{"height":12.8},"width":45.28,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-4.png","element":"img","alt":" S′","inline":true},{"text":", some sequence that differ from ","element":"span"},{"text":"S ","element":"span"},{"text":"only in the ","element":"span"},{"text":"i","element":"span"},{"text":"-th vertex then we will show that:","element":"span"}],[{"id":"id-34","style":{"width":"63%"},"width":907,"height":91,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-5.png","element":"img"}],[{"text":"Once we show eq. ","element":"span"},{"href":"#id-34","text":"(4) ","element":"a"},{"text":"holds, the result indeed follow from Mcdiarmid’s inequality and lemma ","element":"span"},{"href":"#id-33","text":"1. 
","element":"a"},{"text":"Specifically if we assume that ","element":"span"},{"style":{"height":25.98},"width":441.04,"height":64.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-6.png","element":"img","alt":" m ≥ 8k2(4+ρ log(2em/ρ)ǫ2 +","inline":true}],[{"style":{"width":"75%"},"width":1076,"height":191,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-7.png","element":"img"}],[{"text":"Applying Mcdiarmid’s we obtain that with probability at least (1 ","element":"span"},{"style":{"height":28.46},"width":184.32,"height":71.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-8.png","element":"img","alt":" − e− mǫ28k2 ),","inline":true,"padRight":true},{"text":"over the sample ","element":"span"},{"text":"S","element":"span"},{"text":":","element":"span"}],[{"style":{"width":"95%"},"width":1368,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-9.png","element":"img"}],[{"text":"Noting that ","element":"span"},{"style":{"height":25.98},"width":257.92,"height":64.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-10.png","element":"img","alt":" m > 8k2 log 1/δǫ2","inline":true,"padRight":true},{"text":", we obtain that with probability at least (1 ","element":"span"},{"style":{"height":17.6},"width":81.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-11.png","element":"img","alt":" − δ)","inline":true}],[{"style":{"width":"85%"},"width":1230,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-12.png","element":"img"}],[{"text":"We are thus left with proving that eq. 
","element":"span"},{"href":"#id-34","text":"(4) ","element":"a"},{"text":"holds.","element":"span"}],[{"text":"For an index ","element":"span"},{"style":{"height":14.8},"width":231,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-13.png","element":"img","alt":" i and m ≥ i","inline":true},{"text":", let us denote by ","element":"span"},{"style":{"height":17.49},"width":177.56,"height":43.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-14.png","element":"img","alt":" πi,m all k","inline":true},{"text":"-subsets of indices from ","element":"span"},{"text":"{","element":"span"},{"text":"1","element":"span"},{"text":", . . . , m","element":"span"},{"text":"} ","element":"span"},{"text":"that include ","element":"span"},{"text":"i ","element":"span"},{"text":"and we let ","element":"span"},{"style":{"height":17.49},"width":261.56,"height":43.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-15.png","element":"img","alt":" π¬i,m be all k","inline":true},{"text":"-sequences that do not include ","element":"span"},{"text":"i","element":"span"},{"text":". Given a set ","element":"span"},{"style":{"height":17.49},"width":561.56,"height":43.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-16.png","element":"img","alt":" S of size m let Si,+ all the k","inline":true},{"text":"-subsets of ","element":"span"},{"text":"S ","element":"span"},{"text":"that include ","element":"span"},{"style":{"height":17.49},"width":281.84,"height":43.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-17.png","element":"img","alt":" vi and let Si,−","inline":true,"padRight":true},{"text":"be all the ","element":"span"},{"text":"k","element":"span"},{"text":"-subsets that do not include ","element":"span"},{"style":{"height":15.2},"width":180.96,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/17-18.png","element":"img","alt":" vi. 
Next,","inline":true,"padRight":true},{"text":"denote","element":"span"}],[{"style":{"width":"54%"},"width":781,"height":125,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/18-0.png","element":"img"}],[{"text":"And similarly","element":"span"}],[{"style":{"width":"55%"},"width":802,"height":125,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/18-1.png","element":"img"}],[{"text":"Then, let ","element":"span"},{"style":{"height":12.8},"width":173.92,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/18-2.png","element":"img","alt":" S and S′ ","inline":true,"padRight":true},{"text":"be two samples that differ on the ","element":"span"},{"text":"i","element":"span"},{"text":"-th example. Specifi-cally assume that ","element":"span"},{"style":{"height":17.39},"width":350.08,"height":43.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/18-3.png","element":"img","alt":" vi ∈ S and v′i ∈ S′","inline":true},{"text":". Note that ","element":"span"},{"style":{"height":19.79},"width":353.76,"height":49.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/18-4.png","element":"img","alt":" Si,− = S′i,−. Then:","inline":true}],[{"style":{"width":"112%"},"width":1621,"height":955,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/18-5.png","element":"img"}],[{"text":"We are thus left with proving lemma ","element":"span"},{"href":"#id-33","text":"1:","element":"a"}],[{"id":"id-32","style":{"width":"37%"},"width":543,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/18-6.png","element":"img"}],[{"text":"The proof of the statement follows by induction. 
The case ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1 is the standard uniform convergence property of VC classes, and it follows from theorem ","element":"span"},{"href":"#id-31","text":"8.","element":"a"}],[{"text":"We next proceed to prove the statement for ","element":"span"},{"text":"k","element":"span"},{"text":", assuming it holds for ","element":"span"},{"style":{"height":12.8},"width":96.48,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/18-7.png","element":"img","alt":" k−1.","inline":true}],[{"text":"We begin with the following, triangular, inequality:","element":"span"}],[{"style":{"width":"122%"},"width":1760,"height":842,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/19-0.png","element":"img"}],[{"text":"We next bound the two terms","element":"span"}],[{"style":{"width":"107%"},"width":1541,"height":573,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/19-1.png","element":"img"}],[{"text":"where we denoted by ","element":"span"},{"style":{"height":16.42},"width":104.28,"height":41.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/19-2.png","element":"img","alt":" USk−1","inline":true,"padRight":true},{"text":"the uniform distribution over ","element":"span"},{"style":{"height":12.8},"width":67.12,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/19-3.png","element":"img","alt":" k −","inline":true,"padRight":true},{"text":"1-tuples from ","element":"span"},{"text":"S","element":"span"},{"text":". 
The expectation in the last expression is thus taken w.r.t a process where we pick ","element":"span"},{"text":"m ","element":"span"},{"text":"elements according to ","element":"span"},{"text":"P ","element":"span"},{"text":"and then partition them to ","element":"span"},{"style":{"height":14},"width":196.24,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/19-4.png","element":"img","alt":" m − k + 1","inline":true,"padRight":true},{"text":"elements and to a sequence ","element":"span"},{"style":{"height":10.88},"width":114.92,"height":27.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/19-5.png","element":"img","alt":" v1:k−1","inline":true,"padRight":true},{"text":"of distinct elements. ","element":"span"},{"text":"This process is equivalent to simply choosing ","element":"span"},{"style":{"height":12.8},"width":113.24,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/19-6.png","element":"img","alt":" m − k","inline":true,"padRight":true},{"text":"+ 1 elements according to ","element":"span"},{"text":"P","element":"span"},{"text":", and then picking ","element":"span"},{"style":{"height":12.8},"width":68.08,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/19-7.png","element":"img","alt":" k −","inline":true,"padRight":true},{"text":"1 new elements, again, according to ","element":"span"},{"text":"P ","element":"span"},{"text":"as follows:","element":"span"}],[{"style":{"width":"98%"},"width":1407,"height":289,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/20-0.png","element":"img"}],[{"text":"Note that the quantity ","element":"span"},{"style":{"height":21.46},"width":172.8,"height":53.64,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/20-1.png","element":"img","alt":"1m� d(vi","inline":true},{"text":") is dependent on 
","element":"span"},{"style":{"height":17.39},"width":123.96,"height":43.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/20-2.png","element":"img","alt":" Gv1:k−1","inline":true},{"text":", namely these are random sampled choices that depend on our choice of distinguishing class. To bound their effect we next add and subtract auxiliary random variables ","element":"span"},{"style":{"height":11.2},"width":233.48,"height":28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/20-3.png","element":"img","alt":"u1, . . . , uk−1","inline":true,"padRight":true},{"text":"sampled IID according to ","element":"span"},{"text":"P","element":"span"},{"text":":","element":"span"}],[{"style":{"width":"121%"},"width":1741,"height":919,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/20-4.png","element":"img"}],[{"text":"Renaming ","element":"span"},{"style":{"height":15.6},"width":565.64,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/20-5.png","element":"img","alt":" u1, . . . , uk−1 and v1, . . . , vk−1","inline":true,"padRight":true},{"text":"we can write:","element":"span"}],[{"style":{"width":"75%"},"width":1860,"height":481,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/20-6.png","element":"img"}],[{"text":"Finally we apply. theorem ","element":"span"},{"href":"#id-31","text":"8. 
","element":"a"},{"text":"Recalling that gVC(","element":"span"},{"style":{"height":18.19},"width":443.72,"height":45.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-0.png","element":"img","alt":"Du1:k−1) = ρ, and that","inline":true,"padRight":true},{"text":"the sequence ","element":"span"},{"text":"S ","element":"span"},{"text":"is drawn IID independent of the choice ","element":"span"},{"style":{"height":10.88},"width":116.36,"height":27.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-1.png","element":"img","alt":" u1:k−1","inline":true},{"text":", we obtain for every fixed (","element":"span"},{"style":{"height":17.6},"width":209.48,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-2.png","element":"img","alt":"u1, . . . , uk)","inline":true}],[{"style":{"width":"99%"},"width":1422,"height":955,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-3.png","element":"img"}],[{"text":"We now use the induction hypothesis: Note that ","element":"span"},{"style":{"height":17.6},"width":181.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-4.png","element":"img","alt":" Gv is (k −","inline":true},{"text":"1)-distinguishing class with gVC(","element":"span"},{"style":{"height":17.6},"width":143.96,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-5.png","element":"img","alt":"Gv) ≤ ρ","inline":true,"padRight":true},{"text":"for every choice of ","element":"span"},{"text":"v","element":"span"},{"text":". 
Thus, fixing ","element":"span"},{"text":"v","element":"span"},{"text":":","element":"span"}],[{"style":{"width":"94%"},"width":1359,"height":314,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-6.png","element":"img"}],[{"text":"Continuing the proof ","element":"span"},{"text":"With the aforementioned bound on the terms * and ** we now obtain","element":"span"}],[{"text":"∗ ","element":"span"},{"text":"+ ","element":"span"},{"style":{"height":54.66},"width":1171.76,"height":136.64,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-7.png","element":"img","alt":" ∗∗ ≤ 4 +�ρ log 2em/ρ√2m + 2km +(k − 1)�4 +�ρ log(2em/ρ)�","inline":true}],[{"style":{"width":"49%"},"width":717,"height":137,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/21-8.png","element":"img"}],[{"id":"id-21","style":{"width":"102%"},"width":1467,"height":316,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-0.png","element":"img"}],[{"text":"To prove theorem ","element":"span"},{"href":"#id-22","text":"3 ","element":"a"},{"text":"we will in fact prove a stronger statement: We will show that it is not only hard to compute a ","element":"span"},{"style":{"height":16.4},"width":102.84,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-1.png","element":"img","alt":" g ∈ G","inline":true,"padRight":true},{"text":"as required, but in fact it is even hard to determine if such ","element":"span"},{"text":"g ","element":"span"},{"text":"exists vs. 
the case that ","element":"span"},{"style":{"height":11.6},"width":151.68,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-2.png","element":"img","alt":" p1 = p2.","inline":true}],[{"text":"Specifically let us call an algorithm ","element":"span"},{"text":"A ","element":"span"},{"text":"a testing algorithm for ","element":"span"},{"text":"G ","element":"span"},{"text":"with sample complexity ","element":"span"},{"style":{"height":17.6},"width":208.68,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-3.png","element":"img","alt":" m(ǫ, δ) if A","inline":true,"padRight":true},{"text":"receives IID samples from two distributions ","element":"span"},{"style":{"height":11.6},"width":39.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-4.png","element":"img","alt":" p1","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":17.6},"width":295.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-5.png","element":"img","alt":" p2 of size m(ǫ, δ","inline":true},{"text":") and returns either ","element":"span"},{"text":"EQUIV ALENT ","element":"span"},{"text":"or ","element":"span"},{"text":"DISTINCT ","element":"span"},{"text":"such that w.p. 
(1 ","element":"span"},{"style":{"height":17.6},"width":93.12,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-6.png","element":"img","alt":" − δ):","inline":true}],[{"text":"• ","element":"span"},{"text":"If ","element":"span"},{"style":{"height":11.6},"width":137.96,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-7.png","element":"img","alt":" p1 = p2","inline":true,"padRight":true},{"text":"the algorithm returns ","element":"span"},{"text":"EQUIV ALENT ","element":"span"},{"text":".","element":"span"}],[{"text":"• ","element":"span"},{"text":"If IPM","element":"span"},{"style":{"height":17.6},"width":234.96,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-8.png","element":"img","alt":"G(p1, p2) > ǫ","inline":true,"padRight":true},{"text":"the algorithm returns ","element":"span"},{"text":"DISTINCT","element":"span"}],[{"id":"id-35","text":"Theorem 10. ","element":"span"},{"text":"Let ","element":"span"},{"text":"G ","element":"span"},{"text":"be a ","element":"span"},{"text":"k","element":"span"},{"text":"–distinguishing class with ","element":"span"},{"text":"gVC(","element":"span"},{"style":{"height":17.6},"width":267.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-9.png","element":"img","alt":"G) = ρ. 
Any","inline":true,"padRight":true},{"text":"testing algorithm ","element":"span"},{"text":"A ","element":"span"},{"text":"with sample complexity ","element":"span"},{"style":{"height":17.6},"width":129.8,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-10.png","element":"img","alt":" m(ǫ, δ)","inline":true,"padRight":true},{"text":"must observe ","element":"span"},{"text":"Ω","element":"span"},{"style":{"height":31.6},"width":158.47,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-11.png","element":"img","alt":"� √ρ27k3ǫ2�","inline":true}],[{"style":{"width":"38%"},"width":557,"height":55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-12.png","element":"img"}],[{"text":"Clearly, theorem ","element":"span"},{"href":"#id-22","text":"3 ","element":"a"},{"text":"is a corollary of theorem ","element":"span"},{"href":"#id-35","text":"10. ","element":"a"},{"text":"Indeed if ","element":"span"},{"text":"A ","element":"span"},{"text":"is a discriminating algorithm for ","element":"span"},{"text":"G ","element":"span"},{"text":"with sample complexity ","element":"span"},{"style":{"height":17.6},"width":112.16,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-13.png","element":"img","alt":" m(ǫ, δ","inline":true},{"text":") we can apply it over a sample of size ","element":"span"},{"style":{"height":17.6},"width":155.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-14.png","element":"img","alt":" m(ǫ/3, δ","inline":true},{"text":") to receive (w.p. 
1 ","element":"span"},{"style":{"height":17.6},"width":351.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-15.png","element":"img","alt":" − δ) a graph g s.t.","inline":true}],[{"style":{"width":"49%"},"width":715,"height":80,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-16.png","element":"img"}],[{"text":"With an additional sample of size ","element":"span"},{"style":{"height":25.98},"width":196,"height":64.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-17.png","element":"img","alt":" O(k2 log 1/δǫ2","inline":true,"padRight":true},{"text":") we can estimate ","element":"span"},{"style":{"height":18.29},"width":179.92,"height":45.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-18.png","element":"img","alt":" |Ep1(g) −","inline":true},{"style":{"height":18.29},"width":132.48,"height":45.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-19.png","element":"img","alt":"Ep2(g)|","inline":true,"padRight":true},{"text":"within accuracy ","element":"span"},{"style":{"height":17.6},"width":39.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-20.png","element":"img","alt":" ǫ/","inline":true},{"text":"3, and verify if IPM","element":"span"},{"style":{"height":17.6},"width":244.56,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-21.png","element":"img","alt":"G(p1, p2) < ǫ","inline":true},{"text":": The test will then output ","element":"span"},{"style":{"height":19.34},"width":815.52,"height":48.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-22.png","element":"img","alt":" EQUIV ALENT if |Ep1(g) − Ep2(g)| < ǫ3.","inline":true}],[{"text":"To conclude, we constructed a testing algorithm with sample complexity 
","element":"span"},{"style":{"height":25.98},"width":635.96,"height":64.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-23.png","element":"img","alt":"m(ǫ, δ) + C k2 log 1/δǫ2 . Assuming ρ","inline":true,"padRight":true},{"text":"is sufficiently large, in particular ","element":"span"},{"style":{"height":27.49},"width":131.36,"height":68.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-24.png","element":"img","alt":"√ρ27k3 ≫","inline":true}],[{"style":{"width":"77%"},"width":1105,"height":205,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/22-25.png","element":"img"}],[{"text":"The proof is done by induction. For the induction, we will assume a more fine-grained lower bound. We will assume that there exists a constant ","element":"span"},{"text":"C ","element":"span"},{"text":"so that for every ","element":"span"},{"style":{"height":17.6},"width":400.64,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-0.png","element":"img","alt":" n ≤ k − 1, if mn(ǫ, δ","inline":true},{"text":") is the sample complexity of a testing algorithm for an ","element":"span"},{"text":"n","element":"span"},{"text":"-distinguishing class then:","element":"span"}],[{"style":{"width":"82%"},"width":1184,"height":115,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-1.png","element":"img"}],[{"text":"C > ","element":"span"},{"text":"0 will depend only on the constant for the lower bound for testing if two distributions are distinct or ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-2.png","element":"img","alt":" ǫ","inline":true},{"text":"-far in total variation, as in theorem ","element":"span"},{"href":"#id-36","text":"9.","element":"a"}],[{"text":"We start with the case ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1. 
","element":"span"},{"text":"k ","element":"span"},{"text":"= 1 ","element":"span"},{"text":"The case ","element":"span"},{"text":"k ","element":"span"},{"text":"= 1 follows directly from theorem ","element":"span"},{"href":"#id-36","text":"9. ","element":"a"},{"text":"Let ","element":"span"},{"text":"D ","element":"span"},{"text":"be a","element":"span"}],[{"text":"class with VC dimension ","element":"span"},{"style":{"height":12},"width":34.56,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-3.png","element":"img","alt":" ρ.","inline":true,"padRight":true},{"text":"by restricting our attention to probabilities","element":"span"}],[{"text":"supported on the shattered set of size ","element":"span"},{"style":{"height":12},"width":23,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-4.png","element":"img","alt":" ρ","inline":true},{"text":", we may assume that ","element":"span"},{"style":{"height":17.6},"width":227.52,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-5.png","element":"img","alt":" |V| = ρ and","inline":true}],[{"text":"that ","element":"span"},{"text":"D ","element":"span"},{"text":"= ","element":"span"},{"text":"P","element":"span"},{"text":"(","element":"span"},{"text":"V","element":"span"},{"text":"). 
Note that for the IPM distance we then have","element":"span"}],[{"style":{"width":"35%"},"width":516,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-6.png","element":"img"}],[{"text":"theorem ","element":"span"},{"href":"#id-36","text":"9 ","element":"a"},{"text":"immediately yields the result.","element":"span"}],[{"text":"The induction step. We now proceed with the proof assuming the statement holds for ","element":"span"},{"style":{"height":12.8},"width":110.88,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-7.png","element":"img","alt":" k − 1.","inline":true}],[{"text":"By assumption gVC(","element":"span"},{"style":{"height":17.6},"width":378.96,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-8.png","element":"img","alt":"G) = ρ. Fix v ∈ V","inline":true,"padRight":true},{"text":"such that gVC(","element":"span"},{"style":{"height":17.6},"width":259.88,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-9.png","element":"img","alt":"Gv) = ρ. For","inline":true,"padRight":true},{"text":"every ","element":"span"},{"style":{"height":17.6},"width":124.8,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-10.png","element":"img","alt":" q ∈ (0,","inline":true,"padRight":true},{"text":"1) and distribution ","element":"span"},{"text":"p ","element":"span"},{"text":"denote","element":"span"}],[{"style":{"width":"63%"},"width":910,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-11.png","element":"img"}],[{"id":"id-37","text":"We next state the core Lemma we will need for the proof:","element":"span"}],[{"text":"Lemma 2. 
","element":"span"},{"text":"Let ","element":"span"},{"text":"G ","element":"span"},{"text":"be a family of ","element":"span"},{"text":"k","element":"span"},{"text":"-hypergraphs and ","element":"span"},{"style":{"height":11.6},"width":99.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-12.png","element":"img","alt":" p1, p2","inline":true,"padRight":true},{"text":"two distributions. Assume that for some ","element":"span"},{"style":{"height":12.8},"width":106.32,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-13.png","element":"img","alt":" v ∈ V","inline":true,"padRight":true},{"text":"we have that:","element":"span"}],[{"style":{"width":"74%"},"width":1071,"height":313,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-14.png","element":"img"}],[{"text":"We defer the proof of lemma ","element":"span"},{"href":"#id-37","text":"2 ","element":"a"},{"text":"to appendix ","element":"span"},{"href":"#id-38","text":"B.2.2, ","element":"a"},{"text":"and proceed with the proof of the induction step. Let us denote ","element":"span"},{"style":{"height":18.01},"width":490.88,"height":45.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-15.png","element":"img","alt":" δk = 2−k log k and denote","inline":true},{"style":{"height":20.03},"width":211.68,"height":50.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-16.png","element":"img","alt":"ck = 2−3k2.","inline":true}],[{"style":{"width":"95%"},"width":1366,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-17.png","element":"img"}],[{"text":"theorem ","element":"span"},{"href":"#id-35","text":"10. 
","element":"a"},{"text":"We can now construct a testing algorithm for ","element":"span"},{"style":{"height":15.09},"width":42.92,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-18.png","element":"img","alt":" Gv","inline":true,"padRight":true},{"text":"with sample complexity","element":"span"}],[{"id":"id-39","style":{"width":"70%"},"width":1018,"height":84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/23-19.png","element":"img"}],[{"style":{"width":"73%"},"width":1796,"height":218,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-0.png","element":"img"}],[{"text":"We now show that if ","element":"span"},{"style":{"height":11.6},"width":166.28,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-1.png","element":"img","alt":" p1 = p2","inline":true,"padRight":true},{"text":"the algorithm outputs w.p. ","element":"span"},{"text":"(1 ","element":"span"},{"style":{"height":17.6},"width":87.08,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-2.png","element":"img","alt":" − δ)","inline":true,"padRight":true},{"text":"EQUIVALENT","element":"span"},{"text":": Indeed, since ","element":"span"},{"style":{"height":11.6},"width":157.16,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-3.png","element":"img","alt":" p1 = p2","inline":true},{"text":", we have that ","element":"span"},{"style":{"height":19.04},"width":297.6,"height":47.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-4.png","element":"img","alt":" pq1 = pq2 for all","inline":true,"padRight":true},{"text":"q","element":"span"},{"text":": Applying union bound we have that w.p. 
(1 ","element":"span"},{"style":{"height":12.8},"width":65.12,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-5.png","element":"img","alt":" − δ","inline":true},{"text":") the algorithm indeed outputs ","element":"span"},{"text":"EQUIVALENT","element":"span"},{"text":".","element":"span"}],[{"text":"On the other hand, if IPM","element":"span"},{"style":{"height":17.6},"width":260.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-6.png","element":"img","alt":"Gv(p1, p2) ≥ ǫ","inline":true,"padRight":true},{"text":"we have by lemma ","element":"span"},{"href":"#id-37","text":"2 ","element":"a"},{"text":"that for one of the distributions (","element":"span"},{"style":{"height":19.04},"width":517.2,"height":47.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-7.png","element":"img","alt":"pq1, pq2), IPMG(pq1, pq2) > ckǫ","inline":true,"padRight":true},{"text":", in particular the ","element":"span"},{"text":"algorithm will output ","element":"span"},{"text":"DISTINCT ","element":"span"},{"text":"with probability (1 ","element":"span"},{"style":{"height":17.6},"width":346.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-8.png","element":"img","alt":" − δ). Overall we","inline":true,"padRight":true},{"text":"constructed a testing algorithm for ","element":"span"},{"style":{"height":15.09},"width":42.92,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-9.png","element":"img","alt":" Gv","inline":true,"padRight":true},{"text":"with sample complexity as in eq. ","element":"span"},{"href":"#id-39","text":"(7)","element":"a"},{"text":". 
Reparametrizing we obtain:","element":"span"}],[{"style":{"width":"34%"},"width":489,"height":105,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-10.png","element":"img"}],[{"text":"If ","element":"span"},{"style":{"height":16.74},"width":386.92,"height":41.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-11.png","element":"img","alt":" kδ < 2−(k−1) log(k−1)","inline":true},{"text":", in particular ","element":"span"},{"style":{"height":15.94},"width":228.72,"height":39.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-12.png","element":"img","alt":" δ < 2−k log k","inline":true},{"text":": we obtain from the induction hypothesis that","element":"span"}],[{"style":{"width":"54%"},"width":776,"height":118,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-13.png","element":"img"}],[{"id":"id-38","text":"and the result immediately follows.","element":"span"}],[{"style":{"width":"37%"},"width":544,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-14.png","element":"img"}],[{"text":"Denote","element":"span"}],[{"style":{"width":"119%"},"width":1714,"height":101,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-15.png","element":"img"}],[{"text":"One can show that","element":"span"}],[{"style":{"width":"76%"},"width":1875,"height":365,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-16.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":18.29},"width":313.36,"height":45.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-17.png","element":"img","alt":" pg(q) is some k−","inline":true},{"text":"2 degree polynomial in ","element":"span"},{"text":"q ","element":"span"},{"text":"whose coefficients depend ","element":"span"},{"id":"id-40","text":"on 
","element":"span"},{"style":{"height":16.4},"width":301.16,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/24-18.png","element":"img","alt":" g and p1 and p2","inline":true},{"text":". We next apply the following claim","element":"span"}],[{"style":{"width":"100%"},"width":1435,"height":180,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-0.png","element":"img"}],[{"text":"Proof Sketch. ","element":"span"},{"text":"We provide a full proof for this claim in appendix ","element":"span"},{"text":"D.1. ","element":"span"},{"text":"In a nutshell, claim ","element":"span"},{"href":"#id-40","text":"1 ","element":"a"},{"text":"follows from the equivalence between norms in finite dimensional spaces. Indeed, the mapping","element":"span"}],[{"style":{"width":"57%"},"width":825,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-1.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":19.28},"width":289.44,"height":48.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-2.png","element":"img","alt":" pa(x) = ∑ aixi ","inline":true,"padRight":true},{"text":"is known to be a non-singular linear transformation induced by the appropriate Vandermonde matrix (specifically, ","element":"span"},{"style":{"height":18.29},"width":219.76,"height":45.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-3.png","element":"img","alt":" Vi,j = ((i −","inline":true,"padRight":true},{"text":"1)","element":"span"},{"style":{"height":19.14},"width":429.48,"height":47.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-4.png","element":"img","alt":"/k))j−1). 
Letting λmin","inline":true,"padRight":true},{"text":"be the smallest singular value of the matrix ","element":"span"},{"text":"V ","element":"span"},{"text":", we know that ","element":"span"},{"style":{"height":17.6},"width":558.28,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-5.png","element":"img","alt":" ∥V a∥2 ≥ λmin∥a∥2, where a","inline":true,"padRight":true},{"text":"is the vector of coefficients of the polynomial ","element":"span"},{"style":{"height":11.6},"width":53.76,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-6.png","element":"img","alt":" pa.","inline":true}],[{"text":"Finally, we exploit the relation in ","element":"span"},{"style":{"height":19.54},"width":703.69,"height":48.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-7.png","element":"img","alt":" R^{k+1}: ∥x∥∞ ≤ ∥x∥2 ≤ √(k+1) ∥x∥∞.","inline":true,"padRight":true},{"text":"We can, thus, relate the max norm of the coefficient vector ","element":"span"},{"style":{"height":17.6},"width":283.59,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-8.png","element":"img","alt":" ∥a∥∞ ≥ |a1| to","inline":true,"padRight":true},{"text":"the maximum value max","element":"span"},{"style":{"height":21.58},"width":751.68,"height":53.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-9.png","element":"img","alt":"i∈{0,...,k}∑ aj(i/k)j = ∥V a∥∞ to obtain","inline":true}],[{"style":{"width":"98%"},"width":1406,"height":107,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-10.png","element":"img"}],[{"text":"It remains only to lower bound the singular values of ","element":"span"},{"text":"V ","element":"span"},{"text":"; this is done in the full proof in appendix ","element":"span"},{"text":"D.1.","element":"span"}],[{"text":"With claim ","element":"span"},{"href":"#id-40","text":"1 ","element":"a"},{"text":"in mind we prove the result 
as follows: First, suppose that for some ","element":"span"},{"style":{"height":16.4},"width":102.84,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-11.png","element":"img","alt":" g ∈ G","inline":true,"padRight":true},{"text":"we have that","element":"span"}],[{"style":{"width":"41%"},"width":594,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-12.png","element":"img"}],[{"text":"In this case, applying claim ","element":"span"},{"href":"#id-40","text":"1 ","element":"a"},{"text":"with ","element":"span"},{"style":{"height":19.04},"width":980.36,"height":47.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-13.png","element":"img","alt":" a0 = ∆g0(p1, p2) and a1 = k (∆g0(p1, p2) − ∆g1(p1, p2))","inline":true}],[{"text":"and ","element":"span"},{"style":{"height":13.09},"width":118.24,"height":32.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-14.png","element":"img","alt":" p = pg","inline":true},{"text":", we obtain that there exists a value ","element":"span"},{"text":"q ","element":"span"},{"text":"= ","element":"span"},{"text":"j/k ","element":"span"},{"text":"such that IPM","element":"span"},{"style":{"height":28.83},"width":1463.92,"height":72.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-15.png","element":"img","alt":"G(pq1, pq2) ≥ǫ","inline":true}],[{"style":{"width":"70%"},"width":1009,"height":190,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-16.png","element":"img"}],[{"text":"For any ","element":"span"},{"style":{"height":16.4},"width":102.84,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-17.png","element":"img","alt":" g ∈ G","inline":true},{"text":", by assumption we have that 
","element":"span"},{"style":{"height":19.23},"width":604.8,"height":48.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-18.png","element":"img","alt":" |∆g1(p1, p2)| > ǫ, for some g ∈ G.","inline":true,"padRight":true},{"text":"Hence ","element":"span"},{"style":{"height":19.23},"width":324.4,"height":48.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-19.png","element":"img","alt":" |∆g0(p1, p2)| > ǫ/","inline":true},{"text":"2. By definition of ∆","element":"span"},{"style":{"height":8.4},"width":17,"height":21,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-20.png","element":"img","alt":"0","inline":true,"padRight":true},{"text":"we have that for ","element":"span"},{"text":"q ","element":"span"},{"text":"= 0 we ","element":"span"},{"text":"obtain that: IPM","element":"span"},{"style":{"height":20.19},"width":603.36,"height":50.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/25-21.png","element":"img","alt":"G(pq1, pq2) = |E(pq1) − E(pq2)| > ǫ2.","inline":true}]]},{"heading":"C Expressivity – Proofs","paragraphs":[[{"style":{"width":"40%"},"width":580,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-0.png","element":"img"}],[{"text":"Theorem 4. ","element":"span"},{"text":"Let ","element":"span"},{"text":"V ","element":"span"},{"text":"= ","element":"span"},{"text":"N","element":"span"},{"text":". 
There exists a distinguishing graph class ","element":"span"},{"text":"G","element":"span"},{"text":", with sample complexity ","element":"span"},{"style":{"height":24.58},"width":687.28,"height":61.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-1.png","element":"img","alt":" m(ǫ, δ) = O(log 1/δǫ2 ) (in fact |G| = 1","inline":true},{"text":") such that: for any ","element":"span"},{"text":"1","element":"span"},{"text":"-distinguishing class ","element":"span"},{"text":"D ","element":"span"},{"text":"with finite VC dimension, and every ","element":"span"},{"style":{"height":13.2},"width":276.36,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-2.png","element":"img","alt":" ǫ > 0 there are","inline":true,"padRight":true},{"text":"two distributions ","element":"span"},{"style":{"height":17.6},"width":1087.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-3.png","element":"img","alt":" p1, p2 such that IPMD(p1, p2) < ǫ but IPMG(p1, p2) > 1/2","inline":true}],[{"style":{"width":"95%"},"width":1366,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-4.png","element":"img"}],[{"text":"to be a bipartite graph. 
We thus, divide the vertices into two infinite sets: ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-5.png","element":"img","alt":" V1","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-6.png","element":"img","alt":" V2","inline":true,"padRight":true},{"text":"the elements of ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-7.png","element":"img","alt":" V1","inline":true,"padRight":true},{"text":"will be indexed by ","element":"span"},{"style":{"height":17.6},"width":587.36,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-8.png","element":"img","alt":" N i.e. V1 = {v1, v2, · · · } and we","inline":true,"padRight":true},{"text":"index the elements of ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-9.png","element":"img","alt":" V2","inline":true,"padRight":true},{"text":"with finite subsets of ","element":"span"},{"style":{"height":17.6},"width":584.64,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-10.png","element":"img","alt":" N V2 = {vA : A ⊆ N, |A| < ∞}.","inline":true,"padRight":true},{"text":"Next we define ","element":"span"},{"text":"g ","element":"span"},{"text":"so that an edge passes between ","element":"span"},{"style":{"height":15.09},"width":464.8,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-11.png","element":"img","alt":" vi ∈ V1 and vA ∈ V2 
iff","inline":true},{"style":{"height":13.6},"width":113.28,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-12.png","element":"img","alt":"i ∈ A.","inline":true}],[{"text":"Let ","element":"span"},{"text":"D ","element":"span"},{"text":"be a distinguishing class with finite sample complexity, in particular gVC(","element":"span"},{"style":{"height":17.6},"width":156.32,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-13.png","element":"img","alt":"D) < ∞","inline":true},{"text":". Denote gVC(","element":"span"},{"style":{"height":17.6},"width":298.28,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-14.png","element":"img","alt":"D) = ρ. Let D1","inline":true,"padRight":true},{"text":"be the restriction of ","element":"span"},{"text":"D ","element":"span"},{"text":"to ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-15.png","element":"img","alt":"V1","inline":true},{"text":": Note that gVC(","element":"span"},{"style":{"height":17.6},"width":162.24,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-16.png","element":"img","alt":"D1) ≤ ρ.","inline":true}],[{"id":"id-41","text":"Next we make the following claim:","element":"span"}],[{"style":{"width":"98%"},"width":1406,"height":222,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-17.png","element":"img"}],[{"text":"Proof. ","element":"span"},{"text":"To construct two such distributions, choose a set ","element":"span"},{"style":{"height":15.09},"width":332.76,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-18.png","element":"img","alt":" S ⊆ V1 of size m","inline":true,"padRight":true},{"text":"large enough (to be determined later). 
Then, randomly choose two samples ","element":"span"},{"style":{"height":15.09},"width":349.16,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-19.png","element":"img","alt":"S1 and S2 out of S","inline":true,"padRight":true},{"text":"(uniformly), each of size ","element":"span"},{"style":{"height":19.81},"width":85.09,"height":49.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-20.png","element":"img","alt":" O( ρǫ2","inline":true},{"text":"). Then, by theorem ","element":"span"},{"href":"#id-19","text":"2 ","element":"a"},{"text":"with ","element":"span"},{"text":"some constant probability we have that IPM","element":"span"},{"style":{"height":17.62},"width":290.32,"height":44.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-21.png","element":"img","alt":"D(pS1, pS) < ǫ/","inline":true},{"text":"2 and similarly IPM","element":"span"},{"style":{"height":17.62},"width":287.92,"height":44.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-22.png","element":"img","alt":"D(pS, pS2) < ǫ/","inline":true},{"text":"2 . 
Taken together we obtain that IPM","element":"span"},{"style":{"height":17.62},"width":287.52,"height":44.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-23.png","element":"img","alt":"G(pS1, pS2) < ǫ.","inline":true}],[{"text":"Also, if ","element":"span"},{"text":"S ","element":"span"},{"text":"is sufficiently large (say, of order ","element":"span"},{"style":{"height":25.22},"width":88.44,"height":63.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-24.png","element":"img","alt":" O(ρ2ǫ4","inline":true,"padRight":true},{"text":")), we would have that ","element":"span"},{"text":"w.h.p ","element":"span"},{"style":{"height":16.29},"width":219.76,"height":40.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-25.png","element":"img","alt":" S1 ∩ S2 = ∅","inline":true},{"text":". Thus, let ","element":"span"},{"style":{"height":16.81},"width":427.2,"height":42.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-26.png","element":"img","alt":" q1 = pS1 and q2 = pS2.","inline":true}],[{"text":"With claim ","element":"span"},{"href":"#id-41","text":"2, ","element":"a"},{"text":"we proceed with the proof. Let ","element":"span"},{"style":{"height":16},"width":171.08,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-27.png","element":"img","alt":" q1 and q2","inline":true,"padRight":true},{"text":"be as in claim ","element":"span"},{"href":"#id-41","text":"2. 
","element":"a"},{"text":"Let ","element":"span"},{"text":"A ","element":"span"},{"text":"be the support of ","element":"span"},{"style":{"height":11.6},"width":36.68,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-28.png","element":"img","alt":" q1","inline":true},{"text":", and define ","element":"span"},{"style":{"height":11.6},"width":39.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-29.png","element":"img","alt":" p1","inline":true,"padRight":true},{"text":"to be a distribution ","element":"span"},{"style":{"height":21.27},"width":278.6,"height":53.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-30.png","element":"img","alt":" p1 = 12δA + 12q1","inline":true,"padRight":true},{"text":"and similarly we define ","element":"span"},{"style":{"height":21.27},"width":288.68,"height":53.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-31.png","element":"img","alt":" p2 = 12δA + 12q2","inline":true},{"text":". 
We then have","element":"span"}],[{"style":{"width":"41%"},"width":596,"height":248,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/26-32.png","element":"img"}],[{"text":"On the other hand, note that for ","element":"span"},{"style":{"height":11.6},"width":39.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-0.png","element":"img","alt":" p1","inline":true,"padRight":true},{"text":"the probability to draw an edge from ","element":"span"},{"text":"g ","element":"span"},{"text":"is at least 1","element":"span"},{"text":"/","element":"span"},{"text":"2 (indeed if ","element":"span"},{"style":{"height":16.8},"width":407.08,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-1.png","element":"img","alt":" v1 = vA and v2 ̸= vA","inline":true,"padRight":true},{"text":"drawn from ","element":"span"},{"style":{"height":16},"width":139.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-2.png","element":"img","alt":" q1 then","inline":true},{"style":{"height":17.6},"width":137,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-3.png","element":"img","alt":"g(v1, v2","inline":true},{"text":") = 1. On the other hand, the probability to draw an edge from ","element":"span"},{"style":{"height":11.6},"width":39.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-4.png","element":"img","alt":" p2","inline":true,"padRight":true},{"text":"is 0. It follows that IPM","element":"span"},{"style":{"height":35.41},"width":260.64,"height":88.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-5.png","element":"img","alt":"G(p1, p2) > 12.","inline":true}],[{"id":"id-25","style":{"width":"40%"},"width":579,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-6.png","element":"img"}],[{"text":"Theorem 5. 
","element":"span"},{"text":"Let ","element":"span"},{"text":"V ","element":"span"},{"text":"= ","element":"span"},{"text":"N","element":"span"},{"text":". There exists a ","element":"span"},{"text":"k","element":"span"},{"text":"-distinguishing class ","element":"span"},{"style":{"height":15.28},"width":43.92,"height":38.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-7.png","element":"img","alt":" Gk","inline":true},{"text":", with sample complexity ","element":"span"},{"style":{"height":25.98},"width":427.4,"height":64.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-8.png","element":"img","alt":" m(ǫ, δ) = O(k2+log 1/δǫ2 )","inline":true,"padRight":true},{"text":"such that: For any ","element":"span"},{"style":{"height":12.8},"width":83.92,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-9.png","element":"img","alt":" k−1","inline":true},{"text":"-distinguishing class ","element":"span"},{"style":{"height":15.28},"width":88.04,"height":38.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-10.png","element":"img","alt":" Gk−1","inline":true,"padRight":true},{"text":"with bounded sample complexity, and every ","element":"span"},{"style":{"height":12.4},"width":105.04,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-11.png","element":"img","alt":" ǫ > 0","inline":true,"padRight":true},{"text":"there are two distributions ","element":"span"},{"style":{"height":18.19},"width":1177,"height":45.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-12.png","element":"img","alt":" p1, p2 such that IPMGk−1(p1, p2) < ǫ and IPMGk(p1, p2) > 1/4.","inline":true}],[{"style":{"width":"95%"},"width":1365,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-13.png","element":"img"}],[{"text":"vertices into two infinite sets 
","element":"span"},{"style":{"height":15.09},"width":191.72,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-14.png","element":"img","alt":" V1 and V2","inline":true},{"text":". Again, the elements of ","element":"span"},{"style":{"height":15.09},"width":191.36,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-15.png","element":"img","alt":" V1 will be","inline":true,"padRight":true},{"text":"indexed by ","element":"span"},{"text":"N","element":"span"},{"text":", and the elements of ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-16.png","element":"img","alt":" V2","inline":true,"padRight":true},{"text":"are indexed by finite subsets of ","element":"span"},{"text":"N","element":"span"},{"text":". ","element":"span"},{"style":{"height":17.6},"width":544.8,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-17.png","element":"img","alt":"V2 = {vA : A ⊆ N, |A| < ∞}.","inline":true}],[{"text":"We define the hyper graph ","element":"span"},{"style":{"height":12},"width":38.64,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-18.png","element":"img","alt":" gk","inline":true,"padRight":true},{"text":"to be a (undirected) graph that contains a hyperedge (","element":"span"},{"style":{"height":18.19},"width":874.08,"height":45.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-19.png","element":"img","alt":"vi1, . . . , vik−1, vA) whenever {i1 . . . , ik−1} ⊆ A.","inline":true}],[{"text":"Next, as before we construct two distributions with distinct support such that IPM","element":"span"},{"style":{"height":17.6},"width":235.92,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-20.png","element":"img","alt":"G(p1, p2) ≤ ǫ","inline":true},{"text":". 
This is done similarly to the proof of theorem ","element":"span"},{"href":"#id-24","text":"4. ","element":"a"},{"text":"Specifically:","element":"span"}],[{"id":"id-42","style":{"height":14.8},"width":495.76,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-21.png","element":"img","alt":"Claim 3. Let G be a k −1","inline":true},{"text":"-distinguishing class defined on ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-22.png","element":"img","alt":" V1","inline":true},{"text":". There are two distributions, ","element":"span"},{"style":{"height":16},"width":175.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-23.png","element":"img","alt":" q1 and q2","inline":true},{"text":", supported on ","element":"span"},{"style":{"height":15.09},"width":192.68,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-24.png","element":"img","alt":" V1 so that","inline":true}],[{"style":{"width":"61%"},"width":876,"height":141,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-25.png","element":"img"}],[{"text":"The proof is a repetition of the proof of claim ","element":"span"},{"href":"#id-41","text":"2, ","element":"a"},{"text":"where we draw ","element":"span"},{"style":{"height":15.09},"width":130.56,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-26.png","element":"img","alt":" S1 and","inline":true},{"style":{"height":15.09},"width":43.88,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-27.png","element":"img","alt":"S2","inline":true,"padRight":true},{"text":"to be of order ","element":"span"},{"style":{"height":25.41},"width":108.2,"height":63.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-28.png","element":"img","alt":" 
O(k2ρǫ2","inline":true,"padRight":true},{"text":"), and again invoke theorem ","element":"span"},{"href":"#id-19","text":"2.","element":"a"}],[{"text":"As before, then, given a class ","element":"span"},{"style":{"height":14.8},"width":165.04,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-29.png","element":"img","alt":" G of k −","inline":true,"padRight":true},{"text":"1–hypergraphs we take two distributions ","element":"span"},{"style":{"height":16},"width":183.08,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-30.png","element":"img","alt":" q1 and q2","inline":true,"padRight":true},{"text":"as in claim ","element":"span"},{"href":"#id-42","text":"3 ","element":"a"},{"text":"and if ","element":"span"},{"text":"A ","element":"span"},{"text":"is the support of ","element":"span"},{"style":{"height":16},"width":218.72,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-31.png","element":"img","alt":" q1, we take","inline":true},{"style":{"height":21.26},"width":1025.48,"height":53.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-32.png","element":"img","alt":"p1 = 1kδvA + (1 − 1k)q1 and let p2 = 1kδvA + (1 − 1k)q2","inline":true},{"text":". Then, we can show ","element":"span"},{"text":"that IPM","element":"span"},{"style":{"height":17.6},"width":235.44,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-33.png","element":"img","alt":"G(p1, p2) ≤ ǫ","inline":true},{"text":". 
On the other hand, the probability to draw an edge from ","element":"span"},{"style":{"height":21.28},"width":488.36,"height":53.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-34.png","element":"img","alt":" gk is k · 1k(1− 1k)k−1 ≥ e−1","inline":true,"padRight":true},{"text":"according to ","element":"span"},{"style":{"height":11.6},"width":39.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-35.png","element":"img","alt":" p1","inline":true},{"text":", but the probability to draw ","element":"span"},{"text":"an edge from ","element":"span"},{"style":{"height":15.2},"width":132.48,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/27-36.png","element":"img","alt":" p2 is 0.","inline":true}],[{"id":"id-26","style":{"width":"40%"},"width":579,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-0.png","element":"img"}],[{"text":"Theorem 6. ","element":"span"},{"text":"Let ","element":"span"},{"text":"|V| ","element":"span"},{"text":"= ","element":"span"},{"text":"n","element":"span"},{"text":". 
There exists a ","element":"span"},{"text":"k","element":"span"},{"text":"-distinguishing class ","element":"span"},{"style":{"height":15.6},"width":160.32,"height":39,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-1.png","element":"img","alt":" Gk, with","inline":true,"padRight":true},{"text":"sample complexity ","element":"span"},{"style":{"height":25.99},"width":756.4,"height":64.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-2.png","element":"img","alt":" m(ǫ, δ) = O(k2+log 1/δǫ2 ) (in fact |G| = 1","inline":true},{"text":") such that: For any ","element":"span"},{"style":{"height":16.4},"width":381.04,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-3.png","element":"img","alt":" ǫ > 0 and any k − 1","inline":true,"padRight":true},{"text":"distinguishing class ","element":"span"},{"style":{"height":16.4},"width":146.48,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-4.png","element":"img","alt":" Gk−1 if:","inline":true}],[{"style":{"width":"27%"},"width":399,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-5.png","element":"img"}],[{"style":{"height":24.26},"width":613,"height":60.64,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-6.png","element":"img","alt":"then gVC(Gk−1) = Ω( ǫ2k2√log n).","inline":true}],[{"text":"The proof is similar to the proof of theorem ","element":"span"},{"href":"#id-43","text":"5. ","element":"a"},{"text":"For simplicity, let us assume that ","element":"span"},{"text":"|V| ","element":"span"},{"text":"= ","element":"span"},{"text":"n","element":"span"},{"text":"+log ","element":"span"},{"text":"n","element":"span"},{"text":". 
This will not change the results up to constants.","element":"span"}],[{"style":{"width":"95%"},"width":1365,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-7.png","element":"img"}],[{"text":"and ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-8.png","element":"img","alt":" V2","inline":true},{"text":". We index the elements of ","element":"span"},{"style":{"height":18.48},"width":394,"height":46.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-9.png","element":"img","alt":" V1 as {v1, . . . , vlog n}","inline":true,"padRight":true},{"text":"and we index the elements of ","element":"span"},{"style":{"height":14.69},"width":43.88,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-10.png","element":"img","alt":" V2","inline":true,"padRight":true},{"text":"with subsets of [log ","element":"span"},{"text":"n","element":"span"},{"text":"]. ","element":"span"},{"text":"We then consider a graph ","element":"span"},{"text":"g ","element":"span"},{"text":"that contains only hyper-edges of the form (","element":"span"},{"style":{"height":18},"width":677.48,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-11.png","element":"img","alt":"vi1, . . . , vk−1, vA) iff {i1, . . . 
, ik−1} ∈","inline":true,"padRight":true},{"text":"A","element":"span"},{"text":".","element":"span"}],[{"style":{"width":"95%"},"width":1364,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-12.png","element":"img"}],[{"style":{"height":31.6},"width":349.05,"height":79,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-13.png","element":"img","alt":"m(ǫ, δ) = O�ρk2ǫ2�","inline":true},{"text":"be an upper bound on the sample complexity of classes of graph VC dimension ","element":"span"},{"style":{"height":12},"width":34.56,"height":30,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-14.png","element":"img","alt":" ρ.","inline":true}],[{"style":{"width":"95%"},"width":1365,"height":47,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-15.png","element":"img"}],[{"text":"butions ","element":"span"},{"style":{"height":17.6},"width":301.52,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-16.png","element":"img","alt":" q1, q2 over [log n","inline":true},{"text":"], with disjoint support such that IPM","element":"span"},{"style":{"height":18.19},"width":258.72,"height":45.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-17.png","element":"img","alt":"Gk(q1, q2) < ǫ.","inline":true,"padRight":true},{"text":"The proof is done as in claim ","element":"span"},{"href":"#id-42","text":"3.","element":"a"}],[{"style":{"width":"95%"},"width":1367,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-18.png","element":"img"}],[{"style":{"height":17.6},"width":565.92,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-19.png","element":"img","alt":"{1, . . . , log n} of size m(ǫ/8, 0.","inline":true},{"text":"99). 
One can show that w.p 1","element":"span"},{"text":"/","element":"span"},{"text":"4 we have that ","element":"span"},{"style":{"height":15.09},"width":141.8,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-20.png","element":"img","alt":"S1 ∩ S2","inline":true,"padRight":true},{"text":"are distinct, also we have w.p 0","element":"span"},{"text":".","element":"span"},{"text":"98 that IPM","element":"span"},{"style":{"height":17.62},"width":401.28,"height":44.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-21.png","element":"img","alt":"G(pS, pS1) < ǫ/8 and","inline":true,"padRight":true},{"text":"similarly IPM","element":"span"},{"style":{"height":17.62},"width":282.64,"height":44.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-22.png","element":"img","alt":"G(pS, pS2) < ǫ/","inline":true},{"text":"8. Taken together we obtain that with positive probability ","element":"span"},{"style":{"height":16.82},"width":409.56,"height":42.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-23.png","element":"img","alt":" q1 = pS1 and q2 = pS2","inline":true,"padRight":true},{"text":"have disjoint support and IPM","element":"span"},{"style":{"height":17.6},"width":200.08,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-24.png","element":"img","alt":"G(q1, q2) <","inline":true}],[{"style":{"width":"99%"},"width":1431,"height":82,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-25.png","element":"img"}],[{"style":{"height":21.27},"width":428.36,"height":53.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-26.png","element":"img","alt":"p1 = 1kδvA + (1 − 1k)q1","inline":true,"padRight":true},{"text":"and similarly 
","element":"span"},{"style":{"height":21.27},"width":411.56,"height":53.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-27.png","element":"img","alt":" p2 = 1kδvA + (1 − 1kq2","inline":true},{"text":". One can show ","element":"span"},{"text":"that IPM","element":"span"},{"style":{"height":19.15},"width":239.2,"height":47.88,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-28.png","element":"img","alt":"G(p1, p2) < ǫ4 ","inline":true,"padRight":true},{"text":"but the probability to draw an edge from ","element":"span"},{"text":"g ","element":"span"},{"text":"according ","element":"span"},{"text":"to ","element":"span"},{"style":{"height":11.6},"width":36.68,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-29.png","element":"img","alt":" q1","inline":true,"padRight":true},{"text":"is at least 1","element":"span"},{"text":"/","element":"span"},{"text":"4, while it equals 0 if we draw edges according to ","element":"span"},{"style":{"height":11.6},"width":52.8,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-30.png","element":"img","alt":" p2.","inline":true}],[{"style":{"width":"94%"},"width":1362,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-31.png","element":"img"}],[{"style":{"height":16.29},"width":124.2,"height":40.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-32.png","element":"img","alt":"ǫIPMG","inline":true},{"text":". 
In other words, if IPM","element":"span"},{"style":{"height":19.73},"width":778.56,"height":49.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-33.png","element":"img","alt":"Gk ≻ ǫ · IPMG then log n ≤ m2(ǫ/8, 0.99).","inline":true}],[{"style":{"width":"26%"},"width":385,"height":109,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/28-34.png","element":"img"}],[{"id":"id-28","style":{"width":"40%"},"width":581,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-0.png","element":"img"}],[{"text":"Theorem 7. ","element":"span"},{"text":"Let ","element":"span"},{"text":"|V| ","element":"span"},{"text":"= ","element":"span"},{"text":"n","element":"span"},{"text":". There exists a ","element":"span"},{"text":"2","element":"span"},{"text":"-distinguishing class ","element":"span"},{"text":"G","element":"span"},{"text":", with sample complexity ","element":"span"},{"style":{"height":24.77},"width":680.56,"height":61.92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-1.png","element":"img","alt":" m(ǫ, δ) = O(log 1/δǫ2 ) (in fact |G| = 1","inline":true},{"text":") such that: For any ","element":"span"},{"style":{"height":12.4},"width":97.84,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-2.png","element":"img","alt":"ǫ > 0","inline":true,"padRight":true},{"text":"and any distinguishing class ","element":"span"},{"text":"D ","element":"span"},{"text":"if:","element":"span"}],[{"style":{"width":"22%"},"width":327,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-3.png","element":"img"}],[{"style":{"height":20.22},"width":521.32,"height":50.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-4.png","element":"img","alt":"then gVC(D) = ˜Ω(ǫ2 log n).","inline":true}],[{"text":"The proof is similar to the proof of theorem ","element":"span"},{"href":"#id-24","text":"4 
","element":"a"},{"text":"but we will use an improved upper bound on the size of ","element":"span"},{"text":"S ","element":"span"},{"text":"which we next state (see appendix ","element":"span"},{"href":"#id-44","text":"C.5 ","element":"a"},{"text":"for a proof):","element":"span"}],[{"id":"id-45","text":"Lemma 3. ","element":"span"},{"text":"Let ","element":"span"},{"text":"D ","element":"span"},{"text":"be a class with ","element":"span"},{"text":"gVC(","element":"span"},{"style":{"height":17.6},"width":133.4,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-5.png","element":"img","alt":"D) = ρ","inline":true,"padRight":true},{"text":"over a domain ","element":"span"},{"text":"S","element":"span"},{"text":". There exists a constant ","element":"span"},{"text":"c > ","element":"span"},{"text":"0 ","element":"span"},{"text":"(independent of ","element":"span"},{"text":"D ","element":"span"},{"text":"and ","element":"span"},{"text":"d","element":"span"},{"text":") such that if ","element":"span"},{"style":{"height":22.05},"width":401.8,"height":55.12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-6.png","element":"img","alt":" |S| > c· dǫ2 log2(d/ǫ2),","inline":true,"padRight":true},{"text":"Then there are two distributions ","element":"span"},{"style":{"height":16},"width":175.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-7.png","element":"img","alt":" q1 and q2","inline":true},{"text":", supported on ","element":"span"},{"text":"S ","element":"span"},{"text":"such that:","element":"span"}],[{"text":"1. 
","element":"span"},{"style":{"height":16},"width":188.84,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-8.png","element":"img","alt":" q1, and q2","inline":true,"padRight":true},{"text":"have disjoint support.","element":"span"}],[{"style":{"height":17.6},"width":378.48,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-9.png","element":"img","alt":"2. IPMD(q1, q2) < ǫ","inline":true}],[{"text":"The graph ","element":"span"},{"text":"g ","element":"span"},{"text":"is constructed as in theorem ","element":"span"},{"href":"#id-24","text":"4. ","element":"a"},{"text":"Let ","element":"span"},{"text":"V ","element":"span"},{"text":"be a set of vertices of size ","element":"span"},{"style":{"height":16.4},"width":300.68,"height":41,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-10.png","element":"img","alt":" n + log n, let V1","inline":true,"padRight":true},{"text":"be a set of size log ","element":"span"},{"text":"n ","element":"span"},{"text":"and we index its elements with ","element":"span"},{"style":{"height":18.48},"width":640.52,"height":46.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-11.png","element":"img","alt":"{v1, . . . , v2, . . . , vlog n}. We let V2","inline":true,"padRight":true},{"text":"include all other elements and we index them via subsets of [log ","element":"span"},{"text":"n","element":"span"},{"text":"]. The graph is again constructed so that ","element":"span"},{"style":{"height":14.69},"width":145.64,"height":36.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-12.png","element":"img","alt":" vA ∈ V2","inline":true,"padRight":true},{"text":"has an edge to ","element":"span"},{"style":{"height":15.49},"width":321,"height":38.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-13.png","element":"img","alt":" vi ∈ V1 iff i ∈ A","inline":true},{"text":". 
As before, we make the graph bipartite, i.e. both ","element":"span"},{"style":{"height":15.09},"width":188.84,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-14.png","element":"img","alt":" V1 and V2","inline":true,"padRight":true},{"text":"are independent sets.","element":"span"}],[{"text":"Now suppose log ","element":"span"},{"style":{"height":22.05},"width":266.52,"height":55.12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-15.png","element":"img","alt":" n ≥ c ρǫ2 log2 dǫ2","inline":true},{"text":". By lemma ","element":"span"},{"href":"#id-45","text":"3 ","element":"a"},{"text":"we have that there exists a ","element":"span"},{"text":"set ","element":"span"},{"style":{"height":17.6},"width":344.08,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-16.png","element":"img","alt":" A ⊆ {1, . . . , log n}","inline":true},{"text":", a distribution ","element":"span"},{"style":{"height":16},"width":361.64,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-17.png","element":"img","alt":" p1 and p2 where p1","inline":true,"padRight":true},{"text":"is supported on ","element":"span"},{"text":"A ","element":"span"},{"text":"and ","element":"span"},{"style":{"height":11.6},"width":39.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-18.png","element":"img","alt":" p2","inline":true,"padRight":true},{"text":"is supported on its compelement so that IPM","element":"span"},{"style":{"height":17.6},"width":234.96,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-19.png","element":"img","alt":"G(p1, p2) < ǫ","inline":true},{"text":". 
As before we construct ","element":"span"},{"style":{"height":17.6},"width":868.04,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-20.png","element":"img","alt":" q1 = δvA + (1 − δ)p1 and q2 = δvA + (1 − δ)p2","inline":true},{"text":". One can verify that IPM","element":"span"},{"style":{"height":21.71},"width":689,"height":54.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-21.png","element":"img","alt":"G(q1, q2) < ǫ but IPMGk+1(q1, q2) > 12","inline":true},{"text":". Thus, if IPM","element":"span"},{"style":{"height":18.59},"width":291,"height":46.48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-22.png","element":"img","alt":"Gk ≻ ǫ·IPMGk+1","inline":true,"padRight":true},{"text":"then log ","element":"span"},{"style":{"height":22.55},"width":748.32,"height":56.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-23.png","element":"img","alt":" n ≤ c ρǫ2 log2 dǫ2. In turn d = ˜Ω(ǫ2 log n).","inline":true}],[{"id":"id-44","style":{"width":"99%"},"width":1433,"height":263,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/29-24.png","element":"img"}],[{"text":"D} ","element":"span"},{"text":"and denote ","element":"span"},{"style":{"height":23.9},"width":251.88,"height":59.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-0.png","element":"img","alt":" H = H 2ǫ2 ln |S|","inline":true},{"text":". Note that","element":"span"}],[{"style":{"width":"62%"},"width":896,"height":295,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-1.png","element":"img"}],[{"text":"It thus follows that there exists ","element":"span"},{"style":{"height":17.6},"width":259.72,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-2.png","element":"img","alt":" f /∈ H. 
Let f","inline":true,"padRight":true},{"text":"be such and define a matrix","element":"span"}],[{"style":{"width":"65%"},"width":944,"height":213,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-3.png","element":"img"}],[{"text":"Now suppose that for some distribution ","element":"span"},{"text":"q ","element":"span"},{"text":"over ","element":"span"},{"text":"S","element":"span"},{"text":", for every ","element":"span"},{"text":"d ","element":"span"},{"text":"we have that ","element":"span"},{"style":{"height":21.27},"width":526.73,"height":53.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-4.png","element":"img","alt":"Ev∼q[d(v) = f(v)] < 12 + 1ǫ","inline":true},{"text":". Then, defining ","element":"span"},{"style":{"height":17.6},"width":561.52,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-5.png","element":"img","alt":" q1 = q(·|f(v) = 0) and q2 =","inline":true},{"style":{"height":17.6},"width":126.12,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-6.png","element":"img","alt":"q(·|f(v","inline":true},{"text":") = 1) yields the desired result. Indeed,","element":"span"}],[{"style":{"width":"76%"},"width":1862,"height":481,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-7.png","element":"img"}],[{"text":"We now wish to prove that indeed, such a ","element":"span"},{"text":"q ","element":"span"},{"text":"exists. 
Suppose, otherwise: That for any distribution ","element":"span"},{"text":"q ","element":"span"},{"text":"over ","element":"span"},{"text":"S ","element":"span"},{"text":"we can find ","element":"span"},{"style":{"height":18.29},"width":482.8,"height":45.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-8.png","element":"img","alt":" d such that Ev∼q[d(v) =","inline":true},{"style":{"height":21.27},"width":247.36,"height":53.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-9.png","element":"img","alt":"f(v)] > 12 + 1ǫ","inline":true},{"text":". This can be rephrased in terms of a value of a minimax game ","element":"span"},{"text":"as follows:","element":"span"}],[{"style":{"width":"33%"},"width":483,"height":98,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-10.png","element":"img"}],[{"text":"Where ∆(","element":"span"},{"text":"S","element":"span"},{"text":") denotes the set of distributions over ","element":"span"},{"text":"S","element":"span"},{"text":". 
It is well known (","element":"span"},{"href":"#id-46","referenceIndex":16,"text":"[16]","element":"a"},{"text":", thm 2), that for any game defined by any matrix ","element":"span"},{"text":"M ","element":"span"},{"text":"with ","element":"span"},{"text":"c ","element":"span"},{"text":"columns, there exists a strategy for the row player that chooses uniformly from a multiset of ","element":"span"},{"style":{"height":22.24},"width":48.6,"height":55.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-11.png","element":"img","alt":"ln c2ǫ2","inline":true,"padRight":true},{"text":"and achieves ","element":"span"},{"style":{"height":8},"width":18,"height":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/30-12.png","element":"img","alt":" ǫ","inline":true},{"text":"-optimiality.","element":"span"}],[{"style":{"width":"99%"},"width":1430,"height":293,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-0.png","element":"img"}],[{"text":"this contradicts the fact that ","element":"span"},{"style":{"height":17.6},"width":129.12,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-1.png","element":"img","alt":" f /∈ H.","inline":true}],[{"style":{"width":"99%"},"width":1433,"height":92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-2.png","element":"img"}]]},{"heading":"D Additional Proofs","paragraphs":[[{"style":{"width":"35%"},"width":512,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-3.png","element":"img"}],[{"text":"Consider the Vandermonde Matrix ","element":"span"},{"style":{"height":24.53},"width":755.04,"height":61.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-4.png","element":"img","alt":" V ∈ Mk+1,k+1 given by Vi,j =�i−1k �j−1.","inline":true,"padRight":true},{"text":"Our first step will be to lower bound the smallest singular value of ","element":"span"},{"text":"V 
","element":"span"},{"text":". In turn, we will obtain a lower bound on the maximum value over the coordinates of the vector ","element":"span"},{"text":"V ","element":"span"},{"text":"a","element":"span"},{"text":". The proof can then be derived from the identity: (","element":"span"},{"style":{"height":17.6},"width":139.6,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-5.png","element":"img","alt":"V a)i =","inline":true},{"style":{"height":26.16},"width":299.04,"height":65.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-6.png","element":"img","alt":"�k+1j=1 aj� i−1k �j.","inline":true}],[{"text":"Let ","element":"span"},{"style":{"height":16.48},"width":431.24,"height":41.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-7.png","element":"img","alt":" λ1 ≤ λ2 ≤ . . . ≤ λk+1","inline":true,"padRight":true},{"text":"be the singular values of ","element":"span"},{"text":"V ","element":"span"},{"text":". To bound the smallest singular value, ","element":"span"},{"style":{"height":15.09},"width":42.44,"height":37.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-8.png","element":"img","alt":" λ1","inline":true},{"text":", we first observe that ","element":"span"},{"style":{"height":16.48},"width":87.56,"height":41.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-9.png","element":"img","alt":" λk+1","inline":true},{"text":"– the highest singular value is bounded by ","element":"span"},{"text":"k ","element":"span"},{"text":"+ 1. 
To see that ","element":"span"},{"style":{"height":16.48},"width":173.72,"height":41.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-10.png","element":"img","alt":" λk+1 ≤ k","inline":true,"padRight":true},{"text":"+ 1, observe that for any vector ","element":"span"},{"style":{"height":17.6},"width":114.16,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-11.png","element":"img","alt":" ∥a∥ ≤","inline":true,"padRight":true},{"text":"1 we have that","element":"span"}],[{"style":{"width":"47%"},"width":685,"height":46,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-12.png","element":"img"}],[{"text":"Next, using the formula for the determinant of a Vandermonde matrix, and the relation det(","element":"span"},{"style":{"height":17.74},"width":196.32,"height":44.36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-13.png","element":"img","alt":"V ) = � λi","inline":true},{"text":", we obtain:","element":"span"}],[{"style":{"width":"34%"},"width":500,"height":350,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/31-14.png","element":"img"}],[{"text":"Taken together we obtain","element":"span"}],[{"style":{"width":"38%"},"width":554,"height":496,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/32-0.png","element":"img"}],[{"text":"Finally, for any polynomial ","element":"span"},{"style":{"height":19.09},"width":203.04,"height":47.72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/32-1.png","element":"img","alt":" p = � aiqi ","inline":true,"padRight":true},{"text":"with coefficient ","element":"span"},{"style":{"height":17.6},"width":65.76,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/32-2.png","element":"img","alt":" |a1|","inline":true,"padRight":true},{"text":"we have that 
","element":"span"},{"style":{"height":17.6},"width":211.2,"height":44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/32-3.png","element":"img","alt":"∥a∥2 ≥ |a1|","inline":true},{"text":". We thus obtain,","element":"span"}],[{"style":{"width":"45%"},"width":646,"height":508,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1906.00264/images/32-4.png","element":"img"}]]}],"_version":"3.3.2"},"paperNode":"$1b:props:children:props:children:0:props:product"}]]]}]}]