1b:["$","$L29",null,{"isWhiteLabelled":false,"children":["$","$Lb",null,{"pt":{"compact":0,"expanded":3},"children":[["$","$L2a",null,{"noStar":true,"publisher":true,"task":true,"params":true,"size":"xl","product":{"id":"eyJwYXBlcklEIjoiMTkwMy4wMTA4MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","updated":"2019-11-21T08:32:27.000Z","paperID":"1903.01083","published":"2019-03-04T05:56:20.000Z","authors":"[\"Shuai Li\",\"Wei Chen\",\"Zheng Wen\",\"Kwong-Sak Leung\"]","title":"Stochastic Online Learning with Probabilistic Graph Feedback","scoreTrending":null,"summary":"$2b","lastCheckedForCode":"2022-09-05T22:03:42.795Z","links":[{"id":"eyJ1cmwiOiJodHRwczovL3BhcGVyc3dpdGhjb2RlLmNvbS9wYXBlci9zdG9jaGFzdGljLW9ubGluZS1sZWFybmluZy13aXRoLXByb2JhYmlsaXN0aWMifQ==","type":"pwc","url":"https://paperswithcode.com/paper/stochastic-online-learning-with-probabilistic","data":null}],"reposConnection":{"edges":[]},"models":[],"tags":[],"summaries":[],"emailsConnection":{"edges":[{"author":"wei chen","node":{"id":"eyJhZGRyZXNzIjoid2VpY0BtaWNyb3NvZnQuY29tIn0=","address":"weic@microsoft.com","name":"Wei 
Chen","avatar":"https://img.fullcontact.com/static/2f80c80e777eb2d703f27883ce835477_39ea71a872dbcdb18f591eddd8fc384a719e0c54c35509b6a8a8668820e29d50","linkedin":null,"bio":null,"site":null,"override":null,"membership":[{"name":"Microsoft"}],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[{"avatar":"https://avatars.githubusercontent.com/u/20201601?v=4","username":"jiawei-chen"},{"avatar":"https://avatars.githubusercontent.com
/u/17846414?v=4","username":"GongweiChen"}],"scholar":[{"thirdPartyID":"Qzyy5mcAAAAJ"},{"thirdPartyID":"hlEPkxAAAAAJ"},{"thirdPartyID":"Q9h02J0AAAAJ"},{"thirdPartyID":"skRvzjoAAAAJ"},{"thirdPartyID":"PvICLFYAAAAJ"},{"thirdPartyID":"y0lN_XsAAAAJ"},{"thirdPartyID":"5otfh2EAAAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiI5NmFlOWU1Yy04NDY0LTQzNDMtOTY1ZS00OGI1ZGZhNGYyYjUifQ==","name":"Wei Chen","github":[],"email":[{"avatar":"https://img.fullcontact.com/static/2f80c80e777eb2d703f27883ce835477_39ea71a872dbcdb18f591eddd8fc384a719e0c54c35509b6a8a8668820e29d50"}],"authored":[{"id":"eyJwYXBlcklEIjoiMTYwMS4wNjU1MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1601.06551"},{"id":"eyJwYXBlcklEIjoiMTQwNy44MzM5IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1407.8339"},{"id":"eyJwYXBlcklEIjoiMTcwMy4wMTYxMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1703.01610"},{"id":"eyJwYXBlcklEIjoiMTgwMy4wNDYyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1803.04623"},{"id":"eyJwYXBlcklEIjoiMTYxMC4wNjYwMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1610.06603"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wOTE2MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.09162"},{"id":"eyJwYXBlcklEIjoiMTkwMy4wMTA4MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1903.01083"},{"id":"eyJwYXBlcklEIjoiMTgwNS4wMTY4NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1805.01685"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wODY1NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.08656"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wMzczNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.03737"},{"id":"eyJwYXBlcklEIjoiMjAxMS4wNjM3OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2011.06378"},{"id":"eyJwYXBlcklEIjoiMjMwMS4xMzM5MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2301.13392"},{"id":"eyJwYXBlcklEIjoiMjExMC4xNTc3MSIsInB1Ymxpc2hlciI6ImF
yeGl2In0=","publisher":"arxiv","paperID":"2110.15771"},{"id":"eyJwYXBlcklEIjoiMjQwNS4wOTIyMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2405.09220"},{"id":"eyJwYXBlcklEIjoiMjIwNi4wMTk5NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2206.01995"},{"id":"eyJwYXBlcklEIjoiMjAwMi4wMzU4MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2002.03580"},{"id":"eyJwYXBlcklEIjoiMjAwNi4xMjc3MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.12772"},{"id":"eyJwYXBlcklEIjoiMTgxMS4wNTEzNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1811.05134"},{"id":"eyJwYXBlcklEIjoiMjIwOC4xNDgzNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2208.14837"},{"id":"eyJwYXBlcklEIjoiMjQwNi4wMTM4NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2406.01386"},{"id":"eyJwYXBlcklEIjoiMjMwNS4xNjA3NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2305.16074"},{"id":"eyJwYXBlcklEIjoiMjIwNS4xMjQxOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2205.12418"},{"id":"eyJwYXBlcklEIjoiMjIwNi4wNzg4MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2206.07883"},{"id":"eyJwYXBlcklEIjoiMjEwNi4wMzQwMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2106.03403"},{"id":"eyJwYXBlcklEIjoiMjAwNi4wNzkwNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.07905"},{"id":"eyJwYXBlcklEIjoiMTkxMS4wNTk0OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1911.05949"},{"id":"eyJwYXBlcklEIjoiMjQwMi4wMTQwMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2402.01400"},{"id":"eyJwYXBlcklEIjoiMjQwMy4xODEyNyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2403.18127"},{"id":"eyJwYXBlcklEIjoiMjQwMS4wODk2MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2401.08961"},{"id":"eyJwYXBlcklEIjoiMjEwOS4wNjA3NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2109.06077"},{"id":"eyJwYXBlcklEIjoiM
jAwNi4xMzQxMSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.13411"},{"id":"eyJwYXBlcklEIjoiMTQwMy4wMDU3IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1403.0057"},{"id":"eyJwYXBlcklEIjoiMjAwNy4wMjczOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2007.02738"},{"id":"eyJwYXBlcklEIjoiMjAxMi4wMzM1NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2012.03354"},{"id":"eyJwYXBlcklEIjoiMjEwMi4xMjA5NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2102.12094"},{"id":"eyJwYXBlcklEIjoiMjEwNi4wNTA2NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2106.05065"},{"id":"eyJwYXBlcklEIjoiMjExMS4wNDI5NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2111.04295"},{"id":"eyJwYXBlcklEIjoiMjIwMi4wNzk5NSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.07995"},{"id":"eyJwYXBlcklEIjoiMjMwMy4xNzExMCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2303.17110"},{"id":"eyJwYXBlcklEIjoiMjMwNi4wNzc2MSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2306.07761"},{"id":"eyJwYXBlcklEIjoiNTUwMTUiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"55015"},{"id":"eyJwYXBlcklEIjoiNTQ0MjQiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"54424"},{"id":"eyJwYXBlcklEIjoiNjk4ODEiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"69881"},{"id":"eyJwYXBlcklEIjoiNzA0NDQiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"70444"}]}]}},{"author":null,"node":{"id":"eyJhZGRyZXNzIjoic2h1YWlsaUBjc2UuY3Voay5lZHUuaGsifQ==","address":"shuaili@cse.cuhk.edu.hk","name":null,"avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[],"scholar":[{"thirdPartyID":"Jr-faBMA
AAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiJmNzg0ZDBhOS0yOThiLTQ4MjctODQ4OS03ZDMwNGMzMmE5NTYifQ==","name":"Pengfei Liu","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMTkwMi4wOTE2MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.09162"},{"id":"eyJwYXBlcklEIjoiMTcxMS4wODU5NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1711.08594"},{"id":"eyJwYXBlcklEIjoiMTgwNC4xMDQ4OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1804.10488"},{"id":"eyJwYXBlcklEIjoiMTkwMy4wMTA4MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1903.01083"},{"id":"eyJwYXBlcklEIjoiMTgxMC4wMjU2NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1810.02567"},{"id":"eyJwYXBlcklEIjoiMTgxMi4xMTE3OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1812.11178"},{"id":"eyJwYXBlcklEIjoiMjIwNy4wNTEzOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2207.05138"}]}]}},{"author":null,"node":{"id":"eyJhZGRyZXNzIjoia3NsZXVuZ0Bjc2UuY3Voay5lZHUuaGsifQ==","address":"ksleung@cse.cuhk.edu.hk","name":null,"avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[],"scholar":[{"thirdPartyID":"Jr-faBMAAAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiJmNzg0ZDBhOS0yOThiLTQ4MjctODQ4OS03ZDMwNGMzMmE5NTYifQ==","name":"Pengfei 
Liu","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMTkwMi4wOTE2MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.09162"},{"id":"eyJwYXBlcklEIjoiMTcxMS4wODU5NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1711.08594"},{"id":"eyJwYXBlcklEIjoiMTgwNC4xMDQ4OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1804.10488"},{"id":"eyJwYXBlcklEIjoiMTkwMy4wMTA4MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1903.01083"},{"id":"eyJwYXBlcklEIjoiMTgxMC4wMjU2NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1810.02567"},{"id":"eyJwYXBlcklEIjoiMTgxMi4xMTE3OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1812.11178"},{"id":"eyJwYXBlcklEIjoiMjIwNy4wNTEzOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2207.05138"}]}]}},{"author":"zheng wen","node":{"id":"eyJhZGRyZXNzIjoiendlbkBhZG9iZS5jb20ifQ==","address":"zwen@adobe.com","name":"Z. Wen","avatar":null,"linkedin":null,"bio":null,"site":null,"override":null,"membership":[{"name":"Adobe"}],"paper":[{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}},{"modelsAggregate":{"count":0}}],"github":[{"avatar":"https://avatars1.githubusercontent.com/u/22273298?v=4","username":"Caizhengwen123"}],"scholar":[{"thirdPartyID":"kK3qvd8AAAAJ"}],"twitter":[],"location":[],"owner":[{"id":"eyJ1aWQiOiJjMjYzMjFmMy01MDg5LTQyMzQtODNhMy1hODU0ZTQ3ZjRlMTUifQ==","name":"zheng 
wen","github":[],"email":[],"authored":[{"id":"eyJwYXBlcklEIjoiMTcwMy4wNzYwOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1703.07608"},{"id":"eyJwYXBlcklEIjoiMTUwNy4wNDIwOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1507.04208"},{"id":"eyJwYXBlcklEIjoiMTQxMC4wOTQ5IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1410.0949"},{"id":"eyJwYXBlcklEIjoiMTQwMy41MDQ1IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1403.5045"},{"id":"eyJwYXBlcklEIjoiMjEwNy4wODkyNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2107.08924"},{"id":"eyJwYXBlcklEIjoiMTYwMy4wNTM1OSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1603.05359"},{"id":"eyJwYXBlcklEIjoiMTYwOC4wMzAyMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1608.03023"},{"id":"eyJwYXBlcklEIjoiMTcwMy4wNjUxMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1703.06513"},{"id":"eyJwYXBlcklEIjoiMTcxMi4wNDY0NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1712.04644"},{"id":"eyJwYXBlcklEIjoiMjAxMi4wMTc4MCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2012.01780"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wOTMwNSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.09305"},{"id":"eyJwYXBlcklEIjoiMjEwMy4wNDA0NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2103.04047"},{"id":"eyJwYXBlcklEIjoiMTMwNy40ODQ3IiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1307.4847"},{"id":"eyJwYXBlcklEIjoiMTgwNS4wOTc5MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1805.09793"},{"id":"eyJwYXBlcklEIjoiMTkwNi4wNTI0NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1906.05247"},{"id":"eyJwYXBlcklEIjoiMTYwNS4wNjU5MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1605.06593"},{"id":"eyJwYXBlcklEIjoiMjEwNy4wOTIyNCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2107.09224"},{"id":"eyJwYXBlcklEIjoiMTcxMS4wNzk3OSIsInB
1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1711.07979"},{"id":"eyJwYXBlcklEIjoiMjAxMi4wMzQwOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2012.03408"},{"id":"eyJwYXBlcklEIjoiMTgwNC4xMDQ4OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1804.10488"},{"id":"eyJwYXBlcklEIjoiMTgwNi4wMDg5MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1806.00892"},{"id":"eyJwYXBlcklEIjoiMjExMC4wNDYyOSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2110.04629"},{"id":"eyJwYXBlcklEIjoiMjEwOC4wNDQ0NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2108.04444"},{"id":"eyJwYXBlcklEIjoiMTkwMy4wMTA4MyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1903.01083"},{"id":"eyJwYXBlcklEIjoiMjEwMy4wNzgzOCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2103.07838"},{"id":"eyJwYXBlcklEIjoiMjAwNy4xNTc4OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2007.15788"},{"id":"eyJwYXBlcklEIjoiMjEwNy4wODcxMiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2107.08712"},{"id":"eyJwYXBlcklEIjoiMjAwNi4wNDA3OCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2006.04078"},{"id":"eyJwYXBlcklEIjoiMTkwMi4wNzIzOSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1902.07239"},{"id":"eyJwYXBlcklEIjoiMjIwMy4wMTMwMyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2203.01303"},{"id":"eyJwYXBlcklEIjoiMjEwNS4wMjY0NiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2105.02646"},{"id":"eyJwYXBlcklEIjoiMjQwNi4wOTU3NCIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2406.09574"},{"id":"eyJwYXBlcklEIjoiMTcwOS4wNzE3MiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1709.07172"},{"id":"eyJwYXBlcklEIjoiMTQxMS4zNjUwIiwicHVibGlzaGVyIjoiYXJ4aXYifQ==","publisher":"arxiv","paperID":"1411.3650"},{"id":"eyJwYXBlcklEIjoiMTcwMS4wODcxNiIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1701.08716"},{"id":"ey
JwYXBlcklEIjoiMTgxMS4wMDkxMSIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"1811.00911"},{"id":"eyJwYXBlcklEIjoiMjIwMi4wOTM2NyIsInB1Ymxpc2hlciI6ImFyeGl2In0=","publisher":"arxiv","paperID":"2202.09367"},{"id":"eyJwYXBlcklEIjoiNTM4NjAiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"53860"},{"id":"eyJwYXBlcklEIjoiNzEwMTUiLCJwdWJsaXNoZXIiOiJuZXVyaXBzIn0=","publisher":"neurips","paperID":"71015"}]}]}}]},"__typename":"paper","authorArray":["Shuai Li","Wei Chen","Zheng Wen","Kwong-Sak Leung"]}}],["$","$L18",null,{"container":true,"columns":100,"spacing":{"compact":0,"expanded":2,"large":3},"children":[["$","$L18",null,{"size":{"compact":100,"expanded":100,"large":68},"children":[["$","$7",null,{"children":["$","$L2c",null,{"publisher":"arxiv","paperID":"1903.01083","product":{"paper":"$1b:props:children:props:children:0:props:product","models":"$1b:props:children:props:children:0:props:product:models"},"isWhiteLabelled":false}]}],["$","$7",null,{"children":["$","$L2d",null,{"article":"$L2e","model":"$undefined"}]}]]}],["$","$L18",null,{"size":"grow","children":["$","$L2f",null,{}]}]]}],["$","$7",null,{"children":null}],[["$","audio",null,{"id":"tts"}],["$","$L30",null,{"paperID":"1903.01083","publisher":"arxiv","paperJSON":{"title":"Stochastic Online Learning with Probabilistic Graph Feedback","paperID":"1903.01083","avgLineHeight":10.92,"imgScale":4,"sections":[{"heading":"Abstract","paragraphs":[[{"text":"We consider a problem of stochastic online learning with general probabilistic graph feedback. Two cases are covered. 
(a) The one-step case where for each edge ","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":") ","element":"span"},{"text":"with probability ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/0-0.png","element":"img","alt":" pij","inline":true,"padRight":true},{"text":"in the probabilistic feedback graph. After playing arm ","element":"span"},{"text":"i ","element":"span"},{"text":"the learner observes a sample reward feedback of arm ","element":"span"},{"text":"j ","element":"span"},{"text":"with independent probability ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/0-1.png","element":"img","alt":" pij","inline":true},{"text":". (b) The cascade case where after playing arm ","element":"span"},{"text":"i ","element":"span"},{"text":"the learner observes feedback of all arms ","element":"span"},{"text":"j ","element":"span"},{"text":"in a probabilistic cascade starting from ","element":"span"},{"text":"i ","element":"span"},{"text":"– for each ","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":") ","element":"span"},{"text":"with probability ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/0-2.png","element":"img","alt":" pij","inline":true},{"text":", if arm ","element":"span"},{"text":"i ","element":"span"},{"text":"is played or observed, then a reward sample of arm ","element":"span"},{"text":"j ","element":"span"},{"text":"would be observed with independent probability ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/0-3.png","element":"img","alt":" pij","inline":true},{"text":". 
Previous works mainly focus on deterministic graphs which corresponds to one-step case with ","element":"span"},{"style":{"height":16.71},"width":200.96,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/0-4.png","element":"img","alt":" pij ∈ {0, 1}","inline":true},{"text":", an adversarial sequence of graphs with certain topology guarantees or a specific type of random graphs. We analyze the asymptotic lower bounds and design algorithms in both cases. The regret upper bounds of the algorithms match the lower bounds with high probability.","element":"span"}]]},{"heading":"1 Introduction","paragraphs":[[{"text":"Stochastic online learning is a general framework of sequential decision problem. ","element":"span"},{"text":"At each time, the learner selects (or plays) an action from a given finite action set, receives some random reward and observes some random feedback. One simplest, though often unrealistic, feedback model is full-information feedback where the learning agent can observe the random rewards of all actions no matter which action is selected. ","element":"span"},{"text":"Another popular feedback model is bandit feedback where only the random reward of the selected action is revealed to the learner [","element":"span"},{"text":"Auer et al.","element":"span"},{"href":"#id-0","referenceIndex":4,"text":", 2002","element":"a"},{"text":"]. ","element":"span"},{"text":"Recent studies further generalize them to graph feedback where the feedback model is characterized by a (directed) graph [","element":"span"},{"href":"#id-1","referenceIndex":22,"text":"Mannor and Shamir, 2011","element":"a"},{"text":"]. 
","element":"span"},{"text":"Each edge ","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":") ","element":"span"},{"text":"means the learner will observe the random reward of action ","element":"span"},{"text":"j ","element":"span"},{"text":"if playing action ","element":"span"},{"text":"i","element":"span"},{"text":". This problem is motivated by advertisements where the response for a vacation advertisement could provide sideinformation for a similar vacation place and social networks where the response from a user to a promotion could infer her neighbors to similar offers.","element":"span"}],[{"text":"The ","element":"span"},{"text":"problem of ","element":"span"},{"text":"online ","element":"span"},{"text":"learning ","element":"span"},{"text":"with ","element":"span"},{"text":"graph ","element":"span"},{"text":"feedback ","element":"span"},{"text":"has ","element":"span"},{"text":"been ","element":"span"},{"text":"intensively ","element":"span"},{"text":"studied ","element":"span"},{"text":"in ","element":"span"},{"text":"both ","element":"span"},{"text":"adversarial ","element":"span"},{"text":"[","element":"span"},{"href":"#id-1","referenceIndex":22,"text":"Mannor and Shamir, 2011","element":"a"},{"text":"; ","element":"span"},{"text":"Alon et al.","element":"span"},{"href":"#id-2","referenceIndex":1,"text":", 2015a","element":"a"},{"text":"; ","element":"span"},{"href":"#id-3","referenceIndex":16,"text":"Koc´ak ","element":"a"},{"text":"et al., 2014","element":"span"},{"text":"; ","element":"span"},{"href":"#id-4","referenceIndex":11,"text":"Cohen ","element":"a"},{"text":"et al., 2016","element":"span"},{"text":"] ","element":"span"},{"text":"and ","element":"span"},{"text":"stochastic ","element":"span"},{"text":"cases ","element":"span"},{"text":"[","element":"span"},{"text":"Caron et al.","element":"span"},{"href":"#id-5","referenceIndex":7,"text":", 2012","element":"a"},{"text":"; 
","element":"span"},{"href":"#id-6","referenceIndex":6,"text":"Buccapatnam ","element":"a"},{"text":"et al., 2014","element":"span"},{"text":"; ","element":"span"},{"href":"#id-7","referenceIndex":24,"text":"Tossou ","element":"a"},{"text":"et al., 2017","element":"span"},{"text":"; ","element":"span"},{"text":"Wu et al.","element":"span"},{"href":"#id-8","referenceIndex":30,"text":", 2015","element":"a"},{"text":"]. ","element":"span"},{"text":"However only a few of them do not assume self-loops [","element":"span"},{"text":"Alon et al.","element":"span"},{"href":"#id-2","referenceIndex":1,"text":", 2015a","element":"a"},{"text":"; ","element":"span"},{"text":"Wu et al.","element":"span"},{"href":"#id-8","referenceIndex":30,"text":", 2015","element":"a"},{"text":"] where the reward of the selected action might be invisible. ","element":"span"},{"text":"This general setting would fit into the ","element":"span"},{"text":"partial ","element":"span"},{"text":"monitoring ","element":"span"},{"text":"framework ","element":"span"},{"text":"[","element":"span"},{"href":"#id-9","referenceIndex":5,"text":"Bart´ok ","element":"a"},{"text":"et al., 2014","element":"span"},{"text":"; ","element":"span"},{"href":"#id-10","referenceIndex":18,"text":"Komiyama ","element":"a"},{"text":"et al., 2015","element":"span"},{"text":"] but the literature on the latter mainly focus on finite case where the possible outcomes are finite. 
We also consider general feedback graphs that do not assume self-loops.","element":"span"}],[{"text":"Though some studies assume feedback graphs could vary over time or even invisible to the learner before selecting actions [","element":"span"},{"href":"#id-3","referenceIndex":16,"text":"Koc´ak ","element":"a"},{"text":"et al., 2014","element":"span"},{"text":"; ","element":"span"},{"href":"#id-7","referenceIndex":24,"text":"Tossou ","element":"a"},{"text":"et al., 2017","element":"span"},{"text":"], most works focus on deterministic graphs or an adversarial list of graphs with certain topology guarantees. To the best of our knowledge, only a few of them work on probabilistic graphs with [","element":"span"},{"href":"#id-11","referenceIndex":17,"text":"Koc´ak ","element":"a"},{"text":"et al., 2016","element":"span"},{"text":"; ","element":"span"},{"text":"Alon et al.","element":"span"},{"href":"#id-12","referenceIndex":3,"text":", 2017","element":"a"},{"text":"] on adversarial case and [","element":"span"},{"text":"Liu et al.","element":"span"},{"href":"#id-13","referenceIndex":21,"text":", 2018","element":"a"},{"text":"] on stochastic case and they only discuss about Erd¨os-R´enyi random graphs [","element":"span"},{"href":"#id-14","referenceIndex":13,"text":"Erd˝os and R´enyi, 1960","element":"a"},{"text":"]. Recall that an Erd¨os-R´enyi graph with parameter ","element":"span"},{"text":"p ","element":"span"},{"text":"is by random sampling the edge of every pair of nodes with probability ","element":"span"},{"text":"p ","element":"span"},{"text":"independently.","element":"span"}],[{"text":"We consider general probabilistic feedback graphs with both the one-step case and the cascade case. 
The one-step case is the usual one where the learner observes reward of ","element":"span"},{"text":"j ","element":"span"},{"text":"if edge ","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":") ","element":"span"},{"text":"exists in the random graph and ","element":"span"},{"text":"i ","element":"span"},{"text":"is selected. The cascade case means the learner observes reward of ","element":"span"},{"text":"j ","element":"span"},{"text":"if there is a (directed) path from ","element":"span"},{"text":"i ","element":"span"},{"text":"to ","element":"span"},{"text":"j ","element":"span"},{"text":"in the random graph and ","element":"span"},{"text":"i ","element":"span"},{"text":"is selected. The observations of the cascade case, in other words, follow a probabilistic cascading starting from the selected action — for each edge ","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":") ","element":"span"},{"text":"with probability ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/0-5.png","element":"img","alt":" pij","inline":true},{"text":", if action ","element":"span"},{"text":"i ","element":"span"},{"text":"is either played or observed, then with an independent probability ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/0-6.png","element":"img","alt":" pij","inline":true,"padRight":true},{"text":"a random reward sample of action ","element":"span"},{"text":"j ","element":"span"},{"text":"will be observed. As a motivating example, consider the information propagation in social networks. 
If selecting a user in a social network causes an information cascade in the social network, one may be able to observe further feedback from the cascade users.","element":"span"}],[{"text":"This paper makes three major contributions.","element":"span"}],[{"text":"1. We formalize the setting of stochastic online learning with general probabilistic graph feedback and consider","element":"span"}],[{"style":{"width":"65%"},"width":637,"height":37,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-0.png","element":"img"}],[{"text":"2. We derive asymptotic lower bounds for both the one-step and the cascade cases.","element":"span"}],[{"text":"3. We design algorithms for both the one-step and the cascade cases and analyze their regrets. Their asymptotic regret upper bounds match the asymptotic lower bounds with high probability.","element":"span"}],[{"text":"Related work ","element":"span"},{"text":"The studies on online learning with graph feedback start from adversarial online learning with side observations where a decision maker can observe rewards of other actions as well as observe the reward of the selected action [","element":"span"},{"href":"#id-1","referenceIndex":22,"text":"Mannor and Shamir, 2011","element":"a"},{"text":"]. ","element":"span"},{"text":"The observation structure can be encoded as a directed graph where there is an edge ","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":") ","element":"span"},{"text":"if the reward of action ","element":"span"},{"text":"j ","element":"span"},{"text":"is observed when ","element":"span"},{"text":"i ","element":"span"},{"text":"is selected. Their setting assumes that self-loops exist on every node. ","element":"span"},{"text":"Alon ","element":"span"},{"href":"#id-2","referenceIndex":1,"text":"et al. 
","element":"a"},{"text":"[","element":"span"},{"href":"#id-2","referenceIndex":1,"text":"2015a","element":"a"},{"text":"] then generalize to arbitrary directed graphs as long as each action is observable by selecting some action. They show the structure of feedback graph controls the inherent difficulty of the learning problem and present a classification over graphs. These works assume the feedback graph is fixed over time and known to the learner. A follow-up [","element":"span"},{"text":"Alon et al.","element":"span"},{"href":"#id-15","referenceIndex":2,"text":", 2015b","element":"a"},{"text":"] extends to time-varying feedback graphs where the graphs are revealed either at the beginning of the round or at the end of the round but assumes good topology properties on the graphs. ","element":"span"},{"href":"#id-3","referenceIndex":16,"text":"Koc´ak ","element":"a"},{"text":"et al. ","element":"span"},{"text":"[","element":"span"},{"href":"#id-3","referenceIndex":16,"text":"2014","element":"a"},{"text":"] also allow the feedback graph to vary over time and can be revealed to the learner at the end of the round, but assume the existence of self-loops. ","element":"span"},{"href":"#id-4","referenceIndex":11,"text":"Cohen ","element":"a"},{"text":"et al. ","element":"span"},{"text":"[","element":"span"},{"href":"#id-4","referenceIndex":11,"text":"2016","element":"a"},{"text":"] assume the graph is not revealed in both adversarial and stochastic cases but also assume the existence of self-loops.","element":"span"}],[{"text":"Besides the last one, there are also other works on the stochastic case. ","element":"span"},{"href":"#id-5","referenceIndex":7,"text":"Caron ","element":"a"},{"text":"et al. 
","element":"span"},{"text":"[","element":"span"},{"href":"#id-5","referenceIndex":7,"text":"2012","element":"a"},{"text":"] first study the stochastic case with side observations and design UCB-like algorithms with improved regret bound over the standard one without additional feedback. ","element":"span"},{"href":"#id-6","referenceIndex":6,"text":"Buccapatnam ","element":"a"},{"text":"et al. ","element":"span"},{"text":"[","element":"span"},{"href":"#id-6","referenceIndex":6,"text":"2014","element":"a"},{"text":"] derive an asymptotic lower bound and design two algorithms that are near-optimal. ","element":"span"},{"href":"#id-7","referenceIndex":24,"text":"Tossou ","element":"a"},{"text":"et al. ","element":"span"},{"text":"[","element":"span"},{"href":"#id-7","referenceIndex":24,"text":"2017","element":"a"},{"text":"] apply Thompson sampling and allow the feedback graph to be unknown and/or changing. They bound the Bayesian regret in terms of the size of minimum clique covering. All these works assume the feedback graph has self-loops. ","element":"span"},{"text":"Wu ","element":"span"},{"href":"#id-8","referenceIndex":30,"text":"et al. ","element":"a"},{"text":"[","element":"span"},{"href":"#id-8","referenceIndex":30,"text":"2015","element":"a"},{"text":"] consider general feedback graphs but assume different observation variance from different choices of actions. They provide non-asymptotic problem-dependent regret lower bound and also design algorithms that achieve the problem-dependent lower bound and the minimax lower bounds.","element":"span"}],[{"text":"There are several works on Erd¨os-R´enyi random feedback graphs where the feedback graph at each time is randomly generated by Erd¨os-R´enyi model. ","element":"span"},{"href":"#id-11","referenceIndex":17,"text":"Koc´ak ","element":"a"},{"text":"et al. 
","element":"span"},{"text":"[","element":"span"},{"href":"#id-11","referenceIndex":17,"text":"2016","element":"a"},{"text":"] consider adversarial case with the unknown generating probability of the feedback graphs. ","element":"span"},{"text":"Liu ","element":"span"},{"href":"#id-13","referenceIndex":21,"text":"et al. ","element":"a"},{"text":"[","element":"span"},{"href":"#id-13","referenceIndex":21,"text":"2018","element":"a"},{"text":"] consider stochastic case and design a randomized policy with Bayesian regret guarantee. Both of them assume self-observability. An updated version [","element":"span"},{"text":"Alon et al.","element":"span"},{"href":"#id-12","referenceIndex":3,"text":", 2017","element":"a"},{"text":"] of [","element":"span"},{"text":"Alon et al.","element":"span"},{"href":"#id-15","referenceIndex":2,"text":", 2015b","element":"a"},{"text":"] extends one result to Erd¨os-R´enyi model in the adversarial case.","element":"span"}],[{"text":"The setting of graph feedback can be fit into a more general setting of partial monitoring [","element":"span"},{"href":"#id-16","referenceIndex":23,"text":"Rustichini, 1999","element":"a"},{"text":"; ","element":"span"},{"href":"#id-17","referenceIndex":8,"text":"Cesa-Bianchi and Lugosi, 2006","element":"a"},{"text":"] where feedback matrix and reward matrix ","element":"span"},{"text":"are given for each ","element":"span"},{"text":"pair of the chosen action and the environment. ","element":"span"},{"href":"#id-9","referenceIndex":5,"text":"Bart´ok ","element":"a"},{"text":"et al. ","element":"span"},{"text":"[","element":"span"},{"href":"#id-9","referenceIndex":5,"text":"2014","element":"a"},{"text":"] make a significant progress on classifying finite adversarial partial monitoring games which is completed by [","element":"span"},{"href":"#id-18","referenceIndex":20,"text":"Lattimore and Szepesvari, 2018","element":"a"},{"text":"]. 
","element":"span"},{"href":"#id-10","referenceIndex":18,"text":"Komiyama ","element":"a"},{"text":"et al. ","element":"span"},{"text":"[","element":"span"},{"href":"#id-10","referenceIndex":18,"text":"2015","element":"a"},{"text":"] derive a problem-dependent regret lower bound and design an algorithm with asymptotically optimal regret upper bound in the stochastic case. Most studies on general partial monitoring framework focus on finite case where the number of actions and possible outcomes are finite. The algorithms for general partial monitoring games are not efficient in our case since the feedback matrix might be infinite or exponentially large.","element":"span"}],[{"text":"The cascade observation feedback resembles the independent cascade model in the context of influence maximization studies [","element":"span"},{"href":"#id-19","referenceIndex":15,"text":"Kempe ","element":"a"},{"text":"et al., 2003","element":"span"},{"text":"; ","element":"span"},{"text":"Chen et al.","element":"span"},{"href":"#id-20","referenceIndex":9,"text":", 2013","element":"a"},{"text":"], but the goal is different: influence maximization aims at finding a set of ","element":"span"},{"text":"k ","element":"span"},{"text":"seeds that generates the largest expected cascade size, while our goal is to find the best action (arm) utilizing the cascade feedback. 
Influence maximization has been combined with online learning in several studies [","element":"span"},{"href":"#id-21","referenceIndex":26,"text":"Vaswani ","element":"a"},{"text":"et al., 2015","element":"span"},{"text":"; ","element":"span"},{"text":"Chen et al.","element":"span"},{"href":"#id-22","referenceIndex":10,"text":", 2016","element":"a"},{"text":"; ","element":"span"},{"text":"Wen et al.","element":"span"},{"href":"#id-23","referenceIndex":29,"text":", 2017","element":"a"},{"text":"], but again their goal is to maximize influence cascade size while using online learning to gradually learn edge probabilities.","element":"span"}]]},{"heading":"2 Settings","paragraphs":[[{"text":"The feedback structure is characterized by a directed graph ","element":"span"},{"text":"G ","element":"span"},{"text":"= (","element":"span"},{"text":"V, E, p","element":"span"},{"text":") ","element":"span"},{"text":"over the set of actions ","element":"span"},{"text":"V ","element":"span"},{"text":"= [","element":"span"},{"text":"K","element":"span"},{"text":"]","element":"span"},{"text":", where ","element":"span"},{"text":"E ","element":"span"},{"text":"is the set of edges between actions and ","element":"span"},{"style":{"height":16},"width":250.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-1.png","element":"img","alt":" p : E → [0, 1]","inline":true,"padRight":true},{"text":"is a mapping describing live probabilities on edges. ","element":"span"},{"text":"G ","element":"span"},{"text":"is fixed and known to the learner. 
Assume for each action ","element":"span"},{"text":"j","element":"span"},{"text":", there is an edge ","element":"span"},{"style":{"height":16},"width":160.6,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-2.png","element":"img","alt":" (i, j) ∈ E","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"text":"i","element":"span"},{"text":".","element":"span"}],[{"text":"At each time ","element":"span"},{"text":"t","element":"span"},{"text":", the environment privately draws a reward vector ","element":"span"},{"style":{"height":16},"width":355.84,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-3.png","element":"img","alt":" rt = (rt(i) : i ∈ [K])","inline":true,"padRight":true},{"text":"and a random realization ","element":"span"},{"style":{"height":13.1},"width":43.2,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-4.png","element":"img","alt":" Gt","inline":true,"padRight":true},{"text":"of ","element":"span"},{"text":"G ","element":"span"},{"text":"where any edge ","element":"span"},{"style":{"height":16},"width":161.08,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-5.png","element":"img","alt":" (i, j) ∈ E","inline":true,"padRight":true},{"text":"is live in ","element":"span"},{"style":{"height":13.1},"width":43.2,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-6.png","element":"img","alt":" Gt","inline":true,"padRight":true},{"text":"with probability ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-7.png","element":"img","alt":" pij","inline":true,"padRight":true},{"text":"independently of other edges. 
Here ","element":"span"},{"style":{"height":16},"width":77.44,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-8.png","element":"img","alt":" rt(i)","inline":true,"padRight":true},{"text":"is an independent random sample from the associated Gaussian reward distribution of action ","element":"span"},{"text":"i ","element":"span"},{"text":"which has mean ","element":"span"},{"style":{"height":13.1},"width":29.72,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-9.png","element":"img","alt":" θi","inline":true,"padRight":true},{"text":"and variance ","element":"span"},{"text":"1 ","element":"span"},{"text":"for each ","element":"span"},{"style":{"height":16},"width":132.44,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-10.png","element":"img","alt":"i ∈ [K]","inline":true,"padRight":true},{"text":"independently of other actions. The assumption of the Gaussian distributions is mainly for the sake of simplicity and can be generalized (see Section ","element":"span"},{"href":"#id-24","text":"4.4","element":"a"},{"text":"). Then the learner selects an action ","element":"span"},{"style":{"height":12.7},"width":25.92,"height":31.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-11.png","element":"img","alt":" it","inline":true,"padRight":true},{"text":"and receives reward ","element":"span"},{"style":{"height":16},"width":90.88,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-12.png","element":"img","alt":" rt(it)","inline":true,"padRight":true},{"text":"which might be invisible to the learner. We consider the following two kinds of feedback models. 
One-Step Case ","element":"span"},{"text":"The learner will receive feedback ","element":"span"},{"style":{"height":16},"width":147.04,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-13.png","element":"img","alt":" (j, rt(j))","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"text":"j ","element":"span"},{"text":"such that there is an edge ","element":"span"},{"style":{"height":16},"width":95.68,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-14.png","element":"img","alt":" (it, j)","inline":true,"padRight":true},{"text":"in ","element":"span"},{"style":{"height":13.1},"width":43.2,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-15.png","element":"img","alt":" Gt","inline":true},{"text":". Cascade Case ","element":"span"},{"text":"The learner will receive feedback ","element":"span"},{"style":{"height":16},"width":147.04,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-16.png","element":"img","alt":" (j, rt(j))","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"text":"j ","element":"span"},{"text":"such that there is a directed path from ","element":"span"},{"style":{"height":12.71},"width":25.92,"height":31.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-17.png","element":"img","alt":" it","inline":true,"padRight":true},{"text":"to ","element":"span"},{"text":"j ","element":"span"},{"text":"in ","element":"span"},{"style":{"height":13.11},"width":43.2,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-18.png","element":"img","alt":" Gt","inline":true},{"text":".","element":"span"}],[{"text":"Note that [","element":"span"},{"text":"Caron et al.","element":"span"},{"href":"#id-5","referenceIndex":7,"text":", 2012","element":"a"},{"text":"; ","element":"span"},{"href":"#id-6","referenceIndex":6,"text":"Buccapatnam 
","element":"a"},{"text":"et al., 2014","element":"span"},{"text":"; ","element":"span"},{"href":"#id-7","referenceIndex":24,"text":"Tossou ","element":"a"},{"text":"et al., 2017","element":"span"},{"text":"; ","element":"span"},{"text":"Alon et al.","element":"span"},{"href":"#id-2","referenceIndex":1,"text":", 2015a","element":"a"},{"text":"; ","element":"span"},{"text":"Wu et al.","element":"span"},{"href":"#id-8","referenceIndex":30,"text":", 2015","element":"a"},{"text":"] consider deterministic feedback graphs which correspond to our one-step case with each ","element":"span"},{"style":{"height":11.51},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-19.png","element":"img","alt":" pij","inline":true,"padRight":true},{"text":"equal to ","element":"span"},{"text":"1","element":"span"},{"text":". The first three of them also assume self-loops exist in the graph. The work [","element":"span"},{"text":"Liu et al.","element":"span"},{"href":"#id-13","referenceIndex":21,"text":", 2018","element":"a"},{"text":"] corresponds to our one-step case with ","element":"span"},{"text":"E ","element":"span"},{"text":"containing the edge between every pair of actions and all ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-20.png","element":"img","alt":" pij","inline":true,"padRight":true},{"text":"have the same value.","element":"span"}],[{"text":"Suppose ","element":"span"},{"style":{"height":16},"width":165.08,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/1-21.png","element":"img","alt":" θi ∈ [0, 1]","inline":true,"padRight":true},{"text":"for each action ","element":"span"},{"text":"i","element":"span"},{"text":". 
The learner’s objective is to maximize its expected cumulative reward, or equivalently, ","element":"span"}],[{"text":"to minimize its expected cumulative regret","element":"span"}],[{"style":{"width":"66%"},"width":645,"height":81,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-0.png","element":"img"}],[{"text":"where the expectation is over the randomness of ","element":"span"},{"style":{"height":9.1},"width":29.76,"height":22.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-1.png","element":"img","alt":" rt","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":13.1},"width":43.2,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-2.png","element":"img","alt":" Gt","inline":true},{"text":". We will omit ","element":"span"},{"text":"G ","element":"span"},{"text":"in the regret expression if the context is clear.","element":"span"}]]},{"heading":"3 Asymptotic Lower Bounds","paragraphs":[[{"text":"3.1 ","element":"span"},{"text":"Lower Bound for the One-Step Case","element":"span"}],[{"text":"For the sake of simplicity on the following index notations, assume there is a unique optimal action for ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-3.png","element":"img","alt":" θ","inline":true},{"text":". Let ","element":"span"},{"style":{"height":16},"width":84.64,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-4.png","element":"img","alt":" ik(θ)","inline":true,"padRight":true},{"text":"be the ","element":"span"},{"text":"k","element":"span"},{"text":"-th best action index for the parameter ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-5.png","element":"img","alt":" θ","inline":true},{"text":". 
Then ","element":"span"},{"style":{"height":16.48},"width":131.32,"height":41.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-6.png","element":"img","alt":" θi1(θ) >","inline":true},{"style":{"height":16},"width":239.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-7.png","element":"img","alt":"θi, ∀i ̸= i1(θ)","inline":true},{"text":". Denote ","element":"span"},{"style":{"height":17.68},"width":342.2,"height":44.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-8.png","element":"img","alt":" ∆i(θ) = θi1(θ) − θi","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":16},"width":134.68,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-9.png","element":"img","alt":" ∆(θ) =","inline":true},{"style":{"height":17.36},"width":493.12,"height":43.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-10.png","element":"img","alt":"(∆i(θ) : i ∈ [K]) ∈ [0, 1]K","inline":true},{"text":". Let ","element":"span"},{"style":{"height":17.36},"width":371.48,"height":43.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-11.png","element":"img","alt":" V in(j) = {i ∈ [K] :","inline":true},{"style":{"height":16},"width":201.92,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-12.png","element":"img","alt":"(i, j) ∈ E}","inline":true,"padRight":true},{"text":"be the set of incoming neighbors of action ","element":"span"},{"text":"j","element":"span"},{"text":". 
Define","element":"span"}],[{"id":"id-33","style":{"width":"94%"},"width":916,"height":392,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-13.png","element":"img"}],[{"text":"Recall that an algorithm is consistent if ","element":"span"},{"style":{"height":16},"width":266.56,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-14.png","element":"img","alt":" Rθ(T ) = o(T a)","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"text":"a > ","element":"span"},{"text":"0 ","element":"span"},{"text":"and any parameter ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-15.png","element":"img","alt":" θ","inline":true},{"text":". Then the asymptotic lower bound for any consistent algorithm is provided in the following theorem.","element":"span"}],[{"text":"Theorem 1 For any consistent algorithm, the regret satisfies","element":"span"}],[{"id":"id-30","style":{"width":"78%"},"width":763,"height":93,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-16.png","element":"img"}],[{"text":"Note this lower bound can easily recover the lower bound in [","element":"span"},{"href":"#id-25","referenceIndex":27,"text":"Wang and Chen, 2017","element":"a"},{"text":", Theorem 3] where they only consider a special probabilistic graph ","element":"span"},{"text":"G","element":"span"},{"text":". Proof. 
Fix any consistent algorithm and any parameter ","element":"span"},{"style":{"height":11.6},"width":63,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-17.png","element":"img","alt":" θ ∈","inline":true},{"style":{"height":17.36},"width":108.16,"height":43.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-18.png","element":"img","alt":"[0, 1]K","inline":true},{"text":".","element":"span"}],[{"text":"For any ","element":"span"},{"style":{"height":16},"width":205.6,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-19.png","element":"img","alt":" j ≠ i1(θ)","inline":true},{"text":", define ","element":"span"},{"style":{"height":10.8},"width":153.88,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-20.png","element":"img","alt":" θ′ = θ","inline":true,"padRight":true},{"text":"except ","element":"span"},{"style":{"height":17.42},"width":103,"height":43.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-21.png","element":"img","alt":" θ′j =","inline":true},{"style":{"height":16.48},"width":177.92,"height":41.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-22.png","element":"img","alt":"θi1(θ) + ε","inline":true},{"text":". 
","element":"span"},{"text":"Let ","element":"span"},{"text":"P ","element":"span"},{"text":"and ","element":"span"},{"style":{"height":10.8},"width":38.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-23.png","element":"img","alt":" P′","inline":true,"padRight":true},{"text":"be the probability over all outcomes in ","element":"span"},{"text":"T ","element":"span"},{"text":"rounds when expected reward is ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-24.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":10.8},"width":33.68,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-25.png","element":"img","alt":"θ′","inline":true,"padRight":true},{"text":"respectively. ","element":"span"},{"text":"By high-dimensional Pinsker’s inequality ([","element":"span"},{"href":"#id-26","referenceIndex":19,"text":"Lattimore and Szepesvari, 2017","element":"a"},{"text":", Lemma 5])","element":"span"}],[{"style":{"width":"75%"},"width":731,"height":145,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-26.png","element":"img"}],[{"text":"Note that","element":"span"}],[{"style":{"width":"95%"},"width":927,"height":244,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-27.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":18.62},"width":189.28,"height":46.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-28.png","element":"img","alt":" r(θj), r(θ′j)","inline":true,"padRight":true},{"text":"are the Gaussian random variables with ","element":"span"},{"text":"mean ","element":"span"},{"style":{"height":17.23},"width":86.96,"height":43.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-29.png","element":"img","alt":" θj, 
θ′j","inline":true,"padRight":true},{"text":"(and variance ","element":"span"},{"text":"1","element":"span"},{"text":") respectively. Then","element":"span"}],[{"style":{"width":"96%"},"width":940,"height":562,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-30.png","element":"img"}],[{"text":"Since the algorithm is consistent, ","element":"span"},{"style":{"height":16},"width":277.12,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-31.png","element":"img","alt":" Rθ(T) = o(T^a)","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"text":"a > ","element":"span"},{"text":"0 ","element":"span"},{"text":"and any parameter ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-32.png","element":"img","alt":" θ","inline":true},{"text":", or equivalently","element":"span"}],[{"style":{"width":"99%"},"width":970,"height":502,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-33.png","element":"img"}],[{"text":"Similar result follows","element":"span"}],[{"style":{"width":"84%"},"width":818,"height":111,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-34.png","element":"img"}],[{"text":"Thus the vector ","element":"span"},{"style":{"height":24.05},"width":470.56,"height":60.12,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-35.png","element":"img","alt":" lim inf_{T→∞} E[N(T)] / log T ∈ C(θ)","inline":true},{"text":". Recall the regret is ","element":"span"},{"style":{"height":20.46},"width":546.88,"height":51.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-36.png","element":"img","alt":" Rθ(T) = ∑_{i=1}^{K} E[Ni(T)] ∆i(θ)","inline":true},{"text":". The result fol","element":"span"},{"text":"lows. 
","element":"span"},{"style":{"height":0},"width":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-37.png","element":"img","alt":" □","inline":true}],[{"text":"3.2 ","element":"span"},{"text":"Lower Bound for the Cascade Case","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":13.42},"width":44.2,"height":33.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-38.png","element":"img","alt":" p′ij","inline":true,"padRight":true},{"text":"be the probability that there is a directed path from ","element":"span"},{"text":"i ","element":"span"},{"text":"to ","element":"span"},{"text":"j ","element":"span"},{"text":"in a random realization of ","element":"span"},{"text":"G","element":"span"},{"text":". Define","element":"span"}],[{"style":{"width":"96%"},"width":943,"height":257,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-39.png","element":"img"}],[{"id":"id-29","text":"Theorem 2 ","element":"span"},{"text":"For any consistent algorithm, the regret satisfies","element":"span"}],[{"style":{"width":"58%"},"width":564,"height":92,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-40.png","element":"img"}],[{"text":"Note that the computation of ","element":"span"},{"style":{"height":13.23},"width":44.2,"height":33.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-41.png","element":"img","alt":" p′ij","inline":true,"padRight":true},{"text":"is #P-hard for general ","element":"span"},{"id":"id-31","text":"graphs [","element":"span"},{"href":"#id-27","referenceIndex":25,"text":"Valiant, 1979","element":"a"},{"text":"; ","element":"span"},{"text":"Wang et al.","element":"span"},{"href":"#id-28","referenceIndex":28,"text":", 2012","element":"a"},{"text":"]. 
","element":"span"},{"text":"Thus the lower bound is not efficiently computable even when ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-42.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"is known.","element":"span"}],[{"text":"The proof of Theorem ","element":"span"},{"href":"#id-29","text":"2 ","element":"a"},{"text":"is similar to that of Theorem ","element":"span"},{"href":"#id-30","text":"1 ","element":"a"},{"text":"by replacing (","element":"span"},{"href":"#id-31","text":"3","element":"a"},{"text":") with the following formula","element":"span"}],[{"style":{"width":"80%"},"width":787,"height":86,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/2-43.png","element":"img"}]]},{"heading":"4 Algorithm and Analysis","paragraphs":[[{"text":"Let ","element":"span"},{"style":{"height":16},"width":90.88,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-0.png","element":"img","alt":" Ni(t)","inline":true,"padRight":true},{"text":"be the number of times the learner selects an action ","element":"span"},{"text":"i ","element":"span"},{"text":"and ","element":"span"},{"style":{"height":16},"width":447.04,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-1.png","element":"img","alt":" N(t) = (Ni(t) : i ∈ [K])","inline":true,"padRight":true},{"text":"by the end of time ","element":"span"},{"text":"t","element":"span"},{"text":". 
Let ","element":"span"},{"style":{"height":17.33},"width":30.72,"height":43.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-2.png","element":"img","alt":"ˆθt","inline":true,"padRight":true},{"text":"be the sample-mean estimates of ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-3.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"by the end of time ","element":"span"},{"text":"t","element":"span"},{"text":". Let ","element":"span"},{"style":{"height":16.7},"width":97.6,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-4.png","element":"img","alt":" nij(t)","inline":true,"padRight":true},{"text":"be the number of times that action ","element":"span"},{"text":"i ","element":"span"},{"text":"is selected and reward for action ","element":"span"},{"text":"j ","element":"span"},{"text":"is observed by the end of time ","element":"span"},{"text":"t","element":"span"},{"text":". Then ","element":"span"},{"style":{"height":16.7},"width":465.16,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-5.png","element":"img","alt":"E [nij(t) | Ni(t)] = Ni(t)pij","inline":true},{"text":". 
Let ","element":"span"},{"style":{"height":16.8},"width":310.24,"height":42,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-6.png","element":"img","alt":" mj(t) = ∑_i nij(t)","inline":true,"padRight":true},{"text":"be the ","element":"span"},{"text":"number of observations for action ","element":"span"},{"text":"j ","element":"span"},{"text":"by the end of time ","element":"span"},{"text":"t","element":"span"},{"text":".","element":"span"}],[{"text":"4.1 ","element":"span"},{"text":"One-Step Uniform Case","element":"span"}],[{"text":"The uniform case in which all ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-7.png","element":"img","alt":" pij","inline":true},{"text":"’s have the same value ","element":"span"},{"text":"p ","element":"span"},{"text":"is first considered in this section. When ","element":"span"},{"text":"E ","element":"span"},{"text":"contains edges between every pair of actions, this graph reduces to the Erdős-Rényi random graph with parameter ","element":"span"},{"text":"p","element":"span"},{"text":".","element":"span"}],[{"text":"Let ","element":"span"},{"style":{"height":19.6},"width":458.32,"height":49,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-8.png","element":"img","alt":" Mj(t) = ∑_{i∈V^in(j)} Ni(t)p","inline":true,"padRight":true},{"text":"be the expected number ","element":"span"},{"text":"of observations for action ","element":"span"},{"text":"j ","element":"span"},{"text":"at the end of time ","element":"span"},{"text":"t","element":"span"},{"text":". 
","element":"span"},{"text":"Then ","element":"span"},{"style":{"height":16.7},"width":440.8,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-9.png","element":"img","alt":"E [mj(t) | Mj(t)] = Mj(t)","inline":true},{"text":".","element":"span"}],[{"id":"id-32","style":{"width":"100%"},"width":974,"height":910,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-10.png","element":"img"}],[{"text":"The pseudocode of the algorithm is provided in Algorithm ","element":"span"},{"href":"#id-32","text":"1","element":"a"},{"text":". It starts with the initialization of ","element":"span"},{"style":{"height":10.8},"width":51.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-11.png","element":"img","alt":" N e","inline":true,"padRight":true},{"text":"and the estimates of ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-12.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"(line ","element":"span"},{"href":"#id-32","text":"1","element":"a"},{"text":"). Here ","element":"span"},{"style":{"height":10.8},"width":51.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-13.png","element":"img","alt":" N e","inline":true,"padRight":true},{"text":"is the number of exploration rounds for the learner to know more about unknown ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-14.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"which will be clearer later. 
At each time ","element":"span"},{"text":"t","element":"span"},{"text":", if for some ","element":"span"},{"text":"j ","element":"span"},{"text":"the real observation times of action ","element":"span"},{"text":"j ","element":"span"},{"text":"is less than half the expected observation times (line ","element":"span"},{"href":"#id-32","text":"3","element":"a"},{"text":"), then the learner selects a parent of ","element":"span"},{"text":"j ","element":"span"},{"text":"to try to observe reward of ","element":"span"},{"text":"j ","element":"span"},{"text":"once more (line ","element":"span"},{"href":"#id-32","text":"4","element":"a"},{"text":") and keeps ","element":"span"},{"style":{"height":10.8},"width":51.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-15.png","element":"img","alt":" N e","inline":true,"padRight":true},{"text":"unchanged (line ","element":"span"},{"href":"#id-32","text":"5","element":"a"},{"text":"). Note that ","element":"span"},{"style":{"height":16.71},"width":473.24,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-16.png","element":"img","alt":" E [mj(t) | Mj(t) = m] = m","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":16.7},"width":97.6,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-17.png","element":"img","alt":" mj(t)","inline":true,"padRight":true},{"text":"will concentrate at ","element":"span"},{"text":"m ","element":"span"},{"text":"as ","element":"span"},{"text":"m ","element":"span"},{"text":"goes to infinity. 
The condition ","element":"span"},{"style":{"height":16.7},"width":307.04,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-18.png","element":"img","alt":" mj(t) < Mj(t)/2","inline":true,"padRight":true},{"text":"means part of the realizations of graph ","element":"span"},{"text":"G ","element":"span"},{"text":"is far from the expectation and ","element":"span"},{"text":"2 ","element":"span"},{"text":"can be changed to other larger-than-","element":"span"},{"text":"1 ","element":"span"},{"text":"constant. This is one of the key differences from deterministic graph feedback where the number of observations is well controlled by just selecting actions. While under the probabilistic graph feedback, there is a gap between the number of real observations and expected number of observations.","element":"span"}],[{"text":"When ","element":"span"},{"style":{"height":16.7},"width":296.48,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-19.png","element":"img","alt":" mj(t) ≥ Mj(t)/2","inline":true,"padRight":true},{"text":"for all ","element":"span"},{"text":"j","element":"span"},{"text":", then the realizations of ","element":"span"},{"text":"G ","element":"span"},{"text":"are good enough and the learner can rely on the quantities of selections to control the accuracy of the estimates. 
If the selection vector is good enough for current ","element":"span"},{"style":{"height":15.02},"width":22.88,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-20.png","element":"img","alt":"ˆθ","inline":true,"padRight":true},{"text":"under current accuracy level (line ","element":"span"},{"href":"#id-32","text":"6","element":"a"},{"text":"), then the learner will exploit the current best action (line ","element":"span"},{"href":"#id-32","text":"7","element":"a"},{"text":") and keep ","element":"span"},{"style":{"height":10.8},"width":51.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-21.png","element":"img","alt":" N e","inline":true,"padRight":true},{"text":"unchanged. Here ","element":"span"},{"style":{"height":16},"width":73.6,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-22.png","element":"img","alt":" C(·)","inline":true,"padRight":true},{"text":"is defined as in (","element":"span"},{"href":"#id-33","text":"1","element":"a"},{"text":") and represents the set of good selected “fractions” of actions that are able to identify the reward gaps between actions.","element":"span"}],[{"text":"If the current selection vector ","element":"span"},{"text":"N ","element":"span"},{"text":"is not good enough, then the learner will first check if ","element":"span"},{"style":{"height":15.02},"width":23.36,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-23.png","element":"img","alt":"ˆθ","inline":true,"padRight":true},{"text":"is close enough to ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-24.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"(line ","element":"span"},{"href":"#id-32","text":"9","element":"a"},{"text":"-","element":"span"},{"href":"#id-32","text":"11","element":"a"},{"text":") and if yes, will 
explore according to current ","element":"span"},{"style":{"height":15.02},"width":23.36,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-25.png","element":"img","alt":"ˆθ","inline":true},{"text":". The number ","element":"span"},{"style":{"height":10.8},"width":51.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-26.png","element":"img","alt":" N e","inline":true,"padRight":true},{"text":"of exploration rounds for the learner to know more about ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-27.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"will increase in this part (line ","element":"span"},{"href":"#id-32","text":"11","element":"a"},{"text":"&","element":"span"},{"href":"#id-32","text":"14","element":"a"},{"text":"). The condition of line ","element":"span"},{"href":"#id-32","text":"9 ","element":"a"},{"text":"has an auxiliary function ","element":"span"},{"style":{"height":16},"width":279.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-28.png","element":"img","alt":" β : N → [0, ∞)","inline":true,"padRight":true},{"text":"to guide the exploration such that ","element":"span"},{"style":{"height":15.03},"width":23.36,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-29.png","element":"img","alt":"ˆθ","inline":true,"padRight":true},{"text":"will be close to ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-30.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"in the long run. 
This auxiliary function is also crucial in previous work [","element":"span"},{"text":"Wu et al.","element":"span"},{"href":"#id-8","referenceIndex":30,"text":", 2015","element":"a"},{"text":"] to control the regret bound in the asymptotic sense. The auxiliary function ","element":"span"},{"style":{"height":14.4},"width":23,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-31.png","element":"img","alt":" β","inline":true,"padRight":true},{"text":"can be any non-decreasing function satisfying ","element":"span"},{"style":{"height":16},"width":300.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-32.png","element":"img","alt":" 0 ≤ β(n) ≤ n/2","inline":true,"padRight":true},{"text":"and the subadditivity ","element":"span"},{"style":{"height":16},"width":439.84,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-33.png","element":"img","alt":"β(m + n) ≤ β(m) + β(n)","inline":true},{"text":". 
If some component of ","element":"span"},{"style":{"height":15.02},"width":23.36,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-34.png","element":"img","alt":"ˆθ","inline":true,"padRight":true},{"text":"has not been explored enough (line ","element":"span"},{"href":"#id-32","text":"9","element":"a"},{"text":"), then the learner selects a parent to try to get one more observation (line ","element":"span"},{"href":"#id-32","text":"10","element":"a"},{"text":") and increases ","element":"span"},{"style":{"height":10.8},"width":51.48,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-35.png","element":"img","alt":"N e","inline":true,"padRight":true},{"text":"(line ","element":"span"},{"href":"#id-32","text":"11","element":"a"},{"text":").","element":"span"}],[{"text":"When all components of ","element":"span"},{"style":{"height":15.03},"width":23.36,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-36.png","element":"img","alt":"ˆθ","inline":true,"padRight":true},{"text":"are close to ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-37.png","element":"img","alt":" θ","inline":true},{"text":", the learner selects an action according to the current ","element":"span"},{"style":{"height":15.03},"width":23.36,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-38.png","element":"img","alt":"ˆθ","inline":true,"padRight":true},{"text":"with minimal cost on the regret instructed by the asymptotic lower bound (","element":"span"},{"href":"#id-30","text":"2","element":"a"},{"text":"). 
Here ","element":"span"},{"style":{"height":16},"width":92.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-39.png","element":"img","alt":"ci(θ′)","inline":true,"padRight":true},{"text":"denotes any optimal solution of the linear programming problem that minimizes ","element":"span"},{"style":{"height":16},"width":97.6,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-40.png","element":"img","alt":" ⟨c, θ′⟩","inline":true,"padRight":true},{"text":"among all ","element":"span"},{"style":{"height":16},"width":162.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-41.png","element":"img","alt":" c ∈ C(θ′)","inline":true},{"text":". Since ","element":"span"},{"style":{"height":15.02},"width":23.36,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-42.png","element":"img","alt":"ˆθ","inline":true,"padRight":true},{"text":"is close enough to ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-43.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"under current accuracy level, the vector ","element":"span"},{"style":{"height":18.83},"width":135.52,"height":47.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-44.png","element":"img","alt":"ci(ˆθt−1)","inline":true,"padRight":true},{"text":"is close enough to ","element":"span"},{"style":{"height":16},"width":81.28,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-45.png","element":"img","alt":" ci(θ)","inline":true,"padRight":true},{"text":"(which is part of the proof for the following theorem). 
There must be at least an ","element":"span"},{"text":"i ","element":"span"},{"text":"such that ","element":"span"},{"style":{"height":19.02},"width":568.96,"height":47.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-46.png","element":"img","alt":" Ni(t − 1) < 16 ci(ˆθt−1) log(t − 1)","inline":true,"padRight":true},{"text":"or else the condition of line ","element":"span"},{"href":"#id-32","text":"6 ","element":"a"},{"text":"holds.","element":"span"}],[{"id":"id-47","text":"The regret bound for the algorithm is stated as follows. ","element":"span"},{"text":"Theorem 3 The regret of Algorithm ","element":"span"},{"href":"#id-32","text":"1 ","element":"a"},{"text":"for one-step uniform case satisfies for any ","element":"span"},{"style":{"height":11.6},"width":89.6,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-47.png","element":"img","alt":" ǫ > 0","inline":true},{"text":",","element":"span"}],[{"style":{"width":"100%"},"width":973,"height":678,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-48.png","element":"img"}],[{"text":"any ","element":"span"},{"style":{"height":11.6},"width":89.12,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-49.png","element":"img","alt":" ǫ > 0","inline":true},{"text":". 
Then for any ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-50.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":16},"width":68.32,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-51.png","element":"img","alt":" c(θ)","inline":true,"padRight":true},{"text":"is unique,","element":"span"}],[{"style":{"width":"86%"},"width":837,"height":66,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-52.png","element":"img"}],[{"text":"holds with probability at least ","element":"span"},{"style":{"height":11.6},"width":87.64,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-53.png","element":"img","alt":" 1 − δ","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"style":{"height":12.4},"width":92.48,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/3-54.png","element":"img","alt":" δ > 0","inline":true},{"text":".","element":"span"}],[{"text":"Note that any ","element":"span"},{"style":{"height":17.36},"width":194.96,"height":43.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-0.png","element":"img","alt":" β(n) = a·n^b","inline":true,"padRight":true},{"text":"with ","element":"span"},{"style":{"height":19.51},"width":353.44,"height":48.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-1.png","element":"img","alt":" a ∈ (0, 1/2], b ∈ (0, 1)","inline":true,"padRight":true},{"text":"meets the requirements. 
Due to space limit, the proofs are provided in Appendix ","element":"span"},{"text":"A","element":"span"},{"text":".","element":"span"}],[{"text":"4.2 ","element":"span"},{"text":"One-Step General Case","element":"span"}],[{"text":"In the general case where ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-2.png","element":"img","alt":" pij","inline":true,"padRight":true},{"text":"can be different, ","element":"span"},{"style":{"height":16.7},"width":157.24,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-3.png","element":"img","alt":" Mj(t) =","inline":true},{"style":{"height":19.6},"width":308.2,"height":49,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-4.png","element":"img","alt":"�i∈V in(j) Ni(t)pij","inline":true},{"text":". The algorithm follows as in Algorithm ","element":"span"},{"href":"#id-32","text":"1 ","element":"a"},{"text":"by only replacing line ","element":"span"},{"href":"#id-32","text":"4 ","element":"a"},{"text":"with (","element":"span"},{"href":"#id-32","text":"4","element":"a"},{"text":"’) Play ","element":"span"},{"style":{"height":17.04},"width":375.4,"height":42.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-5.png","element":"img","alt":" it ∈ argmaxi∈V in(j)pij","inline":true},{"text":". Let","element":"span"}],[{"id":"id-34","style":{"width":"97%"},"width":944,"height":73,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-6.png","element":"img"}],[{"text":"be the set of exploration nodes that have the largest live probability among all incoming edges to some ","element":"span"},{"text":"j","element":"span"},{"text":". 
Let","element":"span"}],[{"id":"id-35","style":{"width":"96%"},"width":942,"height":73,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-7.png","element":"img"}],[{"text":"be the minimal exploration probability for any ","element":"span"},{"style":{"height":11.6},"width":109.56,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-8.png","element":"img","alt":" i ∈ V e","inline":true},{"text":". With a modified proof to the uniform case, the theoretical guarantee for the general case follows.","element":"span"}],[{"text":"Theorem 4 The regret of the modified Algorithm ","element":"span"},{"href":"#id-32","text":"1","element":"a"},{"text":"’ for one-step general case satisfies for any ","element":"span"},{"style":{"height":11.6},"width":89.12,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-9.png","element":"img","alt":" ε > 0","inline":true},{"text":",","element":"span"}],[{"id":"id-46","style":{"width":"89%"},"width":873,"height":525,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-10.png","element":"img"}],[{"text":"Assume ","element":"span"},{"style":{"height":16},"width":211.84,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-11.png","element":"img","alt":" β(n) = o(n)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":28.8},"width":448,"height":72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-12.png","element":"img","alt":"Σ_{s=0}^∞ exp(−β(s)ε²/(2K)) < ∞","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"style":{"height":11.6},"width":89.12,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-13.png","element":"img","alt":" ε > 0","inline":true},{"text":". 
Then for any ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-14.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":16},"width":68.32,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-15.png","element":"img","alt":" c(θ)","inline":true,"padRight":true},{"text":"is unique,","element":"span"}],[{"style":{"width":"86%"},"width":837,"height":65,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-16.png","element":"img"}],[{"text":"holds with probability at least ","element":"span"},{"style":{"height":11.6},"width":87.64,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-17.png","element":"img","alt":" 1 − δ","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"style":{"height":12.4},"width":92.48,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-18.png","element":"img","alt":" δ > 0","inline":true},{"text":".","element":"span"}],[{"text":"4.3 ","element":"span"},{"text":"Cascade Case","element":"span"}],[{"text":"For the deterministic graphs, there is no essential difference between one-step case and cascade case — the cascade case on a deterministic graph would be equivalent to constructing a new graph where an edge exists if and only if there is a path on the original graph. 
For a probabilistic graph, one might try a similar solution for the cascade case by constructing a new graph ","element":"span"},{"style":{"height":10.8},"width":45.2,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-19.png","element":"img","alt":" G′","inline":true,"padRight":true},{"text":"where the probability of an edge ","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":") ","element":"span"},{"text":"is just the probability ","element":"span"},{"style":{"height":13.23},"width":44.2,"height":33.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-20.png","element":"img","alt":" p′ij","inline":true,"padRight":true},{"text":"of ","element":"span"},{"text":"i ","element":"span"},{"text":"connecting to ","element":"span"},{"text":"j ","element":"span"},{"text":"in a random re","element":"span"},{"text":"alization of the original graph. However, the computation of ","element":"span"},{"style":{"height":13.42},"width":44.2,"height":33.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-21.png","element":"img","alt":"p′ij","inline":true,"padRight":true},{"text":"is #P-hard for general graphs, and thus the accurate graph ","element":"span"},{"style":{"height":10.8},"width":45.2,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-22.png","element":"img","alt":"G′","inline":true,"padRight":true},{"text":"is unattainable, though it can be approximated within any accuracy by Monte Carlo simulations. 
Therefore, during the running of the algorithm, a reasonable approximation of ","element":"span"},{"style":{"height":10.8},"width":45.2,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-23.png","element":"img","alt":" G′","inline":true,"padRight":true},{"text":"is needed.","element":"span"}],[{"id":"id-37","style":{"width":"100%"},"width":973,"height":923,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-24.png","element":"img"}],[{"text":"Define ","element":"span"},{"style":{"height":10.8},"width":62.96,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-25.png","element":"img","alt":" V e′","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":14.79},"width":50.96,"height":36.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-26.png","element":"img","alt":" pei′","inline":true,"padRight":true},{"text":"similarly with (","element":"span"},{"href":"#id-34","text":"6","element":"a"},{"text":") and (","element":"span"},{"href":"#id-35","text":"7","element":"a"},{"text":") by replacing ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-27.png","element":"img","alt":"pij","inline":true,"padRight":true},{"text":"with ","element":"span"},{"style":{"height":13.23},"width":44.2,"height":33.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-28.png","element":"img","alt":" p′ij","inline":true},{"text":". 
Since the computation of ","element":"span"},{"style":{"height":13.23},"width":44.2,"height":33.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-29.png","element":"img","alt":" p′ij","inline":true,"padRight":true},{"text":"is #P-hard, we ","element":"span"},{"text":"define an estimated version of ","element":"span"},{"style":{"height":10.8},"width":62.96,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-30.png","element":"img","alt":" V e′","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":14.78},"width":50.96,"height":36.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-31.png","element":"img","alt":" pei′","inline":true,"padRight":true},{"text":"respectively:","element":"span"}],[{"style":{"width":"79%"},"width":770,"height":208,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-32.png","element":"img"}],[{"text":"for any ","element":"span"},{"style":{"height":15.63},"width":109.56,"height":39.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-33.png","element":"img","alt":" i ∈ ˆV e","inline":true},{"text":". Then ","element":"span"},{"style":{"height":16.22},"width":186.08,"height":40.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-34.png","element":"img","alt":" ˆpei ≥ pei′′/2","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"style":{"height":10.4},"width":27.92,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-35.png","element":"img","alt":" i′","inline":true},{"text":". 
","element":"span"},{"text":"To overcome the stated challenge, we need an auxiliary function ","element":"span"},{"style":{"height":16},"width":272.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-36.png","element":"img","alt":" η : N+ → [0, 1)","inline":true,"padRight":true},{"text":"to set up the tolerance of the approximation. At each time ","element":"span"},{"text":"t","element":"span"},{"text":", the path from ","element":"span"},{"text":"i ","element":"span"},{"text":"to ","element":"span"},{"text":"j ","element":"span"},{"text":"with probability ","element":"span"},{"style":{"height":18.62},"width":179.2,"height":46.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-37.png","element":"img","alt":" p′ij ≤ η(t)","inline":true,"padRight":true},{"text":"can be treated as nonexistent (with prob","element":"span"},{"text":"ability ","element":"span"},{"text":"0","element":"span"},{"text":") and the estimation of ","element":"span"},{"style":{"height":13.23},"width":44.2,"height":33.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-38.png","element":"img","alt":" p′ij","inline":true,"padRight":true},{"text":"has noise at most ","element":"span"},{"style":{"height":16},"width":106.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-39.png","element":"img","alt":" η(t)/2","inline":true,"padRight":true},{"text":"if the real value ","element":"span"},{"style":{"height":18.62},"width":187.36,"height":46.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-40.png","element":"img","alt":" p′ij > η(t)","inline":true},{"text":". 
Any non-increasing function ","element":"span"},{"text":"with limit ","element":"span"},{"text":"0 ","element":"span"},{"text":"can be chosen as ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-41.png","element":"img","alt":" η","inline":true},{"text":". The choice of ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-42.png","element":"img","alt":" η","inline":true,"padRight":true},{"text":"is to control the complexity of the graph by focusing only on paths of reasonable length.","element":"span"}],[{"style":{"width":"95%"},"width":931,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-43.png","element":"img"}],[{"text":"lem","element":"span"}],[{"id":"id-36","style":{"width":"94%"},"width":920,"height":108,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-44.png","element":"img"}],[{"text":"where ","element":"span"},{"style":{"height":17.36},"width":266.56,"height":43.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-45.png","element":"img","alt":" P ∈ [0, 1]K×K","inline":true,"padRight":true},{"text":"satisfies ","element":"span"},{"style":{"height":15.5},"width":151.52,"height":38.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-46.png","element":"img","alt":" Pij = 0","inline":true,"padRight":true},{"text":"if ","element":"span"},{"style":{"height":17.02},"width":145.76,"height":42.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-47.png","element":"img","alt":" p′ij ≤ η","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":20.86},"width":321.92,"height":52.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-48.png","element":"img","alt":"|Pij − p′ij| ≤ η/2","inline":true,"padRight":true},{"text":"if 
","element":"span"},{"style":{"height":15.23},"width":151.52,"height":38.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-49.png","element":"img","alt":" p′ij > η","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":23.54},"width":273.6,"height":58.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-50.png","element":"img","alt":" bi(θ′) = 1∆2i (θ′)","inline":true,"padRight":true},{"text":"for","element":"span"}],[{"style":{"width":"63%"},"width":620,"height":50,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-51.png","element":"img"}],[{"text":"With the approximation ","element":"span"},{"style":{"height":13.1},"width":43.2,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-52.png","element":"img","alt":" Gt","inline":true,"padRight":true},{"text":"and the estimated value for reward vector ","element":"span"},{"style":{"height":17.33},"width":71.68,"height":43.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-53.png","element":"img","alt":"ˆθt−1","inline":true},{"text":", the linear programming problem considered in time ","element":"span"},{"text":"t ","element":"span"},{"text":"is ","element":"span"},{"style":{"height":19.02},"width":367.84,"height":47.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-54.png","element":"img","alt":" LPt = LP(ˆθt−1, η(t))","inline":true,"padRight":true},{"text":"and the corresponding ","element":"span"},{"text":"P ","element":"span"},{"text":"in (","element":"span"},{"href":"#id-36","text":"10","element":"a"},{"text":") is denoted as ","element":"span"},{"style":{"height":13.1},"width":37.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-55.png","element":"img","alt":" Pt","inline":true},{"text":". 
Then the algorithm runs with ","element":"span"},{"style":{"height":13.1},"width":63.84,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-56.png","element":"img","alt":"LPt","inline":true,"padRight":true},{"text":"accordingly. The complete pseudocode is presented in Algorithm ","element":"span"},{"href":"#id-37","text":"2","element":"a"},{"text":". In particular, the examination on the realization is performed on approximated graph ","element":"span"},{"style":{"height":13.1},"width":43.2,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-57.png","element":"img","alt":" Gt","inline":true,"padRight":true},{"text":"with probability matrix ","element":"span"},{"style":{"height":13.1},"width":37.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-58.png","element":"img","alt":" Pt","inline":true,"padRight":true},{"text":"(line ","element":"span"},{"href":"#id-37","text":"3","element":"a"},{"text":"). The exploitation condition is on the ","element":"span"},{"style":{"height":13.1},"width":63.84,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/4-59.png","element":"img","alt":" LPt","inline":true,"padRight":true},{"text":"(line","element":"span"}],[{"href":"#id-37","text":"6","element":"a"},{"text":"). 
Here ","element":"span"},{"style":{"height":16},"width":176.32,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-0.png","element":"img","alt":" Sfeas(LPt)","inline":true,"padRight":true},{"text":"is the feasible solution set of the linear programming problem ","element":"span"},{"style":{"height":13.1},"width":63.84,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-1.png","element":"img","alt":" LPt","inline":true,"padRight":true},{"text":"which is the set of all ","element":"span"},{"style":{"height":14.16},"width":137.44,"height":35.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-2.png","element":"img","alt":" c ∈ RK","inline":true,"padRight":true},{"text":"satisfying ","element":"span"},{"style":{"height":14.64},"width":253.76,"height":36.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-3.png","element":"img","alt":" P ⊤t c ≥ b, c ≥ 0","inline":true},{"text":". The exploration when all compo","element":"span"},{"text":"nents of estimated ","element":"span"},{"style":{"height":15.02},"width":23.36,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-4.png","element":"img","alt":"ˆθ","inline":true,"padRight":true},{"text":"are accurate enough with minimal cost instructed by linear programming solutions is also related to ","element":"span"},{"style":{"height":13.1},"width":63.84,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-5.png","element":"img","alt":"LPt","inline":true,"padRight":true},{"text":"(line ","element":"span"},{"href":"#id-37","text":"13","element":"a"},{"text":"). 
Here ","element":"span"},{"style":{"height":16.7},"width":170.08,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-6.png","element":"img","alt":" Sopt(LPt)","inline":true,"padRight":true},{"text":"is the optimal solution set of ","element":"span"},{"style":{"height":13.1},"width":63.84,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-7.png","element":"img","alt":"LPt","inline":true},{"text":". Also ","element":"span"},{"style":{"height":18.62},"width":400.84,"height":46.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-8.png","element":"img","alt":" M ′j(t) = �i Ni(t)(Pt)ij","inline":true,"padRight":true},{"text":"is changed accordingly. ","element":"span"},{"text":"The regret of the Algorithm ","element":"span"},{"href":"#id-37","text":"2 ","element":"a"},{"text":"is upper bounded in the fol-","element":"span"}],[{"id":"id-49","text":"lowing theorem.","element":"span"}],[{"text":"Theorem 5 The regret of the Algorithm ","element":"span"},{"href":"#id-37","text":"2 ","element":"a"},{"text":"for cascade case","element":"span"}],[{"text":"satisfies for any ","element":"span"},{"style":{"height":11.6},"width":89.12,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-9.png","element":"img","alt":" ǫ > 0","inline":true},{"text":",","element":"span"}],[{"style":{"width":"94%"},"width":918,"height":531,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-10.png","element":"img"}],[{"text":"where","element":"span"}],[{"style":{"width":"90%"},"width":884,"height":114,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-11.png","element":"img"}],[{"text":"Assume ","element":"span"},{"style":{"height":16},"width":211.84,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-12.png","element":"img","alt":" β(n) = o(n)","inline":true,"padRight":true},{"text":"and 
","element":"span"},{"style":{"height":28.8},"width":448,"height":72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-13.png","element":"img","alt":"�∞s=0 exp�− β(s)ǫ22K �< ∞","inline":true,"padRight":true},{"text":"for","element":"span"}],[{"text":"any ","element":"span"},{"style":{"height":11.6},"width":89.12,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-14.png","element":"img","alt":" ǫ > 0","inline":true},{"text":". Then for any ","element":"span"},{"style":{"height":10.8},"width":19,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-15.png","element":"img","alt":" θ","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":16},"width":68.32,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-16.png","element":"img","alt":" c(θ)","inline":true,"padRight":true},{"text":"is unique,","element":"span"}],[{"style":{"width":"86%"},"width":837,"height":65,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-17.png","element":"img"}],[{"text":"holds with probability at least ","element":"span"},{"style":{"height":11.6},"width":87.64,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-18.png","element":"img","alt":" 1 − δ","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"style":{"height":12.4},"width":92.48,"height":31,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-19.png","element":"img","alt":" δ > 0","inline":true},{"text":".","element":"span"}],[{"text":"The result depends on the robustness of the linear programming problems. 
The ","element":"span"},{"text":"P ","element":"span"},{"text":"matrix in the LP problem (","element":"span"},{"href":"#id-36","text":"10","element":"a"},{"text":") is noisy, which is much different from one-step case and the case of deterministic graphs where the noise is only on ","element":"span"},{"style":{"height":10.8},"width":33.68,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-20.png","element":"img","alt":" θ′","inline":true},{"text":". See discussions in the next section. The full proof is put in Appendix ","element":"span"},{"text":"B","element":"span"},{"text":".","element":"span"}],[{"id":"id-24","text":"4.4 ","element":"span"},{"text":"Discussions","element":"span"}],[{"text":"The assumption on the Gaussian reward distributions can be generalized to any type of sub-Gaussian distributions with a ","element":"span"},{"text":"1","element":"span"},{"text":"-dimensional mean parameter such that the ","element":"span"},{"text":"KL","element":"span"},{"text":"-divergence between distributions of different parameters is finite and is continuous in the ","element":"span"},{"text":"1","element":"span"},{"text":"-dimensional mean parameter. 
Then the term ","element":"span"},{"style":{"height":17.58},"width":141.44,"height":43.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-21.png","element":"img","alt":" ∆2i (θ)/2","inline":true,"padRight":true},{"text":"in the definition of ","element":"span"},{"style":{"height":16},"width":82.24,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-22.png","element":"img","alt":" C(θ)","inline":true,"padRight":true},{"text":"(see Eq.(","element":"span"},{"href":"#id-33","text":"1","element":"a"},{"text":")) is ","element":"span"},{"text":"replaced by ","element":"span"},{"style":{"height":17.68},"width":325.6,"height":44.2,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-23.png","element":"img","alt":" KL(r(θi), r(θi1(θ)))","inline":true,"padRight":true},{"text":"accordingly where ","element":"span"},{"style":{"height":16},"width":61.12,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-24.png","element":"img","alt":" r(·)","inline":true,"padRight":true},{"text":"is the corresponding reward distribution with mean parameter. The Gaussian distribution, Bernoulli distribution and common continuous random distribution on a common bounded interval like Beta distribution all satisfy the requirements.","element":"span"}],[{"text":"The term ","element":"span"},{"style":{"height":28.8},"width":395.52,"height":72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-25.png","element":"img","alt":" O�log(T ) �Ki=1∆i(θ)p �","inline":true},{"text":"in the regret bound for one-step uniform case (same for other two cases) is due to the gap between the realizations and the expectations of the probabilistic graphs. 
The term ","element":"span"},{"text":"1","element":"span"},{"text":"/p ","element":"span"},{"text":"also appears in the regret ","element":"span"},{"style":{"height":19.2},"width":170.56,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-26.png","element":"img","alt":" O(�T/p)","inline":true,"padRight":true},{"text":"of [","element":"span"},{"href":"#id-11","referenceIndex":17,"text":"Koc´ak ","element":"a"},{"text":"et al., 2016","element":"span"},{"text":"] on Erd¨os-R´enyi random graphs in adversarial setting. It is not clear whether this ","element":"span"},{"text":"1","element":"span"},{"text":"/p ","element":"span"},{"text":"term represents hindsight difficulty between the probabilistic graphs and deterministic graphs. This would be an interesting future direction.","element":"span"}],[{"text":"The terms ","element":"span"},{"style":{"height":16.22},"width":240.8,"height":40.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-27.png","element":"img","alt":" {pei : i ∈ [K]}","inline":true,"padRight":true},{"text":"in the one-step general case de- ","element":"span"},{"text":"scribes the minimal exploration probabilities to observe every action. For each ","element":"span"},{"style":{"height":16.7},"width":397.96,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-28.png","element":"img","alt":" i ∈ [K], pei = maxi′ pi′j","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"text":"j","element":"span"},{"text":", that is ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-29.png","element":"img","alt":"pij","inline":true,"padRight":true},{"text":"is the largest live probabilities among all incoming edges for some ","element":"span"},{"text":"j","element":"span"},{"text":". These terms represent the problem complexities for the underlying probabilistic graph. 
When all ","element":"span"},{"style":{"height":11.5},"width":44.2,"height":28.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-30.png","element":"img","alt":" pij","inline":true,"padRight":true},{"text":"are equal to ","element":"span"},{"style":{"height":14.78},"width":152.56,"height":36.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-31.png","element":"img","alt":" p, pei = p","inline":true},{"text":".","element":"span"}],[{"text":"The term ","element":"span"},{"style":{"height":14.78},"width":51.44,"height":36.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-32.png","element":"img","alt":" pei′","inline":true,"padRight":true},{"text":"in the cascade case is usually larger than ","element":"span"},{"style":{"height":14.79},"width":35.16,"height":36.96,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-33.png","element":"img","alt":" pei","inline":true,"padRight":true},{"text":"since it takes the same operations on the connection probabilities of incoming paths which are larger than live probabilities of incoming edges. The term ","element":"span"},{"style":{"height":15.03},"width":35.16,"height":37.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-34.png","element":"img","alt":" ˆpei","inline":true,"padRight":true},{"text":"is an estimation satisfying ","element":"span"},{"style":{"height":16.42},"width":185.6,"height":41.04,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-35.png","element":"img","alt":"ˆpei ≥ pei′ ′/2","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"style":{"height":10.4},"width":27.92,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-36.png","element":"img","alt":" i′","inline":true},{"text":".","element":"span"}],[{"text":"Next we discuss the difference in proof of the cascade case. 
If the noise of the linear programming problems is on the ","element":"span"},{"text":"b ","element":"span"},{"text":"vector in (","element":"span"},{"href":"#id-36","text":"10","element":"a"},{"text":"), then by the standard results in statistics [","element":"span"},{"href":"#id-38","referenceIndex":12,"text":"Dontchev and Rockafellar, 2009","element":"a"},{"text":", ","element":"span"},{"style":{"height":14},"width":18,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-37.png","element":"img","alt":" §","inline":true},{"text":"3C.5], the resulting optimal solution sets are Lipschitz continuous. The property of Lipschitz continuity is essential since actions are selected according to the optimal solution of a noisy LP problem (line ","element":"span"},{"href":"#id-37","text":"13","element":"a"},{"text":") and we need to guarantee this kind of selections is safe. The noise on ","element":"span"},{"style":{"height":11.6},"width":33,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-38.png","element":"img","alt":" ∆","inline":true,"padRight":true},{"text":"vector in (","element":"span"},{"href":"#id-36","text":"10","element":"a"},{"text":") is also easy to deal with by considering the dual problem. However, it is much different if the noise is on the ","element":"span"},{"text":"P ","element":"span"},{"text":"matrix. 
For example, consider the LP problem that minimizes ","element":"span"},{"text":"x ","element":"span"},{"text":"over all ","element":"span"},{"style":{"height":13.2},"width":128,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-39.png","element":"img","alt":" ax ≥ 1","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":13.2},"width":107.36,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-40.png","element":"img","alt":" x ≥ 0","inline":true,"padRight":true},{"text":"with parameter ","element":"span"},{"text":"a > ","element":"span"},{"text":"0","element":"span"},{"text":". The optimal solution ","element":"span"},{"style":{"height":16},"width":155.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/5-41.png","element":"img","alt":" x∗ = 1/a","inline":true,"padRight":true},{"text":"is not Lipschitz continuous with respect to ","element":"span"},{"text":"a","element":"span"},{"text":". So the standard statistical tools could not apply here. We derive a novel property of the Lipschitz continuity when there is noise on ","element":"span"},{"text":"P ","element":"span"},{"text":"for our specific ","element":"span"},{"text":"P ","element":"span"},{"text":"matrix.","element":"span"}]]},{"heading":"5 Conclusion and Future Work","paragraphs":[[{"text":"We are the first to formalize the setting of stochastic online learning with probabilistic feedback graph. We derive asymptotic lower bounds for both one-step and cascade cases. The regret bounds of our designed algorithms match the lower bounds with high probability.","element":"span"}],[{"text":"This framework is new and we only provide asymptotic lower bounds and finite-time problem-dependent upper bounds. Finite-time lower bounds and minimax upper/lower bounds are all interesting future directions. 
Bayesian regret is also an interesting topic.","element":"span"}]]},{"heading":"References","paragraphs":[[{"id":"id-2","text":"[Alon ","element":"span"},{"text":"et al., 2015a] Noga Alon, Nicolo Cesa-Bianchi, Ofer Dekel, and Tomer Koren. Online learning with feedback graphs: Beyond bandits. In Conference on Learning Theory, pages 23–35, 2015.","element":"span"}],[{"id":"id-15","text":"[Alon ","element":"span"},{"text":"et al., 2015b] Noga Alon, Nicol`o Cesa-Bianchi, Ofer Dekel, ","element":"span"},{"text":"and Tomer Koren. ","element":"span"},{"text":"Online learning with feedback graphs: ","element":"span"},{"text":"Beyond bandits. ","element":"span"},{"text":"arXiv preprint arXiv:1502.07617, 2015.","element":"span"}],[{"id":"id-12","text":"[Alon ","element":"span"},{"text":"et al., 2017] Noga Alon, Nicolo Cesa-Bianchi, Claudio Gentile, Shie Mannor, Yishay Mansour, and Ohad Shamir. Nonstochastic multi-armed bandits with graphstructured feedback. ","element":"span"},{"text":"SIAM Journal on Computing, 46(6):1785–1826, 2017.","element":"span"}],[{"id":"id-0","text":"[Auer ","element":"span"},{"text":"et al., 2002] Peter Auer, Nicolo Cesa-Bianchi, and Paul Fischer. Finite-time analysis of the multiarmed bandit problem. Machine learning, 47(2-3):235–256, 2002.","element":"span"}],[{"id":"id-9","text":"[Bart´ok ","element":"span"},{"text":"et al., 2014] G´abor Bart´ok, Dean P Foster, D´avid P´al, Alexander Rakhlin, and Csaba Szepesv´ari. ","element":"span"},{"text":"Partial monitoring—classification, regret bounds, and algorithms. Mathematics of Operations Research, 39(4):967– 997, 2014.","element":"span"}],[{"id":"id-6","text":"[Buccapatnam ","element":"span"},{"text":"et al., 2014] Swapna ","element":"span"},{"text":"Buccapatnam, ","element":"span"},{"text":"Atilla Eryilmaz, and Ness B Shroff. ","element":"span"},{"text":"Stochastic bandits with side observations on networks. 
ACM SIGMETRICS Performance Evaluation Review, 42(1):289–300, 2014.","element":"span"}],[{"id":"id-5","text":"[Caron ","element":"span"},{"text":"et al., 2012] St´ephane ","element":"span"},{"text":"Caron, ","element":"span"},{"text":"Branislav ","element":"span"},{"text":"Kveton, Marc Lelarge, and Smriti Bhagat. ","element":"span"},{"text":"Leveraging side observations in stochastic bandits. In UAI, pages 142–151. AUAI Press, 2012.","element":"span"}],[{"id":"id-17","text":"[Cesa-Bianchi and Lugosi, 2006] Nicolo Cesa-Bianchi and ","element":"span"},{"text":"G´abor Lugosi. Prediction, learning, and games. Cambridge university press, 2006.","element":"span"}],[{"id":"id-20","text":"[Chen ","element":"span"},{"text":"et al., 2013] Wei Chen, Laks V. S. Lakshmanan, and Carlos Castillo. Information and Influence Propagation in Social Networks. Morgan & Claypool Publishers, 2013.","element":"span"}],[{"id":"id-22","text":"[Chen ","element":"span"},{"text":"et al., 2016] Wei Chen, Yajun Wang, Yang Yuan, and Qinshi Wang. Combinatorial multi-armed bandit and its extension to probabilistically triggered arms. The Journal of Machine Learning Research, 17(1):1746–1778, 2016.","element":"span"}],[{"id":"id-4","text":"[Cohen ","element":"span"},{"text":"et al., 2016] Alon Cohen, Tamir Hazan, and Tomer Koren. Online learning with feedback graphs without the graphs. In International Conference on Machine Learning, pages 811–819, 2016.","element":"span"}],[{"id":"id-38","text":"[Dontchev and Rockafellar, 2009] Asen L Dontchev and ","element":"span"},{"text":"R Tyrrell Rockafellar. Implicit functions and solution mappings. Springer Monogr. Math., 2009.","element":"span"}],[{"id":"id-14","text":"[Erd˝os and R´enyi, 1960] P Erd˝os and A R´enyi. On the evo- ","element":"span"},{"text":"lution of random graphs. Publications of the Mathematical Institute of the Hungarian Academy of Sciences, 5:17–61, 1960.","element":"span"}],[{"id":"id-51","text":"[Hoeffding, 1963] Wassily Hoeffding. 
Probability inequali- ","element":"span"},{"text":"ties for sums of bounded random variables. Journal of the American statistical association, 58(301):13–30, 1963.","element":"span"}],[{"id":"id-19","text":"[Kempe ","element":"span"},{"text":"et al., 2003] David Kempe, Jon M. Kleinberg, and ´Eva Tardos. Maximizing the spread of influence through a social network. In Proceedings of the 9th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), pages 137–146, 2003.","element":"span"}],[{"id":"id-3","text":"[Koc´ak ","element":"span"},{"text":"et al., 2014] Tom´aˇs Koc´ak, Gergely Neu, Michal Valko, and R´emi Munos. Efficient learning by implicit exploration in bandit problems with side observations. In Advances in Neural Information Processing Systems, pages 613–621, 2014.","element":"span"}],[{"id":"id-11","text":"[Koc´ak ","element":"span"},{"text":"et al., 2016] Tom´aˇs Koc´ak, Gergely Neu, and Michal Valko. Online learning with erd˝os-r´enyi side-observation graphs. In Uncertainty in Artificial Intelligence, 2016.","element":"span"}],[{"id":"id-10","text":"[Komiyama ","element":"span"},{"text":"et al., 2015] Junpei Komiyama, Junya Honda, and Hiroshi Nakagawa. Regret lower bound and optimal algorithm in finite stochastic partial monitoring. In Advances in Neural Information Processing Systems, pages 1792–1800, 2015.","element":"span"}],[{"id":"id-26","text":"[Lattimore and Szepesvari, 2017] Tor Lattimore and Csaba ","element":"span"},{"text":"Szepesvari. The end of optimism? an asymptotic analysis of finite-armed linear bandits. In Artificial Intelligence and Statistics, pages 728–737, 2017.","element":"span"}],[{"id":"id-18","text":"[Lattimore and Szepesvari, 2018] Tor Lattimore and Csaba ","element":"span"},{"text":"Szepesvari. Cleaning up the neighborhood: A full classi-fication for adversarial partial monitoring. 
arXiv preprint arXiv:1805.09247, 2018.","element":"span"}],[{"id":"id-13","text":"[Liu ","element":"span"},{"text":"et al., 2018] Fang Liu, Swapna Buccapatnam, and Ness Shroff. Information directed sampling for stochastic bandits with graph feedback. In AAAI, 2018.","element":"span"}],[{"id":"id-1","text":"[Mannor and Shamir, 2011] Shie Mannor and Ohad Shamir. ","element":"span"},{"text":"From bandits to experts: ","element":"span"},{"text":"On the value of sideobservations. In Advances in Neural Information Processing Systems, pages 684–692, 2011.","element":"span"}],[{"id":"id-16","text":"[Rustichini, 1999] Aldo Rustichini. Minimizing regret: The ","element":"span"},{"text":"general case. ","element":"span"},{"text":"Games and Economic Behavior, 29(1-2):224–243, 1999.","element":"span"}],[{"id":"id-7","text":"[Tossou ","element":"span"},{"text":"et al., 2017] Aristide CY Tossou, Christos Dimitrakakis, and Devdatt Dubhashi. Thompson sampling for stochastic bandits with graph feedback. ","element":"span"},{"text":"In Thirty-First AAAI Conference on Artificial Intelligence, 2017.","element":"span"}],[{"id":"id-27","text":"[Valiant, 1979] Leslie G. Valiant. The complexity of enu- ","element":"span"},{"text":"meration and reliability problems. SIAM Journal on Computing, 8(3):410–421, 1979.","element":"span"}],[{"id":"id-21","text":"[Vaswani ","element":"span"},{"text":"et al., 2015] Sharan Vaswani, Laks Lakshmanan, Mark Schmidt, et al. Influence maximization with bandits. arXiv preprint arXiv:1503.00024, 2015.","element":"span"}],[{"id":"id-25","text":"[Wang and Chen, 2017] Qinshi Wang and Wei Chen. ","element":"span"},{"text":"Improving regret bounds for combinatorial semi-bandits with probabilistically triggered arms and its applications. ","element":"span"},{"text":"In NeurIPS, pages 1161–1171, 2017.","element":"span"}],[{"id":"id-28","text":"[Wang ","element":"span"},{"text":"et al., 2012] Chi Wang, Wei Chen, and Yajun Wang. 
Scalable influence maximization for independent cascade model in large-scale social networks. DMKD, 2012.","element":"span"}],[{"id":"id-23","text":"[Wen ","element":"span"},{"text":"et al., 2017] Zheng Wen, Branislav Kveton, Michal Valko, and Sharan Vaswani. Online influence maximization under independent cascade model with semi-bandit feedback. In NeurIPS, pages 3025–3035, 2017.","element":"span"}],[{"id":"id-8","text":"[Wu ","element":"span"},{"text":"et al., 2015] Yifan Wu, Andr´as Gy¨orgy, and Csaba Szepesv´ari. Online learning with gaussian payoffs and side observations. In NeurIPS, pages 1360–1368, 2015.","element":"span"}]]},{"heading":"A Proofs of the Upper Bounds in One-Step Case","paragraphs":[[{"style":{"height":19.2},"width":265.24,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-0.png","element":"img","alt":"At =�Mj(t) <","inline":true,"padRight":true},{"text":"10 log","element":"span"},{"style":{"height":19.86},"width":123.32,"height":49.64,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-1.png","element":"img","alt":"�Kt2�,","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"style":{"height":19.2},"width":149.72,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-2.png","element":"img","alt":" j ∈ [K]�","inline":true}],[{"style":{"height":16.71},"width":210.4,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-3.png","element":"img","alt":"Bt = {mj(t)","inline":true,"padRight":true},{"text":"< M","element":"span"},{"style":{"height":16.71},"width":101.6,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-4.png","element":"img","alt":"j(t)/2","inline":true},{"text":", ","element":"span"},{"text":"for some ","element":"span"},{"text":"j 
","element":"span"},{"style":{"height":16},"width":116.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-5.png","element":"img","alt":" ∈ [K]}","inline":true}],[{"style":{"width":"36%"},"width":745,"height":236,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-6.png","element":"img"}],[{"style":{"height":16.7},"width":332.8,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-7.png","element":"img","alt":"Et = {Mj(t) < 2 β(","inline":true},{"text":"N ","element":"span"},{"style":{"height":16},"width":78.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-8.png","element":"img","alt":"e(t))","inline":true,"padRight":true},{"text":"/ K, ","element":"span"},{"text":"for some ","element":"span"},{"text":"j ","element":"span"},{"style":{"height":16},"width":116.48,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-9.png","element":"img","alt":" ∈ [K]}","inline":true}],[{"style":{"height":29.78},"width":325.24,"height":74.44,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-10.png","element":"img","alt":"Ft =��ˆθt,i − θi�� ≤","inline":true,"padRight":true},{"text":"ǫ, ","element":"span"},{"text":"for any ","element":"span"},{"text":"i ","element":"span"},{"id":"id-43","style":{"height":28.8},"width":123.48,"height":72,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-11.png","element":"img","alt":" ∈ [K]�","inline":true}],[{"text":"Bound the regret under ","element":"span"},{"text":"B ","element":"span"},{"text":"Note","element":"span"}],[{"text":"Then it remains to bound ","element":"span"},{"style":{"height":21.01},"width":525.32,"height":52.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-12.png","element":"img","alt":"�Tt=1 E�∆it(θ)1�Bct−1, Cct−1��","inline":true},{"text":". 
Bound the regret under ","element":"span"},{"text":"D ","element":"span"},{"text":"Suppose ","element":"span"},{"style":{"height":13.11},"width":83.68,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-13.png","element":"img","alt":" Dt−1","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":14.83},"width":174.88,"height":37.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-14.png","element":"img","alt":" Bct−1, Cct−1","inline":true,"padRight":true},{"text":"hold. Then ","element":"span"},{"style":{"height":24.56},"width":732.64,"height":61.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-15.png","element":"img","alt":" �i∈V in(j) Ni(t − 1)p ≥ 32∆j(ˆθt−1)2 log(t − 1)","inline":true,"padRight":true},{"text":"for any","element":"span"}],[{"style":{"height":19.02},"width":208.48,"height":47.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-16.png","element":"img","alt":"j ̸= i1(ˆθt−1)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":27.76},"width":830.56,"height":69.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-17.png","element":"img","alt":"�i∈V in(j) Ni(t − 1)p ≥ 32∆i2(ˆθt−1)(ˆθt−1)2 log(t − 1)","inline":true,"padRight":true},{"text":"for ","element":"span"},{"style":{"height":19.02},"width":208.48,"height":47.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-18.png","element":"img","alt":" j = i1(ˆθt−1)","inline":true},{"text":". 
Or equivalently","element":"span"}],[{"style":{"height":16.71},"width":125.56,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-19.png","element":"img","alt":"Mj(t −","inline":true,"padRight":true},{"text":"1) ","element":"span"},{"style":{"height":12.8},"width":31,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-20.png","element":"img","alt":" ≥","inline":true,"padRight":true},{"text":"32 ∆","element":"span"},{"style":{"height":9.6},"width":13,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-21.png","element":"img","alt":"j","inline":true},{"text":"(","element":"span"},{"text":"ˆ","element":"span"},{"style":{"height":16.21},"width":105.28,"height":40.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-22.png","element":"img","alt":"θt−1)2","inline":true,"padRight":true},{"text":"log(","element":"span"},{"style":{"height":10},"width":54.52,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-23.png","element":"img","alt":"t −","inline":true,"padRight":true},{"text":"1) ","element":"span"},{"text":"for ","element":"span"},{"style":{"height":15.2},"width":101.92,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-24.png","element":"img","alt":" j ̸= i1","inline":true},{"text":"(","element":"span"},{"id":"id-40","text":"ˆ","element":"span"},{"style":{"height":16},"width":106.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-25.png","element":"img","alt":"θt−1) ,","inline":true}],[{"style":{"height":16.7},"width":125.56,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-26.png","element":"img","alt":"Mj(t −","inline":true,"padRight":true},{"text":"1) 
","element":"span"},{"style":{"height":12.8},"width":31,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-27.png","element":"img","alt":" ≥","inline":true,"padRight":true},{"text":"32 ∆","element":"span"},{"style":{"height":25.5},"width":204.64,"height":63.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-28.png","element":"img","alt":"2i2(ˆθt−1)(ˆθt−1","inline":true},{"text":")","element":"span"},{"text":"log(","element":"span"},{"style":{"height":10},"width":54.04,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-29.png","element":"img","alt":"t −","inline":true,"padRight":true},{"text":"1) ","element":"span"},{"text":"for ","element":"span"},{"style":{"height":13.6},"width":101.92,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-30.png","element":"img","alt":" j = i1","inline":true},{"text":"(","element":"span"},{"text":"ˆ","element":"span"},{"style":{"height":16},"width":106.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-31.png","element":"img","alt":"θt−1) .","inline":true}],[{"text":"On ","element":"span"},{"style":{"height":14.64},"width":79.36,"height":36.6,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-32.png","element":"img","alt":" Bct−1","inline":true},{"text":",","element":"span"}],[{"style":{"height":16.71},"width":121.72,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-33.png","element":"img","alt":"mj(t −","inline":true,"padRight":true},{"text":"1) ","element":"span"},{"style":{"height":12.8},"width":31,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-34.png","element":"img","alt":" ≥","inline":true,"padRight":true},{"text":"16 
∆","element":"span"},{"style":{"height":9.6},"width":13,"height":24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-35.png","element":"img","alt":"j","inline":true},{"text":"(","element":"span"},{"text":"ˆ","element":"span"},{"style":{"height":16.21},"width":104.8,"height":40.52,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-36.png","element":"img","alt":"θt−1)2","inline":true,"padRight":true},{"text":"log(","element":"span"},{"style":{"height":10},"width":54.04,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-37.png","element":"img","alt":"t −","inline":true,"padRight":true},{"text":"1) ","element":"span"},{"text":"for ","element":"span"},{"style":{"height":15.2},"width":101.92,"height":38,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-38.png","element":"img","alt":" j ̸= i1","inline":true},{"text":"(","element":"span"},{"text":"ˆ","element":"span"},{"style":{"height":16},"width":106.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-39.png","element":"img","alt":"θt−1) ,","inline":true}],[{"style":{"height":16.7},"width":121.72,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-40.png","element":"img","alt":"mj(t −","inline":true,"padRight":true},{"text":"1) ","element":"span"},{"style":{"height":12.8},"width":31,"height":32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-41.png","element":"img","alt":" ≥","inline":true,"padRight":true},{"text":"16 ∆","element":"span"},{"style":{"height":25.7},"width":204.64,"height":64.24,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-42.png","element":"img","alt":"2i2(ˆθt−1)(ˆθt−1","inline":true},{"text":")","element":"span"},{"text":"log(","element":"span"},{"style":{"height":10},"width":54.04,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-43.png","element":"img","alt":"t 
−","inline":true,"padRight":true},{"text":"1) ","element":"span"},{"text":"for ","element":"span"},{"style":{"height":13.6},"width":101.44,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-44.png","element":"img","alt":" j = i1","inline":true},{"text":"(","element":"span"},{"text":"ˆ","element":"span"},{"style":{"height":16},"width":107,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-45.png","element":"img","alt":"θt−1) .","inline":true}],[{"text":"Then","element":"span"}],[{"id":"id-44","style":{"width":"30%"},"width":297,"height":117,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/7-46.png","element":"img"}],[{"text":"since","element":"span"}],[{"style":{"width":"75%"},"width":1523,"height":163,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-0.png","element":"img"}],[{"text":"Thus it remains to bound ","element":"span"},{"style":{"height":21.2},"width":629,"height":53,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-1.png","element":"img","alt":"�Tt=1 E�∆it(θ)1�Bct−1, Cct−1, Dct−1��","inline":true},{"text":". 
Bound the regret under ","element":"span"},{"style":{"height":14},"width":164.24,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-2.png","element":"img","alt":" Bc, Cc, Dc","inline":true,"padRight":true},{"text":"Similar to [","element":"span"},{"text":"Wu et al.","element":"span"},{"href":"#id-8","referenceIndex":30,"text":", 2015","element":"a"},{"text":", Proposition 17] where the statement ","element":"span"},{"style":{"height":19.5},"width":287.8,"height":48.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-3.png","element":"img","alt":"�i∈V in(j) Nij ≥","inline":true},{"style":{"height":16},"width":130.08,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-4.png","element":"img","alt":"β(s)/K","inline":true,"padRight":true},{"text":"is replaced by ","element":"span"},{"style":{"height":16.7},"width":284.16,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-5.png","element":"img","alt":" Mj(t) ≥ β(s)/K","inline":true},{"text":",","element":"span"}],[{"id":"id-41","style":{"width":"50%"},"width":1024,"height":122,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-6.png","element":"img"}],[{"text":"Then","element":"span"}],[{"id":"id-39","style":{"width":"94%"},"width":918,"height":264,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-7.png","element":"img"}],[{"text":"Next by [","element":"span"},{"text":"Wu et al.","element":"span"},{"href":"#id-8","referenceIndex":30,"text":", 2015","element":"a"},{"text":", Lemma 19],","element":"span"}],[{"id":"id-42","style":{"width":"65%"},"width":641,"height":396,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-8.png","element":"img"}],[{"text":"Thus by (","element":"span"},{"href":"#id-39","text":"16","element":"a"},{"text":"), (","element":"span"},{"href":"#id-40","text":"14","element":"a"},{"text":"), 
(","element":"span"},{"href":"#id-41","text":"17","element":"a"},{"text":") and (","element":"span"},{"href":"#id-42","text":"18","element":"a"},{"text":"),","element":"span"}],[{"text":"Putting (","element":"span"},{"href":"#id-43","text":"13","element":"a"},{"text":"), (","element":"span"},{"href":"#id-40","text":"14","element":"a"},{"text":"), (","element":"span"},{"href":"#id-44","text":"15","element":"a"},{"text":"), (","element":"span"},{"href":"#id-41","text":"17","element":"a"},{"text":"), (","element":"span"},{"href":"#id-42","text":"19","element":"a"},{"text":"), (","element":"span"},{"href":"#id-45","text":"20","element":"a"},{"text":") together, the regret satisfies","element":"span"}],[{"id":"id-45","style":{"width":"75%"},"width":1520,"height":260,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-9.png","element":"img"}],[{"text":"Next prove the asymptotic behavior of the regret upper bound. Claim:","element":"span"},{"style":{"height":16.71},"width":202.72,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-10.png","element":"img","alt":"Mj(t) → ∞","inline":true,"padRight":true},{"text":"as ","element":"span"},{"style":{"height":10.4},"width":116.32,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-11.png","element":"img","alt":" t → ∞","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"style":{"height":16},"width":126.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-12.png","element":"img","alt":" j ∈ [K]","inline":true},{"text":".","element":"span"}],[{"text":"Suppose not. 
There exists ","element":"span"},{"style":{"height":16},"width":135.8,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-13.png","element":"img","alt":" j ∈ [K]","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":16.7},"width":100.96,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-14.png","element":"img","alt":" Mj(t)","inline":true},{"text":", or ","element":"span"},{"style":{"height":16},"width":91.36,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-15.png","element":"img","alt":" Ni(t)","inline":true,"padRight":true},{"text":"for all ","element":"span"},{"style":{"height":17.36},"width":183.04,"height":43.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-16.png","element":"img","alt":" i ∈ V in(j)","inline":true},{"text":", stops increasing when ","element":"span"},{"style":{"height":13.2},"width":116.8,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-17.png","element":"img","alt":" t ≥ T1","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"style":{"height":13.1},"width":114.08,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-18.png","element":"img","alt":"T1 > 0","inline":true},{"text":". Then the condition on line ","element":"span"},{"href":"#id-32","text":"6 ","element":"a"},{"text":"is not satisfied when ","element":"span"},{"style":{"height":13.2},"width":201.28,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-19.png","element":"img","alt":" t ≥ T2 ≥ T1","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"style":{"height":13.1},"width":39.52,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-20.png","element":"img","alt":" T2","inline":true,"padRight":true},{"text":"> 0. 
By the condition on line ","element":"span"},{"href":"#id-32","text":"9","element":"a"},{"text":", ","element":"span"},{"style":{"height":16},"width":99.52,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/8-21.png","element":"img","alt":" N e(t)","inline":true,"padRight":true},{"text":"also stops increasing and the condition on line ","element":"span"},{"href":"#id-32","text":"9 ","element":"a"},{"text":"for any ","element":"span"},{"style":{"height":13.6},"width":32.72,"height":34,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-0.png","element":"img","alt":" j′","inline":true,"padRight":true},{"text":"is not satisfied any more when ","element":"span"},{"style":{"height":13.2},"width":204.16,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-1.png","element":"img","alt":" t ≥ T3 ≥ T2","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"style":{"height":13.1},"width":116,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-2.png","element":"img","alt":" T3 > 0","inline":true},{"text":". Also line ","element":"span"},{"href":"#id-32","text":"14 ","element":"a"},{"text":"will not be performed since ","element":"span"},{"style":{"height":16},"width":99.04,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-3.png","element":"img","alt":" N e(t)","inline":true,"padRight":true},{"text":"stops increasing. 
Therefore the condition on line ","element":"span"},{"href":"#id-32","text":"3 ","element":"a"},{"text":"always holds, which is impossible.","element":"span"}],[{"text":"For any ","element":"span"},{"style":{"height":16},"width":163.36,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-4.png","element":"img","alt":" δ ∈ (0, 1)","inline":true},{"text":", the probability that the condition on line ","element":"span"},{"href":"#id-32","text":"3 ","element":"a"},{"text":"does not hold when ","element":"span"},{"style":{"height":19.31},"width":297.28,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-5.png","element":"img","alt":" Mj(t) > 10 log Kδ","inline":true,"padRight":true},{"text":"is at least ","element":"span"},{"style":{"height":16},"width":146.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-6.png","element":"img","alt":" 1 − δ/K","inline":true},{"text":". ","element":"span"},{"text":"There exists ","element":"span"},{"style":{"height":13.11},"width":121.76,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-7.png","element":"img","alt":" T4 > 0","inline":true,"padRight":true},{"text":"such that when ","element":"span"},{"style":{"height":19.5},"width":436.48,"height":48.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-8.png","element":"img","alt":" t ≥ T4, Mj(t) > 10 log Kδ","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"text":"j ","element":"span"},{"text":"since ","element":"span"},{"style":{"height":16.71},"width":209.92,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-9.png","element":"img","alt":" Mj(t) → ∞","inline":true},{"text":". 
Then with probability at least ","element":"span"},{"style":{"height":11.6},"width":85.72,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-10.png","element":"img","alt":"1 − δ","inline":true},{"text":", line ","element":"span"},{"href":"#id-32","text":"4","element":"a"},{"text":"-","element":"span"},{"href":"#id-32","text":"5 ","element":"a"},{"text":"are not called any more. The events ","element":"span"},{"style":{"height":13.9},"width":43.68,"height":34.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-11.png","element":"img","alt":" At","inline":true,"padRight":true},{"text":"is modified by ","element":"span"},{"style":{"height":19.31},"width":413.44,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-12.png","element":"img","alt":" A′t =�Mj(t) < 10 log Kδ","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"style":{"height":19.2},"width":149.72,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-13.png","element":"img","alt":" j ∈ [K]�","inline":true},{"text":"and (","element":"span"},{"href":"#id-43","text":"13","element":"a"},{"text":") is","element":"span"}],[{"text":"replaced by ","element":"span"},{"style":{"height":29.2},"width":1174.24,"height":73,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-14.png","element":"img","alt":"�Tt=1 E�∆it(θ)1�Nit(t − 1) < 10p log Kδ��≤ �Ki=110∆i(θ)p log(K/δ)","inline":true},{"text":". All other parts stay the same. 
Then the regret satisfies","element":"span"}],[{"style":{"width":"74%"},"width":1510,"height":260,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-15.png","element":"img"}],[{"text":"For any ","element":"span"},{"style":{"height":14.4},"width":102.56,"height":36,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-16.png","element":"img","alt":" η > 0","inline":true},{"text":", there exists an ","element":"span"},{"style":{"height":16},"width":226.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-17.png","element":"img","alt":" ǫ = ǫ(θ) > 0","inline":true,"padRight":true},{"text":"such that the distance between the optimal solution set of ","element":"span"},{"style":{"height":16},"width":68.32,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-18.png","element":"img","alt":" c(θ)","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":16},"width":79.37,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-19.png","element":"img","alt":" c(θ′)","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"style":{"height":10.8},"width":33.68,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-20.png","element":"img","alt":" θ′","inline":true,"padRight":true},{"text":"such that ","element":"span"},{"style":{"height":16.03},"width":216.8,"height":40.08,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-21.png","element":"img","alt":" |θ′i − θi| ≤ ǫ","inline":true,"padRight":true},{"text":"for all ","element":"span"},{"style":{"height":16},"width":130.04,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-22.png","element":"img","alt":" i ∈ [K]","inline":true,"padRight":true},{"text":"is at most 
","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-23.png","element":"img","alt":" η","inline":true},{"text":". Here the distance is Pompeiu-Hausdorff distance of sets. This is ","element":"span"},{"text":"because the Lipschitz continuity of the optimal set mapping (see [","element":"span"},{"href":"#id-38","referenceIndex":12,"text":"Dontchev and Rockafellar, 2009","element":"a"},{"text":", ","element":"span"},{"style":{"height":14},"width":18,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-24.png","element":"img","alt":" §","inline":true},{"text":"3C.5]) and the duality of linear programming problems. Since ","element":"span"},{"style":{"height":16},"width":68.32,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-25.png","element":"img","alt":" c(θ)","inline":true,"padRight":true},{"text":"is unique, ","element":"span"},{"style":{"height":16},"width":115.84,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-26.png","element":"img","alt":" ci(θ, ǫ)","inline":true,"padRight":true},{"text":"is upper bounded by ","element":"span"},{"style":{"height":16},"width":148.64,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-27.png","element":"img","alt":" ci(θ) + η","inline":true},{"text":". 
Then divide ","element":"span"},{"style":{"height":16},"width":108.64,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-28.png","element":"img","alt":" Rθ(T )","inline":true,"padRight":true},{"text":"by ","element":"span"},{"text":"log(","element":"span"},{"text":"T ","element":"span"},{"text":") ","element":"span"},{"text":"and let ","element":"span"},{"text":"T ","element":"span"},{"text":"go to ","element":"span"},{"style":{"height":7.2},"width":40,"height":18,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-29.png","element":"img","alt":" ∞","inline":true},{"text":",","element":"span"}],[{"style":{"width":"70%"},"width":1424,"height":117,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-30.png","element":"img"}],[{"text":"For instance, ","element":"span"},{"style":{"height":10.4},"width":20,"height":26,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-31.png","element":"img","alt":" η","inline":true,"padRight":true},{"text":"can be chosen as ","element":"span"},{"style":{"height":33.33},"width":919.6,"height":83.32,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-32.png","element":"img","alt":" ⟨c(θ), ∆(θ)⟩/ �Ki=1 ∆i(θ).□","inline":true}],[{"text":"The proof of Theorem ","element":"span"},{"href":"#id-46","text":"4 ","element":"a"},{"text":"is similar to Theorem ","element":"span"},{"href":"#id-47","text":"3 ","element":"a"},{"text":"by modifying (","element":"span"},{"href":"#id-43","text":"13","element":"a"},{"text":") with","element":"span"}],[{"style":{"width":"90%"},"width":1816,"height":382,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-33.png","element":"img"}]]},{"heading":"B Proof of Theorem 5","paragraphs":[[{"text":"First we prove a useful lemma on the robustness of linear programming problem where the coefficient matrix is of a specific 
","element":"span"},{"id":"id-50","text":"form.","element":"span"}],[{"text":"Lemma 6 Denote the linear programming problem of the form","element":"span"}],[{"text":"by ","element":"span"},{"text":"LP(","element":"span"},{"text":"A, b, c","element":"span"},{"text":") ","element":"span"},{"text":"where ","element":"span"},{"style":{"height":14.96},"width":448.16,"height":37.4,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-34.png","element":"img","alt":" A ∈ Rn×n, b ∈ Rn, c ∈ Rn","inline":true,"padRight":true},{"text":"and all entries in ","element":"span"},{"text":"A, b, c ","element":"span"},{"text":"are non-negative. Let the feasible set mapping, the optimal value mapping and the optimal set mapping be","element":"span"}],[{"style":{"width":"45%"},"width":910,"height":172,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-35.png","element":"img"}],[{"text":"respectively. Note that ","element":"span"},{"style":{"height":16},"width":197.44,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-36.png","element":"img","alt":" Sval(A, b, c)","inline":true,"padRight":true},{"text":"is always finite with the ","element":"span"},{"text":"A, b, c ","element":"span"},{"text":"of positive (or even non-negative) entries. Fix a pair ","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":")","element":"span"},{"text":", assume ","element":"span"},{"text":"A","element":"span"},{"text":"(","element":"span"},{"text":"i, j","element":"span"},{"text":") ","element":"span"},{"text":"> ","element":"span"},{"text":"0","element":"span"},{"text":". 
Let ","element":"span"},{"style":{"height":11.6},"width":124.08,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-37.png","element":"img","alt":" A′ = A","inline":true,"padRight":true},{"text":"except ","element":"span"},{"style":{"height":16},"width":355.04,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-38.png","element":"img","alt":" A′(i, j) = A(i, j) + ǫ","inline":true},{"text":". Then","element":"span"}],[{"style":{"width":"62%"},"width":1256,"height":45,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-39.png","element":"img"}],[{"text":"for some ","element":"span"},{"style":{"height":6.8},"width":26,"height":17,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/9-40.png","element":"img","alt":" α","inline":true,"padRight":true},{"text":"depending on ","element":"span"},{"text":"A, b, c ","element":"span"},{"text":"and ","element":"span"},{"text":"i, j","element":"span"},{"text":".","element":"span"}],[{"text":"Proof. By [","element":"span"},{"href":"#id-38","referenceIndex":12,"text":"Dontchev and Rockafellar, 2009","element":"a"},{"text":", Theorem 3C.3], the mapping","element":"span"}],[{"id":"id-48","style":{"width":"65%"},"width":1313,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-0.png","element":"img"}],[{"text":"is Lipschitz continuous. Recall that the distance on sets is Pompeiu-Hausdorff distance ","element":"span"},{"style":{"height":13.1},"width":47.64,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-1.png","element":"img","alt":" dH","inline":true},{"text":". First assume ","element":"span"},{"style":{"height":11.6},"width":89.6,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-2.png","element":"img","alt":" ǫ > 0","inline":true},{"text":". 
Then","element":"span"}],[{"style":{"width":"36%"},"width":733,"height":43,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-3.png","element":"img"}],[{"text":"If ","element":"span"},{"style":{"height":16.7},"width":337.6,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-4.png","element":"img","alt":" Sopt(A′) ⊂ Sfeas(A)","inline":true},{"text":", then ","element":"span"},{"style":{"height":16},"width":319.36,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-5.png","element":"img","alt":" Sval(A) = Sval(A′)","inline":true},{"text":". Suppose not and let ","element":"span"},{"style":{"height":16.7},"width":404.8,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-6.png","element":"img","alt":" x′ ∈ Sopt(A′) \\ Sfeas(A)","inline":true},{"text":". Then","element":"span"}],[{"style":{"width":"73%"},"width":1489,"height":316,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-7.png","element":"img"}],[{"text":"for some ","element":"span"},{"style":{"height":9.1},"width":41.44,"height":22.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-8.png","element":"img","alt":" α1","inline":true,"padRight":true},{"text":"depending on ","element":"span"},{"text":"A, b, c ","element":"span"},{"text":"and ","element":"span"},{"text":"i, j","element":"span"},{"text":". 
Thus","element":"span"}],[{"text":"for some ","element":"span"},{"style":{"height":10},"width":102.4,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-9.png","element":"img","alt":" α2, α3","inline":true,"padRight":true},{"text":"depending on ","element":"span"},{"text":"A, b, c","element":"span"},{"text":", where ","element":"span"},{"style":{"height":13.1},"width":46.24,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-10.png","element":"img","alt":" B1","inline":true,"padRight":true},{"text":"is the unit ball in ","element":"span"},{"style":{"height":10.8},"width":48.8,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-11.png","element":"img","alt":" Rn","inline":true,"padRight":true},{"text":"and the second inequality is due to the Lipschitz continuity of ","element":"span"},{"text":"G ","element":"span"},{"text":"in (","element":"span"},{"href":"#id-48","text":"25","element":"a"},{"text":"). Also","element":"span"}],[{"style":{"width":"77%"},"width":1558,"height":66,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-12.png","element":"img"}],[{"text":"where (*) is by the Lipschitz continuity of ","element":"span"},{"style":{"height":16},"width":639.2,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-13.png","element":"img","alt":" G′ : t ↦ {x ∈ Sfeas(A′, b) | ⟨c, x⟩ ≤ t}","inline":true},{"text":". The case of ","element":"span"},{"style":{"height":11.6},"width":89.6,"height":29,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-14.png","element":"img","alt":" ǫ < 0","inline":true,"padRight":true},{"text":"follows similarly. ","element":"span"},{"style":{"height":0},"width":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-15.png","element":"img","alt":" □","inline":true}],[{"text":"Proof. 
[of Theorem ","element":"span"},{"href":"#id-49","text":"5","element":"a"},{"text":"] The finite-time regret is similar to the previous proof. The main difference is on the bound for line ","element":"span"},{"href":"#id-37","text":"13","element":"a"},{"text":". In particular, the results of (","element":"span"},{"href":"#id-42","text":"18","element":"a"},{"text":") and (","element":"span"},{"href":"#id-42","text":"19","element":"a"},{"text":") are changed to be","element":"span"}],[{"style":{"width":"76%"},"width":1537,"height":257,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-16.png","element":"img"}],[{"text":"since ","element":"span"},{"style":{"height":16},"width":199.84,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-17.png","element":"img","alt":" ci(θ, ǫ, η(t))","inline":true,"padRight":true},{"text":"can bound ","element":"span"},{"style":{"height":16.7},"width":170.08,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-18.png","element":"img","alt":" Sopt(LPt)","inline":true},{"text":". 
The proof of other parts follows the proof of Theorem ","element":"span"},{"href":"#id-46","text":"4 ","element":"a"},{"text":"similarly.","element":"span"}],[{"text":"By the non-increasing property of ","element":"span"},{"style":{"height":16},"width":66.88,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-19.png","element":"img","alt":" η(t)","inline":true,"padRight":true},{"text":"whose limit is ","element":"span"},{"style":{"height":16},"width":108.16,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-20.png","element":"img","alt":" 0, η(t)","inline":true,"padRight":true},{"text":"would be smaller than ","element":"span"},{"style":{"height":18.74},"width":237.16,"height":46.84,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-21.png","element":"img","alt":" minij:p′ij>0 p′ij","inline":true,"padRight":true},{"text":"when ","element":"span"},{"style":{"height":13.2},"width":112,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-22.png","element":"img","alt":" t ≥ T1","inline":true,"padRight":true},{"text":"for some ","element":"span"},{"style":{"height":13.1},"width":130.88,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-23.png","element":"img","alt":"T1 > 0","inline":true},{"text":". Then ","element":"span"},{"style":{"height":13.1},"width":37.44,"height":32.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-24.png","element":"img","alt":" Pt","inline":true,"padRight":true},{"text":"only has small noise on the nonzero entries of ","element":"span"},{"style":{"height":18.63},"width":214.6,"height":46.56,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-25.png","element":"img","alt":" P ′ = (p′ij)ij","inline":true},{"text":". 
By Lemma ","element":"span"},{"href":"#id-50","text":"6","element":"a"},{"text":", ","element":"span"},{"style":{"height":16.7},"width":219.04,"height":41.76,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-26.png","element":"img","alt":" Sopt(θ′, η(t))","inline":true,"padRight":true},{"text":"is Lipschitz ","element":"span"},{"text":"continuous in ","element":"span"},{"style":{"height":16},"width":66.88,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-27.png","element":"img","alt":" η(t)","inline":true,"padRight":true},{"text":"for any ","element":"span"},{"style":{"height":10.8},"width":33.68,"height":27,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-28.png","element":"img","alt":" θ′","inline":true},{"text":". Thus","element":"span"}],[{"style":{"width":"23%"},"width":464,"height":55,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-29.png","element":"img"}],[{"text":"The remaining discussion on ","element":"span"},{"style":{"height":0},"width":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-30.png","element":"img","alt":" ǫ","inline":true,"padRight":true},{"text":"is similar. ","element":"span"},{"style":{"height":0},"width":20,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-31.png","element":"img","alt":" □","inline":true}]]},{"heading":"C Technical Lemmas","paragraphs":[[{"text":"Lemma 7 (Hoeffding’s Inequality [","element":"span"},{"href":"#id-51","referenceIndex":14,"text":"Hoeffding, 1963","element":"a"},{"text":"]) Let ","element":"span"},{"style":{"height":14},"width":192.8,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-32.png","element":"img","alt":" X1, . . . 
, Xn","inline":true,"padRight":true},{"text":"be independent random variables with common support ","element":"span"},{"text":"[0","element":"span"},{"text":", ","element":"span"},{"text":"1]","element":"span"},{"text":". Let ","element":"span"},{"style":{"height":19.31},"width":271.64,"height":48.28,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-33.png","element":"img","alt":"¯X = 1n�ni=1 Xi","inline":true,"padRight":true},{"text":"and ","element":"span"},{"style":{"height":19.2},"width":179.52,"height":48,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-34.png","element":"img","alt":" E� ¯X�= µ","inline":true},{"text":". Then for all ","element":"span"},{"style":{"height":13.2},"width":93.92,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-35.png","element":"img","alt":" a ≥ 0","inline":true},{"text":", ","element":"span"},{"style":{"height":19.66},"width":1111.64,"height":49.16,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-36.png","element":"img","alt":"P� ¯X − µ ≥ a�≤ exp(−2na2), P� ¯X − µ ≤ −a�≤ exp(−2na2).","inline":true}],[{"id":"id-53","text":"Lemma 8 (Bernstein’s Inequality) ","element":"span"},{"text":"Let ","element":"span"},{"style":{"height":14},"width":192.8,"height":35,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-37.png","element":"img","alt":" X1, . . . , Xn","inline":true,"padRight":true},{"text":"be independent zero-mean random variables. Suppose that ","element":"span"},{"style":{"height":16},"width":174.96,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-38.png","element":"img","alt":" |Xi| ≤ M","inline":true,"padRight":true},{"text":"almost surely for all ","element":"span"},{"text":"i","element":"span"},{"text":". 
Then for all ","element":"span"},{"style":{"height":13.2},"width":93.92,"height":33,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-39.png","element":"img","alt":" a ≥ 0","inline":true},{"text":",","element":"span"}],[{"style":{"width":"43%"},"width":868,"height":121,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/10-40.png","element":"img"}],[{"text":"Lemma 9 Let ","element":"span"},{"style":{"height":10},"width":222.24,"height":25,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/11-0.png","element":"img","alt":" x1, x2, . . . , xt","inline":true,"padRight":true},{"text":"be independent Bernoulli random variables with mean ","element":"span"},{"style":{"height":16},"width":354.4,"height":40,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/11-1.png","element":"img","alt":" p1, p2, . . . , pt ∈ (0, 1)","inline":true,"padRight":true},{"text":"respectively.","element":"span"}],[{"style":{"width":"60%"},"width":1228,"height":205,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/11-2.png","element":"img"}],[{"id":"id-52","text":"Proof.","element":"span"}],[{"text":"where (","element":"span"},{"href":"#id-52","text":"26","element":"a"},{"text":") is by Bernstein’s inequality (Lemma ","element":"span"},{"href":"#id-53","text":"8","element":"a"},{"text":") and (","element":"span"},{"href":"#id-54","text":"27","element":"a"},{"id":"id-54","text":") holds when","element":"span"}],[{"style":{"width":"58%"},"width":1189,"height":177,"src":"https://cdn.bytez.com/mobilePapers/v2/arxiv/1903.01083/images/11-3.png","element":"img"}]]}],"_version":"3.3.2"},"paperNode":"$1b:props:children:props:children:0:props:product"}]]]}]}]