There was a subtle but small error in the proofs published in [1]. We have corrected the error in a new appendix G added to the arXiv version of the paper, available at https://arxiv.org/abs/1307.6134v4. These corrections also apply to other papers which built off of the results in [1], including [2], and [3].
The error arose from our application of concentration inequalities, sometimes known as tail bounds. In the originally-published proofs, we condition on the number of times that the algorithm has selected arm
up to time
. Since the arm selection policy depends on the rewards accrued,
and the rewards are dependent random variables. In the correction, we build upon an alternative concentration inequality that accounts for this dependence and show that proofs of all the performance bounds follow a similar pattern with slight modification to the decision heuristic.
![[pdf]](http://www.paulreverdy.com/wp-content/plugins/papercite/img/pdf.png)
[Bibtex]
@article{PBR-VS-NEL:14,
Author = {Reverdy, Paul B and Srivastava, Vaibhav and Leonard, Naomi Ehrich},
Date-Added = {2018-02-05 06:22:16 +0000},
Date-Modified = {2018-02-08 21:11:54 +0000},
Journal = {Proceedings of the {IEEE}},
Number = {4},
Pages = {544--571},
Pdf = {http://www.paulreverdy.com/wp-content/papercite-data/pdf/PBR-VS-NEL-14.pdf},
Publisher = {IEEE},
Title = {Modeling human decision making in generalized {G}aussian multiarmed bandits},
Volume = {102},
Year = {2014}}
![[doi]](http://www.paulreverdy.com/wp-content/plugins/papercite/img/external.png)
[Bibtex]
@article{PR-VS-NEL:17,
Author = {Reverdy, Paul and Srivastava, Vaibhav and Leonard, Naomi Ehrich},
Date-Added = {2018-02-05 06:22:16 +0000},
Date-Modified = {2018-02-08 21:34:38 +0000},
Doi = {10.1109/TAC.2016.2644380},
Journal = {{IEEE} {T}ransactions on {A}utomatic {C}ontrol},
Number = {8},
Pages = {3788--3803},
Publisher = {IEEE},
Title = {Satisficing in multi-armed bandit problems},
Volume = {62},
Year = {2017},
Bdsk-Url-1 = {https://doi.org/10.1109/TAC.2016.2644380}}
![[pdf]](http://www.paulreverdy.com/wp-content/plugins/papercite/img/pdf.png)
![[doi]](http://www.paulreverdy.com/wp-content/plugins/papercite/img/external.png)
[Bibtex]
@article{PR-NEL:16,
Author = {Reverdy, Paul and Leonard, Naomi Ehrich},
Date-Added = {2018-02-05 06:22:16 +0000},
Date-Modified = {2018-02-08 21:37:50 +0000},
Doi = {10.1109/TASE.2015.2499244},
Journal = {{IEEE} {T}ransactions on {A}utomation {S}cience and {E}ngineering},
Number = {1},
Pages = {54--67},
Pdf = {http://www.paulreverdy.com/wp-content/papercite-data/pdf/PR-NEL-16.pdf},
Publisher = {IEEE},
Title = {Parameter estimation in softmax decision-making models with linear objective functions},
Volume = {13},
Year = {2016},
Bdsk-Url-1 = {https://doi.org/10.1109/TASE.2015.2499244}}