BibTeX

% Workshop paper (EWRL 2015) by Matthieu Geist on a multiplicative UCB-like
% bandit strategy for Gamma-distributed rewards. Only case-sensitive words in
% the title are braced so that bibliography styles can still apply their own casing.
@InProceedings{Supelec913,
  author    = {Geist, Matthieu},
  title     = {A multiplicative {UCB} strategy for {Gamma} rewards},
  booktitle = {European Workshop on Reinforcement Learning ({EWRL})},
  year      = {2015},
  url       = {http://www.metz.supelec.fr//metz/personnel/geist_mat/pdfs/gamma_ucb.pdf},
  abstract  = {We consider the stochastic multi-armed bandit problem where rewards are distributed according to Gamma probability measures (unknown up to a lower bound on the form factor). To handle this problem, we propose an UCB-like strategy where indexes are multiplicative (sampled mean times a scaling factor). An upper-bound for the associated regret is provided and the proposed strategy is illustrated on some simple experiments.},
}