@inproceedings{Supelec777,
  author        = {Geist, Matthieu and Scherrer, Bruno and Lazaric, Alessandro and Ghavamzadeh, Mohammad},
  title         = {A {Dantzig} Selector Approach to Temporal Difference Learning},
  booktitle     = {International Conference on Machine Learning ({ICML})},
  year          = {2012},
  note          = {(to appear)},
  url           = {http://arxiv.org/abs/1206.6480},
  eprint        = {1206.6480},
  archiveprefix = {arXiv},
  abstract      = {LSTD is one of the most popular reinforcement
    learning algorithms for value function
    approximation. Whenever the number of
    samples is larger than the number of features,
    LSTD must be paired with some form of regularization.
    In particular, L1-regularization
    methods tend to perform feature selection
    by promoting sparsity and thus they are particularly
    suited in high-dimensional problems.
    Nonetheless, since LSTD is not a
    simple regression algorithm but it solves a
    fixed-point problem, the integration with L1-
    regularization is not straightforward and it
    might come with some drawbacks (see e.g.,
    the P-matrix assumption for LASSO-TD).
    In this paper we introduce a novel algorithm
    obtained by integrating LSTD with the
    Dantzig Selector. In particular, we investigate
    the performance of the algorithm and
    its relationship with existing regularized approaches,
    showing how it overcomes some of
    the drawbacks of existing solutions.},
}