@inproceedings{Supelec749,
  author    = {Geist, Matthieu and Scherrer, Bruno},
  title     = {{L1}-penalized Projected {Bellman} Residual},
  booktitle = {Proceedings of the European Workshop on Reinforcement Learning ({EWRL} 2011)},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Athens, Greece},
  year      = {2011},
  month     = sep,
  note      = {12 pages},
  url       = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/supelec749.pdf},
  abstract  = {We consider the task of feature selection for value function
               approximation in reinforcement learning. A promising approach
               consists in combining the Least-Squares Temporal Difference (LSTD)
               algorithm with L1-regularization, which has proven to be
               effective in the supervised learning community. This has been done
               recently with the LARS-TD algorithm, which replaces the projection
               operator of LSTD with an L1-penalized projection and solves
               the corresponding fixed-point problem. However, this approach is not
               guaranteed to be correct in the general off-policy setting. We take
               a different route by adding an L1-penalty term to the
               projected Bellman residual, which requires weaker assumptions while
               offering a comparable performance. However, this comes at the cost
               of a higher computational complexity if only a part of the
               regularization path is computed. Nevertheless, our approach ends up
               to a supervised learning problem, which lets envision easy extensions
               to other penalties.},
}