% Conference paper by Geist and Pietquin (2010); citation key kept unchanged.
% NOTE(review): the acronym ICUMT contains a "T" that the spelled-out
% booktitle below does not account for -- verify the official proceedings
% title against the DOI record before publishing.
@InProceedings{Supelec636,
  author    = {Geist, Matthieu and Pietquin, Olivier},
  title     = {Statistically Linearized Least-Squares Temporal Differences},
  year      = {2010},
  booktitle = {Proceedings of the {IEEE} International Conference on Ultra Modern Control Systems ({ICUMT} 2010)},
  publisher = {IEEE},
  pages     = {450--457},
  month     = oct,
  address   = {Moscow (Russia)},
  url       = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/Supelec636.pdf},
  isbn      = {978-1-4244-7285-7},
  doi       = {10.1109/ICUMT.2010.5676598},
  abstract  = {A common drawback of standard reinforcement
    learning algorithms is their inability to scale-up to real-world
    problems. For this reason, a current important trend of research
    is (state-action) value function approximation. A prominent
    value function approximator is the least-squares temporal
    differences (LSTD) algorithm. However, for technical reasons,
    linearity is mandatory: the parameterization of the value
    function must be linear (compact nonlinear representations are
    not allowed) and only the Bellman evaluation operator can
    be considered (imposing policy-iteration-like schemes). In this
    paper, this restriction of LSTD is lifted thanks to a
    derivative-free statistical linearization approach. This way,
    nonlinear parameterizations and the Bellman optimality operator
    can be taken into account (this last point allows taking into
    account value-iteration-like schemes). The efficiency of the
    resulting algorithms is demonstrated using a linear
    parameterization and neural networks as well as on a
    Q-learning-like problem. A theoretical analysis is also provided.},
}