@inproceedings{Supelec612,
  author    = {Geist, Matthieu and Pietquin, Olivier},
  title     = {Statistically Linearized Least-Squares Temporal Differences},
  booktitle = {Journ{\'e}es Francophones de Planification, D{\'e}cision et
               Apprentissage pour la conduite de syst{\`e}mes ({JFPDA} 2010)},
  year      = {2010},
  month     = jun,
  address   = {Besan{\c{c}}on (France)},
  note      = {8 pages},
  abstract  = {A major drawback of standard reinforcement learning algorithms
               is their inability to scale up to real-world problems. For this
               reason, a current important trend of research is (state-action)
               value function approximation. A prominent value function
               approximator is the least-squares temporal differences (LSTD)
               algorithm. However, for technical reasons, linearity is
               mandatory: the parameterization of the value function must be
               linear (compact nonlinear representations are not allowed) and
               only the Bellman evaluation operator can be considered. In this
               paper, this restriction of LSTD is lifted thanks to a
               derivative-free statistical linearization approach. This way,
               nonlinear parameterizations and the Bellman optimality operator
               can be taken into account.},
}