@article{Supelec823,
  author   = {Geist, Matthieu and Pietquin, Olivier},
  title    = {Algorithmic Survey of Parametric Value Function Approximation},
  journal  = {IEEE Transactions on Neural Networks and Learning Systems},
  year     = {2013},
  volume   = {24},
  number   = {6},
  pages    = {845--867},
  doi      = {10.1109/TNNLS.2013.2247418},
  url      = {https://ieeexplore.ieee.org/document/6471847},
  abstract = {Reinforcement learning is a machine learning answer to the
              optimal control problem. It consists in learning an optimal
              control policy through interactions with the system to be
              controlled, the quality of this policy being quantified by the
              so-called value function. A recurrent subtopic of reinforcement
              learning is to compute an approximation of this value function
              when the system is too large for an exact representation.
              This survey reviews state-of-the-art methods for (parametric)
              value function approximation by grouping them into three main
              categories: bootstrapping, residual and projected fixed-point
              approaches. Related algorithms are derived by considering one of
              the associated cost functions and a specific minimization
              method, generally a stochastic gradient descent or a recursive
              least-squares approach.},
}