@inproceedings{Supelec677,
  author    = {Geist, Matthieu and Pietquin, Olivier},
  title     = {Parametric Value Function Approximation: A Unified View},
  booktitle = {Proceedings of the {IEEE} Symposium on Adaptive Dynamic
               Programming and Reinforcement Learning ({ADPRL} 2011)},
  year      = {2011},
  month     = apr,
  pages     = {9--16},
  address   = {Paris, France},
  url       = {http://www.metz.supelec.fr//metz/personnel/pietquin/pdf/ADPRL_2011_MGOP.pdf},
  abstract  = {Reinforcement learning (RL) is a machine learning answer to the
               optimal control problem. It consists in learning an optimal
               control policy through interactions with the system to be
               controlled, the quality of this policy being quantified by the
               so-called value function. An important RL subtopic is to
               approximate this function when the system is too large for an
               exact representation. This survey reviews and unifies state of
               the art methods for parametric value function approximation by
               grouping them into three main categories: bootstrapping,
               residuals and projected fixed-point approaches. Related
               algorithms are derived by considering one of the associated
               cost functions and a specific way to minimize it, almost always
               a stochastic gradient descent or a recursive least-squares
               approach.},
}