@incollection{Supelec539,
  author    = {Geist, Matthieu and Pietquin, Olivier and Fricout, Gabriel},
  title     = {Tracking in Reinforcement Learning},
  booktitle = {Proceedings of the 16th International Conference on Neural Information Processing ({ICONIP} 2009)},
  series    = {Lecture Notes in Computer Science},
  volume    = {5863, Part I},
  pages     = {502--511},
  publisher = {Springer},
  address   = {Bangkok, Thailand},
  month     = dec,
  year      = {2009},
  note      = {{ENNS} best student paper award},
  url       = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/Supelec539.pdf},
  abstract  = {Reinforcement learning induces non-stationarity at several
               levels. Adaptation to non-stationary environments is of course
               a desired feature of a fair RL algorithm. Yet, even if the
               environment of the learning agent can be considered as
               stationary, generalized policy iteration frameworks, because of
               the interleaving of learning and control, will produce non-
               stationarity of the evaluated policy and so of its value
               function. Tracking the optimal solution instead of trying to
               converge to it is therefore preferable. In this paper, we
               propose to handle this tracking issue with a Kalman-based
               temporal difference framework. Complexity and convergence
               analysis are studied. Empirical investigations of its ability to
               handle non-stationarity is finally provided.},
}