% Conference paper (ADPRL 2009) introducing Kalman Temporal Differences for
% deterministic MDPs. Only proper nouns and acronyms are brace-protected so
% that the bibliography style remains free to apply its own title casing.
@inproceedings{Supelec471,
  author    = {Geist, Matthieu and Pietquin, Olivier and Fricout, Gabriel},
  title     = {{Kalman} Temporal Differences: The Deterministic Case},
  booktitle = {{IEEE} International Symposium on Adaptive Dynamic Programming and Reinforcement Learning ({ADPRL} 2009)},
  year      = {2009},
  month     = apr,
  pages     = {185--192},
  address   = {Nashville (TN, USA)},
  doi       = {10.1109/ADPRL.2009.4927543},
  url       = {http://hal-supelec.archives-ouvertes.fr/hal-00380870/en/},
  abstract  = {This paper deals with value function and $Q$-function
               approximation in deterministic Markovian decision processes.
               A general statistical framework based on the Kalman filtering
               paradigm is introduced. Its principle is to adopt a parametric
               representation of the value function, to model the associated
               parameter vector as a random variable and to minimize the
               mean-squared error of the parameters conditioned on past
               observed transitions. From this general framework, which will
               be called Kalman Temporal Differences (KTD), and using an
               approximation scheme called the unscented transform, a family
               of algorithms is derived, namely KTD-V, KTD-SARSA and KTD-Q,
               which aim respectively at estimating the value function of a
               given policy, the $Q$-function of a given policy and the
               optimal $Q$-function. The proposed approach holds for linear
               and nonlinear parameterization. This framework is discussed
               and potential advantages and shortcomings are highlighted.},
}