% ICMLA 2011 conference paper: LWPR applied to approximate dynamic programming.
@inproceedings{Supelec740,
  author    = {Glaude, Hadrien and Akrimi, Fadi and Geist, Matthieu and Pietquin, Olivier},
  title     = {A Non-Parametric Approach to Approximate Dynamic Programming},
  booktitle = {Proceedings of the 10th {IEEE} International Conference on Machine Learning and Applications ({ICMLA} 2011)},
  year      = {2011},
  month     = dec,
  pages     = {317--322},
  address   = {Honolulu (USA)},
  doi       = {10.1109/ICMLA.2011.19},
  url       = {http://www.metz.supelec.fr/metz/personnel/pietquin/pdf/ICMLA_2011_HGFAMGOP.pdf},
  abstract  = {Approximate Dynamic Programming (ADP) is a machine learning
               method aiming at learning an optimal control policy for a
               dynamic and stochastic system from a logged set of observed
               interactions between the system and one or several
               non-optimal controllers. It defines a class of particular
               Reinforcement Learning (RL) algorithms which is a general
               paradigm for learning such a control policy from
               interactions. ADP addresses the problem of systems
               exhibiting a state space which is too large to be enumerated
               in the memory of a computer. Because of this, approximation
               schemes are used to generalize estimates over continuous
               state spaces. Nevertheless, RL still suffers from a lack of
               scalability to multidimensional continuous state spaces. In
               this paper, we propose the use of the Locally Weighted
               Projection Regression (LWPR) method to handle this
               scalability problem. We prove the efficacy of our approach
               on two standard benchmarks modified to exhibit larger state
               spaces.},
}