@inproceedings{Supelec778,
  author        = {Scherrer, Bruno and Gabillon, Victor and Ghavamzadeh, Mohammad and Geist, Matthieu},
  title         = {Approximate Modified Policy Iteration},
  booktitle     = {International Conference on Machine Learning ({ICML})},
  year          = {2012},
  note          = {(to appear)},
  eprint        = {1205.3054},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1205.3054},
  abstract      = {Modified policy iteration (MPI) is a dynamic programming
                   (DP) algorithm that contains the two celebrated policy and
                   value iteration methods. Despite its generality, MPI has not
                   been thoroughly studied, especially its approximation form
                   which is used when the state and/or action spaces are large
                   or infinite. In this paper, we propose three approximate MPI
                   (AMPI) algorithms that are extensions of the well-known
                   approximate DP algorithms: fitted-value iteration, fitted-Q
                   iteration, and classification-based policy iteration. For
                   all algorithms, we provide error propagation analyses that
                   unify those for approximate policy and value iteration. On
                   an implementation of the last classification-based
                   algorithm, we develop a finite-sample analysis that shows
                   that MPI's main parameter allows to control the balance
                   between the estimation error of the classifier and the
                   overall value function approximation.},
}