@Article{Supelec907,
author = {Bruno Scherrer and Mohammad Ghavamzadeh and Victor Gabillon and Boris Lesner and Matthieu Geist},
title = {{Approximate Modified Policy Iteration and its Application to the Game of Tetris}},
journal = {Journal of Machine Learning Research},
year = {2015},
volume = {16},
pages = {1629--1676},
url = {http://jmlr.org/papers/volume16/scherrer15a/scherrer15a.pdf},
abstract = {Modified policy iteration (MPI) is a dynamic programming (DP)
algorithm that contains the two celebrated policy and value iteration
methods. Despite its generality, MPI has not been thoroughly studied,
especially its approximate form, which is used when the state and/or
action spaces are large or infinite. In this paper, we propose three
implementations of approximate MPI (AMPI) that are extensions of the
well-known approximate DP algorithms: fitted-value iteration, fitted-Q
iteration, and classification-based policy iteration. We provide error
propagation analyses that unify those for approximate policy and value
iteration. We develop the finite-sample analysis of these algorithms,
which highlights the influence of their parameters. In the
classification-based version of the algorithm (CBMPI), the analysis
shows that MPI's main parameter controls the balance between the
estimation error of the classifier and the overall value function
approximation. We illustrate and evaluate the behavior of these new
algorithms in the Mountain Car and Tetris problems. Remarkably, in
Tetris, CBMPI outperforms the existing DP approaches by a large margin,
and competes with the current state-of-the-art methods while using
fewer samples.}
}