@InProceedings{Supelec943,
author = {Julien Pérolat and Bilal Piot and Matthieu Geist and Bruno Scherrer and Olivier Pietquin},
title = {{Softened Approximate Policy Iteration for Markov Games}},
year = {2016},
booktitle = {{International Conference on Machine Learning (ICML)}},
url = {http://jmlr.org/proceedings/papers/v48/perolat16.html},
abstract = {This paper reports theoretical and empirical investigations on
the use of quasi-Newton methods to minimize the Optimal Bellman
Residual (OBR) of zero-sum two-player Markov Games. First, it
reveals that state-of-the-art algorithms can be derived by the
direct application of Newton's method to different norms of the
OBR. More precisely, when applied to the norm of the OBR,
Newton's method results in the Bellman Residual Minimization
Policy Iteration (BRMPI) and, when applied to the norm of the
Projected OBR (POBR), it results in the standard Least Squares
Policy Iteration (LSPI) algorithm. Consequently, new algorithms
are proposed that use quasi-Newton methods to minimize the OBR
and the POBR, so as to benefit from improved empirical
performance at low cost. Indeed, the quasi-Newton approach
requires only slight modifications to the implementations of
LSPI and BRMPI, yet significantly improves both the stability
and the performance of those algorithms. These phenomena are
illustrated in an experiment conducted on artificially
constructed games called Garnets.}
}