@inproceedings{Supelec886,
  author        = {Scherrer, Bruno and Geist, Matthieu},
  title         = {Local Policy Search in a Convex Space and Conservative Policy Iteration as Boosted Policy Search},
  booktitle     = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases ({ECML/PKDD})},
  year          = {2014},
  note          = {(to appear)},
  url           = {http://www.metz.supelec.fr//metz/personnel/geist_mat/pdfs/supelec886.pdf},
  internal-note = {NOTE(review): URL contains a doubled slash (fr//metz) -- verify the link still resolves, and prefer adding a doi if one now exists},
  abstract      = {Local Policy Search is a popular reinforcement learning approach
                   for handling large state spaces. Formally, it searches locally in
                   a parameterized policy space in order to maximize the associated
                   value function averaged over some predefined distribution. The
                   best one can hope in general from such an approach is to get a
                   local optimum of this criterion. The first result of this article
                   is the following surprising result: if the policy space is
                   convex, any (approximate) local optimum enjoys a global
                   performance guarantee.
                   Unfortunately, the convexity assumption is strong: it is not
                   satisfied by commonly used parameterizations and designing a
                   parameterization that induces this property seems hard. A natural
                   solution to alleviate this issue consists in deriving an
                   algorithm that solves the local policy search problem using a
                   boosting approach (constrained to the convex hull of the policy
                   space). Quite surprisingly, the resulting algorithm turns out to
                   be a slight generalization of conservative policy iteration;
                   thus, our second contribution is to highlight an original
                   connection between local policy search and approximate dynamic
                   programming.},
}