@inproceedings{Supelec874,
  author    = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier},
  title     = {Boosted and Reward-regularized Classification for Apprenticeship Learning},
  booktitle = {13th International Conference on Autonomous Agents and Multiagent Systems ({AAMAS} 2014)},
  year      = {2014},
  note      = {(accepted, to appear)},
  address   = {Paris, France},
  url       = {http://www.metz.supelec.fr//metz/personnel/geist_mat/pdfs/supelec874.pdf},
  abstract  = {This paper deals with the problem of learning from demonstrations,
where an agent called the apprentice tries to learn
a behavior from demonstrations of another agent called the
expert. To address this problem, we place ourselves into the
Markov Decision Process (MDP) framework, which is well
suited for sequential decision making problems. A way to
tackle this problem is to reduce it to classification but doing
so we do not take into account the MDP structure. Other
methods which take into account the MDP structure need
to solve MDPs which is a difficult task and/or need a choice
of features which is problem-dependent. The main contribution
of the paper is to extend a large margin approach,
which is a classification method, by adding a regularization
term which takes into account the MDP structure. The
derived algorithm, called Reward-regularized Classification
for Apprenticeship Learning (RCAL), does not need to solve
MDPs. But, the major advantage is that it can be boosted:
this avoids the choice of features, which is a drawback of
parametric approaches. A state of the art experiment (Highway)
and generic experiments (structured Garnets) are conducted
to show the performance of RCAL compared to algorithms
from the literature.},
}