@inproceedings{Supelec855,
  author    = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier},
  title     = {Learning from Demonstrations: Is It Worth Estimating a Reward Function?},
  booktitle = {Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases ({ECML/PKDD} 2013)},
  year      = {2013},
  editor    = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and {\v{Z}}elezn{\'y}, Filip},
  series    = {Lecture Notes in Computer Science},
  volume    = {8188},
  pages     = {17--32},
  month     = sep,
  publisher = {Springer},
  address   = {Prague, Czech Republic},
  isbn      = {978-3-642-40987-5},
  doi       = {10.1007/978-3-642-40988-2_2},
  url       = {http://www.ecmlpkdd2013.org/wp-content/uploads/2013/07/384.pdf},
  abstract  = {This paper provides a comparative study between Inverse
Reinforcement Learning (IRL) and Apprenticeship Learning (AL).
IRL and AL are two frameworks, using Markov Decision Processes
(MDP), which are used for the imitation learning problem where
an agent tries to learn from demonstrations of an expert. In
the AL Framework, the agent tries to learn the expert policy
whereas in the IRL Framework, the agent tries to learn a reward
which can explain the behavior of the expert. This reward is
then optimized to imitate the expert.
One can wonder if it is worth estimating such a reward, or if
estimating a Policy is sufficient. This quite natural question
has not really been addressed in the literature right now. We
provide partial answers, both from a theoretical and empirical
point of view.}
}