% arXiv preprint 1606.01128. The citation key is kept as-is so existing \cite commands still resolve.
@misc{Supelec944,
  author        = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier},
  title         = {Difference of Convex Functions Programming Applied to Control with Expert Data},
  year          = {2016},
  howpublished  = {arXiv},
  eprint        = {1606.01128},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1606.01128},
  abstract      = {This paper shows how Difference of Convex functions (DC)
                   programming can improve the performance of some Reinforcement
                   Learning (RL) algorithms using expert data and Learning from
                   Demonstrations (LfD) algorithms. This is principally due to the
                   fact that the norm of the Optimal Bellman Residual (OBR), which
                   is one of the main components of the algorithms considered, is DC.
                   The slight performance improvement is shown on two algorithms,
                   namely Reward-regularized Classification for Apprenticeship
                   Learning (RCAL) and Reinforcement Learning with Expert
                   Demonstrations (RLED), through experiments on generic Markov
                   Decision Processes (MDP) called Garnets.},
}