@Article{Supelec685,
author = {Olivier Pietquin and Matthieu Geist and Senthilkumar Chandramohan and Hervé Frezza-Buet},
title = {{Sample-Efficient Batch Reinforcement Learning for Dialogue Management Optimization}},
journal = {ACM Transactions on Speech and Language Processing},
year = {2011},
volume = {7},
number = {3},
pages = {7:1--7:21},
month = {May},
url = {http://www.metz.supelec.fr/metz/personnel/pietquin/pdf/ACM_TSLP_2011_OPMGSCHFB.pdf},
doi = {10.1145/1966407.1966412},
abstract = {Spoken Dialogue Systems (SDS) are systems that can interact
with human beings using natural language as the medium of interaction.
A dialogue policy plays a crucial role in determining the functioning
of the dialogue management module. Hand-crafting the dialogue policy
is not always an option, considering the complexity of the dialogue
task and the stochastic behavior of users. In recent years, approaches
based on Reinforcement Learning (RL) have proven efficient for policy
optimization in dialogue management. Yet most conventional RL
algorithms are data intensive and require techniques such as user
simulation, which can introduce additional modeling errors. This paper
explores the use of a set of approximate dynamic programming
algorithms for policy optimization in SDS. Moreover, these algorithms
are combined with a method for learning a sparse representation of the
value function. Experimental results show that, when applied to
dialogue management optimization, these algorithms are particularly
\emph{sample efficient}, since they learn from a few hundred dialogue
examples. These algorithms learn in an \emph{off-policy} manner,
meaning that they can learn optimal policies from dialogue examples
generated with quite a simple strategy. Thus they can learn good
dialogue policies directly \emph{from data}, avoiding user modeling
errors.}
}