% Cleaned auto-export artifacts: repaired garbled URL query string, en-dash page
% range, month macro, single-braced title; the 1932-4553 identifier is the
% journal's ISSN (not an ISBN), so it is stored in the issn field.
@article{Supelec808,
  author   = {Daubigney, Lucie and Geist, Matthieu and Chandramohan, Senthilkumar and Pietquin, Olivier},
  title    = {A Comprehensive Reinforcement Learning Framework for Dialogue Management Optimisation},
  journal  = {IEEE Journal of Selected Topics in Signal Processing},
  year     = {2012},
  volume   = {6},
  number   = {8},
  pages    = {891--902},
  month    = dec,
  issn     = {1932-4553},
  doi      = {10.1109/JSTSP.2012.2229257},
  url      = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6359744},
  abstract = {Reinforcement learning is now an acknowledged approach for
              optimising the interaction strategy of spoken dialogue systems.
              If the first considered algorithms were quite basic (like
              SARSA), recent works concentrated on more sophisticated
              methods. More attention has been paid to off-policy learning,
              dealing with the exploration-exploitation dilemma, sample
              efficiency or handling non-stationarity. New algorithms have
              been proposed to address these issues and have been applied to
              dialogue management. However, each algorithm often solves a
              single issue at a time, while dialogue systems exhibit all the
              problems at once. In this paper, we propose to apply the Kalman
              Temporal Differences (KTD) framework to the problem of dialogue
              strategy optimisation so as to address all these issues in a
              comprehensive manner with a single framework. Our claims are
              illustrated by experiments led on two real-world goal-oriented
              dialogue management frameworks, DIPPER and HIS.},
}