@inproceedings{Supelec719,
  author    = {Lucie Daubigney and Milica Gasic and Senthilkumar Chandramohan and Matthieu Geist and Olivier Pietquin and Steve Young},
  title     = {Uncertainty Management for On-line Optimisation of a {POMDP}-Based Large-Scale Spoken Dialogue System},
  booktitle = {Proceedings of the 12th Annual Conference of the International Speech Communication Association ({Interspeech} 2011)},
  year      = {2011},
  month     = aug,
  pages     = {1301--1304},
  address   = {Florence, Italy},
  url       = {http://www.metz.supelec.fr//metz/personnel/pietquin/pdf/IS_2011_LDMGSCMGOPSY.pdf},
  abstract  = {The optimization of dialogue policies using reinforcement
learning (RL) is now an accepted part of the state of the art
in spoken dialogue systems (SDS). Yet, it is still the case
that the commonly used training algorithms for SDS require a
large number of dialogues and hence most systems still rely on
artificial data generated by a user simulator. Optimization is
therefore performed off-line before releasing the system to
real users. Gaussian Processes (GP) for RL have recently been
applied to dialogue systems. One advantage of GP is that they
compute an explicit measure of uncertainty in the value
function estimates computed during learning. In this paper, a
class of novel learning strategies is described which use
uncertainty to control exploration on-line. Comparisons between
several exploration schemes show that significant improvements
to learning speed can be obtained and that rapid and safe
online optimisation is possible, even on a complex task.},
}