@InProceedings{Supelec626,
author = {Senthilkumar Chandramohan and Matthieu Geist and Olivier Pietquin},
title = {{Optimizing Spoken Dialogue Management with Fitted Value Iteration}},
year = {2010},
booktitle = {{Proceedings of the International Conference on Speech Communication and Technologies (Interspeech 2010)}},
publisher = {ISCA},
pages = {86--89},
month = {September},
address = {Makuhari (Japan)},
url = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/Supelec626.pdf},
abstract = {In recent years, machine learning approaches have been proposed
for dialogue management optimization in spoken dialogue systems. It is
customary to cast the dialogue management problem as a Markov Decision
Process and to find the optimal policy using Reinforcement Learning (RL)
algorithms. Yet the dialogue state space is large and standard RL
algorithms fail to handle it. In this paper we explore a generalization
framework for dialogue management based on a particular fitted value
iteration algorithm, namely fitted-Q iteration. We show that fitted-Q,
when applied to continuous state space dialogue management problems, can
generalize well and make efficient use of samples to learn the approximate
optimal state-action value function. Our experimental results show that
fitted-Q performs significantly better than the hand-coded policy and
relatively better than the policy learned using least-squares policy
iteration, another generalization algorithm.}
}
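
The fitted-Q iteration referred to in the abstract is, in essence, batch-mode
reinforcement learning: a regressor is repeatedly refitted on a fixed set of
sampled transitions so that Q(s,a) approaches r + gamma * max_a' Q(s',a').
The sketch below is an illustrative, minimal version, not the authors'
implementation; the ExtraTreesRegressor, the discount factor, the iteration
count and the integer action encoding are all assumptions made for the example.

import numpy as np
from sklearn.ensemble import ExtraTreesRegressor

def fitted_q_iteration(transitions, n_actions, gamma=0.95, n_iters=50):
    """Fitted-Q iteration on a batch of (state, action, reward, next_state)
    transitions; states are 1-D numpy arrays, actions are integer indices."""
    states = np.array([s for s, a, r, s2 in transitions])
    actions = np.array([a for s, a, r, s2 in transitions]).reshape(-1, 1)
    rewards = np.array([r for s, a, r, s2 in transitions])
    next_states = np.array([s2 for s, a, r, s2 in transitions])

    X = np.hstack([states, actions])   # regress Q on the (state, action) pair
    q_model = None
    for _ in range(n_iters):
        if q_model is None:
            targets = rewards          # Q_0 is the immediate reward
        else:
            # One-step lookahead: evaluate the current Q estimate for every
            # action in the next state and back up the maximum.
            next_q = np.column_stack([
                q_model.predict(np.hstack(
                    [next_states, np.full((len(next_states), 1), a)]))
                for a in range(n_actions)])
            targets = rewards + gamma * next_q.max(axis=1)
        q_model = ExtraTreesRegressor(n_estimators=50).fit(X, targets)
    return q_model

def greedy_action(q_model, state, n_actions):
    """Greedy policy with respect to the learned approximate Q-function."""
    q_values = [q_model.predict(np.hstack([state, [a]]).reshape(1, -1))[0]
                for a in range(n_actions)]
    return int(np.argmax(q_values))

In a dialogue setting, the transition batch would typically come from a user
simulator or logged dialogues, and the greedy policy derived from the learned
Q-function would then drive the dialogue manager.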