% Supelec650: Geist and Pietquin, workshop paper on uncertainty in the KTD framework.
% Notes for maintainers:
%   - volume is taken from the "v16" path component of the url field; confirm against the proceedings index.
%   - address holds the workshop venue as given in the original record (not a publisher city).
%   - abstract is quoted from the paper; only the broken "ff" ligature was repaired.
@InProceedings{Supelec650,
  author    = {Geist, Matthieu and Pietquin, Olivier},
  title     = {Managing Uncertainty within the {KTD} Framework},
  booktitle = {Proceedings of the Workshop on Active Learning and Experimental Design ({AL\&E} collocated with {AISTATS} 2010)},
  series    = {Journal of Machine Learning Research Conference and Workshop Proceedings},
  volume    = {16},
  pages     = {157--168},
  year      = {2011},
  address   = {Sardinia (Italy)},
  url       = {http://jmlr.csail.mit.edu/proceedings/papers/v16/geist11a/geist11a.pdf},
  abstract  = {The dilemma between exploration and exploitation is an important
topic in reinforcement learning (RL). Most successful approaches
in addressing this problem tend to use some uncertainty
information about values estimated during learning. On another
hand, scalability is known as being a lack of RL algorithms and
value function approximation has become a major topic of
research. Both problems arise in real-world applications, however
few approaches allow approximating the value function while
maintaining uncertainty information about estimates. Even fewer
use this information in the purpose of addressing the
exploration/exploitation dilemma. In this paper, we show how such
an uncertainty information can be derived from a Kalman-based
Temporal Differences (KTD) framework and how it can be used.},
}