@inproceedings{Supelec725,
  author    = {Scherrer, Bruno and Geist, Matthieu},
  title     = {Recursive Least-Squares Learning with Eligibility Traces},
  booktitle = {Proceedings of the European Workshop on Reinforcement Learning ({EWRL} 2011)},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Athens, Greece},
  year      = {2011},
  month     = sep,
  note      = {12 pages},
  url       = {http://www.metz.supelec.fr/metz/personnel/geist_mat/pdfs/supelec725.pdf},
  abstract  = {In the framework of Markov Decision Processes, we
consider the problem of learning a linear approximation of the value
function of some fixed policy from one trajectory possibly generated
by some other policy. We describe a systematic approach for adapting
\emph{on-policy} learning least squares algorithms of the literature
(LSTD, LSPE, FPKF and GPTD/KTD) to \emph{off-policy}
learning \emph{with eligibility traces}. This leads to two known
algorithms, LSTD($\lambda$)/LSPE($\lambda$) and suggests
new extensions of FPKF and GPTD/KTD. We describe their recursive
implementation, discuss their convergence properties, and illustrate
their behavior experimentally. Overall, our study suggests that the
state-of-art LSTD($\lambda$) remains the best least-squares
algorithm.},
}