@comment{Supelec439: journal article on one-class SVM audio surveillance.
  Authors are stored in "Last, First" form, and only the acronym in the
  title is brace-protected so that styles can apply their own casing.
  Percent signs in the abstract are escaped because BibTeX copies them
  verbatim and TeX would otherwise read them as comment starts.
  TODO(review): volume, number and pages are not recorded here; add them
  from the publisher record.}
@Article{Supelec439,
  author   = {Rabaoui, Asma and Davy, Manuel and Rossignol, St{\'e}phane and Ellouze, Noureddine},
  title    = {One-Class {SVMs} and Wavelets for Audio Surveillance},
  journal  = {{IEEE} Transactions on Information Forensics and Security},
  year     = {2008},
  abstract = {This paper presents a procedure aimed at recognizing
    environmental sounds for surveillance and security applications.
    We propose to apply One-Class Support Vector Machines (1-SVMs)
    together with a sophisticated dissimilarity measure as a
    discriminative framework in order to address audio classification,
    and hence, sound recognition. We illustrate the performance of this
    method on an audio database, which consists of 1015 sounds
    belonging to 9 classes. The used database presents high intra-class
    diversity in the signal properties and some kind of inter-class
    similarities. The number of items in each class is deliberately not
    equal, and sometimes very different which results in conducting
    experiments that simulate non-uniform probability of sound
    appearances. First, the use of a set of state-of-the-art audio
    features is studied. Then, we introduce a set of novel features
    obtained by combining elementary features. The experiments
    conducted on a multi-class classification problem show the
    superiority of this novel sound recognition method. The best
    recognition accuracy (96.89\%) is obtained when combining in the
    feature vector wavelet-based features, MFCCs and individual
    temporal and frequency features. Our 1-SVM approach overperforms
    the conventional HMM-based system in the conducted experiments, the
    improvement in the error rate can reach 50\%. Besides, we provide
    empirical results comparing the single-class SVM to a two-class SVM
    method. We discuss the superiority of the proposed methodologies
    and approaches based on 1-SVM addressing a multi-class problem.
    Moreover, the robustness to the environmental noise is investigated
    for specific types of acoustic representations. We showed that we
    can efficiently address a sound classification problem
    characterized by complex real-world datasets, even under important
    noise degradation conditions.},
}