Ingenta Connect auto-export, cleaned: invalid "publication date" field name fixed,
page-range hyphen doubled, title acronym braced, export typo "Real- recorded" repaired.
Non-standard Ingenta fields (parent_itemid, publishercode, itemtype, publicationdate)
are retained; BibTeX ignores unknown field names.
@article{Nakajima:2016:0736-2935:1832,
  title           = {{DNN}-based Environmental Sound Recognition with Real-recorded and Artificially-mixed Training Data},
  author          = {Nakajima, Yasutaka and Naito, Taisuke and Sunago, Norihito and Ohshima, Toshiya and Ono, Nobutaka},
  journal         = {INTER-NOISE and NOISE-CON Congress and Conference Proceedings},
  year            = {2016},
  volume          = {253},
  number          = {7},
  pages           = {1832--1841},
  issn            = {0736-2935},
  url             = {https://ince.publisher.ingentaconnect.com/content/ince/incecp/2016/00000253/00000007/art00116},
  parent_itemid   = {infobike://ince/incecp},
  publishercode   = {ince},
  publicationdate = {2016-08-21T00:00:00},
  itemtype        = {ARTICLE},
  abstract        = {In this paper, we investigate environmental sound recognition using Deep Neural Network (DNN). Generally, preparing the sufficient amount of training data is important in machine learning. Because different environmental sounds, for example cicada sound and ambulance sound, happen with overlapping each other, the training data including mixtures of different sounds is necessary for environmental sound recognition. However, it is difficult to obtain all combination of different sounds in real-recorded data. In this paper, we increase the amount of training data by artificially-mixed sounds. First, some distinctive single sounds which were recorded on different days near the sound sources individually are selected, or others are separated from the real-recorded data by extracting appropriate parts in time domain. Those sounds mainly consist of a single sound source and applying filters to reduce other sounds in frequency domain. Then, they are mixed with different ratios of sound levels, which simulate a variation of possible mixings in the real environment. Finally, both the real-recorded and the artificially-mixed sound data are used for training DNN. In the experiments of environmental sound recognition, we show that this approach achieves more accurate results than the ones with using only real-recorded data.},
}