% Lee and Choi, "Inter-channel Conv-TasNet for source-agnostic multichannel
% audio enhancement", INTER-NOISE and NOISE-CON proceedings 265(5), 2023.
% Cleaned-up publisher export: bare DOI, en-dash page range, case-protected
% model name. The fields parent_itemid, publishercode, itemtype and
% publication_date are exporter metadata that standard styles ignore; they are
% retained for provenance only.
% NOTE(review): the DOI suffix reads IN_2022 while the issue is dated 2023;
% presumably the paper belongs to the 2022 congress -- confirm before changing
% the year.
@article{Lee:2023:0736-2935:2068,
  author           = {Lee, Dongheon and Choi, Jung-Woo},
  title            = {Inter-channel {Conv-TasNet} for source-agnostic multichannel audio enhancement},
  journal          = {INTER-NOISE and NOISE-CON Congress and Conference Proceedings},
  year             = {2023},
  month            = feb,
  volume           = {265},
  number           = {5},
  pages            = {2068--2075},
  issn             = {0736-2935},
  doi              = {10.3397/IN_2022_0297},
  url              = {https://ince.publisher.ingentaconnect.com/content/ince/incecp/2023/00000265/00000005/art00010},
  parent_itemid    = {infobike://ince/incecp},
  publishercode    = {ince},
  publication_date = {2023-02-01T00:00:00},
  itemtype         = {ARTICLE},
  abstract         = {Deep neural network (DNN) models for the audio enhancement task have been developed in various ways.
    Most of them rely on the source-dependent characteristics, such as temporal or spectral characteristics of speeches,
    to suppress noises embedded in measured signals. Only a few studies have attempted to exploit the spatial information
    embedded in multichannel data. In this work, we propose a DNN architecture that fully exploits inter-channel relations
    to realize source-agnostic audio enhancement. The proposed model is based on the fully convolutional time-domain audio
    separation network (Conv-TasNet) but extended to extract and learn spatial features from multichannel input signals.
    The use of spatial information is facilitated by separating each convolutional layer into dedicated inter-channel 1x1
    Conv blocks and 2D spectro-temporal Conv blocks. The performance of the proposed model is verified through the training
    and test with heterogeneous datasets including speech and other audio datasets, which demonstrates that the enriched
    spatial information from the proposed architecture enables the versatile audio enhancement in a source-agnostic way.},
}