@article{LEE:2024:0736-2935:5216,
  title = "Development of automatic audio panning system for immersive sound through stage size-aware model and object-tracking",
  journal = "INTER-NOISE and NOISE-CON Congress and Conference Proceedings",
  parent_itemid = "infobike://ince/incecp",
  publishercode = "ince",
  year = "2024",
  volume = "270",
  number = "6",
  publicationdate = "2024-10-04T00:00:00",
  pages = "5216-5222",
  itemtype = "ARTICLE",
  issn = "0736-2935",
  url = "https://ince.publisher.ingentaconnect.com/content/ince/incecp/2024/00000270/00000006/art00025",
  doi = "10.3397/IN_2024_3562",
  author = "LEE, Kangeun and KIM, Sungyoung",
  abstract = "We investigated a novel AI-assisted, automated 'immersive' audio panning system designed to track audio-related objects within a video clip. The system comprises four sequential steps: Object-Tracking, Stage Dimension Estimation, XY-Coordinate Calculation, and Object Audio Rendering. It overcomes challenges arising from the rapid and frequent movement of target objects by employing a pre-trained object-tracking model and integrating depth information to ensure stability in subsequent tasks. Additionally, we introduce a stage size-aware model that estimates stage dimensions, trained on our manually collected dataset formatted as (Image, Width, Depth). The system then calculates XY-Coordinate pairs that serve as panning values for conventional audio mixers or decoders, enabling immersive audio reproduction. We anticipate that this video- and space-aware automatic panning system will be valuable for the rapid production of new media."
}
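
The abstract above describes a pipeline whose final step turns per-object XY coordinates into panning values for a mixer or decoder. The Python sketch below is only an illustrative example of that last step, assuming a standard constant-power stereo pan law; the coordinate convention, metre-based stage units, function names, and the stereo (rather than multichannel immersive) target are assumptions for illustration, not the authors' implementation.

import math

def xy_to_stereo_gains(x_norm: float) -> tuple[float, float]:
    # Constant-power stereo pan from a normalized X coordinate.
    # Assumed convention: 0.0 = far left, 1.0 = far right.
    theta = x_norm * math.pi / 2.0           # map [0, 1] -> [0, pi/2]
    return math.cos(theta), math.sin(theta)  # equal-power pan law

def track_to_pan(track_xy, stage_width_m, stage_depth_m):
    # Convert per-frame object positions (hypothetically in metres, stage
    # coordinates) into per-frame pan values, assuming the stage size-aware
    # model has already provided stage_width_m and stage_depth_m.
    pans = []
    for x_m, y_m in track_xy:
        x_norm = min(max(x_m / stage_width_m, 0.0), 1.0)
        y_norm = min(max(y_m / stage_depth_m, 0.0), 1.0)
        left, right = xy_to_stereo_gains(x_norm)
        pans.append({"x": x_norm, "y": y_norm, "L": left, "R": right})
    return pans

if __name__ == "__main__":
    # Example: an object crossing a 10 m wide, 6 m deep stage from left to right.
    trajectory = [(1.0, 3.0), (5.0, 3.0), (9.0, 3.0)]
    for frame in track_to_pan(trajectory, stage_width_m=10.0, stage_depth_m=6.0):
        print(frame)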