@inproceedings{2229892efcb14602884ce3715a9fc13c,
title = "Multitrack Music Transformer",
abstract = "Existing approaches for generating multitrack music with transformer models have been limited in terms of the number of instruments, the length of the music segments and slow inference. This is partly due to the memory requirements of the lengthy input sequences necessitated by existing representations. In this work, we propose a new multitrack music representation that allows a diverse set of instruments while keeping a short sequence length. Our proposed Multitrack Music Transformer (MMT) achieves comparable performance with state-of-the-art systems, landing in between two recently proposed models in a subjective listening test, while achieving substantial speedups and memory reductions over both, making the method attractive for real time improvisation or near real time creative applications. Further, we propose a new measure for analyzing musical self-attention and show that the trained model attends more to notes that form a consonant interval with the current note and to notes that are 4N beats away from the current step.",
keywords = "computer music, deep learning, machine learning, music generation, music information retrieval, neural networks",
author = "Dong, {Hao-Wen} and Ke Chen and Shlomo Dubnov and Julian McAuley and Taylor Berg-Kirkpatrick",
note = "Publisher Copyright: {\textcopyright} 2023 IEEE; 48th IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2023; Conference date: 04-06-2023 Through 10-06-2023",
year = "2023",
month = jan,
day = "1",
doi = "10.1109/ICASSP49357.2023.10094628",
language = "English",
series = "ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings",
publisher = "Institute of Electrical and Electronics Engineers",
booktitle = "ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing, Proceedings",
address = "United States",
}