ABSTRACT={Like in many other research areas, deep learning (DL) is increasingly adopted in music recommendation systems (MRS). Deep neural networks are used in this domain particularly for extracting latent factors of music items from audio signals or metadata and for learning sequential patterns of music items (tracks or artists) from music playlists or listening sessions. Latent item factors are commonly integrated into content-based filtering and hybrid MRS, whereas sequence models of music items are used for sequential music recommendation, e.g., automatic playlist continuation. This review article explains particularities of the music domain in RS research. It gives an overview of the state of the art that employs deep learning for music recommendation. The discussion is structured according to the dimensions of neural network type, input data, recommendation approach (content-based filtering, collaborative filtering, or both), and task (standard or sequential music recommendation). In addition, we discuss major challenges faced in MRS, in particular in the context of the current research on deep learning.}
}
% NOTE(review): this record previously also carried the fields
%   title = "Physical Modeling Using Digital Waveguides", volume = 16, year = 1992
% which look like the tail of a different entry that was merged in by accident
% (its header and author are missing). Restore that work as its own entry once
% its citation key is known.
% NOTE(review): year below follows the citation key; publisher inferred from
% the ISBN prefix -- confirm both against the book's imprint page.
@book{briot2019deep,
  author    = {Briot, Jean-Pierre and Hadjeres, Ga{\"e}tan and Pachet, Fran{\c{c}}ois-David},
  title     = {Deep Learning Techniques for Music Generation},
  series    = {Computational Synthesis and Creative Systems},
  publisher = {Springer},
  year      = {2019},
  isbn      = {9783319701639},
}
@article{karplus,
  author    = {Karplus, Kevin and Strong, Alex},
  title     = {Digital Synthesis of Plucked-String and Drum Timbres},
  journal   = {Computer Music Journal},
  publisher = {The MIT Press},
  volume    = {7},
  number    = {2},
  pages     = {43--55},
  year      = {1983},
  issn      = {01489267, 15315169},
  url       = {http://www.jstor.org/stable/3680062},
}
% The `key` field is kept as the sort/label fallback because the entry has no
% personal author. The standard's designation goes in `number` (the original
% `volume` merely repeated the year and is not a techreport field).
@techreport{ISO3740,
  key         = {ISO 3740:2019(en)},
  title       = {Acoustics --- {Determination} of sound power levels of noise sources --- {Guidelines} for the use of basic standards},
  type        = {Standard},
  number      = {ISO 3740:2019},
  institution = {International Organization for Standardization},
  address     = {Geneva, CH},
  year        = {2019},
}
% The ampersand in the publisher name must be escaped, otherwise LaTeX stops
% with "Misplaced alignment tab character" when the bibliography is typeset.
@book{adv_sigproc_book,
  author    = {Vaseghi, Saeed V.},
  title     = {Advanced Digital Signal Processing and Noise Reduction},
  publisher = {John Wiley \& Sons, Inc.},
  address   = {Hoboken, NJ, USA},
  year      = {2006},
  isbn      = {047009494X},
}
@article{reiss2008understanding,
  author  = {Reiss, Joshua D.},
  title   = {Understanding Sigma-Delta Modulation: The Solved and Unsolved Issues},
  journal = {Journal of the Audio Engineering Society},
  volume  = {56},
  number  = {1/2},
  pages   = {49--64},
  month   = jan,
  year    = {2008},
}
% Names are given in "Last, First" form; the exported `{Hsieh Hou}` grouping
% made the whole string a single surname, which breaks sorting and initials.
@article{splines,
  author  = {Hou, Hsieh and Andrews, H.},
  title   = {Cubic Splines for Image Interpolation and Digital Filtering},
  journal = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  volume  = {26},
  number  = {6},
  pages   = {508--517},
  year    = {1978},
  doi     = {10.1109/TASSP.1978.1163154},
}
% `issue_date` and `numpages` are publisher-export extras that standard styles
% ignore; they are kept for reference only.
@article{akima,
  author     = {Akima, Hiroshi},
  title      = {A New Method of Interpolation and Smooth Curve Fitting Based on Local Procedures},
  journal    = {Journal of the ACM},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  volume     = {17},
  number     = {4},
  pages      = {589--602},
  month      = oct,
  year       = {1970},
  issue_date = {Oct. 1970},
  numpages   = {14},
  issn       = {0004-5411},
  doi        = {10.1145/321607.321609},
  url        = {https://doi.org/10.1145/321607.321609},
  abstract   = {A new mathematical method is developed for interpolation from a given set of data points in a plane and for fitting a smooth curve to the points. This method is devised in such a way that the resultant curve will pass through the given points and will appear smooth and natural. It is based on a piecewise function composed of a set of polynomials, each of degree three, at most, and applicable to successive intervals of the given points. In this method, the slope of the curve is determined at each given point locally, and each polynomial representing a portion of the curve between a pair of given points is determined by the coordinates of and the slopes at the points. Comparison indicates that the curve obtained by this new method is closer to a manually drawn curve than those drawn by other mathematical methods.},
}
% Issue number taken from the DOI suffix (33.2.85 = volume 33, issue 2, page 85).
@article{review_disto,
  author  = {Pakarinen, Jyri and Yeh, David},
  title   = {A Review of Digital Techniques for Modeling Vacuum-Tube Guitar Amplifiers},
  journal = {Computer Music Journal},
  volume  = {33},
  number  = {2},
  pages   = {85--100},
  month   = jun,
  year    = {2009},
  doi     = {10.1162/comj.2009.33.2.85},
}
@article{app10020638,
  author         = {Martínez Ramírez, Marco A. and Benetos, Emmanouil and Reiss, Joshua D.},
  title          = {Deep Learning for Black-Box Modeling of Audio Effects},
  journal        = {Applied Sciences},
  volume         = {10},
  number         = {2},
  article-number = {638},
  year           = {2020},
  issn           = {2076-3417},
  doi            = {10.3390/app10020638},
  url            = {https://www.mdpi.com/2076-3417/10/2/638},
  abstract       = {Virtual analog modeling of audio effects consists of emulating the sound of an audio processor reference device. This digital simulation is normally done by designing mathematical models of these systems. It is often difficult because it seeks to accurately model all components within the effect unit, which usually contains various nonlinearities and time-varying components. Most existing methods for audio effects modeling are either simplified or optimized to a very specific circuit or type of audio effect and cannot be efficiently translated to other types of audio effects. Recently, deep neural networks have been explored as black-box modeling strategies to solve this task, i.e., by using only input–output measurements. We analyse different state-of-the-art deep learning models based on convolutional and recurrent neural networks, feedforward WaveNet architectures and we also introduce a new model based on the combination of the aforementioned models. Through objective perceptual-based metrics and subjective listening tests we explore the performance of these models when modeling various analog audio effects. Thus, we show virtual analog models of nonlinear effects, such as a tube preamplifier; nonlinear effects with memory, such as a transistor-based limiter and nonlinear time-varying effects, such as the rotating horn and rotating woofer of a Leslie speaker cabinet.},
}
% NOTE(review): the export had empty volume/number/pages, which suggests this
% may be a conference paper rather than a journal article -- confirm the venue
% and switch the entry type if so.
@article{herre1999temporal,
  author  = {Herre, J{\"u}rgen},
  title   = {Temporal Noise Shaping, Quantization and Coding Methods in Perceptual Audio Coding: A Tutorial Introduction},
  journal = {Journal of the Audio Engineering Society},
  month   = sep,
  year    = {1999},
}
% NOTE(review): the original record was an article with no journal. Venue
% below is filled in from the commonly cited form of this paper -- verify
% against the AES e-library record before relying on it.
@inproceedings{Brandenburg1999MP3AA,
  author    = {Brandenburg, Karlheinz},
  title     = {{MP3} and {AAC} Explained},
  booktitle = {Proceedings of the {AES} 17th International Conference: High-Quality Audio Coding},
  year      = {1999},
}
% Conference paper: the record carries a booktitle, so it is typed as
% inproceedings (an article entry would warn about the missing journal and
% ignore the booktitle).
@inproceedings{polyphase,
  author    = {Rothweiler, J.},
  title     = {Polyphase Quadrature Filters---{A} New Subband Coding Technique},
  booktitle = {{ICASSP} '83. {IEEE} International Conference on Acoustics, Speech, and Signal Processing},
  volume    = {8},
  pages     = {1280--1283},
  year      = {1983},
  doi       = {10.1109/ICASSP.1983.1172005},
}
% NOTE(review): no venue is recorded for this work, so it is typed as misc
% rather than as an article with a missing journal. Presumably an unpublished
% research paper -- add howpublished/institution once confirmed.
@misc{Jacaba2001AUDIOCU,
  author = {Jacaba, Joebert S.},
  title  = {Audio Compression Using Modified Discrete Cosine Transform: The {MP3} Coding Standard},
  year   = {2001},
}
% NOTE(review): the exported record had placeholder values (year 0001,
% month 01, empty pages) and no venue. Venue, date and the "De" surname
% particle below follow the commonly cited form of this paper (DAFx-00) --
% verify against the proceedings before relying on them.
@inproceedings{phase_vocoder,
  author    = {De G{\"o}tzen, Amalia and Bernardini, Nicola and Arfib, Daniel},
  title     = {Traditional Implementations of a Phase Vocoder: The Tricks of the Trade},
  booktitle = {Proceedings of the {COST} {G-6} Conference on Digital Audio Effects ({DAFx-00})},
  address   = {Verona, Italy},
  month     = dec,
  year      = {2000},
}
@article{masking,
  author         = {Herre, Jürgen and Dick, Sascha},
  title          = {Psychoacoustic Models for Perceptual Audio Coding—A Tutorial Review},
  journal        = {Applied Sciences},
  volume         = {9},
  number         = {14},
  article-number = {2854},
  year           = {2019},
  issn           = {2076-3417},
  doi            = {10.3390/app9142854},
  url            = {https://www.mdpi.com/2076-3417/9/14/2854},
  abstract       = {Psychoacoustic models of human auditory perception have found an important application in the realm of perceptual audio coding, where exploiting the limitations of perception and removal of irrelevance is key to achieving a significant reduction in bitrate while preserving subjective audio quality. To this end, psychoacoustic models do not need to be perfect to satisfy their purpose, and in fact the commonly employed models only represent a small subset of the known properties and abilities of the human auditory system. This paper provides a tutorial introduction of the most commonly used psychoacoustic models for low bitrate perceptual audio coding.},
}
% NOTE(review): the export typed this as a dataset, but the title reads like a
% paper, and classic BibTeX styles do not define that type. Typed as misc until
% the venue is known -- add booktitle/journal and retype once confirmed.
@misc{ATH,
  author = {Neubauer, Reinhard},
  title  = {Subjective Estimation of Airborne Sound Insulation in Buildings and How to Quantify the Real Acoustical Comfort of Dwellings},
  month  = jan,
  year   = {2004},
}
% Authors must be joined with " and "; the comma-separated form was parsed as
% a single person with surname "Dave Smith" and given name "Chet Wood".
@article{smith1981the,
  author  = {Smith, Dave and Wood, Chet},
  title   = {The `{USI}', or {Universal} {Synthesizer} {Interface}},
  journal = {Journal of the Audio Engineering Society},
  month   = oct,
  year    = {1981},
}
@phdthesis{notch,
url = {http://hdl.handle.net/10603/110422},
title = {Design Techniques for FIR Notch Filters},
author = {Rohini Deshpande},
copyright = {university},
year = {04/2011},
abstract = {Digital notch filters form an integral part of digital signal processing (DSP) used invariably in communication, control, instrumentation and bio medical engineering, to mention a few. They are used to eliminate noise and power line interferences. Digital notch filters can be designed either as an infinite impulse response filter (IIR) or as an finite impulse response filter (FIR). IIR filters have the advantage that they require lower orders for efficient approximation of a given set of specifications. However, IIR filters are potentially not stable and do not provide linear phase characteristics. FIR filters, on the
newlinecontrary, are unconditionally stable and can be designed to give exact linear phase
newlinecharacteristics. In view of this, the thrust in this research work is on FIR notch
newlinefilters.
newline
newlineIn most of the DSP applications, the interference frequency is not stable. It often varies about a fixed frequency by certain percentage. To remove such interference there can be two options:
newline
newline1. Design a data dependent filter in which the notch frequencies can be adjusted
newlinebased on the input data
newline
newlineor
newline
newline2. Design a data independent notch filter with controlled null width that matches
newlinewith the expected variable range of the interference.},
school = {Guru Gobind Singh Indraprastha University},