commit c452790a796730113dd62db0e743b11045606e27
parent d89a0a8026e0010a96a1309d70f8fcc2164dd5a6
Author: aerworker <aerworker@users.noreply.github.com>
Date:   Sat, 13 Jul 2019 22:38:47 +0300

[yandexmusic] Add support for multi-disc albums and extract track number and disc number (closes #21420) (#21421)

* [yandexmusic] extract tracks from all volumes of an album (closes #21420)

* [yandexmusic] extract genre, disc_number and track_number

* [yandexmusic] extract decomposed artist names

* Update yandexmusic.py

* Update yandexmusic.py

* Update yandexmusic.py

Diffstat:
M youtube_dl/extractor/yandexmusic.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 55 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py @@ -51,23 +51,43 @@ class YandexMusicTrackIE(YandexMusicBaseIE): IE_DESC = 'Яндекс.Музыка - Трек' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508/track/4878838', 'md5': 'f496818aa2f60b6c0062980d2e00dc20', 'info_dict': { 'id': '4878838', 'ext': 'mp3', - 'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', + 'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', 'filesize': 4628061, 'duration': 193.04, 'track': 'Gypsy Eyes 1', 'album': 'Gypsy Soul', 'album_artist': 'Carlo Ambrosio', - 'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari', + 'artist': 'Carlo Ambrosio & Fabio Di Bari', 'release_year': 2009, }, 'skip': 'Travis CI servers blocked by YandexMusic', - } + }, { + # multiple disks + 'url': 'http://music.yandex.ru/album/3840501/track/705105', + 'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e', + 'info_dict': { + 'id': '705105', + 'ext': 'mp3', + 'title': 'Hooverphonic - Sometimes', + 'filesize': 5743386, + 'duration': 239.27, + 'track': 'Sometimes', + 'album': 'The Best of Hooverphonic', + 'album_artist': 'Hooverphonic', + 'artist': 'Hooverphonic', + 'release_year': 2016, + 'genre': 'pop', + 'disc_number': 2, + 'track_number': 9, + }, + 'skip': 'Travis CI servers blocked by YandexMusic', + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -110,9 +130,21 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'abr': int_or_none(download_data.get('bitrate')), } + def extract_artist_name(artist): + decomposed = artist.get('decomposed') + if not isinstance(decomposed, list): + return artist['name'] + parts = [artist['name']] + for element in decomposed: + if isinstance(element, dict) and element.get('name'): + parts.append(element['name']) + elif isinstance(element, compat_str): + parts.append(element) + return 
''.join(parts) + def extract_artist(artist_list): if artist_list and isinstance(artist_list, list): - artists_names = [a['name'] for a in artist_list if a.get('name')] + artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')] if artists_names: return ', '.join(artists_names) @@ -121,10 +153,17 @@ class YandexMusicTrackIE(YandexMusicBaseIE): album = albums[0] if isinstance(album, dict): year = album.get('year') + disc_number = int_or_none(try_get( + album, lambda x: x['trackPosition']['volume'])) + track_number = int_or_none(try_get( + album, lambda x: x['trackPosition']['index'])) track_info.update({ 'album': album.get('title'), 'album_artist': extract_artist(album.get('artists')), 'release_year': int_or_none(year), + 'genre': album.get('genre'), + 'disc_number': disc_number, + 'track_number': track_number, }) track_artist = extract_artist(track.get('artists')) @@ -152,7 +191,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): IE_DESC = 'Яндекс.Музыка - Альбом' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508', 'info_dict': { 'id': '540508', @@ -160,7 +199,15 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): }, 'playlist_count': 50, 'skip': 'Travis CI servers blocked by YandexMusic', - } + }, { + 'url': 'https://music.yandex.ru/album/3840501', + 'info_dict': { + 'id': '3840501', + 'title': 'Hooverphonic - The Best of Hooverphonic (2016)', + }, + 'playlist_count': 33, + 'skip': 'Travis CI servers blocked by YandexMusic', + }] def _real_extract(self, url): album_id = self._match_id(url) @@ -169,7 +216,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id, album_id, 'Downloading album JSON') - entries = self._build_playlist(album['volumes'][0]) + entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) title = '%s - %s' % 
(album['artists'][0]['name'], album['title']) year = album.get('year')