commit 96b8b9abaecb7518d901dc9d6a617f19c3161236
parent 178ee88319a384b66d9b2da27a819f32ba870425
Author: Ricardo Constantino <wiiaboo@gmail.com>
Date:   Wed,  7 Mar 2018 21:31:53 +0000

[extractor/generic] Support relative URIs in _parse_xspf

<location> can have relative URIs, not just absolute.

Diffstat:
M test/test_InfoExtractor.py | 42 ++++++++++++++++++++++++++++++++++++++++++
A test/testdata/xspf/foo_xspf.xspf | 34 ++++++++++++++++++++++++++++++++++
M youtube_dl/extractor/common.py | 6 +++---
M youtube_dl/extractor/generic.py | 4 +++-
4 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py @@ -694,6 +694,48 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_xspf(self): + _TEST_CASES = [ + ( + 'foo_xspf', + 'https://example.org/src/', + [{ + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 202.416, + 'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}], + 'id': 'foo_xspf', + 'title': 'Pandemonium' + }, + { + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 255.857, + 'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}], + 'id': 'foo_xspf', + 'title': 'Final Cartridge (Nichico Twelve Remix)' + }, + { + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 287.915, + 'formats': [ + {'url': 'https://example.org/src/track3.mp3'}, + {'url': 'https://example.com/track3.mp3'} + ], + 'id': 'foo_xspf', + 'title': 'Rebuilding Nightingale' + }] + ), + ] + + for xspf_file, xspf_base_url, expected_entries in _TEST_CASES: + with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: + entries = self.ie._parse_xspf( + compat_etree_fromstring(f.read().encode('utf-8')), + xspf_file, xspf_base_url) + expect_value(self, entries, expected_entries, None) + for i in range(len(entries)): + expect_dict(self, entries[i], expected_entries[i]) + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="UTF-8"?> +<playlist version="1" xmlns="http://xspf.org/ns/0/"> + <date>2018-03-09T18:01:43Z</date> + <trackList> + <track> + <location>cd1/track%201.mp3</location> + <title>Pandemonium</title> + <creator>Foilverb</creator> + <annotation>Visit http://bigbrother404.bandcamp.com</annotation> + <album>Pandemonium 
EP</album> + <trackNum>1</trackNum> + <duration>202416</duration> + </track> + <track> + <location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location> + <title>Final Cartridge (Nichico Twelve Remix)</title> + <annotation>Visit http://bigbrother404.bandcamp.com</annotation> + <creator>Foilverb</creator> + <album>Pandemonium EP</album> + <trackNum>2</trackNum> + <duration>255857</duration> + </track> + <track> + <location>track3.mp3</location> + <location>https://example.com/track3.mp3</location> + <title>Rebuilding Nightingale</title> + <annotation>Visit http://bigbrother404.bandcamp.com</annotation> + <creator>Foilverb</creator> + <album>Pandemonium EP</album> + <trackNum>3</trackNum> + <duration>287915</duration> + </track> + </trackList> +</playlist> diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py @@ -1700,9 +1700,9 @@ class InfoExtractor(object): 'Unable to download xspf manifest', fatal=fatal) if xspf is False: return [] - return self._parse_xspf(xspf, playlist_id) + return self._parse_xspf(xspf, playlist_id, base_url(playlist_url)) - def _parse_xspf(self, playlist, playlist_id): + def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''): NS_MAP = { 'xspf': 'http://xspf.org/ns/0/', 's1': 'http://static.streamone.nl/player/ns/0', @@ -1720,7 +1720,7 @@ class InfoExtractor(object): xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) formats = [{ - 'url': location.text, + 'url': urljoin(playlist_base_url, location.text), 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -2232,7 +2232,9 @@ class GenericIE(InfoExtractor): self._sort_formats(smil['formats']) return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': - return 
self.playlist_result(self._parse_xspf(doc, video_id), video_id) + return self.playlist_result( + self._parse_xspf(doc, video_id, compat_str(full_response.geturl())), + video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc,