[extractor/generic] Support relative URIs in _parse_xspf
author Ricardo Constantino <wiiaboo@gmail.com>
Wed, 7 Mar 2018 21:31:53 +0000 (21:31 +0000)
committer Sergey M․ <dstftw@gmail.com>
Sat, 17 Mar 2018 19:48:44 +0000 (02:48 +0700)
An XSPF <location> element may contain a relative URI, not just an absolute one, so resolve each location against the playlist's base URL.

test/test_InfoExtractor.py
test/testdata/xspf/foo_xspf.xspf [new file with mode: 0644]
youtube_dl/extractor/common.py
youtube_dl/extractor/generic.py

index 7b31d5198b561f7e3725a15055439ca5f35b791b..a695ce64b3fbc9a8c9eca4562f8fc94ba2c742b5 100644 (file)
@@ -694,6 +694,48 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
 
+    def test_parse_xspf(self):
+        """_parse_xspf must resolve <location> URIs against the base URL."""
+        # Each case: (fixture file stem, playlist base URL, expected entries).
+        _TEST_CASES = [
+            (
+                'foo_xspf',
+                'https://example.org/src/',
+                [{
+                    'description': 'Visit http://bigbrother404.bandcamp.com',
+                    'duration': 202.416,
+                    'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}],
+                    'id': 'foo_xspf',
+                    'title': 'Pandemonium'
+                }, {
+                    'description': 'Visit http://bigbrother404.bandcamp.com',
+                    'duration': 255.857,
+                    'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}],
+                    'id': 'foo_xspf',
+                    'title': 'Final Cartridge (Nichico Twelve Remix)'
+                }, {
+                    'description': 'Visit http://bigbrother404.bandcamp.com',
+                    'duration': 287.915,
+                    'formats': [
+                        {'url': 'https://example.org/src/track3.mp3'},
+                        {'url': 'https://example.com/track3.mp3'}
+                    ],
+                    'id': 'foo_xspf',
+                    'title': 'Rebuilding Nightingale'
+                }]
+            ),
+        ]
+
+        for xspf_file, xspf_base_url, expected_entries in _TEST_CASES:
+            with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
+                         mode='r', encoding='utf-8') as f:
+                entries = self.ie._parse_xspf(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    xspf_file, xspf_base_url)
+            expect_value(self, entries, expected_entries, None)
+            for i, entry in enumerate(entries):
+                expect_dict(self, entry, expected_entries[i])
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf
new file mode 100644 (file)
index 0000000..b7f0086
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<playlist version="1" xmlns="http://xspf.org/ns/0/">
+    <date>2018-03-09T18:01:43Z</date>
+    <trackList>
+        <track>
+            <location>cd1/track%201.mp3</location>
+            <title>Pandemonium</title>
+            <creator>Foilverb</creator>
+            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+            <album>Pandemonium EP</album>
+            <trackNum>1</trackNum>
+            <duration>202416</duration>
+        </track>
+        <track>
+            <location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
+            <title>Final Cartridge (Nichico Twelve Remix)</title>
+            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+            <creator>Foilverb</creator>
+            <album>Pandemonium EP</album>
+            <trackNum>2</trackNum>
+            <duration>255857</duration>
+        </track>
+        <track>
+            <location>track3.mp3</location>
+            <location>https://example.com/track3.mp3</location>
+            <title>Rebuilding Nightingale</title>
+            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+            <creator>Foilverb</creator>
+            <album>Pandemonium EP</album>
+            <trackNum>3</trackNum>
+            <duration>287915</duration>
+        </track>
+    </trackList>
+</playlist>
index fcdd0fd14a85a12690031b409058d932a3d4e4db..c1e1012e7afc99f4e066f44f1b32f03833da6610 100644 (file)
@@ -1700,9 +1700,9 @@ class InfoExtractor(object):
             'Unable to download xspf manifest', fatal=fatal)
         if xspf is False:
             return []
-        return self._parse_xspf(xspf, playlist_id)
+        return self._parse_xspf(xspf, playlist_id, base_url(playlist_url))
 
-    def _parse_xspf(self, playlist, playlist_id):
+    def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''):
         NS_MAP = {
             'xspf': 'http://xspf.org/ns/0/',
             's1': 'http://static.streamone.nl/player/ns/0',
@@ -1720,7 +1720,7 @@ class InfoExtractor(object):
                 xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
 
             formats = [{
-                'url': location.text,
+                'url': urljoin(playlist_base_url, location.text),
                 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
                 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
                 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
index dbd565066eb4b56f7df699e26957be3b718b7980..023ccbc9bf108e5cf51c0afc7579905899956ae3 100644 (file)
@@ -2232,7 +2232,9 @@ class GenericIE(InfoExtractor):
                 self._sort_formats(smil['formats'])
                 return smil
             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
-                return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
+                return self.playlist_result(
+                    self._parse_xspf(doc, video_id, compat_str(full_response.geturl())),
+                    video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
                     doc,