[extractor/common] Clarify playlists can have description field
[youtube-dl] / youtube_dl / extractor / common.py
index f9578b8380cfcaa1d63a7688307ef231d5691d75..9b4775e0a4bf902f8e77e050ba6679a24b0c6534 100644 (file)
@@ -39,6 +39,8 @@ from ..utils import (
     sanitize_filename,
     unescapeHTML,
     url_basename,
+    xpath_text,
+    xpath_with_ns,
 )
 
 
@@ -202,8 +204,8 @@ class InfoExtractor(object):
     There must be a key "entries", which is a list, an iterable, or a PagedList
     object, each element of which is a valid dictionary by this specification.
 
-    Additionally, playlists can have "title" and "id" attributes with the same
-    semantics as videos (see above).
+    Additionally, playlists can have "title", "description" and "id" attributes
+    with the same semantics as videos (see above).
 
 
     _type "multi_video" indicates that there are multiple videos that
@@ -638,7 +640,14 @@ class InfoExtractor(object):
     @staticmethod
     def _meta_regex(prop):
+        """Build a regex that finds a <meta> tag and captures its content.
+
+        The tag must carry an itemprop, name, property or id attribute whose
+        (optionally quoted) value equals prop; prop is passed through
+        re.escape. The quoted value of the tag's content attribute is
+        exposed as the named group "content".
+        """
         return r'''(?isx)<meta
-                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+                    (?=[^>]+(?:itemprop|name|property|id)=(["\']?)%s\1)
                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
 
     def _og_search_property(self, prop, html, name=None, **kargs):
@@ -999,8 +1001,7 @@ class InfoExtractor(object):
             assert not fatal
             return []
 
-        namespace = self._search_regex(
-            r'{([^}]+)?}smil', smil.tag, 'namespace', default=None)
+        namespace = self._parse_smil_namespace(smil)
 
         return self._parse_smil_formats(
             smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
@@ -1017,8 +1018,7 @@ class InfoExtractor(object):
             'Unable to download SMIL file', fatal=fatal)
 
     def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
-        namespace = self._search_regex(
-            r'{([^}]+)?}smil', smil.tag, 'namespace', default=None)
+        namespace = self._parse_smil_namespace(smil)
 
         formats = self._parse_smil_formats(
             smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
@@ -1045,6 +1045,19 @@ class InfoExtractor(object):
             'subtitles': subtitles,
         }
 
+    def _parse_smil_namespace(self, smil):
+        """Look up the XML namespace carried by a SMIL root element's tag.
+
+        smil.tag is searched, ignoring case, for the exact form
+        "{namespace}smil", where the text between the braces is the
+        capture group. The search is delegated to self._search_regex with
+        default=None, so an un-namespaced tag presumably yields None
+        (confirm against _search_regex).
+        """
+        namespace_pattern = r'(?i)^{([^}]+)?}smil$'
+        return self._search_regex(
+            namespace_pattern, smil.tag, 'namespace', default=None)
+
     def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
         base = smil_url
         for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
@@ -1101,7 +1105,7 @@ class InfoExtractor(object):
                         'plugin': 'flowplayer-3.2.0.1',
                     }
                 f4m_url += '&' if '?' in f4m_url else '?'
-                f4m_url += compat_urllib_parse.urlencode(f4m_params).encode('utf-8')
+                f4m_url += compat_urllib_parse.urlencode(f4m_params)
                 formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
                 continue
 
@@ -1140,6 +1144,63 @@ class InfoExtractor(object):
             })
         return subtitles
 
+    def _extract_xspf_playlist(self, playlist_url, playlist_id):
+        """Download an XSPF playlist and return its tracks as info dicts.
+
+        playlist_url is fetched and parsed through self._download_xml, with
+        playlist_id passed along as the identifier for that download. One
+        entry is built per <track> found under <trackList>, taking the title
+        from <title>, the description from <annotation>, the thumbnail from
+        <image> and the duration from <duration>. Each <location> child of
+        a track becomes one format.
+
+        Returns a list of dicts with the keys "id", "title", "description",
+        "thumbnail", "duration" and "formats".
+        """
+        playlist = self._download_xml(
+            playlist_url, playlist_id, 'Downloading xspf playlist',
+            'Unable to download xspf manifest')
+
+        # "s1" is a vendor namespace (static.streamone.nl) qualifying the
+        # label, width and height attributes read from each <location>.
+        NS_MAP = {
+            'xspf': 'http://xspf.org/ns/0/',
+            's1': 'http://static.streamone.nl/player/ns/0',
+        }
+
+        entries = []
+        for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+            title = xpath_text(
+                track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
+            description = xpath_text(
+                track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+            thumbnail = xpath_text(
+                track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+            # XSPF gives durations in milliseconds; the 1000 is presumably
+            # float_or_none's scale divisor, yielding seconds (confirm).
+            duration = float_or_none(
+                xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
+
+            formats = [{
+                'url': location.text,
+                'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+                'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+                'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+            } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
+            self._sort_formats(formats)
+
+            # NOTE(review): every track is given the playlist's id, so a
+            # playlist with several tracks yields entries sharing one id.
+            entries.append({
+                'id': playlist_id,
+                'title': title,
+                'description': description,
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'formats': formats,
+            })
+        return entries
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()