X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcommon.py;h=16ae4b98ffe09c97f604981bf6c2ce9dc1e44e03;hb=11bed5827dace09b5483b159476ce9f8c29d6078;hp=c123d9fca6dfe73eae0cdd220dec02985a8de809;hpb=41c3a5a7beebbf5f60c5edb5093d564f0829c5c1;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c123d9fca..16ae4b98f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -39,6 +39,8 @@ from ..utils import ( sanitize_filename, unescapeHTML, url_basename, + xpath_text, + xpath_with_ns, ) @@ -202,8 +204,8 @@ class InfoExtractor(object): There must be a key "entries", which is a list, an iterable, or a PagedList object, each element of which is a valid dictionary by this specification. - Additionally, playlists can have "title" and "id" attributes with the same - semantics as videos (see above). + Additionally, playlists can have "title", "description" and "id" attributes + with the same semantics as videos (see above). _type "multi_video" indicates that there are multiple videos that @@ -638,7 +640,7 @@ class InfoExtractor(object): @staticmethod def _meta_regex(prop): return r'''(?isx)]+(?:itemprop|name|property)=(["\']?)%s\1) + (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1) [^>]+?content=(["\'])(?P.*?)\2''' % re.escape(prop) def _og_search_property(self, prop, html, name=None, **kargs): @@ -999,8 +1001,7 @@ class InfoExtractor(object): assert not fatal return [] - namespace = self._search_regex( - r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + namespace = self._parse_smil_namespace(smil) return self._parse_smil_formats( smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) @@ -1017,8 +1018,7 @@ class InfoExtractor(object): 'Unable to download SMIL file', fatal=fatal) def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): - namespace = self._search_regex( - r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + namespace = self._parse_smil_namespace(smil) formats = self._parse_smil_formats( smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) @@ -1045,6 +1045,10 @@ class InfoExtractor(object): 'subtitles': subtitles, } + def _parse_smil_namespace(self, smil): + return self._search_regex( + r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None) + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None): base = smil_url for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): @@ -1140,6 +1144,49 @@ class InfoExtractor(object): }) return subtitles + def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): + xspf = self._download_xml( + playlist_url, playlist_id, 'Downloading xpsf playlist', + 'Unable to download xspf manifest', fatal=fatal) + if xspf is False: + return [] + return self._parse_xspf(xspf, playlist_id) + + def _parse_xspf(self, playlist, playlist_id): + NS_MAP = { + 'xspf': 'http://xspf.org/ns/0/', + 's1': 'http://static.streamone.nl/player/ns/0', + } + + entries = [] + for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): + title = xpath_text( + track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id) + description = xpath_text( + track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') + thumbnail = xpath_text( + track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail') + duration = float_or_none( + xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) + + formats = [{ + 'url': location.text, + 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), + 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), + 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), + } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] + self._sort_formats(formats) + + entries.append({ + 'id': playlist_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + }) + return entries + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now()