X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcommon.py;h=16ae4b98ffe09c97f604981bf6c2ce9dc1e44e03;hb=11bed5827dace09b5483b159476ce9f8c29d6078;hp=c123d9fca6dfe73eae0cdd220dec02985a8de809;hpb=41c3a5a7beebbf5f60c5edb5093d564f0829c5c1;p=youtube-dl
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index c123d9fca..16ae4b98f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -39,6 +39,8 @@ from ..utils import (
sanitize_filename,
unescapeHTML,
url_basename,
+ xpath_text,
+ xpath_with_ns,
)
@@ -202,8 +204,8 @@ class InfoExtractor(object):
There must be a key "entries", which is a list, an iterable, or a PagedList
object, each element of which is a valid dictionary by this specification.
- Additionally, playlists can have "title" and "id" attributes with the same
- semantics as videos (see above).
+ Additionally, playlists can have "title", "description" and "id" attributes
+ with the same semantics as videos (see above).
_type "multi_video" indicates that there are multiple videos that
@@ -638,7 +640,7 @@ class InfoExtractor(object):
@staticmethod
def _meta_regex(prop):
return r'''(?isx)<meta
- (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+ (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
def _og_search_property(self, prop, html, name=None, **kargs):
@@ -999,8 +1001,7 @@ class InfoExtractor(object):
assert not fatal
return []
- namespace = self._search_regex(
- r'{([^}]+)?}smil', smil.tag, 'namespace', default=None)
+ namespace = self._parse_smil_namespace(smil)
return self._parse_smil_formats(
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
@@ -1017,8 +1018,7 @@ class InfoExtractor(object):
'Unable to download SMIL file', fatal=fatal)
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
- namespace = self._search_regex(
- r'{([^}]+)?}smil', smil.tag, 'namespace', default=None)
+ namespace = self._parse_smil_namespace(smil)
formats = self._parse_smil_formats(
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
@@ -1045,6 +1045,10 @@ class InfoExtractor(object):
'subtitles': subtitles,
}
+ def _parse_smil_namespace(self, smil):
+ return self._search_regex(
+ r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
+
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
base = smil_url
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
@@ -1140,6 +1144,49 @@ class InfoExtractor(object):
})
return subtitles
+ def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
+ xspf = self._download_xml(
+ playlist_url, playlist_id, 'Downloading xpsf playlist',
+ 'Unable to download xspf manifest', fatal=fatal)
+ if xspf is False:
+ return []
+ return self._parse_xspf(xspf, playlist_id)
+
+ def _parse_xspf(self, playlist, playlist_id):
+ NS_MAP = {
+ 'xspf': 'http://xspf.org/ns/0/',
+ 's1': 'http://static.streamone.nl/player/ns/0',
+ }
+
+ entries = []
+ for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+ title = xpath_text(
+ track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
+ description = xpath_text(
+ track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+ thumbnail = xpath_text(
+ track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+ duration = float_or_none(
+ xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
+
+ formats = [{
+ 'url': location.text,
+ 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+ 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+ 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+ } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': playlist_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ })
+ return entries
+
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()