X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=415dc84c82d537fad9493faf38d5348143a7e521;hb=02af6ec70784d168fd27f379a1bf1d0f228f91b3;hp=1076b46da773b5c90cf0c898202f9a8fc5279dbf;hpb=0ce26ef22834587920bd06f3cb32dab4fcef3c2a;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1076b46da..415dc84c8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,6 +21,7 @@ from ..compat import ( compat_os_name, compat_str, compat_urllib_error, + compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, @@ -234,7 +235,7 @@ class InfoExtractor(object): chapter_id: Id of the chapter the video belongs to, as a unicode string. The following fields should only be used when the video is an episode of some - series or programme: + series, programme or podcast: series: Title of the series or programme the video episode belongs to. season: Title of the season the video episode belongs to. @@ -1099,6 +1100,13 @@ class InfoExtractor(object): manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], 'bootstrap info', default=None) + vcodec = None + mime_type = xpath_text( + manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'], + 'base URL', default=None) + if mime_type and mime_type.startswith('audio/'): + vcodec = 'none' + for i, media_el in enumerate(media_nodes): tbr = int_or_none(media_el.attrib.get('bitrate')) width = int_or_none(media_el.attrib.get('width')) @@ -1139,6 +1147,7 @@ class InfoExtractor(object): 'width': f.get('width') or width, 'height': f.get('height') or height, 'format_id': f.get('format_id') if not tbr else format_id, + 'vcodec': vcodec, }) formats.extend(f4m_formats) continue @@ -1155,6 +1164,7 @@ class InfoExtractor(object): 'tbr': tbr, 'width': width, 'height': height, + 'vcodec': vcodec, 'preference': preference, }) return formats @@ -1801,7 +1811,11 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] - for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): + media_tags = [(media_tag, media_type, '') + for media_tag, media_type + in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + media_tags.extend(re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage)) + for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], 'subtitles': {}, @@ -1870,11 +1884,11 @@ class InfoExtractor(object): formats.extend(self._extract_f4m_formats( http_base_url + '/manifest.f4m', video_id, f4m_id='hds', fatal=False)) + if 'dash' not in skip_protocols: + formats.extend(self._extract_mpd_formats( + http_base_url + '/manifest.mpd', + video_id, mpd_id='dash', fatal=False)) if re.search(r'(?:/smil:|\.smil)', url_base): - if 'dash' not in skip_protocols: - formats.extend(self._extract_mpd_formats( - http_base_url + '/manifest.mpd', - video_id, mpd_id='dash', fatal=False)) if 'smil' not in skip_protocols: rtmp_formats = self._extract_smil_formats( http_base_url + '/jwplayer.smil', @@ -2020,6 +2034,12 @@ class InfoExtractor(object): headers['Ytdl-request-proxy'] = geo_verification_proxy return headers + def _generic_id(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + + def _generic_title(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + class SearchInfoExtractor(InfoExtractor): """