X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmtv.py;h=15df62649ff3d1366ff7fefd4de396f2c263d648;hb=071c10137b6b17b79ecfc8676736d5cc243022f6;hp=7a3b62ebe7bb56c109ba9dad86fa11112a1f69b0;hpb=e525d9a3dfb03152e133b8c0ccc8a104289cf5cf;p=youtube-dl diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 7a3b62ebe..15df62649 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals import re -from .subtitles import SubtitlesInfoExtractor +from .common import InfoExtractor from ..compat import ( compat_urllib_parse, compat_urllib_request, @@ -23,8 +23,9 @@ def _media_xml_tag(tag): return '{http://search.yahoo.com/mrss/}%s' % tag -class MTVServicesInfoExtractor(SubtitlesInfoExtractor): +class MTVServicesInfoExtractor(InfoExtractor): _MOBILE_TEMPLATE = None + _LANG = None @staticmethod def _id_from_uri(uri): @@ -79,12 +80,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): try: _, _, ext = rendition.attrib['type'].partition('/') rtmp_video_url = rendition.find('./src').text - formats.append({'ext': ext, - 'url': self._transform_rtmp_url(rtmp_video_url), - 'format_id': rendition.get('bitrate'), - 'width': int(rendition.get('width')), - 'height': int(rendition.get('height')), - }) + if rtmp_video_url.endswith('siteunavail.png'): + continue + formats.append({ + 'ext': ext, + 'url': self._transform_rtmp_url(rtmp_video_url), + 'format_id': rendition.get('bitrate'), + 'width': int(rendition.get('width')), + 'height': int(rendition.get('height')), + }) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') self._sort_formats(formats) @@ -92,25 +96,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): def _extract_subtitles(self, mdoc, mtvn_id): subtitles = {} - FORMATS = { - 'scc': 'cea-608', - 'eia-608': 'cea-608', - 'xml': 'ttml', - } - subtitles_format = FORMATS.get( - self._downloader.params.get('subtitlesformat'), 'ttml') for transcript in mdoc.findall('.//transcript'): if transcript.get('kind') != 'captions': continue lang = transcript.get('srclang') - for typographic in transcript.findall('./typographic'): - captions_format = typographic.get('format') - if captions_format == subtitles_format: - subtitles[lang] = compat_str(typographic.get('src')) - break - if self._downloader.params.get('listsubtitles', False): - self._list_available_subtitles(mtvn_id, subtitles) - return self.extract_subtitles(mtvn_id, subtitles) + subtitles[lang] = [{ + 'url': compat_str(typographic.get('src')), + 'ext': typographic.get('format') + } for typographic in transcript.findall('./typographic')] + return subtitles def _get_video_info(self, itemdoc): uri = itemdoc.find('guid').text @@ -125,6 +119,14 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): mediagen_doc = self._download_xml(mediagen_url, video_id, 'Downloading video urls') + item = mediagen_doc.find('./video/item') + if item is not None and item.get('type') == 'text': + message = '%s returned error: ' % self.IE_NAME + if item.get('code') is not None: + message += '%s - ' % item.get('code') + message += item.text + raise ExtractorError(message, expected=True) + description_node = itemdoc.find('description') if description_node is not None: description = description_node.text.strip() @@ -168,8 +170,12 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) data = compat_urllib_parse.urlencode({'uri': uri}) + info_url = feed_url + '?' + if self._LANG: + info_url += 'lang=%s&' % self._LANG + info_url += data idoc = self._download_xml( - feed_url + '?' + data, video_id, + info_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return self.playlist_result( [self._get_video_info(item) for item in idoc.findall('.//item')]) @@ -193,8 +199,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): webpage, 'mgid') videos_info = self._get_videos_info(mgid) - if self._downloader.params.get('listsubtitles', False): - return return videos_info @@ -240,25 +244,14 @@ class MTVIE(MTVServicesInfoExtractor): _TESTS = [ { 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', - 'file': '853555.mp4', 'md5': '850f3f143316b1e71fa56a4edfd6e0f8', 'info_dict': { + 'id': '853555', + 'ext': 'mp4', 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', }, }, - { - 'add_ie': ['Vevo'], - 'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', - 'file': 'USCJY1331283.mp4', - 'md5': '73b4e7fcadd88929292fe52c3ced8caf', - 'info_dict': { - 'title': 'Everything Has Changed', - 'upload_date': '20130606', - 'uploader': 'Taylor Swift', - }, - 'skip': 'VEVO is only available in some countries', - }, ] def _get_thumbnail_url(self, uri, itemdoc): @@ -272,8 +265,8 @@ class MTVIE(MTVServicesInfoExtractor): webpage = self._download_webpage(url, video_id) # Some videos come from Vevo.com - m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', - webpage, re.DOTALL) + m_vevo = re.search( + r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage) if m_vevo: vevo_id = m_vevo.group(1) self.to_screen('Vevo video detected: %s' % vevo_id) @@ -295,3 +288,40 @@ class MTVIggyIE(MTVServicesInfoExtractor): } } _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/' + +class MTVDEIE(MTVServicesInfoExtractor): + IE_NAME = 'mtv.de' + _VALID_URL = r'''(?x)^https?://(?:www\.)?mtv\.de(?P/artists/.*)''' + _TESTS = [ + { + 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', + 'info_dict': { + 'id': 'a50bc5f0b3aa4b3190aa', + 'ext': 'mp4', + 'title': 'cro-traum', + 'description': 'Cro - Traum', + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + return self._get_videos_info(url, mobj.group('video_path')) + + def _get_videos_info(self, url, video_path): + webpage = self._download_webpage(url, video_path) + playlist_js = self._search_regex(r'