X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmtv.py;h=f6f31bfdc53c6cb0685cfb74e9cc59b56269d77f;hb=c4db377cbb25c35c0e9df95f275d439cff75a770;hp=8385929e02681a55260fbe4ccec368d57a10389b;hpb=90834c78fed7b383efac8cb1b8adb9f864992c98;p=youtube-dl diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8385929e0..f6f31bfdc 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -1,5 +1,6 @@ +from __future__ import unicode_literals + import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -7,6 +8,8 @@ from ..utils import ( ExtractorError, find_xpath_attr, fix_xml_ampersands, + url_basename, + RegexNotFoundError, ) @@ -36,10 +39,9 @@ class MTVServicesInfoExtractor(InfoExtractor): else: return thumb_node.attrib['url'] - def _extract_video_formats(self, metadataXml): - if '/error_country_block.swf' in metadataXml: - raise ExtractorError(u'This video is not available from your country.', expected=True) - mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8')) + def _extract_video_formats(self, mdoc): + if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None: + raise ExtractorError('This video is not available from your country.', expected=True) formats = [] for rendition in mdoc.findall('.//rendition'): @@ -62,11 +64,12 @@ class MTVServicesInfoExtractor(InfoExtractor): self.report_extraction(video_id) mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url'] # Remove the templates, like &device={device} - mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url) + mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url) if 'acceptMethods' not in mediagen_url: mediagen_url += '&acceptMethods=fms' - mediagen_page = self._download_webpage(mediagen_url, video_id, - u'Downloading video urls') + + mediagen_doc = self._download_xml(mediagen_url, video_id, + 'Downloading video urls') description_node = itemdoc.find('description') if description_node is not None: @@ -86,10 +89,11 @@ class MTVServicesInfoExtractor(InfoExtractor): title = title_el.text if title is None: raise ExtractorError('Could not find video title') + title = title.strip() return { 'title': title, - 'formats': self._extract_video_formats(mediagen_page), + 'formats': self._extract_video_formats(mediagen_doc), 'id': video_id, 'thumbnail': self._get_thumbnail_url(uri, itemdoc), 'description': description, @@ -101,9 +105,23 @@ class MTVServicesInfoExtractor(InfoExtractor): idoc = self._download_xml( self._FEED_URL + '?' + data, video_id, - u'Downloading info', transform_source=fix_xml_ampersands) + 'Downloading info', transform_source=fix_xml_ampersands) return [self._get_video_info(item) for item in idoc.findall('.//item')] + def _real_extract(self, url): + title = url_basename(url) + webpage = self._download_webpage(url, title) + try: + # the url can be http://media.mtvnservices.com/fb/{mgid}.swf + # or http://media.mtvnservices.com/{mgid} + og_url = self._og_search_video_url(webpage) + mgid = url_basename(og_url) + if mgid.endswith('.swf'): + mgid = mgid[:-4] + except RegexNotFoundError: + mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid') + return self._get_videos_info(mgid) + class MTVIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)^https?:// @@ -114,25 +132,25 @@ class MTVIE(MTVServicesInfoExtractor): _TESTS = [ { - u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', - u'file': u'853555.mp4', - u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', - u'info_dict': { - u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', - u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', + 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', + 'file': '853555.mp4', + 'md5': '850f3f143316b1e71fa56a4edfd6e0f8', + 'info_dict': { + 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', + 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', }, }, { - u'add_ie': ['Vevo'], - u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', - u'file': u'USCJY1331283.mp4', - u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', - u'info_dict': { - u'title': u'Everything Has Changed', - u'upload_date': u'20130606', - u'uploader': u'Taylor Swift', + 'add_ie': ['Vevo'], + 'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', + 'file': 'USCJY1331283.mp4', + 'md5': '73b4e7fcadd88929292fe52c3ced8caf', + 'info_dict': { + 'title': 'Everything Has Changed', + 'upload_date': '20130606', + 'uploader': 'Taylor Swift', }, - u'skip': u'VEVO is only available in some countries', + 'skip': 'VEVO is only available in some countries', }, ] @@ -151,8 +169,22 @@ class MTVIE(MTVServicesInfoExtractor): webpage, re.DOTALL) if m_vevo: vevo_id = m_vevo.group(1); - self.to_screen(u'Vevo video detected: %s' % vevo_id) + self.to_screen('Vevo video detected: %s' % vevo_id) return self.url_result('vevo:%s' % vevo_id, ie='Vevo') - uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri') + uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri') return self._get_videos_info(uri) + + +class MTVIggyIE(MTVServicesInfoExtractor): + IE_NAME = 'mtviggy.com' + _VALID_URL = r'https?://www\.mtviggy\.com/videos/.+' + _TEST = { + 'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/', + 'info_dict': { + 'id': '984696', + 'ext': 'mp4', + 'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet', + } + } + _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'