_ Git - youtube-dl/blob - youtube_dl/extractor/vevo.py

   1 import re
   2 import json
   3 import xml.etree.ElementTree
   4 import datetime
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     determine_ext,
   9     ExtractorError,
  10 )
  11
  12
  13 class VevoIE(InfoExtractor):
  14     """
  15     Accepts urls from vevo.com or in the format 'vevo:{id}'
  16     (currently used by MTVIE)
  17     """
  18     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
  19     _TEST = {
  20         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
  21         u'file': u'GB1101300280.mp4',
  22         u'info_dict': {
  23             u"upload_date": u"20130624",
  24             u"uploader": u"Hurts",
  25             u"title": u"Somebody to Die For",
  26             u'duration': 230,
  27         }
  28     }
  29
  30     def _real_extract(self, url):
  31         mobj = re.match(self._VALID_URL, url)
  32         video_id = mobj.group('id')
  33
  34         json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
  35         info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
  36
  37         self.report_extraction(video_id)
  38         video_info = json.loads(info_json)['video']
  39         last_version = {'version': -1}
  40         for version in video_info['videoVersions']:
  41             # These are the HTTP downloads, other types are for different manifests
  42             if version['sourceType'] == 2:
  43                 if version['version'] > last_version['version']:
  44                     last_version = version
  45         if last_version['version'] == -1:
  46             raise ExtractorError(u'Unable to extract last version of the video')
  47
  48         renditions = xml.etree.ElementTree.fromstring(last_version['data'])
  49         formats = []
  50         # Already sorted from worst to best quality
  51         for rend in renditions.findall('rendition'):
  52             attr = rend.attrib
  53             f_url = attr['url']
  54             formats.append({
  55                 'url': f_url,
  56                 'ext': determine_ext(f_url),
  57                 'height': int(attr['frameheight']),
  58                 'width': int(attr['frameWidth']),
  59             })
  60
  61         date_epoch = int(self._search_regex(
  62             r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
  63         upload_date = datetime.datetime.fromtimestamp(date_epoch)
  64         info = {
  65             'id': video_id,
  66             'title': video_info['title'],
  67             'formats': formats,
  68             'thumbnail': video_info['imageUrl'],
  69             'upload_date': upload_date.strftime('%Y%m%d'),
  70             'uploader': video_info['mainArtists'][0]['artistName'],
  71             'duration': video_info['duration'],
  72         }
  73
  74         # TODO: Remove when #980 has been merged
  75         info.update(formats[-1])
  76
  77         return info