_ Git - youtube-dl/blob - youtube_dl/extractor/internetvideoarchive.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..compat import (
   5     compat_parse_qs,
   6     compat_urlparse,
   7 )
   8 from ..utils import (
   9     determine_ext,
  10     int_or_none,
  11     xpath_text,
  12 )
  13
  14
  15 class InternetVideoArchiveIE(InfoExtractor):
  16     _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
  17
  18     _TEST = {
  19         'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
  20         'info_dict': {
  21             'id': '194487',
  22             'ext': 'mp4',
  23             'title': 'KICK-ASS 2',
  24             'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
  25         },
  26         'params': {
  27             # m3u8 download
  28             'skip_download': True,
  29         },
  30     }
  31
  32     @staticmethod
  33     def _build_json_url(query):
  34         return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
  35
  36     @staticmethod
  37     def _build_xml_url(query):
  38         return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
  39
  40     def _real_extract(self, url):
  41         query = compat_urlparse.urlparse(url).query
  42         query_dic = compat_parse_qs(query)
  43         video_id = query_dic['publishedid'][0]
  44
  45         if '/player/' in url:
  46             configuration = self._download_json(url, video_id)
  47
  48             # There are multiple videos in the playlist whlie only the first one
  49             # matches the video played in browsers
  50             video_info = configuration['playlist'][0]
  51
  52             formats = []
  53             for source in video_info['sources']:
  54                 file_url = source['file']
  55                 if determine_ext(file_url) == 'm3u8':
  56                     formats.extend(self._extract_m3u8_formats(
  57                         file_url, video_id, ext='mp4', m3u8_id='hls'))
  58                 else:
  59                     a_format = {
  60                         'url': file_url,
  61                     }
  62
  63                     if source.get('label') and source['label'][-4:] == ' kbs':
  64                         tbr = int_or_none(source['label'][:-4])
  65                         a_format.update({
  66                             'tbr': tbr,
  67                             'format_id': 'http-%d' % tbr,
  68                         })
  69                         formats.append(a_format)
  70
  71             self._sort_formats(formats)
  72
  73             title = video_info['title']
  74             description = video_info.get('description')
  75             thumbnail = video_info.get('image')
  76         else:
  77             configuration = self._download_xml(url, video_id)
  78             formats = [{
  79                 'url': xpath_text(configuration, './file', 'file URL', fatal=True),
  80             }]
  81             thumbnail = xpath_text(configuration, './image', 'thumbnail')
  82             title = 'InternetVideoArchive video %s' % video_id
  83             description = None
  84
  85         return {
  86             'id': video_id,
  87             'title': title,
  88             'formats': formats,
  89             'thumbnail': thumbnail,
  90             'description': description,
  91         }