_ Git - youtube-dl/blob - youtube_dl/extractor/crackle.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals, division
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..compat import compat_str
   8 from ..utils import (
   9     determine_ext,
  10     float_or_none,
  11     int_or_none,
  12     parse_age_limit,
  13     parse_duration,
  14 )
  15
  16
  17 class CrackleIE(InfoExtractor):
  18     _GEO_COUNTRIES = ['US']
  19     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
  20     _TEST = {
  21         'url': 'https://www.crackle.com/andromeda/2502343',
  22         'info_dict': {
  23             'id': '2502343',
  24             'ext': 'mp4',
  25             'title': 'Under The Night',
  26             'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
  27             'duration': 2583,
  28             'view_count': int,
  29             'average_rating': 0,
  30             'age_limit': 14,
  31             'genre': 'Action, Sci-Fi',
  32             'creator': 'Allan Kroeker',
  33             'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
  34             'release_year': 2000,
  35             'series': 'Andromeda',
  36             'episode': 'Under The Night',
  37             'season_number': 1,
  38             'episode_number': 1,
  39         },
  40         'params': {
  41             # m3u8 download
  42             'skip_download': True,
  43         }
  44     }
  45
  46     def _real_extract(self, url):
  47         video_id = self._match_id(url)
  48
  49         media = self._download_json(
  50             'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
  51             % (video_id, self._GEO_COUNTRIES[0]), video_id, query={
  52                 'disableProtocols': 'true',
  53                 'format': 'json'
  54             })
  55
  56         title = media['Title']
  57
  58         formats = []
  59         for e in media['MediaURLs']:
  60             if e.get('UseDRM') is True:
  61                 continue
  62             format_url = e.get('Path')
  63             if not format_url or not isinstance(format_url, compat_str):
  64                 continue
  65             ext = determine_ext(format_url)
  66             if ext == 'm3u8':
  67                 formats.extend(self._extract_m3u8_formats(
  68                     format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  69                     m3u8_id='hls', fatal=False))
  70             elif ext == 'mpd':
  71                 formats.extend(self._extract_mpd_formats(
  72                     format_url, video_id, mpd_id='dash', fatal=False))
  73         self._sort_formats(formats)
  74
  75         description = media.get('Description')
  76         duration = int_or_none(media.get(
  77             'DurationInSeconds')) or parse_duration(media.get('Duration'))
  78         view_count = int_or_none(media.get('CountViews'))
  79         average_rating = float_or_none(media.get('UserRating'))
  80         age_limit = parse_age_limit(media.get('Rating'))
  81         genre = media.get('Genre')
  82         release_year = int_or_none(media.get('ReleaseYear'))
  83         creator = media.get('Directors')
  84         artist = media.get('Cast')
  85
  86         if media.get('MediaTypeDisplayValue') == 'Full Episode':
  87             series = media.get('ShowName')
  88             episode = title
  89             season_number = int_or_none(media.get('Season'))
  90             episode_number = int_or_none(media.get('Episode'))
  91         else:
  92             series = episode = season_number = episode_number = None
  93
  94         subtitles = {}
  95         cc_files = media.get('ClosedCaptionFiles')
  96         if isinstance(cc_files, list):
  97             for cc_file in cc_files:
  98                 if not isinstance(cc_file, dict):
  99                     continue
 100                 cc_url = cc_file.get('Path')
 101                 if not cc_url or not isinstance(cc_url, compat_str):
 102                     continue
 103                 lang = cc_file.get('Locale') or 'en'
 104                 subtitles.setdefault(lang, []).append({'url': cc_url})
 105
 106         thumbnails = []
 107         images = media.get('Images')
 108         if isinstance(images, list):
 109             for image_key, image_url in images.items():
 110                 mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
 111                 if not mobj:
 112                     continue
 113                 thumbnails.append({
 114                     'url': image_url,
 115                     'width': int(mobj.group(1)),
 116                     'height': int(mobj.group(2)),
 117                 })
 118
 119         return {
 120             'id': video_id,
 121             'title': title,
 122             'description': description,
 123             'duration': duration,
 124             'view_count': view_count,
 125             'average_rating': average_rating,
 126             'age_limit': age_limit,
 127             'genre': genre,
 128             'creator': creator,
 129             'artist': artist,
 130             'release_year': release_year,
 131             'series': series,
 132             'episode': episode,
 133             'season_number': season_number,
 134             'episode_number': episode_number,
 135             'thumbnails': thumbnails,
 136             'subtitles': subtitles,
 137             'formats': formats,
 138         }