_ Git - youtube-dl/blob - youtube_dl/extractor/teamcoco.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5
   6 from .turner import TurnerBaseIE
   7 from ..utils import (
   8     determine_ext,
   9     ExtractorError,
  10     int_or_none,
  11     mimetype2ext,
  12     parse_duration,
  13     parse_iso8601,
  14     qualities,
  15 )
  16
  17
  18 class TeamcocoIE(TurnerBaseIE):
  19     _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
  20     _TESTS = [
  21         {
  22             'url': 'http://teamcoco.com/video/mary-kay-remote',
  23             'md5': '55d532f81992f5c92046ad02fec34d7d',
  24             'info_dict': {
  25                 'id': '80187',
  26                 'ext': 'mp4',
  27                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
  28                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
  29                 'duration': 495.0,
  30                 'upload_date': '20140402',
  31                 'timestamp': 1396407600,
  32             }
  33         }, {
  34             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
  35             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
  36             'info_dict': {
  37                 'id': '19705',
  38                 'ext': 'mp4',
  39                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
  40                 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
  41                 'duration': 288,
  42                 'upload_date': '20111104',
  43                 'timestamp': 1320405840,
  44             }
  45         }, {
  46             'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
  47             'info_dict': {
  48                 'id': '88748',
  49                 'ext': 'mp4',
  50                 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
  51                 'description': 'md5:15501f23f020e793aeca761205e42c24',
  52                 'upload_date': '20150415',
  53                 'timestamp': 1429088400,
  54             },
  55             'params': {
  56                 'skip_download': True,  # m3u8 downloads
  57             }
  58         }, {
  59             'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
  60             'info_dict': {
  61                 'id': '89341',
  62                 'ext': 'mp4',
  63                 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
  64                 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
  65             },
  66             'params': {
  67                 'skip_download': True,  # m3u8 downloads
  68             },
  69             'skip': 'This video is no longer available.',
  70         }, {
  71             'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
  72             'only_matching': True,
  73         }, {
  74             'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
  75             'only_matching': True,
  76         }, {
  77             'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
  78             'only_matching': True,
  79         }, {
  80             'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
  81             'only_matching': True,
  82         }
  83     ]
  84
  85     def _graphql_call(self, query_template, object_type, object_id):
  86         find_object = 'find' + object_type
  87         return self._download_json(
  88             'http://teamcoco.com/graphql/', object_id, data=json.dumps({
  89                 'query': query_template % (find_object, object_id)
  90             }))['data'][find_object]
  91
  92     def _real_extract(self, url):
  93         display_id = self._match_id(url)
  94
  95         response = self._graphql_call('''{
  96   %s(slug: "%s") {
  97     ... on RecordSlug {
  98       record {
  99         id
 100         title
 101         teaser
 102         publishOn
 103         thumb {
 104           preview
 105         }
 106         file {
 107           url
 108         }
 109         tags {
 110           name
 111         }
 112         duration
 113         turnerMediaId
 114         turnerMediaAuthToken
 115       }
 116     }
 117     ... on NotFoundSlug {
 118       status
 119     }
 120   }
 121 }''', 'Slug', display_id)
 122         if response.get('status'):
 123             raise ExtractorError('This video is no longer available.', expected=True)
 124
 125         record = response['record']
 126         video_id = record['id']
 127
 128         info = {
 129             'id': video_id,
 130             'display_id': display_id,
 131             'title': record['title'],
 132             'thumbnail': record.get('thumb', {}).get('preview'),
 133             'description': record.get('teaser'),
 134             'duration': parse_duration(record.get('duration')),
 135             'timestamp': parse_iso8601(record.get('publishOn')),
 136         }
 137
 138         media_id = record.get('turnerMediaId')
 139         if media_id:
 140             self._initialize_geo_bypass({
 141                 'countries': ['US'],
 142             })
 143             info.update(self._extract_ngtv_info(media_id, {
 144                 'accessToken': record['turnerMediaAuthToken'],
 145                 'accessTokenType': 'jws',
 146             }))
 147         else:
 148             video_sources = self._graphql_call('''{
 149   %s(id: "%s") {
 150     src
 151   }
 152 }''', 'RecordVideoSource', video_id) or {}
 153
 154             formats = []
 155             get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
 156             for format_id, src in video_sources.get('src', {}).items():
 157                 if not isinstance(src, dict):
 158                     continue
 159                 src_url = src.get('src')
 160                 if not src_url:
 161                     continue
 162                 ext = determine_ext(src_url, mimetype2ext(src.get('type')))
 163                 if format_id == 'hls' or ext == 'm3u8':
 164                     # compat_urllib_parse.urljoin does not work here
 165                     if src_url.startswith('/'):
 166                         src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
 167                     formats.extend(self._extract_m3u8_formats(
 168                         src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
 169                 else:
 170                     if src_url.startswith('/mp4:protected/'):
 171                         # TODO Correct extraction for these files
 172                         continue
 173                     tbr = int_or_none(self._search_regex(
 174                         r'(\d+)k\.mp4', src_url, 'tbr', default=None))
 175
 176                     formats.append({
 177                         'url': src_url,
 178                         'ext': ext,
 179                         'tbr': tbr,
 180                         'format_id': format_id,
 181                         'quality': get_quality(format_id),
 182                     })
 183             if not formats:
 184                 formats = self._extract_m3u8_formats(
 185                     record['file']['url'], video_id, 'mp4', fatal=False)
 186             self._sort_formats(formats)
 187             info['formats'] = formats
 188
 189         return info