_ Git - youtube-dl/blob - youtube_dl/extractor/teamcoco.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5
   6 from .turner import TurnerBaseIE
   7 from ..utils import (
   8     determine_ext,
   9     ExtractorError,
  10     int_or_none,
  11     mimetype2ext,
  12     parse_duration,
  13     parse_iso8601,
  14     qualities,
  15 )
  16
  17
  18 class TeamcocoIE(TurnerBaseIE):
  19     _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
  20     _TESTS = [
  21         {
  22             'url': 'http://teamcoco.com/video/mary-kay-remote',
  23             'md5': '55d532f81992f5c92046ad02fec34d7d',
  24             'info_dict': {
  25                 'id': '80187',
  26                 'ext': 'mp4',
  27                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
  28                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
  29                 'duration': 495.0,
  30                 'upload_date': '20140402',
  31                 'timestamp': 1396407600,
  32             }
  33         }, {
  34             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
  35             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
  36             'info_dict': {
  37                 'id': '19705',
  38                 'ext': 'mp4',
  39                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
  40                 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
  41                 'duration': 288,
  42                 'upload_date': '20111104',
  43                 'timestamp': 1320405840,
  44             }
  45         }, {
  46             'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
  47             'info_dict': {
  48                 'id': '88748',
  49                 'ext': 'mp4',
  50                 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
  51                 'description': 'md5:15501f23f020e793aeca761205e42c24',
  52                 'upload_date': '20150415',
  53                 'timestamp': 1429088400,
  54             },
  55             'params': {
  56                 'skip_download': True,  # m3u8 downloads
  57             }
  58         }, {
  59             'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
  60             'info_dict': {
  61                 'id': '89341',
  62                 'ext': 'mp4',
  63                 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
  64                 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
  65             },
  66             'params': {
  67                 'skip_download': True,  # m3u8 downloads
  68             },
  69             'skip': 'This video is no longer available.',
  70         }, {
  71             'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
  72             'only_matching': True,
  73         }, {
  74             'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
  75             'only_matching': True,
  76         }, {
  77             'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
  78             'only_matching': True,
  79         }, {
  80             'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
  81             'only_matching': True,
  82         }, {
  83             'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
  84             'only_matching': True,
  85         }
  86     ]
  87
  88     def _graphql_call(self, query_template, object_type, object_id):
  89         find_object = 'find' + object_type
  90         return self._download_json(
  91             'https://teamcoco.com/graphql', object_id, data=json.dumps({
  92                 'query': query_template % (find_object, object_id)
  93             }).encode(), headers={
  94                 'Content-Type': 'application/json',
  95             })['data'][find_object]
  96
  97     def _real_extract(self, url):
  98         display_id = self._match_id(url)
  99
 100         response = self._graphql_call('''{
 101   %s(slug: "%s") {
 102     ... on RecordSlug {
 103       record {
 104         id
 105         title
 106         teaser
 107         publishOn
 108         thumb {
 109           preview
 110         }
 111         file {
 112           url
 113         }
 114         tags {
 115           name
 116         }
 117         duration
 118         turnerMediaId
 119         turnerMediaAuthToken
 120       }
 121     }
 122     ... on NotFoundSlug {
 123       status
 124     }
 125   }
 126 }''', 'Slug', display_id)
 127         if response.get('status'):
 128             raise ExtractorError('This video is no longer available.', expected=True)
 129
 130         record = response['record']
 131         video_id = record['id']
 132
 133         info = {
 134             'id': video_id,
 135             'display_id': display_id,
 136             'title': record['title'],
 137             'thumbnail': record.get('thumb', {}).get('preview'),
 138             'description': record.get('teaser'),
 139             'duration': parse_duration(record.get('duration')),
 140             'timestamp': parse_iso8601(record.get('publishOn')),
 141         }
 142
 143         media_id = record.get('turnerMediaId')
 144         if media_id:
 145             self._initialize_geo_bypass({
 146                 'countries': ['US'],
 147             })
 148             info.update(self._extract_ngtv_info(media_id, {
 149                 'accessToken': record['turnerMediaAuthToken'],
 150                 'accessTokenType': 'jws',
 151             }))
 152         else:
 153             d = self._download_json(
 154                 'https://teamcoco.com/_truman/d/' + video_id,
 155                 video_id, fatal=False) or {}
 156             video_sources = d.get('meta') or {}
 157             if not video_sources:
 158                 video_sources = self._graphql_call('''{
 159   %s(id: "%s") {
 160     src
 161   }
 162 }''', 'RecordVideoSource', video_id) or {}
 163
 164             formats = []
 165             get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
 166             for format_id, src in video_sources.get('src', {}).items():
 167                 if not isinstance(src, dict):
 168                     continue
 169                 src_url = src.get('src')
 170                 if not src_url:
 171                     continue
 172                 ext = determine_ext(src_url, mimetype2ext(src.get('type')))
 173                 if format_id == 'hls' or ext == 'm3u8':
 174                     # compat_urllib_parse.urljoin does not work here
 175                     if src_url.startswith('/'):
 176                         src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
 177                     formats.extend(self._extract_m3u8_formats(
 178                         src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
 179                 else:
 180                     if src_url.startswith('/mp4:protected/'):
 181                         # TODO Correct extraction for these files
 182                         continue
 183                     tbr = int_or_none(self._search_regex(
 184                         r'(\d+)k\.mp4', src_url, 'tbr', default=None))
 185
 186                     formats.append({
 187                         'url': src_url,
 188                         'ext': ext,
 189                         'tbr': tbr,
 190                         'format_id': format_id,
 191                         'quality': get_quality(format_id),
 192                     })
 193             if not formats:
 194                 formats = self._extract_m3u8_formats(
 195                     record['file']['url'], video_id, 'mp4', fatal=False)
 196             self._sort_formats(formats)
 197             info['formats'] = formats
 198
 199         return info