[teamcoco] Fix extraction for full episodes (closes #16573)
author Remita Amine <remitamine@gmail.com>
Wed, 30 May 2018 12:21:07 +0000 (13:21 +0100)
committer Remita Amine <remitamine@gmail.com>
Wed, 30 May 2018 12:21:07 +0000 (13:21 +0100)
youtube_dl/extractor/tbs.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/turner.py

index edc31729d35f250d2b8267e87719ea54ce9480e3..784f8ed6639d9ef2eb4e73e4c5645d309e6bb500 100644 (file)
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .turner import TurnerBaseIE
+from ..compat import (
+    compat_urllib_parse_urlparse,
+    compat_parse_qs,
+)
 from ..utils import (
     float_or_none,
     int_or_none,
@@ -38,48 +42,22 @@ class TBSIE(TurnerBaseIE):
     def _real_extract(self, url):
         site, display_id = re.match(self._VALID_URL, url).groups()
         webpage = self._download_webpage(url, display_id)
-        video_data = self._parse_json(self._search_regex(
+        drupal_settings = self._parse_json(self._search_regex(
             r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
-            webpage, 'drupal setting'), display_id)['turner_playlist'][0]
+            webpage, 'drupal setting'), display_id)
+        video_data = drupal_settings['turner_playlist'][0]
 
         media_id = video_data['mediaID']
         title = video_data['title']
+        tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
+            drupal_settings['ngtv_token_url']).query)
 
-        streams_data = self._download_json(
-            'http://medium.ngtv.io/media/%s/tv' % media_id,
-            media_id)['media']['tv']
-        duration = None
-        chapters = []
-        formats = []
-        for supported_type in ('unprotected', 'bulkaes'):
-            stream_data = streams_data.get(supported_type, {})
-            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
-            if not m3u8_url:
-                continue
-            if stream_data.get('playlistProtection') == 'spe':
-                m3u8_url = self._add_akamai_spe_token(
-                    'http://token.vgtf.net/token/token_spe',
-                    m3u8_url, media_id, {
-                        'url': url,
-                        'site_name': site[:3].upper(),
-                        'auth_required': video_data.get('authRequired') == '1',
-                    })
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
-
-            duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
-
-            if not chapters:
-                for chapter in stream_data.get('contentSegments', []):
-                    start_time = float_or_none(chapter.get('start'))
-                    duration = float_or_none(chapter.get('duration'))
-                    if start_time is None or duration is None:
-                        continue
-                    chapters.append({
-                        'start_time': start_time,
-                        'end_time': start_time + duration,
-                    })
-        self._sort_formats(formats)
+        info = self._extract_ngtv_info(
+            media_id, tokenizer_query, {
+                'url': url,
+                'site_name': site[:3].upper(),
+                'auth_required': video_data.get('authRequired') == '1',
+            })
 
         thumbnails = []
         for image_id, image in video_data.get('images', {}).items():
@@ -98,15 +76,14 @@ class TBSIE(TurnerBaseIE):
                 })
             thumbnails.append(i)
 
-        return {
+        info.update({
             'id': media_id,
             'title': title,
             'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
-            'duration': duration,
+            'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
             'timestamp': int_or_none(video_data.get('created')),
             'season_number': int_or_none(video_data.get('season')),
             'episode_number': int_or_none(video_data.get('episode')),
-            'cahpters': chapters,
             'thumbnails': thumbnails,
-            'formats': formats,
-        }
+        })
+        return info
index 63fd4fe1c78ef295d8e2447acffddcb1fb400ab6..73469cc5d1b6247cb44dc33aaa96805dfd1768a0 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import json
 
-from .common import InfoExtractor
+from .turner import TurnerBaseIE
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -15,7 +15,7 @@ from ..utils import (
 )
 
 
-class TeamcocoIE(InfoExtractor):
+class TeamcocoIE(TurnerBaseIE):
     _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
     _TESTS = [
         {
@@ -110,6 +110,8 @@ class TeamcocoIE(InfoExtractor):
           name
         }
         duration
+        turnerMediaId
+        turnerMediaAuthToken
       }
     }
     ... on NotFoundSlug {
@@ -123,53 +125,65 @@ class TeamcocoIE(InfoExtractor):
         record = response['record']
         video_id = record['id']
 
-        video_sources = self._graphql_call('''{
-  %s(id: "%s") {
-    src
-  }
-}''', 'RecordVideoSource', video_id) or {}
-
-        formats = []
-        get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
-        for format_id, src in video_sources.get('src', {}).items():
-            if not isinstance(src, dict):
-                continue
-            src_url = src.get('src')
-            if not src_url:
-                continue
-            ext = determine_ext(src_url, mimetype2ext(src.get('type')))
-            if format_id == 'hls' or ext == 'm3u8':
-                # compat_urllib_parse.urljoin does not work here
-                if src_url.startswith('/'):
-                    src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
-                formats.extend(self._extract_m3u8_formats(
-                    src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
-            else:
-                if src_url.startswith('/mp4:protected/'):
-                    # TODO Correct extraction for these files
-                    continue
-                tbr = int_or_none(self._search_regex(
-                    r'(\d+)k\.mp4', src_url, 'tbr', default=None))
-
-                formats.append({
-                    'url': src_url,
-                    'ext': ext,
-                    'tbr': tbr,
-                    'format_id': format_id,
-                    'quality': get_quality(format_id),
-                })
-        if not formats:
-            formats = self._extract_m3u8_formats(
-                record['file']['url'], video_id, 'mp4', fatal=False)
-        self._sort_formats(formats)
-
-        return {
+        info = {
             'id': video_id,
             'display_id': display_id,
-            'formats': formats,
             'title': record['title'],
             'thumbnail': record.get('thumb', {}).get('preview'),
             'description': record.get('teaser'),
             'duration': parse_duration(record.get('duration')),
             'timestamp': parse_iso8601(record.get('publishOn')),
         }
+
+        media_id = record.get('turnerMediaId')
+        if media_id:
+            self._initialize_geo_bypass({
+                'countries': ['US'],
+            })
+            info.update(self._extract_ngtv_info(media_id, {
+                'accessToken': record['turnerMediaAuthToken'],
+                'accessTokenType': 'jws',
+            }))
+        else:
+            video_sources = self._graphql_call('''{
+  %s(id: "%s") {
+    src
+  }
+}''', 'RecordVideoSource', video_id) or {}
+
+            formats = []
+            get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+            for format_id, src in video_sources.get('src', {}).items():
+                if not isinstance(src, dict):
+                    continue
+                src_url = src.get('src')
+                if not src_url:
+                    continue
+                ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+                if format_id == 'hls' or ext == 'm3u8':
+                    # compat_urllib_parse.urljoin does not work here
+                    if src_url.startswith('/'):
+                        src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+                    formats.extend(self._extract_m3u8_formats(
+                        src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+                else:
+                    if src_url.startswith('/mp4:protected/'):
+                        # TODO Correct extraction for these files
+                        continue
+                    tbr = int_or_none(self._search_regex(
+                        r'(\d+)k\.mp4', src_url, 'tbr', default=None))
+
+                    formats.append({
+                        'url': src_url,
+                        'ext': ext,
+                        'tbr': tbr,
+                        'format_id': format_id,
+                        'quality': get_quality(format_id),
+                    })
+            if not formats:
+                formats = self._extract_m3u8_formats(
+                    record['file']['url'], video_id, 'mp4', fatal=False)
+            self._sort_formats(formats)
+            info['formats'] = formats
+
+        return info
index e73b64aebd80977cbf9551f20e5dadac2a15a9df..2b7b0d6e1b046d7184b194b5688af5295e3b8436 100644 (file)
@@ -9,6 +9,7 @@ from ..utils import (
     xpath_text,
     int_or_none,
     determine_ext,
+    float_or_none,
     parse_duration,
     xpath_attr,
     update_url_query,
@@ -23,14 +24,17 @@ class TurnerBaseIE(AdobePassIE):
     def _extract_timestamp(self, video_data):
         return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
 
-    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
+    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
         secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
         token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
         if not token:
             query = {
                 'path': secure_path,
-                'videoId': content_id,
             }
+            if custom_tokenizer_query:
+                query.update(custom_tokenizer_query)
+            else:
+                query['videoId'] = content_id
             if ap_data.get('auth_required'):
                 query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
             auth = self._download_xml(
@@ -188,3 +192,42 @@ class TurnerBaseIE(AdobePassIE):
             'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
             'is_live': is_live,
         }
+
+    def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
+        streams_data = self._download_json(
+            'http://medium.ngtv.io/media/%s/tv' % media_id,
+            media_id)['media']['tv']
+        duration = None
+        chapters = []
+        formats = []
+        for supported_type in ('unprotected', 'bulkaes'):
+            stream_data = streams_data.get(supported_type, {})
+            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
+            if not m3u8_url:
+                continue
+            if stream_data.get('playlistProtection') == 'spe':
+                m3u8_url = self._add_akamai_spe_token(
+                    'http://token.ngtv.io/token/token_spe',
+                    m3u8_url, media_id, ap_data or {}, tokenizer_query)
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+            duration = float_or_none(stream_data.get('totalRuntime'))
+
+            if not chapters:
+                for chapter in stream_data.get('contentSegments', []):
+                    start_time = float_or_none(chapter.get('start'))
+                    chapter_duration = float_or_none(chapter.get('duration'))
+                    if start_time is None or chapter_duration is None:
+                        continue
+                    chapters.append({
+                        'start_time': start_time,
+                        'end_time': start_time + chapter_duration,
+                    })
+        self._sort_formats(formats)
+
+        return {
+            'formats': formats,
+            'chapters': chapters,
+            'duration': duration,
+        }