[youtube] Fix extraction.

[youtube-dl] / youtube_dl / extractor / teamcoco.py
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py

index 63fd4fe1c78ef295d8e2447acffddcb1fb400ab6..5793b711f6dc1103f7d33b41abb766eabfab1aea 100644 (file)
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
  
  import json
  
-from .common import InfoExtractor
+from .turner import TurnerBaseIE
  from ..utils import (
      determine_ext,
      ExtractorError,
@@ -15,8 +15,8 @@ from ..utils import (
  )
  
  
-class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
+class TeamcocoIE(TurnerBaseIE):
+    _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
      _TESTS = [
          {
              'url': 'http://teamcoco.com/video/mary-kay-remote',
@@ -79,97 +79,127 @@ class TeamcocoIE(InfoExtractor):
          }, {
              'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
              'only_matching': True,
+        }, {
+            'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
+            'only_matching': True,
          }
      ]
+    _RECORD_TEMPL = '''id
+        title
+        teaser
+        publishOn
+        thumb {
+          preview
+        }
+        tags {
+          name
+        }
+        duration
+        turnerMediaId
+        turnerMediaAuthToken'''
  
      def _graphql_call(self, query_template, object_type, object_id):
          find_object = 'find' + object_type
          return self._download_json(
-            'http://teamcoco.com/graphql/', object_id, data=json.dumps({
+            'https://teamcoco.com/graphql', object_id, data=json.dumps({
                  'query': query_template % (find_object, object_id)
-            }))['data'][find_object]
+            }).encode(), headers={
+                'Content-Type': 'application/json',
+            })['data'][find_object]
  
      def _real_extract(self, url):
          display_id = self._match_id(url)
  
          response = self._graphql_call('''{
-  %s(slug: "%s") {
+  %%s(slug: "%%s") {
      ... on RecordSlug {
        record {
+        %s
+      }
+    }
+    ... on PageSlug {
+      child {
          id
-        title
-        teaser
-        publishOn
-        thumb {
-          preview
-        }
-        file {
-          url
-        }
-        tags {
-          name
-        }
-        duration
        }
      }
      ... on NotFoundSlug {
        status
      }
    }
-}''', 'Slug', display_id)
+}''' % self._RECORD_TEMPL, 'Slug', display_id)
          if response.get('status'):
              raise ExtractorError('This video is no longer available.', expected=True)
  
-        record = response['record']
-        video_id = record['id']
-
-        video_sources = self._graphql_call('''{
-  %s(id: "%s") {
-    src
+        child = response.get('child')
+        if child:
+            record = self._graphql_call('''{
+  %%s(id: "%%s") {
+    ... on Video {
+      %s
+    }
    }
-}''', 'RecordVideoSource', video_id) or {}
-
-        formats = []
-        get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
-        for format_id, src in video_sources.get('src', {}).items():
-            if not isinstance(src, dict):
-                continue
-            src_url = src.get('src')
-            if not src_url:
-                continue
-            ext = determine_ext(src_url, mimetype2ext(src.get('type')))
-            if format_id == 'hls' or ext == 'm3u8':
-                # compat_urllib_parse.urljoin does not work here
-                if src_url.startswith('/'):
-                    src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
-                formats.extend(self._extract_m3u8_formats(
-                    src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
-            else:
-                if src_url.startswith('/mp4:protected/'):
-                    # TODO Correct extraction for these files
-                    continue
-                tbr = int_or_none(self._search_regex(
-                    r'(\d+)k\.mp4', src_url, 'tbr', default=None))
-
-                formats.append({
-                    'url': src_url,
-                    'ext': ext,
-                    'tbr': tbr,
-                    'format_id': format_id,
-                    'quality': get_quality(format_id),
-                })
-        if not formats:
-            formats = self._extract_m3u8_formats(
-                record['file']['url'], video_id, 'mp4', fatal=False)
-        self._sort_formats(formats)
+}''' % self._RECORD_TEMPL, 'Record', child['id'])
+        else:
+            record = response['record']
+        video_id = record['id']
  
-        return {
+        info = {
              'id': video_id,
              'display_id': display_id,
-            'formats': formats,
              'title': record['title'],
              'thumbnail': record.get('thumb', {}).get('preview'),
              'description': record.get('teaser'),
              'duration': parse_duration(record.get('duration')),
              'timestamp': parse_iso8601(record.get('publishOn')),
          }
+
+        media_id = record.get('turnerMediaId')
+        if media_id:
+            self._initialize_geo_bypass({
+                'countries': ['US'],
+            })
+            info.update(self._extract_ngtv_info(media_id, {
+                'accessToken': record['turnerMediaAuthToken'],
+                'accessTokenType': 'jws',
+            }))
+        else:
+            video_sources = self._download_json(
+                'https://teamcoco.com/_truman/d/' + video_id,
+                video_id)['meta']['src']
+            if isinstance(video_sources, dict):
+                video_sources = video_sources.values()
+
+            formats = []
+            get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+            for src in video_sources:
+                if not isinstance(src, dict):
+                    continue
+                src_url = src.get('src')
+                if not src_url:
+                    continue
+                format_id = src.get('label')
+                ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+                if format_id == 'hls' or ext == 'm3u8':
+                    # compat_urllib_parse.urljoin does not work here
+                    if src_url.startswith('/'):
+                        src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+                    formats.extend(self._extract_m3u8_formats(
+                        src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+                else:
+                    if src_url.startswith('/mp4:protected/'):
+                        # TODO Correct extraction for these files
+                        continue
+                    tbr = int_or_none(self._search_regex(
+                        r'(\d+)k\.mp4', src_url, 'tbr', default=None))
+
+                    formats.append({
+                        'url': src_url,
+                        'ext': ext,
+                        'tbr': tbr,
+                        'format_id': format_id,
+                        'quality': get_quality(format_id),
+                    })
+            self._sort_formats(formats)
+            info['formats'] = formats
+
+        return info