[youtube] Fix extraction (closes #20758, closes #20759, closes #20761, closes #20762...

[youtube-dl] / youtube_dl / extractor / teamcoco.py
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py

index 63fd4fe1c78ef295d8e2447acffddcb1fb400ab6..7640cf00a1d87bc2582e355899fca3e017babc69 100644 (file)
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
  
  import json
  
-from .common import InfoExtractor
+from .turner import TurnerBaseIE
  from ..utils import (
      determine_ext,
      ExtractorError,
@@ -15,8 +15,8 @@ from ..utils import (
  )
  
  
-class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
+class TeamcocoIE(TurnerBaseIE):
+    _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
      _TESTS = [
          {
              'url': 'http://teamcoco.com/video/mary-kay-remote',
@@ -79,15 +79,20 @@ class TeamcocoIE(InfoExtractor):
          }, {
              'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
              'only_matching': True,
+        }, {
+            'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
+            'only_matching': True,
          }
      ]
  
      def _graphql_call(self, query_template, object_type, object_id):
          find_object = 'find' + object_type
          return self._download_json(
-            'http://teamcoco.com/graphql/', object_id, data=json.dumps({
+            'https://teamcoco.com/graphql', object_id, data=json.dumps({
                  'query': query_template % (find_object, object_id)
-            }))['data'][find_object]
+            }).encode(), headers={
+                'Content-Type': 'application/json',
+            })['data'][find_object]
  
      def _real_extract(self, url):
          display_id = self._match_id(url)
@@ -110,6 +115,8 @@ class TeamcocoIE(InfoExtractor):
            name
          }
          duration
+        turnerMediaId
+        turnerMediaAuthToken
        }
      }
      ... on NotFoundSlug {
@@ -123,53 +130,70 @@ class TeamcocoIE(InfoExtractor):
          record = response['record']
          video_id = record['id']
  
-        video_sources = self._graphql_call('''{
-  %s(id: "%s") {
-    src
-  }
-}''', 'RecordVideoSource', video_id) or {}
-
-        formats = []
-        get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
-        for format_id, src in video_sources.get('src', {}).items():
-            if not isinstance(src, dict):
-                continue
-            src_url = src.get('src')
-            if not src_url:
-                continue
-            ext = determine_ext(src_url, mimetype2ext(src.get('type')))
-            if format_id == 'hls' or ext == 'm3u8':
-                # compat_urllib_parse.urljoin does not work here
-                if src_url.startswith('/'):
-                    src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
-                formats.extend(self._extract_m3u8_formats(
-                    src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
-            else:
-                if src_url.startswith('/mp4:protected/'):
-                    # TODO Correct extraction for these files
-                    continue
-                tbr = int_or_none(self._search_regex(
-                    r'(\d+)k\.mp4', src_url, 'tbr', default=None))
-
-                formats.append({
-                    'url': src_url,
-                    'ext': ext,
-                    'tbr': tbr,
-                    'format_id': format_id,
-                    'quality': get_quality(format_id),
-                })
-        if not formats:
-            formats = self._extract_m3u8_formats(
-                record['file']['url'], video_id, 'mp4', fatal=False)
-        self._sort_formats(formats)
-
-        return {
+        info = {
              'id': video_id,
              'display_id': display_id,
-            'formats': formats,
              'title': record['title'],
              'thumbnail': record.get('thumb', {}).get('preview'),
              'description': record.get('teaser'),
              'duration': parse_duration(record.get('duration')),
              'timestamp': parse_iso8601(record.get('publishOn')),
          }
+
+        media_id = record.get('turnerMediaId')
+        if media_id:
+            self._initialize_geo_bypass({
+                'countries': ['US'],
+            })
+            info.update(self._extract_ngtv_info(media_id, {
+                'accessToken': record['turnerMediaAuthToken'],
+                'accessTokenType': 'jws',
+            }))
+        else:
+            d = self._download_json(
+                'https://teamcoco.com/_truman/d/' + video_id,
+                video_id, fatal=False) or {}
+            video_sources = d.get('meta') or {}
+            if not video_sources:
+                video_sources = self._graphql_call('''{
+  %s(id: "%s") {
+    src
+  }
+}''', 'RecordVideoSource', video_id) or {}
+
+            formats = []
+            get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+            for format_id, src in video_sources.get('src', {}).items():
+                if not isinstance(src, dict):
+                    continue
+                src_url = src.get('src')
+                if not src_url:
+                    continue
+                ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+                if format_id == 'hls' or ext == 'm3u8':
+                    # compat_urllib_parse.urljoin does not work here
+                    if src_url.startswith('/'):
+                        src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+                    formats.extend(self._extract_m3u8_formats(
+                        src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+                else:
+                    if src_url.startswith('/mp4:protected/'):
+                        # TODO Correct extraction for these files
+                        continue
+                    tbr = int_or_none(self._search_regex(
+                        r'(\d+)k\.mp4', src_url, 'tbr', default=None))
+
+                    formats.append({
+                        'url': src_url,
+                        'ext': ext,
+                        'tbr': tbr,
+                        'format_id': format_id,
+                        'quality': get_quality(format_id),
+                    })
+            if not formats:
+                formats = self._extract_m3u8_formats(
+                    record['file']['url'], video_id, 'mp4', fatal=False)
+            self._sort_formats(formats)
+            info['formats'] = formats
+
+        return info