[americastestkitchen] Improve (closes #13996)
author Sergey M․ <dstftw@gmail.com>
Fri, 22 Sep 2017 23:28:46 +0000 (06:28 +0700)
committer Sergey M․ <dstftw@gmail.com>
Fri, 22 Sep 2017 23:29:20 +0000 (06:29 +0700)
youtube_dl/extractor/americastestkitchen.py

index f231e7f6ef1c9ba1da40082f56b9ecc40817f522..01736872dc79709ea756a3d3af255662e19491bf 100755 (executable)
@@ -1,85 +1,85 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    try_get,
+    unified_strdate,
+)
 
 
 class AmericasTestKitchenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/episode/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
     _TESTS = [{
-        'url':
-        'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
+        'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
         'md5': 'b861c3e365ac38ad319cfd509c30577f',
         'info_dict': {
             'id': '1_5g5zua6e',
-            'title': 'atk_s17_e24.mp4',
+            'title': 'Summer Dinner Party',
             'ext': 'mp4',
-            'description': '<p>Host Julia Collin Davison goes into the test kitchen with test cook Dan Souza to learn how to make the ultimate Grill-Roasted Beef Tenderloin. Next, equipment expert Adam Ried reviews gas grills in the Equipment Corner. Then, gadget guru Lisa McManus uncovers the best quirky gadgets. Finally, test cook Erin McMurrer shows host Bridget Lancaster how to make an elegant Pear-Walnut Upside-Down Cake.</p>',
+            'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
+            'thumbnail': r're:^https?://.*\.jpg',
             'timestamp': 1497285541,
             'upload_date': '20170612',
             'uploader_id': 'roger.metcalf@americastestkitchen.com',
-            'release_date': '2017-06-17',
-            'thumbnail': 'http://d3cizcpymoenau.cloudfront.net/images/35973/e24-tenderloin-16.jpg',
-            'episode_number': 24,
+            'release_date': '20170617',
+            'series': "America's Test Kitchen",
+            'season_number': 17,
             'episode': 'Summer Dinner Party',
-            'episode_id': '548-summer-dinner-party',
-            'season_number': 17
+            'episode_number': 24,
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
     }, {
-        'url':
-        'https://www.americastestkitchen.com/episode/546-a-spanish-affair',
-        'only_matching':
-        True,
+        'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
 
         partner_id = self._search_regex(
-            r'partner_id/(?P<partner_id>\d+)',
-            webpage,
-            'partner_id',
-            group='partner_id')
+            r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
+            webpage, 'kaltura partner id')
 
         video_data = self._parse_json(
             self._search_regex(
-                r'window\.__INITIAL_STATE__\s*=\s*({.+?});\s*</script>',
+                r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
                 webpage, 'initial context'),
             video_id)
 
-        episode_data = video_data['episodeDetail']['content']['data']
-        episode_content_meta = episode_data['full_video']
-        external_id = episode_content_meta['external_id']
+        ep_data = try_get(
+            video_data,
+            (lambda x: x['episodeDetail']['content']['data'],
+             lambda x: x['videoDetail']['content']['data']), dict)
+        ep_meta = ep_data.get('full_video', {})
+        external_id = ep_data.get('external_id') or ep_meta['external_id']
 
-        # photo data
-        photo_data = episode_content_meta.get('photo')
-        thumbnail = photo_data.get('image_url') if photo_data else None
+        title = ep_data.get('title') or ep_meta.get('title')
+        description = clean_html(ep_meta.get('episode_description') or ep_data.get(
+            'description') or ep_meta.get('description'))
+        thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
+        release_date = unified_strdate(ep_data.get('aired_at'))
 
-        # meta
-        release_date = episode_data.get('aired_at')
-        description = episode_content_meta.get('description')
-        episode_number = int(episode_content_meta.get('episode_number'))
-        episode = episode_content_meta.get('title')
-        episode_id = episode_content_meta.get('episode_slug')
-        season_number = int(episode_content_meta.get('season_number'))
+        season_number = int_or_none(ep_meta.get('season_number'))
+        episode = ep_meta.get('title')
+        episode_number = int_or_none(ep_meta.get('episode_number'))
 
         return {
             '_type': 'url_transparent',
             'url': 'kaltura:%s:%s' % (partner_id, external_id),
             'ie_key': 'Kaltura',
-            'id': video_id,
-            'release_date': release_date,
-            'thumbnail': thumbnail,
+            'title': title,
             'description': description,
-            'episode_number': episode_number,
+            'thumbnail': thumbnail,
+            'release_date': release_date,
+            'series': "America's Test Kitchen",
+            'season_number': season_number,
             'episode': episode,
-            'episode_id': episode_id,
-            'season_number': season_number
+            'episode_number': episode_number,
         }