[appletrailers] extract info from an alternative source if available(closes #8422...
authorRemita Amine <remitamine@gmail.com>
Thu, 23 Jun 2016 14:49:42 +0000 (15:49 +0100)
committerRemita Amine <remitamine@gmail.com>
Thu, 23 Jun 2016 14:49:42 +0000 (15:49 +0100)
youtube_dl/extractor/appletrailers.py

index be40f85b487057b4cb319dba102cec76519880a5..babbd0265c351182260c88f2a953b8feca4792aa 100644 (file)
@@ -7,6 +7,8 @@ from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
     int_or_none,
+    parse_duration,
+    unified_strdate,
 )
 
 
@@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
         'info_dict': {
-            'id': 'manofsteel',
+            'id': '5111',
+            'title': 'Man of Steel',
         },
         'playlist': [
             {
@@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
             'id': 'blackthorn',
         },
         'playlist_mincount': 2,
+        'expected_warnings': ['Unable to download JSON metadata'],
+    }, {
+        # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
+        'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
+        'info_dict': {
+            'id': '15881',
+            'title': 'Kung Fu Panda 3',
+        },
+        'playlist_mincount': 4,
     }, {
         'url': 'http://trailers.apple.com/ca/metropole/autrui/',
         'only_matching': True,
@@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
         movie = mobj.group('movie')
         uploader_id = mobj.group('company')
 
+        webpage = self._download_webpage(url, movie)
+        film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
+        film_data = self._download_json(
+            'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
+            film_id, fatal=False)
+
+        if film_data:
+            entries = []
+            for clip in film_data.get('clips', []):
+                clip_title = clip['title']
+
+                formats = []
+                for version, version_data in clip.get('versions', {}).items():
+                    for size, size_data in version_data.get('sizes', {}).items():
+                        src = size_data.get('src')
+                        if not src:
+                            continue
+                        formats.append({
+                            'format_id': '%s-%s' % (version, size),
+                            'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
+                            'width': int_or_none(size_data.get('width')),
+                            'height': int_or_none(size_data.get('height')),
+                            'language': version[:2],
+                        })
+                self._sort_formats(formats)
+
+                entries.append({
+                    'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
+                    'formats': formats,
+                    'title': clip_title,
+                    'thumbnail': clip.get('screen') or clip.get('runtime'),
+                    'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
+                    'upload_date': unified_strdate(clip.get('posted')),
+                    'uploader_id': uploader_id,
+                })
+
+            page_data = film_data.get('page', {})
+            return self.playlist_result(entries, film_id, page_data.get('movie_title'))
+
         playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
 
         def fix_html(s):