Merge branch 'pr-fix_bbc_mediaselector' of https://github.com/atomicdryad/youtube-dl ...

author Sergey M․ <dstftw@gmail.com>

Wed, 29 Jul 2015 17:55:40 +0000 (23:55 +0600)

committer Sergey M․ <dstftw@gmail.com>

Wed, 29 Jul 2015 17:55:40 +0000 (23:55 +0600)
author Sergey M․ <dstftw@gmail.com>
Wed, 29 Jul 2015 17:55:40 +0000 (23:55 +0600)
committer Sergey M․ <dstftw@gmail.com>
Wed, 29 Jul 2015 17:55:40 +0000 (23:55 +0600)
diff --combined youtube_dl/extractor/bbc.py

index 01d07c9c0c3e9dedb41ba560e7aecae5ff26c2e5,0f0ea7cfdd382e16d667012e4244403c52701b27..3d9366644a8be26da027dd7e8ecff96341273a5a
--- 1/youtube_dl/extractor/bbc.py
--- 2/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@@ -14,7 -14,6 +14,6 @@@ from ..utils import 
   )
   from ..compat import compat_HTTPError
   
- 
   class BBCCoUkIE(InfoExtractor):
       IE_NAME = 'bbc.co.uk'
       IE_DESC = 'BBC iPlayer'
@@@ -271,8 -270,16 +270,16 @@@
           return subtitles
   
       def _download_media_selector(self, programme_id):
-         return self._download_media_selector_url(
-             self._MEDIASELECTOR_URL % programme_id, programme_id)
+         try:
+             return self._download_media_selector_url(
+                 self._MEDIASELECTOR_URL % programme_id, programme_id)
+         except ExtractorError as e:
+             if hasattr(self, '_MEDIASELECTOR_ALT_URL') and str(e) == 'bbc returned error: notukerror':
+                  # notukerror on bbc.com/travel using bbc news mediaselector: fallback to /mediaselector/5/
+                  return self._download_media_selector_url(
+                      self._MEDIASELECTOR_ALT_URL % programme_id, programme_id)
+             else:
+                  raise
   
       def _download_media_selector_url(self, url, programme_id=None):
           try:
@@@ -297,7 -304,6 +304,6 @@@
                   formats.extend(self._extract_video(media, programme_id))
               elif kind == 'captions':
                   subtitles = self.extract_subtitles(media, programme_id)
- 
           return formats, subtitles
   
       def _download_playlist(self, playlist_id):
@@@ -426,9 -432,10 +432,10 @@@ class BBCIE(BBCCoUkIE)
       IE_DESC = 'BBC'
       _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
   
-     # fails with notukerror for some videos
-     # _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s'
-     _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s'
+     # fails with notukerror for some videos ( non news sites such as bbc.com/travel )
+     _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s'
+     # limited selection of formats but may work where the above does not
+     _MEDIASELECTOR_ALT_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s'
   
       _TESTS = [{
           # article with multiple videos embedded with data-media-meta containing
@@@ -450,20 -457,12 +457,20 @@@
           },
           'playlist_count': 9,
           'skip': 'Save time',
+ +    }, {
+ +        # article with multiple videos embedded with `new SMP()`
+ +        'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
+ +        'info_dict': {
+ +            'id': '3662a707-0af9-3149-963f-47bea720b460',
+ +            'title': 'BBC Blogs - Adam Curtis - BUGGER',
+ +        },
+ +        'playlist_count': 18,
       }, {
           # single video embedded with mediaAssetPage.init()
           'url': 'http://www.bbc.com/news/world-europe-32041533',
           'info_dict': {
               'id': 'p02mprgb',
-             'ext': 'flv',
+             'ext': 'mp4',
               'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
               'duration': 47,
               'timestamp': 1427219242,
@@@ -523,7 -522,7 +530,7 @@@
           'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
           'info_dict': {
               'id': 'p018zqqg',
-             'ext': 'flv',
+             'ext': 'mp4',
               'title': 'Hyundai Santa Fe Sport: Rock star',
               'description': 'md5:b042a26142c4154a6e472933cf20793d',
               'timestamp': 1368473503,
@@@ -538,7 -537,7 +545,7 @@@
           'url': 'http://www.bbc.com/sport/0/football/33653409',
           'info_dict': {
               'id': 'p02xycnp',
-             'ext': 'flv',
+             'ext': 'mp4',
               'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
               'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
               'duration': 140,
@@@ -645,30 -644,12 +652,30 @@@
   
           playlist_title = self._html_search_regex(
               r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
- -        playlist_description = self._og_search_description(webpage)
+ +        playlist_description = self._og_search_description(webpage, default=None)
+ +
+ +        def extract_all(pattern):
+ +            return list(filter(None, map(
+ +                lambda s: self._parse_json(s, playlist_id, fatal=False),
+ +                re.findall(pattern, webpage))))
+ +
+ +        # Multiple video article (e.g.
+ +        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
+ +        EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
+ +        entries = []
+ +        for match in extract_all(r'new\s+SMP\(({.+?})\)'):
+ +            embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
+ +            if embed_url and re.match(EMBED_URL, embed_url):
+ +                entries.append(embed_url)
+ +        entries.extend(re.findall(
+ +            r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
+ +        if entries:
+ +            return self.playlist_result(
+ +                [self.url_result(entry, 'BBCCoUk') for entry in entries],
+ +                playlist_id, playlist_title, playlist_description)
   
           # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
- -        medias = list(filter(None, map(
- -            lambda s: self._parse_json(s, playlist_id, fatal=False),
- -            re.findall(r"data-media-meta='({[^']+})'", webpage))))
+ +        medias = extract_all(r"data-media-meta='({[^']+})'")
   
           if not medias:
               # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
author	Sergey M․ <dstftw@gmail.com>
	Wed, 29 Jul 2015 17:55:40 +0000 (23:55 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Wed, 29 Jul 2015 17:55:40 +0000 (23:55 +0600)