git.bitcoin.ninja Git - youtube-dl/commitdiff
Merge branch 'pr-fix_bbc_mediaselector' of https://github.com/atomicdryad/youtube...
author: Sergey M․ <dstftw@gmail.com>
Wed, 29 Jul 2015 17:55:40 +0000 (23:55 +0600)
committer: Sergey M․ <dstftw@gmail.com>
Wed, 29 Jul 2015 17:55:40 +0000 (23:55 +0600)
1  2 
youtube_dl/extractor/bbc.py

index 01d07c9c0c3e9dedb41ba560e7aecae5ff26c2e5,0f0ea7cfdd382e16d667012e4244403c52701b27..3d9366644a8be26da027dd7e8ecff96341273a5a
@@@ -14,7 -14,6 +14,6 @@@ from ..utils import 
  )
  from ..compat import compat_HTTPError
  
  class BBCCoUkIE(InfoExtractor):
      IE_NAME = 'bbc.co.uk'
      IE_DESC = 'BBC iPlayer'
          return subtitles
  
      def _download_media_selector(self, programme_id):
-         return self._download_media_selector_url(
-             self._MEDIASELECTOR_URL % programme_id, programme_id)
+         try:
+             return self._download_media_selector_url(
+                 self._MEDIASELECTOR_URL % programme_id, programme_id)
+         except ExtractorError as e:
+             if hasattr(self, '_MEDIASELECTOR_ALT_URL') and str(e) == 'bbc returned error: notukerror':
+                  # notukerror on bbc.com/travel using bbc news mediaselector: fallback to /mediaselector/5/
+                  return self._download_media_selector_url(
+                      self._MEDIASELECTOR_ALT_URL % programme_id, programme_id)
+             else:
+                  raise
  
      def _download_media_selector_url(self, url, programme_id=None):
          try:
                  formats.extend(self._extract_video(media, programme_id))
              elif kind == 'captions':
                  subtitles = self.extract_subtitles(media, programme_id)
          return formats, subtitles
  
      def _download_playlist(self, playlist_id):
@@@ -426,9 -432,10 +432,10 @@@ class BBCIE(BBCCoUkIE)
      IE_DESC = 'BBC'
      _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
  
-     # fails with notukerror for some videos
-     # _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s'
-     _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s'
+     # fails with notukerror for some videos ( non news sites such as bbc.com/travel )
+     _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s'
+     # limited selection of formats but may work where the above does not
+     _MEDIASELECTOR_ALT_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s'
  
      _TESTS = [{
          # article with multiple videos embedded with data-media-meta containing
          },
          'playlist_count': 9,
          'skip': 'Save time',
 +    }, {
 +        # article with multiple videos embedded with `new SMP()`
 +        'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
 +        'info_dict': {
 +            'id': '3662a707-0af9-3149-963f-47bea720b460',
 +            'title': 'BBC Blogs - Adam Curtis - BUGGER',
 +        },
 +        'playlist_count': 18,
      }, {
          # single video embedded with mediaAssetPage.init()
          'url': 'http://www.bbc.com/news/world-europe-32041533',
          'info_dict': {
              'id': 'p02mprgb',
-             'ext': 'flv',
+             'ext': 'mp4',
              'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
              'duration': 47,
              'timestamp': 1427219242,
          'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
          'info_dict': {
              'id': 'p018zqqg',
-             'ext': 'flv',
+             'ext': 'mp4',
              'title': 'Hyundai Santa Fe Sport: Rock star',
              'description': 'md5:b042a26142c4154a6e472933cf20793d',
              'timestamp': 1368473503,
          'url': 'http://www.bbc.com/sport/0/football/33653409',
          'info_dict': {
              'id': 'p02xycnp',
-             'ext': 'flv',
+             'ext': 'mp4',
              'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
              'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
              'duration': 140,
  
          playlist_title = self._html_search_regex(
              r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
 -        playlist_description = self._og_search_description(webpage)
 +        playlist_description = self._og_search_description(webpage, default=None)
 +
 +        def extract_all(pattern):
 +            return list(filter(None, map(
 +                lambda s: self._parse_json(s, playlist_id, fatal=False),
 +                re.findall(pattern, webpage))))
 +
 +        # Multiple video article (e.g.
 +        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
 +        EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
 +        entries = []
 +        for match in extract_all(r'new\s+SMP\(({.+?})\)'):
 +            embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
 +            if embed_url and re.match(EMBED_URL, embed_url):
 +                entries.append(embed_url)
 +        entries.extend(re.findall(
 +            r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
 +        if entries:
 +            return self.playlist_result(
 +                [self.url_result(entry, 'BBCCoUk') for entry in entries],
 +                playlist_id, playlist_title, playlist_description)
  
          # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
 -        medias = list(filter(None, map(
 -            lambda s: self._parse_json(s, playlist_id, fatal=False),
 -            re.findall(r"data-media-meta='({[^']+})'", webpage))))
 +        medias = extract_all(r"data-media-meta='({[^']+})'")
  
          if not medias:
              # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)