[youtube:shared] Relax _VALID_URL
[youtube-dl] / youtube_dl / extractor / bbc.py
index 50c1da185b74695f9197826e7e217fbaffe495c8..9cb7630a1d15ac495bb45e29f2f8edea0c4be9e6 100644 (file)
@@ -44,8 +44,6 @@ class BBCCoUkIE(InfoExtractor):
 
     _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
     _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
-    # Unified Streaming Platform
-    _USP_RE = r'/([^/]+)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
 
     _NAMESPACES = (
         _MEDIASELECTION_NS,
@@ -57,11 +55,12 @@ class BBCCoUkIE(InfoExtractor):
             'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
             'info_dict': {
                 'id': 'b039d07m',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
                 'description': 'The Canadian poet and songwriter reflects on his musical career.',
             },
             'params': {
+                # rtmp download
                 'skip_download': True,
             }
         },
@@ -93,7 +92,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available',
+            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
         },
         {
             'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
@@ -108,7 +107,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available',
+            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
         }, {
             'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
             'info_dict': {
@@ -128,12 +127,13 @@ class BBCCoUkIE(InfoExtractor):
             'note': 'Audio',
             'info_dict': {
                 'id': 'p022h44j',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
                 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
                 'duration': 227,
             },
             'params': {
+                # rtmp download
                 'skip_download': True,
             }
         }, {
@@ -141,12 +141,13 @@ class BBCCoUkIE(InfoExtractor):
             'note': 'Video',
             'info_dict': {
                 'id': 'p025c103',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
                 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
                 'duration': 226,
             },
             'params': {
+                # rtmp download
                 'skip_download': True,
             }
         }, {
@@ -162,7 +163,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available',
+            'skip': 'geolocation',
         }, {
             'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
             'info_dict': {
@@ -176,7 +177,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available',
+            'skip': 'geolocation',
         }, {
             # iptv-all mediaset fails with geolocation however there is no geo restriction
             # for this programme at all
@@ -191,17 +192,18 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available on BBC iPlayer Radio',
+            'skip': 'Now it\'s really geo-restricted',
         }, {
             # compact player (https://github.com/rg3/youtube-dl/issues/8147)
             'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
             'info_dict': {
                 'id': 'p028bfkj',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
                 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
             },
             'params': {
+                # rtmp download
                 'skip_download': True,
             },
         }, {
@@ -246,15 +248,9 @@ class BBCCoUkIE(InfoExtractor):
             elif transfer_format == 'dash':
                 pass
             elif transfer_format == 'hls':
-                is_unified_streaming = re.search(self._USP_RE, href)
-                if is_unified_streaming:
-                    href = re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href)
-                m3u8_formats = self._extract_m3u8_formats(
+                formats.extend(self._extract_m3u8_formats(
                     href, programme_id, ext='mp4', entry_protocol='m3u8_native',
-                    m3u8_id=supplier, fatal=False)
-                if is_unified_streaming:
-                    self._check_formats(m3u8_formats, programme_id)
-                formats.extend(m3u8_formats)
+                    m3u8_id=supplier, fatal=False))
             # Direct link
             else:
                 formats.append({
@@ -309,14 +305,13 @@ class BBCCoUkIE(InfoExtractor):
         for connection in self._extract_connections(media):
             conn_formats = self._extract_connection(connection, programme_id)
             for format in conn_formats:
-                if format.get('protocol') != 'm3u8_native':
-                    format.update({
-                        'width': width,
-                        'height': height,
-                        'vbr': vbr,
-                        'vcodec': vcodec,
-                        'filesize': file_size,
-                    })
+                format.update({
+                    'width': width,
+                    'height': height,
+                    'vbr': vbr,
+                    'vcodec': vcodec,
+                    'filesize': file_size,
+                })
                 if service:
                     format['format_id'] = '%s_%s' % (service, format['format_id'])
             formats.extend(conn_formats)
@@ -594,7 +589,8 @@ class BBCIE(BBCCoUkIE):
         'info_dict': {
             'id': '150615_telabyad_kentin_cogu',
             'ext': 'mp4',
-            'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
+            'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
+            'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
             'timestamp': 1434397334,
             'upload_date': '20150615',
         },
@@ -608,6 +604,7 @@ class BBCIE(BBCCoUkIE):
             'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
             'ext': 'mp4',
             'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
+            'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
             'timestamp': 1434713142,
             'upload_date': '20150619',
         },
@@ -823,8 +820,20 @@ class BBCIE(BBCCoUkIE):
                         # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
                         playlist = data_playable.get('otherSettings', {}).get('playlist', {})
                         if playlist:
-                            entries.append(self._extract_from_playlist_sxml(
-                                playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
+                            for key in ('progressiveDownload', 'streaming'):
+                                playlist_url = playlist.get('%sUrl' % key)
+                                if not playlist_url:
+                                    continue
+                                try:
+                                    entries.append(self._extract_from_playlist_sxml(
+                                        playlist_url, playlist_id, timestamp))
+                                except Exception as e:
+                                    # One playlist URL may fail with HTTP 500 while
+                                    # the other one works fine (e.g.
+                                    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
+                                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
+                                        continue
+                                    raise
 
         if entries:
             return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
@@ -1003,10 +1012,10 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
 
 class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
     IE_NAME = 'bbc.co.uk:iplayer:playlist'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
     _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
     _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
         'info_dict': {
             'id': 'b05rcz9v',
@@ -1014,7 +1023,17 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
             'description': 'French thriller serial about a missing teenager.',
         },
         'playlist_mincount': 6,
-    }
+        'skip': 'This programme is not currently available on BBC iPlayer',
+    }, {
+        # Available for over a year, unlike the 30 days typical of most other programmes
+        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
+        'info_dict': {
+            'id': 'p02tcc32',
+            'title': 'Bohemian Icons',
+            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
+        },
+        'playlist_mincount': 10,
+    }]
 
     def _extract_title_and_description(self, webpage):
         title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)