[bbc] Improve extraction from sxml playlists
[youtube-dl] / youtube_dl / extractor / bbc.py
index 23c6e505beef83c597717b1c72c6e7cd10a69060..e3b14c854d0184bba5b20cce5834d39456dc1ac2 100644 (file)
@@ -55,11 +55,12 @@ class BBCCoUkIE(InfoExtractor):
             'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
             'info_dict': {
                 'id': 'b039d07m',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
                 'description': 'The Canadian poet and songwriter reflects on his musical career.',
             },
             'params': {
+                # rtmp download
                 'skip_download': True,
             }
         },
@@ -91,7 +92,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available',
+            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
         },
         {
             'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
@@ -106,7 +107,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available',
+            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
         }, {
             'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
             'info_dict': {
@@ -126,12 +127,13 @@ class BBCCoUkIE(InfoExtractor):
             'note': 'Audio',
             'info_dict': {
                 'id': 'p022h44j',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
                 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
                 'duration': 227,
             },
             'params': {
+                # rtmp download
                 'skip_download': True,
             }
         }, {
@@ -139,12 +141,13 @@ class BBCCoUkIE(InfoExtractor):
             'note': 'Video',
             'info_dict': {
                 'id': 'p025c103',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
                 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
                 'duration': 226,
             },
             'params': {
+                # rtmp download
                 'skip_download': True,
             }
         }, {
@@ -160,7 +163,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available',
+            'skip': 'geolocation',
         }, {
             'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
             'info_dict': {
@@ -174,7 +177,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available',
+            'skip': 'geolocation',
         }, {
             # iptv-all mediaset fails with geolocation however there is no geo restriction
             # for this programme at all
@@ -189,17 +192,18 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'this episode is not currently available on BBC iPlayer Radio',
+            'skip': 'Now it\'s really geo-restricted',
         }, {
             # compact player (https://github.com/rg3/youtube-dl/issues/8147)
             'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
             'info_dict': {
                 'id': 'p028bfkj',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
                 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
             },
             'params': {
+                # rtmp download
                 'skip_download': True,
             },
         }, {
@@ -245,7 +249,7 @@ class BBCCoUkIE(InfoExtractor):
                 pass
             elif transfer_format == 'hls':
                 formats.extend(self._extract_m3u8_formats(
-                    href, programme_id, 'mp4', 'm3u8_native',
+                    href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                     m3u8_id=supplier, fatal=False))
             # Direct link
             else:
@@ -301,14 +305,13 @@ class BBCCoUkIE(InfoExtractor):
         for connection in self._extract_connections(media):
             conn_formats = self._extract_connection(connection, programme_id)
             for format in conn_formats:
-                if format.get('protocol') != 'm3u8_native':
-                    format.update({
-                        'width': width,
-                        'height': height,
-                        'vbr': vbr,
-                        'vcodec': vcodec,
-                        'filesize': file_size,
-                    })
+                format.update({
+                    'width': width,
+                    'height': height,
+                    'vbr': vbr,
+                    'vcodec': vcodec,
+                    'filesize': file_size,
+                })
                 if service:
                     format['format_id'] = '%s_%s' % (service, format['format_id'])
             formats.extend(conn_formats)
@@ -586,7 +589,8 @@ class BBCIE(BBCCoUkIE):
         'info_dict': {
             'id': '150615_telabyad_kentin_cogu',
             'ext': 'mp4',
-            'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
+            'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
+            'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
             'timestamp': 1434397334,
             'upload_date': '20150615',
         },
@@ -600,6 +604,7 @@ class BBCIE(BBCCoUkIE):
             'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
             'ext': 'mp4',
             'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
+            'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
             'timestamp': 1434713142,
             'upload_date': '20150619',
         },
@@ -815,8 +820,20 @@ class BBCIE(BBCCoUkIE):
                         # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
                         playlist = data_playable.get('otherSettings', {}).get('playlist', {})
                         if playlist:
-                            entries.append(self._extract_from_playlist_sxml(
-                                playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
+                            for key in ('progressiveDownload', 'streaming'):
+                                playlist_url = playlist.get('%sUrl' % key)
+                                if not playlist_url:
+                                    continue
+                                try:
+                                    entries.append(self._extract_from_playlist_sxml(
+                                        playlist_url, playlist_id, timestamp))
+                                except Exception as e:
+                                    # One playlist URL may fail with HTTP 500 while the other works (e.g.
+                                    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu).
+                                    # Use getattr: an exception without a `cause` attribute must be re-raised as is.
+                                    if isinstance(getattr(e, 'cause', None), compat_HTTPError) and e.cause.code == 500:
+                                        continue
+                                    raise
 
         if entries:
             return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)