[bbc] extract more and better qualities from Unified Streaming Platform m3u8 manifests
author Remita Amine <remitamine@gmail.com>
Wed, 13 Jul 2016 14:57:50 +0000 (15:57 +0100)
committer Remita Amine <remitamine@gmail.com>
Wed, 13 Jul 2016 14:58:24 +0000 (15:58 +0100)
youtube_dl/extractor/bbc.py

index 4b3cd8c65a65967c5ad017d53126e9ccc76a71ef..50c1da185b74695f9197826e7e217fbaffe495c8 100644 (file)
@@ -44,6 +44,8 @@ class BBCCoUkIE(InfoExtractor):
 
     _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
     _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
+    # Unified Streaming Platform
+    _USP_RE = r'/([^/]+)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
 
     _NAMESPACES = (
         _MEDIASELECTION_NS,
@@ -55,12 +57,11 @@ class BBCCoUkIE(InfoExtractor):
             'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
             'info_dict': {
                 'id': 'b039d07m',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
                 'description': 'The Canadian poet and songwriter reflects on his musical career.',
             },
             'params': {
-                # rtmp download
                 'skip_download': True,
             }
         },
@@ -92,7 +93,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+            'skip': 'this episode is not currently available',
         },
         {
             'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
@@ -107,7 +108,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+            'skip': 'this episode is not currently available',
         }, {
             'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
             'info_dict': {
@@ -127,13 +128,12 @@ class BBCCoUkIE(InfoExtractor):
             'note': 'Audio',
             'info_dict': {
                 'id': 'p022h44j',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
                 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
                 'duration': 227,
             },
             'params': {
-                # rtmp download
                 'skip_download': True,
             }
         }, {
@@ -141,13 +141,12 @@ class BBCCoUkIE(InfoExtractor):
             'note': 'Video',
             'info_dict': {
                 'id': 'p025c103',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
                 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
                 'duration': 226,
             },
             'params': {
-                # rtmp download
                 'skip_download': True,
             }
         }, {
@@ -163,7 +162,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'geolocation',
+            'skip': 'this episode is not currently available',
         }, {
             'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
             'info_dict': {
@@ -177,7 +176,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'geolocation',
+            'skip': 'this episode is not currently available',
         }, {
             # iptv-all mediaset fails with geolocation however there is no geo restriction
             # for this programme at all
@@ -192,18 +191,17 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
-            'skip': 'Now it\'s really geo-restricted',
+            'skip': 'this episode is not currently available on BBC iPlayer Radio',
         }, {
             # compact player (https://github.com/rg3/youtube-dl/issues/8147)
             'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
             'info_dict': {
                 'id': 'p028bfkj',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
                 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
             },
             'params': {
-                # rtmp download
                 'skip_download': True,
             },
         }, {
@@ -248,9 +246,15 @@ class BBCCoUkIE(InfoExtractor):
             elif transfer_format == 'dash':
                 pass
             elif transfer_format == 'hls':
-                formats.extend(self._extract_m3u8_formats(
+                is_unified_streaming = re.search(self._USP_RE, href)
+                if is_unified_streaming:
+                    href = re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href)
+                m3u8_formats = self._extract_m3u8_formats(
                     href, programme_id, ext='mp4', entry_protocol='m3u8_native',
-                    m3u8_id=supplier, fatal=False))
+                    m3u8_id=supplier, fatal=False)
+                if is_unified_streaming:
+                    self._check_formats(m3u8_formats, programme_id)
+                formats.extend(m3u8_formats)
             # Direct link
             else:
                 formats.append({
@@ -305,13 +309,14 @@ class BBCCoUkIE(InfoExtractor):
         for connection in self._extract_connections(media):
             conn_formats = self._extract_connection(connection, programme_id)
             for format in conn_formats:
-                format.update({
-                    'width': width,
-                    'height': height,
-                    'vbr': vbr,
-                    'vcodec': vcodec,
-                    'filesize': file_size,
-                })
+                if format.get('protocol') != 'm3u8_native':
+                    format.update({
+                        'width': width,
+                        'height': height,
+                        'vbr': vbr,
+                        'vcodec': vcodec,
+                        'filesize': file_size,
+                    })
                 if service:
                     format['format_id'] = '%s_%s' % (service, format['format_id'])
             formats.extend(conn_formats)