X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbbc.py;h=9cb7630a1d15ac495bb45e29f2f8edea0c4be9e6;hb=42b7a5afe09e485503cbe9794c7ad18c46dc838d;hp=50c1da185b74695f9197826e7e217fbaffe495c8;hpb=0385aa6199206e4ba7745efec73be26c5826286a;p=youtube-dl diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 50c1da185..9cb7630a1 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -44,8 +44,6 @@ class BBCCoUkIE(InfoExtractor): _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection' _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist' - # Unified Streaming Platform - _USP_RE = r'/([^/]+)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' _NAMESPACES = ( _MEDIASELECTION_NS, @@ -57,11 +55,12 @@ class BBCCoUkIE(InfoExtractor): 'url': 'http://www.bbc.co.uk/programmes/b039g8p7', 'info_dict': { 'id': 'b039d07m', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4', 'description': 'The Canadian poet and songwriter reflects on his musical career.', }, 'params': { + # rtmp download 'skip_download': True, } }, @@ -93,7 +92,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available', + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion', @@ -108,7 +107,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available', + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', }, { 'url': 'http://www.bbc.co.uk/programmes/b04v20dw', 'info_dict': { @@ -128,12 +127,13 @@ class BBCCoUkIE(InfoExtractor): 'note': 'Audio', 'info_dict': { 'id': 'p022h44j', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances', 
'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.", 'duration': 227, }, 'params': { + # rtmp download 'skip_download': True, } }, { @@ -141,12 +141,13 @@ class BBCCoUkIE(InfoExtractor): 'note': 'Video', 'info_dict': { 'id': 'p025c103', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014', 'duration': 226, }, 'params': { + # rtmp download 'skip_download': True, } }, { @@ -162,7 +163,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available', + 'skip': 'geolocation', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition', 'info_dict': { @@ -176,7 +177,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available', + 'skip': 'geolocation', }, { # iptv-all mediaset fails with geolocation however there is no geo restriction # for this programme at all @@ -191,17 +192,18 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available on BBC iPlayer Radio', + 'skip': 'Now it\'s really geo-restricted', }, { # compact player (https://github.com/rg3/youtube-dl/issues/8147) 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', 'info_dict': { 'id': 'p028bfkj', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', }, 'params': { + # rtmp download 'skip_download': True, }, }, { @@ -246,15 +248,9 @@ class BBCCoUkIE(InfoExtractor): elif transfer_format == 'dash': pass elif transfer_format == 'hls': - is_unified_streaming = re.search(self._USP_RE, href) - if is_unified_streaming: - href = 
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href) - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( href, programme_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=supplier, fatal=False) - if is_unified_streaming: - self._check_formats(m3u8_formats, programme_id) - formats.extend(m3u8_formats) + m3u8_id=supplier, fatal=False)) # Direct link else: formats.append({ @@ -309,14 +305,13 @@ class BBCCoUkIE(InfoExtractor): for connection in self._extract_connections(media): conn_formats = self._extract_connection(connection, programme_id) for format in conn_formats: - if format.get('protocol') != 'm3u8_native': - format.update({ - 'width': width, - 'height': height, - 'vbr': vbr, - 'vcodec': vcodec, - 'filesize': file_size, - }) + format.update({ + 'width': width, + 'height': height, + 'vbr': vbr, + 'vcodec': vcodec, + 'filesize': file_size, + }) if service: format['format_id'] = '%s_%s' % (service, format['format_id']) formats.extend(conn_formats) @@ -594,7 +589,8 @@ class BBCIE(BBCCoUkIE): 'info_dict': { 'id': '150615_telabyad_kentin_cogu', 'ext': 'mp4', - 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde", + 'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi", + 'description': 'md5:33a4805a855c9baf7115fcbde57e7025', 'timestamp': 1434397334, 'upload_date': '20150615', }, @@ -608,6 +604,7 @@ class BBCIE(BBCCoUkIE): 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw', 'ext': 'mp4', 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción', + 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8', 'timestamp': 1434713142, 'upload_date': '20150619', }, @@ -823,8 +820,20 @@ class BBCIE(BBCCoUkIE): # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani) playlist = data_playable.get('otherSettings', {}).get('playlist', {}) if playlist: - entries.append(self._extract_from_playlist_sxml( - playlist.get('progressiveDownloadUrl'), playlist_id, timestamp)) + for key in 
('progressiveDownload', 'streaming'): + playlist_url = playlist.get('%sUrl' % key) + if not playlist_url: + continue + try: + entries.append(self._extract_from_playlist_sxml( + playlist_url, playlist_id, timestamp)) + except Exception as e: + # Some playlist URL may fail with 500, at the same time + # the other one may work fine (e.g. + # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu) + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: + continue + raise if entries: return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) @@ -1003,10 +1012,10 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): IE_NAME = 'bbc.co.uk:iplayer:playlist' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)' - _TEST = { + _TESTS = [{ 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', 'info_dict': { 'id': 'b05rcz9v', @@ -1014,7 +1023,17 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'French thriller serial about a missing teenager.', }, 'playlist_mincount': 6, - } + 'skip': 'This programme is not currently available on BBC iPlayer', + }, { + # Available for over a year unlike 30 days for most other programmes + 'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32', + 'info_dict': { + 'id': 'p02tcc32', + 'title': 'Bohemian Icons', + 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', + }, + 'playlist_mincount': 10, + }] def _extract_title_and_description(self, webpage): title = self._search_regex(r'

([^<]+)

', webpage, 'title', fatal=False)