X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbbccouk.py;h=01c02d360cd7255b14aa7aa8259de52e44701884;hb=4bb4a18876f5489db77365528638da8d46890a38;hp=6895ccad206ffe3de0068f6def0e4cb13ed11850;hpb=2e3fd9ec2fc950bf1e2fd3874e5e027f2c1351e7;p=youtube-dl diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 6895ccad2..01c02d360 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals -import re +import xml.etree.ElementTree from .subtitles import SubtitlesInfoExtractor from ..utils import ExtractorError +from ..compat import compat_HTTPError class BBCCoUkIE(SubtitlesInfoExtractor): @@ -13,13 +14,13 @@ class BBCCoUkIE(SubtitlesInfoExtractor): _TESTS = [ { - 'url': 'http://www.bbc.co.uk/programmes/p01q7wz1', + 'url': 'http://www.bbc.co.uk/programmes/b039g8p7', 'info_dict': { - 'id': 'p01q7wz4', + 'id': 'b039d07m', 'ext': 'flv', - 'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix', - 'description': 'Blu Mar Ten deliver a Guest Mix for Friction.', - 'duration': 1936, + 'title': 'Kaleidoscope: Leonard Cohen', + 'description': 'md5:db4755d7a665ae72343779f7dacb402c', + 'duration': 1740, }, 'params': { # rtmp download @@ -38,23 +39,39 @@ class BBCCoUkIE(SubtitlesInfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, + 'skip': 'Episode is no longer available on BBC iPlayer Radio', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/', 'info_dict': { 'id': 'b00yng1d', 'ext': 'flv', - 'title': 'The Man in Black: Series 3: The Printed Name', - 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.", - 'duration': 1800, + 'title': 'The Voice UK: Series 3: Blind Auditions 5', + 'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.", + 'duration': 5100, }, 'params': { # rtmp download 'skip_download': True, }, 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', - } + }, + { + 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion', + 'info_dict': { + 'id': 'b03k3pb7', + 'ext': 'flv', + 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction", + 'description': '2. Invasion', + 'duration': 3600, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + }, ] def _extract_asx_playlist(self, connection, programme_id): @@ -101,6 +118,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor): return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item') def _extract_medias(self, media_selection): + error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error') + if error is not None: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True) return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media') def _extract_connections(self, media): @@ -157,49 +178,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor): subtitles[lang] = srt return subtitles - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - group_id = mobj.group('id') - - playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id, - 'Downloading playlist XML') - - no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') - if no_items is not None: - reason = no_items.get('reason') - if reason == 'preAvailability': - msg = 'Episode %s is not yet available' % group_id - elif reason == 'postAvailability': - msg = 'Episode %s is no longer available' % group_id + def _download_media_selector(self, programme_id): + try: + media_selection = self._download_xml( + 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id, + programme_id, 'Downloading media selection XML') + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: + media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8')) else: - msg = 'Episode %s is not available: %s' % (group_id, reason) - raise ExtractorError(msg, expected=True) + raise formats = [] subtitles = None - for item in self._extract_items(playlist): - kind = item.get('kind') - if kind != 'programme' and kind != 'radioProgramme': - continue - title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text - description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text + for media in self._extract_medias(media_selection): + kind = media.get('kind') + if kind == 'audio': + formats.extend(self._extract_audio(media, programme_id)) + elif kind == 'video': + formats.extend(self._extract_video(media, programme_id)) + elif kind == 'captions': + subtitles = self._extract_captions(media, programme_id) - programme_id = item.get('identifier') - duration = int(item.get('duration')) + return formats, subtitles - media_selection = self._download_xml( - 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id, - programme_id, 'Downloading media selection XML') + def _real_extract(self, url): + group_id = self._match_id(url) + + webpage = self._download_webpage(url, group_id, 'Downloading video page') - for media in self._extract_medias(media_selection): - kind = media.get('kind') - if kind == 'audio': - formats.extend(self._extract_audio(media, programme_id)) - elif kind == 'video': - formats.extend(self._extract_video(media, programme_id)) - elif kind == 'captions': - subtitles = self._extract_captions(media, programme_id) + programme_id = self._search_regex( + r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False) + if programme_id: + player = self._download_json( + 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id, + group_id)['jsConf']['player'] + title = player['title'] + description = player['subtitle'] + duration = player['duration'] + formats, subtitles = self._download_media_selector(programme_id) + else: + playlist = self._download_xml( + 'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, + group_id, 'Downloading playlist XML') + + no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') + if no_items is not None: + reason = no_items.get('reason') + if reason == 'preAvailability': + msg = 'Episode %s is not yet available' % group_id + elif reason == 'postAvailability': + msg = 'Episode %s is no longer available' % group_id + elif reason == 'noMedia': + msg = 'Episode %s is not currently available' % group_id + else: + msg = 'Episode %s is not available: %s' % (group_id, reason) + raise ExtractorError(msg, expected=True) + + for item in self._extract_items(playlist): + kind = item.get('kind') + if kind != 'programme' and kind != 'radioProgramme': + continue + title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text + description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text + programme_id = item.get('identifier') + duration = int(item.get('duration')) + formats, subtitles = self._download_media_selector(programme_id) if self._downloader.params.get('listsubtitles', False): self._list_available_subtitles(programme_id, subtitles) @@ -214,4 +259,4 @@ class BBCCoUkIE(SubtitlesInfoExtractor): 'duration': duration, 'formats': formats, 'subtitles': subtitles, - } \ No newline at end of file + }