X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbbc.py;h=cc2f6fed2ccd6ec4b0747fb2f45df4fbf3841e77;hb=9d5332518c51fb0df69b844f9258e61bd7ecd390;hp=9a1b6e3dce7dd3247b0076b36280e7e4e0550c90;hpb=75e8b2ac87e77db5a752317fcf03cef54c1536d0;p=youtube-dl diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 9a1b6e3dc..cc2f6fed2 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -21,6 +21,10 @@ class BBCCoUkIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P[\da-z]{8})' _MEDIASELECTOR_URLS = [ + # Provides HQ HLS streams with even better quality that pc mediaset but fails + # with geolocation in some cases when it's even not geo restricted at all (e.g. + # http://www.bbc.co.uk/programmes/b06bp7lf) + 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s', 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s', ] @@ -152,6 +156,21 @@ class BBCCoUkIE(InfoExtractor): 'skip_download': True, }, 'skip': 'geolocation', + }, { + # iptv-all mediaset fails with geolocation however there is no geo restriction + # for this programme at all + 'url': 'http://www.bbc.co.uk/programmes/b06bp7lf', + 'info_dict': { + 'id': 'b06bp7kf', + 'ext': 'flv', + 'title': "Annie Mac's Friday Night, B.Traits sits in for Annie", + 'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.', + 'duration': 10800, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, @@ -189,6 +208,12 @@ class BBCCoUkIE(InfoExtractor): # Skip DASH until supported elif transfer_format == 'dash': pass + elif transfer_format == 'hls': + m3u8_formats = self._extract_m3u8_formats( + href, programme_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id=supplier, fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) # Direct link else: formats.append({ @@ -287,7 +312,7 @@ class BBCCoUkIE(InfoExtractor): return self._download_media_selector_url( mediaselector_url % programme_id, programme_id) except BBCCoUkIE.MediaSelectionError as e: - if e.id == 'notukerror': + if e.id in ('notukerror', 'geolocation'): last_exception = e continue self._raise_extractor_error(e) @@ -526,6 +551,18 @@ class BBCIE(BBCCoUkIE): 'params': { 'skip_download': True, } + }, { + # single video from video playlist embedded with vxp-playlist-data JSON + 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376', + 'info_dict': { + 'id': 'p02w6qjc', + 'ext': 'mp4', + 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''', + 'duration': 56, + }, + 'params': { + 'skip_download': True, + } }, { # single video story with digitalData 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret', @@ -695,13 +732,36 @@ class BBCIE(BBCCoUkIE): if not medias: # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international) - media_asset_page = self._parse_json( + media_asset = self._search_regex( + r'mediaAssetPage\.init\(\s*({.+?}), "/', + webpage, 'media asset', default=None) + if media_asset: + media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False) + medias = [] + for video in media_asset_page.get('videos', {}).values(): + medias.extend(video.values()) + + if not medias: + # Multiple video playlist with single `now playing` entry (e.g. + # http://www.bbc.com/news/video_and_audio/must_see/33767813) + vxp_playlist = self._parse_json( self._search_regex( - r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'), + r']+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)', + webpage, 'playlist data'), playlist_id) - medias = [] - for video in media_asset_page.get('videos', {}).values(): - medias.extend(video.values()) + playlist_medias = [] + for item in vxp_playlist: + media = item.get('media') + if not media: + continue + playlist_medias.append(media) + # Download single video if found media with asset id matching the video id from URL + if item.get('advert', {}).get('assetId') == playlist_id: + medias = [media] + break + # Fallback to the whole playlist + if not medias: + medias = playlist_medias entries = [] for num, media_meta in enumerate(medias, start=1):