X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbbc.py;h=79ded6ba1de68770a5d2fb4d603b5b5a1c03fdcc;hb=da20951a57bddd4a0102cd776ff93a2adc6db77d;hp=dd65b8d86a36f3bbb486072260f14ab652a1868e;hpb=40fcba5edb0f54f09e33a193a0ffefb5668ca694;p=youtube-dl diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index dd65b8d86..79ded6ba1 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -6,14 +6,18 @@ import itertools from .common import InfoExtractor from ..utils import ( + clean_html, dict_get, ExtractorError, float_or_none, + get_element_by_class, int_or_none, parse_duration, parse_iso8601, try_get, unescapeHTML, + urlencode_postdata, + urljoin, ) from ..compat import ( compat_etree_fromstring, @@ -32,12 +36,15 @@ class BBCCoUkIE(InfoExtractor): (?: programmes/(?!articles/)| iplayer(?:/[^/]+)?/(?:episode/|playlist/)| - music/clips[/#]| + music/(?:clips|audiovideo/popular)[/#]| radio/player/ ) (?P<id>%s)(?!/(?:episodes|broadcasts|clips)) ''' % _ID_REGEX + _LOGIN_URL = 'https://account.bbc.com/signin' + _NETRC_MACHINE = 'bbc' + _MEDIASELECTOR_URLS = [ # Provides HQ HLS streams with even better quality that pc mediaset but fails # with geolocation in some cases when it's even not geo restricted at all (e.g. 
@@ -222,11 +229,46 @@ class BBCCoUkIE(InfoExtractor): }, { 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf', 'only_matching': True, - } - ] + }, { + 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55', + 'only_matching': True, + }] _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading signin page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'username': username, + 'password': password, + }) + + post_url = urljoin(self._LOGIN_URL, self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, + 'post url', default=self._LOGIN_URL, group='url')) + + response, urlh = self._download_webpage_handle( + post_url, None, 'Logging in', data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + if self._LOGIN_URL in urlh.geturl(): + error = clean_html(get_element_by_class('form-message', response)) + if error: + raise ExtractorError( + 'Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + def _real_initialize(self): + self._login() + class MediaSelectionError(Exception): def __init__(self, id): self.id = id @@ -483,6 +525,12 @@ class BBCCoUkIE(InfoExtractor): webpage = self._download_webpage(url, group_id, 'Downloading video page') + error = self._search_regex( + r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<', + webpage, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + programme_id = None duration = None