from __future__ import unicode_literals
import re
+import itertools
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_etree_fromstring,
compat_HTTPError,
+ compat_urlparse,
)
}
]
+ _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
+
class MediaSelectionError(Exception):
def __init__(self, id):
self.id = id
formats.extend(self._extract_m3u8_formats(
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False))
+ if re.search(self._USP_RE, href):
+ usp_formats = self._extract_m3u8_formats(
+ re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
+ programme_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False)
+ for f in usp_formats:
+ if f.get('height') and f['height'] > 720:
+ continue
+ formats.append(f)
elif transfer_format == 'hds':
formats.extend(self._extract_f4m_formats(
href, programme_id, f4m_id=format_id, fatal=False))
class BBCCoUkArticleIE(InfoExtractor):
- _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
IE_NAME = 'bbc.co.uk:article'
IE_DESC = 'BBC articles'
class BBCCoUkPlaylistBaseIE(InfoExtractor):
+ def _entries(self, webpage, url, playlist_id):
+ single_page = 'page' in compat_urlparse.parse_qs(
+ compat_urlparse.urlparse(url).query)
+ for page_num in itertools.count(2):
+ for video_id in re.findall(
+ self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
+ yield self.url_result(
+ self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
+ if single_page:
+ return
+ next_page = self._search_regex(
+ r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
+ webpage, 'next page url', default=None, group='url')
+ if not next_page:
+ break
+ webpage = self._download_webpage(
+ compat_urlparse.urljoin(url, next_page), playlist_id,
+ 'Downloading page %d' % page_num, page_num)
+
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
- entries = [
- self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
- for video_id in re.findall(
- self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
-
title, description = self._extract_title_and_description(webpage)
- return self.playlist_result(entries, playlist_id, title, description)
+ return self.playlist_result(
+ self._entries(webpage, url, playlist_id),
+ playlist_id, title, description)
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
'description': 'French thriller serial about a missing teenager.',
},
'playlist_mincount': 7,
+ }, {
+ # multipage playlist, explicit page
+ 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
+ 'info_dict': {
+ 'id': 'b00mfl7n',
+ 'title': 'Frozen Planet - Clips - BBC One',
+ 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ # multipage playlist, all pages
+ 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
+ 'info_dict': {
+ 'id': 'b00mfl7n',
+ 'title': 'Frozen Planet - Clips - BBC One',
+ 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
+ },
+ 'playlist_mincount': 142,
}, {
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
'only_matching': True,