[bbc.co.uk] Fix regex
[youtube-dl] / youtube_dl / extractor / bbccouk.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import ExtractorError
7
8
class BBCCoUkIE(InfoExtractor):
    """Information extractor for bbc.co.uk programme / iPlayer episode pages.

    Extraction is a two step process:

    1. The playlist XML for the id found in the URL is downloaded; it
       provides the title, the summary and the ``item`` element carrying the
       media identifier and duration.
    2. The media selection XML for that identifier is downloaded; every
       ``media`` element in it is turned into one or more format dicts.
    """

    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC - iPlayer Radio'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'

    # XML namespaces of the two documents parsed by this extractor.
    _PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
    _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
        'info_dict': {
            'id': 'p01q7wz4',
            'ext': 'flv',
            'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
            'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
            'duration': 1936,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    @staticmethod
    def _parse_int(value):
        """Return ``int(value)``, or None if value is missing or not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _child_text(parent, path):
        """Return the text of the first element matching path, or None."""
        node = parent.find(path)
        return None if node is None else node.text

    def _extract_media_formats(self, media, programme_id):
        """Build the list of format dicts described by one ``media`` element.

        Only the first ``connection`` child of the element is considered.
        An empty list is returned when the element has no connection or when
        the connection uses a protocol other than http or rtmp.
        """
        connection = media.find('./{%s}connection' % self._MEDIASELECTION_NS)
        if connection is None:
            # Nothing to download from; skip instead of failing on None.
            return []

        service = media.get('service')
        # XML attributes are strings. The numeric fields are converted so that
        # any ordering on them is numeric ('10' sorts before '9' as text) and
        # a missing attribute becomes None instead of raising.
        shared = {
            'abr': self._parse_int(media.get('bitrate')),
            'acodec': media.get('encoding'),
            'preference': self._parse_int(connection.get('priority')),
        }
        protocol = connection.get('protocol')

        if protocol == 'http':
            href = connection.get('href')
            if connection.get('supplier') != 'asx':
                # Direct link
                fmt = dict(shared)
                fmt.update({
                    'url': href,
                    'format_id': service,
                })
                return [fmt]

            # ASX playlist: every referenced entry becomes its own format.
            asx = self._download_xml(
                href, programme_id,
                'Downloading %s ASX playlist' % service)
            formats = []
            for i, ref in enumerate(asx.findall('./Entry/ref')):
                fmt = dict(shared)
                fmt.update({
                    'url': ref.get('href'),
                    'format_id': '%s_ref%s' % (service, i),
                })
                formats.append(fmt)
            return formats

        if protocol == 'rtmp':
            application = connection.get('application', 'ondemand')
            auth_string = connection.get('authString')
            # Append the auth query only when present; formatting None would
            # put the literal text "None" into the URL.
            if auth_string:
                app = '%s?%s' % (application, auth_string)
            else:
                app = application
            fmt = dict(shared)
            fmt.update({
                'url': '%s://%s/%s' % (protocol, connection.get('server'), app),
                'play_path': connection.get('identifier'),
                'app': app,
                'rtmp_live': False,
                'ext': 'flv',
                'format_id': service,
            })
            return [fmt]

        return []

    def _real_extract(self, url):
        """Extract the info dict for the programme/episode at url.

        Raises ExtractorError when the playlist reports that the episode is
        not (or no longer) available, or when the playlist lacks the data
        needed to locate the media.
        """
        mobj = re.match(self._VALID_URL, url)
        group_id = mobj.group('id')

        playlist = self._download_xml(
            'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
            'Downloading playlist XML')

        item = playlist.find('./{%s}item' % self._PLAYLIST_NS)
        if item is None:
            no_items = playlist.find('./{%s}noItems' % self._PLAYLIST_NS)
            if no_items is not None:
                reason = no_items.get('reason')
                if reason == 'preAvailability':
                    msg = 'Episode %s is not yet available' % group_id
                elif reason == 'postAvailability':
                    msg = 'Episode %s is no longer available' % group_id
                else:
                    msg = 'Episode %s is not available: %s' % (group_id, reason)
                raise ExtractorError(msg, expected=True)
            raise ExtractorError('Failed to extract media for episode %s' % group_id, expected=True)

        title = self._child_text(playlist, './{%s}title' % self._PLAYLIST_NS)
        if title is None:
            raise ExtractorError('Unable to extract title for episode %s' % group_id)
        # The summary is optional; None is used when it is absent.
        description = self._child_text(playlist, './{%s}summary' % self._PLAYLIST_NS)

        radio_programme_id = item.get('identifier')
        if not radio_programme_id:
            # Without an identifier the media selection URL cannot be built.
            raise ExtractorError('Failed to extract media for episode %s' % group_id)
        duration = self._parse_int(item.get('duration'))

        media_selection = self._download_xml(
            'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % radio_programme_id,
            radio_programme_id, 'Downloading media selection XML')

        formats = []
        for media in media_selection.findall('./{%s}media' % self._MEDIASELECTION_NS):
            formats.extend(self._extract_media_formats(media, radio_programme_id))

        self._sort_formats(formats)

        return {
            'id': radio_programme_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }