X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Frai.py;h=d22311031f42d5a4d35c66e9182db2993ddda8ce;hb=70d35d166c1cfb14af20fb6d45ed820b6249f941;hp=ed15a5f10721c38f1f0ead1c8d77997a6a7fbd92;hpb=449c66577640a0c3f0b383204a1e7284429a61c3;p=youtube-dl diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index ed15a5f10..d22311031 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -17,6 +17,7 @@ from ..utils import ( parse_duration, strip_or_none, try_get, + unescapeHTML, unified_strdate, unified_timestamp, update_url_query, @@ -191,11 +192,12 @@ class RaiPlayIE(RaiBaseIE): info = { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if relinker_info.get( + 'is_live') else title, 'alt_title': media.get('subtitle'), 'description': media.get('description'), - 'uploader': media.get('channel'), - 'creator': media.get('editor'), + 'uploader': strip_or_none(media.get('channel')), + 'creator': strip_or_none(media.get('editor')), 'duration': parse_duration(video.get('duration')), 'timestamp': timestamp, 'thumbnails': thumbnails, @@ -212,21 +214,75 @@ class RaiPlayIE(RaiBaseIE): class RaiPlayLiveIE(RaiBaseIE): - _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P\w*)' + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P[^/?#&]+)' _TEST = { - 'url': 'http://www.raiplay.it/dirette/rai3', - 'only_matching': True, + 'url': 'http://www.raiplay.it/dirette/rainews24', + 'info_dict': { + 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', + 'display_id': 'rainews24', + 'ext': 'mp4', + 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:6eca31500550f9376819f174e5644754', + 'uploader': 'Rai News 24', + 'creator': 'Rai News 24', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): - channel = self._match_id(url) + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE, + webpage, 'content id') + + return { + '_type': 'url_transparent', + 'ie_key': RaiPlayIE.ie_key(), + 'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, + 'id': video_id, + 'display_id': display_id, + } + + +class RaiPlayPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo', + 'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', + }, + 'playlist_mincount': 12, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + title = self._html_search_meta( + ('programma', 'nomeProgramma'), webpage, 'title') + description = unescapeHTML(self._html_search_meta( + ('description', 'og:description'), webpage, 'description')) + print(description) - webpage = self._download_webpage(url, channel) - re_id = r']*)data-uniquename=(["\'])[\w-]*(?P%s)(\2)([^>]*?)>' % RaiBaseIE._UUID_RE - video_id = self._html_search_regex(re_id, webpage, 'livestream-id', group='id') + entries = [] + for mobj in re.finditer( + r']+\bhref=(["\'])(?P/raiplay/video/.+?)\1', + webpage): + video_url = urljoin(url, mobj.group('path')) + entries.append(self.url_result( + video_url, ie=RaiPlayIE.ie_key(), + video_id=RaiPlayIE._match_id(video_url))) - return self.url_result('http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, - RaiPlayIE.ie_key(), video_id) + return self.playlist_result(entries, playlist_id, title, description) class RaiIE(RaiBaseIE): @@ -325,11 +381,11 @@ class RaiIE(RaiBaseIE): media_type = media['type'] if 'Audio' in media_type: relinker_info = { - 'formats': { + 'formats': [{ 'format_id': media.get('formatoAudio'), 'url': media['audioUrl'], 'ext': media.get('formatoAudio'), - } + }] } elif 'Video' in media_type: relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)