X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fceskatelevize.py;h=2f866f3ef925c8402f00a3c0f922cf530eaa2010;hb=4b7df0d30cfe62ab25f462974c2ed0fc5a82eb3f;hp=59f2a8e451953b45d84957369a2d8b0a2d029ae4;hpb=f00fc78674ada70ea9bab361254cb6c6ce666c66;p=youtube-dl diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 59f2a8e45..2f866f3ef 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -2,13 +2,14 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_parse, compat_urllib_parse_urlparse, +) +from ..utils import ( ExtractorError, ) @@ -16,31 +17,46 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P[^?#]+)' - _TESTS = [{ - 'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka', - 'info_dict': { - 'id': '213512120230004', - 'ext': 'flv', - 'title': 'První republika: Španělská chřipka', - 'duration': 3107.4, + _TESTS = [ + { + 'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka', + 'info_dict': { + 'id': '213512120230004', + 'ext': 'flv', + 'title': 'První republika: Španělská chřipka', + 'duration': 3107.4, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + 'skip': 'Works only from Czech Republic.', }, - 'params': { - 'skip_download': True, # requires rtmpdump + { + 'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt', + 'info_dict': { + 'id': '20138143440', + 'ext': 'flv', + 'title': 'Tsatsiki, maminka a policajt', + 'duration': 6754.1, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + 'skip': 'Works only from Czech Republic.', }, - 'skip': 'Works only from Czech Republic.', - }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt', - 'info_dict': { - 'id': '20138143440', - 'ext': 'flv', - 'title': 'Tsatsiki, maminka a policajt', - 'duration': 6754.1, + { + 'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', + 'info_dict': { + 'id': '14716', + 'ext': 'flv', + 'title': 'První republika: Zpěvačka z Dupárny Bobina', + 'duration': 90, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, }, - 'params': { - 'skip_download': True, # requires rtmpdump - }, - 'skip': 'Works only from Czech Republic.', - }] + ] def _real_extract(self, url): url = url.replace('/porady/', '/ivysilani/').replace('/video/', '') @@ -50,9 +66,9 @@ class CeskaTelevizeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - if '

Chyba konfigurace prohlížeče.

' not in webpage: - msg = self._html_search_regex(r'

(.+?)

', webpage, 'error-message') - raise ExtractorError(msg.replace('
', ' ')) + NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' + if '%s

' % NOT_AVAILABLE_STRING in webpage: + raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type') episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id') @@ -72,13 +88,13 @@ class CeskaTelevizeIE(InfoExtractor): req.add_header('X-Requested-With', 'XMLHttpRequest') req.add_header('Referer', url) - playlistpage = self._download_webpage(req, video_id) + playlistpage = self._download_json(req, video_id) - req = compat_urllib_request.Request(compat_urllib_parse.unquote(json.loads(playlistpage)['url'])) + req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url'])) req.add_header('Referer', url) playlist = self._download_xml(req, video_id) - + formats = [] for i in playlist.find('smilRoot/body'): if 'AD' not in i.attrib['id']: