X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftvp.py;h=a645800057fc6dc88850885cba7737243c95574e;hb=62420c73cb16472ead562339e22b038fac7aa950;hp=63fb57bbe453749b065fcff1b62292df3b0c8ec0;hpb=6c5ad80cdcd3f51b61a9d21c55e21d51e6b2f39a;p=youtube-dl

diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 63fb57bbe..a64580005 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -1,60 +1,37 @@
-# encoding: utf-8
-import re
-import json
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-    ExtractorError,
-    RegexNotFoundError,
-)
+
 
 class TvpIE(InfoExtractor):
-    IE_NAME = u'tvp.pl'
+    IE_NAME = 'tvp.pl'
     _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
-    _INFO_URL = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s'
-
 
     _TEST = {
-        u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
-        u'file': u'31.10.2013-12878238.wmv',
-        u'info_dict': {
-            u'title': u'31.10.2013',
-            u'description': u'31.10.2013',
+        'url': 'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
+        'md5': '148408967a6a468953c0a75cbdaf0d7a',
+        'info_dict': {
+            'id': '12878238',
+            'ext': 'wmv',
+            'title': '31.10.2013 - Odcinek 2',
+            'description': '31.10.2013 - Odcinek 2',
         },
+        'skip': 'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id, "Downloading video webpage")
-        json_params = self._download_webpage(self._INFO_URL % video_id, video_id, "Downloading video metadata")
-
-        try:
-            params = json.loads(json_params)
-        except:
-            raise ExtractorError(u'Invalid JSON')
-
-        self.report_extraction(video_id)
-        try:
-            video_url = params['video_url']
-        except KeyError:
-            raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
-
-        try:
-            title = self._og_search_title(webpage)
-        except RegexNotFoundError:
-            title = video_id
-        info = {
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
+        params = self._download_json(
+            json_url, video_id, "Downloading video metadata")
+        video_url = params['video_url']
+
+        return {
             'id': video_id,
-            'title': title,
+            'title': self._og_search_title(webpage),
             'ext': 'wmv',
             'url': video_url,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
         }
-        try:
-            info['description'] = self._og_search_description(webpage)
-            info['thumbnail'] = self._og_search_thumbnail(webpage)
-        except RegexNotFoundError:
-            pass
-
-        return info