X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcspan.py;h=e54009622aa353d618326ca51044aa567c2eef55;hb=0b65e5d40f9d6d9a25fd463a4ab0db95022c534e;hp=7bf03c584c7388b162c9b3912a4aa0f410ed5b22;hpb=36034aecc287f67a9f93fa00f374f45dcb0e2f77;p=youtube-dl diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7bf03c584..e54009622 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -1,51 +1,49 @@ +from __future__ import unicode_literals + +import json import re from .common import InfoExtractor from ..utils import ( - compat_urllib_parse, + unescapeHTML, ) + class CSpanIE(InfoExtractor): - _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' + _VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P.*)' + IE_DESC = 'C-SPAN' _TEST = { - u'url': u'http://www.c-spanvideo.org/program/HolderonV', - u'file': u'315139.flv', - u'md5': u'74a623266956f69e4df0068ab6c80fe4', - u'info_dict': { - u"title": u"Attorney General Eric Holder on Voting Rights Act Decision" + 'url': 'http://www.c-spanvideo.org/program/HolderonV', + 'file': '315139.mp4', + 'md5': '8e44ce11f0f725527daccc453f553eb0', + 'info_dict': { + 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', + 'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.', }, - u'skip': u'Requires rtmpdump' + 'skip': 'Regularly fails on travis, for unknown reasons', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - prog_name = mobj.group(1) + prog_name = mobj.group('name') webpage = self._download_webpage(url, prog_name) - video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id') - data = compat_urllib_parse.urlencode({'programid': video_id, - 'dynamic':'1'}) - info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data - video_info = self._download_webpage(info_url, video_id, u'Downloading video info') - - self.report_extraction(video_id) - - title = self._html_search_regex(r'(.*?)', - video_info, 'title') - description = self._html_search_regex(r'(.*?)', - video_info, 'video url') - url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443') - path = self._search_regex(r'(.*?)', - video_info, 'rtmp play path') - - return {'id': video_id, - 'title': title, - 'ext': 'flv', - 'url': url, - 'play_path': path, - 'description': description, - 'thumbnail': self._og_search_thumbnail(webpage), - } + video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id') + + title = self._html_search_regex( + r'\n\s*]*>(.*?)', webpage, 'title') + description = self._og_search_description(webpage) + + info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id + data_json = self._download_webpage( + info_url, video_id, 'Downloading video info') + data = json.loads(data_json) + + url = unescapeHTML(data['video']['files'][0]['path']['#text']) + + return { + 'id': video_id, + 'title': title, + 'url': url, + 'description': description, + 'thumbnail': self._og_search_thumbnail(webpage), + }