X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcspan.py;h=92a827a4ba1776559757417438978ed26752c678;hb=4447fb23320b9214ab3188717794d00b18887617;hp=b78edf729f94a010bdd5969780226bd41994f27c;hpb=af9c2a07aea530b3bee560a953e94ac92fcd49c9;p=youtube-dl diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index b78edf729..92a827a4b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -12,10 +12,11 @@ from ..utils import ( ExtractorError, ) from .senateisvp import SenateISVPIE +from .ustream import UstreamIE class CSpanIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' + _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' IE_DESC = 'C-SPAN' _TESTS = [{ 'url': 'http://www.c-span.org/video/?313572-1/HolderonV', @@ -51,15 +52,36 @@ class CSpanIE(InfoExtractor): 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', 'info_dict': { 'id': 'judiciary031715', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Immigration Reforms Needed to Protect Skilled American Workers', + }, + 'params': { + 'skip_download': True, # m3u8 downloads } + }, { + # Ustream embedded video + 'url': 'https://www.c-span.org/video/?114917-1/armed-services', + 'info_dict': { + 'id': '58428542', + 'ext': 'flv', + 'title': 'USHR07 Armed Services Committee', + 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee', + 'timestamp': 1423060374, + 'upload_date': '20150204', + 'uploader': 'HouseCommittee', + 'uploader_id': '12987475', + }, }] def _real_extract(self, url): video_id = self._match_id(url) video_type = None webpage = self._download_webpage(url, video_id) + + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) + # We first look for clipid, because clipprog always appears before patterns = 
[r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) @@ -113,7 +135,7 @@ class CSpanIE(InfoExtractor): 'tbr': int_or_none(get_text_attr(quality, 'bitrate')), }) if not formats: - path = get_text_attr(f, 'path') + path = unescapeHTML(get_text_attr(f, 'path')) if not path: continue formats = self._extract_m3u8_formats(