X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsenateisvp.py;h=474ebb49bcb4c24830d9bb178a5b90de434d4eb1;hb=3047121c639428235191ff5f7afbda7ecda38779;hp=807979d13c90fd2766be20b8f74049248a5981c3;hpb=c6391cd587a26eb3d9bba7296be804f14612e919;p=youtube-dl diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index 807979d13..474ebb49b 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + unsmuggle_url, +) from ..compat import ( compat_parse_qs, compat_urlparse, @@ -45,18 +48,17 @@ class SenateISVPIE(InfoExtractor): ["arch", "", "http://ussenate-f.akamaihd.net/"] ] _IE_NAME = 'senate.gov' - _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)' + _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', - 'md5': '7314c4b96dad66dd8e63dc3518ceaa6f', 'info_dict': { 'id': 'judiciary031715', 'ext': 'flv', 'title': 'Integrated Senate Video Player', + 'thumbnail': 're:^https?://.*\.(?:jpg|png)$', } }, { 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', - 'md5': '2917c827513700aa9b70eaebf25116da', 'info_dict': { 'id': 'commerce011514', 'ext': 'flv', @@ -70,14 +72,28 @@ class SenateISVPIE(InfoExtractor): 'ext': 'mp4', 'title': 'Integrated Senate Video Player' } + }, { + # From http://www.c-span.org/video/?96791-1 + 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', + 'only_matching': True, }] + @staticmethod + def _search_iframe_url(webpage): + mobj = re.search( + 
r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", + webpage) + if mobj: + return mobj.group('url') + def _get_info_for_comm(self, committee): for entry in self._COMM_MAP: if entry[0] == committee: return entry[1:] def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs')) if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): raise ExtractorError('Invalid URL', expected=True) @@ -86,7 +102,12 @@ class SenateISVPIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id) + if smuggled_data.get('force_title'): + title = smuggled_data['force_title'] + else: + title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id) + poster = qs.get('poster') + thumbnail = poster[0] if poster else None video_type = qs['type'][0] committee = video_type if video_type == 'arch' else qs['comm'][0] @@ -100,9 +121,9 @@ class SenateISVPIE(InfoExtractor): 'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=', }] else: - hdcore_sign = '?hdcore=3.1.0' + hdcore_sign = 'hdcore=3.1.0' url_params = (domain, video_id, stream_num) - f4m_url = '%s/z/%s_1@%s/manifest.f4m' % url_params + hdcore_sign + f4m_url = '%s/z/%s_1@%s/manifest.f4m?' % url_params + hdcore_sign m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): # URLs without the extra param induce an 404 error @@ -116,14 +137,9 @@ class SenateISVPIE(InfoExtractor): self._sort_formats(formats) - info_dict = { + return { 'id': video_id, 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, } - - if len(formats) >= 1: - info_dict.update({'formats': formats}) - else: - info_dict.update(formats[0]) - - return info_dict