X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpbs.py;h=6baed773fc6bf741a69f1baf222148065ef169c4;hb=e64b0fca147c1512c8d31d02aedefed78411bbd9;hp=335e44bdc7bfa5d9d86de38a200a890ce14e1040;hpb=f50365e91cfccf33d0b5696c7f989944bcf748e7;p=youtube-dl diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 335e44bdc..6baed773f 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, js_to_json, strip_jsonp, + strip_or_none, unified_strdate, US_RATINGS, ) @@ -200,7 +201,7 @@ class PBSIE(InfoExtractor): 'id': '2365006249', 'ext': 'mp4', 'title': 'Constitution USA with Peter Sagal - A More Perfect Union', - 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071', + 'description': 'md5:31b664af3c65fd07fa460d306b837d00', 'duration': 3190, }, }, @@ -211,7 +212,7 @@ class PBSIE(InfoExtractor): 'id': '2365297690', 'ext': 'mp4', 'title': 'FRONTLINE - Losing Iraq', - 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9', + 'description': 'md5:5979a4d069b157f622d02bff62fbe654', 'duration': 5050, }, }, @@ -222,7 +223,7 @@ class PBSIE(InfoExtractor): 'id': '2201174722', 'ext': 'mp4', 'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist', - 'description': 'md5:95a19f568689d09a166dff9edada3301', + 'description': 'md5:86ab9a3d04458b876147b355788b8781', 'duration': 801, }, }, @@ -235,7 +236,7 @@ class PBSIE(InfoExtractor): 'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full', 'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b', 'duration': 6559, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -248,7 +249,7 @@ class PBSIE(InfoExtractor): 'description': 'md5:c741d14e979fc53228c575894094f157', 'title': 'NOVA - Killer Typhoon', 'duration': 3172, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140122', 'age_limit': 10, }, @@ -267,9 +268,9 @@ class PBSIE(InfoExtractor): 'display_id': 'player', 'ext': 'mp4', 'title': 'American Experience - Death and the Civil War, Chapter 1', - 'description': 'md5:1b80a74e0380ed2a4fb335026de1600d', + 'description': 'md5:67fa89a9402e2ee7d08f53b920674c18', 'duration': 682, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { 'skip_download': True, # requires ffmpeg @@ -285,7 +286,7 @@ class PBSIE(InfoExtractor): 'title': 'FRONTLINE - United States of Secrets (Part One)', 'description': 'md5:55756bd5c551519cc4b7703e373e217e', 'duration': 6851, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -293,15 +294,15 @@ class PBSIE(InfoExtractor): # "', webpage): url = self._search_regex( @@ -432,10 +437,10 @@ class PBSIE(InfoExtractor): video_id = mobj.group('id') display_id = video_id - return video_id, display_id, None + return video_id, display_id, None, description def _real_extract(self, url): - video_id, display_id, upload_date = self._extract_webpage(url) + video_id, display_id, upload_date, description = self._extract_webpage(url) if isinstance(video_id, list): entries = [self.url_result( @@ -480,7 +485,8 @@ class PBSIE(InfoExtractor): redirect_info = self._download_json( '%s?format=json' % redirect['url'], display_id, - 'Downloading %s video url info' % (redirect_id or num)) + 'Downloading %s video url info' % (redirect_id or num), + headers=self.geo_verification_headers()) if redirect_info['status'] == 'error': raise ExtractorError( @@ -510,12 +516,12 @@ class PBSIE(InfoExtractor): if http_url: for m3u8_format in m3u8_formats: bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None) - # lower qualities(150k and 192k) are not available as http formats - # https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656 - # we will try to extract any http format higher than than the lowest quality documented in - # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications - # as there also undocumented http formats formats(4500k and 6500k) - # http://www.pbs.org/video/2365815229/ + # Lower qualities (150k and 192k) are not available as HTTP formats (see [1]), + # we won't try extracting them. + # Since summer 2016 higher quality formats (4500k and 6500k) are also available + # albeit they are not documented in [2]. + # 1. https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656 + # 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications if not bitrate or int(bitrate) < 400: continue f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url) @@ -562,13 +568,16 @@ class PBSIE(InfoExtractor): # Try turning it to 'program - title' naming scheme if possible alt_title = info.get('program', {}).get('title') if alt_title: - info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title']) + info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + r'[\s\-:]+', '', info['title']) + + description = info.get('description') or info.get( + 'program', {}).get('description') or description return { 'id': video_id, 'display_id': display_id, 'title': info['title'], - 'description': info.get('description') or info.get('program', {}).get('description'), + 'description': description, 'thumbnail': info.get('image_url'), 'duration': int_or_none(info.get('duration')), 'age_limit': age_limit,