X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fpbs.py;h=8889e4a1aaa3e41f49a63b53c010cf69d0842b1b;hb=685e87b61f785b096745cda5ea64ea0b950f56d1;hp=0727e381b52e3c2b4e520d38c88762b285604c3b;hpb=7a6d33a9a5390d0dab7e9162d6b2552cb0fe23a5;p=youtube-dl diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 0727e381b..8889e4a1a 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, float_or_none, js_to_json, + orderedSet, strip_jsonp, strip_or_none, unified_strdate, @@ -188,7 +189,7 @@ class PBSIE(InfoExtractor): # Direct video URL (?:%s)/(?:viralplayer|video)/(?P[0-9]+)/? | # Article with embedded player (or direct video) - (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | + (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P[^/]+)/ ) @@ -264,6 +265,13 @@ class PBSIE(InfoExtractor): }, 'playlist_count': 2, }, + { + 'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/', + 'info_dict': { + 'id': 'great-war', + }, + 'playlist_count': 3, + }, { 'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/', 'info_dict': { @@ -337,6 +345,21 @@ class PBSIE(InfoExtractor): 'formats': 'mincount:8', }, }, + { + # https://github.com/rg3/youtube-dl/issues/13801 + 'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/', + 'info_dict': { + 'id': '3003333873', + 'ext': 'mp4', + 'title': 'PBS NewsHour - full episode July 31, 2017', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'duration': 3265, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'only_matching': True, @@ -382,10 +405,10 @@ class PBSIE(InfoExtractor): # tabbed frontline videos MULTI_PART_REGEXES = ( r']+class="videotab[^"]*"[^>]+vid="(\d+)"', - r']+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)', + r']+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)', ) for p in MULTI_PART_REGEXES: - tabbed_videos = re.findall(p, webpage) + tabbed_videos = orderedSet(re.findall(p, webpage)) if tabbed_videos: return tabbed_videos, presumptive_id, upload_date, description @@ -425,6 +448,9 @@ class PBSIE(InfoExtractor): if url: break + if not url: + url = self._og_search_url(webpage) + mobj = re.match(self._VALID_URL, url) player_id = mobj.group('player_id')