X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpbs.py;h=a28ee17caa5ec19b245bf39cf43e3fa448bc936a;hb=f7560859a3e25ccaa74123428d42f821299a2bed;hp=8889e4a1aaa3e41f49a63b53c010cf69d0842b1b;hpb=7b67b60773b70ac74edd3993eeea6fe9b790c664;p=youtube-dl diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 8889e4a1a..a28ee17ca 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -187,7 +187,7 @@ class PBSIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?: # Direct video URL - (?:%s)/(?:viralplayer|video)/(?P[0-9]+)/? | + (?:%s)/(?:(?:vir|port)alplayer|video)/(?P[0-9]+)(?:[?/]|$) | # Article with embedded player (or direct video) (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player @@ -367,6 +367,10 @@ class PBSIE(InfoExtractor): { 'url': 'http://watch.knpb.org/video/2365616055/', 'only_matching': True, + }, + { + 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=', + 'only_matching': True, } ] _ERRORS = { @@ -417,6 +421,7 @@ class PBSIE(InfoExtractor): r'class="coveplayerid">([^<]+)<', # coveplayer r']+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/ r'', # jwplayer + r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',", ] media_id = self._search_regex( @@ -500,7 +505,7 @@ class PBSIE(InfoExtractor): if player: video_info = self._parse_json( self._search_regex( - r'(?s)PBS\.videoData\s*=\s*({.+?});\n', + [r'(?s)PBS\.videoData\s*=\s*({.+?});\n', r'window\.videoBridge\s*=\s*({.+?});'], player, '%s video data' % page, default='{}'), display_id, transform_source=js_to_json, fatal=False) if video_info: @@ -508,10 +513,14 @@ class PBSIE(InfoExtractor): if not info: info = video_info if not chapters: - for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player): - chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False) - if not chapter: - continue + raw_chapters = video_info.get('chapters') or [] + if not raw_chapters: + for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player): + chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False) + if not chapter: + continue + raw_chapters.append(chapter) + for chapter in raw_chapters: start_time = float_or_none(chapter.get('start_time'), 1000) duration = float_or_none(chapter.get('duration'), 1000) if start_time is None or duration is None: