X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpbs.py;h=f6f423597fe4952427f226fe276e17d2539eaddc;hb=42b7a5afe09e485503cbe9794c7ad18c46dc838d;hp=75c36a621dd873aab5690587796d2eb903e09fee;hpb=350d7963db671884acd43f56f41bd499efd8e74a;p=youtube-dl diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 75c36a621..f6f423597 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -512,13 +512,18 @@ class PBSIE(InfoExtractor): if http_url: for m3u8_format in m3u8_formats: bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None) - if not bitrate: + # extract only the formats that we know that they will be available as http format. + # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications + if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'): + continue + f_url = re.sub(r'\d+k|baseline', bitrate, http_url) + # This may produce invalid links sometimes (e.g. + # http://www.pbs.org/wgbh/frontline/film/suicide-plan) + if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate): continue - if bitrate == '192k': - bitrate = 'baseline' f = m3u8_format.copy() f.update({ - 'url': re.sub(r'\d+k|baseline', bitrate, http_url), + 'url': f_url, 'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'protocol': 'http', }) @@ -537,6 +542,19 @@ class PBSIE(InfoExtractor): 'ext': 'ttml', 'url': closed_captions_url, }] + mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url) + if mobj: + ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1) + ttml_caption_id = int(ttml_caption_id) + subtitles['en'].extend([{ + 'url': closed_captions_url.replace( + ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)), + 'ext': 'srt', + }, { + 'url': closed_captions_url.replace( + ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)), + 'ext': 'vtt', + }]) # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc) # Try turning it to 'program - title' naming scheme if possible