import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
+ js_to_json,
strip_jsonp,
unified_strdate,
US_RATINGS,
class PBSIE(InfoExtractor):
_STATIONS = (
- (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
+ (r'(?:video|www|player)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
(r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
(r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
(r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
for vid_id in video_id]
return self.playlist_result(entries, display_id)
- info = self._download_json(
- 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
- display_id)
+ try:
+ info = self._download_json(
+ 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
+ display_id, 'Downloading video info JSON')
+ except ExtractorError as e:
+ if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
+ raise
+ # videoInfo API may not work for some videos; fall back to the portalplayer API
+ player = self._download_webpage(
+ 'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
+ info = self._parse_json(
+ self._search_regex(
+ r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
+ player, 'video data', default='{}'),
+ display_id, transform_source=js_to_json, fatal=False)
formats = []
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
'id': video_id,
'display_id': display_id,
'title': info['title'],
- 'description': info['program'].get('description'),
+ 'description': info.get('description') or info.get('program', {}).get('description'),
'thumbnail': info.get('image_url'),
'duration': int_or_none(info.get('duration')),
'age_limit': age_limit,