from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    mimetype2ext,
    parse_iso8601,
)
class AMPIE(InfoExtractor):
    # parse Akamai Adaptive Media Player feed
    def _extract_feed_info(self, url):
        """Download the Akamai AMP JSON feed at `url` and build an info dict.

        The feed is expected to carry a single item under
        ``feed['channel']['item']``.  Thumbnails, subtitles and media
        entries are read from the item's ``media-*`` nodes; entries that
        lack a URL are skipped instead of aborting the extraction.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnails``, ``timestamp``, ``duration``, ``subtitles`` and
        ``formats``.

        Raises ExtractorError when the feed contains no item (the feed's
        own ``error`` value is included in the message when present).
        Raises KeyError when the item has no ``guid``.
        """
        feed = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')
        # `channel` may be absent or null; treat both the same way.
        item = (feed.get('channel') or {}).get('item')
        if not item:
            # Do not assume the feed carries an `error` key: a bare KeyError
            # here would hide the actual problem from the user.
            raise ExtractorError('%s said: %s' % (
                self.IE_NAME, feed.get('error') or 'feed contains no item'))

        video_id = item['guid']

        def get_media_node(name, default=None):
            # Look for `media-<name>` inside the media group first, then on
            # the item itself, and finally fall back to the plain `name` key.
            media_name = 'media-%s' % name
            media_group = item.get('media-group') or item
            return (
                media_group.get(media_name)
                or item.get(media_name)
                or item.get(name, default))

        def as_list(node):
            # A node holding a single entry is a dict rather than a list;
            # a missing node becomes an empty list.
            if not node:
                return []
            if isinstance(node, dict):
                return [node]
            return node

        thumbnails = []
        for thumbnail_data in as_list(get_media_node('thumbnail')):
            thumbnail = thumbnail_data.get('@attributes') or {}
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': self._proto_relative_url(thumbnail_url, 'http:'),
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        subtitles = {}
        for subtitle_data in as_list(get_media_node('subTitle')):
            subtitle = subtitle_data.get('@attributes') or {}
            subtitle_href = subtitle.get('href')
            if not subtitle_href:
                continue
            # Several subtitle tracks may share a language; keep them all.
            subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
                'url': subtitle_href,
                'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
            })

        formats = []
        media_content = as_list(get_media_node('content'))
        for media_data in media_content:
            media = media_data.get('@attributes') or {}
            media_url = media.get('url')
            if not media_url:
                continue
            # Prefer the declared MIME type, fall back to the URL extension.
            ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                media_category = media_data.get('media-category') or {}
                formats.append({
                    'format_id': (media_category.get('@attributes') or {}).get('label'),
                    'url': media_url,
                    'tbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                    'ext': ext,
                })

        self._sort_formats(formats)

        timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))

        # Duration is read from the first content entry, when there is one.
        duration = None
        if media_content:
            duration = int_or_none(
                (media_content[0].get('@attributes') or {}).get('duration'))

        return {
            'id': video_id,
            'title': get_media_node('title'),
            'description': get_media_node('description'),
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'subtitles': subtitles,
            'formats': formats,
        }