X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Famp.py;h=7ff098cfa0c9789122dab4787f877a1a9bf45352;hb=HEAD;hp=b573b928082b0b9f8f479d976f91a490cc671c1e;hpb=3793090b1b1c1e3462b80dd3045a3573545cfb29;p=youtube-dl diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index b573b9280..7ff098cfa 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -3,82 +3,100 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + determine_ext, + ExtractorError, int_or_none, + mimetype2ext, parse_iso8601, + url_or_none, ) class AMPIE(InfoExtractor): - def _get_media_node(self, item, name, default=None): - media_name = 'media-%s' % name - media_group = item.get('media-group') or item - return media_group.get(media_name) or item.get(media_name) or item.get(name, default) - # parse Akamai Adaptive Media Player feed def _extract_feed_info(self, url): - item = self._download_json( - url, None, - 'Downloading Akamai AMP feed', - 'Unable to download Akamai AMP feed' - )['channel']['item'] + feed = self._download_json( + url, None, 'Downloading Akamai AMP feed', + 'Unable to download Akamai AMP feed') + item = feed.get('channel', {}).get('item') + if not item: + raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error'])) video_id = item['guid'] - + + def get_media_node(name, default=None): + media_name = 'media-%s' % name + media_group = item.get('media-group') or item + return media_group.get(media_name) or item.get(media_name) or item.get(name, default) + thumbnails = [] - media_thumbnail = self._get_media_node(item, 'thumbnail') + media_thumbnail = get_media_node('thumbnail') if media_thumbnail: if isinstance(media_thumbnail, dict): media_thumbnail = [media_thumbnail] for thumbnail_data in media_thumbnail: - thumbnail = thumbnail_data['@attributes'] + thumbnail = thumbnail_data.get('@attributes', {}) + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue thumbnails.append({ - 'url': self._proto_relative_url(thumbnail['url'], 'http:'), + 'url': self._proto_relative_url(thumbnail_url, 'http:'), 'width': int_or_none(thumbnail.get('width')), 'height': int_or_none(thumbnail.get('height')), }) subtitles = {} - media_subtitle = self._get_media_node(item, 'subTitle') + media_subtitle = get_media_node('subTitle') if media_subtitle: if isinstance(media_subtitle, dict): media_subtitle = [media_subtitle] for subtitle_data in media_subtitle: - subtitle = subtitle_data['@attributes'] - lang = subtitle.get('lang') or 'en' - subtitles[lang] = [{'url': subtitle['href']}] + subtitle = subtitle_data.get('@attributes', {}) + subtitle_href = url_or_none(subtitle.get('href')) + if not subtitle_href: + continue + subtitles.setdefault(subtitle.get('lang') or 'en', []).append({ + 'url': subtitle_href, + 'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href), + }) formats = [] - media_content = self._get_media_node(item, 'content') + media_content = get_media_node('content') if isinstance(media_content, dict): media_content = [media_content] for media_data in media_content: - media = media_data['@attributes'] - media_type = media['type'] - if media_type == 'video/f4m': - f4m_formats = self._extract_f4m_formats(media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) - elif media_type == 'application/x-mpegURL': - m3u8_formats = self._extract_m3u8_formats(media['url'], video_id, m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + media = media_data.get('@attributes', {}) + media_url = url_or_none(media.get('url')) + if not media_url: + continue + ext = mimetype2ext(media.get('type')) or determine_ext(media_url) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', + video_id, f4m_id='hds', fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) else: formats.append({ - 'format_id': media_data['media-category']['@attributes']['label'], - 'url': media['url'], - 'preference': 1, - 'vbr': int_or_none(media.get('bitrate')), + 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'), + 'url': media_url, + 'tbr': int_or_none(media.get('bitrate')), 'filesize': int_or_none(media.get('fileSize')), + 'ext': ext, }) self._sort_formats(formats) + timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date')) + return { 'id': video_id, - 'title': self._get_media_node(item, 'title'), - 'description': self._get_media_node(item, 'description'), + 'title': get_media_node('title'), + 'description': get_media_node('description'), 'thumbnails': thumbnails, - 'timestamp': parse_iso8601(item.get('pubDate'), ' '), + 'timestamp': timestamp, 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')), + 'subtitles': subtitles, 'formats': formats, }