X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Faol.py;h=e87994a6aaacea39f3e6997b541d321ec665a3d7;hb=HEAD;hp=b51eafc45928f8e6ff4ce571763593f71b715583;hpb=121c09c7be1ac2944f3432122104c1952bfd1f04;p=youtube-dl diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index b51eafc45..e87994a6a 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -1,70 +1,133 @@ +# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) +from ..utils import ( + ExtractorError, + int_or_none, + url_or_none, +) class AolIE(InfoExtractor): - IE_NAME = 'on.aol.com' - _VALID_URL = r'''(?x) - (?: - aol-video:| - http://on\.aol\.com/ - (?: - video/.*-| - playlist/(?P[^/?#]+?)-(?P[0-9]+)[?#].*_videoid= - ) - ) - (?P[0-9]+) - (?:$|\?) - ''' + IE_NAME = 'aol.com' + _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P[0-9a-f]+)' _TESTS = [{ - 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', + # video with 5min ID + 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { 'id': '518167793', 'ext': 'mp4', 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam', + 'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.', + 'timestamp': 1395405060, + 'upload_date': '20140321', + 'uploader': 'Newsy Studio', }, - 'add_ie': ['FiveMin'], + 'params': { + # m3u8 download + 'skip_download': True, + } }, { - 'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316', + # video with vidible ID + 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', 'info_dict': { - 'id': '152147', - 'title': 'Brace Yourself - Today\'s Weirdest News', + 'id': '5707d6b8e4b090497b04f706', + 'ext': 'mp4', + 'title': 'Netflix is Raising Rates', + 'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.', + 'upload_date': '20160408', + 'timestamp': 1460123280, + 'uploader': 'Veuer', }, - 'playlist_mincount': 10, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/', + 'only_matching': True, + }, { + 'url': 'aol-video:5707d6b8e4b090497b04f706', + 'only_matching': True, + }, { + 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/', + 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - playlist_id = mobj.group('playlist_id') - if not playlist_id or self._downloader.params.get('noplaylist'): - return self.url_result('5min:%s' % video_id) + video_id = self._match_id(url) - self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + response = self._download_json( + 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, + video_id)['response'] + if response['statusText'] != 'Ok': + raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) - webpage = self._download_webpage(url, playlist_id) - title = self._html_search_regex( - r'

(.+?)

', webpage, 'title') - playlist_html = self._search_regex( - r"(?s)(.*?)", webpage, - 'playlist HTML') - entries = [{ - '_type': 'url', - 'url': 'aol-video:%s' % m.group('id'), - 'ie_key': 'Aol', - } for m in re.finditer( - r"[0-9]+)'\s+class='video-thumb'>", - playlist_html)] + video_data = response['data'] + formats = [] + m3u8_url = url_or_none(video_data.get('videoMasterPlaylist')) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + for rendition in video_data.get('renditions', []): + video_url = url_or_none(rendition.get('url')) + if not video_url: + continue + ext = rendition.get('format') + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + f = { + 'url': video_url, + 'format_id': rendition.get('quality'), + } + mobj = re.search(r'(\d+)x(\d+)', video_url) + if mobj: + f.update({ + 'width': int(mobj.group(1)), + 'height': int(mobj.group(2)), + }) + else: + qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query) + f.update({ + 'width': int_or_none(qs.get('w', [None])[0]), + 'height': int_or_none(qs.get('h', [None])[0]), + }) + formats.append(f) + self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) return { - '_type': 'playlist', - 'id': playlist_id, - 'display_id': mobj.group('playlist_display_id'), - 'title': title, - 'entries': entries, + 'id': video_id, + 'title': video_data['title'], + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': int_or_none(video_data.get('publishDate')), + 'view_count': int_or_none(video_data.get('views')), + 'description': video_data.get('description'), + 'uploader': video_data.get('videoOwner'), + 'formats': formats, }