From: remitamine Date: Sat, 16 Apr 2016 21:00:49 +0000 (+0100) Subject: Merge pull request #9195 from remitamine/ffmpeg-pipe X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=3014b0ae835e3f42b5f3628464ed7e4b2557ef6c;hp=b9f2fdd37fe2085deb09710a2084c940e9920304;p=youtube-dl Merge pull request #9195 from remitamine/ffmpeg-pipe [downloader/external] enable piping for FFmpegFD(closes #2124) --- diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 3eed91279..a52d26cec 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -30,14 +30,14 @@ class AudiomackIE(InfoExtractor): # audiomack wrapper around soundcloud song { 'add_ie': ['Soundcloud'], - 'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', + 'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle', 'info_dict': { - 'id': '172419696', + 'id': '258901379', 'ext': 'mp3', - 'description': 'md5:1fc3272ed7a635cce5be1568c2822997', - 'title': 'Young Thug ft Lil Wayne - Take Kare', - 'uploader': 'Young Thug World', - 'upload_date': '20141016', + 'description': 'mamba day freestyle for the legend Kobe Bryant ', + 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]', + 'uploader': 'ILOVEMAKONNEN', + 'upload_date': '20160414', } }, ] diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 425f08f2b..74c4510f9 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -671,6 +671,7 @@ class BBCIE(BBCCoUkIE): 'info_dict': { 'id': '34475836', 'title': 'Jurgen Klopp: Furious football from a witty and winning coach', + 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.', }, 'playlist_count': 3, }, { diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index c621a08d5..051d783a2 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -5,7 +5,6 @@ from ..utils import ( xpath_text, xpath_element, int_or_none, - ExtractorError, find_xpath_attr, ) @@ -64,7 +63,7 @@ class CBSIE(CBSBaseIE): 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', 'only_matching': True, }] - TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true' + TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' def _real_extract(self, url): display_id = self._match_id(url) @@ -84,11 +83,11 @@ class CBSIE(CBSBaseIE): pid = xpath_text(item, 'pid') if not pid: continue - try: - tp_formats, tp_subtitles = self._extract_theplatform_smil( - self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid) - except ExtractorError: - continue + tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid + if '.m3u8' in xpath_text(item, 'contentUrl', default=''): + tp_release_url += '&manifest=m3u' + tp_formats, tp_subtitles = self._extract_theplatform_smil( + tp_release_url, content_id, 'Downloading %s SMIL data' % pid) formats.extend(tp_formats) subtitles = self._merge_subtitles(subtitles, tp_subtitles) self._sort_formats(formats) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 7bbf617d4..fa3cb7023 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, @@ -55,8 +56,13 @@ class EaglePlatformIE(InfoExtractor): raise ExtractorError(' '.join(response['errors']), expected=True) def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'): - response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) - self._handle_error(response) + try: + response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError): + response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) + self._handle_error(response) + raise return response def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2ae9bc9a8..06b3d5e24 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -412,7 +412,12 @@ from .minoto import MinotoIE from .miomio import MioMioIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE -from .mixcloud import MixcloudIE +from .mixcloud import ( + MixcloudIE, + MixcloudUserIE, + MixcloudPlaylistIE, + MixcloudStreamIE, +) from .mlb import MLBIE from .mnet import MnetIE from .mpora import MporaIE @@ -420,7 +425,6 @@ from .moevideo import MoeVideoIE from .mofosex import MofosexIE from .mojvideo import MojvideoIE from .moniker import MonikerIE -from .mooshare import MooshareIE from .morningstar import MorningstarIE from .motherless import MotherlessIE from .motorsport import MotorsportIE @@ -465,7 +469,6 @@ from .ndr import ( from .ndtv import NDTVIE from .netzkino import NetzkinoIE from .nerdcubed import NerdCubedFeedIE -from .nerdist import NerdistIE from .neteasemusic import ( NetEaseMusicIE, NetEaseMusicAlbumIE, @@ -730,6 +733,7 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .tapely import TapelyIE from .tass import TassIE +from .tdslifeway import TDSLifewayIE from .teachertube import ( TeacherTubeIE, TeacherTubeUserIE, @@ -832,7 +836,6 @@ from .twitter import ( TwitterIE, TwitterAmplifyIE, ) -from .ubu import UbuIE from .udemy import ( UdemyIE, UdemyCourseIE @@ -917,7 +920,6 @@ from .vulture import VultureIE from .walla import WallaIE from .washingtonpost import WashingtonPostIE from .wat import WatIE -from .wayofthemaster import WayOfTheMasterIE from .wdr import ( WDRIE, WDRMobileIE, diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py index ea32b621c..ba1c15414 100644 --- a/youtube_dl/extractor/gazeta.py +++ b/youtube_dl/extractor/gazeta.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class GazetaIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P[A-Za-z0-9-_.]+)\.s?html)' + _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P[A-Za-z0-9-_.]+)\.s?html)' _TESTS = [{ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml', 'md5': 'd49c9bdc6e5a7888f27475dc215ee789', @@ -18,9 +18,22 @@ class GazetaIE(InfoExtractor): 'description': 'md5:38617526050bd17b234728e7f9620a71', 'thumbnail': 're:^https?://.*\.jpg', }, + 'skip': 'video not found', }, { 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml', 'only_matching': True, + }, { + 'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml', + 'info_dict': { + 'id': '252048', + 'ext': 'mp4', + 'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['EaglePlatform'], }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2aadd6a12..95d233259 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -60,6 +60,7 @@ from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE from .instagram import InstagramIE +from .liveleak import LiveLeakIE class GenericIE(InfoExtractor): @@ -104,7 +105,8 @@ class GenericIE(InfoExtractor): 'skip_download': True, # infinite live stream }, 'expected_warnings': [ - r'501.*Not Implemented' + r'501.*Not Implemented', + r'400.*Bad Request', ], }, # Direct link with incorrect MIME type @@ -1140,6 +1142,18 @@ class GenericIE(InfoExtractor): 'upload_date': '20160409', }, }, + # LiveLeak embed + { + 'url': 'http://www.wykop.pl/link/3088787/', + 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d', + 'info_dict': { + 'id': '874_1459135191', + 'ext': 'mp4', + 'title': 'Man shows poor quality of new apartment building', + 'description': 'The wall is like a sand pile.', + 'uploader': 'Lake8737', + } + }, ] def report_following_redirect(self, new_url): @@ -1942,7 +1956,13 @@ class GenericIE(InfoExtractor): # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) if instagram_embed_url is not None: - return self.url_result(instagram_embed_url, InstagramIE.ie_key()) + return self.url_result( + self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()) + + # Look for LiveLeak embeds + liveleak_url = LiveLeakIE._extract_url(webpage) + if liveleak_url: + return self.url_result(liveleak_url, 'LiveLeak') def check_video(vurl): if YoutubeIE.suitable(vurl): diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py index a38eae421..059073749 100644 --- a/youtube_dl/extractor/huffpost.py +++ b/youtube_dl/extractor/huffpost.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, parse_duration, unified_strdate, ) @@ -29,7 +30,12 @@ class HuffPostIE(InfoExtractor): 'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ', 'duration': 1549, 'upload_date': '20140124', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['HTTP Error 404: Not Found'], } def _real_extract(self, url): @@ -45,7 +51,7 @@ class HuffPostIE(InfoExtractor): description = data.get('description') thumbnails = [] - for url in data['images'].values(): + for url in filter(None, data['images'].values()): m = re.match('.*-([0-9]+x[0-9]+)\.', url) if not m: continue @@ -54,13 +60,25 @@ class HuffPostIE(InfoExtractor): 'resolution': m.group(1), }) - formats = [{ - 'format': key, - 'format_id': key.replace('/', '.'), - 'ext': 'mp4', - 'url': url, - 'vcodec': 'none' if key.startswith('audio/') else None, - } for key, url in data.get('sources', {}).get('live', {}).items()] + formats = [] + sources = data.get('sources', {}) + live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items()) + for key, url in live_sources: + ext = determine_ext(url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False)) + else: + formats.append({ + 'format': key, + 'format_id': key.replace('/', '.'), + 'ext': 'mp4', + 'url': url, + 'vcodec': 'none' if key.startswith('audio/') else None, + }) if not formats and data.get('fivemin_id'): return self.url_result('5min:%s' % data['fivemin_id']) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 11bb58d8a..3cbe77ad8 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -12,7 +12,7 @@ from ..utils import ( class InstagramIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P[^/?#&]+)' + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/p/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -38,10 +38,19 @@ class InstagramIE(InfoExtractor): }, { 'url': 'https://instagram.com/p/-Cmh1cukG2/', 'only_matching': True, + }, { + 'url': 'http://instagram.com/p/9o6LshA7zy/embed/', + 'only_matching': True, }] @staticmethod def _extract_embed_url(webpage): + mobj = re.search( + r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1', + webpage) + if mobj: + return mobj.group('url') + blockquote_el = get_element_by_attribute( 'class', 'instagram-media', webpage) if blockquote_el is None: @@ -53,7 +62,9 @@ class InstagramIE(InfoExtractor): return mobj.group('link') def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + url = mobj.group('url') webpage = self._download_webpage(url, video_id) uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 88570f261..ea8fbb329 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' - _VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html' + _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html' _NETRC_MACHINE = 'iqiyi' @@ -273,6 +273,9 @@ class IqiyiIE(InfoExtractor): 'title': '灌篮高手 国语版', }, 'playlist_count': 101, + }, { + 'url': 'http://www.pps.tv/w_19rrbav0ph.html', + 'only_matching': True, }] _FORMATS_MAP = [ diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index b4c30b7f3..a6050c4de 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -2,39 +2,63 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote_plus -from ..utils import ( - js_to_json, -) class KaraoketvIE(InfoExtractor): - _VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P[0-9]+)' + _VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P\d+)' _TEST = { - 'url': 'http://karaoketv.co.il/?container=songs&id=171568', + 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F', 'info_dict': { - 'id': '171568', - 'ext': 'mp4', - 'title': 'אל העולם שלך - רותם כהן - שרים קריוקי', + 'id': '58356', + 'ext': 'flv', + 'title': 'קריוקי של איזון', + }, + 'params': { + # rtmp download + 'skip_download': True, } } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + api_page_url = self._search_regex( + r']+src=(["\'])(?Phttps?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1', + webpage, 'API play URL', group='url') + + api_page = self._download_webpage(api_page_url, video_id) + video_cdn_url = self._search_regex( + r']+src=(["\'])(?Phttps?://www\.video-cdn\.com/embed/iframe/.+?)\1', + api_page, 'video cdn URL', group='url') + + video_cdn = self._download_webpage(video_cdn_url, video_id) + play_path = self._parse_json( + self._search_regex( + r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'), + video_id)['clip']['url'] - page_video_url = self._og_search_video_url(webpage, video_id) - config_json = compat_urllib_parse_unquote_plus(self._search_regex( - r'config=(.*)', page_video_url, 'configuration')) + settings = self._parse_json( + self._search_regex( + r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'), + video_id, fatal=False) or {} - urls_info_json = self._download_json( - config_json, video_id, 'Downloading configuration', - transform_source=js_to_json) + servers = settings.get('servers') + if not servers or not isinstance(servers, list): + servers = ('wowzail.video-cdn.com:80/vodcdn', ) - url = urls_info_json['playlist'][0]['url'] + formats = [{ + 'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server, + 'play_path': play_path, + 'app': 'vodcdn', + 'page_url': video_cdn_url, + 'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf', + 'rtmp_real_time': True, + 'ext': 'flv', + } for server in servers] return { 'id': video_id, 'title': self._og_search_title(webpage), - 'url': url, + 'formats': formats, } diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 86c17c931..c0ece5113 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -268,7 +268,7 @@ class KuwoCategoryIE(InfoExtractor): 'title': '八十年代精选', 'description': '这些都是属于八十年代的回忆!', }, - 'playlist_count': 30, + 'playlist_count': 24, } def _real_extract(self, url): diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 4684994e1..29fba5f30 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -53,6 +53,14 @@ class LiveLeakIE(InfoExtractor): } }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P[\w_]+)(?:.*)', + webpage) + if mobj: + return 'http://www.liveleak.com/view?i=%s' % mobj.group('id') + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 2338e7f96..2100583df 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -49,8 +49,8 @@ class MDRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Beutolomäus und der geheime Weihnachtswunsch', 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd', - 'timestamp': 1419047100, - 'upload_date': '20141220', + 'timestamp': 1450950000, + 'upload_date': '20151224', 'duration': 4628, 'uploader': 'KIKA', }, @@ -71,8 +71,8 @@ class MDRIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_url = self._search_regex( - r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1', - webpage, 'data url', default=None, group='url').replace('\/', '/') + r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1', + webpage, 'data url', group='url').replace('\/', '/') doc = self._download_xml( compat_urlparse.urljoin(url, data_url), video_id) diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index 949ad11db..e48eba3fa 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -1,8 +1,5 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor): 'id': '3453494717001', 'ext': 'mp4', 'title': 'The Gospel by Numbers', + 'thumbnail': 're:^https?://.*\.jpg', + 'upload_date': '20140410', 'description': 'Coming soon from T4G 2014!', - 'uploader': 'LifeWay Christian Resources (MG)', + 'uploader_id': '2034960640001', + 'timestamp': 1397145591, + }, + 'params': { + # m3u8 download + 'skip_download': True, }, + 'add_ie': ['TDSLifeway'], } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - portlets_json = self._search_regex( - r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list') - portlets = json.loads(portlets_json) + portlets = self._parse_json(self._search_regex( + r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'), + video_id) pl_id = self._search_regex( - r'