From: Yen Chi Hsuan Date: Thu, 8 Sep 2016 09:28:46 +0000 (+0800) Subject: Merge pull request #10594 from stepshal/https_support X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=17bf6e71ccc12516c0611e19c52f2daa1f8117df;hp=89f257d6e57131a266efae629334fe5f4bcf96e9 Merge pull request #10594 from stepshal/https_support Add support for https for rest of the exctractors. --- diff --git a/AUTHORS b/AUTHORS index 78660f014..937742c5d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -184,3 +184,4 @@ Pratyush Singh Aleksander Nitecki Sebastian Blunt Matěj Cepl +Xie Yanbo diff --git a/ChangeLog b/ChangeLog index f6858f1d8..cec87d5cd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,9 @@ version Extractors +* [foxgay] Fix extraction (#10480) ++ [miaopai] New extractor (#10556) +* [gamestar] Fix metadata extraction (#10479) + [bilibili] Support episodes (#10190) + [tvnoe] New extractor (#10524) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9fe824c67..8d9c2ae13 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -476,6 +476,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE +from .miaopai import MiaoPaiIE from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, @@ -723,6 +724,7 @@ from .revision3 import ( ) from .rice import RICEIE from .ringtv import RingTVIE +from .rmcdecouverte import RMCDecouverteIE from .ro220 import Ro220IE from .rockstargames import RockstarGamesIE from .roosterteeth import RoosterTeethIE diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py index 70c1a815d..39174fcec 100644 --- a/youtube_dl/extractor/foxgay.py +++ b/youtube_dl/extractor/foxgay.py @@ -1,18 +1,24 @@ from __future__ import unicode_literals +import itertools + from .common import InfoExtractor +from ..utils import ( + get_element_by_id, + remove_end, +) class FoxgayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P\d+)\.shtml' _TEST = { 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', - 'md5': '80d72beab5d04e1655a56ad37afe6841', + 'md5': '344558ccfea74d33b7adbce22e577f54', 'info_dict': { 'id': '2582', 'ext': 'mp4', - 'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a', - 'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf', + 'title': 'Fuck Turkish-style', + 'description': 'md5:6ae2d9486921891efe89231ace13ffdf', 'age_limit': 18, 'thumbnail': 're:https?://.*\.jpg$', }, @@ -22,27 +28,35 @@ class FoxgayIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'(?P<title>.*?)', - webpage, 'title', fatal=False) - description = self._html_search_regex( - r'

(?P.*?)

', - webpage, 'description', fatal=False) + title = remove_end(self._html_search_regex( + r'([^<]+)', webpage, 'title'), ' - Foxgay.com') + description = get_element_by_id('inf_tit', webpage) + # The default user-agent with foxgay cookies leads to pages without videos + self._downloader.cookiejar.clear('.foxgay.com') # Find the URL for the iFrame which contains the actual video. + iframe_url = self._html_search_regex( + r']+src=([\'"])(?P[^\'"]+)\1', webpage, + 'video frame', group='url') iframe = self._download_webpage( - self._html_search_regex(r'iframe src="(?P.*?)"', webpage, 'video frame'), - video_id) - video_url = self._html_search_regex( - r"v_path = '(?Phttp://.*?)'", iframe, 'url') - thumb_url = self._html_search_regex( - r"t_path = '(?Phttp://.*?)'", iframe, 'thumbnail', fatal=False) + iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'}, + note='Downloading video frame') + video_data = self._parse_json(self._search_regex( + r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id) + + formats = [{ + 'url': source, + 'height': resolution, + } for source, resolution in zip( + video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))] + + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'description': description, - 'thumbnail': thumb_url, + 'thumbnail': video_data.get('act_vid', {}).get('thumb'), 'age_limit': 18, } diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 69058a583..341e72733 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -1,14 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, - parse_duration, - str_to_int, - unified_strdate, + remove_end, ) @@ -21,8 +17,9 @@ class GameStarIE(InfoExtractor): 'id': '76110', 'ext': 'mp4', 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil', - 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.', - 'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg', + 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1406542020, 'upload_date': '20140728', 'duration': 17 } @@ -32,41 +29,27 @@ class GameStarIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - og_title = self._og_search_title(webpage) - title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title) - url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id - description = self._og_search_description(webpage).strip() - - thumbnail = self._proto_relative_url( - self._og_search_thumbnail(webpage), scheme='http:') - - upload_date = unified_strdate(self._html_search_regex( - r'Datum: ([0-9]+\.[0-9]+\.[0-9]+)  ', - webpage, 'upload_date', fatal=False)) - - duration = parse_duration(self._html_search_regex( - r'  Länge: ([0-9]+:[0-9]+)', webpage, 'duration', - fatal=False)) - - view_count = str_to_int(self._html_search_regex( - r'  Zuschauer: ([0-9\.]+)  ', webpage, - 'view_count', fatal=False)) + # TODO: there are multiple ld+json objects in the webpage, + # while _search_json_ld finds only the first one + json_ld = self._parse_json(self._search_regex( + r'(?s)]+type=(["\'])application/ld\+json\1[^>]*>(?P[^<]+VideoObject[^<]+)', + webpage, 'JSON-LD', group='json_ld'), video_id) + info_dict = self._json_ld(json_ld, video_id) + info_dict['title'] = remove_end(info_dict['title'], ' - GameStar') + view_count = json_ld.get('interactionCount') comment_count = int_or_none(self._html_search_regex( - r'>Kommentieren \(([0-9]+)\)', webpage, 'comment_count', + r'([0-9]+) Kommentare', webpage, 'comment_count', fatal=False)) - return { + info_dict.update({ 'id': video_id, - 'title': title, 'url': url, 'ext': 'mp4', - 'thumbnail': thumbnail, - 'description': description, - 'upload_date': upload_date, - 'duration': duration, 'view_count': view_count, 'comment_count': comment_count - } + }) + + return info_dict diff --git a/youtube_dl/extractor/miaopai.py b/youtube_dl/extractor/miaopai.py new file mode 100644 index 000000000..f9e35ac7f --- /dev/null +++ b/youtube_dl/extractor/miaopai.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class MiaoPaiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P[-A-Za-z0-9~_]+)' + _TEST = { + 'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm', + 'md5': '095ed3f1cd96b821add957bdc29f845b', + 'info_dict': { + 'id': 'n~0hO7sfV1nBEw4Y29-Hqg__', + 'ext': 'mp4', + 'title': '西游记音乐会的秒拍视频', + 'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg', + } + } + + _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD}) + + title = self._html_search_regex( + r'([^<]+)', webpage, 'title') + thumbnail = self._html_search_regex( + r']+class=(?P[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P[\'"])(?P[^\'"]+)(?P=q2)', + webpage, 'thumbnail', fatal=False, group='url') + videos = self._parse_html5_media_entries(url, webpage, video_id) + info = videos[0] + + info.update({ + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + }) + return info diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 978d5d5bf..91ee9c4e9 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -35,7 +35,8 @@ class MoeVideoIE(InfoExtractor): 'height': 360, 'duration': 179, 'filesize': 17822500, - } + }, + 'skip': 'Video has been removed', }, { 'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a', diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index c6eee3b72..7335dc2af 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -15,7 +15,111 @@ from ..utils import ( ) -class ProSiebenSat1IE(InfoExtractor): +class ProSiebenSat1BaseIE(InfoExtractor): + def _extract_video_info(self, url, clip_id): + client_location = url + + video = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos', + clip_id, 'Downloading videos JSON', query={ + 'access_token': self._TOKEN, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'ids': clip_id, + })[0] + + if video.get('is_protected') is True: + raise ExtractorError('This video is DRM protected.', expected=True) + + duration = float_or_none(video.get('duration')) + source_ids = [compat_str(source['id']) for source in video['sources']] + + client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + + sources = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, + clip_id, 'Downloading sources JSON', query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + }) + server_id = sources['server_id'] + + def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None + return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate + + formats = [] + for source_id in source_ids: + client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + urls = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, + clip_id, 'Downloading urls JSON', fatal=False, query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'server_id': server_id, + 'source_ids': source_id, + }) + if not urls: + continue + if urls.get('status_code') != 0: + raise ExtractorError('This video is unavailable', expected=True) + urls_sources = urls['sources'] + if isinstance(urls_sources, dict): + urls_sources = urls_sources.values() + for source in urls_sources: + source_url = source.get('url') + if not source_url: + continue + protocol = source.get('protocol') + mimetype = source.get('mimetype') + if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, clip_id, f4m_id='hds', fatal=False)) + elif mimetype == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + tbr = fix_bitrate(source['bitrate']) + if protocol in ('rtmp', 'rtmpe'): + mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source_url) + if not mobj: + continue + path = mobj.group('path') + mp4colon_index = path.rfind('mp4:') + app = path[:mp4colon_index] + play_path = path[mp4colon_index:] + formats.append({ + 'url': '%s/%s' % (mobj.group('url'), app), + 'app': app, + 'play_path': play_path, + 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', + 'page_url': 'http://www.prosieben.de', + 'tbr': tbr, + 'ext': 'flv', + 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), + }) + else: + formats.append({ + 'url': source_url, + 'tbr': tbr, + 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), + }) + self._sort_formats(formats) + + return { + 'duration': duration, + 'formats': formats, + } + + +class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P.+)' @@ -188,6 +292,9 @@ class ProSiebenSat1IE(InfoExtractor): }, ] + _TOKEN = 'prosieben' + _SALT = '01!8d8F_)r9]4s[qeuXfP%' + _CLIENT_NAME = 'kolibri-2.0.19-splec4' _CLIPID_REGEXES = [ r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', @@ -234,123 +341,22 @@ class ProSiebenSat1IE(InfoExtractor): def _extract_clip(self, url, webpage): clip_id = self._html_search_regex( self._CLIPID_REGEXES, webpage, 'clip id') - - access_token = 'prosieben' - client_name = 'kolibri-2.0.19-splec4' - client_location = url - - video = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos', - clip_id, 'Downloading videos JSON', query={ - 'access_token': access_token, - 'client_location': client_location, - 'client_name': client_name, - 'ids': clip_id, - })[0] - - if video.get('is_protected') is True: - raise ExtractorError('This video is DRM protected.', expected=True) - - duration = float_or_none(video.get('duration')) - source_ids = [compat_str(source['id']) for source in video['sources']] - - g = '01!8d8F_)r9]4s[qeuXfP%' - client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest() - - sources = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, - clip_id, 'Downloading sources JSON', query={ - 'access_token': access_token, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': client_name, - }) - server_id = sources['server_id'] - title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') - - def fix_bitrate(bitrate): - bitrate = int_or_none(bitrate) - if not bitrate: - return None - return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate - - formats = [] - for source_id in source_ids: - client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest() - urls = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, - clip_id, 'Downloading urls JSON', fatal=False, query={ - 'access_token': access_token, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': client_name, - 'server_id': server_id, - 'source_ids': source_id, - }) - if not urls: - continue - if urls.get('status_code') != 0: - raise ExtractorError('This video is unavailable', expected=True) - urls_sources = urls['sources'] - if isinstance(urls_sources, dict): - urls_sources = urls_sources.values() - for source in urls_sources: - source_url = source.get('url') - if not source_url: - continue - protocol = source.get('protocol') - mimetype = source.get('mimetype') - if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, clip_id, f4m_id='hds', fatal=False)) - elif mimetype == 'application/x-mpegURL': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - tbr = fix_bitrate(source['bitrate']) - if protocol in ('rtmp', 'rtmpe'): - mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source_url) - if not mobj: - continue - path = mobj.group('path') - mp4colon_index = path.rfind('mp4:') - app = path[:mp4colon_index] - play_path = path[mp4colon_index:] - formats.append({ - 'url': '%s/%s' % (mobj.group('url'), app), - 'app': app, - 'play_path': play_path, - 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', - 'page_url': 'http://www.prosieben.de', - 'tbr': tbr, - 'ext': 'flv', - 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), - }) - else: - formats.append({ - 'url': source_url, - 'tbr': tbr, - 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), - }) - self._sort_formats(formats) - + info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._html_search_regex( self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) - return { + info.update({ 'id': clip_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, - 'duration': duration, - 'formats': formats, - } + }) + return info def _extract_playlist(self, url, webpage): playlist_id = self._html_search_regex( diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index fca30e1aa..9c2ccbe2d 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -1,88 +1,51 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from .common import InfoExtractor +from .prosiebensat1 import ProSiebenSat1BaseIE from ..utils import ( - ExtractorError, unified_strdate, - int_or_none, + parse_duration, + compat_str, ) -class Puls4IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P[0-9]+)' +class Puls4IE(ProSiebenSat1BaseIE): + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P(?:[^/]+/)*?videos/[^?#]+)' _TESTS = [{ - 'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816', - 'md5': '49f6a6629747eeec43cef6a46b5df81d', + 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', + 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', 'info_dict': { - 'id': '2716816', - 'ext': 'mp4', - 'title': 'Pro und Contra vom 23.02.2015', - 'description': 'md5:293e44634d9477a67122489994675db6', - 'duration': 2989, - 'upload_date': '20150224', + 'id': '118118', + 'ext': 'flv', + 'title': 'Tobias Homberger von myclubs im #2min2miotalk', + 'description': 'md5:f9def7c5e8745d6026d8885487d91955', + 'upload_date': '20160830', 'uploader': 'PULS_4', }, - 'skip': 'Only works from Germany', - }, { - 'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106', - 'md5': '6a48316c8903ece8dab9b9a7bf7a59ec', - 'info_dict': { - 'id': '1298106', - 'ext': 'mp4', - 'title': 'Lucky Fritz', - }, - 'skip': 'Only works from Germany', }] + _TOKEN = 'puls4' + _SALT = '01!kaNgaiNgah1Ie4AeSha' + _CLIENT_NAME = '' def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - error_message = self._html_search_regex( - r']+class="message-error"[^>]*>(.+?)
', - webpage, 'error message', default=None) - if error_message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) - - real_url = self._html_search_regex( - r'\"fsk-button\".+?href=\"([^"]+)', - webpage, 'fsk_button', default=None) - if real_url: - webpage = self._download_webpage(real_url, video_id) - - player = self._search_regex( - r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}', - webpage, 'player') - - player_json = self._parse_json( - '[%s]' % player, video_id, - transform_source=lambda s: s.replace('undefined,', '')) - - formats = None - result = None - - for v in player_json: - if isinstance(v, list) and not formats: - formats = [{ - 'url': f['url'], - 'format': 'hd' if f.get('hd') else 'sd', - 'width': int_or_none(f.get('size_x')), - 'height': int_or_none(f.get('size_y')), - 'tbr': int_or_none(f.get('bitrate')), - } for f in v] - self._sort_formats(formats) - elif isinstance(v, dict) and not result: - result = { - 'id': video_id, - 'title': v['videopartname'].strip(), - 'description': v.get('videotitle'), - 'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')), - 'upload_date': unified_strdate(v.get('clipreleasetime')), - 'uploader': v.get('channel'), - } - - result['formats'] = formats - - return result + path = self._match_id(url) + content_path = self._download_json( + 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url'] + media = self._download_json( + 'http://www.puls4.com' + content_path, + content_path)['mediaCurrent'] + player_content = media['playerContent'] + info = self._extract_video_info(url, player_content['id']) + info.update({ + 'id': compat_str(media['objectId']), + 'title': player_content['title'], + 'description': media.get('description'), + 'thumbnail': media.get('previewLink'), + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(player_content.get('duration')), + 'episode': player_content.get('episodePartName'), + 'show': media.get('channel'), + 'season_id': player_content.get('seasonId'), + 'uploader': player_content.get('sourceCompany'), + }) + return info diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py new file mode 100644 index 000000000..f3bb4fa66 --- /dev/null +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -0,0 +1,39 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .brightcove import BrightcoveLegacyIE +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) + + +class RMCDecouverteIE(InfoExtractor): + _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P\d+)' + + _TEST = { + 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=1430&title=LES%20HEROS%20DU%2088e%20ETAGE', + 'info_dict': { + 'id': '5111223049001', + 'ext': 'mp4', + 'title': ': LES HEROS DU 88e ETAGE', + 'description': 'Découvrez comment la bravoure de deux hommes dans la Tour Nord du World Trade Center a sauvé la vie d\'innombrables personnes le 11 septembre 2001.', + 'uploader_id': '1969646226001', + 'upload_date': '20160904', + 'timestamp': 1472951103, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Only works from France', + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) + brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index abad3ff64..88eb83d74 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -1,10 +1,14 @@ # encoding: utf-8 from __future__ import unicode_literals + import re from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE -from ..compat import compat_parse_qs +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) class TlcDeIE(InfoExtractor): @@ -35,5 +39,5 @@ class TlcDeIE(InfoExtractor): title = mobj.group('title') webpage = self._download_webpage(url, title) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0] + brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)