From: remitamine Date: Thu, 3 Dec 2015 19:33:22 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' into bliptv X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=78653a33aa00ba5205940c2baac5d9f019795b88;hp=-c Merge remote-tracking branch 'upstream/master' into bliptv --- 78653a33aa00ba5205940c2baac5d9f019795b88 diff --combined youtube_dl/extractor/__init__.py index f9c40e6cd,62f32f8c8..5b842204f --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@@ -38,6 -38,7 +38,7 @@@ from .arte import ) from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE + from .audimedia import AudiMediaIE from .audiomack import AudiomackIE, AudiomackAlbumIE from .azubu import AzubuIE from .baidu import BaiduVideoIE @@@ -45,6 -46,7 +46,7 @@@ from .bambuser import BambuserIE, Bambu from .bandcamp import BandcampIE, BandcampAlbumIE from .bbc import ( BBCCoUkIE, + BBCCoUkArticleIE, BBCIE, ) from .beeg import BeegIE @@@ -54,11 -56,15 +56,14 @@@ from .bet import BetI from .bild import BildIE from .bilibili import BiliBiliIE from .blinkx import BlinkxIE -from .bliptv import BlipTVIE, BlipTVUserIE from .bloomberg import BloombergIE from .bpb import BpbIE from .br import BRIE from .breakcom import BreakIE - from .brightcove import BrightcoveIE + from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, + ) from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE @@@ -88,6 -94,7 +93,7 @@@ from .cliphunter import CliphunterI from .clipsyndicate import ClipsyndicateIE from .cloudy import CloudyIE from .clubic import ClubicIE + from .clyp import ClypIE from .cmt import CMTIE from .cnet import CNETIE from .cnn import ( @@@ -121,10 -128,12 +127,12 @@@ from .dbtv import DBTVI from .dcn import DCNIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE + from .democracynow import DemocracynowIE from .dfb import DFBIE from .dhm import DHMIE from .dotsub import 
DotsubIE from .douyutv import DouyuTVIE + from .dplay import DPlayIE from .dramafever import ( DramaFeverIE, DramaFeverSeriesIE, @@@ -208,13 -217,15 +216,15 @@@ from .gfycat import GfycatI from .giantbomb import GiantBombIE from .giga import GigaIE from .glide import GlideIE - from .globo import GloboIE + from .globo import ( + GloboIE, + GloboArticleIE, + ) from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE - from .gorillavid import GorillaVidIE from .goshgay import GoshgayIE from .groupon import GrouponIE from .hark import HarkIE @@@ -262,7 -273,6 +272,7 @@@ from .jadorecettepub import JadoreCette from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .jukebox import JukeboxIE +from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE from .kaltura import KalturaIE from .kanalplay import KanalPlayIE @@@ -317,7 -327,6 +327,7 @@@ from .lynda import from .m6 import M6IE from .macgamestore import MacGameStoreIE from .mailru import MailRuIE +from .makertv import MakerTVIE from .malemotion import MalemotionIE from .mdr import MDRIE from .metacafe import MetacafeIE @@@ -413,7 -422,10 +423,10 @@@ from .nowness import NownessPlaylistIE, NownessSeriesIE, ) - from .nowtv import NowTVIE + from .nowtv import ( + NowTVIE, + NowTVListIE, + ) from .nowvideo import NowVideoIE from .npo import ( NPOIE, @@@ -451,10 -463,7 +464,7 @@@ from .orf import from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE - from .periscope import ( - PeriscopeIE, - QuickscopeIE, - ) + from .periscope import PeriscopeIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE @@@ -546,6 -555,10 +556,10 @@@ from .shahid import ShahidI from .shared import SharedIE from .sharesix import ShareSixIE from .sina import SinaIE + from 
.skynewsarabia import ( + SkyNewsArabiaIE, + SkyNewsArabiaArticleIE, + ) from .slideshare import SlideshareIE from .slutload import SlutloadIE from .smotri import ( @@@ -568,7 -581,8 +582,8 @@@ from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE, - SoundcloudPlaylistIE + SoundcloudPlaylistIE, + SoundcloudSearchIE ) from .soundgasm import ( SoundgasmIE, @@@ -587,6 -601,7 +602,7 @@@ from .spankwire import SpankwireI from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE from .spike import SpikeIE + from .stitcher import StitcherIE from .sport5 import Sport5IE from .sportbox import ( SportBoxIE, @@@ -691,7 -706,7 +707,7 @@@ from .twitch import TwitchBookmarksIE, TwitchStreamIE, ) - from .twitter import TwitterCardIE + from .twitter import TwitterCardIE, TwitterIE from .ubu import UbuIE from .udemy import ( UdemyIE, @@@ -718,7 -733,6 +734,6 @@@ from .vh1 import VH1I from .vice import ViceIE from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE - from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE from .videopremium import VideoPremiumIE @@@ -728,6 -742,7 +743,7 @@@ from .vidme import VidmeI from .vidzi import VidziIE from .vier import VierIE, VierVideosIE from .viewster import ViewsterIE + from .viidea import ViideaIE from .vimeo import ( VimeoIE, VimeoAlbumIE, @@@ -780,6 -795,7 +796,7 @@@ from .wrzuta import WrzutaI from .wsj import WSJIE from .xbef import XBefIE from .xboxclips import XboxClipsIE + from .xfileshare import XFileShareIE from .xhamster import ( XHamsterIE, XHamsterEmbedIE, @@@ -823,6 -839,7 +840,7 @@@ from .youtube import YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeUserIE, + YoutubeUserPlaylistsIE, YoutubeWatchLaterIE, ) from .zapiks import ZapiksIE diff --combined youtube_dl/extractor/generic.py index 285c0ff66,5075d131e..4d38b0c9d --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py 
@@@ -9,8 -9,8 +9,8 @@@ import sy from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( + compat_etree_fromstring, compat_urllib_parse_unquote, - compat_urllib_request, compat_urlparse, compat_xml_parse_error, ) @@@ -21,7 -21,7 +21,7 @@@ from ..utils import HEADRequest, is_html, orderedSet, - parse_xml, + sanitized_Request, smuggle_url, unescapeHTML, unified_strdate, @@@ -30,7 -30,10 +30,10 @@@ url_basename, xpath_text, ) - from .brightcove import BrightcoveIE + from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, + ) from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE @@@ -41,6 -44,7 +44,6 @@@ from .myvi import MyviI from .condenast import CondeNastIE from .udn import UDNEmbedIE from .senateisvp import SenateISVPIE -from .bliptv import BlipTVIE from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE @@@ -140,6 -144,7 +143,7 @@@ class GenericIE(InfoExtractor) 'ext': 'mp4', 'title': 'Automatics, robotics and biocybernetics', 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', + 'upload_date': '20130627', 'formats': 'mincount:16', 'subtitles': 'mincount:1', }, @@@ -273,7 -278,7 +277,7 @@@ # it also tests brightcove videos that need to set the 'Referer' in the # http requests { - 'add_ie': ['Brightcove'], + 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'info_dict': { 'id': '2765128793001', @@@ -297,7 -302,7 +301,7 @@@ 'uploader': 'thestar.com', 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. 
To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', }, - 'add_ie': ['Brightcove'], + 'add_ie': ['BrightcoveLegacy'], }, { 'url': 'http://www.championat.com/video/football/v/87/87499.html', @@@ -312,7 -317,7 +316,7 @@@ }, { # https://github.com/rg3/youtube-dl/issues/3541 - 'add_ie': ['Brightcove'], + 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', 'info_dict': { 'id': '3866516442001', @@@ -818,6 -823,19 +822,19 @@@ 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', }, }, + # Kaltura embed protected with referrer + { + 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero', + 'info_dict': { + 'id': '1_g4fbemnq', + 'ext': 'mp4', + 'title': 'Violetta - Achter De Schermen - Ruggero', + 'description': 'Achter de schermen met Ruggero', + 'timestamp': 1435133761, + 'upload_date': '20150624', + 'uploader_id': 'echojecka', + }, + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@@ -1029,6 -1047,31 +1046,31 @@@ 'ext': 'mp4', 'title': 'cinemasnob', }, + }, + # BrightcoveInPageEmbed embed + { + 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', + 'info_dict': { + 'id': '4238694884001', + 'ext': 'flv', + 'title': 'Tabletop: Dread, Last Thoughts', + 'description': 'Tabletop: Dread, Last Thoughts', + 'duration': 51690, + }, + }, + # JWPlayer with M3U8 + { + 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', + 'info_dict': { + 'id': 'playlist', + 'ext': 'mp4', + 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. 
ВИДЕО | РЕН ТВ', + 'uploader': 'ren.tv', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + } } ] @@@ -1172,7 -1215,7 +1214,7 @@@ full_response = None if head_response is False: - request = compat_urllib_request.Request(url) + request = sanitized_Request(url) request.add_header('Accept-Encoding', '*') full_response = self._request_webpage(request, video_id) head_response = full_response @@@ -1201,7 -1244,7 +1243,7 @@@ '%s on generic information extractor.' % ('Forcing' if force else 'Falling back')) if not full_response: - request = compat_urllib_request.Request(url) + request = sanitized_Request(url) # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) # making it impossible to download only chunk of the file (yet we need only 512kB to # test whether it's HTML or not). According to youtube-dl default Accept-Encoding @@@ -1236,7 -1279,7 +1278,7 @@@ # Is it an RSS feed, a SMIL file or a XSPF playlist? try: - doc = parse_xml(webpage) + doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): @@@ -1288,14 -1331,14 +1330,14 @@@ return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) - # Look for BrightCove: - bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) + # Look for Brightcove Legacy Studio embeds + bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) if bc_urls: self.to_screen('Brightcove video detected.') entries = [{ '_type': 'url', 'url': smuggle_url(bc_url, {'Referer': url}), - 'ie_key': 'Brightcove' + 'ie_key': 'BrightcoveLegacy' } for bc_url in bc_urls] return { @@@ -1305,6 -1348,11 +1347,11 @@@ 'entries': entries, } + # Look for Brightcove New Studio embeds + bc_urls = BrightcoveNewIE._extract_urls(webpage) + if bc_urls: + return _playlist_from_matches(bc_urls, ie='BrightcoveNew') + # Look for embedded rtl.nl player matches = re.findall( 
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', @@@ -1388,6 -1436,11 +1435,6 @@@ 'id': match.group('id') } - # Look for embedded blip.tv player - bliptv_url = BlipTVIE._extract_url(webpage) - if bliptv_url: - return self.url_result(bliptv_url, 'BlipTV') - # Look for SVT player svt_url = SVTIE._extract_url(webpage) if svt_url: @@@ -1665,10 -1718,12 +1712,12 @@@ return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or - re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage)) + mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or + re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) if mobj is not None: - return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura') + return self.url_result(smuggle_url( + 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), + {'source_url': url}), 'Kaltura') # Look for Eagle.Platform embeds mobj = re.search( @@@ -1713,7 -1768,7 +1762,7 @@@ # Look for UDN embeds mobj = re.search( - r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage) + r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage) if mobj is not None: return self.url_result( compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') @@@ -1833,6 -1888,7 +1882,7 @@@ entries = [] for video_url in found: + video_url = video_url.replace('\\/', '/') video_url = compat_urlparse.urljoin(url, video_url) video_id = compat_urllib_parse_unquote(os.path.basename(video_url)) @@@ -1844,25 -1900,24 +1894,24 @@@ # here's a fun little line of code for you: video_id = 
os.path.splitext(video_id)[0] + entry_info_dict = { + 'id': video_id, + 'uploader': video_uploader, + 'title': video_title, + 'age_limit': age_limit, + } + ext = determine_ext(video_url) if ext == 'smil': - entries.append({ - 'id': video_id, - 'formats': self._extract_smil_formats(video_url, video_id), - 'uploader': video_uploader, - 'title': video_title, - 'age_limit': age_limit, - }) + entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id) elif ext == 'xspf': return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) + elif ext == 'm3u8': + entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4') else: - entries.append({ - 'id': video_id, - 'url': video_url, - 'uploader': video_uploader, - 'title': video_title, - 'age_limit': age_limit, - }) + entry_info_dict['url'] = video_url + + entries.append(entry_info_dict) if len(entries) == 1: return entries[0]