X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=86dc7930771a959cc2b6f28700fc840a4893d242;hb=ec85ded83cbfa652ba94cb080aab52d8b270212a;hp=0bb263ce7710f2869e13b1e1ad5f33773c823a94;hpb=26aae566902251f9674593a2b0f0ca7477b96a56;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0bb263ce7..86dc79307 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -47,6 +47,8 @@ from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE +from .drtuber import DrTuberIE +from .redtube import RedTubeIE from .vimeo import VimeoIE from .dailymotion import ( DailymotionIE, @@ -54,10 +56,10 @@ from .dailymotion import ( ) from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE -from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE from .videomore import VideomoreIE +from .webcaster import WebcasterFeedIE from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE @@ -71,8 +73,11 @@ from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE from .soundcloud import SoundcloudIE +from .tunein import TuneInBaseIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE +from .piksel import PikselIE +from .videa import VideaIE class GenericIE(InfoExtractor): @@ -234,7 +239,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Tikibad ontruimd wegens brand', 'description': 'md5:05ca046ff47b931f9b04855015e163a4', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 33, }, 'params': { @@ -295,7 +300,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 're:^Chris Ziegler takes a look at the\.*', + 
'description': r're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, @@ -341,10 +346,10 @@ class GenericIE(InfoExtractor): }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, - # embedded brightcove video - # it also tests brightcove videos that need to set the 'Referer' in the - # http requests { + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' + # in the http requests 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'info_dict': { @@ -358,6 +363,24 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # embedded with itemprop embedURL and video id spelled as `idVideo` + 'add_id': ['BrightcoveLegacy'], + 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', + 'info_dict': { + 'id': '5255628253001', + 'ext': 'mp4', + 'title': 'md5:37c519b1128915607601e75a87995fc0', + 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', + 'uploader': 'BFM BUSINESS', + 'uploader_id': '876450612001', + 'timestamp': 1482255315, + 'upload_date': '20161220', + }, + 'params': { + 'skip_download': True, + }, + }, { # https://github.com/rg3/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', @@ -516,7 +539,7 @@ class GenericIE(InfoExtractor): 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e', 'ext': 'mp4', 'title': 'Ужастики, русский трейлер (2015)', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 153, } }, @@ -736,7 +759,7 @@ class GenericIE(InfoExtractor): 'duration': 48, 'timestamp': 1401537900, 'upload_date': '20140531', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, # Wistia embed @@ -806,6 +829,21 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 7, }, + # TuneIn station embed + { + 'url': 
'http://radiocnrv.com/promouvoir-radio-cnrv/', + 'info_dict': { + 'id': '204146', + 'ext': 'mp3', + 'title': 'CNRV', + 'location': 'Paris, France', + 'is_live': True, + }, + 'params': { + # Live stream + 'skip_download': True, + }, + }, # Livestream embed { 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast', @@ -970,6 +1008,20 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + { + # Kaltura embedded, some fileExt broken (#11480) + 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics', + 'info_dict': { + 'id': '1_sgtvehim', + 'ext': 'mp4', + 'title': 'Our "Standard Models" of particle physics and cosmology', + 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861', + 'timestamp': 1321158993, + 'upload_date': '20111113', + 'uploader_id': 'kps1', + }, + 'add_ie': ['Kaltura'], + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -979,7 +1031,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Навальный вышел на свободу', 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 87, 'view_count': int, 'age_limit': 0, @@ -993,7 +1045,7 @@ class GenericIE(InfoExtractor): 'id': '12820', 'ext': 'mp4', 'title': "'O Sole Mio", - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 216, 'view_count': int, }, @@ -1006,7 +1058,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Тайны перевала Дятлова • 1 серия 2 часть', 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 694, 'age_limit': 0, }, @@ -1018,7 +1070,7 @@ class GenericIE(InfoExtractor): 'id': '3519514', 'ext': 'mp4', 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer', - 'thumbnail': 
're:^https?://.*\.png$', + 'thumbnail': r're:^https?://.*\.png$', 'duration': 45.115, }, }, @@ -1101,7 +1153,7 @@ class GenericIE(InfoExtractor): 'id': '300346', 'ext': 'mp4', 'title': '中一中男師變性 全校師生力挺', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download @@ -1147,7 +1199,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Sauvons les abeilles ! - Le débat', 'description': 'md5:d9082128b1c5277987825d684939ca26', - 'thumbnail': 're:^https?://.*\.jpe?g$', + 'thumbnail': r're:^https?://.*\.jpe?g$', 'timestamp': 1434970506, 'upload_date': '20150622', 'uploader': 'Public Sénat', @@ -1161,7 +1213,7 @@ class GenericIE(InfoExtractor): 'id': '2855', 'ext': 'mp4', 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', - 'thumbnail': 're:^https?://.*\.jpe?g$', + 'thumbnail': r're:^https?://.*\.jpe?g$', 'uploader': 'ClickHole', 'uploader_id': 'clickhole', } @@ -1187,16 +1239,6 @@ class GenericIE(InfoExtractor): 'duration': 248.667, }, }, - # ScreenwaveMedia embed - { - 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1', - 'md5': '24ace5baba0d35d55c6810b51f34e9e0', - 'info_dict': { - 'id': 'cinemasnob-55d26273809dd', - 'ext': 'mp4', - 'title': 'cinemasnob', - }, - }, # BrightcoveInPageEmbed embed { 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', @@ -1397,6 +1439,15 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, + { + # Videa embeds + 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html', + 'info_dict': { + 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style', + 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. 
Style - DVD Talk Forum', + }, + 'playlist_mincount': 2, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1981,11 +2032,6 @@ class GenericIE(InfoExtractor): if sportbox_urls: return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') - # Look for embedded PornHub player - pornhub_url = PornHubIE._extract_url(webpage) - if pornhub_url: - return self.url_result(pornhub_url, 'PornHub') - # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) if xhamster_urls: @@ -1996,6 +2042,21 @@ class GenericIE(InfoExtractor): if tnaflix_urls: return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) + # Look for embedded PornHub player + pornhub_urls = PornHubIE._extract_urls(webpage) + if pornhub_urls: + return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key()) + + # Look for embedded DrTuber player + drtuber_urls = DrTuberIE._extract_urls(webpage) + if drtuber_urls: + return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key()) + + # Look for embedded RedTube player + redtube_urls = RedTubeIE._extract_urls(webpage) + if redtube_urls: + return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) @@ -2043,6 +2104,11 @@ class GenericIE(InfoExtractor): if soundcloud_urls: return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) + # Look for tunein player + tunein_urls = TuneInBaseIE._extract_urls(webpage) + if tunein_urls: + return _playlist_from_matches(tunein_urls) + # Look for embedded mtvservices player mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) if mtvservices_url: @@ -2128,6 +2194,11 @@ class GenericIE(InfoExtractor): if videomore_url: return self.url_result(videomore_url) + # Look for Webcaster embeds + webcaster_url = WebcasterFeedIE._extract_url(self, webpage) + if webcaster_url: + 
return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key()) + # Look for Playwire embeds mobj = re.search( r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) @@ -2194,11 +2265,6 @@ class GenericIE(InfoExtractor): if jwplatform_url: return self.url_result(jwplatform_url, 'JWPlatform') - # Look for ScreenwaveMedia embeds - mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) - if mobj is not None: - return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia') - # Look for Digiteka embeds digiteka_url = DigitekaIE._extract_url(webpage) if digiteka_url: @@ -2209,6 +2275,11 @@ class GenericIE(InfoExtractor): if arkena_url: return self.url_result(arkena_url, ArkenaIE.ie_key()) + # Look for Piksel embeds + piksel_url = PikselIE._extract_url(webpage) + if piksel_url: + return self.url_result(piksel_url, PikselIE.ie_key()) + # Look for Limelight embeds mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) if mobj: @@ -2220,6 +2291,16 @@ class GenericIE(InfoExtractor): return self.url_result('limelight:%s:%s' % ( lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + mobj = re.search( + r'''(?sx) + <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*? 
+ <param[^>]+ + name=(["\'])flashVars\2[^>]+ + value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32}) + ''', webpage) + if mobj: + return self.url_result('limelight:media:%s' % mobj.group('id')) + # Look for AdobeTVVideo embeds mobj = re.search( r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', @@ -2308,6 +2389,11 @@ class GenericIE(InfoExtractor): if dbtv_urls: return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key()) + # Look for Videa embeds + videa_urls = VideaIE._extract_urls(webpage) + if videa_urls: + return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') @@ -2453,7 +2539,7 @@ class GenericIE(InfoExtractor): entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) elif ext == 'f4m': entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) - elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url): + elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: # Just matching .ism/manifest is not enough to be reliably sure # whether it's actually an ISM manifest or some other streaming # manifest since there are various streaming URL formats