Merge branch 'googledrive' of github.com:remitamine/youtube-dl into remitamine-google...

author remitamine <remitamine@gmail.com>

Mon, 21 Dec 2015 02:15:19 +0000 (03:15 +0100)

committer remitamine <remitamine@gmail.com>

Mon, 21 Dec 2015 02:15:19 +0000 (03:15 +0100)
author remitamine <remitamine@gmail.com>
Mon, 21 Dec 2015 02:15:19 +0000 (03:15 +0100)
committer remitamine <remitamine@gmail.com>
Mon, 21 Dec 2015 02:15:19 +0000 (03:15 +0100)
diff --combined youtube_dl/extractor/__init__.py

index 908581bf7448a8e44ccb6e04bbda0ab85c02b32a,6655d7eb5466b2a0a972e533c1531ad976dc1160..e7b536df03ef25c7dfbf2bc63264928016c07543
--- 1/youtube_dl/extractor/__init__.py
--- 2/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@@ -3,15 -3,9 +3,15 @@@ from __future__ import unicode_literal
   from .abc import ABCIE
   from .abc7news import Abc7NewsIE
   from .academicearth import AcademicEarthCourseIE
+ +from .acast import (
+ +    ACastIE,
+ +    ACastChannelIE,
+ +)
   from .addanime import AddAnimeIE
   from .adobetv import (
       AdobeTVIE,
+ +    AdobeTVShowIE,
+ +    AdobeTVChannelIE,
       AdobeTVVideoIE,
   )
   from .adultswim import AdultSwimIE
@@@ -44,17 -38,12 +44,17 @@@ from .arte import 
   )
   from .atresplayer import AtresPlayerIE
   from .atttechchannel import ATTTechChannelIE
+ +from .audimedia import AudiMediaIE
   from .audiomack import AudiomackIE, AudiomackAlbumIE
   from .azubu import AzubuIE
   from .baidu import BaiduVideoIE
   from .bambuser import BambuserIE, BambuserChannelIE
   from .bandcamp import BandcampIE, BandcampAlbumIE
- -from .bbccouk import BBCCoUkIE
+ +from .bbc import (
+ +    BBCCoUkIE,
+ +    BBCCoUkArticleIE,
+ +    BBCIE,
+ +)
   from .beeg import BeegIE
   from .behindkink import BehindKinkIE
   from .beatportpro import BeatportProIE
@@@ -67,10 -56,7 +67,10 @@@ from .bloomberg import BloombergI
   from .bpb import BpbIE
   from .br import BRIE
   from .breakcom import BreakIE
- -from .brightcove import BrightcoveIE
+ +from .brightcove import (
+ +    BrightcoveLegacyIE,
+ +    BrightcoveNewIE,
+ +)
   from .buzzfeed import BuzzFeedIE
   from .byutv import BYUtvIE
   from .c56 import C56IE
@@@ -78,6 -64,7 +78,6 @@@ from .camdemy import 
       CamdemyIE,
       CamdemyFolderIE
   )
- -from .canal13cl import Canal13clIE
   from .canalplus import CanalplusIE
   from .canalc2 import Canalc2IE
   from .cbs import CBSIE
@@@ -86,7 -73,6 +86,7 @@@ from .cbssports import CBSSportsI
   from .ccc import CCCIE
   from .ceskatelevize import CeskaTelevizeIE
   from .channel9 import Channel9IE
+ +from .chaturbate import ChaturbateIE
   from .chilloutzone import ChilloutzoneIE
   from .chirbit import (
       ChirbitIE,
@@@ -99,7 -85,6 +99,7 @@@ from .cliphunter import CliphunterI
   from .clipsyndicate import ClipsyndicateIE
   from .cloudy import CloudyIE
   from .clubic import ClubicIE
+ +from .clyp import ClypIE
   from .cmt import CMTIE
   from .cnet import CNETIE
   from .cnn import (
@@@ -130,15 -115,12 +130,15 @@@ from .dailymotion import 
   )
   from .daum import DaumIE
   from .dbtv import DBTVIE
+ +from .dcn import DCNIE
   from .dctp import DctpTvIE
   from .deezer import DeezerPlaylistIE
+ +from .democracynow import DemocracynowIE
   from .dfb import DFBIE
   from .dhm import DHMIE
   from .dotsub import DotsubIE
   from .douyutv import DouyuTVIE
+ +from .dplay import DPlayIE
   from .dramafever import (
       DramaFeverIE,
       DramaFeverSeriesIE,
@@@ -152,6 -134,7 +152,6 @@@ from .dump import DumpI
   from .dumpert import DumpertIE
   from .defense import DefenseGouvFrIE
   from .discovery import DiscoveryIE
- -from .divxstage import DivxStageIE
   from .dropbox import DropboxIE
   from .eagleplatform import EaglePlatformIE
   from .ebaumsworld import EbaumsWorldIE
@@@ -171,8 -154,6 +171,8 @@@ from .eporner import EpornerI
   from .eroprofile import EroProfileIE
   from .escapist import EscapistIE
   from .espn import ESPNIE
+ +from .esri import EsriVideoIE
+ +from .europa import EuropaIE
   from .everyonesmixtape import EveryonesMixtapeIE
   from .exfm import ExfmIE
   from .expotv import ExpoTVIE
@@@ -180,12 -161,14 +180,12 @@@ from .extremetube import ExtremeTubeI
   from .facebook import FacebookIE
   from .faz import FazIE
   from .fc2 import FC2IE
+ +from .fczenit import FczenitIE
   from .firstpost import FirstpostIE
   from .firsttv import FirstTVIE
   from .fivemin import FiveMinIE
   from .fivetv import FiveTVIE
- -from .fktv import (
- -    FKTVIE,
- -    FKTVPosteckeIE,
- -)
+ +from .fktv import FKTVIE
   from .flickr import FlickrIE
   from .folketinget import FolketingetIE
   from .footyroom import FootyRoomIE
@@@ -205,9 -188,7 +205,9 @@@ from .francetv import 
   from .freesound import FreesoundIE
   from .freespeech import FreespeechIE
   from .freevideo import FreeVideoIE
+ +from .funimation import FunimationIE
   from .funnyordie import FunnyOrDieIE
+ +from .gameinformer import GameInformerIE
   from .gamekings import GamekingsIE
   from .gameone import (
       GameOneIE,
@@@ -224,17 -205,15 +224,18 @@@ from .gfycat import GfycatI
   from .giantbomb import GiantBombIE
   from .giga import GigaIE
   from .glide import GlideIE
- -from .globo import GloboIE
+ +from .globo import (
+ +    GloboIE,
+ +    GloboArticleIE,
+ +)
   from .godtube import GodTubeIE
   from .goldenmoustache import GoldenMoustacheIE
   from .golem import GolemIE
+ from .googledrive import GoogleDriveIE
   from .googleplus import GooglePlusIE
   from .googlesearch import GoogleSearchIE
- -from .gorillavid import GorillaVidIE
   from .goshgay import GoshgayIE
+ +from .gputechconf import GPUTechConfIE
   from .groupon import GrouponIE
   from .hark import HarkIE
   from .hearthisat import HearThisAtIE
@@@ -246,6 -225,7 +247,6 @@@ from .historicfilms import HistoricFilm
   from .history import HistoryIE
   from .hitbox import HitboxIE, HitboxLiveIE
   from .hornbunny import HornBunnyIE
- -from .hostingbulk import HostingBulkIE
   from .hotnewhiphop import HotNewHipHopIE
   from .howcast import HowcastIE
   from .howstuffworks import HowStuffWorksIE
@@@ -257,21 -237,13 +258,21 @@@ from .imdb import 
       ImdbIE,
       ImdbListIE
   )
- -from .imgur import ImgurIE
+ +from .imgur import (
+ +    ImgurIE,
+ +    ImgurAlbumIE,
+ +)
   from .ina import InaIE
+ +from .indavideo import (
+ +    IndavideoIE,
+ +    IndavideoEmbedIE,
+ +)
   from .infoq import InfoQIE
   from .instagram import InstagramIE, InstagramUserIE
   from .internetvideoarchive import InternetVideoArchiveIE
   from .iprima import IPrimaIE
   from .iqiyi import IqiyiIE
+ +from .ir90tv import Ir90TvIE
   from .ivi import (
       IviIE,
       IviCompilationIE
@@@ -315,11 -287,6 +316,11 @@@ from .lifenews import 
       LifeNewsIE,
       LifeEmbedIE,
   )
+ +from .limelight import (
+ +    LimelightMediaIE,
+ +    LimelightChannelIE,
+ +    LimelightChannelListIE,
+ +)
   from .liveleak import LiveLeakIE
   from .livestream import (
       LivestreamIE,
@@@ -337,6 -304,7 +338,6 @@@ from .macgamestore import MacGameStoreI
   from .mailru import MailRuIE
   from .malemotion import MalemotionIE
   from .mdr import MDRIE
- -from .megavideoz import MegaVideozIE
   from .metacafe import MetacafeIE
   from .metacritic import MetacriticIE
   from .mgoon import MgoonIE
@@@ -358,16 -326,16 +359,16 @@@ from .motherless import MotherlessI
   from .motorsport import MotorsportIE
   from .movieclips import MovieClipsIE
   from .moviezine import MoviezineIE
- -from .movshare import MovShareIE
   from .mtv import (
       MTVIE,
       MTVServicesEmbeddedIE,
       MTVIggyIE,
+ +    MTVDEIE,
   )
   from .muenchentv import MuenchenTVIE
   from .musicplayon import MusicPlayOnIE
- -from .musicvault import MusicVaultIE
   from .muzu import MuzuTVIE
+ +from .mwave import MwaveIE
   from .myspace import MySpaceIE, MySpaceAlbumIE
   from .myspass import MySpassIE
   from .myvi import MyviIE
@@@ -381,14 -349,10 +382,14 @@@ from .nbc import 
       NBCNewsIE,
       NBCSportsIE,
       NBCSportsVPlayerIE,
+ +    MSNBCIE,
   )
   from .ndr import (
       NDRIE,
       NJoyIE,
+ +    NDREmbedBaseIE,
+ +    NDREmbedIE,
+ +    NJoyEmbedIE,
   )
   from .ndtv import NDTVIE
   from .netzkino import NetzkinoIE
@@@ -423,22 -387,10 +424,22 @@@ from .noco import NocoI
   from .normalboots import NormalbootsIE
   from .nosvideo import NosVideoIE
   from .nova import NovaIE
- -from .novamov import NovaMovIE
- -from .nowness import NownessIE
- -from .nowtv import NowTVIE
- -from .nowvideo import NowVideoIE
+ +from .novamov import (
+ +    NovaMovIE,
+ +    WholeCloudIE,
+ +    NowVideoIE,
+ +    VideoWeedIE,
+ +    CloudTimeIE,
+ +)
+ +from .nowness import (
+ +    NownessIE,
+ +    NownessPlaylistIE,
+ +    NownessSeriesIE,
+ +)
+ +from .nowtv import (
+ +    NowTVIE,
+ +    NowTVListIE,
+ +)
   from .npo import (
       NPOIE,
       NPOLiveIE,
@@@ -466,6 -418,7 +467,6 @@@ from .ooyala import 
       OoyalaIE,
       OoyalaExternalIE,
   )
- -from .openfilm import OpenFilmIE
   from .orf import (
       ORFTVthekIE,
       ORFOE1IE,
@@@ -475,7 -428,6 +476,7 @@@
   from .parliamentliveuk import ParliamentLiveUKIE
   from .patreon import PatreonIE
   from .pbs import PBSIE
+ +from .periscope import PeriscopeIE
   from .philharmoniedeparis import PhilharmonieDeParisIE
   from .phoenix import PhoenixIE
   from .photobucket import PhotobucketIE
@@@ -484,13 -436,8 +485,13 @@@ from .planetaplay import PlanetaPlayI
   from .pladform import PladformIE
   from .played import PlayedIE
   from .playfm import PlayFMIE
+ +from .playtvak import PlaytvakIE
   from .playvid import PlayvidIE
   from .playwire import PlaywireIE
+ +from .pluralsight import (
+ +    PluralsightIE,
+ +    PluralsightCourseIE,
+ +)
   from .podomatic import PodomaticIE
   from .porn91 import Porn91IE
   from .pornhd import PornHdIE
@@@ -536,7 -483,6 +537,7 @@@ from .rtl2 import RTL2I
   from .rtp import RTPIE
   from .rts import RTSIE
   from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
+ +from .rtvnh import RTVNHIE
   from .ruhd import RUHDIE
   from .rutube import (
       RutubeIE,
@@@ -563,14 -509,9 +564,14 @@@ from .senateisvp import SenateISVPI
   from .servingsys import ServingSysIE
   from .sexu import SexuIE
   from .sexykarma import SexyKarmaIE
+ +from .shahid import ShahidIE
   from .shared import SharedIE
   from .sharesix import ShareSixIE
   from .sina import SinaIE
+ +from .skynewsarabia import (
+ +    SkyNewsArabiaIE,
+ +    SkyNewsArabiaArticleIE,
+ +)
   from .slideshare import SlideshareIE
   from .slutload import SlutloadIE
   from .smotri import (
@@@ -593,8 -534,7 +594,8 @@@ from .soundcloud import 
       SoundcloudIE,
       SoundcloudSetIE,
       SoundcloudUserIE,
- -    SoundcloudPlaylistIE
+ +    SoundcloudPlaylistIE,
+ +    SoundcloudSearchIE
   )
   from .soundgasm import (
       SoundgasmIE,
@@@ -613,7 -553,6 +614,7 @@@ from .spankwire import SpankwireI
   from .spiegel import SpiegelIE, SpiegelArticleIE
   from .spiegeltv import SpiegeltvIE
   from .spike import SpikeIE
+ +from .stitcher import StitcherIE
   from .sport5 import Sport5IE
   from .sportbox import (
       SportBoxIE,
@@@ -647,10 -586,8 +648,10 @@@ from .teachingchannel import TeachingCh
   from .teamcoco import TeamcocoIE
   from .techtalks import TechTalksIE
   from .ted import TEDIE
+ +from .tele13 import Tele13IE
   from .telebruxelles import TeleBruxellesIE
   from .telecinco import TelecincoIE
+ +from .telegraaf import TelegraafIE
   from .telemb import TeleMBIE
   from .teletask import TeleTaskIE
   from .tenplay import TenPlayIE
@@@ -658,10 -595,7 +659,10 @@@ from .testurl import TestURLI
   from .testtube import TestTubeIE
   from .tf1 import TF1IE
   from .theonion import TheOnionIE
- -from .theplatform import ThePlatformIE
+ +from .theplatform import (
+ +    ThePlatformIE,
+ +    ThePlatformFeedIE,
+ +)
   from .thesixtyone import TheSixtyOneIE
   from .thisamericanlife import ThisAmericanLifeIE
   from .thisav import ThisAVIE
@@@ -676,7 -610,6 +677,7 @@@ from .tnaflix import 
       EMPFlixIE,
       MovieFapIE,
   )
+ +from .toggle import ToggleIE
   from .thvideo import (
       THVideoIE,
       THVideoPlaylistIE
@@@ -720,7 -653,7 +721,7 @@@ from .twitch import 
       TwitchBookmarksIE,
       TwitchStreamIE,
   )
- -from .twitter import TwitterCardIE
+ +from .twitter import TwitterCardIE, TwitterIE
   from .ubu import UbuIE
   from .udemy import (
       UdemyIE,
@@@ -746,16 -679,18 +747,16 @@@ from .vgtv import 
   from .vh1 import VH1IE
   from .vice import ViceIE
   from .viddler import ViddlerIE
- -from .videobam import VideoBamIE
   from .videodetective import VideoDetectiveIE
- -from .videolecturesnet import VideoLecturesNetIE
   from .videofyme import VideofyMeIE
   from .videomega import VideoMegaIE
   from .videopremium import VideoPremiumIE
   from .videott import VideoTtIE
- -from .videoweed import VideoWeedIE
   from .vidme import VidmeIE
   from .vidzi import VidziIE
   from .vier import VierIE, VierVideosIE
   from .viewster import ViewsterIE
+ +from .viidea import ViideaIE
   from .vimeo import (
       VimeoIE,
       VimeoAlbumIE,
@@@ -779,7 -714,6 +780,7 @@@ from .vk import 
       VKIE,
       VKUserVideosIE,
   )
+ +from .vlive import VLiveIE
   from .vodlocker import VodlockerIE
   from .voicerepublic import VoiceRepublicIE
   from .vporn import VpornIE
@@@ -808,7 -742,6 +809,7 @@@ from .wrzuta import WrzutaI
   from .wsj import WSJIE
   from .xbef import XBefIE
   from .xboxclips import XboxClipsIE
+ +from .xfileshare import XFileShareIE
   from .xhamster import (
       XHamsterIE,
       XHamsterEmbedIE,
@@@ -852,7 -785,6 +853,7 @@@ from .youtube import 
       YoutubeTruncatedIDIE,
       YoutubeTruncatedURLIE,
       YoutubeUserIE,
+ +    YoutubePlaylistsIE,
       YoutubeWatchLaterIE,
   )
   from .zapiks import ZapiksIE
diff --combined youtube_dl/extractor/generic.py

index c2e8f9b62d846f2fcd5af61097a3e698cdbe53dc,abd98e500d621c19cbb9928a35a04d5fdf40e327..7cf13fddfe37fa1bf2d2e6329c060f9d4c6286f7
--- 1/youtube_dl/extractor/generic.py
--- 2/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@@ -4,13 -4,12 +4,13 @@@ from __future__ import unicode_literal
   
   import os
   import re
+ +import sys
   
   from .common import InfoExtractor
   from .youtube import YoutubeIE
   from ..compat import (
+ +    compat_etree_fromstring,
       compat_urllib_parse_unquote,
- -    compat_urllib_request,
       compat_urlparse,
       compat_xml_parse_error,
   )
@@@ -21,7 -20,7 +21,7 @@@ from ..utils import 
       HEADRequest,
       is_html,
       orderedSet,
- -    parse_xml,
+ +    sanitized_Request,
       smuggle_url,
       unescapeHTML,
       unified_strdate,
@@@ -30,10 -29,7 +30,10 @@@
       url_basename,
       xpath_text,
   )
- -from .brightcove import BrightcoveIE
+ +from .brightcove import (
+ +    BrightcoveLegacyIE,
+ +    BrightcoveNewIE,
+ +)
   from .nbc import NBCSportsVPlayerIE
   from .ooyala import OoyalaIE
   from .rutv import RUTVIE
@@@ -52,9 -48,7 +52,10 @@@ from .vimeo import VimeoI
   from .dailymotion import DailymotionCloudIE
   from .onionstudios import OnionStudiosIE
   from .snagfilms import SnagFilmsEmbedIE
+ +from .screenwavemedia import ScreenwaveMediaIE
+ +from .mtv import MTVServicesEmbeddedIE
+ +from .pladform import PladformIE
+ from .googledrive import GoogleDriveIE
   
   
   class GenericIE(InfoExtractor):
@@@ -137,90 -131,6 +138,90 @@@
                   'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
               }
           },
+ +        # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
+ +        {
+ +            'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
+ +            'info_dict': {
+ +                'id': 'smil',
+ +                'ext': 'mp4',
+ +                'title': 'Automatics, robotics and biocybernetics',
+ +                'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ +                'upload_date': '20130627',
+ +                'formats': 'mincount:16',
+ +                'subtitles': 'mincount:1',
+ +            },
+ +            'params': {
+ +                'force_generic_extractor': True,
+ +                'skip_download': True,
+ +            },
+ +        },
+ +        # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
+ +        {
+ +            'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
+ +            'info_dict': {
+ +                'id': 'hds',
+ +                'ext': 'flv',
+ +                'title': 'hds',
+ +                'formats': 'mincount:1',
+ +            },
+ +            'params': {
+ +                'skip_download': True,
+ +            },
+ +        },
+ +        # SMIL from https://www.restudy.dk/video/play/id/1637
+ +        {
+ +            'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
+ +            'info_dict': {
+ +                'id': 'video_1637',
+ +                'ext': 'flv',
+ +                'title': 'video_1637',
+ +                'formats': 'mincount:3',
+ +            },
+ +            'params': {
+ +                'skip_download': True,
+ +            },
+ +        },
+ +        # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
+ +        {
+ +            'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
+ +            'info_dict': {
+ +                'id': 'smil-service',
+ +                'ext': 'flv',
+ +                'title': 'smil-service',
+ +                'formats': 'mincount:1',
+ +            },
+ +            'params': {
+ +                'skip_download': True,
+ +            },
+ +        },
+ +        # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
+ +        {
+ +            'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
+ +            'info_dict': {
+ +                'id': '4719370',
+ +                'ext': 'mp4',
+ +                'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
+ +                'formats': 'mincount:3',
+ +            },
+ +            'params': {
+ +                'skip_download': True,
+ +            },
+ +        },
+ +        # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
+ +        {
+ +            'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
+ +            'info_dict': {
+ +                'id': 'mZlp2ctYIUEB',
+ +                'ext': 'mp4',
+ +                'title': 'Tikibad ontruimd wegens brand',
+ +                'description': 'md5:05ca046ff47b931f9b04855015e163a4',
+ +                'thumbnail': 're:^https?://.*\.jpg$',
+ +                'duration': 33,
+ +            },
+ +            'params': {
+ +                'skip_download': True,
+ +            },
+ +        },
           # google redirect
           {
               'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@@ -237,22 -147,6 +238,22 @@@
                   'skip_download': False,
               }
           },
+ +        {
+ +            # redirect in Refresh HTTP header
+ +            'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+ +            'info_dict': {
+ +                'id': 'pO8h3EaFRdo',
+ +                'ext': 'mp4',
+ +                'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+ +                'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+ +                'upload_date': '20150917',
+ +                'uploader_id': 'brtvofficial',
+ +                'uploader': 'Boiler Room',
+ +            },
+ +            'params': {
+ +                'skip_download': False,
+ +            },
+ +        },
           {
               'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
               'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
@@@ -279,7 -173,7 +280,7 @@@
           # it also tests brightcove videos that need to set the 'Referer' in the
           # http requests
           {
- -            'add_ie': ['Brightcove'],
+ +            'add_ie': ['BrightcoveLegacy'],
               'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
               'info_dict': {
                   'id': '2765128793001',
@@@ -303,7 -197,7 +304,7 @@@
                   'uploader': 'thestar.com',
                   'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
               },
- -            'add_ie': ['Brightcove'],
+ +            'add_ie': ['BrightcoveLegacy'],
           },
           {
               'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@@ -318,7 -212,7 +319,7 @@@
           },
           {
               # https://github.com/rg3/youtube-dl/issues/3541
- -            'add_ie': ['Brightcove'],
+ +            'add_ie': ['BrightcoveLegacy'],
               'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
               'info_dict': {
                   'id': '3866516442001',
@@@ -340,24 -234,9 +341,24 @@@
                   'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                   'ext': 'mp4',
                   'title': '2cc213299525360.mov',  # that's what we get
+ +                'duration': 238.231,
               },
               'add_ie': ['Ooyala'],
           },
+ +        {
+ +            # ooyala video embedded with http://player.ooyala.com/iframe.js
+ +            'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
+ +            'info_dict': {
+ +                'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
+ +                'ext': 'mp4',
+ +                'title': '"Steve Jobs: Man in the Machine" trailer',
+ +                'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+ +                'duration': 135.427,
+ +            },
+ +            'params': {
+ +                'skip_download': True,
+ +            },
+ +        },
           # multiple ooyala embeds on SBN network websites
           {
               'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
@@@ -398,6 -277,14 +399,6 @@@
                   'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
               },
           },
- -        # BBC iPlayer embeds
- -        {
- -            'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
- -            'info_dict': {
- -                'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
- -            },
- -            'playlist_mincount': 18,
- -        },
           # RUTV embed
           {
               'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
@@@ -826,19 -713,6 +827,19 @@@
                   'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
               },
           },
+ +        # Kaltura embed protected with referrer
+ +        {
+ +            'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
+ +            'info_dict': {
+ +                'id': '1_g4fbemnq',
+ +                'ext': 'mp4',
+ +                'title': 'Violetta - Achter De Schermen - Ruggero',
+ +                'description': 'Achter de schermen met Ruggero',
+ +                'timestamp': 1435133761,
+ +                'upload_date': '20150624',
+ +                'uploader_id': 'echojecka',
+ +            },
+ +        },
           # Eagle.Platform embed (generic URL)
           {
               'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@@ -963,9 -837,8 +964,9 @@@
               'info_dict': {
                   'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
                   'ext': 'mp4',
- -                'description': 'VIDEO: Index/Match versus VLOOKUP.',
+ +                'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
                   'title': 'This is what separates the Excel masters from the wannabes',
+ +                'duration': 191.933,
               },
               'params': {
                   # m3u8 downloads
@@@ -1041,41 -914,6 +1042,41 @@@
                   'description': 'New experience with Acrobat DC',
                   'duration': 248.667,
               },
+ +        },
+ +        # ScreenwaveMedia embed
+ +        {
+ +            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
+ +            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
+ +            'info_dict': {
+ +                'id': 'cinemasnob-55d26273809dd',
+ +                'ext': 'mp4',
+ +                'title': 'cinemasnob',
+ +            },
+ +        },
+ +        # BrightcoveInPageEmbed embed
+ +        {
+ +            'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+ +            'info_dict': {
+ +                'id': '4238694884001',
+ +                'ext': 'flv',
+ +                'title': 'Tabletop: Dread, Last Thoughts',
+ +                'description': 'Tabletop: Dread, Last Thoughts',
+ +                'duration': 51690,
+ +            },
+ +        },
+ +        # JWPlayer with M3U8
+ +        {
+ +            'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+ +            'info_dict': {
+ +                'id': 'playlist',
+ +                'ext': 'mp4',
+ +                'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
+ +                'uploader': 'ren.tv',
+ +            },
+ +            'params': {
+ +                # m3u8 downloads
+ +                'skip_download': True,
+ +            }
           }
       ]
   
@@@ -1219,7 -1057,7 +1220,7 @@@
   
           full_response = None
           if head_response is False:
- -            request = compat_urllib_request.Request(url)
+ +            request = sanitized_Request(url)
               request.add_header('Accept-Encoding', '*')
               full_response = self._request_webpage(request, video_id)
               head_response = full_response
@@@ -1248,7 -1086,7 +1249,7 @@@
                   '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
   
           if not full_response:
- -            request = compat_urllib_request.Request(url)
+ +            request = sanitized_Request(url)
               # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
               # making it impossible to download only chunk of the file (yet we need only 512kB to
               # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
@@@ -1281,15 -1119,11 +1282,15 @@@
   
           self.report_extraction(video_id)
   
- -        # Is it an RSS feed?
+ +        # Is it an RSS feed, a SMIL file or a XSPF playlist?
           try:
- -            doc = parse_xml(webpage)
+ +            doc = compat_etree_fromstring(webpage.encode('utf-8'))
               if doc.tag == 'rss':
                   return self._extract_rss(url, video_id, doc)
+ +            elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
+ +                return self._parse_smil(doc, url, video_id)
+ +            elif doc.tag == '{http://xspf.org/ns/0/}playlist':
+ +                return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
           except compat_xml_parse_error:
               pass
   
@@@ -1335,14 -1169,14 +1336,14 @@@
               return self.playlist_result(
                   urlrs, playlist_id=video_id, playlist_title=video_title)
   
- -        # Look for BrightCove:
- -        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+ +        # Look for Brightcove Legacy Studio embeds
+ +        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
           if bc_urls:
               self.to_screen('Brightcove video detected.')
               entries = [{
                   '_type': 'url',
                   'url': smuggle_url(bc_url, {'Referer': url}),
- -                'ie_key': 'Brightcove'
+ +                'ie_key': 'BrightcoveLegacy'
               } for bc_url in bc_urls]
   
               return {
@@@ -1352,11 -1186,6 +1353,11 @@@
                   'entries': entries,
               }
   
+ +        # Look for Brightcove New Studio embeds
+ +        bc_urls = BrightcoveNewIE._extract_urls(webpage)
+ +        if bc_urls:
+ +            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+ +
           # Look for embedded rtl.nl player
           matches = re.findall(
               r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@@@ -1500,12 -1329,12 +1501,12 @@@
               return self.url_result(mobj.group('url'))
   
           # Look for Ooyala videos
- -        mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+ +        mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                   re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
                   re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
                   re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
           if mobj is not None:
- -            return OoyalaIE._build_url_result(mobj.group('ec'))
+ +            return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
   
           # Look for multiple Ooyala embeds on SBN network websites
           mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
@@@ -1513,7 -1342,7 +1514,7 @@@
               embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
               if embeds:
                   return _playlist_from_matches(
- -                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
+ +                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
   
           # Look for Aparat videos
           mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@@ -1663,9 -1492,12 +1664,9 @@@
               return self.url_result(url, ie='Vulture')
   
           # Look for embedded mtvservices player
- -        mobj = re.search(
- -            r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
- -            webpage)
- -        if mobj is not None:
- -            url = unescapeHTML(mobj.group('url'))
- -            return self.url_result(url, ie='MTVServicesEmbedded')
+ +        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
+ +        if mtvservices_url:
+ +            return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
   
           # Look for embedded yahoo player
           mobj = re.search(
@@@ -1704,7 -1536,7 +1705,7 @@@
               return self.url_result(mobj.group('url'), 'MLB')
   
           mobj = re.search(
- -            r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+ +            r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
               webpage)
           if mobj is not None:
               return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
@@@ -1722,12 -1554,10 +1723,12 @@@
               return self.url_result(mobj.group('url'), 'Zapiks')
   
           # Look for Kaltura embeds
- -        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
- -                re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
+ +        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
+ +                re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
           if mobj is not None:
- -            return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
+ +            return self.url_result(smuggle_url(
+ +                'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
+ +                {'source_url': url}), 'Kaltura')
   
           # Look for Eagle.Platform embeds
           mobj = re.search(
@@@ -1742,9 -1572,10 +1743,9 @@@
               return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
   
           # Look for Pladform embeds
- -        mobj = re.search(
- -            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
- -        if mobj is not None:
- -            return self.url_result(mobj.group('url'), 'Pladform')
+ +        pladform_url = PladformIE._extract_url(webpage)
+ +        if pladform_url:
+ +            return self.url_result(pladform_url)
   
           # Look for Playwire embeds
           mobj = re.search(
@@@ -1769,9 -1600,14 +1770,14 @@@
           if nbc_sports_url:
               return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
   
+         # Look for Google Drive embeds
+         google_drive_url = GoogleDriveIE._extract_url(webpage)
+         if google_drive_url:
+             return self.url_result(google_drive_url, 'GoogleDrive')
+ 
           # Look for UDN embeds
           mobj = re.search(
- -            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+ +            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
           if mobj is not None:
               return self.url_result(
                   compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@@ -1796,11 -1632,6 +1802,11 @@@
           if snagfilms_url:
               return self.url_result(snagfilms_url)
   
+ +        # Look for ScreenwaveMedia embeds
+ +        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
+ +        if mobj is not None:
+ +            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
+ +
           # Look for AdobeTVVideo embeds
           mobj = re.search(
               r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
@@@ -1838,7 -1669,7 +1844,7 @@@
           if not found:
               # Broaden the findall a little bit: JWPlayer JS loader
               found = filter_video(re.findall(
- -                r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
+ +                r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
           if not found:
               # Flow player
               found = filter_video(re.findall(r'''(?xs)
@@@ -1864,7 -1695,7 +1870,7 @@@
                   found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
           if not found:
               # HTML5 video
- -            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
+ +            found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
           if not found:
               REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
               found = re.search(
@@@ -1875,9 -1706,6 +1881,9 @@@
                   # Look also in Refresh HTTP header
                   refresh_header = head_response.headers.get('Refresh')
                   if refresh_header:
+ +                    # In python 2 response HTTP headers are bytestrings
+ +                    if sys.version_info < (3, 0) and isinstance(refresh_header, str):
+ +                        refresh_header = refresh_header.decode('iso-8859-1')
                       found = re.search(REDIRECT_REGEX, refresh_header)
               if found:
                   new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
@@@ -1891,7 -1719,6 +1897,7 @@@
   
           entries = []
           for video_url in found:
+ +            video_url = video_url.replace('\\/', '/')
               video_url = compat_urlparse.urljoin(url, video_url)
               video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
   
@@@ -1903,24 -1730,22 +1909,24 @@@
               # here's a fun little line of code for you:
               video_id = os.path.splitext(video_id)[0]
   
- -            if determine_ext(video_url) == 'smil':
- -                entries.append({
- -                    'id': video_id,
- -                    'formats': self._extract_smil_formats(video_url, video_id),
- -                    'uploader': video_uploader,
- -                    'title': video_title,
- -                    'age_limit': age_limit,
- -                })
+ +            entry_info_dict = {
+ +                'id': video_id,
+ +                'uploader': video_uploader,
+ +                'title': video_title,
+ +                'age_limit': age_limit,
+ +            }
+ +
+ +            ext = determine_ext(video_url)
+ +            if ext == 'smil':
+ +                entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
+ +            elif ext == 'xspf':
+ +                return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+ +            elif ext == 'm3u8':
+ +                entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
               else:
- -                entries.append({
- -                    'id': video_id,
- -                    'url': video_url,
- -                    'uploader': video_uploader,
- -                    'title': video_title,
- -                    'age_limit': age_limit,
- -                })
+ +                entry_info_dict['url'] = video_url
+ +
+ +            entries.append(entry_info_dict)
   
           if len(entries) == 1:
               return entries[0]
author	remitamine <remitamine@gmail.com>
	Mon, 21 Dec 2015 02:15:19 +0000 (03:15 +0100)
committer	remitamine <remitamine@gmail.com>
	Mon, 21 Dec 2015 02:15:19 +0000 (03:15 +0100)
		1	2
youtube_dl/extractor/__init__.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/extractor/generic.py	patch \|	diff1 \|	diff2 \|	blob \| history