_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     sanitized_Request,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import (
  34     BrightcoveLegacyIE,
  35     BrightcoveNewIE,
  36 )
  37 from .nbc import NBCSportsVPlayerIE
  38 from .ooyala import OoyalaIE
  39 from .rutv import RUTVIE
  40 from .tvc import TVCIE
  41 from .sportbox import SportBoxEmbedIE
  42 from .smotri import SmotriIE
  43 from .myvi import MyviIE
  44 from .condenast import CondeNastIE
  45 from .udn import UDNEmbedIE
  46 from .senateisvp import SenateISVPIE
  47 from .svt import SVTIE
  48 from .pornhub import PornHubIE
  49 from .xhamster import XHamsterEmbedIE
  50 from .vimeo import VimeoIE
  51 from .dailymotion import DailymotionCloudIE
  52 from .onionstudios import OnionStudiosIE
  53 from .snagfilms import SnagFilmsEmbedIE
  54 from .screenwavemedia import ScreenwaveMediaIE
  55 from .mtv import MTVServicesEmbeddedIE
  56 from .pladform import PladformIE
  57 from .videomore import VideomoreIE
  58 from .googledrive import GoogleDriveIE
  59 from .jwplatform import JWPlatformIE
  60 from .digiteka import DigitekaIE
  61
  62
  63 class GenericIE(InfoExtractor):
  64     IE_DESC = 'Generic downloader that works on some sites'
  65     _VALID_URL = r'.*'
  66     IE_NAME = 'generic'
  67     _TESTS = [
  68         # Direct link to a video
  69         {
  70             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  71             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  72             'info_dict': {
  73                 'id': 'trailer',
  74                 'ext': 'mp4',
  75                 'title': 'trailer',
  76                 'upload_date': '20100513',
  77             }
  78         },
  79         # Direct link to media delivered compressed (until Accept-Encoding is *)
  80         {
  81             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  82             'md5': '128c42e68b13950268b648275386fc74',
  83             'info_dict': {
  84                 'id': 'FictionJunction-Parallel_Hearts',
  85                 'ext': 'flac',
  86                 'title': 'FictionJunction-Parallel_Hearts',
  87                 'upload_date': '20140522',
  88             },
  89             'expected_warnings': [
  90                 'URL could be a direct video link, returning it as such.'
  91             ]
  92         },
  93         # Direct download with broken HEAD
  94         {
  95             'url': 'http://ai-radio.org:8000/radio.opus',
  96             'info_dict': {
  97                 'id': 'radio',
  98                 'ext': 'opus',
  99                 'title': 'radio',
 100             },
 101             'params': {
 102                 'skip_download': True,  # infinite live stream
 103             },
 104             'expected_warnings': [
 105                 r'501.*Not Implemented'
 106             ],
 107         },
 108         # Direct link with incorrect MIME type
 109         {
 110             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 111             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 112             'info_dict': {
 113                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 114                 'id': '5_Lennart_Poettering_-_Systemd',
 115                 'ext': 'webm',
 116                 'title': '5_Lennart_Poettering_-_Systemd',
 117                 'upload_date': '20141120',
 118             },
 119             'expected_warnings': [
 120                 'URL could be a direct video link, returning it as such.'
 121             ]
 122         },
 123         # RSS feed
 124         {
 125             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 126             'info_dict': {
 127                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 128                 'title': 'Zero Punctuation',
 129                 'description': 're:.*groundbreaking video review series.*'
 130             },
 131             'playlist_mincount': 11,
 132         },
 133         # RSS feed with enclosure
 134         {
 135             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 136             'info_dict': {
 137                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 138                 'ext': 'm4v',
 139                 'upload_date': '20150228',
 140                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 141             }
 142         },
 143         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 144         {
 145             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 146             'info_dict': {
 147                 'id': 'smil',
 148                 'ext': 'mp4',
 149                 'title': 'Automatics, robotics and biocybernetics',
 150                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 151                 'upload_date': '20130627',
 152                 'formats': 'mincount:16',
 153                 'subtitles': 'mincount:1',
 154             },
 155             'params': {
 156                 'force_generic_extractor': True,
 157                 'skip_download': True,
 158             },
 159         },
 160         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 161         {
 162             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 163             'info_dict': {
 164                 'id': 'hds',
 165                 'ext': 'flv',
 166                 'title': 'hds',
 167                 'formats': 'mincount:1',
 168             },
 169             'params': {
 170                 'skip_download': True,
 171             },
 172         },
 173         # SMIL from https://www.restudy.dk/video/play/id/1637
 174         {
 175             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 176             'info_dict': {
 177                 'id': 'video_1637',
 178                 'ext': 'flv',
 179                 'title': 'video_1637',
 180                 'formats': 'mincount:3',
 181             },
 182             'params': {
 183                 'skip_download': True,
 184             },
 185         },
 186         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 187         {
 188             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 189             'info_dict': {
 190                 'id': 'smil-service',
 191                 'ext': 'flv',
 192                 'title': 'smil-service',
 193                 'formats': 'mincount:1',
 194             },
 195             'params': {
 196                 'skip_download': True,
 197             },
 198         },
 199         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 200         {
 201             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 202             'info_dict': {
 203                 'id': '4719370',
 204                 'ext': 'mp4',
 205                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 206                 'formats': 'mincount:3',
 207             },
 208             'params': {
 209                 'skip_download': True,
 210             },
 211         },
 212         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 213         {
 214             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 215             'info_dict': {
 216                 'id': 'mZlp2ctYIUEB',
 217                 'ext': 'mp4',
 218                 'title': 'Tikibad ontruimd wegens brand',
 219                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 220                 'thumbnail': 're:^https?://.*\.jpg$',
 221                 'duration': 33,
 222             },
 223             'params': {
 224                 'skip_download': True,
 225             },
 226         },
 227         # google redirect
 228         {
 229             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 230             'info_dict': {
 231                 'id': 'cmQHVoWB5FY',
 232                 'ext': 'mp4',
 233                 'upload_date': '20130224',
 234                 'uploader_id': 'TheVerge',
 235                 'description': 're:^Chris Ziegler takes a look at the\.*',
 236                 'uploader': 'The Verge',
 237                 'title': 'First Firefox OS phones side-by-side',
 238             },
 239             'params': {
 240                 'skip_download': False,
 241             }
 242         },
 243         {
 244             # redirect in Refresh HTTP header
 245             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 246             'info_dict': {
 247                 'id': 'pO8h3EaFRdo',
 248                 'ext': 'mp4',
 249                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 250                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 251                 'upload_date': '20150917',
 252                 'uploader_id': 'brtvofficial',
 253                 'uploader': 'Boiler Room',
 254             },
 255             'params': {
 256                 'skip_download': False,
 257             },
 258         },
 259         {
 260             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 261             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 262             'info_dict': {
 263                 'id': '13601338388002',
 264                 'ext': 'mp4',
 265                 'uploader': 'www.hodiho.fr',
 266                 'title': 'R\u00e9gis plante sa Jeep',
 267             }
 268         },
 269         # bandcamp page with custom domain
 270         {
 271             'add_ie': ['Bandcamp'],
 272             'url': 'http://bronyrock.com/track/the-pony-mash',
 273             'info_dict': {
 274                 'id': '3235767654',
 275                 'ext': 'mp3',
 276                 'title': 'The Pony Mash',
 277                 'uploader': 'M_Pallante',
 278             },
 279             'skip': 'There is a limit of 200 free downloads / month for the test song',
 280         },
 281         # embedded brightcove video
 282         # it also tests brightcove videos that need to set the 'Referer' in the
 283         # http requests
 284         {
 285             'add_ie': ['BrightcoveLegacy'],
 286             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 287             'info_dict': {
 288                 'id': '2765128793001',
 289                 'ext': 'mp4',
 290                 'title': 'Le cours de bourse : l’analyse technique',
 291                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 292                 'uploader': 'BFM BUSINESS',
 293             },
 294             'params': {
 295                 'skip_download': True,
 296             },
 297         },
 298         {
 299             # https://github.com/rg3/youtube-dl/issues/2253
 300             'url': 'http://bcove.me/i6nfkrc3',
 301             'md5': '0ba9446db037002366bab3b3eb30c88c',
 302             'info_dict': {
 303                 'id': '3101154703001',
 304                 'ext': 'mp4',
 305                 'title': 'Still no power',
 306                 'uploader': 'thestar.com',
 307                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 308             },
 309             'add_ie': ['BrightcoveLegacy'],
 310         },
 311         {
 312             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 313             'md5': 'fb973ecf6e4a78a67453647444222983',
 314             'info_dict': {
 315                 'id': '3414141473001',
 316                 'ext': 'mp4',
 317                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 318                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 319                 'uploader': 'Championat',
 320             },
 321         },
 322         {
 323             # https://github.com/rg3/youtube-dl/issues/3541
 324             'add_ie': ['BrightcoveLegacy'],
 325             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 326             'info_dict': {
 327                 'id': '3866516442001',
 328                 'ext': 'mp4',
 329                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 330                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 331                 'uploader': 'SBS Broadcasting',
 332             },
 333             'skip': 'Restricted to Netherlands',
 334             'params': {
 335                 'skip_download': True,  # m3u8 download
 336             },
 337         },
 338         # ooyala video
 339         {
 340             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 341             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 342             'info_dict': {
 343                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 344                 'ext': 'mp4',
 345                 'title': '2cc213299525360.mov',  # that's what we get
 346                 'duration': 238.231,
 347             },
 348             'add_ie': ['Ooyala'],
 349         },
 350         {
 351             # ooyala video embedded with http://player.ooyala.com/iframe.js
 352             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 353             'info_dict': {
 354                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 355                 'ext': 'mp4',
 356                 'title': '"Steve Jobs: Man in the Machine" trailer',
 357                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 358                 'duration': 135.427,
 359             },
 360             'params': {
 361                 'skip_download': True,
 362             },
 363         },
 364         # multiple ooyala embeds on SBN network websites
 365         {
 366             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 367             'info_dict': {
 368                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 369                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 370             },
 371             'playlist_mincount': 3,
 372             'params': {
 373                 'skip_download': True,
 374             },
 375             'add_ie': ['Ooyala'],
 376         },
 377         # embed.ly video
 378         {
 379             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 380             'info_dict': {
 381                 'id': '9ODmcdjQcHQ',
 382                 'ext': 'mp4',
 383                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 384                 'upload_date': '20140225',
 385                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 386                 'uploader': 'Tested',
 387                 'uploader_id': 'testedcom',
 388             },
 389             # No need to test YoutubeIE here
 390             'params': {
 391                 'skip_download': True,
 392             },
 393         },
 394         # funnyordie embed
 395         {
 396             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 397             'info_dict': {
 398                 'id': '18e820ec3f',
 399                 'ext': 'mp4',
 400                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 401                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 402             },
 403         },
 404         # RUTV embed
 405         {
 406             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 407             'info_dict': {
 408                 'id': '776940',
 409                 'ext': 'mp4',
 410                 'title': 'Охотское море стало целиком российским',
 411                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 412             },
 413             'params': {
 414                 # m3u8 download
 415                 'skip_download': True,
 416             },
 417         },
 418         # TVC embed
 419         {
 420             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 421             'info_dict': {
 422                 'id': '55304',
 423                 'ext': 'mp4',
 424                 'title': 'Дошкольное воспитание',
 425             },
 426         },
 427         # SportBox embed
 428         {
 429             'url': 'http://www.vestifinance.ru/articles/25753',
 430             'info_dict': {
 431                 'id': '25753',
 432                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 433             },
 434             'playlist': [{
 435                 'info_dict': {
 436                     'id': '370908',
 437                     'title': 'Госзаказ. День 3',
 438                     'ext': 'mp4',
 439                 }
 440             }, {
 441                 'info_dict': {
 442                     'id': '370905',
 443                     'title': 'Госзаказ. День 2',
 444                     'ext': 'mp4',
 445                 }
 446             }, {
 447                 'info_dict': {
 448                     'id': '370902',
 449                     'title': 'Госзаказ. День 1',
 450                     'ext': 'mp4',
 451                 }
 452             }],
 453             'params': {
 454                 # m3u8 download
 455                 'skip_download': True,
 456             },
 457         },
 458         # Myvi.ru embed
 459         {
 460             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 461             'info_dict': {
 462                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 463                 'ext': 'mp4',
 464                 'title': 'Ужастики, русский трейлер (2015)',
 465                 'thumbnail': 're:^https?://.*\.jpg$',
 466                 'duration': 153,
 467             }
 468         },
 469         # XHamster embed
 470         {
 471             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 472             'info_dict': {
 473                 'id': 'showthread',
 474                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 475             },
 476             'playlist_mincount': 7,
 477         },
 478         # Embedded TED video
 479         {
 480             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 481             'md5': '65fdff94098e4a607385a60c5177c638',
 482             'info_dict': {
 483                 'id': '1969',
 484                 'ext': 'mp4',
 485                 'title': 'Hidden miracles of the natural world',
 486                 'uploader': 'Louie Schwartzberg',
 487                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 488             }
 489         },
 490         # Embedded Ustream video
 491         {
 492             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 493             'md5': '27b99cdb639c9b12a79bca876a073417',
 494             'info_dict': {
 495                 'id': '45734260',
 496                 'ext': 'flv',
 497                 'uploader': 'AU SPA:  The NSA and Privacy',
 498                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 499             }
 500         },
 501         # nowvideo embed hidden behind percent encoding
 502         {
 503             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 504             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 505             'info_dict': {
 506                 'id': '06e53103ca9aa',
 507                 'ext': 'flv',
 508                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 509                 'description': 'No description',
 510             },
 511         },
 512         # arte embed
 513         {
 514             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 515             'md5': '7653032cbb25bf6c80d80f217055fa43',
 516             'info_dict': {
 517                 'id': '048195-004_PLUS7-F',
 518                 'ext': 'flv',
 519                 'title': 'X:enius',
 520                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 521                 'upload_date': '20140320',
 522             },
 523             'params': {
 524                 'skip_download': 'Requires rtmpdump'
 525             }
 526         },
 527         # francetv embed
 528         {
 529             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 530             'info_dict': {
 531                 'id': 'EV_30231',
 532                 'ext': 'mp4',
 533                 'title': 'Alcaline, le concert avec Calogero',
 534                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 535                 'upload_date': '20150226',
 536                 'timestamp': 1424989860,
 537                 'duration': 5400,
 538             },
 539             'params': {
 540                 # m3u8 downloads
 541                 'skip_download': True,
 542             },
 543             'expected_warnings': [
 544                 'Forbidden'
 545             ]
 546         },
 547         # Condé Nast embed
 548         {
 549             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 550             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 551             'info_dict': {
 552                 'id': '53501be369702d3275860000',
 553                 'ext': 'mp4',
 554                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 555             }
 556         },
 557         # Dailymotion embed
 558         {
 559             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 560             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 561             'info_dict': {
 562                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 563                 'ext': 'mp4',
 564                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 565                 'uploader': 'Spi0n',
 566             },
 567             'add_ie': ['Dailymotion'],
 568         },
 569         # YouTube embed
 570         {
 571             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 572             'info_dict': {
 573                 'id': 'FXRb4ykk4S0',
 574                 'ext': 'mp4',
 575                 'title': 'The NBL Auction 2014',
 576                 'uploader': 'BADMINTON England',
 577                 'uploader_id': 'BADMINTONEvents',
 578                 'upload_date': '20140603',
 579                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 580             },
 581             'add_ie': ['Youtube'],
 582             'params': {
 583                 'skip_download': True,
 584             }
 585         },
 586         # MTVServices embed
 587         {
 588             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 589             'md5': '35727f82f58c76d996fc188f9755b0d5',
 590             'info_dict': {
 591                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 592                 'ext': 'mp4',
 593                 'title': 'Review',
 594                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 595             },
 596         },
 597         # YouTube embed via <data-embed-url="">
 598         {
 599             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 600             'info_dict': {
 601                 'id': '4vAffPZIT44',
 602                 'ext': 'mp4',
 603                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 604                 'uploader': 'Gameloft',
 605                 'uploader_id': 'gameloft',
 606                 'upload_date': '20140828',
 607                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 608             },
 609             'params': {
 610                 'skip_download': True,
 611             }
 612         },
 613         # Camtasia studio
 614         {
 615             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 616             'playlist': [{
 617                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 618                 'info_dict': {
 619                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 620                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 621                     'ext': 'flv',
 622                     'duration': 2235.90,
 623                 }
 624             }, {
 625                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 626                 'info_dict': {
 627                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 628                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 629                     'ext': 'flv',
 630                     'duration': 2235.93,
 631                 }
 632             }],
 633             'info_dict': {
 634                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 635             }
 636         },
 637         # Flowplayer
 638         {
 639             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 640             'md5': '9d65602bf31c6e20014319c7d07fba27',
 641             'info_dict': {
 642                 'id': '5123ea6d5e5a7',
 643                 'ext': 'mp4',
 644                 'age_limit': 18,
 645                 'uploader': 'www.handjobhub.com',
 646                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 647             }
 648         },
 649         # Multiple brightcove videos
 650         # https://github.com/rg3/youtube-dl/issues/2283
 651         {
 652             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 653             'info_dict': {
 654                 'id': 'always-never',
 655                 'title': 'Always / Never - The New Yorker',
 656             },
 657             'playlist_count': 3,
 658             'params': {
 659                 'extract_flat': False,
 660                 'skip_download': True,
 661             }
 662         },
 663         # MLB embed
 664         {
 665             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 666             'md5': '96f09a37e44da40dd083e12d9a683327',
 667             'info_dict': {
 668                 'id': '33322633',
 669                 'ext': 'mp4',
 670                 'title': 'Ump changes call to ball',
 671                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 672                 'duration': 48,
 673                 'timestamp': 1401537900,
 674                 'upload_date': '20140531',
 675                 'thumbnail': 're:^https?://.*\.jpg$',
 676             },
 677         },
 678         # Wistia embed
 679         {
 680             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 681             'md5': '8788b683c777a5cf25621eaf286d0c23',
 682             'info_dict': {
 683                 'id': '1cfaf6b7ea',
 684                 'ext': 'mov',
 685                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 686                 'duration': 643.0,
 687                 'filesize': 182808282,
 688                 'uploader': 'education-portal.com',
 689             },
 690         },
 691         {
 692             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 693             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 694             'info_dict': {
 695                 'id': 'uxjb0lwrcz',
 696                 'ext': 'mp4',
 697                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 698                 'duration': 1715.0,
 699                 'uploader': 'thoughtworks.wistia.com',
 700             },
 701         },
 702         # Soundcloud embed
 703         {
 704             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 705             'info_dict': {
 706                 'id': '174391317',
 707                 'ext': 'mp3',
 708                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 709                 'uploader': 'Sophos Security',
 710                 'title': 'Chet Chat 171 - Oct 29, 2014',
 711                 'upload_date': '20141029',
 712             }
 713         },
 714         # Livestream embed
 715         {
 716             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 717             'info_dict': {
 718                 'id': '67864563',
 719                 'ext': 'flv',
 720                 'upload_date': '20141112',
 721                 'title': 'Rosetta #CometLanding webcast HL 10',
 722             }
 723         },
 724         # LazyYT
 725         {
 726             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 727             'info_dict': {
 728                 'id': '1986',
 729                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 730             },
 731             'playlist_mincount': 2,
 732         },
 733         # Cinchcast embed
 734         {
 735             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 736             'info_dict': {
 737                 'id': '7141703',
 738                 'ext': 'mp3',
 739                 'upload_date': '20141126',
 740                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 741             }
 742         },
 743         # Cinerama player
 744         {
 745             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 746             'info_dict': {
 747                 'id': '730m_DandD_1901_512k',
 748                 'ext': 'mp4',
 749                 'uploader': 'www.abc.net.au',
 750                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 751             }
 752         },
 753         # embedded viddler video
 754         {
 755             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 756             'info_dict': {
 757                 'id': '4d03aad9',
 758                 'ext': 'mp4',
 759                 'uploader': 'deadspin',
 760                 'title': 'WALL-TO-GORTAT',
 761                 'timestamp': 1422285291,
 762                 'upload_date': '20150126',
 763             },
 764             'add_ie': ['Viddler'],
 765         },
 766         # Libsyn embed
 767         {
 768             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 769             'info_dict': {
 770                 'id': '3377616',
 771                 'ext': 'mp3',
 772                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 773                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 774                 'upload_date': '20150220',
 775             },
 776         },
 777         # jwplayer YouTube
 778         {
 779             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 780             'info_dict': {
 781                 'id': 'Mrj4DVp2zeA',
 782                 'ext': 'mp4',
 783                 'upload_date': '20150212',
 784                 'uploader': 'The National Archives UK',
 785                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 786                 'uploader_id': 'NationalArchives08',
 787                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 788             },
 789         },
 790         # rtl.nl embed
 791         {
 792             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 793             'playlist_mincount': 5,
 794             'info_dict': {
 795                 'id': 'aanslagen-kopenhagen',
 796                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 797             }
 798         },
 799         # Zapiks embed
 800         {
 801             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 802             'info_dict': {
 803                 'id': '118046',
 804                 'ext': 'mp4',
 805                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 806             }
 807         },
 808         # Kaltura embed
 809         {
 810             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 811             'info_dict': {
 812                 'id': '1_eergr3h1',
 813                 'ext': 'mp4',
 814                 'upload_date': '20150226',
 815                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 816                 'timestamp': int,
 817                 'title': 'John Carlson Postgame 2/25/15',
 818             },
 819         },
 820         # Kaltura embed (different embed code)
 821         {
 822             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 823             'info_dict': {
 824                 'id': '1_a52wc67y',
 825                 'ext': 'flv',
 826                 'upload_date': '20150127',
 827                 'uploader_id': 'PremierMedia',
 828                 'timestamp': int,
 829                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 830             },
 831         },
 832         # Kaltura embed protected with referrer
 833         {
 834             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
 835             'info_dict': {
 836                 'id': '1_g4fbemnq',
 837                 'ext': 'mp4',
 838                 'title': 'Violetta - Achter De Schermen - Ruggero',
 839                 'description': 'Achter de schermen met Ruggero',
 840                 'timestamp': 1435133761,
 841                 'upload_date': '20150624',
 842                 'uploader_id': 'echojecka',
 843             },
 844         },
 845         # Eagle.Platform embed (generic URL)
 846         {
 847             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 848             'info_dict': {
 849                 'id': '227304',
 850                 'ext': 'mp4',
 851                 'title': 'Навальный вышел на свободу',
 852                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 853                 'thumbnail': 're:^https?://.*\.jpg$',
 854                 'duration': 87,
 855                 'view_count': int,
 856                 'age_limit': 0,
 857             },
 858         },
 859         # ClipYou (Eagle.Platform) embed (custom URL)
 860         {
 861             'url': 'http://muz-tv.ru/play/7129/',
 862             'info_dict': {
 863                 'id': '12820',
 864                 'ext': 'mp4',
 865                 'title': "'O Sole Mio",
 866                 'thumbnail': 're:^https?://.*\.jpg$',
 867                 'duration': 216,
 868                 'view_count': int,
 869             },
 870         },
 871         # Pladform embed
 872         {
 873             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 874             'info_dict': {
 875                 'id': '100183293',
 876                 'ext': 'mp4',
 877                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 878                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 879                 'thumbnail': 're:^https?://.*\.jpg$',
 880                 'duration': 694,
 881                 'age_limit': 0,
 882             },
 883         },
 884         # Playwire embed
 885         {
 886             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 887             'info_dict': {
 888                 'id': '3519514',
 889                 'ext': 'mp4',
 890                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 891                 'thumbnail': 're:^https?://.*\.png$',
 892                 'duration': 45.115,
 893             },
 894         },
 895         # 5min embed
 896         {
 897             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 898             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 899             'info_dict': {
 900                 'id': '518726732',
 901                 'ext': 'mp4',
 902                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 903             },
 904         },
 905         # SVT embed
 906         {
 907             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 908             'info_dict': {
 909                 'id': '2900353',
 910                 'ext': 'flv',
 911                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 912                 'duration': 27,
 913                 'age_limit': 0,
 914             },
 915         },
 916         # Crooks and Liars embed
 917         {
 918             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 919             'info_dict': {
 920                 'id': '8RUoRhRi',
 921                 'ext': 'mp4',
 922                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 923                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 924                 'timestamp': 1428207000,
 925                 'upload_date': '20150405',
 926                 'uploader': 'Heather',
 927             },
 928         },
 929         # Crooks and Liars external embed
 930         {
 931             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 932             'info_dict': {
 933                 'id': 'MTE3MjUtMzQ2MzA',
 934                 'ext': 'mp4',
 935                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 936                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 937                 'timestamp': 1265032391,
 938                 'upload_date': '20100201',
 939                 'uploader': 'Heather',
 940             },
 941         },
 942         # NBC Sports vplayer embed
 943         {
 944             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 945             'info_dict': {
 946                 'id': 'ln7x1qSThw4k',
 947                 'ext': 'flv',
 948                 'title': "PFT Live: New leader in the 'new-look' defense",
 949                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 950             },
 951         },
 952         # UDN embed
 953         {
 954             'url': 'http://www.udn.com/news/story/7314/822787',
 955             'md5': 'fd2060e988c326991037b9aff9df21a6',
 956             'info_dict': {
 957                 'id': '300346',
 958                 'ext': 'mp4',
 959                 'title': '中一中男師變性 全校師生力挺',
 960                 'thumbnail': 're:^https?://.*\.jpg$',
 961             }
 962         },
 963         # Ooyala embed
 964         {
 965             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 966             'info_dict': {
 967                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 968                 'ext': 'mp4',
 969                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
 970                 'title': 'This is what separates the Excel masters from the wannabes',
 971                 'duration': 191.933,
 972             },
 973             'params': {
 974                 # m3u8 downloads
 975                 'skip_download': True,
 976             }
 977         },
 978         # Contains a SMIL manifest
 979         {
 980             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 981             'info_dict': {
 982                 'id': 'file',
 983                 'ext': 'flv',
 984                 'title': '+ Football: Lottery Champions League Europe',
 985                 'uploader': 'www.telewebion.com',
 986             },
 987             'params': {
 988                 # rtmpe downloads
 989                 'skip_download': True,
 990             }
 991         },
 992         # Brightcove URL in single quotes
 993         {
 994             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 995             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 996             'info_dict': {
 997                 'id': '4255764656001',
 998                 'ext': 'mp4',
 999                 'title': 'SN Presents: Russell Martin, World Citizen',
1000                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1001                 'uploader': 'Rogers Sportsnet',
1002             },
1003         },
1004         # Dailymotion Cloud video
1005         {
1006             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1007             'md5': '49444254273501a64675a7e68c502681',
1008             'info_dict': {
1009                 'id': '5585de919473990de4bee11b',
1010                 'ext': 'mp4',
1011                 'title': 'Le débat',
1012                 'thumbnail': 're:^https?://.*\.jpe?g$',
1013             }
1014         },
1015         # OnionStudios embed
1016         {
1017             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1018             'info_dict': {
1019                 'id': '2855',
1020                 'ext': 'mp4',
1021                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1022                 'thumbnail': 're:^https?://.*\.jpe?g$',
1023                 'uploader': 'ClickHole',
1024                 'uploader_id': 'clickhole',
1025             }
1026         },
1027         # SnagFilms embed
1028         {
1029             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1030             'info_dict': {
1031                 'id': '74849a00-85a9-11e1-9660-123139220831',
1032                 'ext': 'mp4',
1033                 'title': '#whilewewatch',
1034             }
1035         },
1036         # AdobeTVVideo embed
1037         {
1038             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1039             'md5': '43662b577c018ad707a63766462b1e87',
1040             'info_dict': {
1041                 'id': '2456',
1042                 'ext': 'mp4',
1043                 'title': 'New experience with Acrobat DC',
1044                 'description': 'New experience with Acrobat DC',
1045                 'duration': 248.667,
1046             },
1047         },
1048         # ScreenwaveMedia embed
1049         {
1050             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1051             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1052             'info_dict': {
1053                 'id': 'cinemasnob-55d26273809dd',
1054                 'ext': 'mp4',
1055                 'title': 'cinemasnob',
1056             },
1057         },
1058         # BrightcoveInPageEmbed embed
1059         {
1060             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1061             'info_dict': {
1062                 'id': '4238694884001',
1063                 'ext': 'flv',
1064                 'title': 'Tabletop: Dread, Last Thoughts',
1065                 'description': 'Tabletop: Dread, Last Thoughts',
1066                 'duration': 51690,
1067             },
1068         },
1069         # JWPlayer with M3U8
1070         {
1071             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1072             'info_dict': {
1073                 'id': 'playlist',
1074                 'ext': 'mp4',
1075                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1076                 'uploader': 'ren.tv',
1077             },
1078             'params': {
1079                 # m3u8 downloads
1080                 'skip_download': True,
1081             }
1082         }
1083     ]
1084
1085     def report_following_redirect(self, new_url):
1086         """Report information extraction."""
1087         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1088
1089     def _extract_rss(self, url, video_id, doc):
1090         playlist_title = doc.find('./channel/title').text
1091         playlist_desc_el = doc.find('./channel/description')
1092         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1093
1094         entries = []
1095         for it in doc.findall('./channel/item'):
1096             next_url = xpath_text(it, 'link', fatal=False)
1097             if not next_url:
1098                 enclosure_nodes = it.findall('./enclosure')
1099                 for e in enclosure_nodes:
1100                     next_url = e.attrib.get('url')
1101                     if next_url:
1102                         break
1103
1104             if not next_url:
1105                 continue
1106
1107             entries.append({
1108                 '_type': 'url',
1109                 'url': next_url,
1110                 'title': it.find('title').text,
1111             })
1112
1113         return {
1114             '_type': 'playlist',
1115             'id': url,
1116             'title': playlist_title,
1117             'description': playlist_desc,
1118             'entries': entries,
1119         }
1120
1121     def _extract_camtasia(self, url, video_id, webpage):
1122         """ Returns None if no camtasia video can be found. """
1123
1124         camtasia_cfg = self._search_regex(
1125             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1126             webpage, 'camtasia configuration file', default=None)
1127         if camtasia_cfg is None:
1128             return None
1129
1130         title = self._html_search_meta('DC.title', webpage, fatal=True)
1131
1132         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1133         camtasia_cfg = self._download_xml(
1134             camtasia_url, video_id,
1135             note='Downloading camtasia configuration',
1136             errnote='Failed to download camtasia configuration')
1137         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1138
1139         entries = []
1140         for n in fileset_node.getchildren():
1141             url_n = n.find('./uri')
1142             if url_n is None:
1143                 continue
1144
1145             entries.append({
1146                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1147                 'title': '%s - %s' % (title, n.tag),
1148                 'url': compat_urlparse.urljoin(url, url_n.text),
1149                 'duration': float_or_none(n.find('./duration').text),
1150             })
1151
1152         return {
1153             '_type': 'playlist',
1154             'entries': entries,
1155             'title': title,
1156         }
1157
1158     def _real_extract(self, url):
1159         if url.startswith('//'):
1160             return {
1161                 '_type': 'url',
1162                 'url': self.http_scheme() + url,
1163             }
1164
1165         parsed_url = compat_urlparse.urlparse(url)
1166         if not parsed_url.scheme:
1167             default_search = self._downloader.params.get('default_search')
1168             if default_search is None:
1169                 default_search = 'fixup_error'
1170
1171             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1172                 if '/' in url:
1173                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1174                     return self.url_result('http://' + url)
1175                 elif default_search != 'fixup_error':
1176                     if default_search == 'auto_warning':
1177                         if re.match(r'^(?:url|URL)$', url):
1178                             raise ExtractorError(
1179                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1180                                 expected=True)
1181                         else:
1182                             self._downloader.report_warning(
1183                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1184                     return self.url_result('ytsearch:' + url)
1185
1186             if default_search in ('error', 'fixup_error'):
1187                 raise ExtractorError(
1188                     '%r is not a valid URL. '
1189                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1190                     % (url, url), expected=True)
1191             else:
1192                 if ':' not in default_search:
1193                     default_search += ':'
1194                 return self.url_result(default_search + url)
1195
1196         url, smuggled_data = unsmuggle_url(url)
1197         force_videoid = None
1198         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1199         if smuggled_data and 'force_videoid' in smuggled_data:
1200             force_videoid = smuggled_data['force_videoid']
1201             video_id = force_videoid
1202         else:
1203             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1204
1205         self.to_screen('%s: Requesting header' % video_id)
1206
1207         head_req = HEADRequest(url)
1208         head_response = self._request_webpage(
1209             head_req, video_id,
1210             note=False, errnote='Could not send HEAD request to %s' % url,
1211             fatal=False)
1212
1213         if head_response is not False:
1214             # Check for redirect
1215             new_url = head_response.geturl()
1216             if url != new_url:
1217                 self.report_following_redirect(new_url)
1218                 if force_videoid:
1219                     new_url = smuggle_url(
1220                         new_url, {'force_videoid': force_videoid})
1221                 return self.url_result(new_url)
1222
1223         full_response = None
1224         if head_response is False:
1225             request = sanitized_Request(url)
1226             request.add_header('Accept-Encoding', '*')
1227             full_response = self._request_webpage(request, video_id)
1228             head_response = full_response
1229
1230         # Check for direct link to a video
1231         content_type = head_response.headers.get('Content-Type', '')
1232         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|dash\+xml|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
1233         if m:
1234             upload_date = unified_strdate(
1235                 head_response.headers.get('Last-Modified'))
1236             formats = []
1237             if m.group('format_id').endswith('mpegurl'):
1238                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1239             elif m.group('format_id').startswith('dash+xml'):
1240                 formats = self._extract_mpd_formats(url, video_id)
1241             else:
1242                 formats = [{
1243                     'format_id': m.group('format_id'),
1244                     'url': url,
1245                     'vcodec': 'none' if m.group('type') == 'audio' else None
1246                 }]
1247             return {
1248                 'id': video_id,
1249                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1250                 'direct': True,
1251                 'formats': formats,
1252                 'upload_date': upload_date,
1253             }
1254
1255         if not self._downloader.params.get('test', False) and not is_intentional:
1256             force = self._downloader.params.get('force_generic_extractor', False)
1257             self._downloader.report_warning(
1258                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1259
1260         if not full_response:
1261             request = sanitized_Request(url)
1262             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1263             # making it impossible to download only chunk of the file (yet we need only 512kB to
1264             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1265             # that will always result in downloading the whole file that is not desirable.
1266             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1267             # to accept raw bytes and being able to download only a chunk.
1268             # It may probably better to solve this by checking Content-Type for application/octet-stream
1269             # after HEAD request finishes, but not sure if we can rely on this.
1270             request.add_header('Accept-Encoding', '*')
1271             full_response = self._request_webpage(request, video_id)
1272
1273         # Maybe it's a direct link to a video?
1274         # Be careful not to download the whole thing!
1275         first_bytes = full_response.read(512)
1276         if not is_html(first_bytes):
1277             self._downloader.report_warning(
1278                 'URL could be a direct video link, returning it as such.')
1279             upload_date = unified_strdate(
1280                 head_response.headers.get('Last-Modified'))
1281             return {
1282                 'id': video_id,
1283                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1284                 'direct': True,
1285                 'url': url,
1286                 'upload_date': upload_date,
1287             }
1288
1289         webpage = self._webpage_read_content(
1290             full_response, url, video_id, prefix=first_bytes)
1291
1292         self.report_extraction(video_id)
1293
1294         # Is it an RSS feed, a SMIL file or a XSPF playlist?
1295         try:
1296             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1297             if doc.tag == 'rss':
1298                 return self._extract_rss(url, video_id, doc)
1299             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1300                 return self._parse_smil(doc, url, video_id)
1301             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1302                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1303         except compat_xml_parse_error:
1304             pass
1305
1306         # Is it a Camtasia project?
1307         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1308         if camtasia_res is not None:
1309             return camtasia_res
1310
1311         # Sometimes embedded video player is hidden behind percent encoding
1312         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1313         # Unescaping the whole page allows to handle those cases in a generic way
1314         webpage = compat_urllib_parse_unquote(webpage)
1315
1316         # it's tempting to parse this further, but you would
1317         # have to take into account all the variations like
1318         #   Video Title - Site Name
1319         #   Site Name | Video Title
1320         #   Video Title - Tagline | Site Name
1321         # and so on and so forth; it's just not practical
1322         video_title = self._html_search_regex(
1323             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1324             default='video')
1325
1326         # Try to detect age limit automatically
1327         age_limit = self._rta_search(webpage)
1328         # And then there are the jokers who advertise that they use RTA,
1329         # but actually don't.
1330         AGE_LIMIT_MARKERS = [
1331             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1332         ]
1333         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1334             age_limit = 18
1335
1336         # video uploader is domain name
1337         video_uploader = self._search_regex(
1338             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1339
1340         # Helper method
1341         def _playlist_from_matches(matches, getter=None, ie=None):
1342             urlrs = orderedSet(
1343                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1344                 for m in matches)
1345             return self.playlist_result(
1346                 urlrs, playlist_id=video_id, playlist_title=video_title)
1347
1348         # Look for Brightcove Legacy Studio embeds
1349         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1350         if bc_urls:
1351             self.to_screen('Brightcove video detected.')
1352             entries = [{
1353                 '_type': 'url',
1354                 'url': smuggle_url(bc_url, {'Referer': url}),
1355                 'ie_key': 'BrightcoveLegacy'
1356             } for bc_url in bc_urls]
1357
1358             return {
1359                 '_type': 'playlist',
1360                 'title': video_title,
1361                 'id': video_id,
1362                 'entries': entries,
1363             }
1364
1365         # Look for Brightcove New Studio embeds
1366         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1367         if bc_urls:
1368             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1369
1370         # Look for embedded rtl.nl player
1371         matches = re.findall(
1372             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1373             webpage)
1374         if matches:
1375             return _playlist_from_matches(matches, ie='RtlNl')
1376
1377         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1378         if vimeo_url is not None:
1379             return self.url_result(vimeo_url)
1380
1381         vid_me_embed_url = self._search_regex(
1382             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1383             webpage, 'vid.me embed', default=None)
1384         if vid_me_embed_url is not None:
1385             return self.url_result(vid_me_embed_url, 'Vidme')
1386
1387         # Look for embedded YouTube player
1388         matches = re.findall(r'''(?x)
1389             (?:
1390                 <iframe[^>]+?src=|
1391                 data-video-url=|
1392                 <embed[^>]+?src=|
1393                 embedSWF\(?:\s*|
1394                 new\s+SWFObject\(
1395             )
1396             (["\'])
1397                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1398                 (?:embed|v|p)/.+?)
1399             \1''', webpage)
1400         if matches:
1401             return _playlist_from_matches(
1402                 matches, lambda m: unescapeHTML(m[1]))
1403
1404         # Look for lazyYT YouTube embed
1405         matches = re.findall(
1406             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1407         if matches:
1408             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1409
1410         # Look for embedded Dailymotion player
1411         matches = re.findall(
1412             r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1413         if matches:
1414             return _playlist_from_matches(
1415                 matches, lambda m: unescapeHTML(m[1]))
1416
1417         # Look for embedded Dailymotion playlist player (#3822)
1418         m = re.search(
1419             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1420         if m:
1421             playlists = re.findall(
1422                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1423             if playlists:
1424                 return _playlist_from_matches(
1425                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1426
1427         # Look for embedded Wistia player
1428         match = re.search(
1429             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1430         if match:
1431             embed_url = self._proto_relative_url(
1432                 unescapeHTML(match.group('url')))
1433             return {
1434                 '_type': 'url_transparent',
1435                 'url': embed_url,
1436                 'ie_key': 'Wistia',
1437                 'uploader': video_uploader,
1438                 'title': video_title,
1439                 'id': video_id,
1440             }
1441
1442         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1443         if match:
1444             return {
1445                 '_type': 'url_transparent',
1446                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1447                 'ie_key': 'Wistia',
1448                 'uploader': video_uploader,
1449                 'title': video_title,
1450                 'id': match.group('id')
1451             }
1452
1453         # Look for SVT player
1454         svt_url = SVTIE._extract_url(webpage)
1455         if svt_url:
1456             return self.url_result(svt_url, 'SVT')
1457
1458         # Look for embedded condenast player
1459         matches = re.findall(
1460             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1461             webpage)
1462         if matches:
1463             return {
1464                 '_type': 'playlist',
1465                 'entries': [{
1466                     '_type': 'url',
1467                     'ie_key': 'CondeNast',
1468                     'url': ma,
1469                 } for ma in matches],
1470                 'title': video_title,
1471                 'id': video_id,
1472             }
1473
1474         # Look for Bandcamp pages with custom domain
1475         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1476         if mobj is not None:
1477             burl = unescapeHTML(mobj.group(1))
1478             # Don't set the extractor because it can be a track url or an album
1479             return self.url_result(burl)
1480
1481         # Look for embedded Vevo player
1482         mobj = re.search(
1483             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1484         if mobj is not None:
1485             return self.url_result(mobj.group('url'))
1486
1487         # Look for embedded Viddler player
1488         mobj = re.search(
1489             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1490             webpage)
1491         if mobj is not None:
1492             return self.url_result(mobj.group('url'))
1493
1494         # Look for NYTimes player
1495         mobj = re.search(
1496             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1497             webpage)
1498         if mobj is not None:
1499             return self.url_result(mobj.group('url'))
1500
1501         # Look for Libsyn player
1502         mobj = re.search(
1503             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1504         if mobj is not None:
1505             return self.url_result(mobj.group('url'))
1506
1507         # Look for Ooyala videos
1508         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1509                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1510                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1511                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1512         if mobj is not None:
1513             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1514
1515         # Look for multiple Ooyala embeds on SBN network websites
1516         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1517         if mobj is not None:
1518             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1519             if embeds:
1520                 return _playlist_from_matches(
1521                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1522
1523         # Look for Aparat videos
1524         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1525         if mobj is not None:
1526             return self.url_result(mobj.group(1), 'Aparat')
1527
1528         # Look for MPORA videos
1529         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1530         if mobj is not None:
1531             return self.url_result(mobj.group(1), 'Mpora')
1532
1533         # Look for embedded NovaMov-based player
1534         mobj = re.search(
1535             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1536                     (?P<url>http://(?:(?:embed|www)\.)?
1537                         (?:novamov\.com|
1538                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1539                            videoweed\.(?:es|com)|
1540                            movshare\.(?:net|sx|ag)|
1541                            divxstage\.(?:eu|net|ch|co|at|ag))
1542                         /embed\.php.+?)\1''', webpage)
1543         if mobj is not None:
1544             return self.url_result(mobj.group('url'))
1545
1546         # Look for embedded Facebook player
1547         mobj = re.search(
1548             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1549         if mobj is not None:
1550             return self.url_result(mobj.group('url'), 'Facebook')
1551
1552         # Look for embedded VK player
1553         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1554         if mobj is not None:
1555             return self.url_result(mobj.group('url'), 'VK')
1556
1557         # Look for embedded ivi player
1558         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1559         if mobj is not None:
1560             return self.url_result(mobj.group('url'), 'Ivi')
1561
1562         # Look for embedded Huffington Post player
1563         mobj = re.search(
1564             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1565         if mobj is not None:
1566             return self.url_result(mobj.group('url'), 'HuffPost')
1567
1568         # Look for embed.ly
1569         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1570         if mobj is not None:
1571             return self.url_result(mobj.group('url'))
1572         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1573         if mobj is not None:
1574             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1575
1576         # Look for funnyordie embed
1577         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1578         if matches:
1579             return _playlist_from_matches(
1580                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1581
1582         # Look for BBC iPlayer embed
1583         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1584         if matches:
1585             return _playlist_from_matches(matches, ie='BBCCoUk')
1586
1587         # Look for embedded RUTV player
1588         rutv_url = RUTVIE._extract_url(webpage)
1589         if rutv_url:
1590             return self.url_result(rutv_url, 'RUTV')
1591
1592         # Look for embedded TVC player
1593         tvc_url = TVCIE._extract_url(webpage)
1594         if tvc_url:
1595             return self.url_result(tvc_url, 'TVC')
1596
1597         # Look for embedded SportBox player
1598         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1599         if sportbox_urls:
1600             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1601
1602         # Look for embedded PornHub player
1603         pornhub_url = PornHubIE._extract_url(webpage)
1604         if pornhub_url:
1605             return self.url_result(pornhub_url, 'PornHub')
1606
1607         # Look for embedded XHamster player
1608         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1609         if xhamster_urls:
1610             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1611
1612         # Look for embedded Tvigle player
1613         mobj = re.search(
1614             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1615         if mobj is not None:
1616             return self.url_result(mobj.group('url'), 'Tvigle')
1617
1618         # Look for embedded TED player
1619         mobj = re.search(
1620             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1621         if mobj is not None:
1622             return self.url_result(mobj.group('url'), 'TED')
1623
1624         # Look for embedded Ustream videos
1625         mobj = re.search(
1626             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1627         if mobj is not None:
1628             return self.url_result(mobj.group('url'), 'Ustream')
1629
1630         # Look for embedded arte.tv player
1631         mobj = re.search(
1632             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1633             webpage)
1634         if mobj is not None:
1635             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1636
1637         # Look for embedded francetv player
1638         mobj = re.search(
1639             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1640             webpage)
1641         if mobj is not None:
1642             return self.url_result(mobj.group('url'))
1643
1644         # Look for embedded smotri.com player
1645         smotri_url = SmotriIE._extract_url(webpage)
1646         if smotri_url:
1647             return self.url_result(smotri_url, 'Smotri')
1648
1649         # Look for embedded Myvi.ru player
1650         myvi_url = MyviIE._extract_url(webpage)
1651         if myvi_url:
1652             return self.url_result(myvi_url)
1653
1654         # Look for embedded soundcloud player
1655         mobj = re.search(
1656             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1657             webpage)
1658         if mobj is not None:
1659             url = unescapeHTML(mobj.group('url'))
1660             return self.url_result(url)
1661
1662         # Look for embedded vulture.com player
1663         mobj = re.search(
1664             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1665             webpage)
1666         if mobj is not None:
1667             url = unescapeHTML(mobj.group('url'))
1668             return self.url_result(url, ie='Vulture')
1669
1670         # Look for embedded mtvservices player
1671         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1672         if mtvservices_url:
1673             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1674
1675         # Look for embedded yahoo player
1676         mobj = re.search(
1677             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1678             webpage)
1679         if mobj is not None:
1680             return self.url_result(mobj.group('url'), 'Yahoo')
1681
1682         # Look for embedded sbs.com.au player
1683         mobj = re.search(
1684             r'''(?x)
1685             (?:
1686                 <meta\s+property="og:video"\s+content=|
1687                 <iframe[^>]+?src=
1688             )
1689             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1690             webpage)
1691         if mobj is not None:
1692             return self.url_result(mobj.group('url'), 'SBS')
1693
1694         # Look for embedded Cinchcast player
1695         mobj = re.search(
1696             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1697             webpage)
1698         if mobj is not None:
1699             return self.url_result(mobj.group('url'), 'Cinchcast')
1700
1701         mobj = re.search(
1702             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1703             webpage)
1704         if not mobj:
1705             mobj = re.search(
1706                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1707                 webpage)
1708         if mobj is not None:
1709             return self.url_result(mobj.group('url'), 'MLB')
1710
1711         mobj = re.search(
1712             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1713             webpage)
1714         if mobj is not None:
1715             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1716
1717         mobj = re.search(
1718             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1719             webpage)
1720         if mobj is not None:
1721             return self.url_result(mobj.group('url'), 'Livestream')
1722
1723         # Look for Zapiks embed
1724         mobj = re.search(
1725             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1726         if mobj is not None:
1727             return self.url_result(mobj.group('url'), 'Zapiks')
1728
1729         # Look for Kaltura embeds
1730         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1731                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1732         if mobj is not None:
1733             return self.url_result(smuggle_url(
1734                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1735                 {'source_url': url}), 'Kaltura')
1736
1737         # Look for Eagle.Platform embeds
1738         mobj = re.search(
1739             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1740         if mobj is not None:
1741             return self.url_result(mobj.group('url'), 'EaglePlatform')
1742
1743         # Look for ClipYou (uses Eagle.Platform) embeds
1744         mobj = re.search(
1745             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1746         if mobj is not None:
1747             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1748
1749         # Look for Pladform embeds
1750         pladform_url = PladformIE._extract_url(webpage)
1751         if pladform_url:
1752             return self.url_result(pladform_url)
1753
1754         # Look for Videomore embeds
1755         videomore_url = VideomoreIE._extract_url(webpage)
1756         if videomore_url:
1757             return self.url_result(videomore_url)
1758
1759         # Look for Playwire embeds
1760         mobj = re.search(
1761             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1762         if mobj is not None:
1763             return self.url_result(mobj.group('url'))
1764
1765         # Look for 5min embeds
1766         mobj = re.search(
1767             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1768         if mobj is not None:
1769             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1770
1771         # Look for Crooks and Liars embeds
1772         mobj = re.search(
1773             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1774         if mobj is not None:
1775             return self.url_result(mobj.group('url'))
1776
1777         # Look for NBC Sports VPlayer embeds
1778         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1779         if nbc_sports_url:
1780             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1781
1782         # Look for Google Drive embeds
1783         google_drive_url = GoogleDriveIE._extract_url(webpage)
1784         if google_drive_url:
1785             return self.url_result(google_drive_url, 'GoogleDrive')
1786
1787         # Look for UDN embeds
1788         mobj = re.search(
1789             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1790         if mobj is not None:
1791             return self.url_result(
1792                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1793
1794         # Look for Senate ISVP iframe
1795         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1796         if senate_isvp_url:
1797             return self.url_result(senate_isvp_url, 'SenateISVP')
1798
1799         # Look for Dailymotion Cloud videos
1800         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1801         if dmcloud_url:
1802             return self.url_result(dmcloud_url, 'DailymotionCloud')
1803
1804         # Look for OnionStudios embeds
1805         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1806         if onionstudios_url:
1807             return self.url_result(onionstudios_url)
1808
1809         # Look for SnagFilms embeds
1810         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1811         if snagfilms_url:
1812             return self.url_result(snagfilms_url)
1813
1814         # Look for JWPlatform embeds
1815         jwplatform_url = JWPlatformIE._extract_url(webpage)
1816         if jwplatform_url:
1817             return self.url_result(jwplatform_url, 'JWPlatform')
1818
1819         # Look for ScreenwaveMedia embeds
1820         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1821         if mobj is not None:
1822             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1823
1824         # Look for Digiteka embeds
1825         digiteka_url = DigitekaIE._extract_url(webpage)
1826         if digiteka_url:
1827             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
1828
1829         # Look for Limelight embeds
1830         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
1831         if mobj:
1832             lm = {
1833                 'Media': 'media',
1834                 'Channel': 'channel',
1835                 'ChannelList': 'channel_list',
1836             }
1837             return self.url_result('limelight:%s:%s' % (
1838                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
1839
1840         # Look for AdobeTVVideo embeds
1841         mobj = re.search(
1842             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1843             webpage)
1844         if mobj is not None:
1845             return self.url_result(
1846                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1847                 'AdobeTVVideo')
1848
1849         def check_video(vurl):
1850             if YoutubeIE.suitable(vurl):
1851                 return True
1852             vpath = compat_urlparse.urlparse(vurl).path
1853             vext = determine_ext(vpath)
1854             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1855
1856         def filter_video(urls):
1857             return list(filter(check_video, urls))
1858
1859         # Start with something easy: JW Player in SWFObject
1860         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1861         if not found:
1862             # Look for gorilla-vid style embedding
1863             found = filter_video(re.findall(r'''(?sx)
1864                 (?:
1865                     jw_plugins|
1866                     JWPlayerOptions|
1867                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1868                 )
1869                 .*?
1870                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1871         if not found:
1872             # Broaden the search a little bit
1873             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1874         if not found:
1875             # Broaden the findall a little bit: JWPlayer JS loader
1876             found = filter_video(re.findall(
1877                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1878         if not found:
1879             # Flow player
1880             found = filter_video(re.findall(r'''(?xs)
1881                 flowplayer\("[^"]+",\s*
1882                     \{[^}]+?\}\s*,
1883                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1884                         ["']?url["']?\s*:\s*["']([^"']+)["']
1885             ''', webpage))
1886         if not found:
1887             # Cinerama player
1888             found = re.findall(
1889                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1890         if not found:
1891             # Try to find twitter cards info
1892             found = filter_video(re.findall(
1893                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1894         if not found:
1895             # We look for Open Graph info:
1896             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1897             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1898             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1899             if m_video_type is not None:
1900                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1901         if not found:
1902             # HTML5 video
1903             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1904         if not found:
1905             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1906             found = re.search(
1907                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1908                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1909                 webpage)
1910             if not found:
1911                 # Look also in Refresh HTTP header
1912                 refresh_header = head_response.headers.get('Refresh')
1913                 if refresh_header:
1914                     # In python 2 response HTTP headers are bytestrings
1915                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1916                         refresh_header = refresh_header.decode('iso-8859-1')
1917                     found = re.search(REDIRECT_REGEX, refresh_header)
1918             if found:
1919                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1920                 self.report_following_redirect(new_url)
1921                 return {
1922                     '_type': 'url',
1923                     'url': new_url,
1924                 }
1925         if not found:
1926             raise UnsupportedError(url)
1927
1928         entries = []
1929         for video_url in found:
1930             video_url = video_url.replace('\\/', '/')
1931             video_url = compat_urlparse.urljoin(url, video_url)
1932             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1933
1934             # Sometimes, jwplayer extraction will result in a YouTube URL
1935             if YoutubeIE.suitable(video_url):
1936                 entries.append(self.url_result(video_url, 'Youtube'))
1937                 continue
1938
1939             # here's a fun little line of code for you:
1940             video_id = os.path.splitext(video_id)[0]
1941
1942             entry_info_dict = {
1943                 'id': video_id,
1944                 'uploader': video_uploader,
1945                 'title': video_title,
1946                 'age_limit': age_limit,
1947             }
1948
1949             ext = determine_ext(video_url)
1950             if ext == 'smil':
1951                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
1952             elif ext == 'xspf':
1953                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
1954             elif ext == 'm3u8':
1955                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
1956             else:
1957                 entry_info_dict['url'] = video_url
1958
1959             entries.append(entry_info_dict)
1960
1961         if len(entries) == 1:
1962             return entries[0]
1963         else:
1964             for num, e in enumerate(entries, start=1):
1965                 # 'url' results don't have a title
1966                 if e.get('title') is not None:
1967                     e['title'] = '%s (%d)' % (e['title'], num)
1968             return {
1969                 '_type': 'playlist',
1970                 'entries': entries,
1971             }