_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_urllib_parse_unquote,
  13     compat_urllib_request,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     parse_xml,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import BrightcoveIE
  34 from .nbc import NBCSportsVPlayerIE
  35 from .ooyala import OoyalaIE
  36 from .rutv import RUTVIE
  37 from .tvc import TVCIE
  38 from .sportbox import SportBoxEmbedIE
  39 from .smotri import SmotriIE
  40 from .myvi import MyviIE
  41 from .condenast import CondeNastIE
  42 from .udn import UDNEmbedIE
  43 from .senateisvp import SenateISVPIE
  44 from .bliptv import BlipTVIE
  45 from .svt import SVTIE
  46 from .pornhub import PornHubIE
  47 from .xhamster import XHamsterEmbedIE
  48 from .vimeo import VimeoIE
  49 from .dailymotion import DailymotionCloudIE
  50 from .onionstudios import OnionStudiosIE
  51 from .snagfilms import SnagFilmsEmbedIE
  52 from .screenwavemedia import ScreenwaveMediaIE
  53 from .ultimedia import UltimediaIE
  54
  55
  56 class GenericIE(InfoExtractor):
  57     IE_DESC = 'Generic downloader that works on some sites'
  58     _VALID_URL = r'.*'
  59     IE_NAME = 'generic'
  60     _TESTS = [
  61         # Direct link to a video
  62         {
  63             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  64             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  65             'info_dict': {
  66                 'id': 'trailer',
  67                 'ext': 'mp4',
  68                 'title': 'trailer',
  69                 'upload_date': '20100513',
  70             }
  71         },
  72         # Direct link to media delivered compressed (until Accept-Encoding is *)
  73         {
  74             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  75             'md5': '128c42e68b13950268b648275386fc74',
  76             'info_dict': {
  77                 'id': 'FictionJunction-Parallel_Hearts',
  78                 'ext': 'flac',
  79                 'title': 'FictionJunction-Parallel_Hearts',
  80                 'upload_date': '20140522',
  81             },
  82             'expected_warnings': [
  83                 'URL could be a direct video link, returning it as such.'
  84             ]
  85         },
  86         # Direct download with broken HEAD
  87         {
  88             'url': 'http://ai-radio.org:8000/radio.opus',
  89             'info_dict': {
  90                 'id': 'radio',
  91                 'ext': 'opus',
  92                 'title': 'radio',
  93             },
  94             'params': {
  95                 'skip_download': True,  # infinite live stream
  96             },
  97             'expected_warnings': [
  98                 r'501.*Not Implemented'
  99             ],
 100         },
 101         # Direct link with incorrect MIME type
 102         {
 103             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 104             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 105             'info_dict': {
 106                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 107                 'id': '5_Lennart_Poettering_-_Systemd',
 108                 'ext': 'webm',
 109                 'title': '5_Lennart_Poettering_-_Systemd',
 110                 'upload_date': '20141120',
 111             },
 112             'expected_warnings': [
 113                 'URL could be a direct video link, returning it as such.'
 114             ]
 115         },
 116         # RSS feed
 117         {
 118             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 119             'info_dict': {
 120                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 121                 'title': 'Zero Punctuation',
 122                 'description': 're:.*groundbreaking video review series.*'
 123             },
 124             'playlist_mincount': 11,
 125         },
 126         # RSS feed with enclosure
 127         {
 128             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 129             'info_dict': {
 130                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 131                 'ext': 'm4v',
 132                 'upload_date': '20150228',
 133                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 134             }
 135         },
 136         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 137         {
 138             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 139             'info_dict': {
 140                 'id': 'smil',
 141                 'ext': 'mp4',
 142                 'title': 'Automatics, robotics and biocybernetics',
 143                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 144                 'formats': 'mincount:16',
 145                 'subtitles': 'mincount:1',
 146             },
 147             'params': {
 148                 'force_generic_extractor': True,
 149                 'skip_download': True,
 150             },
 151         },
 152         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 153         {
 154             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 155             'info_dict': {
 156                 'id': 'hds',
 157                 'ext': 'flv',
 158                 'title': 'hds',
 159                 'formats': 'mincount:1',
 160             },
 161             'params': {
 162                 'skip_download': True,
 163             },
 164         },
 165         # SMIL from https://www.restudy.dk/video/play/id/1637
 166         {
 167             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 168             'info_dict': {
 169                 'id': 'video_1637',
 170                 'ext': 'flv',
 171                 'title': 'video_1637',
 172                 'formats': 'mincount:3',
 173             },
 174             'params': {
 175                 'skip_download': True,
 176             },
 177         },
 178         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 179         {
 180             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 181             'info_dict': {
 182                 'id': 'smil-service',
 183                 'ext': 'flv',
 184                 'title': 'smil-service',
 185                 'formats': 'mincount:1',
 186             },
 187             'params': {
 188                 'skip_download': True,
 189             },
 190         },
 191         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 192         {
 193             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 194             'info_dict': {
 195                 'id': '4719370',
 196                 'ext': 'mp4',
 197                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 198                 'formats': 'mincount:3',
 199             },
 200             'params': {
 201                 'skip_download': True,
 202             },
 203         },
 204         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 205         {
 206             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 207             'info_dict': {
 208                 'id': 'mZlp2ctYIUEB',
 209                 'ext': 'mp4',
 210                 'title': 'Tikibad ontruimd wegens brand',
 211                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 212                 'thumbnail': 're:^https?://.*\.jpg$',
 213                 'duration': 33,
 214             },
 215             'params': {
 216                 'skip_download': True,
 217             },
 218         },
 219         # google redirect
 220         {
 221             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 222             'info_dict': {
 223                 'id': 'cmQHVoWB5FY',
 224                 'ext': 'mp4',
 225                 'upload_date': '20130224',
 226                 'uploader_id': 'TheVerge',
 227                 'description': 're:^Chris Ziegler takes a look at the\.*',
 228                 'uploader': 'The Verge',
 229                 'title': 'First Firefox OS phones side-by-side',
 230             },
 231             'params': {
 232                 'skip_download': False,
 233             }
 234         },
 235         {
 236             # redirect in Refresh HTTP header
 237             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 238             'info_dict': {
 239                 'id': 'pO8h3EaFRdo',
 240                 'ext': 'mp4',
 241                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 242                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 243                 'upload_date': '20150917',
 244                 'uploader_id': 'brtvofficial',
 245                 'uploader': 'Boiler Room',
 246             },
 247             'params': {
 248                 'skip_download': False,
 249             },
 250         },
 251         {
 252             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 253             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 254             'info_dict': {
 255                 'id': '13601338388002',
 256                 'ext': 'mp4',
 257                 'uploader': 'www.hodiho.fr',
 258                 'title': 'R\u00e9gis plante sa Jeep',
 259             }
 260         },
 261         # bandcamp page with custom domain
 262         {
 263             'add_ie': ['Bandcamp'],
 264             'url': 'http://bronyrock.com/track/the-pony-mash',
 265             'info_dict': {
 266                 'id': '3235767654',
 267                 'ext': 'mp3',
 268                 'title': 'The Pony Mash',
 269                 'uploader': 'M_Pallante',
 270             },
 271             'skip': 'There is a limit of 200 free downloads / month for the test song',
 272         },
 273         # embedded brightcove video
 274         # it also tests brightcove videos that need to set the 'Referer' in the
 275         # http requests
 276         {
 277             'add_ie': ['Brightcove'],
 278             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 279             'info_dict': {
 280                 'id': '2765128793001',
 281                 'ext': 'mp4',
 282                 'title': 'Le cours de bourse : l’analyse technique',
 283                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 284                 'uploader': 'BFM BUSINESS',
 285             },
 286             'params': {
 287                 'skip_download': True,
 288             },
 289         },
 290         {
 291             # https://github.com/rg3/youtube-dl/issues/2253
 292             'url': 'http://bcove.me/i6nfkrc3',
 293             'md5': '0ba9446db037002366bab3b3eb30c88c',
 294             'info_dict': {
 295                 'id': '3101154703001',
 296                 'ext': 'mp4',
 297                 'title': 'Still no power',
 298                 'uploader': 'thestar.com',
 299                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 300             },
 301             'add_ie': ['Brightcove'],
 302         },
 303         {
 304             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 305             'md5': 'fb973ecf6e4a78a67453647444222983',
 306             'info_dict': {
 307                 'id': '3414141473001',
 308                 'ext': 'mp4',
 309                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 310                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 311                 'uploader': 'Championat',
 312             },
 313         },
 314         {
 315             # https://github.com/rg3/youtube-dl/issues/3541
 316             'add_ie': ['Brightcove'],
 317             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 318             'info_dict': {
 319                 'id': '3866516442001',
 320                 'ext': 'mp4',
 321                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 322                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 323                 'uploader': 'SBS Broadcasting',
 324             },
 325             'skip': 'Restricted to Netherlands',
 326             'params': {
 327                 'skip_download': True,  # m3u8 download
 328             },
 329         },
 330         # ooyala video
 331         {
 332             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 333             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 334             'info_dict': {
 335                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 336                 'ext': 'mp4',
 337                 'title': '2cc213299525360.mov',  # that's what we get
 338             },
 339             'add_ie': ['Ooyala'],
 340         },
 341         {
 342             # ooyala video embedded with http://player.ooyala.com/iframe.js
 343             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 344             'info_dict': {
 345                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 346                 'ext': 'mp4',
 347                 'title': '"Steve Jobs: Man in the Machine" trailer',
 348                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 349             },
 350             'params': {
 351                 'skip_download': True,
 352             },
 353         },
 354         # multiple ooyala embeds on SBN network websites
 355         {
 356             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 357             'info_dict': {
 358                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 359                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 360             },
 361             'playlist_mincount': 3,
 362             'params': {
 363                 'skip_download': True,
 364             },
 365             'add_ie': ['Ooyala'],
 366         },
 367         # embed.ly video
 368         {
 369             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 370             'info_dict': {
 371                 'id': '9ODmcdjQcHQ',
 372                 'ext': 'mp4',
 373                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 374                 'upload_date': '20140225',
 375                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 376                 'uploader': 'Tested',
 377                 'uploader_id': 'testedcom',
 378             },
 379             # No need to test YoutubeIE here
 380             'params': {
 381                 'skip_download': True,
 382             },
 383         },
 384         # funnyordie embed
 385         {
 386             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 387             'info_dict': {
 388                 'id': '18e820ec3f',
 389                 'ext': 'mp4',
 390                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 391                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 392             },
 393         },
 394         # RUTV embed
 395         {
 396             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 397             'info_dict': {
 398                 'id': '776940',
 399                 'ext': 'mp4',
 400                 'title': 'Охотское море стало целиком российским',
 401                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 402             },
 403             'params': {
 404                 # m3u8 download
 405                 'skip_download': True,
 406             },
 407         },
 408         # TVC embed
 409         {
 410             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 411             'info_dict': {
 412                 'id': '55304',
 413                 'ext': 'mp4',
 414                 'title': 'Дошкольное воспитание',
 415             },
 416         },
 417         # SportBox embed
 418         {
 419             'url': 'http://www.vestifinance.ru/articles/25753',
 420             'info_dict': {
 421                 'id': '25753',
 422                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 423             },
 424             'playlist': [{
 425                 'info_dict': {
 426                     'id': '370908',
 427                     'title': 'Госзаказ. День 3',
 428                     'ext': 'mp4',
 429                 }
 430             }, {
 431                 'info_dict': {
 432                     'id': '370905',
 433                     'title': 'Госзаказ. День 2',
 434                     'ext': 'mp4',
 435                 }
 436             }, {
 437                 'info_dict': {
 438                     'id': '370902',
 439                     'title': 'Госзаказ. День 1',
 440                     'ext': 'mp4',
 441                 }
 442             }],
 443             'params': {
 444                 # m3u8 download
 445                 'skip_download': True,
 446             },
 447         },
 448         # Myvi.ru embed
 449         {
 450             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 451             'info_dict': {
 452                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 453                 'ext': 'mp4',
 454                 'title': 'Ужастики, русский трейлер (2015)',
 455                 'thumbnail': 're:^https?://.*\.jpg$',
 456                 'duration': 153,
 457             }
 458         },
 459         # XHamster embed
 460         {
 461             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 462             'info_dict': {
 463                 'id': 'showthread',
 464                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 465             },
 466             'playlist_mincount': 7,
 467         },
 468         # Embedded TED video
 469         {
 470             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 471             'md5': '65fdff94098e4a607385a60c5177c638',
 472             'info_dict': {
 473                 'id': '1969',
 474                 'ext': 'mp4',
 475                 'title': 'Hidden miracles of the natural world',
 476                 'uploader': 'Louie Schwartzberg',
 477                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 478             }
 479         },
 480         # Embedded Ustream video
 481         {
 482             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 483             'md5': '27b99cdb639c9b12a79bca876a073417',
 484             'info_dict': {
 485                 'id': '45734260',
 486                 'ext': 'flv',
 487                 'uploader': 'AU SPA:  The NSA and Privacy',
 488                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 489             }
 490         },
 491         # nowvideo embed hidden behind percent encoding
 492         {
 493             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 494             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 495             'info_dict': {
 496                 'id': '06e53103ca9aa',
 497                 'ext': 'flv',
 498                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 499                 'description': 'No description',
 500             },
 501         },
 502         # arte embed
 503         {
 504             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 505             'md5': '7653032cbb25bf6c80d80f217055fa43',
 506             'info_dict': {
 507                 'id': '048195-004_PLUS7-F',
 508                 'ext': 'flv',
 509                 'title': 'X:enius',
 510                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 511                 'upload_date': '20140320',
 512             },
 513             'params': {
 514                 'skip_download': 'Requires rtmpdump'
 515             }
 516         },
 517         # francetv embed
 518         {
 519             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 520             'info_dict': {
 521                 'id': 'EV_30231',
 522                 'ext': 'mp4',
 523                 'title': 'Alcaline, le concert avec Calogero',
 524                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 525                 'upload_date': '20150226',
 526                 'timestamp': 1424989860,
 527                 'duration': 5400,
 528             },
 529             'params': {
 530                 # m3u8 downloads
 531                 'skip_download': True,
 532             },
 533             'expected_warnings': [
 534                 'Forbidden'
 535             ]
 536         },
 537         # Condé Nast embed
 538         {
 539             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 540             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 541             'info_dict': {
 542                 'id': '53501be369702d3275860000',
 543                 'ext': 'mp4',
 544                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 545             }
 546         },
 547         # Dailymotion embed
 548         {
 549             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 550             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 551             'info_dict': {
 552                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 553                 'ext': 'mp4',
 554                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 555                 'uploader': 'Spi0n',
 556             },
 557             'add_ie': ['Dailymotion'],
 558         },
 559         # YouTube embed
 560         {
 561             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 562             'info_dict': {
 563                 'id': 'FXRb4ykk4S0',
 564                 'ext': 'mp4',
 565                 'title': 'The NBL Auction 2014',
 566                 'uploader': 'BADMINTON England',
 567                 'uploader_id': 'BADMINTONEvents',
 568                 'upload_date': '20140603',
 569                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 570             },
 571             'add_ie': ['Youtube'],
 572             'params': {
 573                 'skip_download': True,
 574             }
 575         },
 576         # MTVServices embed
 577         {
 578             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 579             'md5': '35727f82f58c76d996fc188f9755b0d5',
 580             'info_dict': {
 581                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 582                 'ext': 'mp4',
 583                 'title': 'Review',
 584                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 585             },
 586         },
 587         # YouTube embed via <data-embed-url="">
 588         {
 589             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 590             'info_dict': {
 591                 'id': '4vAffPZIT44',
 592                 'ext': 'mp4',
 593                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 594                 'uploader': 'Gameloft',
 595                 'uploader_id': 'gameloft',
 596                 'upload_date': '20140828',
 597                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 598             },
 599             'params': {
 600                 'skip_download': True,
 601             }
 602         },
 603         # Camtasia studio
 604         {
 605             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 606             'playlist': [{
 607                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 608                 'info_dict': {
 609                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 610                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 611                     'ext': 'flv',
 612                     'duration': 2235.90,
 613                 }
 614             }, {
 615                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 616                 'info_dict': {
 617                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 618                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 619                     'ext': 'flv',
 620                     'duration': 2235.93,
 621                 }
 622             }],
 623             'info_dict': {
 624                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 625             }
 626         },
 627         # Flowplayer
 628         {
 629             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 630             'md5': '9d65602bf31c6e20014319c7d07fba27',
 631             'info_dict': {
 632                 'id': '5123ea6d5e5a7',
 633                 'ext': 'mp4',
 634                 'age_limit': 18,
 635                 'uploader': 'www.handjobhub.com',
 636                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 637             }
 638         },
 639         # Multiple brightcove videos
 640         # https://github.com/rg3/youtube-dl/issues/2283
 641         {
 642             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 643             'info_dict': {
 644                 'id': 'always-never',
 645                 'title': 'Always / Never - The New Yorker',
 646             },
 647             'playlist_count': 3,
 648             'params': {
 649                 'extract_flat': False,
 650                 'skip_download': True,
 651             }
 652         },
 653         # MLB embed
 654         {
 655             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 656             'md5': '96f09a37e44da40dd083e12d9a683327',
 657             'info_dict': {
 658                 'id': '33322633',
 659                 'ext': 'mp4',
 660                 'title': 'Ump changes call to ball',
 661                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 662                 'duration': 48,
 663                 'timestamp': 1401537900,
 664                 'upload_date': '20140531',
 665                 'thumbnail': 're:^https?://.*\.jpg$',
 666             },
 667         },
 668         # Wistia embed
 669         {
 670             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 671             'md5': '8788b683c777a5cf25621eaf286d0c23',
 672             'info_dict': {
 673                 'id': '1cfaf6b7ea',
 674                 'ext': 'mov',
 675                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 676                 'duration': 643.0,
 677                 'filesize': 182808282,
 678                 'uploader': 'education-portal.com',
 679             },
 680         },
 681         {
 682             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 683             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 684             'info_dict': {
 685                 'id': 'uxjb0lwrcz',
 686                 'ext': 'mp4',
 687                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 688                 'duration': 1715.0,
 689                 'uploader': 'thoughtworks.wistia.com',
 690             },
 691         },
 692         # Soundcloud embed
 693         {
 694             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 695             'info_dict': {
 696                 'id': '174391317',
 697                 'ext': 'mp3',
 698                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 699                 'uploader': 'Sophos Security',
 700                 'title': 'Chet Chat 171 - Oct 29, 2014',
 701                 'upload_date': '20141029',
 702             }
 703         },
 704         # Livestream embed
 705         {
 706             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 707             'info_dict': {
 708                 'id': '67864563',
 709                 'ext': 'flv',
 710                 'upload_date': '20141112',
 711                 'title': 'Rosetta #CometLanding webcast HL 10',
 712             }
 713         },
 714         # LazyYT
 715         {
 716             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 717             'info_dict': {
 718                 'id': '1986',
 719                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 720             },
 721             'playlist_mincount': 2,
 722         },
 723         # Cinchcast embed
 724         {
 725             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 726             'info_dict': {
 727                 'id': '7141703',
 728                 'ext': 'mp3',
 729                 'upload_date': '20141126',
 730                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 731             }
 732         },
 733         # Cinerama player
 734         {
 735             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 736             'info_dict': {
 737                 'id': '730m_DandD_1901_512k',
 738                 'ext': 'mp4',
 739                 'uploader': 'www.abc.net.au',
 740                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 741             }
 742         },
 743         # embedded viddler video
 744         {
 745             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 746             'info_dict': {
 747                 'id': '4d03aad9',
 748                 'ext': 'mp4',
 749                 'uploader': 'deadspin',
 750                 'title': 'WALL-TO-GORTAT',
 751                 'timestamp': 1422285291,
 752                 'upload_date': '20150126',
 753             },
 754             'add_ie': ['Viddler'],
 755         },
 756         # Libsyn embed
 757         {
 758             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 759             'info_dict': {
 760                 'id': '3377616',
 761                 'ext': 'mp3',
 762                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 763                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 764                 'upload_date': '20150220',
 765             },
 766         },
 767         # jwplayer YouTube
 768         {
 769             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 770             'info_dict': {
 771                 'id': 'Mrj4DVp2zeA',
 772                 'ext': 'mp4',
 773                 'upload_date': '20150212',
 774                 'uploader': 'The National Archives UK',
 775                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 776                 'uploader_id': 'NationalArchives08',
 777                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 778             },
 779         },
 780         # rtl.nl embed
 781         {
 782             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 783             'playlist_mincount': 5,
 784             'info_dict': {
 785                 'id': 'aanslagen-kopenhagen',
 786                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 787             }
 788         },
 789         # Zapiks embed
 790         {
 791             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 792             'info_dict': {
 793                 'id': '118046',
 794                 'ext': 'mp4',
 795                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 796             }
 797         },
 798         # Kaltura embed
 799         {
 800             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 801             'info_dict': {
 802                 'id': '1_eergr3h1',
 803                 'ext': 'mp4',
 804                 'upload_date': '20150226',
 805                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 806                 'timestamp': int,
 807                 'title': 'John Carlson Postgame 2/25/15',
 808             },
 809         },
 810         # Kaltura embed (different embed code)
 811         {
 812             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 813             'info_dict': {
 814                 'id': '1_a52wc67y',
 815                 'ext': 'flv',
 816                 'upload_date': '20150127',
 817                 'uploader_id': 'PremierMedia',
 818                 'timestamp': int,
 819                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 820             },
 821         },
 822         # Eagle.Platform embed (generic URL)
 823         {
 824             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 825             'info_dict': {
 826                 'id': '227304',
 827                 'ext': 'mp4',
 828                 'title': 'Навальный вышел на свободу',
 829                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 830                 'thumbnail': 're:^https?://.*\.jpg$',
 831                 'duration': 87,
 832                 'view_count': int,
 833                 'age_limit': 0,
 834             },
 835         },
 836         # ClipYou (Eagle.Platform) embed (custom URL)
 837         {
 838             'url': 'http://muz-tv.ru/play/7129/',
 839             'info_dict': {
 840                 'id': '12820',
 841                 'ext': 'mp4',
 842                 'title': "'O Sole Mio",
 843                 'thumbnail': 're:^https?://.*\.jpg$',
 844                 'duration': 216,
 845                 'view_count': int,
 846             },
 847         },
 848         # Pladform embed
 849         {
 850             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 851             'info_dict': {
 852                 'id': '100183293',
 853                 'ext': 'mp4',
 854                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 855                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 856                 'thumbnail': 're:^https?://.*\.jpg$',
 857                 'duration': 694,
 858                 'age_limit': 0,
 859             },
 860         },
 861         # Playwire embed
 862         {
 863             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 864             'info_dict': {
 865                 'id': '3519514',
 866                 'ext': 'mp4',
 867                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 868                 'thumbnail': 're:^https?://.*\.png$',
 869                 'duration': 45.115,
 870             },
 871         },
 872         # 5min embed
 873         {
 874             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 875             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 876             'info_dict': {
 877                 'id': '518726732',
 878                 'ext': 'mp4',
 879                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 880             },
 881         },
 882         # SVT embed
 883         {
 884             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 885             'info_dict': {
 886                 'id': '2900353',
 887                 'ext': 'flv',
 888                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 889                 'duration': 27,
 890                 'age_limit': 0,
 891             },
 892         },
 893         # Crooks and Liars embed
 894         {
 895             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 896             'info_dict': {
 897                 'id': '8RUoRhRi',
 898                 'ext': 'mp4',
 899                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 900                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 901                 'timestamp': 1428207000,
 902                 'upload_date': '20150405',
 903                 'uploader': 'Heather',
 904             },
 905         },
 906         # Crooks and Liars external embed
 907         {
 908             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 909             'info_dict': {
 910                 'id': 'MTE3MjUtMzQ2MzA',
 911                 'ext': 'mp4',
 912                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 913                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 914                 'timestamp': 1265032391,
 915                 'upload_date': '20100201',
 916                 'uploader': 'Heather',
 917             },
 918         },
 919         # NBC Sports vplayer embed
 920         {
 921             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 922             'info_dict': {
 923                 'id': 'ln7x1qSThw4k',
 924                 'ext': 'flv',
 925                 'title': "PFT Live: New leader in the 'new-look' defense",
 926                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 927             },
 928         },
 929         # UDN embed
 930         {
 931             'url': 'http://www.udn.com/news/story/7314/822787',
 932             'md5': 'fd2060e988c326991037b9aff9df21a6',
 933             'info_dict': {
 934                 'id': '300346',
 935                 'ext': 'mp4',
 936                 'title': '中一中男師變性 全校師生力挺',
 937                 'thumbnail': 're:^https?://.*\.jpg$',
 938             }
 939         },
 940         # Ooyala embed
 941         {
 942             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 943             'info_dict': {
 944                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 945                 'ext': 'mp4',
 946                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 947                 'title': 'This is what separates the Excel masters from the wannabes',
 948             },
 949             'params': {
 950                 # m3u8 downloads
 951                 'skip_download': True,
 952             }
 953         },
 954         # Contains a SMIL manifest
 955         {
 956             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 957             'info_dict': {
 958                 'id': 'file',
 959                 'ext': 'flv',
 960                 'title': '+ Football: Lottery Champions League Europe',
 961                 'uploader': 'www.telewebion.com',
 962             },
 963             'params': {
 964                 # rtmpe downloads
 965                 'skip_download': True,
 966             }
 967         },
 968         # Brightcove URL in single quotes
 969         {
 970             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 971             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 972             'info_dict': {
 973                 'id': '4255764656001',
 974                 'ext': 'mp4',
 975                 'title': 'SN Presents: Russell Martin, World Citizen',
 976                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 977                 'uploader': 'Rogers Sportsnet',
 978             },
 979         },
 980         # Dailymotion Cloud video
 981         {
 982             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
 983             'md5': '49444254273501a64675a7e68c502681',
 984             'info_dict': {
 985                 'id': '5585de919473990de4bee11b',
 986                 'ext': 'mp4',
 987                 'title': 'Le débat',
 988                 'thumbnail': 're:^https?://.*\.jpe?g$',
 989             }
 990         },
 991         # OnionStudios embed
 992         {
 993             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
 994             'info_dict': {
 995                 'id': '2855',
 996                 'ext': 'mp4',
 997                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
 998                 'thumbnail': 're:^https?://.*\.jpe?g$',
 999                 'uploader': 'ClickHole',
1000                 'uploader_id': 'clickhole',
1001             }
1002         },
1003         # SnagFilms embed
1004         {
1005             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1006             'info_dict': {
1007                 'id': '74849a00-85a9-11e1-9660-123139220831',
1008                 'ext': 'mp4',
1009                 'title': '#whilewewatch',
1010             }
1011         },
1012         # AdobeTVVideo embed
1013         {
1014             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1015             'md5': '43662b577c018ad707a63766462b1e87',
1016             'info_dict': {
1017                 'id': '2456',
1018                 'ext': 'mp4',
1019                 'title': 'New experience with Acrobat DC',
1020                 'description': 'New experience with Acrobat DC',
1021                 'duration': 248.667,
1022             },
1023         },
1024         # ScreenwaveMedia embed
1025         {
1026             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1027             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1028             'info_dict': {
1029                 'id': 'cinemasnob-55d26273809dd',
1030                 'ext': 'mp4',
1031                 'title': 'cinemasnob',
1032             },
1033         },
1034         # Ultimedia embed
1035         {
1036             'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
1037             'md5': '25551df6e7c7ab8096ceeeae048c5f64',
1038             'info_dict': {
1039                 'id': 'r303r',
1040                 'ext': 'mp4',
1041                 'title': 'Kosheen - Pride (live)',
1042                 'thumbnail': 're:^https?://.*\.jpg',
1043                 'duration': 293,
1044                 'upload_date': '20081103',
1045                 'timestamp': 1225733392,
1046                 'uploader_id': '33m03',
1047             },
1048         }
1049     ]
1050
1051     def report_following_redirect(self, new_url):
1052         """Report information extraction."""
1053         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1054
1055     def _extract_rss(self, url, video_id, doc):
1056         playlist_title = doc.find('./channel/title').text
1057         playlist_desc_el = doc.find('./channel/description')
1058         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1059
1060         entries = []
1061         for it in doc.findall('./channel/item'):
1062             next_url = xpath_text(it, 'link', fatal=False)
1063             if not next_url:
1064                 enclosure_nodes = it.findall('./enclosure')
1065                 for e in enclosure_nodes:
1066                     next_url = e.attrib.get('url')
1067                     if next_url:
1068                         break
1069
1070             if not next_url:
1071                 continue
1072
1073             entries.append({
1074                 '_type': 'url',
1075                 'url': next_url,
1076                 'title': it.find('title').text,
1077             })
1078
1079         return {
1080             '_type': 'playlist',
1081             'id': url,
1082             'title': playlist_title,
1083             'description': playlist_desc,
1084             'entries': entries,
1085         }
1086
1087     def _extract_camtasia(self, url, video_id, webpage):
1088         """ Returns None if no camtasia video can be found. """
1089
1090         camtasia_cfg = self._search_regex(
1091             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1092             webpage, 'camtasia configuration file', default=None)
1093         if camtasia_cfg is None:
1094             return None
1095
1096         title = self._html_search_meta('DC.title', webpage, fatal=True)
1097
1098         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1099         camtasia_cfg = self._download_xml(
1100             camtasia_url, video_id,
1101             note='Downloading camtasia configuration',
1102             errnote='Failed to download camtasia configuration')
1103         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1104
1105         entries = []
1106         for n in fileset_node.getchildren():
1107             url_n = n.find('./uri')
1108             if url_n is None:
1109                 continue
1110
1111             entries.append({
1112                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1113                 'title': '%s - %s' % (title, n.tag),
1114                 'url': compat_urlparse.urljoin(url, url_n.text),
1115                 'duration': float_or_none(n.find('./duration').text),
1116             })
1117
1118         return {
1119             '_type': 'playlist',
1120             'entries': entries,
1121             'title': title,
1122         }
1123
1124     def _real_extract(self, url):
1125         if url.startswith('//'):
1126             return {
1127                 '_type': 'url',
1128                 'url': self.http_scheme() + url,
1129             }
1130
1131         parsed_url = compat_urlparse.urlparse(url)
1132         if not parsed_url.scheme:
1133             default_search = self._downloader.params.get('default_search')
1134             if default_search is None:
1135                 default_search = 'fixup_error'
1136
1137             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1138                 if '/' in url:
1139                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1140                     return self.url_result('http://' + url)
1141                 elif default_search != 'fixup_error':
1142                     if default_search == 'auto_warning':
1143                         if re.match(r'^(?:url|URL)$', url):
1144                             raise ExtractorError(
1145                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1146                                 expected=True)
1147                         else:
1148                             self._downloader.report_warning(
1149                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1150                     return self.url_result('ytsearch:' + url)
1151
1152             if default_search in ('error', 'fixup_error'):
1153                 raise ExtractorError(
1154                     '%r is not a valid URL. '
1155                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1156                     % (url, url), expected=True)
1157             else:
1158                 if ':' not in default_search:
1159                     default_search += ':'
1160                 return self.url_result(default_search + url)
1161
1162         url, smuggled_data = unsmuggle_url(url)
1163         force_videoid = None
1164         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1165         if smuggled_data and 'force_videoid' in smuggled_data:
1166             force_videoid = smuggled_data['force_videoid']
1167             video_id = force_videoid
1168         else:
1169             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1170
1171         self.to_screen('%s: Requesting header' % video_id)
1172
1173         head_req = HEADRequest(url)
1174         head_response = self._request_webpage(
1175             head_req, video_id,
1176             note=False, errnote='Could not send HEAD request to %s' % url,
1177             fatal=False)
1178
1179         if head_response is not False:
1180             # Check for redirect
1181             new_url = head_response.geturl()
1182             if url != new_url:
1183                 self.report_following_redirect(new_url)
1184                 if force_videoid:
1185                     new_url = smuggle_url(
1186                         new_url, {'force_videoid': force_videoid})
1187                 return self.url_result(new_url)
1188
1189         full_response = None
1190         if head_response is False:
1191             request = compat_urllib_request.Request(url)
1192             request.add_header('Accept-Encoding', '*')
1193             full_response = self._request_webpage(request, video_id)
1194             head_response = full_response
1195
1196         # Check for direct link to a video
1197         content_type = head_response.headers.get('Content-Type', '')
1198         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1199         if m:
1200             upload_date = unified_strdate(
1201                 head_response.headers.get('Last-Modified'))
1202             return {
1203                 'id': video_id,
1204                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1205                 'direct': True,
1206                 'formats': [{
1207                     'format_id': m.group('format_id'),
1208                     'url': url,
1209                     'vcodec': 'none' if m.group('type') == 'audio' else None
1210                 }],
1211                 'upload_date': upload_date,
1212             }
1213
1214         if not self._downloader.params.get('test', False) and not is_intentional:
1215             force = self._downloader.params.get('force_generic_extractor', False)
1216             self._downloader.report_warning(
1217                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1218
1219         if not full_response:
1220             request = compat_urllib_request.Request(url)
1221             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1222             # making it impossible to download only chunk of the file (yet we need only 512kB to
1223             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1224             # that will always result in downloading the whole file that is not desirable.
1225             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1226             # to accept raw bytes and being able to download only a chunk.
1227             # It may probably better to solve this by checking Content-Type for application/octet-stream
1228             # after HEAD request finishes, but not sure if we can rely on this.
1229             request.add_header('Accept-Encoding', '*')
1230             full_response = self._request_webpage(request, video_id)
1231
1232         # Maybe it's a direct link to a video?
1233         # Be careful not to download the whole thing!
1234         first_bytes = full_response.read(512)
1235         if not is_html(first_bytes):
1236             self._downloader.report_warning(
1237                 'URL could be a direct video link, returning it as such.')
1238             upload_date = unified_strdate(
1239                 head_response.headers.get('Last-Modified'))
1240             return {
1241                 'id': video_id,
1242                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1243                 'direct': True,
1244                 'url': url,
1245                 'upload_date': upload_date,
1246             }
1247
1248         webpage = self._webpage_read_content(
1249             full_response, url, video_id, prefix=first_bytes)
1250
1251         self.report_extraction(video_id)
1252
1253         # Is it an RSS feed, a SMIL file or a XSPF playlist?
1254         try:
1255             doc = parse_xml(webpage)
1256             if doc.tag == 'rss':
1257                 return self._extract_rss(url, video_id, doc)
1258             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1259                 return self._parse_smil(doc, url, video_id)
1260             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1261                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1262         except compat_xml_parse_error:
1263             pass
1264
1265         # Is it a Camtasia project?
1266         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1267         if camtasia_res is not None:
1268             return camtasia_res
1269
1270         # Sometimes embedded video player is hidden behind percent encoding
1271         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1272         # Unescaping the whole page allows to handle those cases in a generic way
1273         webpage = compat_urllib_parse_unquote(webpage)
1274
1275         # it's tempting to parse this further, but you would
1276         # have to take into account all the variations like
1277         #   Video Title - Site Name
1278         #   Site Name | Video Title
1279         #   Video Title - Tagline | Site Name
1280         # and so on and so forth; it's just not practical
1281         video_title = self._html_search_regex(
1282             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1283             default='video')
1284
1285         # Try to detect age limit automatically
1286         age_limit = self._rta_search(webpage)
1287         # And then there are the jokers who advertise that they use RTA,
1288         # but actually don't.
1289         AGE_LIMIT_MARKERS = [
1290             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1291         ]
1292         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1293             age_limit = 18
1294
1295         # video uploader is domain name
1296         video_uploader = self._search_regex(
1297             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1298
1299         # Helper method
1300         def _playlist_from_matches(matches, getter=None, ie=None):
1301             urlrs = orderedSet(
1302                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1303                 for m in matches)
1304             return self.playlist_result(
1305                 urlrs, playlist_id=video_id, playlist_title=video_title)
1306
1307         # Look for BrightCove:
1308         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1309         if bc_urls:
1310             self.to_screen('Brightcove video detected.')
1311             entries = [{
1312                 '_type': 'url',
1313                 'url': smuggle_url(bc_url, {'Referer': url}),
1314                 'ie_key': 'Brightcove'
1315             } for bc_url in bc_urls]
1316
1317             return {
1318                 '_type': 'playlist',
1319                 'title': video_title,
1320                 'id': video_id,
1321                 'entries': entries,
1322             }
1323
1324         # Look for embedded rtl.nl player
1325         matches = re.findall(
1326             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1327             webpage)
1328         if matches:
1329             return _playlist_from_matches(matches, ie='RtlNl')
1330
1331         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1332         if vimeo_url is not None:
1333             return self.url_result(vimeo_url)
1334
1335         vid_me_embed_url = self._search_regex(
1336             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1337             webpage, 'vid.me embed', default=None)
1338         if vid_me_embed_url is not None:
1339             return self.url_result(vid_me_embed_url, 'Vidme')
1340
1341         # Look for embedded YouTube player
1342         matches = re.findall(r'''(?x)
1343             (?:
1344                 <iframe[^>]+?src=|
1345                 data-video-url=|
1346                 <embed[^>]+?src=|
1347                 embedSWF\(?:\s*|
1348                 new\s+SWFObject\(
1349             )
1350             (["\'])
1351                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1352                 (?:embed|v|p)/.+?)
1353             \1''', webpage)
1354         if matches:
1355             return _playlist_from_matches(
1356                 matches, lambda m: unescapeHTML(m[1]))
1357
1358         # Look for lazyYT YouTube embed
1359         matches = re.findall(
1360             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1361         if matches:
1362             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1363
1364         # Look for embedded Dailymotion player
1365         matches = re.findall(
1366             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1367         if matches:
1368             return _playlist_from_matches(
1369                 matches, lambda m: unescapeHTML(m[1]))
1370
1371         # Look for embedded Dailymotion playlist player (#3822)
1372         m = re.search(
1373             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1374         if m:
1375             playlists = re.findall(
1376                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1377             if playlists:
1378                 return _playlist_from_matches(
1379                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1380
1381         # Look for embedded Wistia player
1382         match = re.search(
1383             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1384         if match:
1385             embed_url = self._proto_relative_url(
1386                 unescapeHTML(match.group('url')))
1387             return {
1388                 '_type': 'url_transparent',
1389                 'url': embed_url,
1390                 'ie_key': 'Wistia',
1391                 'uploader': video_uploader,
1392                 'title': video_title,
1393                 'id': video_id,
1394             }
1395
1396         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1397         if match:
1398             return {
1399                 '_type': 'url_transparent',
1400                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1401                 'ie_key': 'Wistia',
1402                 'uploader': video_uploader,
1403                 'title': video_title,
1404                 'id': match.group('id')
1405             }
1406
1407         # Look for embedded blip.tv player
1408         bliptv_url = BlipTVIE._extract_url(webpage)
1409         if bliptv_url:
1410             return self.url_result(bliptv_url, 'BlipTV')
1411
1412         # Look for SVT player
1413         svt_url = SVTIE._extract_url(webpage)
1414         if svt_url:
1415             return self.url_result(svt_url, 'SVT')
1416
1417         # Look for embedded condenast player
1418         matches = re.findall(
1419             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1420             webpage)
1421         if matches:
1422             return {
1423                 '_type': 'playlist',
1424                 'entries': [{
1425                     '_type': 'url',
1426                     'ie_key': 'CondeNast',
1427                     'url': ma,
1428                 } for ma in matches],
1429                 'title': video_title,
1430                 'id': video_id,
1431             }
1432
1433         # Look for Bandcamp pages with custom domain
1434         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1435         if mobj is not None:
1436             burl = unescapeHTML(mobj.group(1))
1437             # Don't set the extractor because it can be a track url or an album
1438             return self.url_result(burl)
1439
1440         # Look for embedded Vevo player
1441         mobj = re.search(
1442             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1443         if mobj is not None:
1444             return self.url_result(mobj.group('url'))
1445
1446         # Look for embedded Viddler player
1447         mobj = re.search(
1448             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1449             webpage)
1450         if mobj is not None:
1451             return self.url_result(mobj.group('url'))
1452
1453         # Look for NYTimes player
1454         mobj = re.search(
1455             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1456             webpage)
1457         if mobj is not None:
1458             return self.url_result(mobj.group('url'))
1459
1460         # Look for Libsyn player
1461         mobj = re.search(
1462             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1463         if mobj is not None:
1464             return self.url_result(mobj.group('url'))
1465
1466         # Look for Ooyala videos
1467         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1468                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1469                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1470                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1471         if mobj is not None:
1472             return OoyalaIE._build_url_result(mobj.group('ec'))
1473
1474         # Look for multiple Ooyala embeds on SBN network websites
1475         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1476         if mobj is not None:
1477             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1478             if embeds:
1479                 return _playlist_from_matches(
1480                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1481
1482         # Look for Aparat videos
1483         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1484         if mobj is not None:
1485             return self.url_result(mobj.group(1), 'Aparat')
1486
1487         # Look for MPORA videos
1488         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1489         if mobj is not None:
1490             return self.url_result(mobj.group(1), 'Mpora')
1491
1492         # Look for embedded NovaMov-based player
1493         mobj = re.search(
1494             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1495                     (?P<url>http://(?:(?:embed|www)\.)?
1496                         (?:novamov\.com|
1497                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1498                            videoweed\.(?:es|com)|
1499                            movshare\.(?:net|sx|ag)|
1500                            divxstage\.(?:eu|net|ch|co|at|ag))
1501                         /embed\.php.+?)\1''', webpage)
1502         if mobj is not None:
1503             return self.url_result(mobj.group('url'))
1504
1505         # Look for embedded Facebook player
1506         mobj = re.search(
1507             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1508         if mobj is not None:
1509             return self.url_result(mobj.group('url'), 'Facebook')
1510
1511         # Look for embedded VK player
1512         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1513         if mobj is not None:
1514             return self.url_result(mobj.group('url'), 'VK')
1515
1516         # Look for embedded ivi player
1517         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1518         if mobj is not None:
1519             return self.url_result(mobj.group('url'), 'Ivi')
1520
1521         # Look for embedded Huffington Post player
1522         mobj = re.search(
1523             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1524         if mobj is not None:
1525             return self.url_result(mobj.group('url'), 'HuffPost')
1526
1527         # Look for embed.ly
1528         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1529         if mobj is not None:
1530             return self.url_result(mobj.group('url'))
1531         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1532         if mobj is not None:
1533             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1534
1535         # Look for funnyordie embed
1536         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1537         if matches:
1538             return _playlist_from_matches(
1539                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1540
1541         # Look for BBC iPlayer embed
1542         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1543         if matches:
1544             return _playlist_from_matches(matches, ie='BBCCoUk')
1545
1546         # Look for embedded RUTV player
1547         rutv_url = RUTVIE._extract_url(webpage)
1548         if rutv_url:
1549             return self.url_result(rutv_url, 'RUTV')
1550
1551         # Look for embedded TVC player
1552         tvc_url = TVCIE._extract_url(webpage)
1553         if tvc_url:
1554             return self.url_result(tvc_url, 'TVC')
1555
1556         # Look for embedded SportBox player
1557         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1558         if sportbox_urls:
1559             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1560
1561         # Look for embedded PornHub player
1562         pornhub_url = PornHubIE._extract_url(webpage)
1563         if pornhub_url:
1564             return self.url_result(pornhub_url, 'PornHub')
1565
1566         # Look for embedded XHamster player
1567         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1568         if xhamster_urls:
1569             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1570
1571         # Look for embedded Tvigle player
1572         mobj = re.search(
1573             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1574         if mobj is not None:
1575             return self.url_result(mobj.group('url'), 'Tvigle')
1576
1577         # Look for embedded TED player
1578         mobj = re.search(
1579             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1580         if mobj is not None:
1581             return self.url_result(mobj.group('url'), 'TED')
1582
1583         # Look for embedded Ustream videos
1584         mobj = re.search(
1585             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1586         if mobj is not None:
1587             return self.url_result(mobj.group('url'), 'Ustream')
1588
1589         # Look for embedded arte.tv player
1590         mobj = re.search(
1591             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1592             webpage)
1593         if mobj is not None:
1594             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1595
1596         # Look for embedded francetv player
1597         mobj = re.search(
1598             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1599             webpage)
1600         if mobj is not None:
1601             return self.url_result(mobj.group('url'))
1602
1603         # Look for embedded smotri.com player
1604         smotri_url = SmotriIE._extract_url(webpage)
1605         if smotri_url:
1606             return self.url_result(smotri_url, 'Smotri')
1607
1608         # Look for embedded Myvi.ru player
1609         myvi_url = MyviIE._extract_url(webpage)
1610         if myvi_url:
1611             return self.url_result(myvi_url)
1612
1613         # Look for embedded soundcloud player
1614         mobj = re.search(
1615             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1616             webpage)
1617         if mobj is not None:
1618             url = unescapeHTML(mobj.group('url'))
1619             return self.url_result(url)
1620
1621         # Look for embedded vulture.com player
1622         mobj = re.search(
1623             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1624             webpage)
1625         if mobj is not None:
1626             url = unescapeHTML(mobj.group('url'))
1627             return self.url_result(url, ie='Vulture')
1628
1629         # Look for embedded mtvservices player
1630         mobj = re.search(
1631             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1632             webpage)
1633         if mobj is not None:
1634             url = unescapeHTML(mobj.group('url'))
1635             return self.url_result(url, ie='MTVServicesEmbedded')
1636
1637         # Look for embedded yahoo player
1638         mobj = re.search(
1639             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1640             webpage)
1641         if mobj is not None:
1642             return self.url_result(mobj.group('url'), 'Yahoo')
1643
1644         # Look for embedded sbs.com.au player
1645         mobj = re.search(
1646             r'''(?x)
1647             (?:
1648                 <meta\s+property="og:video"\s+content=|
1649                 <iframe[^>]+?src=
1650             )
1651             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1652             webpage)
1653         if mobj is not None:
1654             return self.url_result(mobj.group('url'), 'SBS')
1655
1656         # Look for embedded Cinchcast player
1657         mobj = re.search(
1658             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1659             webpage)
1660         if mobj is not None:
1661             return self.url_result(mobj.group('url'), 'Cinchcast')
1662
1663         mobj = re.search(
1664             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1665             webpage)
1666         if not mobj:
1667             mobj = re.search(
1668                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1669                 webpage)
1670         if mobj is not None:
1671             return self.url_result(mobj.group('url'), 'MLB')
1672
1673         mobj = re.search(
1674             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1675             webpage)
1676         if mobj is not None:
1677             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1678
1679         mobj = re.search(
1680             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1681             webpage)
1682         if mobj is not None:
1683             return self.url_result(mobj.group('url'), 'Livestream')
1684
1685         # Look for Zapiks embed
1686         mobj = re.search(
1687             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1688         if mobj is not None:
1689             return self.url_result(mobj.group('url'), 'Zapiks')
1690
1691         # Look for Kaltura embeds
1692         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
1693                 re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
1694         if mobj is not None:
1695             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1696
1697         # Look for Eagle.Platform embeds
1698         mobj = re.search(
1699             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1700         if mobj is not None:
1701             return self.url_result(mobj.group('url'), 'EaglePlatform')
1702
1703         # Look for ClipYou (uses Eagle.Platform) embeds
1704         mobj = re.search(
1705             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1706         if mobj is not None:
1707             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1708
1709         # Look for Pladform embeds
1710         mobj = re.search(
1711             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1712         if mobj is not None:
1713             return self.url_result(mobj.group('url'), 'Pladform')
1714
1715         # Look for Playwire embeds
1716         mobj = re.search(
1717             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1718         if mobj is not None:
1719             return self.url_result(mobj.group('url'))
1720
1721         # Look for 5min embeds
1722         mobj = re.search(
1723             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1724         if mobj is not None:
1725             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1726
1727         # Look for Crooks and Liars embeds
1728         mobj = re.search(
1729             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1730         if mobj is not None:
1731             return self.url_result(mobj.group('url'))
1732
1733         # Look for NBC Sports VPlayer embeds
1734         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1735         if nbc_sports_url:
1736             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1737
1738         # Look for UDN embeds
1739         mobj = re.search(
1740             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1741         if mobj is not None:
1742             return self.url_result(
1743                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1744
1745         # Look for Senate ISVP iframe
1746         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1747         if senate_isvp_url:
1748             return self.url_result(senate_isvp_url, 'SenateISVP')
1749
1750         # Look for Dailymotion Cloud videos
1751         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1752         if dmcloud_url:
1753             return self.url_result(dmcloud_url, 'DailymotionCloud')
1754
1755         # Look for OnionStudios embeds
1756         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1757         if onionstudios_url:
1758             return self.url_result(onionstudios_url)
1759
1760         # Look for SnagFilms embeds
1761         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1762         if snagfilms_url:
1763             return self.url_result(snagfilms_url)
1764
1765         # Look for ScreenwaveMedia embeds
1766         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1767         if mobj is not None:
1768             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1769
1770         # Look for Ultimedia embeds
1771         ultimedia_url = UltimediaIE._extract_url(webpage)
1772         if ultimedia_url:
1773             return self.url_result(self._proto_relative_url(ultimedia_url), 'Ultimedia')
1774
1775         # Look for AdobeTVVideo embeds
1776         mobj = re.search(
1777             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1778             webpage)
1779         if mobj is not None:
1780             return self.url_result(
1781                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1782                 'AdobeTVVideo')
1783
1784         def check_video(vurl):
1785             if YoutubeIE.suitable(vurl):
1786                 return True
1787             vpath = compat_urlparse.urlparse(vurl).path
1788             vext = determine_ext(vpath)
1789             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1790
1791         def filter_video(urls):
1792             return list(filter(check_video, urls))
1793
1794         # Start with something easy: JW Player in SWFObject
1795         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1796         if not found:
1797             # Look for gorilla-vid style embedding
1798             found = filter_video(re.findall(r'''(?sx)
1799                 (?:
1800                     jw_plugins|
1801                     JWPlayerOptions|
1802                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1803                 )
1804                 .*?
1805                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1806         if not found:
1807             # Broaden the search a little bit
1808             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1809         if not found:
1810             # Broaden the findall a little bit: JWPlayer JS loader
1811             found = filter_video(re.findall(
1812                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1813         if not found:
1814             # Flow player
1815             found = filter_video(re.findall(r'''(?xs)
1816                 flowplayer\("[^"]+",\s*
1817                     \{[^}]+?\}\s*,
1818                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1819                         ["']?url["']?\s*:\s*["']([^"']+)["']
1820             ''', webpage))
1821         if not found:
1822             # Cinerama player
1823             found = re.findall(
1824                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1825         if not found:
1826             # Try to find twitter cards info
1827             found = filter_video(re.findall(
1828                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1829         if not found:
1830             # We look for Open Graph info:
1831             # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1832             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1833             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1834             if m_video_type is not None:
1835                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1836         if not found:
1837             # HTML5 video
1838             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1839         if not found:
1840             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1841             found = re.search(
1842                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1843                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1844                 webpage)
1845             if not found:
1846                 # Look also in Refresh HTTP header
1847                 refresh_header = head_response.headers.get('Refresh')
1848                 if refresh_header:
1849                     # In python 2 response HTTP headers are bytestrings
1850                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1851                         refresh_header = refresh_header.decode('iso-8859-1')
1852                     found = re.search(REDIRECT_REGEX, refresh_header)
1853             if found:
1854                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1855                 self.report_following_redirect(new_url)
1856                 return {
1857                     '_type': 'url',
1858                     'url': new_url,
1859                 }
1860         if not found:
1861             raise UnsupportedError(url)
1862
1863         entries = []
1864         for video_url in found:
1865             video_url = compat_urlparse.urljoin(url, video_url)
1866             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1867
1868             # Sometimes, jwplayer extraction will result in a YouTube URL
1869             if YoutubeIE.suitable(video_url):
1870                 entries.append(self.url_result(video_url, 'Youtube'))
1871                 continue
1872
1873             # here's a fun little line of code for you:
1874             video_id = os.path.splitext(video_id)[0]
1875
1876             ext = determine_ext(video_url)
1877             if ext == 'smil':
1878                 entries.append({
1879                     'id': video_id,
1880                     'formats': self._extract_smil_formats(video_url, video_id),
1881                     'uploader': video_uploader,
1882                     'title': video_title,
1883                     'age_limit': age_limit,
1884                 })
1885             elif ext == 'xspf':
1886                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
1887             else:
1888                 entries.append({
1889                     'id': video_id,
1890                     'url': video_url,
1891                     'uploader': video_uploader,
1892                     'title': video_title,
1893                     'age_limit': age_limit,
1894                 })
1895
1896         if len(entries) == 1:
1897             return entries[0]
1898         else:
1899             for num, e in enumerate(entries, start=1):
1900                 # 'url' results don't have a title
1901                 if e.get('title') is not None:
1902                     e['title'] = '%s (%d)' % (e['title'], num)
1903             return {
1904                 '_type': 'playlist',
1905                 'entries': entries,
1906             }