_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urlparse,
  13     compat_xml_parse_error,
  14 )
  15 from ..utils import (
  16     determine_ext,
  17     ExtractorError,
  18     float_or_none,
  19     HEADRequest,
  20     is_html,
  21     orderedSet,
  22     parse_xml,
  23     smuggle_url,
  24     unescapeHTML,
  25     unified_strdate,
  26     unsmuggle_url,
  27     UnsupportedError,
  28     url_basename,
  29     xpath_text,
  30 )
  31 from .brightcove import BrightcoveIE
  32 from .nbc import NBCSportsVPlayerIE
  33 from .ooyala import OoyalaIE
  34 from .rutv import RUTVIE
  35 from .smotri import SmotriIE
  36 from .condenast import CondeNastIE
  37 from .udn import UDNEmbedIE
  38
  39
  40 class GenericIE(InfoExtractor):
  41     IE_DESC = 'Generic downloader that works on some sites'
  42     _VALID_URL = r'.*'
  43     IE_NAME = 'generic'
  44     _TESTS = [
  45         {
  46             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  47             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  48             'info_dict': {
  49                 'id': '13601338388002',
  50                 'ext': 'mp4',
  51                 'uploader': 'www.hodiho.fr',
  52                 'title': 'R\u00e9gis plante sa Jeep',
  53             }
  54         },
  55         # bandcamp page with custom domain
  56         {
  57             'add_ie': ['Bandcamp'],
  58             'url': 'http://bronyrock.com/track/the-pony-mash',
  59             'info_dict': {
  60                 'id': '3235767654',
  61                 'ext': 'mp3',
  62                 'title': 'The Pony Mash',
  63                 'uploader': 'M_Pallante',
  64             },
  65             'skip': 'There is a limit of 200 free downloads / month for the test song',
  66         },
  67         # embedded brightcove video
  68         # it also tests brightcove videos that need to set the 'Referer' in the
  69         # http requests
  70         {
  71             'add_ie': ['Brightcove'],
  72             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  73             'info_dict': {
  74                 'id': '2765128793001',
  75                 'ext': 'mp4',
  76                 'title': 'Le cours de bourse : l’analyse technique',
  77                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  78                 'uploader': 'BFM BUSINESS',
  79             },
  80             'params': {
  81                 'skip_download': True,
  82             },
  83         },
  84         {
  85             # https://github.com/rg3/youtube-dl/issues/2253
  86             'url': 'http://bcove.me/i6nfkrc3',
  87             'md5': '0ba9446db037002366bab3b3eb30c88c',
  88             'info_dict': {
  89                 'id': '3101154703001',
  90                 'ext': 'mp4',
  91                 'title': 'Still no power',
  92                 'uploader': 'thestar.com',
  93                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
  94             },
  95             'add_ie': ['Brightcove'],
  96         },
  97         {
  98             'url': 'http://www.championat.com/video/football/v/87/87499.html',
  99             'md5': 'fb973ecf6e4a78a67453647444222983',
 100             'info_dict': {
 101                 'id': '3414141473001',
 102                 'ext': 'mp4',
 103                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 104                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 105                 'uploader': 'Championat',
 106             },
 107         },
 108         {
 109             # https://github.com/rg3/youtube-dl/issues/3541
 110             'add_ie': ['Brightcove'],
 111             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 112             'info_dict': {
 113                 'id': '3866516442001',
 114                 'ext': 'mp4',
 115                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 116                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 117                 'uploader': 'SBS Broadcasting',
 118             },
 119             'skip': 'Restricted to Netherlands',
 120             'params': {
 121                 'skip_download': True,  # m3u8 download
 122             },
 123         },
 124         # Direct link to a video
 125         {
 126             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 127             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 128             'info_dict': {
 129                 'id': 'trailer',
 130                 'ext': 'mp4',
 131                 'title': 'trailer',
 132                 'upload_date': '20100513',
 133             }
 134         },
 135         # ooyala video
 136         {
 137             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 138             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 139             'info_dict': {
 140                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 141                 'ext': 'mp4',
 142                 'title': '2cc213299525360.mov',  # that's what we get
 143             },
 144             'add_ie': ['Ooyala'],
 145         },
 146         # multiple ooyala embeds on SBN network websites
 147         {
 148             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 149             'info_dict': {
 150                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 151                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 152             },
 153             'playlist_mincount': 3,
 154             'params': {
 155                 'skip_download': True,
 156             },
 157             'add_ie': ['Ooyala'],
 158         },
 159         # google redirect
 160         {
 161             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 162             'info_dict': {
 163                 'id': 'cmQHVoWB5FY',
 164                 'ext': 'mp4',
 165                 'upload_date': '20130224',
 166                 'uploader_id': 'TheVerge',
 167                 'description': 're:^Chris Ziegler takes a look at the\.*',
 168                 'uploader': 'The Verge',
 169                 'title': 'First Firefox OS phones side-by-side',
 170             },
 171             'params': {
 172                 'skip_download': False,
 173             }
 174         },
 175         # embed.ly video
 176         {
 177             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 178             'info_dict': {
 179                 'id': '9ODmcdjQcHQ',
 180                 'ext': 'mp4',
 181                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 182                 'upload_date': '20140225',
 183                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 184                 'uploader': 'Tested',
 185                 'uploader_id': 'testedcom',
 186             },
 187             # No need to test YoutubeIE here
 188             'params': {
 189                 'skip_download': True,
 190             },
 191         },
 192         # funnyordie embed
 193         {
 194             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 195             'info_dict': {
 196                 'id': '18e820ec3f',
 197                 'ext': 'mp4',
 198                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 199                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 200             },
 201         },
 202         # BBC iPlayer embeds
 203         {
 204             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 205             'info_dict': {
 206                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 207             },
 208             'playlist_mincount': 18,
 209         },
 210         # RUTV embed
 211         {
 212             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 213             'info_dict': {
 214                 'id': '776940',
 215                 'ext': 'mp4',
 216                 'title': 'Охотское море стало целиком российским',
 217                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 218             },
 219             'params': {
 220                 # m3u8 download
 221                 'skip_download': True,
 222             },
 223         },
 224         # Embedded TED video
 225         {
 226             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 227             'md5': '65fdff94098e4a607385a60c5177c638',
 228             'info_dict': {
 229                 'id': '1969',
 230                 'ext': 'mp4',
 231                 'title': 'Hidden miracles of the natural world',
 232                 'uploader': 'Louie Schwartzberg',
 233                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 234             }
 235         },
  236         # Embedded Ustream video
 237         {
 238             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 239             'md5': '27b99cdb639c9b12a79bca876a073417',
 240             'info_dict': {
 241                 'id': '45734260',
 242                 'ext': 'flv',
 243                 'uploader': 'AU SPA:  The NSA and Privacy',
 244                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 245             }
 246         },
 247         # nowvideo embed hidden behind percent encoding
 248         {
 249             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 250             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 251             'info_dict': {
 252                 'id': '06e53103ca9aa',
 253                 'ext': 'flv',
 254                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 255                 'description': 'No description',
 256             },
 257         },
 258         # arte embed
 259         {
 260             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 261             'md5': '7653032cbb25bf6c80d80f217055fa43',
 262             'info_dict': {
 263                 'id': '048195-004_PLUS7-F',
 264                 'ext': 'flv',
 265                 'title': 'X:enius',
 266                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 267                 'upload_date': '20140320',
 268             },
 269             'params': {
 270                 'skip_download': 'Requires rtmpdump'
 271             }
 272         },
 273         # Condé Nast embed
 274         {
 275             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 276             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 277             'info_dict': {
 278                 'id': '53501be369702d3275860000',
 279                 'ext': 'mp4',
 280                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 281             }
 282         },
 283         # Dailymotion embed
 284         {
 285             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 286             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 287             'info_dict': {
 288                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 289                 'ext': 'mp4',
 290                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 291                 'uploader': 'Spi0n',
 292             },
 293             'add_ie': ['Dailymotion'],
 294         },
 295         # YouTube embed
 296         {
 297             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 298             'info_dict': {
 299                 'id': 'FXRb4ykk4S0',
 300                 'ext': 'mp4',
 301                 'title': 'The NBL Auction 2014',
 302                 'uploader': 'BADMINTON England',
 303                 'uploader_id': 'BADMINTONEvents',
 304                 'upload_date': '20140603',
 305                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 306             },
 307             'add_ie': ['Youtube'],
 308             'params': {
 309                 'skip_download': True,
 310             }
 311         },
  312         # MTVServices embed
 313         {
 314             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 315             'md5': '35727f82f58c76d996fc188f9755b0d5',
 316             'info_dict': {
 317                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 318                 'ext': 'mp4',
 319                 'title': 'Review',
 320                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 321             },
 322         },
 323         # YouTube embed via <data-embed-url="">
 324         {
 325             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 326             'info_dict': {
 327                 'id': '4vAffPZIT44',
 328                 'ext': 'mp4',
 329                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 330                 'uploader': 'Gameloft',
 331                 'uploader_id': 'gameloft',
 332                 'upload_date': '20140828',
 333                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 334             },
 335             'params': {
 336                 'skip_download': True,
 337             }
 338         },
 339         # Camtasia studio
 340         {
 341             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 342             'playlist': [{
 343                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 344                 'info_dict': {
 345                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 346                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 347                     'ext': 'flv',
 348                     'duration': 2235.90,
 349                 }
 350             }, {
 351                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 352                 'info_dict': {
 353                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 354                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 355                     'ext': 'flv',
 356                     'duration': 2235.93,
 357                 }
 358             }],
 359             'info_dict': {
 360                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 361             }
 362         },
 363         # Flowplayer
 364         {
 365             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 366             'md5': '9d65602bf31c6e20014319c7d07fba27',
 367             'info_dict': {
 368                 'id': '5123ea6d5e5a7',
 369                 'ext': 'mp4',
 370                 'age_limit': 18,
 371                 'uploader': 'www.handjobhub.com',
 372                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 373             }
 374         },
 375         # RSS feed
 376         {
 377             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 378             'info_dict': {
 379                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 380                 'title': 'Zero Punctuation',
 381                 'description': 're:.*groundbreaking video review series.*'
 382             },
 383             'playlist_mincount': 11,
 384         },
 385         # Multiple brightcove videos
 386         # https://github.com/rg3/youtube-dl/issues/2283
 387         {
 388             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 389             'info_dict': {
 390                 'id': 'always-never',
 391                 'title': 'Always / Never - The New Yorker',
 392             },
 393             'playlist_count': 3,
 394             'params': {
 395                 'extract_flat': False,
 396                 'skip_download': True,
 397             }
 398         },
 399         # MLB embed
 400         {
 401             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 402             'md5': '96f09a37e44da40dd083e12d9a683327',
 403             'info_dict': {
 404                 'id': '33322633',
 405                 'ext': 'mp4',
 406                 'title': 'Ump changes call to ball',
 407                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 408                 'duration': 48,
 409                 'timestamp': 1401537900,
 410                 'upload_date': '20140531',
 411                 'thumbnail': 're:^https?://.*\.jpg$',
 412             },
 413         },
 414         # Wistia embed
 415         {
 416             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 417             'md5': '8788b683c777a5cf25621eaf286d0c23',
 418             'info_dict': {
 419                 'id': '1cfaf6b7ea',
 420                 'ext': 'mov',
 421                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 422                 'duration': 643.0,
 423                 'filesize': 182808282,
 424                 'uploader': 'education-portal.com',
 425             },
 426         },
 427         {
 428             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 429             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 430             'info_dict': {
 431                 'id': 'uxjb0lwrcz',
 432                 'ext': 'mp4',
 433                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 434                 'duration': 1715.0,
 435                 'uploader': 'thoughtworks.wistia.com',
 436             },
 437         },
 438         # Direct download with broken HEAD
 439         {
 440             'url': 'http://ai-radio.org:8000/radio.opus',
 441             'info_dict': {
 442                 'id': 'radio',
 443                 'ext': 'opus',
 444                 'title': 'radio',
 445             },
 446             'params': {
 447                 'skip_download': True,  # infinite live stream
 448             },
 449             'expected_warnings': [
 450                 r'501.*Not Implemented'
 451             ],
 452         },
 453         # Soundcloud embed
 454         {
 455             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 456             'info_dict': {
 457                 'id': '174391317',
 458                 'ext': 'mp3',
 459                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 460                 'uploader': 'Sophos Security',
 461                 'title': 'Chet Chat 171 - Oct 29, 2014',
 462                 'upload_date': '20141029',
 463             }
 464         },
 465         # Livestream embed
 466         {
 467             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 468             'info_dict': {
 469                 'id': '67864563',
 470                 'ext': 'flv',
 471                 'upload_date': '20141112',
 472                 'title': 'Rosetta #CometLanding webcast HL 10',
 473             }
 474         },
 475         # LazyYT
 476         {
 477             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 478             'info_dict': {
 479                 'id': '1986',
 480                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 481             },
 482             'playlist_mincount': 2,
 483         },
 484         # Direct link with incorrect MIME type
 485         {
 486             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 487             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 488             'info_dict': {
 489                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 490                 'id': '5_Lennart_Poettering_-_Systemd',
 491                 'ext': 'webm',
 492                 'title': '5_Lennart_Poettering_-_Systemd',
 493                 'upload_date': '20141120',
 494             },
 495             'expected_warnings': [
 496                 'URL could be a direct video link, returning it as such.'
 497             ]
 498         },
 499         # Cinchcast embed
 500         {
 501             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 502             'info_dict': {
 503                 'id': '7141703',
 504                 'ext': 'mp3',
 505                 'upload_date': '20141126',
 506                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 507             }
 508         },
 509         # Cinerama player
 510         {
 511             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 512             'info_dict': {
 513                 'id': '730m_DandD_1901_512k',
 514                 'ext': 'mp4',
 515                 'uploader': 'www.abc.net.au',
 516                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 517             }
 518         },
 519         # embedded viddler video
 520         {
 521             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 522             'info_dict': {
 523                 'id': '4d03aad9',
 524                 'ext': 'mp4',
 525                 'uploader': 'deadspin',
 526                 'title': 'WALL-TO-GORTAT',
 527                 'timestamp': 1422285291,
 528                 'upload_date': '20150126',
 529             },
 530             'add_ie': ['Viddler'],
 531         },
 532         # Libsyn embed
 533         {
 534             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 535             'info_dict': {
 536                 'id': '3377616',
 537                 'ext': 'mp3',
 538                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 539                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 540                 'upload_date': '20150220',
 541             },
 542         },
 543         # jwplayer YouTube
 544         {
 545             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 546             'info_dict': {
 547                 'id': 'Mrj4DVp2zeA',
 548                 'ext': 'mp4',
 549                 'upload_date': '20150212',
 550                 'uploader': 'The National Archives UK',
 551                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 552                 'uploader_id': 'NationalArchives08',
 553                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 554             },
 555         },
 556         # rtl.nl embed
 557         {
 558             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 559             'playlist_mincount': 5,
 560             'info_dict': {
 561                 'id': 'aanslagen-kopenhagen',
 562                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 563             }
 564         },
 565         # Zapiks embed
 566         {
 567             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 568             'info_dict': {
 569                 'id': '118046',
 570                 'ext': 'mp4',
 571                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 572             }
 573         },
 574         # Kaltura embed
 575         {
 576             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 577             'info_dict': {
 578                 'id': '1_eergr3h1',
 579                 'ext': 'mp4',
 580                 'upload_date': '20150226',
 581                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 582                 'timestamp': int,
 583                 'title': 'John Carlson Postgame 2/25/15',
 584             },
 585         },
 586         # Eagle.Platform embed (generic URL)
 587         {
 588             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 589             'info_dict': {
 590                 'id': '227304',
 591                 'ext': 'mp4',
 592                 'title': 'Навальный вышел на свободу',
 593                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 594                 'thumbnail': 're:^https?://.*\.jpg$',
 595                 'duration': 87,
 596                 'view_count': int,
 597                 'age_limit': 0,
 598             },
 599         },
 600         # ClipYou (Eagle.Platform) embed (custom URL)
 601         {
 602             'url': 'http://muz-tv.ru/play/7129/',
 603             'info_dict': {
 604                 'id': '12820',
 605                 'ext': 'mp4',
 606                 'title': "'O Sole Mio",
 607                 'thumbnail': 're:^https?://.*\.jpg$',
 608                 'duration': 216,
 609                 'view_count': int,
 610             },
 611         },
 612         # Pladform embed
 613         {
 614             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 615             'info_dict': {
 616                 'id': '100183293',
 617                 'ext': 'mp4',
 618                 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
 619                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 620                 'thumbnail': 're:^https?://.*\.jpg$',
 621                 'duration': 694,
 622                 'age_limit': 0,
 623             },
 624         },
 625         # Playwire embed
 626         {
 627             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 628             'info_dict': {
 629                 'id': '3519514',
 630                 'ext': 'mp4',
 631                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 632                 'thumbnail': 're:^https?://.*\.png$',
 633                 'duration': 45.115,
 634             },
 635         },
 636         # 5min embed
 637         {
 638             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 639             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 640             'info_dict': {
 641                 'id': '518726732',
 642                 'ext': 'mp4',
 643                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 644             },
 645         },
 646         # RSS feed with enclosure
 647         {
 648             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 649             'info_dict': {
 650                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 651                 'ext': 'm4v',
 652                 'upload_date': '20150228',
 653                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 654             }
 655         },
 656         # Crooks and Liars embed
 657         {
 658             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 659             'info_dict': {
 660                 'id': '8RUoRhRi',
 661                 'ext': 'mp4',
 662                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 663                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 664                 'timestamp': 1428207000,
 665                 'upload_date': '20150405',
 666                 'uploader': 'Heather',
 667             },
 668         },
 669         # Crooks and Liars external embed
 670         {
 671             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 672             'info_dict': {
 673                 'id': 'MTE3MjUtMzQ2MzA',
 674                 'ext': 'mp4',
 675                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 676                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 677                 'timestamp': 1265032391,
 678                 'upload_date': '20100201',
 679                 'uploader': 'Heather',
 680             },
 681         },
 682         # NBC Sports vplayer embed
 683         {
 684             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 685             'info_dict': {
 686                 'id': 'ln7x1qSThw4k',
 687                 'ext': 'flv',
 688                 'title': "PFT Live: New leader in the 'new-look' defense",
 689                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 690             },
 691         },
 692         # UDN embed
 693         {
 694             'url': 'http://www.udn.com/news/story/7314/822787',
 695             'md5': 'fd2060e988c326991037b9aff9df21a6',
 696             'info_dict': {
 697                 'id': '300346',
 698                 'ext': 'mp4',
 699                 'title': '中一中男師變性 全校師生力挺',
 700                 'thumbnail': 're:^https?://.*\.jpg$',
 701             }
 702         },
 703         # Ooyala embed
 704         {
 705             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 706             'info_dict': {
 707                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 708                 'ext': 'mp4',
 709                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 710                 'title': 'This is what separates the Excel masters from the wannabes',
 711             },
 712             'params': {
 713                 # m3u8 downloads
 714                 'skip_download': True,
 715             }
 716         }
 717     ]
 718
 719     def report_following_redirect(self, new_url):
 720         """Report information extraction."""
 721         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 722
 723     def _extract_rss(self, url, video_id, doc):
 724         playlist_title = doc.find('./channel/title').text
 725         playlist_desc_el = doc.find('./channel/description')
 726         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 727
 728         entries = []
 729         for it in doc.findall('./channel/item'):
 730             next_url = xpath_text(it, 'link', fatal=False)
 731             if not next_url:
 732                 enclosure_nodes = it.findall('./enclosure')
 733                 for e in enclosure_nodes:
 734                     next_url = e.attrib.get('url')
 735                     if next_url:
 736                         break
 737
 738             if not next_url:
 739                 continue
 740
 741             entries.append({
 742                 '_type': 'url',
 743                 'url': next_url,
 744                 'title': it.find('title').text,
 745             })
 746
 747         return {
 748             '_type': 'playlist',
 749             'id': url,
 750             'title': playlist_title,
 751             'description': playlist_desc,
 752             'entries': entries,
 753         }
 754
 755     def _extract_camtasia(self, url, video_id, webpage):
 756         """ Returns None if no camtasia video can be found. """
 757
 758         camtasia_cfg = self._search_regex(
 759             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 760             webpage, 'camtasia configuration file', default=None)
 761         if camtasia_cfg is None:
 762             return None
 763
 764         title = self._html_search_meta('DC.title', webpage, fatal=True)
 765
 766         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 767         camtasia_cfg = self._download_xml(
 768             camtasia_url, video_id,
 769             note='Downloading camtasia configuration',
 770             errnote='Failed to download camtasia configuration')
 771         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 772
 773         entries = []
 774         for n in fileset_node.getchildren():
 775             url_n = n.find('./uri')
 776             if url_n is None:
 777                 continue
 778
 779             entries.append({
 780                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 781                 'title': '%s - %s' % (title, n.tag),
 782                 'url': compat_urlparse.urljoin(url, url_n.text),
 783                 'duration': float_or_none(n.find('./duration').text),
 784             })
 785
 786         return {
 787             '_type': 'playlist',
 788             'entries': entries,
 789             'title': title,
 790         }
 791
 792     def _real_extract(self, url):
 793         if url.startswith('//'):
 794             return {
 795                 '_type': 'url',
 796                 'url': self.http_scheme() + url,
 797             }
 798
 799         parsed_url = compat_urlparse.urlparse(url)
 800         if not parsed_url.scheme:
 801             default_search = self._downloader.params.get('default_search')
 802             if default_search is None:
 803                 default_search = 'fixup_error'
 804
 805             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 806                 if '/' in url:
 807                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 808                     return self.url_result('http://' + url)
 809                 elif default_search != 'fixup_error':
 810                     if default_search == 'auto_warning':
 811                         if re.match(r'^(?:url|URL)$', url):
 812                             raise ExtractorError(
 813                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 814                                 expected=True)
 815                         else:
 816                             self._downloader.report_warning(
 817                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 818                     return self.url_result('ytsearch:' + url)
 819
 820             if default_search in ('error', 'fixup_error'):
 821                 raise ExtractorError(
 822                     '%r is not a valid URL. '
 823                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 824                     % (url, url), expected=True)
 825             else:
 826                 if ':' not in default_search:
 827                     default_search += ':'
 828                 return self.url_result(default_search + url)
 829
 830         url, smuggled_data = unsmuggle_url(url)
 831         force_videoid = None
 832         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 833         if smuggled_data and 'force_videoid' in smuggled_data:
 834             force_videoid = smuggled_data['force_videoid']
 835             video_id = force_videoid
 836         else:
 837             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 838
 839         self.to_screen('%s: Requesting header' % video_id)
 840
 841         head_req = HEADRequest(url)
 842         head_response = self._request_webpage(
 843             head_req, video_id,
 844             note=False, errnote='Could not send HEAD request to %s' % url,
 845             fatal=False)
 846
 847         if head_response is not False:
 848             # Check for redirect
 849             new_url = head_response.geturl()
 850             if url != new_url:
 851                 self.report_following_redirect(new_url)
 852                 if force_videoid:
 853                     new_url = smuggle_url(
 854                         new_url, {'force_videoid': force_videoid})
 855                 return self.url_result(new_url)
 856
 857         full_response = None
 858         if head_response is False:
 859             full_response = self._request_webpage(url, video_id)
 860             head_response = full_response
 861
 862         # Check for direct link to a video
 863         content_type = head_response.headers.get('Content-Type', '')
 864         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 865         if m:
 866             upload_date = unified_strdate(
 867                 head_response.headers.get('Last-Modified'))
 868             return {
 869                 'id': video_id,
 870                 'title': os.path.splitext(url_basename(url))[0],
 871                 'direct': True,
 872                 'formats': [{
 873                     'format_id': m.group('format_id'),
 874                     'url': url,
 875                     'vcodec': 'none' if m.group('type') == 'audio' else None
 876                 }],
 877                 'upload_date': upload_date,
 878             }
 879
 880         if not self._downloader.params.get('test', False) and not is_intentional:
 881             self._downloader.report_warning('Falling back on generic information extractor.')
 882
 883         if not full_response:
 884             full_response = self._request_webpage(url, video_id)
 885
 886         # Maybe it's a direct link to a video?
 887         # Be careful not to download the whole thing!
 888         first_bytes = full_response.read(512)
 889         if not is_html(first_bytes):
 890             self._downloader.report_warning(
 891                 'URL could be a direct video link, returning it as such.')
 892             upload_date = unified_strdate(
 893                 head_response.headers.get('Last-Modified'))
 894             return {
 895                 'id': video_id,
 896                 'title': os.path.splitext(url_basename(url))[0],
 897                 'direct': True,
 898                 'url': url,
 899                 'upload_date': upload_date,
 900             }
 901
 902         webpage = self._webpage_read_content(
 903             full_response, url, video_id, prefix=first_bytes)
 904
 905         self.report_extraction(video_id)
 906
 907         # Is it an RSS feed?
 908         try:
 909             doc = parse_xml(webpage)
 910             if doc.tag == 'rss':
 911                 return self._extract_rss(url, video_id, doc)
 912         except compat_xml_parse_error:
 913             pass
 914
 915         # Is it a Camtasia project?
 916         camtasia_res = self._extract_camtasia(url, video_id, webpage)
 917         if camtasia_res is not None:
 918             return camtasia_res
 919
 920         # Sometimes embedded video player is hidden behind percent encoding
 921         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 922         # Unescaping the whole page allows to handle those cases in a generic way
 923         webpage = compat_urllib_parse.unquote(webpage)
 924
 925         # it's tempting to parse this further, but you would
 926         # have to take into account all the variations like
 927         #   Video Title - Site Name
 928         #   Site Name | Video Title
 929         #   Video Title - Tagline | Site Name
 930         # and so on and so forth; it's just not practical
 931         video_title = self._html_search_regex(
 932             r'(?s)<title>(.*?)</title>', webpage, 'video title',
 933             default='video')
 934
 935         # Try to detect age limit automatically
 936         age_limit = self._rta_search(webpage)
 937         # And then there are the jokers who advertise that they use RTA,
 938         # but actually don't.
 939         AGE_LIMIT_MARKERS = [
 940             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 941         ]
 942         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 943             age_limit = 18
 944
 945         # video uploader is domain name
 946         video_uploader = self._search_regex(
 947             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 948
 949         # Helper method
 950         def _playlist_from_matches(matches, getter=None, ie=None):
 951             urlrs = orderedSet(
 952                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
 953                 for m in matches)
 954             return self.playlist_result(
 955                 urlrs, playlist_id=video_id, playlist_title=video_title)
 956
 957         # Look for BrightCove:
 958         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 959         if bc_urls:
 960             self.to_screen('Brightcove video detected.')
 961             entries = [{
 962                 '_type': 'url',
 963                 'url': smuggle_url(bc_url, {'Referer': url}),
 964                 'ie_key': 'Brightcove'
 965             } for bc_url in bc_urls]
 966
 967             return {
 968                 '_type': 'playlist',
 969                 'title': video_title,
 970                 'id': video_id,
 971                 'entries': entries,
 972             }
 973
 974         # Look for embedded rtl.nl player
 975         matches = re.findall(
 976             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
 977             webpage)
 978         if matches:
 979             return _playlist_from_matches(matches, ie='RtlNl')
 980
 981         # Look for embedded (iframe) Vimeo player
 982         mobj = re.search(
 983             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
 984         if mobj:
 985             player_url = unescapeHTML(mobj.group('url'))
 986             surl = smuggle_url(player_url, {'Referer': url})
 987             return self.url_result(surl)
 988         # Look for embedded (swf embed) Vimeo player
 989         mobj = re.search(
 990             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
 991         if mobj:
 992             return self.url_result(mobj.group(1))
 993
 994         # Look for embedded YouTube player
 995         matches = re.findall(r'''(?x)
 996             (?:
 997                 <iframe[^>]+?src=|
 998                 data-video-url=|
 999                 <embed[^>]+?src=|
1000                 embedSWF\(?:\s*|
1001                 new\s+SWFObject\(
1002             )
1003             (["\'])
1004                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1005                 (?:embed|v|p)/.+?)
1006             \1''', webpage)
1007         if matches:
1008             return _playlist_from_matches(
1009                 matches, lambda m: unescapeHTML(m[1]))
1010
1011         # Look for lazyYT YouTube embed
1012         matches = re.findall(
1013             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1014         if matches:
1015             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1016
1017         # Look for embedded Dailymotion player
1018         matches = re.findall(
1019             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1020         if matches:
1021             return _playlist_from_matches(
1022                 matches, lambda m: unescapeHTML(m[1]))
1023
1024         # Look for embedded Dailymotion playlist player (#3822)
1025         m = re.search(
1026             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1027         if m:
1028             playlists = re.findall(
1029                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1030             if playlists:
1031                 return _playlist_from_matches(
1032                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1033
1034         # Look for embedded Wistia player
1035         match = re.search(
1036             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1037         if match:
1038             embed_url = self._proto_relative_url(
1039                 unescapeHTML(match.group('url')))
1040             return {
1041                 '_type': 'url_transparent',
1042                 'url': embed_url,
1043                 'ie_key': 'Wistia',
1044                 'uploader': video_uploader,
1045                 'title': video_title,
1046                 'id': video_id,
1047             }
1048
1049         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1050         if match:
1051             return {
1052                 '_type': 'url_transparent',
1053                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1054                 'ie_key': 'Wistia',
1055                 'uploader': video_uploader,
1056                 'title': video_title,
1057                 'id': match.group('id')
1058             }
1059
1060         # Look for embedded blip.tv player
1061         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
1062         if mobj:
1063             return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
1064         mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
1065         if mobj:
1066             return self.url_result(mobj.group(1), 'BlipTV')
1067
1068         # Look for embedded condenast player
1069         matches = re.findall(
1070             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1071             webpage)
1072         if matches:
1073             return {
1074                 '_type': 'playlist',
1075                 'entries': [{
1076                     '_type': 'url',
1077                     'ie_key': 'CondeNast',
1078                     'url': ma,
1079                 } for ma in matches],
1080                 'title': video_title,
1081                 'id': video_id,
1082             }
1083
1084         # Look for Bandcamp pages with custom domain
1085         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1086         if mobj is not None:
1087             burl = unescapeHTML(mobj.group(1))
1088             # Don't set the extractor because it can be a track url or an album
1089             return self.url_result(burl)
1090
1091         # Look for embedded Vevo player
1092         mobj = re.search(
1093             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1094         if mobj is not None:
1095             return self.url_result(mobj.group('url'))
1096
1097         # Look for embedded Viddler player
1098         mobj = re.search(
1099             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1100             webpage)
1101         if mobj is not None:
1102             return self.url_result(mobj.group('url'))
1103
1104         # Look for NYTimes player
1105         mobj = re.search(
1106             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1107             webpage)
1108         if mobj is not None:
1109             return self.url_result(mobj.group('url'))
1110
1111         # Look for Libsyn player
1112         mobj = re.search(
1113             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1114         if mobj is not None:
1115             return self.url_result(mobj.group('url'))
1116
1117         # Look for Ooyala videos
1118         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1119                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1120                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1121                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1122         if mobj is not None:
1123             return OoyalaIE._build_url_result(mobj.group('ec'))
1124
1125         # Look for multiple Ooyala embeds on SBN network websites
1126         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1127         if mobj is not None:
1128             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1129             if embeds:
1130                 return _playlist_from_matches(
1131                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1132
1133         # Look for Aparat videos
1134         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1135         if mobj is not None:
1136             return self.url_result(mobj.group(1), 'Aparat')
1137
1138         # Look for MPORA videos
1139         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1140         if mobj is not None:
1141             return self.url_result(mobj.group(1), 'Mpora')
1142
1143         # Look for embedded NovaMov-based player
1144         mobj = re.search(
1145             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1146                     (?P<url>http://(?:(?:embed|www)\.)?
1147                         (?:novamov\.com|
1148                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1149                            videoweed\.(?:es|com)|
1150                            movshare\.(?:net|sx|ag)|
1151                            divxstage\.(?:eu|net|ch|co|at|ag))
1152                         /embed\.php.+?)\1''', webpage)
1153         if mobj is not None:
1154             return self.url_result(mobj.group('url'))
1155
1156         # Look for embedded Facebook player
1157         mobj = re.search(
1158             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1159         if mobj is not None:
1160             return self.url_result(mobj.group('url'), 'Facebook')
1161
1162         # Look for embedded VK player
1163         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1164         if mobj is not None:
1165             return self.url_result(mobj.group('url'), 'VK')
1166
1167         # Look for embedded ivi player
1168         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1169         if mobj is not None:
1170             return self.url_result(mobj.group('url'), 'Ivi')
1171
1172         # Look for embedded Huffington Post player
1173         mobj = re.search(
1174             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1175         if mobj is not None:
1176             return self.url_result(mobj.group('url'), 'HuffPost')
1177
1178         # Look for embed.ly
1179         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1180         if mobj is not None:
1181             return self.url_result(mobj.group('url'))
1182         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1183         if mobj is not None:
1184             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1185
1186         # Look for funnyordie embed
1187         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1188         if matches:
1189             return _playlist_from_matches(
1190                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1191
1192         # Look for BBC iPlayer embed
1193         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1194         if matches:
1195             return _playlist_from_matches(matches, ie='BBCCoUk')
1196
1197         # Look for embedded RUTV player
1198         rutv_url = RUTVIE._extract_url(webpage)
1199         if rutv_url:
1200             return self.url_result(rutv_url, 'RUTV')
1201
1202         # Look for embedded TED player
1203         mobj = re.search(
1204             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1205         if mobj is not None:
1206             return self.url_result(mobj.group('url'), 'TED')
1207
1208         # Look for embedded Ustream videos
1209         mobj = re.search(
1210             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1211         if mobj is not None:
1212             return self.url_result(mobj.group('url'), 'Ustream')
1213
1214         # Look for embedded arte.tv player
1215         mobj = re.search(
1216             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1217             webpage)
1218         if mobj is not None:
1219             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1220
1221         # Look for embedded smotri.com player
1222         smotri_url = SmotriIE._extract_url(webpage)
1223         if smotri_url:
1224             return self.url_result(smotri_url, 'Smotri')
1225
1226         # Look for embeded soundcloud player
1227         mobj = re.search(
1228             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1229             webpage)
1230         if mobj is not None:
1231             url = unescapeHTML(mobj.group('url'))
1232             return self.url_result(url)
1233
1234         # Look for embedded vulture.com player
1235         mobj = re.search(
1236             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1237             webpage)
1238         if mobj is not None:
1239             url = unescapeHTML(mobj.group('url'))
1240             return self.url_result(url, ie='Vulture')
1241
1242         # Look for embedded mtvservices player
1243         mobj = re.search(
1244             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1245             webpage)
1246         if mobj is not None:
1247             url = unescapeHTML(mobj.group('url'))
1248             return self.url_result(url, ie='MTVServicesEmbedded')
1249
1250         # Look for embedded yahoo player
1251         mobj = re.search(
1252             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1253             webpage)
1254         if mobj is not None:
1255             return self.url_result(mobj.group('url'), 'Yahoo')
1256
1257         # Look for embedded sbs.com.au player
1258         mobj = re.search(
1259             r'''(?x)
1260             (?:
1261                 <meta\s+property="og:video"\s+content=|
1262                 <iframe[^>]+?src=
1263             )
1264             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1265             webpage)
1266         if mobj is not None:
1267             return self.url_result(mobj.group('url'), 'SBS')
1268
1269         # Look for embedded Cinchcast player
1270         mobj = re.search(
1271             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1272             webpage)
1273         if mobj is not None:
1274             return self.url_result(mobj.group('url'), 'Cinchcast')
1275
1276         mobj = re.search(
1277             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1278             webpage)
1279         if mobj is not None:
1280             return self.url_result(mobj.group('url'), 'MLB')
1281
1282         mobj = re.search(
1283             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1284             webpage)
1285         if mobj is not None:
1286             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1287
1288         mobj = re.search(
1289             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1290             webpage)
1291         if mobj is not None:
1292             return self.url_result(mobj.group('url'), 'Livestream')
1293
1294         # Look for Zapiks embed
1295         mobj = re.search(
1296             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1297         if mobj is not None:
1298             return self.url_result(mobj.group('url'), 'Zapiks')
1299
1300         # Look for Kaltura embeds
1301         mobj = re.search(
1302             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1303         if mobj is not None:
1304             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1305
1306         # Look for Eagle.Platform embeds
1307         mobj = re.search(
1308             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1309         if mobj is not None:
1310             return self.url_result(mobj.group('url'), 'EaglePlatform')
1311
1312         # Look for ClipYou (uses Eagle.Platform) embeds
1313         mobj = re.search(
1314             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1315         if mobj is not None:
1316             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1317
1318         # Look for Pladform embeds
1319         mobj = re.search(
1320             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1321         if mobj is not None:
1322             return self.url_result(mobj.group('url'), 'Pladform')
1323
1324         # Look for Playwire embeds
1325         mobj = re.search(
1326             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1327         if mobj is not None:
1328             return self.url_result(mobj.group('url'))
1329
1330         # Look for 5min embeds
1331         mobj = re.search(
1332             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1333         if mobj is not None:
1334             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1335
1336         # Look for Crooks and Liars embeds
1337         mobj = re.search(
1338             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1339         if mobj is not None:
1340             return self.url_result(mobj.group('url'))
1341
1342         # Look for NBC Sports VPlayer embeds
1343         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1344         if nbc_sports_url:
1345             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1346
1347         # Look for UDN embeds
1348         mobj = re.search(
1349             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1350         if mobj is not None:
1351             return self.url_result(
1352                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1353
1354         def check_video(vurl):
1355             if YoutubeIE.suitable(vurl):
1356                 return True
1357             vpath = compat_urlparse.urlparse(vurl).path
1358             vext = determine_ext(vpath)
1359             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1360
1361         def filter_video(urls):
1362             return list(filter(check_video, urls))
1363
1364         # Start with something easy: JW Player in SWFObject
1365         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1366         if not found:
1367             # Look for gorilla-vid style embedding
1368             found = filter_video(re.findall(r'''(?sx)
1369                 (?:
1370                     jw_plugins|
1371                     JWPlayerOptions|
1372                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1373                 )
1374                 .*?
1375                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1376         if not found:
1377             # Broaden the search a little bit
1378             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1379         if not found:
1380             # Broaden the findall a little bit: JWPlayer JS loader
1381             found = filter_video(re.findall(
1382                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1383         if not found:
1384             # Flow player
1385             found = filter_video(re.findall(r'''(?xs)
1386                 flowplayer\("[^"]+",\s*
1387                     \{[^}]+?\}\s*,
1388                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1389                         ["']?url["']?\s*:\s*["']([^"']+)["']
1390             ''', webpage))
1391         if not found:
1392             # Cinerama player
1393             found = re.findall(
1394                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1395         if not found:
1396             # Try to find twitter cards info
1397             found = filter_video(re.findall(
1398                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1399         if not found:
1400             # We look for Open Graph info:
1401             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1402             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1403             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1404             if m_video_type is not None:
1405                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1406         if not found:
1407             # HTML5 video
1408             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1409         if not found:
1410             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1411             found = re.search(
1412                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1413                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1414                 webpage)
1415             if not found:
1416                 # Look also in Refresh HTTP header
1417                 refresh_header = head_response.headers.get('Refresh')
1418                 if refresh_header:
1419                     found = re.search(REDIRECT_REGEX, refresh_header)
1420             if found:
1421                 new_url = found.group(1)
1422                 self.report_following_redirect(new_url)
1423                 return {
1424                     '_type': 'url',
1425                     'url': new_url,
1426                 }
1427         if not found:
1428             raise UnsupportedError(url)
1429
1430         entries = []
1431         for video_url in found:
1432             video_url = compat_urlparse.urljoin(url, video_url)
1433             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1434
1435             # Sometimes, jwplayer extraction will result in a YouTube URL
1436             if YoutubeIE.suitable(video_url):
1437                 entries.append(self.url_result(video_url, 'Youtube'))
1438                 continue
1439
1440             # Strip the file extension so the id is just the bare file name
1441             video_id = os.path.splitext(video_id)[0]
1442
1443             entries.append({
1444                 'id': video_id,
1445                 'url': video_url,
1446                 'uploader': video_uploader,
1447                 'title': video_title,
1448                 'age_limit': age_limit,
1449             })
1450
1451         if len(entries) == 1:
1452             return entries[0]
1453         else:
1454             for num, e in enumerate(entries, start=1):
1455                 # 'url' results don't have a title
1456                 if e.get('title') is not None:
1457                     e['title'] = '%s (%d)' % (e['title'], num)
1458             return {
1459                 '_type': 'playlist',
1460                 'entries': entries,
1461             }