_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urlparse,
  13     compat_xml_parse_error,
  14 )
  15 from ..utils import (
  16     determine_ext,
  17     ExtractorError,
  18     float_or_none,
  19     HEADRequest,
  20     is_html,
  21     orderedSet,
  22     parse_xml,
  23     smuggle_url,
  24     unescapeHTML,
  25     unified_strdate,
  26     unsmuggle_url,
  27     UnsupportedError,
  28     url_basename,
  29     xpath_text,
  30 )
  31 from .brightcove import BrightcoveIE
  32 from .nbc import NBCSportsVPlayerIE
  33 from .ooyala import OoyalaIE
  34 from .rutv import RUTVIE
  35 from .smotri import SmotriIE
  36 from .condenast import CondeNastIE
  37 from .udn import UDNEmbedIE
  38 from .senateisvp import SenateISVPIE
  39 from .bliptv import BlipTVIE
  40
  41
  42 class GenericIE(InfoExtractor):
  43     IE_DESC = 'Generic downloader that works on some sites'
  44     _VALID_URL = r'.*'
  45     IE_NAME = 'generic'
  46     _TESTS = [
  47         {
  48             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  49             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  50             'info_dict': {
  51                 'id': '13601338388002',
  52                 'ext': 'mp4',
  53                 'uploader': 'www.hodiho.fr',
  54                 'title': 'R\u00e9gis plante sa Jeep',
  55             }
  56         },
  57         # bandcamp page with custom domain
  58         {
  59             'add_ie': ['Bandcamp'],
  60             'url': 'http://bronyrock.com/track/the-pony-mash',
  61             'info_dict': {
  62                 'id': '3235767654',
  63                 'ext': 'mp3',
  64                 'title': 'The Pony Mash',
  65                 'uploader': 'M_Pallante',
  66             },
  67             'skip': 'There is a limit of 200 free downloads / month for the test song',
  68         },
  69         # embedded brightcove video
  70         # it also tests brightcove videos that need to set the 'Referer' in the
  71         # http requests
  72         {
  73             'add_ie': ['Brightcove'],
  74             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  75             'info_dict': {
  76                 'id': '2765128793001',
  77                 'ext': 'mp4',
  78                 'title': 'Le cours de bourse : l’analyse technique',
  79                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  80                 'uploader': 'BFM BUSINESS',
  81             },
  82             'params': {
  83                 'skip_download': True,
  84             },
  85         },
  86         {
  87             # https://github.com/rg3/youtube-dl/issues/2253
  88             'url': 'http://bcove.me/i6nfkrc3',
  89             'md5': '0ba9446db037002366bab3b3eb30c88c',
  90             'info_dict': {
  91                 'id': '3101154703001',
  92                 'ext': 'mp4',
  93                 'title': 'Still no power',
  94                 'uploader': 'thestar.com',
  95                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
  96             },
  97             'add_ie': ['Brightcove'],
  98         },
  99         {
 100             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 101             'md5': 'fb973ecf6e4a78a67453647444222983',
 102             'info_dict': {
 103                 'id': '3414141473001',
 104                 'ext': 'mp4',
 105                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 106                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 107                 'uploader': 'Championat',
 108             },
 109         },
 110         {
 111             # https://github.com/rg3/youtube-dl/issues/3541
 112             'add_ie': ['Brightcove'],
 113             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 114             'info_dict': {
 115                 'id': '3866516442001',
 116                 'ext': 'mp4',
 117                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 118                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 119                 'uploader': 'SBS Broadcasting',
 120             },
 121             'skip': 'Restricted to Netherlands',
 122             'params': {
 123                 'skip_download': True,  # m3u8 download
 124             },
 125         },
 126         # Direct link to a video
 127         {
 128             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 129             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 130             'info_dict': {
 131                 'id': 'trailer',
 132                 'ext': 'mp4',
 133                 'title': 'trailer',
 134                 'upload_date': '20100513',
 135             }
 136         },
 137         # ooyala video
 138         {
 139             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 140             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 141             'info_dict': {
 142                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 143                 'ext': 'mp4',
 144                 'title': '2cc213299525360.mov',  # that's what we get
 145             },
 146             'add_ie': ['Ooyala'],
 147         },
 148         # multiple ooyala embeds on SBN network websites
 149         {
 150             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 151             'info_dict': {
 152                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 153                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 154             },
 155             'playlist_mincount': 3,
 156             'params': {
 157                 'skip_download': True,
 158             },
 159             'add_ie': ['Ooyala'],
 160         },
 161         # google redirect
 162         {
 163             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 164             'info_dict': {
 165                 'id': 'cmQHVoWB5FY',
 166                 'ext': 'mp4',
 167                 'upload_date': '20130224',
 168                 'uploader_id': 'TheVerge',
 169                 'description': 're:^Chris Ziegler takes a look at the\.*',
 170                 'uploader': 'The Verge',
 171                 'title': 'First Firefox OS phones side-by-side',
 172             },
 173             'params': {
 174                 'skip_download': False,
 175             }
 176         },
 177         # embed.ly video
 178         {
 179             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 180             'info_dict': {
 181                 'id': '9ODmcdjQcHQ',
 182                 'ext': 'mp4',
 183                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 184                 'upload_date': '20140225',
 185                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 186                 'uploader': 'Tested',
 187                 'uploader_id': 'testedcom',
 188             },
 189             # No need to test YoutubeIE here
 190             'params': {
 191                 'skip_download': True,
 192             },
 193         },
 194         # funnyordie embed
 195         {
 196             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 197             'info_dict': {
 198                 'id': '18e820ec3f',
 199                 'ext': 'mp4',
 200                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 201                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 202             },
 203         },
 204         # BBC iPlayer embeds
 205         {
 206             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 207             'info_dict': {
 208                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 209             },
 210             'playlist_mincount': 18,
 211         },
 212         # RUTV embed
 213         {
 214             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 215             'info_dict': {
 216                 'id': '776940',
 217                 'ext': 'mp4',
 218                 'title': 'Охотское море стало целиком российским',
 219                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 220             },
 221             'params': {
 222                 # m3u8 download
 223                 'skip_download': True,
 224             },
 225         },
 226         # Embedded TED video
 227         {
 228             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 229             'md5': '65fdff94098e4a607385a60c5177c638',
 230             'info_dict': {
 231                 'id': '1969',
 232                 'ext': 'mp4',
 233                 'title': 'Hidden miracles of the natural world',
 234                 'uploader': 'Louie Schwartzberg',
 235                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 236             }
 237         },
  238         # Embedded Ustream video
 239         {
 240             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 241             'md5': '27b99cdb639c9b12a79bca876a073417',
 242             'info_dict': {
 243                 'id': '45734260',
 244                 'ext': 'flv',
 245                 'uploader': 'AU SPA:  The NSA and Privacy',
 246                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 247             }
 248         },
 249         # nowvideo embed hidden behind percent encoding
 250         {
 251             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 252             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 253             'info_dict': {
 254                 'id': '06e53103ca9aa',
 255                 'ext': 'flv',
 256                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 257                 'description': 'No description',
 258             },
 259         },
 260         # arte embed
 261         {
 262             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 263             'md5': '7653032cbb25bf6c80d80f217055fa43',
 264             'info_dict': {
 265                 'id': '048195-004_PLUS7-F',
 266                 'ext': 'flv',
 267                 'title': 'X:enius',
 268                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 269                 'upload_date': '20140320',
 270             },
 271             'params': {
 272                 'skip_download': 'Requires rtmpdump'
 273             }
 274         },
 275         # Condé Nast embed
 276         {
 277             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 278             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 279             'info_dict': {
 280                 'id': '53501be369702d3275860000',
 281                 'ext': 'mp4',
 282                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 283             }
 284         },
 285         # Dailymotion embed
 286         {
 287             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 288             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 289             'info_dict': {
 290                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 291                 'ext': 'mp4',
 292                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 293                 'uploader': 'Spi0n',
 294             },
 295             'add_ie': ['Dailymotion'],
 296         },
 297         # YouTube embed
 298         {
 299             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 300             'info_dict': {
 301                 'id': 'FXRb4ykk4S0',
 302                 'ext': 'mp4',
 303                 'title': 'The NBL Auction 2014',
 304                 'uploader': 'BADMINTON England',
 305                 'uploader_id': 'BADMINTONEvents',
 306                 'upload_date': '20140603',
 307                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 308             },
 309             'add_ie': ['Youtube'],
 310             'params': {
 311                 'skip_download': True,
 312             }
 313         },
  314         # MTVServices embed
 315         {
 316             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 317             'md5': '35727f82f58c76d996fc188f9755b0d5',
 318             'info_dict': {
 319                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 320                 'ext': 'mp4',
 321                 'title': 'Review',
 322                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 323             },
 324         },
 325         # YouTube embed via <data-embed-url="">
 326         {
 327             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 328             'info_dict': {
 329                 'id': '4vAffPZIT44',
 330                 'ext': 'mp4',
 331                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 332                 'uploader': 'Gameloft',
 333                 'uploader_id': 'gameloft',
 334                 'upload_date': '20140828',
 335                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 336             },
 337             'params': {
 338                 'skip_download': True,
 339             }
 340         },
 341         # Camtasia studio
 342         {
 343             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 344             'playlist': [{
 345                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 346                 'info_dict': {
 347                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 348                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 349                     'ext': 'flv',
 350                     'duration': 2235.90,
 351                 }
 352             }, {
 353                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 354                 'info_dict': {
 355                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 356                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 357                     'ext': 'flv',
 358                     'duration': 2235.93,
 359                 }
 360             }],
 361             'info_dict': {
 362                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 363             }
 364         },
 365         # Flowplayer
 366         {
 367             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 368             'md5': '9d65602bf31c6e20014319c7d07fba27',
 369             'info_dict': {
 370                 'id': '5123ea6d5e5a7',
 371                 'ext': 'mp4',
 372                 'age_limit': 18,
 373                 'uploader': 'www.handjobhub.com',
 374                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 375             }
 376         },
 377         # RSS feed
 378         {
 379             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 380             'info_dict': {
 381                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 382                 'title': 'Zero Punctuation',
 383                 'description': 're:.*groundbreaking video review series.*'
 384             },
 385             'playlist_mincount': 11,
 386         },
 387         # Multiple brightcove videos
 388         # https://github.com/rg3/youtube-dl/issues/2283
 389         {
 390             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 391             'info_dict': {
 392                 'id': 'always-never',
 393                 'title': 'Always / Never - The New Yorker',
 394             },
 395             'playlist_count': 3,
 396             'params': {
 397                 'extract_flat': False,
 398                 'skip_download': True,
 399             }
 400         },
 401         # MLB embed
 402         {
 403             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 404             'md5': '96f09a37e44da40dd083e12d9a683327',
 405             'info_dict': {
 406                 'id': '33322633',
 407                 'ext': 'mp4',
 408                 'title': 'Ump changes call to ball',
 409                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 410                 'duration': 48,
 411                 'timestamp': 1401537900,
 412                 'upload_date': '20140531',
 413                 'thumbnail': 're:^https?://.*\.jpg$',
 414             },
 415         },
 416         # Wistia embed
 417         {
 418             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 419             'md5': '8788b683c777a5cf25621eaf286d0c23',
 420             'info_dict': {
 421                 'id': '1cfaf6b7ea',
 422                 'ext': 'mov',
 423                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 424                 'duration': 643.0,
 425                 'filesize': 182808282,
 426                 'uploader': 'education-portal.com',
 427             },
 428         },
 429         {
 430             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 431             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 432             'info_dict': {
 433                 'id': 'uxjb0lwrcz',
 434                 'ext': 'mp4',
 435                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 436                 'duration': 1715.0,
 437                 'uploader': 'thoughtworks.wistia.com',
 438             },
 439         },
 440         # Direct download with broken HEAD
 441         {
 442             'url': 'http://ai-radio.org:8000/radio.opus',
 443             'info_dict': {
 444                 'id': 'radio',
 445                 'ext': 'opus',
 446                 'title': 'radio',
 447             },
 448             'params': {
 449                 'skip_download': True,  # infinite live stream
 450             },
 451             'expected_warnings': [
 452                 r'501.*Not Implemented'
 453             ],
 454         },
 455         # Soundcloud embed
 456         {
 457             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 458             'info_dict': {
 459                 'id': '174391317',
 460                 'ext': 'mp3',
 461                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 462                 'uploader': 'Sophos Security',
 463                 'title': 'Chet Chat 171 - Oct 29, 2014',
 464                 'upload_date': '20141029',
 465             }
 466         },
 467         # Livestream embed
 468         {
 469             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 470             'info_dict': {
 471                 'id': '67864563',
 472                 'ext': 'flv',
 473                 'upload_date': '20141112',
 474                 'title': 'Rosetta #CometLanding webcast HL 10',
 475             }
 476         },
 477         # LazyYT
 478         {
 479             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 480             'info_dict': {
 481                 'id': '1986',
 482                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 483             },
 484             'playlist_mincount': 2,
 485         },
 486         # Direct link with incorrect MIME type
 487         {
 488             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 489             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 490             'info_dict': {
 491                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 492                 'id': '5_Lennart_Poettering_-_Systemd',
 493                 'ext': 'webm',
 494                 'title': '5_Lennart_Poettering_-_Systemd',
 495                 'upload_date': '20141120',
 496             },
 497             'expected_warnings': [
 498                 'URL could be a direct video link, returning it as such.'
 499             ]
 500         },
 501         # Cinchcast embed
 502         {
 503             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 504             'info_dict': {
 505                 'id': '7141703',
 506                 'ext': 'mp3',
 507                 'upload_date': '20141126',
 508                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 509             }
 510         },
 511         # Cinerama player
 512         {
 513             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 514             'info_dict': {
 515                 'id': '730m_DandD_1901_512k',
 516                 'ext': 'mp4',
 517                 'uploader': 'www.abc.net.au',
 518                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 519             }
 520         },
 521         # embedded viddler video
 522         {
 523             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 524             'info_dict': {
 525                 'id': '4d03aad9',
 526                 'ext': 'mp4',
 527                 'uploader': 'deadspin',
 528                 'title': 'WALL-TO-GORTAT',
 529                 'timestamp': 1422285291,
 530                 'upload_date': '20150126',
 531             },
 532             'add_ie': ['Viddler'],
 533         },
 534         # Libsyn embed
 535         {
 536             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 537             'info_dict': {
 538                 'id': '3377616',
 539                 'ext': 'mp3',
 540                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 541                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 542                 'upload_date': '20150220',
 543             },
 544         },
 545         # jwplayer YouTube
 546         {
 547             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 548             'info_dict': {
 549                 'id': 'Mrj4DVp2zeA',
 550                 'ext': 'mp4',
 551                 'upload_date': '20150212',
 552                 'uploader': 'The National Archives UK',
 553                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 554                 'uploader_id': 'NationalArchives08',
 555                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 556             },
 557         },
 558         # rtl.nl embed
 559         {
 560             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 561             'playlist_mincount': 5,
 562             'info_dict': {
 563                 'id': 'aanslagen-kopenhagen',
 564                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 565             }
 566         },
 567         # Zapiks embed
 568         {
 569             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 570             'info_dict': {
 571                 'id': '118046',
 572                 'ext': 'mp4',
 573                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 574             }
 575         },
 576         # Kaltura embed
 577         {
 578             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 579             'info_dict': {
 580                 'id': '1_eergr3h1',
 581                 'ext': 'mp4',
 582                 'upload_date': '20150226',
 583                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 584                 'timestamp': int,
 585                 'title': 'John Carlson Postgame 2/25/15',
 586             },
 587         },
 588         # Eagle.Platform embed (generic URL)
 589         {
 590             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 591             'info_dict': {
 592                 'id': '227304',
 593                 'ext': 'mp4',
 594                 'title': 'Навальный вышел на свободу',
 595                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 596                 'thumbnail': 're:^https?://.*\.jpg$',
 597                 'duration': 87,
 598                 'view_count': int,
 599                 'age_limit': 0,
 600             },
 601         },
 602         # ClipYou (Eagle.Platform) embed (custom URL)
 603         {
 604             'url': 'http://muz-tv.ru/play/7129/',
 605             'info_dict': {
 606                 'id': '12820',
 607                 'ext': 'mp4',
 608                 'title': "'O Sole Mio",
 609                 'thumbnail': 're:^https?://.*\.jpg$',
 610                 'duration': 216,
 611                 'view_count': int,
 612             },
 613         },
 614         # Pladform embed
 615         {
 616             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 617             'info_dict': {
 618                 'id': '100183293',
 619                 'ext': 'mp4',
 620                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 621                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 622                 'thumbnail': 're:^https?://.*\.jpg$',
 623                 'duration': 694,
 624                 'age_limit': 0,
 625             },
 626         },
 627         # Playwire embed
 628         {
 629             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 630             'info_dict': {
 631                 'id': '3519514',
 632                 'ext': 'mp4',
 633                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 634                 'thumbnail': 're:^https?://.*\.png$',
 635                 'duration': 45.115,
 636             },
 637         },
 638         # 5min embed
 639         {
 640             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 641             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 642             'info_dict': {
 643                 'id': '518726732',
 644                 'ext': 'mp4',
 645                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 646             },
 647         },
 648         # RSS feed with enclosure
 649         {
 650             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 651             'info_dict': {
 652                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 653                 'ext': 'm4v',
 654                 'upload_date': '20150228',
 655                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 656             }
 657         },
 658         # Crooks and Liars embed
 659         {
 660             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 661             'info_dict': {
 662                 'id': '8RUoRhRi',
 663                 'ext': 'mp4',
 664                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 665                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 666                 'timestamp': 1428207000,
 667                 'upload_date': '20150405',
 668                 'uploader': 'Heather',
 669             },
 670         },
 671         # Crooks and Liars external embed
 672         {
 673             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 674             'info_dict': {
 675                 'id': 'MTE3MjUtMzQ2MzA',
 676                 'ext': 'mp4',
 677                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 678                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 679                 'timestamp': 1265032391,
 680                 'upload_date': '20100201',
 681                 'uploader': 'Heather',
 682             },
 683         },
 684         # NBC Sports vplayer embed
 685         {
 686             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 687             'info_dict': {
 688                 'id': 'ln7x1qSThw4k',
 689                 'ext': 'flv',
 690                 'title': "PFT Live: New leader in the 'new-look' defense",
 691                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 692             },
 693         },
 694         # UDN embed
 695         {
 696             'url': 'http://www.udn.com/news/story/7314/822787',
 697             'md5': 'fd2060e988c326991037b9aff9df21a6',
 698             'info_dict': {
 699                 'id': '300346',
 700                 'ext': 'mp4',
 701                 'title': '中一中男師變性 全校師生力挺',
 702                 'thumbnail': 're:^https?://.*\.jpg$',
 703             }
 704         },
 705         # Ooyala embed
 706         {
 707             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 708             'info_dict': {
 709                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 710                 'ext': 'mp4',
 711                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 712                 'title': 'This is what separates the Excel masters from the wannabes',
 713             },
 714             'params': {
 715                 # m3u8 downloads
 716                 'skip_download': True,
 717             }
 718         },
 719         # Contains a SMIL manifest
 720         {
 721             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 722             'info_dict': {
 723                 'id': 'file',
 724                 'ext': 'flv',
 725                 'title': '+ Football: Lottery Champions League Europe',
 726                 'uploader': 'www.telewebion.com',
 727             },
 728             'params': {
 729                 # rtmpe downloads
 730                 'skip_download': True,
 731             }
 732         }
 733     ]
 734
 735     def report_following_redirect(self, new_url):
 736         """Report information extraction."""
 737         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 738
 739     def _extract_rss(self, url, video_id, doc):
 740         playlist_title = doc.find('./channel/title').text
 741         playlist_desc_el = doc.find('./channel/description')
 742         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 743
 744         entries = []
 745         for it in doc.findall('./channel/item'):
 746             next_url = xpath_text(it, 'link', fatal=False)
 747             if not next_url:
 748                 enclosure_nodes = it.findall('./enclosure')
 749                 for e in enclosure_nodes:
 750                     next_url = e.attrib.get('url')
 751                     if next_url:
 752                         break
 753
 754             if not next_url:
 755                 continue
 756
 757             entries.append({
 758                 '_type': 'url',
 759                 'url': next_url,
 760                 'title': it.find('title').text,
 761             })
 762
 763         return {
 764             '_type': 'playlist',
 765             'id': url,
 766             'title': playlist_title,
 767             'description': playlist_desc,
 768             'entries': entries,
 769         }
 770
 771     def _extract_camtasia(self, url, video_id, webpage):
 772         """ Returns None if no camtasia video can be found. """
 773
 774         camtasia_cfg = self._search_regex(
 775             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 776             webpage, 'camtasia configuration file', default=None)
 777         if camtasia_cfg is None:
 778             return None
 779
 780         title = self._html_search_meta('DC.title', webpage, fatal=True)
 781
 782         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 783         camtasia_cfg = self._download_xml(
 784             camtasia_url, video_id,
 785             note='Downloading camtasia configuration',
 786             errnote='Failed to download camtasia configuration')
 787         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 788
 789         entries = []
 790         for n in fileset_node.getchildren():
 791             url_n = n.find('./uri')
 792             if url_n is None:
 793                 continue
 794
 795             entries.append({
 796                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 797                 'title': '%s - %s' % (title, n.tag),
 798                 'url': compat_urlparse.urljoin(url, url_n.text),
 799                 'duration': float_or_none(n.find('./duration').text),
 800             })
 801
 802         return {
 803             '_type': 'playlist',
 804             'entries': entries,
 805             'title': title,
 806         }
 807
 808     def _real_extract(self, url):
 809         if url.startswith('//'):
 810             return {
 811                 '_type': 'url',
 812                 'url': self.http_scheme() + url,
 813             }
 814
 815         parsed_url = compat_urlparse.urlparse(url)
 816         if not parsed_url.scheme:
 817             default_search = self._downloader.params.get('default_search')
 818             if default_search is None:
 819                 default_search = 'fixup_error'
 820
 821             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 822                 if '/' in url:
 823                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 824                     return self.url_result('http://' + url)
 825                 elif default_search != 'fixup_error':
 826                     if default_search == 'auto_warning':
 827                         if re.match(r'^(?:url|URL)$', url):
 828                             raise ExtractorError(
 829                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 830                                 expected=True)
 831                         else:
 832                             self._downloader.report_warning(
 833                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 834                     return self.url_result('ytsearch:' + url)
 835
 836             if default_search in ('error', 'fixup_error'):
 837                 raise ExtractorError(
 838                     '%r is not a valid URL. '
 839                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 840                     % (url, url), expected=True)
 841             else:
 842                 if ':' not in default_search:
 843                     default_search += ':'
 844                 return self.url_result(default_search + url)
 845
 846         url, smuggled_data = unsmuggle_url(url)
 847         force_videoid = None
 848         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 849         if smuggled_data and 'force_videoid' in smuggled_data:
 850             force_videoid = smuggled_data['force_videoid']
 851             video_id = force_videoid
 852         else:
 853             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 854
 855         self.to_screen('%s: Requesting header' % video_id)
 856
 857         head_req = HEADRequest(url)
 858         head_response = self._request_webpage(
 859             head_req, video_id,
 860             note=False, errnote='Could not send HEAD request to %s' % url,
 861             fatal=False)
 862
 863         if head_response is not False:
 864             # Check for redirect
 865             new_url = head_response.geturl()
 866             if url != new_url:
 867                 self.report_following_redirect(new_url)
 868                 if force_videoid:
 869                     new_url = smuggle_url(
 870                         new_url, {'force_videoid': force_videoid})
 871                 return self.url_result(new_url)
 872
 873         full_response = None
 874         if head_response is False:
 875             full_response = self._request_webpage(url, video_id)
 876             head_response = full_response
 877
 878         # Check for direct link to a video
 879         content_type = head_response.headers.get('Content-Type', '')
 880         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 881         if m:
 882             upload_date = unified_strdate(
 883                 head_response.headers.get('Last-Modified'))
 884             return {
 885                 'id': video_id,
 886                 'title': os.path.splitext(url_basename(url))[0],
 887                 'direct': True,
 888                 'formats': [{
 889                     'format_id': m.group('format_id'),
 890                     'url': url,
 891                     'vcodec': 'none' if m.group('type') == 'audio' else None
 892                 }],
 893                 'upload_date': upload_date,
 894             }
 895
 896         if not self._downloader.params.get('test', False) and not is_intentional:
 897             self._downloader.report_warning('Falling back on generic information extractor.')
 898
 899         if not full_response:
 900             full_response = self._request_webpage(url, video_id)
 901
 902         # Maybe it's a direct link to a video?
 903         # Be careful not to download the whole thing!
 904         first_bytes = full_response.read(512)
 905         if not is_html(first_bytes):
 906             self._downloader.report_warning(
 907                 'URL could be a direct video link, returning it as such.')
 908             upload_date = unified_strdate(
 909                 head_response.headers.get('Last-Modified'))
 910             return {
 911                 'id': video_id,
 912                 'title': os.path.splitext(url_basename(url))[0],
 913                 'direct': True,
 914                 'url': url,
 915                 'upload_date': upload_date,
 916             }
 917
 918         webpage = self._webpage_read_content(
 919             full_response, url, video_id, prefix=first_bytes)
 920
 921         self.report_extraction(video_id)
 922
 923         # Is it an RSS feed?
 924         try:
 925             doc = parse_xml(webpage)
 926             if doc.tag == 'rss':
 927                 return self._extract_rss(url, video_id, doc)
 928         except compat_xml_parse_error:
 929             pass
 930
 931         # Is it a Camtasia project?
 932         camtasia_res = self._extract_camtasia(url, video_id, webpage)
 933         if camtasia_res is not None:
 934             return camtasia_res
 935
 936         # Sometimes embedded video player is hidden behind percent encoding
 937         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 938         # Unescaping the whole page allows to handle those cases in a generic way
 939         webpage = compat_urllib_parse.unquote(webpage)
 940
 941         # it's tempting to parse this further, but you would
 942         # have to take into account all the variations like
 943         #   Video Title - Site Name
 944         #   Site Name | Video Title
 945         #   Video Title - Tagline | Site Name
 946         # and so on and so forth; it's just not practical
 947         video_title = self._html_search_regex(
 948             r'(?s)<title>(.*?)</title>', webpage, 'video title',
 949             default='video')
 950
 951         # Try to detect age limit automatically
 952         age_limit = self._rta_search(webpage)
 953         # And then there are the jokers who advertise that they use RTA,
 954         # but actually don't.
 955         AGE_LIMIT_MARKERS = [
 956             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 957         ]
 958         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 959             age_limit = 18
 960
 961         # video uploader is domain name
 962         video_uploader = self._search_regex(
 963             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 964
 965         # Helper method
 966         def _playlist_from_matches(matches, getter=None, ie=None):
 967             urlrs = orderedSet(
 968                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
 969                 for m in matches)
 970             return self.playlist_result(
 971                 urlrs, playlist_id=video_id, playlist_title=video_title)
 972
 973         # Look for BrightCove:
 974         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 975         if bc_urls:
 976             self.to_screen('Brightcove video detected.')
 977             entries = [{
 978                 '_type': 'url',
 979                 'url': smuggle_url(bc_url, {'Referer': url}),
 980                 'ie_key': 'Brightcove'
 981             } for bc_url in bc_urls]
 982
 983             return {
 984                 '_type': 'playlist',
 985                 'title': video_title,
 986                 'id': video_id,
 987                 'entries': entries,
 988             }
 989
 990         # Look for embedded rtl.nl player
 991         matches = re.findall(
 992             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
 993             webpage)
 994         if matches:
 995             return _playlist_from_matches(matches, ie='RtlNl')
 996
 997         # Look for embedded (iframe) Vimeo player
 998         mobj = re.search(
 999             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1000         if mobj:
1001             player_url = unescapeHTML(mobj.group('url'))
1002             surl = smuggle_url(player_url, {'Referer': url})
1003             return self.url_result(surl)
1004         # Look for embedded (swf embed) Vimeo player
1005         mobj = re.search(
1006             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1007         if mobj:
1008             return self.url_result(mobj.group(1))
1009
1010         # Look for embedded YouTube player
1011         matches = re.findall(r'''(?x)
1012             (?:
1013                 <iframe[^>]+?src=|
1014                 data-video-url=|
1015                 <embed[^>]+?src=|
1016                 embedSWF\(?:\s*|
1017                 new\s+SWFObject\(
1018             )
1019             (["\'])
1020                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1021                 (?:embed|v|p)/.+?)
1022             \1''', webpage)
1023         if matches:
1024             return _playlist_from_matches(
1025                 matches, lambda m: unescapeHTML(m[1]))
1026
1027         # Look for lazyYT YouTube embed
1028         matches = re.findall(
1029             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1030         if matches:
1031             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1032
1033         # Look for embedded Dailymotion player
1034         matches = re.findall(
1035             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1036         if matches:
1037             return _playlist_from_matches(
1038                 matches, lambda m: unescapeHTML(m[1]))
1039
1040         # Look for embedded Dailymotion playlist player (#3822)
1041         m = re.search(
1042             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1043         if m:
1044             playlists = re.findall(
1045                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1046             if playlists:
1047                 return _playlist_from_matches(
1048                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1049
1050         # Look for embedded Wistia player
1051         match = re.search(
1052             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1053         if match:
1054             embed_url = self._proto_relative_url(
1055                 unescapeHTML(match.group('url')))
1056             return {
1057                 '_type': 'url_transparent',
1058                 'url': embed_url,
1059                 'ie_key': 'Wistia',
1060                 'uploader': video_uploader,
1061                 'title': video_title,
1062                 'id': video_id,
1063             }
1064
1065         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1066         if match:
1067             return {
1068                 '_type': 'url_transparent',
1069                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1070                 'ie_key': 'Wistia',
1071                 'uploader': video_uploader,
1072                 'title': video_title,
1073                 'id': match.group('id')
1074             }
1075
1076         # Look for embedded blip.tv player
1077         bliptv_url = BlipTVIE._extract_url(webpage)
1078         if bliptv_url:
1079             return self.url_result(bliptv_url, 'BlipTV')
1080
1081         # Look for embedded condenast player
1082         matches = re.findall(
1083             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1084             webpage)
1085         if matches:
1086             return {
1087                 '_type': 'playlist',
1088                 'entries': [{
1089                     '_type': 'url',
1090                     'ie_key': 'CondeNast',
1091                     'url': ma,
1092                 } for ma in matches],
1093                 'title': video_title,
1094                 'id': video_id,
1095             }
1096
1097         # Look for Bandcamp pages with custom domain
1098         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1099         if mobj is not None:
1100             burl = unescapeHTML(mobj.group(1))
1101             # Don't set the extractor because it can be a track url or an album
1102             return self.url_result(burl)
1103
1104         # Look for embedded Vevo player
1105         mobj = re.search(
1106             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1107         if mobj is not None:
1108             return self.url_result(mobj.group('url'))
1109
1110         # Look for embedded Viddler player
1111         mobj = re.search(
1112             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1113             webpage)
1114         if mobj is not None:
1115             return self.url_result(mobj.group('url'))
1116
1117         # Look for NYTimes player
1118         mobj = re.search(
1119             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1120             webpage)
1121         if mobj is not None:
1122             return self.url_result(mobj.group('url'))
1123
1124         # Look for Libsyn player
1125         mobj = re.search(
1126             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1127         if mobj is not None:
1128             return self.url_result(mobj.group('url'))
1129
1130         # Look for Ooyala videos
1131         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1132                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1133                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1134                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1135         if mobj is not None:
1136             return OoyalaIE._build_url_result(mobj.group('ec'))
1137
1138         # Look for multiple Ooyala embeds on SBN network websites
1139         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1140         if mobj is not None:
1141             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1142             if embeds:
1143                 return _playlist_from_matches(
1144                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1145
1146         # Look for Aparat videos
1147         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1148         if mobj is not None:
1149             return self.url_result(mobj.group(1), 'Aparat')
1150
1151         # Look for MPORA videos
1152         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1153         if mobj is not None:
1154             return self.url_result(mobj.group(1), 'Mpora')
1155
1156         # Look for embedded NovaMov-based player
1157         mobj = re.search(
1158             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1159                     (?P<url>http://(?:(?:embed|www)\.)?
1160                         (?:novamov\.com|
1161                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1162                            videoweed\.(?:es|com)|
1163                            movshare\.(?:net|sx|ag)|
1164                            divxstage\.(?:eu|net|ch|co|at|ag))
1165                         /embed\.php.+?)\1''', webpage)
1166         if mobj is not None:
1167             return self.url_result(mobj.group('url'))
1168
1169         # Look for embedded Facebook player
1170         mobj = re.search(
1171             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1172         if mobj is not None:
1173             return self.url_result(mobj.group('url'), 'Facebook')
1174
1175         # Look for embedded VK player
1176         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1177         if mobj is not None:
1178             return self.url_result(mobj.group('url'), 'VK')
1179
1180         # Look for embedded ivi player
1181         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1182         if mobj is not None:
1183             return self.url_result(mobj.group('url'), 'Ivi')
1184
1185         # Look for embedded Huffington Post player
1186         mobj = re.search(
1187             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1188         if mobj is not None:
1189             return self.url_result(mobj.group('url'), 'HuffPost')
1190
1191         # Look for embed.ly
1192         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1193         if mobj is not None:
1194             return self.url_result(mobj.group('url'))
1195         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1196         if mobj is not None:
1197             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1198
1199         # Look for funnyordie embed
1200         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1201         if matches:
1202             return _playlist_from_matches(
1203                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1204
1205         # Look for BBC iPlayer embed
1206         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1207         if matches:
1208             return _playlist_from_matches(matches, ie='BBCCoUk')
1209
1210         # Look for embedded RUTV player
1211         rutv_url = RUTVIE._extract_url(webpage)
1212         if rutv_url:
1213             return self.url_result(rutv_url, 'RUTV')
1214
1215         # Look for embedded TED player
1216         mobj = re.search(
1217             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1218         if mobj is not None:
1219             return self.url_result(mobj.group('url'), 'TED')
1220
1221         # Look for embedded Ustream videos
1222         mobj = re.search(
1223             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1224         if mobj is not None:
1225             return self.url_result(mobj.group('url'), 'Ustream')
1226
1227         # Look for embedded arte.tv player
1228         mobj = re.search(
1229             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1230             webpage)
1231         if mobj is not None:
1232             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1233
1234         # Look for embedded smotri.com player
1235         smotri_url = SmotriIE._extract_url(webpage)
1236         if smotri_url:
1237             return self.url_result(smotri_url, 'Smotri')
1238
1239         # Look for embeded soundcloud player
1240         mobj = re.search(
1241             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1242             webpage)
1243         if mobj is not None:
1244             url = unescapeHTML(mobj.group('url'))
1245             return self.url_result(url)
1246
1247         # Look for embedded vulture.com player
1248         mobj = re.search(
1249             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1250             webpage)
1251         if mobj is not None:
1252             url = unescapeHTML(mobj.group('url'))
1253             return self.url_result(url, ie='Vulture')
1254
1255         # Look for embedded mtvservices player
1256         mobj = re.search(
1257             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1258             webpage)
1259         if mobj is not None:
1260             url = unescapeHTML(mobj.group('url'))
1261             return self.url_result(url, ie='MTVServicesEmbedded')
1262
1263         # Look for embedded yahoo player
1264         mobj = re.search(
1265             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1266             webpage)
1267         if mobj is not None:
1268             return self.url_result(mobj.group('url'), 'Yahoo')
1269
1270         # Look for embedded sbs.com.au player
1271         mobj = re.search(
1272             r'''(?x)
1273             (?:
1274                 <meta\s+property="og:video"\s+content=|
1275                 <iframe[^>]+?src=
1276             )
1277             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1278             webpage)
1279         if mobj is not None:
1280             return self.url_result(mobj.group('url'), 'SBS')
1281
1282         # Look for embedded Cinchcast player
1283         mobj = re.search(
1284             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1285             webpage)
1286         if mobj is not None:
1287             return self.url_result(mobj.group('url'), 'Cinchcast')
1288
1289         mobj = re.search(
1290             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1291             webpage)
1292         if mobj is not None:
1293             return self.url_result(mobj.group('url'), 'MLB')
1294
1295         mobj = re.search(
1296             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1297             webpage)
1298         if mobj is not None:
1299             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1300
1301         mobj = re.search(
1302             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1303             webpage)
1304         if mobj is not None:
1305             return self.url_result(mobj.group('url'), 'Livestream')
1306
1307         # Look for Zapiks embed
1308         mobj = re.search(
1309             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1310         if mobj is not None:
1311             return self.url_result(mobj.group('url'), 'Zapiks')
1312
1313         # Look for Kaltura embeds
1314         mobj = re.search(
1315             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1316         if mobj is not None:
1317             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1318
1319         # Look for Eagle.Platform embeds
1320         mobj = re.search(
1321             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1322         if mobj is not None:
1323             return self.url_result(mobj.group('url'), 'EaglePlatform')
1324
1325         # Look for ClipYou (uses Eagle.Platform) embeds
1326         mobj = re.search(
1327             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1328         if mobj is not None:
1329             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1330
1331         # Look for Pladform embeds
1332         mobj = re.search(
1333             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1334         if mobj is not None:
1335             return self.url_result(mobj.group('url'), 'Pladform')
1336
1337         # Look for Playwire embeds
1338         mobj = re.search(
1339             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1340         if mobj is not None:
1341             return self.url_result(mobj.group('url'))
1342
1343         # Look for 5min embeds
1344         mobj = re.search(
1345             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1346         if mobj is not None:
1347             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1348
1349         # Look for Crooks and Liars embeds
1350         mobj = re.search(
1351             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1352         if mobj is not None:
1353             return self.url_result(mobj.group('url'))
1354
1355         # Look for NBC Sports VPlayer embeds
1356         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1357         if nbc_sports_url:
1358             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1359
1360         # Look for UDN embeds
1361         mobj = re.search(
1362             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1363         if mobj is not None:
1364             return self.url_result(
1365                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1366
1367         # Look for Senate ISVP iframe
1368         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1369         if senate_isvp_url:
1370             return self.url_result(surl, 'SenateISVP')
1371
1372         def check_video(vurl):
1373             if YoutubeIE.suitable(vurl):
1374                 return True
1375             vpath = compat_urlparse.urlparse(vurl).path
1376             vext = determine_ext(vpath)
1377             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1378
1379         def filter_video(urls):
1380             return list(filter(check_video, urls))
1381
1382         # Start with something easy: JW Player in SWFObject
1383         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1384         if not found:
1385             # Look for gorilla-vid style embedding
1386             found = filter_video(re.findall(r'''(?sx)
1387                 (?:
1388                     jw_plugins|
1389                     JWPlayerOptions|
1390                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1391                 )
1392                 .*?
1393                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1394         if not found:
1395             # Broaden the search a little bit
1396             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1397         if not found:
1398             # Broaden the findall a little bit: JWPlayer JS loader
1399             found = filter_video(re.findall(
1400                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1401         if not found:
1402             # Flow player
1403             found = filter_video(re.findall(r'''(?xs)
1404                 flowplayer\("[^"]+",\s*
1405                     \{[^}]+?\}\s*,
1406                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1407                         ["']?url["']?\s*:\s*["']([^"']+)["']
1408             ''', webpage))
1409         if not found:
1410             # Cinerama player
1411             found = re.findall(
1412                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1413         if not found:
1414             # Try to find twitter cards info
1415             found = filter_video(re.findall(
1416                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1417         if not found:
1418             # We look for Open Graph info:
1419             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1420             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1421             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1422             if m_video_type is not None:
1423                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1424         if not found:
1425             # HTML5 video
1426             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1427         if not found:
1428             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1429             found = re.search(
1430                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1431                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1432                 webpage)
1433             if not found:
1434                 # Look also in Refresh HTTP header
1435                 refresh_header = head_response.headers.get('Refresh')
1436                 if refresh_header:
1437                     found = re.search(REDIRECT_REGEX, refresh_header)
1438             if found:
1439                 new_url = found.group(1)
1440                 self.report_following_redirect(new_url)
1441                 return {
1442                     '_type': 'url',
1443                     'url': new_url,
1444                 }
1445         if not found:
1446             raise UnsupportedError(url)
1447
1448         entries = []
1449         for video_url in found:
1450             video_url = compat_urlparse.urljoin(url, video_url)
1451             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1452
1453             # Sometimes, jwplayer extraction will result in a YouTube URL
1454             if YoutubeIE.suitable(video_url):
1455                 entries.append(self.url_result(video_url, 'Youtube'))
1456                 continue
1457
1458             # here's a fun little line of code for you:
1459             video_id = os.path.splitext(video_id)[0]
1460
1461             if determine_ext(video_url) == 'smil':
1462                 entries.append({
1463                     'id': video_id,
1464                     'formats': self._extract_smil_formats(video_url, video_id),
1465                     'uploader': video_uploader,
1466                     'title': video_title,
1467                     'age_limit': age_limit,
1468                 })
1469             else:
1470                 entries.append({
1471                     'id': video_id,
1472                     'url': video_url,
1473                     'uploader': video_uploader,
1474                     'title': video_title,
1475                     'age_limit': age_limit,
1476                 })
1477
1478         if len(entries) == 1:
1479             return entries[0]
1480         else:
1481             for num, e in enumerate(entries, start=1):
1482                 # 'url' results don't have a title
1483                 if e.get('title') is not None:
1484                     e['title'] = '%s (%d)' % (e['title'], num)
1485             return {
1486                 '_type': 'playlist',
1487                 'entries': entries,
1488             }