_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urlparse,
  13     compat_xml_parse_error,
  14 )
  15 from ..utils import (
  16     determine_ext,
  17     ExtractorError,
  18     float_or_none,
  19     HEADRequest,
  20     is_html,
  21     orderedSet,
  22     parse_xml,
  23     smuggle_url,
  24     unescapeHTML,
  25     unified_strdate,
  26     unsmuggle_url,
  27     UnsupportedError,
  28     url_basename,
  29     url_infer_protocol,
  30     xpath_text,
  31 )
  32 from .brightcove import BrightcoveIE
  33 from .nbc import NBCSportsVPlayerIE
  34 from .ooyala import OoyalaIE
  35 from .rutv import RUTVIE
  36 from .smotri import SmotriIE
  37 from .condenast import CondeNastIE
  38 from .udn import UDNEmbedIE
  39
  40
  41 class GenericIE(InfoExtractor):
  42     IE_DESC = 'Generic downloader that works on some sites'
  43     _VALID_URL = r'.*'
  44     IE_NAME = 'generic'
  45     _TESTS = [
  46         {
  47             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  48             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  49             'info_dict': {
  50                 'id': '13601338388002',
  51                 'ext': 'mp4',
  52                 'uploader': 'www.hodiho.fr',
  53                 'title': 'R\u00e9gis plante sa Jeep',
  54             }
  55         },
  56         # bandcamp page with custom domain
  57         {
  58             'add_ie': ['Bandcamp'],
  59             'url': 'http://bronyrock.com/track/the-pony-mash',
  60             'info_dict': {
  61                 'id': '3235767654',
  62                 'ext': 'mp3',
  63                 'title': 'The Pony Mash',
  64                 'uploader': 'M_Pallante',
  65             },
  66             'skip': 'There is a limit of 200 free downloads / month for the test song',
  67         },
  68         # embedded brightcove video
  69         # it also tests brightcove videos that need to set the 'Referer' in the
  70         # http requests
  71         {
  72             'add_ie': ['Brightcove'],
  73             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  74             'info_dict': {
  75                 'id': '2765128793001',
  76                 'ext': 'mp4',
  77                 'title': 'Le cours de bourse : l’analyse technique',
  78                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  79                 'uploader': 'BFM BUSINESS',
  80             },
  81             'params': {
  82                 'skip_download': True,
  83             },
  84         },
  85         {
  86             # https://github.com/rg3/youtube-dl/issues/2253
  87             'url': 'http://bcove.me/i6nfkrc3',
  88             'md5': '0ba9446db037002366bab3b3eb30c88c',
  89             'info_dict': {
  90                 'id': '3101154703001',
  91                 'ext': 'mp4',
  92                 'title': 'Still no power',
  93                 'uploader': 'thestar.com',
  94                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
  95             },
  96             'add_ie': ['Brightcove'],
  97         },
  98         {
  99             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 100             'md5': 'fb973ecf6e4a78a67453647444222983',
 101             'info_dict': {
 102                 'id': '3414141473001',
 103                 'ext': 'mp4',
 104                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 105                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 106                 'uploader': 'Championat',
 107             },
 108         },
 109         {
 110             # https://github.com/rg3/youtube-dl/issues/3541
 111             'add_ie': ['Brightcove'],
 112             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 113             'info_dict': {
 114                 'id': '3866516442001',
 115                 'ext': 'mp4',
 116                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 117                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 118                 'uploader': 'SBS Broadcasting',
 119             },
 120             'skip': 'Restricted to Netherlands',
 121             'params': {
 122                 'skip_download': True,  # m3u8 download
 123             },
 124         },
 125         # Direct link to a video
 126         {
 127             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 128             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 129             'info_dict': {
 130                 'id': 'trailer',
 131                 'ext': 'mp4',
 132                 'title': 'trailer',
 133                 'upload_date': '20100513',
 134             }
 135         },
 136         # ooyala video
 137         {
 138             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 139             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 140             'info_dict': {
 141                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 142                 'ext': 'mp4',
 143                 'title': '2cc213299525360.mov',  # that's what we get
 144             },
 145             'add_ie': ['Ooyala'],
 146         },
 147         # multiple ooyala embeds on SBN network websites
 148         {
 149             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 150             'info_dict': {
 151                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 152                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 153             },
 154             'playlist_mincount': 3,
 155             'params': {
 156                 'skip_download': True,
 157             },
 158             'add_ie': ['Ooyala'],
 159         },
 160         # google redirect
 161         {
 162             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 163             'info_dict': {
 164                 'id': 'cmQHVoWB5FY',
 165                 'ext': 'mp4',
 166                 'upload_date': '20130224',
 167                 'uploader_id': 'TheVerge',
 168                 'description': 're:^Chris Ziegler takes a look at the\.*',
 169                 'uploader': 'The Verge',
 170                 'title': 'First Firefox OS phones side-by-side',
 171             },
 172             'params': {
 173                 'skip_download': False,
 174             }
 175         },
 176         # embed.ly video
 177         {
 178             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 179             'info_dict': {
 180                 'id': '9ODmcdjQcHQ',
 181                 'ext': 'mp4',
 182                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 183                 'upload_date': '20140225',
 184                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 185                 'uploader': 'Tested',
 186                 'uploader_id': 'testedcom',
 187             },
 188             # No need to test YoutubeIE here
 189             'params': {
 190                 'skip_download': True,
 191             },
 192         },
 193         # funnyordie embed
 194         {
 195             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 196             'info_dict': {
 197                 'id': '18e820ec3f',
 198                 'ext': 'mp4',
 199                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 200                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 201             },
 202         },
 203         # BBC iPlayer embeds
 204         {
 205             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 206             'info_dict': {
 207                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 208             },
 209             'playlist_mincount': 18,
 210         },
 211         # RUTV embed
 212         {
 213             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 214             'info_dict': {
 215                 'id': '776940',
 216                 'ext': 'mp4',
 217                 'title': 'Охотское море стало целиком российским',
 218                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 219             },
 220             'params': {
 221                 # m3u8 download
 222                 'skip_download': True,
 223             },
 224         },
 225         # Embedded TED video
 226         {
 227             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 228             'md5': '65fdff94098e4a607385a60c5177c638',
 229             'info_dict': {
 230                 'id': '1969',
 231                 'ext': 'mp4',
 232                 'title': 'Hidden miracles of the natural world',
 233                 'uploader': 'Louie Schwartzberg',
 234                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 235             }
 236         },
 237         # Embedded Ustream video
 238         {
 239             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 240             'md5': '27b99cdb639c9b12a79bca876a073417',
 241             'info_dict': {
 242                 'id': '45734260',
 243                 'ext': 'flv',
 244                 'uploader': 'AU SPA:  The NSA and Privacy',
 245                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 246             }
 247         },
 248         # nowvideo embed hidden behind percent encoding
 249         {
 250             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 251             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 252             'info_dict': {
 253                 'id': '06e53103ca9aa',
 254                 'ext': 'flv',
 255                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 256                 'description': 'No description',
 257             },
 258         },
 259         # arte embed
 260         {
 261             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 262             'md5': '7653032cbb25bf6c80d80f217055fa43',
 263             'info_dict': {
 264                 'id': '048195-004_PLUS7-F',
 265                 'ext': 'flv',
 266                 'title': 'X:enius',
 267                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 268                 'upload_date': '20140320',
 269             },
 270             'params': {
 271                 'skip_download': 'Requires rtmpdump'
 272             }
 273         },
 274         # Condé Nast embed
 275         {
 276             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 277             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 278             'info_dict': {
 279                 'id': '53501be369702d3275860000',
 280                 'ext': 'mp4',
 281                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 282             }
 283         },
 284         # Dailymotion embed
 285         {
 286             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 287             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 288             'info_dict': {
 289                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 290                 'ext': 'mp4',
 291                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 292                 'uploader': 'Spi0n',
 293             },
 294             'add_ie': ['Dailymotion'],
 295         },
 296         # YouTube embed
 297         {
 298             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 299             'info_dict': {
 300                 'id': 'FXRb4ykk4S0',
 301                 'ext': 'mp4',
 302                 'title': 'The NBL Auction 2014',
 303                 'uploader': 'BADMINTON England',
 304                 'uploader_id': 'BADMINTONEvents',
 305                 'upload_date': '20140603',
 306                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 307             },
 308             'add_ie': ['Youtube'],
 309             'params': {
 310                 'skip_download': True,
 311             }
 312         },
 313         # MTVServices embed
 314         {
 315             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 316             'md5': '35727f82f58c76d996fc188f9755b0d5',
 317             'info_dict': {
 318                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 319                 'ext': 'mp4',
 320                 'title': 'Review',
 321                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 322             },
 323         },
 324         # YouTube embed via <data-embed-url="">
 325         {
 326             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 327             'info_dict': {
 328                 'id': '4vAffPZIT44',
 329                 'ext': 'mp4',
 330                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 331                 'uploader': 'Gameloft',
 332                 'uploader_id': 'gameloft',
 333                 'upload_date': '20140828',
 334                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 335             },
 336             'params': {
 337                 'skip_download': True,
 338             }
 339         },
 340         # Camtasia studio
 341         {
 342             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 343             'playlist': [{
 344                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 345                 'info_dict': {
 346                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 347                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 348                     'ext': 'flv',
 349                     'duration': 2235.90,
 350                 }
 351             }, {
 352                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 353                 'info_dict': {
 354                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 355                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 356                     'ext': 'flv',
 357                     'duration': 2235.93,
 358                 }
 359             }],
 360             'info_dict': {
 361                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 362             }
 363         },
 364         # Flowplayer
 365         {
 366             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 367             'md5': '9d65602bf31c6e20014319c7d07fba27',
 368             'info_dict': {
 369                 'id': '5123ea6d5e5a7',
 370                 'ext': 'mp4',
 371                 'age_limit': 18,
 372                 'uploader': 'www.handjobhub.com',
 373                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 374             }
 375         },
 376         # RSS feed
 377         {
 378             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 379             'info_dict': {
 380                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 381                 'title': 'Zero Punctuation',
 382                 'description': 're:.*groundbreaking video review series.*'
 383             },
 384             'playlist_mincount': 11,
 385         },
 386         # Multiple brightcove videos
 387         # https://github.com/rg3/youtube-dl/issues/2283
 388         {
 389             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 390             'info_dict': {
 391                 'id': 'always-never',
 392                 'title': 'Always / Never - The New Yorker',
 393             },
 394             'playlist_count': 3,
 395             'params': {
 396                 'extract_flat': False,
 397                 'skip_download': True,
 398             }
 399         },
 400         # MLB embed
 401         {
 402             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 403             'md5': '96f09a37e44da40dd083e12d9a683327',
 404             'info_dict': {
 405                 'id': '33322633',
 406                 'ext': 'mp4',
 407                 'title': 'Ump changes call to ball',
 408                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 409                 'duration': 48,
 410                 'timestamp': 1401537900,
 411                 'upload_date': '20140531',
 412                 'thumbnail': 're:^https?://.*\.jpg$',
 413             },
 414         },
 415         # Wistia embed
 416         {
 417             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 418             'md5': '8788b683c777a5cf25621eaf286d0c23',
 419             'info_dict': {
 420                 'id': '1cfaf6b7ea',
 421                 'ext': 'mov',
 422                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 423                 'duration': 643.0,
 424                 'filesize': 182808282,
 425                 'uploader': 'education-portal.com',
 426             },
 427         },
 428         {
 429             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 430             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 431             'info_dict': {
 432                 'id': 'uxjb0lwrcz',
 433                 'ext': 'mp4',
 434                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 435                 'duration': 1715.0,
 436                 'uploader': 'thoughtworks.wistia.com',
 437             },
 438         },
 439         # Direct download with broken HEAD
 440         {
 441             'url': 'http://ai-radio.org:8000/radio.opus',
 442             'info_dict': {
 443                 'id': 'radio',
 444                 'ext': 'opus',
 445                 'title': 'radio',
 446             },
 447             'params': {
 448                 'skip_download': True,  # infinite live stream
 449             },
 450             'expected_warnings': [
 451                 r'501.*Not Implemented'
 452             ],
 453         },
 454         # Soundcloud embed
 455         {
 456             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 457             'info_dict': {
 458                 'id': '174391317',
 459                 'ext': 'mp3',
 460                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 461                 'uploader': 'Sophos Security',
 462                 'title': 'Chet Chat 171 - Oct 29, 2014',
 463                 'upload_date': '20141029',
 464             }
 465         },
 466         # Livestream embed
 467         {
 468             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 469             'info_dict': {
 470                 'id': '67864563',
 471                 'ext': 'flv',
 472                 'upload_date': '20141112',
 473                 'title': 'Rosetta #CometLanding webcast HL 10',
 474             }
 475         },
 476         # LazyYT
 477         {
 478             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 479             'info_dict': {
 480                 'id': '1986',
 481                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 482             },
 483             'playlist_mincount': 2,
 484         },
 485         # Direct link with incorrect MIME type
 486         {
 487             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 488             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 489             'info_dict': {
 490                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 491                 'id': '5_Lennart_Poettering_-_Systemd',
 492                 'ext': 'webm',
 493                 'title': '5_Lennart_Poettering_-_Systemd',
 494                 'upload_date': '20141120',
 495             },
 496             'expected_warnings': [
 497                 'URL could be a direct video link, returning it as such.'
 498             ]
 499         },
 500         # Cinchcast embed
 501         {
 502             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 503             'info_dict': {
 504                 'id': '7141703',
 505                 'ext': 'mp3',
 506                 'upload_date': '20141126',
 507                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 508             }
 509         },
 510         # Cinerama player
 511         {
 512             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 513             'info_dict': {
 514                 'id': '730m_DandD_1901_512k',
 515                 'ext': 'mp4',
 516                 'uploader': 'www.abc.net.au',
 517                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 518             }
 519         },
 520         # embedded viddler video
 521         {
 522             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 523             'info_dict': {
 524                 'id': '4d03aad9',
 525                 'ext': 'mp4',
 526                 'uploader': 'deadspin',
 527                 'title': 'WALL-TO-GORTAT',
 528                 'timestamp': 1422285291,
 529                 'upload_date': '20150126',
 530             },
 531             'add_ie': ['Viddler'],
 532         },
 533         # Libsyn embed
 534         {
 535             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 536             'info_dict': {
 537                 'id': '3377616',
 538                 'ext': 'mp3',
 539                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 540                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 541                 'upload_date': '20150220',
 542             },
 543         },
 544         # jwplayer YouTube
 545         {
 546             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 547             'info_dict': {
 548                 'id': 'Mrj4DVp2zeA',
 549                 'ext': 'mp4',
 550                 'upload_date': '20150212',
 551                 'uploader': 'The National Archives UK',
 552                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 553                 'uploader_id': 'NationalArchives08',
 554                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 555             },
 556         },
 557         # rtl.nl embed
 558         {
 559             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 560             'playlist_mincount': 5,
 561             'info_dict': {
 562                 'id': 'aanslagen-kopenhagen',
 563                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 564             }
 565         },
 566         # Zapiks embed
 567         {
 568             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 569             'info_dict': {
 570                 'id': '118046',
 571                 'ext': 'mp4',
 572                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 573             }
 574         },
 575         # Kaltura embed
 576         {
 577             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 578             'info_dict': {
 579                 'id': '1_eergr3h1',
 580                 'ext': 'mp4',
 581                 'upload_date': '20150226',
 582                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 583                 'timestamp': int,
 584                 'title': 'John Carlson Postgame 2/25/15',
 585             },
 586         },
 587         # Eagle.Platform embed (generic URL)
 588         {
 589             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 590             'info_dict': {
 591                 'id': '227304',
 592                 'ext': 'mp4',
 593                 'title': 'Навальный вышел на свободу',
 594                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 595                 'thumbnail': 're:^https?://.*\.jpg$',
 596                 'duration': 87,
 597                 'view_count': int,
 598                 'age_limit': 0,
 599             },
 600         },
 601         # ClipYou (Eagle.Platform) embed (custom URL)
 602         {
 603             'url': 'http://muz-tv.ru/play/7129/',
 604             'info_dict': {
 605                 'id': '12820',
 606                 'ext': 'mp4',
 607                 'title': "'O Sole Mio",
 608                 'thumbnail': 're:^https?://.*\.jpg$',
 609                 'duration': 216,
 610                 'view_count': int,
 611             },
 612         },
 613         # Pladform embed
 614         {
 615             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 616             'info_dict': {
 617                 'id': '100183293',
 618                 'ext': 'mp4',
 619                 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
 620                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 621                 'thumbnail': 're:^https?://.*\.jpg$',
 622                 'duration': 694,
 623                 'age_limit': 0,
 624             },
 625         },
 626         # 5min embed
 627         {
 628             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 629             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 630             'info_dict': {
 631                 'id': '518726732',
 632                 'ext': 'mp4',
 633                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 634             },
 635         },
 636         # RSS feed with enclosure
 637         {
 638             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 639             'info_dict': {
 640                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 641                 'ext': 'm4v',
 642                 'upload_date': '20150228',
 643                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 644             }
 645         },
 646         # NBC Sports vplayer embed
 647         {
 648             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 649             'info_dict': {
 650                 'id': 'ln7x1qSThw4k',
 651                 'ext': 'flv',
 652                 'title': "PFT Live: New leader in the 'new-look' defense",
 653                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 654             },
 655         },
 656         # UDN embed
 657         {
 658             'url': 'http://www.udn.com/news/story/7314/822787',
 659             'md5': 'de06b4c90b042c128395a88f0384817e',
 660             'info_dict': {
 661                 'id': '300040',
 662                 'ext': 'mp4',
 663                 'title': '生物老師男變女 全校挺"做自己"',
 664                 'thumbnail': 're:^https?://.*\.jpg$',
 665             }
 666         }
 667     ]
 668
 669     def report_following_redirect(self, new_url):
 670         """Report information extraction."""
 671         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 672
 673     def _extract_rss(self, url, video_id, doc):
 674         playlist_title = doc.find('./channel/title').text
 675         playlist_desc_el = doc.find('./channel/description')
 676         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 677
 678         entries = []
 679         for it in doc.findall('./channel/item'):
 680             next_url = xpath_text(it, 'link', fatal=False)
 681             if not next_url:
 682                 enclosure_nodes = it.findall('./enclosure')
 683                 for e in enclosure_nodes:
 684                     next_url = e.attrib.get('url')
 685                     if next_url:
 686                         break
 687
 688             if not next_url:
 689                 continue
 690
 691             entries.append({
 692                 '_type': 'url',
 693                 'url': next_url,
 694                 'title': it.find('title').text,
 695             })
 696
 697         return {
 698             '_type': 'playlist',
 699             'id': url,
 700             'title': playlist_title,
 701             'description': playlist_desc,
 702             'entries': entries,
 703         }
 704
 705     def _extract_camtasia(self, url, video_id, webpage):
 706         """ Returns None if no camtasia video can be found. """
 707
 708         camtasia_cfg = self._search_regex(
 709             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 710             webpage, 'camtasia configuration file', default=None)
 711         if camtasia_cfg is None:
 712             return None
 713
 714         title = self._html_search_meta('DC.title', webpage, fatal=True)
 715
 716         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 717         camtasia_cfg = self._download_xml(
 718             camtasia_url, video_id,
 719             note='Downloading camtasia configuration',
 720             errnote='Failed to download camtasia configuration')
 721         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 722
 723         entries = []
 724         for n in fileset_node.getchildren():
 725             url_n = n.find('./uri')
 726             if url_n is None:
 727                 continue
 728
 729             entries.append({
 730                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 731                 'title': '%s - %s' % (title, n.tag),
 732                 'url': compat_urlparse.urljoin(url, url_n.text),
 733                 'duration': float_or_none(n.find('./duration').text),
 734             })
 735
 736         return {
 737             '_type': 'playlist',
 738             'entries': entries,
 739             'title': title,
 740         }
 741
 742     def _real_extract(self, url):
 743         if url.startswith('//'):
 744             return {
 745                 '_type': 'url',
 746                 'url': self.http_scheme() + url,
 747             }
 748
 749         parsed_url = compat_urlparse.urlparse(url)
 750         if not parsed_url.scheme:
 751             default_search = self._downloader.params.get('default_search')
 752             if default_search is None:
 753                 default_search = 'fixup_error'
 754
 755             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 756                 if '/' in url:
 757                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 758                     return self.url_result('http://' + url)
 759                 elif default_search != 'fixup_error':
 760                     if default_search == 'auto_warning':
 761                         if re.match(r'^(?:url|URL)$', url):
 762                             raise ExtractorError(
 763                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 764                                 expected=True)
 765                         else:
 766                             self._downloader.report_warning(
 767                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 768                     return self.url_result('ytsearch:' + url)
 769
 770             if default_search in ('error', 'fixup_error'):
 771                 raise ExtractorError(
 772                     '%r is not a valid URL. '
 773                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 774                     % (url, url), expected=True)
 775             else:
 776                 if ':' not in default_search:
 777                     default_search += ':'
 778                 return self.url_result(default_search + url)
 779
 780         url, smuggled_data = unsmuggle_url(url)
 781         force_videoid = None
 782         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 783         if smuggled_data and 'force_videoid' in smuggled_data:
 784             force_videoid = smuggled_data['force_videoid']
 785             video_id = force_videoid
 786         else:
 787             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 788
 789         self.to_screen('%s: Requesting header' % video_id)
 790
 791         head_req = HEADRequest(url)
 792         head_response = self._request_webpage(
 793             head_req, video_id,
 794             note=False, errnote='Could not send HEAD request to %s' % url,
 795             fatal=False)
 796
 797         if head_response is not False:
 798             # Check for redirect
 799             new_url = head_response.geturl()
 800             if url != new_url:
 801                 self.report_following_redirect(new_url)
 802                 if force_videoid:
 803                     new_url = smuggle_url(
 804                         new_url, {'force_videoid': force_videoid})
 805                 return self.url_result(new_url)
 806
 807         full_response = None
 808         if head_response is False:
 809             full_response = self._request_webpage(url, video_id)
 810             head_response = full_response
 811
 812         # Check for direct link to a video
 813         content_type = head_response.headers.get('Content-Type', '')
 814         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 815         if m:
 816             upload_date = unified_strdate(
 817                 head_response.headers.get('Last-Modified'))
 818             return {
 819                 'id': video_id,
 820                 'title': os.path.splitext(url_basename(url))[0],
 821                 'direct': True,
 822                 'formats': [{
 823                     'format_id': m.group('format_id'),
 824                     'url': url,
 825                     'vcodec': 'none' if m.group('type') == 'audio' else None
 826                 }],
 827                 'upload_date': upload_date,
 828             }
 829
 830         if not self._downloader.params.get('test', False) and not is_intentional:
 831             self._downloader.report_warning('Falling back on generic information extractor.')
 832
 833         if not full_response:
 834             full_response = self._request_webpage(url, video_id)
 835
 836         # Maybe it's a direct link to a video?
 837         # Be careful not to download the whole thing!
 838         first_bytes = full_response.read(512)
 839         if not is_html(first_bytes):
 840             self._downloader.report_warning(
 841                 'URL could be a direct video link, returning it as such.')
 842             upload_date = unified_strdate(
 843                 head_response.headers.get('Last-Modified'))
 844             return {
 845                 'id': video_id,
 846                 'title': os.path.splitext(url_basename(url))[0],
 847                 'direct': True,
 848                 'url': url,
 849                 'upload_date': upload_date,
 850             }
 851
 852         webpage = self._webpage_read_content(
 853             full_response, url, video_id, prefix=first_bytes)
 854
 855         self.report_extraction(video_id)
 856
 857         # Is it an RSS feed?
 858         try:
 859             doc = parse_xml(webpage)
 860             if doc.tag == 'rss':
 861                 return self._extract_rss(url, video_id, doc)
 862         except compat_xml_parse_error:
 863             pass
 864
 865         # Is it a Camtasia project?
 866         camtasia_res = self._extract_camtasia(url, video_id, webpage)
 867         if camtasia_res is not None:
 868             return camtasia_res
 869
 870         # Sometimes embedded video player is hidden behind percent encoding
 871         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 872         # Unescaping the whole page allows to handle those cases in a generic way
 873         webpage = compat_urllib_parse.unquote(webpage)
 874
 875         # it's tempting to parse this further, but you would
 876         # have to take into account all the variations like
 877         #   Video Title - Site Name
 878         #   Site Name | Video Title
 879         #   Video Title - Tagline | Site Name
 880         # and so on and so forth; it's just not practical
 881         video_title = self._html_search_regex(
 882             r'(?s)<title>(.*?)</title>', webpage, 'video title',
 883             default='video')
 884
 885         # Try to detect age limit automatically
 886         age_limit = self._rta_search(webpage)
 887         # And then there are the jokers who advertise that they use RTA,
 888         # but actually don't.
 889         AGE_LIMIT_MARKERS = [
 890             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 891         ]
 892         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 893             age_limit = 18
 894
 895         # video uploader is domain name
 896         video_uploader = self._search_regex(
 897             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 898
 899         # Helper method
 900         def _playlist_from_matches(matches, getter=None, ie=None):
 901             urlrs = orderedSet(
 902                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
 903                 for m in matches)
 904             return self.playlist_result(
 905                 urlrs, playlist_id=video_id, playlist_title=video_title)
 906
 907         # Look for BrightCove:
 908         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 909         if bc_urls:
 910             self.to_screen('Brightcove video detected.')
 911             entries = [{
 912                 '_type': 'url',
 913                 'url': smuggle_url(bc_url, {'Referer': url}),
 914                 'ie_key': 'Brightcove'
 915             } for bc_url in bc_urls]
 916
 917             return {
 918                 '_type': 'playlist',
 919                 'title': video_title,
 920                 'id': video_id,
 921                 'entries': entries,
 922             }
 923
 924         # Look for embedded rtl.nl player
 925         matches = re.findall(
 926             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
 927             webpage)
 928         if matches:
 929             return _playlist_from_matches(matches, ie='RtlNl')
 930
 931         # Look for embedded (iframe) Vimeo player
 932         mobj = re.search(
 933             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
 934         if mobj:
 935             player_url = unescapeHTML(mobj.group('url'))
 936             surl = smuggle_url(player_url, {'Referer': url})
 937             return self.url_result(surl)
 938         # Look for embedded (swf embed) Vimeo player
 939         mobj = re.search(
 940             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
 941         if mobj:
 942             return self.url_result(mobj.group(1))
 943
 944         # Look for embedded YouTube player
 945         matches = re.findall(r'''(?x)
 946             (?:
 947                 <iframe[^>]+?src=|
 948                 data-video-url=|
 949                 <embed[^>]+?src=|
 950                 embedSWF\(?:\s*|
 951                 new\s+SWFObject\(
 952             )
 953             (["\'])
 954                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
 955                 (?:embed|v|p)/.+?)
 956             \1''', webpage)
 957         if matches:
 958             return _playlist_from_matches(
 959                 matches, lambda m: unescapeHTML(m[1]))
 960
 961         # Look for lazyYT YouTube embed
 962         matches = re.findall(
 963             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
 964         if matches:
 965             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
 966
 967         # Look for embedded Dailymotion player
 968         matches = re.findall(
 969             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
 970         if matches:
 971             return _playlist_from_matches(
 972                 matches, lambda m: unescapeHTML(m[1]))
 973
 974         # Look for embedded Dailymotion playlist player (#3822)
 975         m = re.search(
 976             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
 977         if m:
 978             playlists = re.findall(
 979                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
 980             if playlists:
 981                 return _playlist_from_matches(
 982                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
 983
 984         # Look for embedded Wistia player
 985         match = re.search(
 986             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
 987         if match:
 988             embed_url = self._proto_relative_url(
 989                 unescapeHTML(match.group('url')))
 990             return {
 991                 '_type': 'url_transparent',
 992                 'url': embed_url,
 993                 'ie_key': 'Wistia',
 994                 'uploader': video_uploader,
 995                 'title': video_title,
 996                 'id': video_id,
 997             }
 998
 999         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1000         if match:
1001             return {
1002                 '_type': 'url_transparent',
1003                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1004                 'ie_key': 'Wistia',
1005                 'uploader': video_uploader,
1006                 'title': video_title,
1007                 'id': match.group('id')
1008             }
1009
1010         # Look for embedded blip.tv player
1011         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
1012         if mobj:
1013             return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
1014         mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
1015         if mobj:
1016             return self.url_result(mobj.group(1), 'BlipTV')
1017
1018         # Look for embedded condenast player
1019         matches = re.findall(
1020             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1021             webpage)
1022         if matches:
1023             return {
1024                 '_type': 'playlist',
1025                 'entries': [{
1026                     '_type': 'url',
1027                     'ie_key': 'CondeNast',
1028                     'url': ma,
1029                 } for ma in matches],
1030                 'title': video_title,
1031                 'id': video_id,
1032             }
1033
1034         # Look for Bandcamp pages with custom domain
1035         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1036         if mobj is not None:
1037             burl = unescapeHTML(mobj.group(1))
1038             # Don't set the extractor because it can be a track url or an album
1039             return self.url_result(burl)
1040
1041         # Look for embedded Vevo player
1042         mobj = re.search(
1043             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1044         if mobj is not None:
1045             return self.url_result(mobj.group('url'))
1046
1047         # Look for embedded Viddler player
1048         mobj = re.search(
1049             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1050             webpage)
1051         if mobj is not None:
1052             return self.url_result(mobj.group('url'))
1053
1054         # Look for NYTimes player
1055         mobj = re.search(
1056             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1057             webpage)
1058         if mobj is not None:
1059             return self.url_result(mobj.group('url'))
1060
1061         # Look for Libsyn player
1062         mobj = re.search(
1063             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1064         if mobj is not None:
1065             return self.url_result(mobj.group('url'))
1066
1067         # Look for Ooyala videos
1068         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1069                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1070                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
1071         if mobj is not None:
1072             return OoyalaIE._build_url_result(mobj.group('ec'))
1073
1074         # Look for multiple Ooyala embeds on SBN network websites
1075         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1076         if mobj is not None:
1077             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1078             if embeds:
1079                 return _playlist_from_matches(
1080                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1081
1082         # Look for Aparat videos
1083         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1084         if mobj is not None:
1085             return self.url_result(mobj.group(1), 'Aparat')
1086
1087         # Look for MPORA videos
1088         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1089         if mobj is not None:
1090             return self.url_result(mobj.group(1), 'Mpora')
1091
1092         # Look for embedded NovaMov-based player
1093         mobj = re.search(
1094             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1095                     (?P<url>http://(?:(?:embed|www)\.)?
1096                         (?:novamov\.com|
1097                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1098                            videoweed\.(?:es|com)|
1099                            movshare\.(?:net|sx|ag)|
1100                            divxstage\.(?:eu|net|ch|co|at|ag))
1101                         /embed\.php.+?)\1''', webpage)
1102         if mobj is not None:
1103             return self.url_result(mobj.group('url'))
1104
1105         # Look for embedded Facebook player
1106         mobj = re.search(
1107             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1108         if mobj is not None:
1109             return self.url_result(mobj.group('url'), 'Facebook')
1110
1111         # Look for embedded VK player
1112         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1113         if mobj is not None:
1114             return self.url_result(mobj.group('url'), 'VK')
1115
1116         # Look for embedded ivi player
1117         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1118         if mobj is not None:
1119             return self.url_result(mobj.group('url'), 'Ivi')
1120
1121         # Look for embedded Huffington Post player
1122         mobj = re.search(
1123             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1124         if mobj is not None:
1125             return self.url_result(mobj.group('url'), 'HuffPost')
1126
1127         # Look for embed.ly
1128         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1129         if mobj is not None:
1130             return self.url_result(mobj.group('url'))
1131         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1132         if mobj is not None:
1133             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1134
1135         # Look for funnyordie embed
1136         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1137         if matches:
1138             return _playlist_from_matches(
1139                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1140
1141         # Look for BBC iPlayer embed
1142         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1143         if matches:
1144             return _playlist_from_matches(matches, ie='BBCCoUk')
1145
1146         # Look for embedded RUTV player
1147         rutv_url = RUTVIE._extract_url(webpage)
1148         if rutv_url:
1149             return self.url_result(rutv_url, 'RUTV')
1150
1151         # Look for embedded TED player
1152         mobj = re.search(
1153             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1154         if mobj is not None:
1155             return self.url_result(mobj.group('url'), 'TED')
1156
1157         # Look for embedded Ustream videos
1158         mobj = re.search(
1159             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1160         if mobj is not None:
1161             return self.url_result(mobj.group('url'), 'Ustream')
1162
1163         # Look for embedded arte.tv player
1164         mobj = re.search(
1165             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1166             webpage)
1167         if mobj is not None:
1168             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1169
1170         # Look for embedded smotri.com player
1171         smotri_url = SmotriIE._extract_url(webpage)
1172         if smotri_url:
1173             return self.url_result(smotri_url, 'Smotri')
1174
1175         # Look for embeded soundcloud player
1176         mobj = re.search(
1177             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1178             webpage)
1179         if mobj is not None:
1180             url = unescapeHTML(mobj.group('url'))
1181             return self.url_result(url)
1182
1183         # Look for embedded vulture.com player
1184         mobj = re.search(
1185             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1186             webpage)
1187         if mobj is not None:
1188             url = unescapeHTML(mobj.group('url'))
1189             return self.url_result(url, ie='Vulture')
1190
1191         # Look for embedded mtvservices player
1192         mobj = re.search(
1193             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1194             webpage)
1195         if mobj is not None:
1196             url = unescapeHTML(mobj.group('url'))
1197             return self.url_result(url, ie='MTVServicesEmbedded')
1198
1199         # Look for embedded yahoo player
1200         mobj = re.search(
1201             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1202             webpage)
1203         if mobj is not None:
1204             return self.url_result(mobj.group('url'), 'Yahoo')
1205
1206         # Look for embedded sbs.com.au player
1207         mobj = re.search(
1208             r'''(?x)
1209             (?:
1210                 <meta\s+property="og:video"\s+content=|
1211                 <iframe[^>]+?src=
1212             )
1213             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1214             webpage)
1215         if mobj is not None:
1216             return self.url_result(mobj.group('url'), 'SBS')
1217
1218         # Look for embedded Cinchcast player
1219         mobj = re.search(
1220             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1221             webpage)
1222         if mobj is not None:
1223             return self.url_result(mobj.group('url'), 'Cinchcast')
1224
1225         mobj = re.search(
1226             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1227             webpage)
1228         if mobj is not None:
1229             return self.url_result(mobj.group('url'), 'MLB')
1230
1231         mobj = re.search(
1232             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1233             webpage)
1234         if mobj is not None:
1235             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1236
1237         mobj = re.search(
1238             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1239             webpage)
1240         if mobj is not None:
1241             return self.url_result(mobj.group('url'), 'Livestream')
1242
1243         # Look for Zapiks embed
1244         mobj = re.search(
1245             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1246         if mobj is not None:
1247             return self.url_result(mobj.group('url'), 'Zapiks')
1248
1249         # Look for Kaltura embeds
1250         mobj = re.search(
1251             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1252         if mobj is not None:
1253             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1254
1255         # Look for Eagle.Platform embeds
1256         mobj = re.search(
1257             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1258         if mobj is not None:
1259             return self.url_result(mobj.group('url'), 'EaglePlatform')
1260
1261         # Look for ClipYou (uses Eagle.Platform) embeds
1262         mobj = re.search(
1263             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1264         if mobj is not None:
1265             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1266
1267         # Look for Pladform embeds
1268         mobj = re.search(
1269             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1270         if mobj is not None:
1271             return self.url_result(mobj.group('url'), 'Pladform')
1272
1273         # Look for 5min embeds
1274         mobj = re.search(
1275             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1276         if mobj is not None:
1277             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1278
1279         # Look for NBC Sports VPlayer embeds
1280         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1281         if nbc_sports_url:
1282             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1283
1284         # Look for UDN embeds
1285         mobj = re.search(
1286             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1287         if mobj is not None:
1288             return self.url_result(
1289                 url_infer_protocol(url, mobj.group('url')), 'UDNEmbed')
1290
1291         def check_video(vurl):
1292             if YoutubeIE.suitable(vurl):
1293                 return True
1294             vpath = compat_urlparse.urlparse(vurl).path
1295             vext = determine_ext(vpath)
1296             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1297
1298         def filter_video(urls):
1299             return list(filter(check_video, urls))
1300
1301         # Start with something easy: JW Player in SWFObject
1302         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1303         if not found:
1304             # Look for gorilla-vid style embedding
1305             found = filter_video(re.findall(r'''(?sx)
1306                 (?:
1307                     jw_plugins|
1308                     JWPlayerOptions|
1309                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1310                 )
1311                 .*?
1312                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1313         if not found:
1314             # Broaden the search a little bit
1315             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1316         if not found:
1317             # Broaden the findall a little bit: JWPlayer JS loader
1318             found = filter_video(re.findall(
1319                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1320         if not found:
1321             # Flow player
1322             found = filter_video(re.findall(r'''(?xs)
1323                 flowplayer\("[^"]+",\s*
1324                     \{[^}]+?\}\s*,
1325                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1326                         ["']?url["']?\s*:\s*["']([^"']+)["']
1327             ''', webpage))
1328         if not found:
1329             # Cinerama player
1330             found = re.findall(
1331                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1332         if not found:
1333             # Try to find twitter cards info
1334             found = filter_video(re.findall(
1335                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1336         if not found:
1337             # We look for Open Graph info:
1338             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1339             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1340             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1341             if m_video_type is not None:
1342                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1343         if not found:
1344             # HTML5 video
1345             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1346         if not found:
1347             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1348             found = re.search(
1349                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1350                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1351                 webpage)
1352             if not found:
1353                 # Look also in Refresh HTTP header
1354                 refresh_header = head_response.headers.get('Refresh')
1355                 if refresh_header:
1356                     found = re.search(REDIRECT_REGEX, refresh_header)
1357             if found:
1358                 new_url = found.group(1)
1359                 self.report_following_redirect(new_url)
1360                 return {
1361                     '_type': 'url',
1362                     'url': new_url,
1363                 }
1364         if not found:
1365             raise UnsupportedError(url)
1366
1367         entries = []
1368         for video_url in found:
1369             video_url = compat_urlparse.urljoin(url, video_url)
1370             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1371
1372             # Sometimes, jwplayer extraction will result in a YouTube URL
1373             if YoutubeIE.suitable(video_url):
1374                 entries.append(self.url_result(video_url, 'Youtube'))
1375                 continue
1376
1377             # here's a fun little line of code for you:
1378             video_id = os.path.splitext(video_id)[0]
1379
1380             entries.append({
1381                 'id': video_id,
1382                 'url': video_url,
1383                 'uploader': video_uploader,
1384                 'title': video_title,
1385                 'age_limit': age_limit,
1386             })
1387
1388         if len(entries) == 1:
1389             return entries[0]
1390         else:
1391             for num, e in enumerate(entries, start=1):
1392                 # 'url' results don't have a title
1393                 if e.get('title') is not None:
1394                     e['title'] = '%s (%d)' % (e['title'], num)
1395             return {
1396                 '_type': 'playlist',
1397                 'entries': entries,
1398             }