_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urllib_parse_unquote,
  13     compat_urllib_request,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     parse_xml,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import BrightcoveIE
  34 from .nbc import NBCSportsVPlayerIE
  35 from .ooyala import OoyalaIE
  36 from .rutv import RUTVIE
  37 from .sportbox import SportBoxEmbedIE
  38 from .smotri import SmotriIE
  39 from .condenast import CondeNastIE
  40 from .udn import UDNEmbedIE
  41 from .senateisvp import SenateISVPIE
  42 from .bliptv import BlipTVIE
  43 from .svt import SVTIE
  44
  45
  46 class GenericIE(InfoExtractor):
  47     IE_DESC = 'Generic downloader that works on some sites'
  48     _VALID_URL = r'.*'
  49     IE_NAME = 'generic'
  50     _TESTS = [
  51         {
  52             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  53             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  54             'info_dict': {
  55                 'id': '13601338388002',
  56                 'ext': 'mp4',
  57                 'uploader': 'www.hodiho.fr',
  58                 'title': 'R\u00e9gis plante sa Jeep',
  59             }
  60         },
  61         # bandcamp page with custom domain
  62         {
  63             'add_ie': ['Bandcamp'],
  64             'url': 'http://bronyrock.com/track/the-pony-mash',
  65             'info_dict': {
  66                 'id': '3235767654',
  67                 'ext': 'mp3',
  68                 'title': 'The Pony Mash',
  69                 'uploader': 'M_Pallante',
  70             },
  71             'skip': 'There is a limit of 200 free downloads / month for the test song',
  72         },
  73         # embedded brightcove video
  74         # it also tests brightcove videos that need to set the 'Referer' in the
  75         # http requests
  76         {
  77             'add_ie': ['Brightcove'],
  78             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  79             'info_dict': {
  80                 'id': '2765128793001',
  81                 'ext': 'mp4',
  82                 'title': 'Le cours de bourse : l’analyse technique',
  83                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  84                 'uploader': 'BFM BUSINESS',
  85             },
  86             'params': {
  87                 'skip_download': True,
  88             },
  89         },
  90         {
  91             # https://github.com/rg3/youtube-dl/issues/2253
  92             'url': 'http://bcove.me/i6nfkrc3',
  93             'md5': '0ba9446db037002366bab3b3eb30c88c',
  94             'info_dict': {
  95                 'id': '3101154703001',
  96                 'ext': 'mp4',
  97                 'title': 'Still no power',
  98                 'uploader': 'thestar.com',
  99                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 100             },
 101             'add_ie': ['Brightcove'],
 102         },
 103         {
 104             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 105             'md5': 'fb973ecf6e4a78a67453647444222983',
 106             'info_dict': {
 107                 'id': '3414141473001',
 108                 'ext': 'mp4',
 109                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 110                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 111                 'uploader': 'Championat',
 112             },
 113         },
 114         {
 115             # https://github.com/rg3/youtube-dl/issues/3541
 116             'add_ie': ['Brightcove'],
 117             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 118             'info_dict': {
 119                 'id': '3866516442001',
 120                 'ext': 'mp4',
 121                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 122                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 123                 'uploader': 'SBS Broadcasting',
 124             },
 125             'skip': 'Restricted to Netherlands',
 126             'params': {
 127                 'skip_download': True,  # m3u8 download
 128             },
 129         },
 130         # Direct link to a video
 131         {
 132             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 133             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 134             'info_dict': {
 135                 'id': 'trailer',
 136                 'ext': 'mp4',
 137                 'title': 'trailer',
 138                 'upload_date': '20100513',
 139             }
 140         },
 141         # Direct link to a media delivered compressed (requires Accept-Encoding == *)
 142         {
 143             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
 144             'md5': '128c42e68b13950268b648275386fc74',
 145             'info_dict': {
 146                 'id': 'FictionJunction-Parallel_Hearts',
 147                 'ext': 'flac',
 148                 'title': 'FictionJunction-Parallel_Hearts',
 149                 'upload_date': '20140522',
 150             },
 151             'expected_warnings': [
 152                 'URL could be a direct video link, returning it as such.'
 153             ]
 154         },
 155         # ooyala video
 156         {
 157             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 158             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 159             'info_dict': {
 160                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 161                 'ext': 'mp4',
 162                 'title': '2cc213299525360.mov',  # that's what we get
 163             },
 164             'add_ie': ['Ooyala'],
 165         },
 166         # multiple ooyala embeds on SBN network websites
 167         {
 168             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 169             'info_dict': {
 170                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 171                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 172             },
 173             'playlist_mincount': 3,
 174             'params': {
 175                 'skip_download': True,
 176             },
 177             'add_ie': ['Ooyala'],
 178         },
 179         # google redirect
 180         {
 181             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 182             'info_dict': {
 183                 'id': 'cmQHVoWB5FY',
 184                 'ext': 'mp4',
 185                 'upload_date': '20130224',
 186                 'uploader_id': 'TheVerge',
 187                 'description': 're:^Chris Ziegler takes a look at the\.*',
 188                 'uploader': 'The Verge',
 189                 'title': 'First Firefox OS phones side-by-side',
 190             },
 191             'params': {
 192                 'skip_download': False,
 193             }
 194         },
 195         # embed.ly video
 196         {
 197             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 198             'info_dict': {
 199                 'id': '9ODmcdjQcHQ',
 200                 'ext': 'mp4',
 201                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 202                 'upload_date': '20140225',
 203                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 204                 'uploader': 'Tested',
 205                 'uploader_id': 'testedcom',
 206             },
 207             # No need to test YoutubeIE here
 208             'params': {
 209                 'skip_download': True,
 210             },
 211         },
 212         # funnyordie embed
 213         {
 214             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 215             'info_dict': {
 216                 'id': '18e820ec3f',
 217                 'ext': 'mp4',
 218                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 219                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 220             },
 221         },
 222         # BBC iPlayer embeds
 223         {
 224             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 225             'info_dict': {
 226                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 227             },
 228             'playlist_mincount': 18,
 229         },
 230         # RUTV embed
 231         {
 232             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 233             'info_dict': {
 234                 'id': '776940',
 235                 'ext': 'mp4',
 236                 'title': 'Охотское море стало целиком российским',
 237                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 238             },
 239             'params': {
 240                 # m3u8 download
 241                 'skip_download': True,
 242             },
 243         },
 244         # SportBox embed
 245         {
 246             'url': 'http://www.vestifinance.ru/articles/25753',
 247             'info_dict': {
 248                 'id': '25753',
 249                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 250             },
 251             'playlist': [{
 252                 'info_dict': {
 253                     'id': '370908',
 254                     'title': 'Госзаказ. День 3',
 255                     'ext': 'mp4',
 256                 }
 257             }, {
 258                 'info_dict': {
 259                     'id': '370905',
 260                     'title': 'Госзаказ. День 2',
 261                     'ext': 'mp4',
 262                 }
 263             }, {
 264                 'info_dict': {
 265                     'id': '370902',
 266                     'title': 'Госзаказ. День 1',
 267                     'ext': 'mp4',
 268                 }
 269             }],
 270             'params': {
 271                 # m3u8 download
 272                 'skip_download': True,
 273             },
 274         },
 275         # Embedded TED video
 276         {
 277             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 278             'md5': '65fdff94098e4a607385a60c5177c638',
 279             'info_dict': {
 280                 'id': '1969',
 281                 'ext': 'mp4',
 282                 'title': 'Hidden miracles of the natural world',
 283                 'uploader': 'Louie Schwartzberg',
 284                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 285             }
 286         },
 287         # Embedded Ustream video
 288         {
 289             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 290             'md5': '27b99cdb639c9b12a79bca876a073417',
 291             'info_dict': {
 292                 'id': '45734260',
 293                 'ext': 'flv',
 294                 'uploader': 'AU SPA:  The NSA and Privacy',
 295                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 296             }
 297         },
 298         # nowvideo embed hidden behind percent encoding
 299         {
 300             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 301             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 302             'info_dict': {
 303                 'id': '06e53103ca9aa',
 304                 'ext': 'flv',
 305                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 306                 'description': 'No description',
 307             },
 308         },
 309         # arte embed
 310         {
 311             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 312             'md5': '7653032cbb25bf6c80d80f217055fa43',
 313             'info_dict': {
 314                 'id': '048195-004_PLUS7-F',
 315                 'ext': 'flv',
 316                 'title': 'X:enius',
 317                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 318                 'upload_date': '20140320',
 319             },
 320             'params': {
 321                 'skip_download': 'Requires rtmpdump'
 322             }
 323         },
 324         # Condé Nast embed
 325         {
 326             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 327             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 328             'info_dict': {
 329                 'id': '53501be369702d3275860000',
 330                 'ext': 'mp4',
 331                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 332             }
 333         },
 334         # Dailymotion embed
 335         {
 336             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 337             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 338             'info_dict': {
 339                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 340                 'ext': 'mp4',
 341                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 342                 'uploader': 'Spi0n',
 343             },
 344             'add_ie': ['Dailymotion'],
 345         },
 346         # YouTube embed
 347         {
 348             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 349             'info_dict': {
 350                 'id': 'FXRb4ykk4S0',
 351                 'ext': 'mp4',
 352                 'title': 'The NBL Auction 2014',
 353                 'uploader': 'BADMINTON England',
 354                 'uploader_id': 'BADMINTONEvents',
 355                 'upload_date': '20140603',
 356                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 357             },
 358             'add_ie': ['Youtube'],
 359             'params': {
 360                 'skip_download': True,
 361             }
 362         },
 363         # MTVServices embed
 364         {
 365             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 366             'md5': '35727f82f58c76d996fc188f9755b0d5',
 367             'info_dict': {
 368                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 369                 'ext': 'mp4',
 370                 'title': 'Review',
 371                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 372             },
 373         },
 374         # YouTube embed via <data-embed-url="">
 375         {
 376             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 377             'info_dict': {
 378                 'id': '4vAffPZIT44',
 379                 'ext': 'mp4',
 380                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 381                 'uploader': 'Gameloft',
 382                 'uploader_id': 'gameloft',
 383                 'upload_date': '20140828',
 384                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 385             },
 386             'params': {
 387                 'skip_download': True,
 388             }
 389         },
 390         # Camtasia studio
 391         {
 392             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 393             'playlist': [{
 394                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 395                 'info_dict': {
 396                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 397                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 398                     'ext': 'flv',
 399                     'duration': 2235.90,
 400                 }
 401             }, {
 402                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 403                 'info_dict': {
 404                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 405                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 406                     'ext': 'flv',
 407                     'duration': 2235.93,
 408                 }
 409             }],
 410             'info_dict': {
 411                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 412             }
 413         },
 414         # Flowplayer
 415         {
 416             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 417             'md5': '9d65602bf31c6e20014319c7d07fba27',
 418             'info_dict': {
 419                 'id': '5123ea6d5e5a7',
 420                 'ext': 'mp4',
 421                 'age_limit': 18,
 422                 'uploader': 'www.handjobhub.com',
 423                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 424             }
 425         },
 426         # RSS feed
 427         {
 428             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 429             'info_dict': {
 430                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 431                 'title': 'Zero Punctuation',
 432                 'description': 're:.*groundbreaking video review series.*'
 433             },
 434             'playlist_mincount': 11,
 435         },
 436         # Multiple brightcove videos
 437         # https://github.com/rg3/youtube-dl/issues/2283
 438         {
 439             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 440             'info_dict': {
 441                 'id': 'always-never',
 442                 'title': 'Always / Never - The New Yorker',
 443             },
 444             'playlist_count': 3,
 445             'params': {
 446                 'extract_flat': False,
 447                 'skip_download': True,
 448             }
 449         },
 450         # MLB embed
 451         {
 452             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 453             'md5': '96f09a37e44da40dd083e12d9a683327',
 454             'info_dict': {
 455                 'id': '33322633',
 456                 'ext': 'mp4',
 457                 'title': 'Ump changes call to ball',
 458                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 459                 'duration': 48,
 460                 'timestamp': 1401537900,
 461                 'upload_date': '20140531',
 462                 'thumbnail': 're:^https?://.*\.jpg$',
 463             },
 464         },
 465         # Wistia embed
 466         {
 467             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 468             'md5': '8788b683c777a5cf25621eaf286d0c23',
 469             'info_dict': {
 470                 'id': '1cfaf6b7ea',
 471                 'ext': 'mov',
 472                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 473                 'duration': 643.0,
 474                 'filesize': 182808282,
 475                 'uploader': 'education-portal.com',
 476             },
 477         },
 478         {
 479             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 480             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 481             'info_dict': {
 482                 'id': 'uxjb0lwrcz',
 483                 'ext': 'mp4',
 484                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 485                 'duration': 1715.0,
 486                 'uploader': 'thoughtworks.wistia.com',
 487             },
 488         },
 489         # Direct download with broken HEAD
 490         {
 491             'url': 'http://ai-radio.org:8000/radio.opus',
 492             'info_dict': {
 493                 'id': 'radio',
 494                 'ext': 'opus',
 495                 'title': 'radio',
 496             },
 497             'params': {
 498                 'skip_download': True,  # infinite live stream
 499             },
 500             'expected_warnings': [
 501                 r'501.*Not Implemented'
 502             ],
 503         },
 504         # Soundcloud embed
 505         {
 506             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 507             'info_dict': {
 508                 'id': '174391317',
 509                 'ext': 'mp3',
 510                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 511                 'uploader': 'Sophos Security',
 512                 'title': 'Chet Chat 171 - Oct 29, 2014',
 513                 'upload_date': '20141029',
 514             }
 515         },
 516         # Livestream embed
 517         {
 518             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 519             'info_dict': {
 520                 'id': '67864563',
 521                 'ext': 'flv',
 522                 'upload_date': '20141112',
 523                 'title': 'Rosetta #CometLanding webcast HL 10',
 524             }
 525         },
 526         # LazyYT
 527         {
 528             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 529             'info_dict': {
 530                 'id': '1986',
 531                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 532             },
 533             'playlist_mincount': 2,
 534         },
 535         # Direct link with incorrect MIME type
 536         {
 537             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 538             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 539             'info_dict': {
 540                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 541                 'id': '5_Lennart_Poettering_-_Systemd',
 542                 'ext': 'webm',
 543                 'title': '5_Lennart_Poettering_-_Systemd',
 544                 'upload_date': '20141120',
 545             },
 546             'expected_warnings': [
 547                 'URL could be a direct video link, returning it as such.'
 548             ]
 549         },
 550         # Cinchcast embed
 551         {
 552             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 553             'info_dict': {
 554                 'id': '7141703',
 555                 'ext': 'mp3',
 556                 'upload_date': '20141126',
 557                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 558             }
 559         },
 560         # Cinerama player
 561         {
 562             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 563             'info_dict': {
 564                 'id': '730m_DandD_1901_512k',
 565                 'ext': 'mp4',
 566                 'uploader': 'www.abc.net.au',
 567                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 568             }
 569         },
 570         # embedded viddler video
 571         {
 572             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 573             'info_dict': {
 574                 'id': '4d03aad9',
 575                 'ext': 'mp4',
 576                 'uploader': 'deadspin',
 577                 'title': 'WALL-TO-GORTAT',
 578                 'timestamp': 1422285291,
 579                 'upload_date': '20150126',
 580             },
 581             'add_ie': ['Viddler'],
 582         },
 583         # Libsyn embed
 584         {
 585             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 586             'info_dict': {
 587                 'id': '3377616',
 588                 'ext': 'mp3',
 589                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 590                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 591                 'upload_date': '20150220',
 592             },
 593         },
 594         # jwplayer YouTube
 595         {
 596             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 597             'info_dict': {
 598                 'id': 'Mrj4DVp2zeA',
 599                 'ext': 'mp4',
 600                 'upload_date': '20150212',
 601                 'uploader': 'The National Archives UK',
 602                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 603                 'uploader_id': 'NationalArchives08',
 604                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 605             },
 606         },
 607         # rtl.nl embed
 608         {
 609             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 610             'playlist_mincount': 5,
 611             'info_dict': {
 612                 'id': 'aanslagen-kopenhagen',
 613                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 614             }
 615         },
 616         # Zapiks embed
 617         {
 618             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 619             'info_dict': {
 620                 'id': '118046',
 621                 'ext': 'mp4',
 622                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 623             }
 624         },
 625         # Kaltura embed
 626         {
 627             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 628             'info_dict': {
 629                 'id': '1_eergr3h1',
 630                 'ext': 'mp4',
 631                 'upload_date': '20150226',
 632                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 633                 'timestamp': int,
 634                 'title': 'John Carlson Postgame 2/25/15',
 635             },
 636         },
 637         # Eagle.Platform embed (generic URL)
 638         {
 639             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 640             'info_dict': {
 641                 'id': '227304',
 642                 'ext': 'mp4',
 643                 'title': 'Навальный вышел на свободу',
 644                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 645                 'thumbnail': 're:^https?://.*\.jpg$',
 646                 'duration': 87,
 647                 'view_count': int,
 648                 'age_limit': 0,
 649             },
 650         },
 651         # ClipYou (Eagle.Platform) embed (custom URL)
 652         {
 653             'url': 'http://muz-tv.ru/play/7129/',
 654             'info_dict': {
 655                 'id': '12820',
 656                 'ext': 'mp4',
 657                 'title': "'O Sole Mio",
 658                 'thumbnail': 're:^https?://.*\.jpg$',
 659                 'duration': 216,
 660                 'view_count': int,
 661             },
 662         },
 663         # Pladform embed
 664         {
 665             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 666             'info_dict': {
 667                 'id': '100183293',
 668                 'ext': 'mp4',
 669                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 670                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 671                 'thumbnail': 're:^https?://.*\.jpg$',
 672                 'duration': 694,
 673                 'age_limit': 0,
 674             },
 675         },
 676         # Playwire embed
 677         {
 678             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 679             'info_dict': {
 680                 'id': '3519514',
 681                 'ext': 'mp4',
 682                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 683                 'thumbnail': 're:^https?://.*\.png$',
 684                 'duration': 45.115,
 685             },
 686         },
 687         # 5min embed
 688         {
 689             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 690             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 691             'info_dict': {
 692                 'id': '518726732',
 693                 'ext': 'mp4',
 694                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 695             },
 696         },
 697         # SVT embed
 698         {
 699             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 700             'info_dict': {
 701                 'id': '2900353',
 702                 'ext': 'flv',
 703                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 704                 'duration': 27,
 705                 'age_limit': 0,
 706             },
 707         },
 708         # RSS feed with enclosure
 709         {
 710             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 711             'info_dict': {
 712                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 713                 'ext': 'm4v',
 714                 'upload_date': '20150228',
 715                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 716             }
 717         },
 718         # Crooks and Liars embed
 719         {
 720             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 721             'info_dict': {
 722                 'id': '8RUoRhRi',
 723                 'ext': 'mp4',
 724                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 725                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 726                 'timestamp': 1428207000,
 727                 'upload_date': '20150405',
 728                 'uploader': 'Heather',
 729             },
 730         },
 731         # Crooks and Liars external embed
 732         {
 733             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 734             'info_dict': {
 735                 'id': 'MTE3MjUtMzQ2MzA',
 736                 'ext': 'mp4',
 737                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 738                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 739                 'timestamp': 1265032391,
 740                 'upload_date': '20100201',
 741                 'uploader': 'Heather',
 742             },
 743         },
 744         # NBC Sports vplayer embed
 745         {
 746             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 747             'info_dict': {
 748                 'id': 'ln7x1qSThw4k',
 749                 'ext': 'flv',
 750                 'title': "PFT Live: New leader in the 'new-look' defense",
 751                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 752             },
 753         },
 754         # UDN embed
 755         {
 756             'url': 'http://www.udn.com/news/story/7314/822787',
 757             'md5': 'fd2060e988c326991037b9aff9df21a6',
 758             'info_dict': {
 759                 'id': '300346',
 760                 'ext': 'mp4',
 761                 'title': '中一中男師變性 全校師生力挺',
 762                 'thumbnail': 're:^https?://.*\.jpg$',
 763             }
 764         },
 765         # Ooyala embed
 766         {
 767             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 768             'info_dict': {
 769                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 770                 'ext': 'mp4',
 771                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 772                 'title': 'This is what separates the Excel masters from the wannabes',
 773             },
 774             'params': {
 775                 # m3u8 downloads
 776                 'skip_download': True,
 777             }
 778         },
 779         # Contains a SMIL manifest
 780         {
 781             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 782             'info_dict': {
 783                 'id': 'file',
 784                 'ext': 'flv',
 785                 'title': '+ Football: Lottery Champions League Europe',
 786                 'uploader': 'www.telewebion.com',
 787             },
 788             'params': {
 789                 # rtmpe downloads
 790                 'skip_download': True,
 791             }
 792         }
 793     ]
 794
 795     def report_following_redirect(self, new_url):
 796         """Report information extraction."""
 797         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 798
 799     def _extract_rss(self, url, video_id, doc):
 800         playlist_title = doc.find('./channel/title').text
 801         playlist_desc_el = doc.find('./channel/description')
 802         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 803
 804         entries = []
 805         for it in doc.findall('./channel/item'):
 806             next_url = xpath_text(it, 'link', fatal=False)
 807             if not next_url:
 808                 enclosure_nodes = it.findall('./enclosure')
 809                 for e in enclosure_nodes:
 810                     next_url = e.attrib.get('url')
 811                     if next_url:
 812                         break
 813
 814             if not next_url:
 815                 continue
 816
 817             entries.append({
 818                 '_type': 'url',
 819                 'url': next_url,
 820                 'title': it.find('title').text,
 821             })
 822
 823         return {
 824             '_type': 'playlist',
 825             'id': url,
 826             'title': playlist_title,
 827             'description': playlist_desc,
 828             'entries': entries,
 829         }
 830
 831     def _extract_camtasia(self, url, video_id, webpage):
 832         """ Returns None if no camtasia video can be found. """
 833
 834         camtasia_cfg = self._search_regex(
 835             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 836             webpage, 'camtasia configuration file', default=None)
 837         if camtasia_cfg is None:
 838             return None
 839
 840         title = self._html_search_meta('DC.title', webpage, fatal=True)
 841
 842         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 843         camtasia_cfg = self._download_xml(
 844             camtasia_url, video_id,
 845             note='Downloading camtasia configuration',
 846             errnote='Failed to download camtasia configuration')
 847         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 848
 849         entries = []
 850         for n in fileset_node.getchildren():
 851             url_n = n.find('./uri')
 852             if url_n is None:
 853                 continue
 854
 855             entries.append({
 856                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 857                 'title': '%s - %s' % (title, n.tag),
 858                 'url': compat_urlparse.urljoin(url, url_n.text),
 859                 'duration': float_or_none(n.find('./duration').text),
 860             })
 861
 862         return {
 863             '_type': 'playlist',
 864             'entries': entries,
 865             'title': title,
 866         }
 867
 868     def _real_extract(self, url):
 869         if url.startswith('//'):
 870             return {
 871                 '_type': 'url',
 872                 'url': self.http_scheme() + url,
 873             }
 874
 875         parsed_url = compat_urlparse.urlparse(url)
 876         if not parsed_url.scheme:
 877             default_search = self._downloader.params.get('default_search')
 878             if default_search is None:
 879                 default_search = 'fixup_error'
 880
 881             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 882                 if '/' in url:
 883                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 884                     return self.url_result('http://' + url)
 885                 elif default_search != 'fixup_error':
 886                     if default_search == 'auto_warning':
 887                         if re.match(r'^(?:url|URL)$', url):
 888                             raise ExtractorError(
 889                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 890                                 expected=True)
 891                         else:
 892                             self._downloader.report_warning(
 893                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 894                     return self.url_result('ytsearch:' + url)
 895
 896             if default_search in ('error', 'fixup_error'):
 897                 raise ExtractorError(
 898                     '%r is not a valid URL. '
 899                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 900                     % (url, url), expected=True)
 901             else:
 902                 if ':' not in default_search:
 903                     default_search += ':'
 904                 return self.url_result(default_search + url)
 905
 906         url, smuggled_data = unsmuggle_url(url)
 907         force_videoid = None
 908         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 909         if smuggled_data and 'force_videoid' in smuggled_data:
 910             force_videoid = smuggled_data['force_videoid']
 911             video_id = force_videoid
 912         else:
 913             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
 914
 915         self.to_screen('%s: Requesting header' % video_id)
 916
 917         head_req = HEADRequest(url)
 918         head_response = self._request_webpage(
 919             head_req, video_id,
 920             note=False, errnote='Could not send HEAD request to %s' % url,
 921             fatal=False)
 922
 923         if head_response is not False:
 924             # Check for redirect
 925             new_url = head_response.geturl()
 926             if url != new_url:
 927                 self.report_following_redirect(new_url)
 928                 if force_videoid:
 929                     new_url = smuggle_url(
 930                         new_url, {'force_videoid': force_videoid})
 931                 return self.url_result(new_url)
 932
 933         full_response = None
 934         if head_response is False:
 935             request = compat_urllib_request.Request(url)
 936             request.add_header('Accept-Encoding', '*')
 937             full_response = self._request_webpage(request, video_id)
 938             head_response = full_response
 939
 940         # Check for direct link to a video
 941         content_type = head_response.headers.get('Content-Type', '')
 942         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 943         if m:
 944             upload_date = unified_strdate(
 945                 head_response.headers.get('Last-Modified'))
 946             return {
 947                 'id': video_id,
 948                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
 949                 'direct': True,
 950                 'formats': [{
 951                     'format_id': m.group('format_id'),
 952                     'url': url,
 953                     'vcodec': 'none' if m.group('type') == 'audio' else None
 954                 }],
 955                 'upload_date': upload_date,
 956             }
 957
 958         if not self._downloader.params.get('test', False) and not is_intentional:
 959             self._downloader.report_warning('Falling back on generic information extractor.')
 960
 961         if not full_response:
 962             request = compat_urllib_request.Request(url)
 963             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
 964             # making it impossible to download only chunk of the file (yet we need only 512kB to
 965             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
 966             # that will always result in downloading the whole file that is not desirable.
 967             # Therefore for extraction pass we have to override Accept-Encoding to any in order
 968             # to accept raw bytes and being able to download only a chunk.
 969             # It would probably be better to solve this by checking Content-Type for application/octet-stream
 970             # after HEAD request finishes, but not sure if we can rely on this.
 971             request.add_header('Accept-Encoding', '*')
 972             full_response = self._request_webpage(request, video_id)
 973
 974         # Maybe it's a direct link to a video?
 975         # Be careful not to download the whole thing!
 976         first_bytes = full_response.read(512)
 977         if not is_html(first_bytes):
 978             self._downloader.report_warning(
 979                 'URL could be a direct video link, returning it as such.')
 980             upload_date = unified_strdate(
 981                 head_response.headers.get('Last-Modified'))
 982             return {
 983                 'id': video_id,
 984                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
 985                 'direct': True,
 986                 'url': url,
 987                 'upload_date': upload_date,
 988             }
 989
 990         webpage = self._webpage_read_content(
 991             full_response, url, video_id, prefix=first_bytes)
 992
 993         self.report_extraction(video_id)
 994
 995         # Is it an RSS feed?
 996         try:
 997             doc = parse_xml(webpage)
 998             if doc.tag == 'rss':
 999                 return self._extract_rss(url, video_id, doc)
1000         except compat_xml_parse_error:
1001             pass
1002
1003         # Is it a Camtasia project?
1004         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1005         if camtasia_res is not None:
1006             return camtasia_res
1007
1008         # Sometimes embedded video player is hidden behind percent encoding
1009         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1010         # Unescaping the whole page allows to handle those cases in a generic way
1011         webpage = compat_urllib_parse.unquote(webpage)
1012
1013         # it's tempting to parse this further, but you would
1014         # have to take into account all the variations like
1015         #   Video Title - Site Name
1016         #   Site Name | Video Title
1017         #   Video Title - Tagline | Site Name
1018         # and so on and so forth; it's just not practical
1019         video_title = self._html_search_regex(
1020             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1021             default='video')
1022
1023         # Try to detect age limit automatically
1024         age_limit = self._rta_search(webpage)
1025         # And then there are the jokers who advertise that they use RTA,
1026         # but actually don't.
1027         AGE_LIMIT_MARKERS = [
1028             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1029         ]
1030         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1031             age_limit = 18
1032
1033         # video uploader is domain name
1034         video_uploader = self._search_regex(
1035             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1036
1037         # Helper method
1038         def _playlist_from_matches(matches, getter=None, ie=None):
1039             urlrs = orderedSet(
1040                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1041                 for m in matches)
1042             return self.playlist_result(
1043                 urlrs, playlist_id=video_id, playlist_title=video_title)
1044
1045         # Look for BrightCove:
1046         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1047         if bc_urls:
1048             self.to_screen('Brightcove video detected.')
1049             entries = [{
1050                 '_type': 'url',
1051                 'url': smuggle_url(bc_url, {'Referer': url}),
1052                 'ie_key': 'Brightcove'
1053             } for bc_url in bc_urls]
1054
1055             return {
1056                 '_type': 'playlist',
1057                 'title': video_title,
1058                 'id': video_id,
1059                 'entries': entries,
1060             }
1061
1062         # Look for embedded rtl.nl player
1063         matches = re.findall(
1064             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1065             webpage)
1066         if matches:
1067             return _playlist_from_matches(matches, ie='RtlNl')
1068
1069         # Look for embedded (iframe) Vimeo player
1070         mobj = re.search(
1071             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1072         if mobj:
1073             player_url = unescapeHTML(mobj.group('url'))
1074             surl = smuggle_url(player_url, {'Referer': url})
1075             return self.url_result(surl)
1076         # Look for embedded (swf embed) Vimeo player
1077         mobj = re.search(
1078             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1079         if mobj:
1080             return self.url_result(mobj.group(1))
1081
1082         # Look for embedded YouTube player
1083         matches = re.findall(r'''(?x)
1084             (?:
1085                 <iframe[^>]+?src=|
1086                 data-video-url=|
1087                 <embed[^>]+?src=|
1088                 embedSWF\(?:\s*|
1089                 new\s+SWFObject\(
1090             )
1091             (["\'])
1092                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1093                 (?:embed|v|p)/.+?)
1094             \1''', webpage)
1095         if matches:
1096             return _playlist_from_matches(
1097                 matches, lambda m: unescapeHTML(m[1]))
1098
1099         # Look for lazyYT YouTube embed
1100         matches = re.findall(
1101             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1102         if matches:
1103             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1104
1105         # Look for embedded Dailymotion player
1106         matches = re.findall(
1107             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1108         if matches:
1109             return _playlist_from_matches(
1110                 matches, lambda m: unescapeHTML(m[1]))
1111
1112         # Look for embedded Dailymotion playlist player (#3822)
1113         m = re.search(
1114             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1115         if m:
1116             playlists = re.findall(
1117                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1118             if playlists:
1119                 return _playlist_from_matches(
1120                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1121
1122         # Look for embedded Wistia player
1123         match = re.search(
1124             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1125         if match:
1126             embed_url = self._proto_relative_url(
1127                 unescapeHTML(match.group('url')))
1128             return {
1129                 '_type': 'url_transparent',
1130                 'url': embed_url,
1131                 'ie_key': 'Wistia',
1132                 'uploader': video_uploader,
1133                 'title': video_title,
1134                 'id': video_id,
1135             }
1136
1137         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1138         if match:
1139             return {
1140                 '_type': 'url_transparent',
1141                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1142                 'ie_key': 'Wistia',
1143                 'uploader': video_uploader,
1144                 'title': video_title,
1145                 'id': match.group('id')
1146             }
1147
1148         # Look for embedded blip.tv player
1149         bliptv_url = BlipTVIE._extract_url(webpage)
1150         if bliptv_url:
1151             return self.url_result(bliptv_url, 'BlipTV')
1152
1153         # Look for SVT player
1154         svt_url = SVTIE._extract_url(webpage)
1155         if svt_url:
1156             return self.url_result(svt_url, 'SVT')
1157
1158         # Look for embedded condenast player
1159         matches = re.findall(
1160             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1161             webpage)
1162         if matches:
1163             return {
1164                 '_type': 'playlist',
1165                 'entries': [{
1166                     '_type': 'url',
1167                     'ie_key': 'CondeNast',
1168                     'url': ma,
1169                 } for ma in matches],
1170                 'title': video_title,
1171                 'id': video_id,
1172             }
1173
1174         # Look for Bandcamp pages with custom domain
1175         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1176         if mobj is not None:
1177             burl = unescapeHTML(mobj.group(1))
1178             # Don't set the extractor because it can be a track url or an album
1179             return self.url_result(burl)
1180
1181         # Look for embedded Vevo player
1182         mobj = re.search(
1183             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1184         if mobj is not None:
1185             return self.url_result(mobj.group('url'))
1186
1187         # Look for embedded Viddler player
1188         mobj = re.search(
1189             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1190             webpage)
1191         if mobj is not None:
1192             return self.url_result(mobj.group('url'))
1193
1194         # Look for NYTimes player
1195         mobj = re.search(
1196             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1197             webpage)
1198         if mobj is not None:
1199             return self.url_result(mobj.group('url'))
1200
1201         # Look for Libsyn player
1202         mobj = re.search(
1203             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1204         if mobj is not None:
1205             return self.url_result(mobj.group('url'))
1206
1207         # Look for Ooyala videos
1208         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1209                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1210                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1211                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1212         if mobj is not None:
1213             return OoyalaIE._build_url_result(mobj.group('ec'))
1214
1215         # Look for multiple Ooyala embeds on SBN network websites
1216         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1217         if mobj is not None:
1218             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1219             if embeds:
1220                 return _playlist_from_matches(
1221                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1222
1223         # Look for Aparat videos
1224         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1225         if mobj is not None:
1226             return self.url_result(mobj.group(1), 'Aparat')
1227
1228         # Look for MPORA videos
1229         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1230         if mobj is not None:
1231             return self.url_result(mobj.group(1), 'Mpora')
1232
1233         # Look for embedded NovaMov-based player
1234         mobj = re.search(
1235             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1236                     (?P<url>http://(?:(?:embed|www)\.)?
1237                         (?:novamov\.com|
1238                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1239                            videoweed\.(?:es|com)|
1240                            movshare\.(?:net|sx|ag)|
1241                            divxstage\.(?:eu|net|ch|co|at|ag))
1242                         /embed\.php.+?)\1''', webpage)
1243         if mobj is not None:
1244             return self.url_result(mobj.group('url'))
1245
1246         # Look for embedded Facebook player
1247         mobj = re.search(
1248             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1249         if mobj is not None:
1250             return self.url_result(mobj.group('url'), 'Facebook')
1251
1252         # Look for embedded VK player
1253         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1254         if mobj is not None:
1255             return self.url_result(mobj.group('url'), 'VK')
1256
1257         # Look for embedded ivi player
1258         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1259         if mobj is not None:
1260             return self.url_result(mobj.group('url'), 'Ivi')
1261
1262         # Look for embedded Huffington Post player
1263         mobj = re.search(
1264             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1265         if mobj is not None:
1266             return self.url_result(mobj.group('url'), 'HuffPost')
1267
1268         # Look for embed.ly
1269         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1270         if mobj is not None:
1271             return self.url_result(mobj.group('url'))
1272         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1273         if mobj is not None:
1274             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1275
1276         # Look for funnyordie embed
1277         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1278         if matches:
1279             return _playlist_from_matches(
1280                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1281
1282         # Look for BBC iPlayer embed
1283         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1284         if matches:
1285             return _playlist_from_matches(matches, ie='BBCCoUk')
1286
1287         # Look for embedded RUTV player
1288         rutv_url = RUTVIE._extract_url(webpage)
1289         if rutv_url:
1290             return self.url_result(rutv_url, 'RUTV')
1291
1292         # Look for embedded SportBox player
1293         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1294         if sportbox_urls:
1295             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1296
1297         # Look for embedded TED player
1298         mobj = re.search(
1299             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1300         if mobj is not None:
1301             return self.url_result(mobj.group('url'), 'TED')
1302
1303         # Look for embedded Ustream videos
1304         mobj = re.search(
1305             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1306         if mobj is not None:
1307             return self.url_result(mobj.group('url'), 'Ustream')
1308
1309         # Look for embedded arte.tv player
1310         mobj = re.search(
1311             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1312             webpage)
1313         if mobj is not None:
1314             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1315
1316         # Look for embedded smotri.com player
1317         smotri_url = SmotriIE._extract_url(webpage)
1318         if smotri_url:
1319             return self.url_result(smotri_url, 'Smotri')
1320
1321         # Look for embedded soundcloud player
1322         mobj = re.search(
1323             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1324             webpage)
1325         if mobj is not None:
1326             url = unescapeHTML(mobj.group('url'))
1327             return self.url_result(url)
1328
1329         # Look for embedded vulture.com player
1330         mobj = re.search(
1331             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1332             webpage)
1333         if mobj is not None:
1334             url = unescapeHTML(mobj.group('url'))
1335             return self.url_result(url, ie='Vulture')
1336
1337         # Look for embedded mtvservices player
1338         mobj = re.search(
1339             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1340             webpage)
1341         if mobj is not None:
1342             url = unescapeHTML(mobj.group('url'))
1343             return self.url_result(url, ie='MTVServicesEmbedded')
1344
1345         # Look for embedded yahoo player
1346         mobj = re.search(
1347             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1348             webpage)
1349         if mobj is not None:
1350             return self.url_result(mobj.group('url'), 'Yahoo')
1351
1352         # Look for embedded sbs.com.au player
1353         mobj = re.search(
1354             r'''(?x)
1355             (?:
1356                 <meta\s+property="og:video"\s+content=|
1357                 <iframe[^>]+?src=
1358             )
1359             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1360             webpage)
1361         if mobj is not None:
1362             return self.url_result(mobj.group('url'), 'SBS')
1363
1364         # Look for embedded Cinchcast player
1365         mobj = re.search(
1366             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1367             webpage)
1368         if mobj is not None:
1369             return self.url_result(mobj.group('url'), 'Cinchcast')
1370
1371         mobj = re.search(
1372             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1373             webpage)
1374         if not mobj:
1375             mobj = re.search(
1376                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1377                 webpage)
1378         if mobj is not None:
1379             return self.url_result(mobj.group('url'), 'MLB')
1380
1381         mobj = re.search(
1382             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1383             webpage)
1384         if mobj is not None:
1385             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1386
1387         mobj = re.search(
1388             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1389             webpage)
1390         if mobj is not None:
1391             return self.url_result(mobj.group('url'), 'Livestream')
1392
1393         # Look for Zapiks embed
1394         mobj = re.search(
1395             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1396         if mobj is not None:
1397             return self.url_result(mobj.group('url'), 'Zapiks')
1398
1399         # Look for Kaltura embeds
1400         mobj = re.search(
1401             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1402         if mobj is not None:
1403             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1404
1405         # Look for Eagle.Platform embeds
1406         mobj = re.search(
1407             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1408         if mobj is not None:
1409             return self.url_result(mobj.group('url'), 'EaglePlatform')
1410
1411         # Look for ClipYou (uses Eagle.Platform) embeds
1412         mobj = re.search(
1413             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1414         if mobj is not None:
1415             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1416
1417         # Look for Pladform embeds
1418         mobj = re.search(
1419             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1420         if mobj is not None:
1421             return self.url_result(mobj.group('url'), 'Pladform')
1422
1423         # Look for Playwire embeds
1424         mobj = re.search(
1425             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1426         if mobj is not None:
1427             return self.url_result(mobj.group('url'))
1428
1429         # Look for 5min embeds
1430         mobj = re.search(
1431             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1432         if mobj is not None:
1433             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1434
1435         # Look for Crooks and Liars embeds
1436         mobj = re.search(
1437             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1438         if mobj is not None:
1439             return self.url_result(mobj.group('url'))
1440
1441         # Look for NBC Sports VPlayer embeds
1442         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1443         if nbc_sports_url:
1444             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1445
1446         # Look for UDN embeds
1447         mobj = re.search(
1448             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1449         if mobj is not None:
1450             return self.url_result(
1451                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1452
1453         # Look for Senate ISVP iframe
1454         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1455         if senate_isvp_url:
1456             return self.url_result(senate_isvp_url, 'SenateISVP')
1457
1458         def check_video(vurl):
1459             if YoutubeIE.suitable(vurl):
1460                 return True
1461             vpath = compat_urlparse.urlparse(vurl).path
1462             vext = determine_ext(vpath)
1463             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1464
1465         def filter_video(urls):
1466             return list(filter(check_video, urls))
1467
1468         # Start with something easy: JW Player in SWFObject
1469         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1470         if not found:
1471             # Look for gorilla-vid style embedding
1472             found = filter_video(re.findall(r'''(?sx)
1473                 (?:
1474                     jw_plugins|
1475                     JWPlayerOptions|
1476                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1477                 )
1478                 .*?
1479                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1480         if not found:
1481             # Broaden the search a little bit
1482             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1483         if not found:
1484             # Broaden the search a little bit: JWPlayer JS loader
1485             found = filter_video(re.findall(
1486                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1487         if not found:
1488             # Flow player
1489             found = filter_video(re.findall(r'''(?xs)
1490                 flowplayer\("[^"]+",\s*
1491                     \{[^}]+?\}\s*,
1492                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1493                         ["']?url["']?\s*:\s*["']([^"']+)["']
1494             ''', webpage))
1495         if not found:
1496             # Cinerama player
1497             found = re.findall(
1498                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1499         if not found:
1500             # Try to find twitter cards info
1501             found = filter_video(re.findall(
1502                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1503         if not found:
1504             # We look for Open Graph info:
1505             # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
1506             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1507             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1508             if m_video_type is not None:
1509                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1510         if not found:
1511             # HTML5 video
1512             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1513         if not found:
1514             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1515             found = re.search(
1516                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1517                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1518                 webpage)
1519             if not found:
1520                 # Look also in Refresh HTTP header
1521                 refresh_header = head_response.headers.get('Refresh')
1522                 if refresh_header:
1523                     found = re.search(REDIRECT_REGEX, refresh_header)
1524             if found:
1525                 new_url = compat_urlparse.urljoin(url, found.group(1))
1526                 self.report_following_redirect(new_url)
1527                 return {
1528                     '_type': 'url',
1529                     'url': new_url,
1530                 }
1531         if not found:
1532             raise UnsupportedError(url)
1533
1534         entries = []
1535         for video_url in found:
1536             video_url = compat_urlparse.urljoin(url, video_url)
1537             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1538
1539             # Sometimes, jwplayer extraction will result in a YouTube URL
1540             if YoutubeIE.suitable(video_url):
1541                 entries.append(self.url_result(video_url, 'Youtube'))
1542                 continue
1543
1544             # here's a fun little line of code for you:
1545             video_id = os.path.splitext(video_id)[0]
1546
1547             if determine_ext(video_url) == 'smil':
1548                 entries.append({
1549                     'id': video_id,
1550                     'formats': self._extract_smil_formats(video_url, video_id),
1551                     'uploader': video_uploader,
1552                     'title': video_title,
1553                     'age_limit': age_limit,
1554                 })
1555             else:
1556                 entries.append({
1557                     'id': video_id,
1558                     'url': video_url,
1559                     'uploader': video_uploader,
1560                     'title': video_title,
1561                     'age_limit': age_limit,
1562                 })
1563
1564         if len(entries) == 1:
1565             return entries[0]
1566         else:
1567             for num, e in enumerate(entries, start=1):
1568                 # 'url' results don't have a title
1569                 if e.get('title') is not None:
1570                     e['title'] = '%s (%d)' % (e['title'], num)
1571             return {
1572                 '_type': 'playlist',
1573                 'entries': entries,
1574             }