_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urllib_parse_unquote,
  13     compat_urllib_request,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     parse_xml,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import BrightcoveIE
  34 from .nbc import NBCSportsVPlayerIE
  35 from .ooyala import OoyalaIE
  36 from .rutv import RUTVIE
  37 from .tvc import TVCIE
  38 from .sportbox import SportBoxEmbedIE
  39 from .smotri import SmotriIE
  40 from .myvi import MyviIE
  41 from .condenast import CondeNastIE
  42 from .udn import UDNEmbedIE
  43 from .senateisvp import SenateISVPIE
  44 from .bliptv import BlipTVIE
  45 from .svt import SVTIE
  46 from .pornhub import PornHubIE
  47 from .xhamster import XHamsterEmbedIE
  48 from .vimeo import VimeoIE
  49 from .dailymotion import DailymotionCloudIE
  50 from .onionstudios import OnionStudiosIE
  51 from .snagfilms import SnagFilmsEmbedIE
  52
  53
  54 class GenericIE(InfoExtractor):
  55     IE_DESC = 'Generic downloader that works on some sites'
  56     _VALID_URL = r'.*'
  57     IE_NAME = 'generic'
  58     _TESTS = [
  59         # Direct link to a video
  60         {
  61             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  62             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  63             'info_dict': {
  64                 'id': 'trailer',
  65                 'ext': 'mp4',
  66                 'title': 'trailer',
  67                 'upload_date': '20100513',
  68             }
  69         },
  70         # Direct link to media delivered compressed (until Accept-Encoding is *)
  71         {
  72             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  73             'md5': '128c42e68b13950268b648275386fc74',
  74             'info_dict': {
  75                 'id': 'FictionJunction-Parallel_Hearts',
  76                 'ext': 'flac',
  77                 'title': 'FictionJunction-Parallel_Hearts',
  78                 'upload_date': '20140522',
  79             },
  80             'expected_warnings': [
  81                 'URL could be a direct video link, returning it as such.'
  82             ]
  83         },
  84         # Direct download with broken HEAD
  85         {
  86             'url': 'http://ai-radio.org:8000/radio.opus',
  87             'info_dict': {
  88                 'id': 'radio',
  89                 'ext': 'opus',
  90                 'title': 'radio',
  91             },
  92             'params': {
  93                 'skip_download': True,  # infinite live stream
  94             },
  95             'expected_warnings': [
  96                 r'501.*Not Implemented'
  97             ],
  98         },
  99         # Direct link with incorrect MIME type
 100         {
 101             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 102             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 103             'info_dict': {
 104                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 105                 'id': '5_Lennart_Poettering_-_Systemd',
 106                 'ext': 'webm',
 107                 'title': '5_Lennart_Poettering_-_Systemd',
 108                 'upload_date': '20141120',
 109             },
 110             'expected_warnings': [
 111                 'URL could be a direct video link, returning it as such.'
 112             ]
 113         },
 114         # RSS feed
 115         {
 116             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 117             'info_dict': {
 118                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 119                 'title': 'Zero Punctuation',
 120                 'description': 're:.*groundbreaking video review series.*'
 121             },
 122             'playlist_mincount': 11,
 123         },
 124         # RSS feed with enclosure
 125         {
 126             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 127             'info_dict': {
 128                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 129                 'ext': 'm4v',
 130                 'upload_date': '20150228',
 131                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 132             }
 133         },
 134         # google redirect
 135         {
 136             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 137             'info_dict': {
 138                 'id': 'cmQHVoWB5FY',
 139                 'ext': 'mp4',
 140                 'upload_date': '20130224',
 141                 'uploader_id': 'TheVerge',
 142                 'description': 're:^Chris Ziegler takes a look at the\.*',
 143                 'uploader': 'The Verge',
 144                 'title': 'First Firefox OS phones side-by-side',
 145             },
 146             'params': {
 147                 'skip_download': False,
 148             }
 149         },
 150         {
 151             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 152             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 153             'info_dict': {
 154                 'id': '13601338388002',
 155                 'ext': 'mp4',
 156                 'uploader': 'www.hodiho.fr',
 157                 'title': 'R\u00e9gis plante sa Jeep',
 158             }
 159         },
 160         # bandcamp page with custom domain
 161         {
 162             'add_ie': ['Bandcamp'],
 163             'url': 'http://bronyrock.com/track/the-pony-mash',
 164             'info_dict': {
 165                 'id': '3235767654',
 166                 'ext': 'mp3',
 167                 'title': 'The Pony Mash',
 168                 'uploader': 'M_Pallante',
 169             },
 170             'skip': 'There is a limit of 200 free downloads / month for the test song',
 171         },
 172         # embedded brightcove video
 173         # it also tests brightcove videos that need to set the 'Referer' in the
 174         # http requests
 175         {
 176             'add_ie': ['Brightcove'],
 177             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 178             'info_dict': {
 179                 'id': '2765128793001',
 180                 'ext': 'mp4',
 181                 'title': 'Le cours de bourse : l’analyse technique',
 182                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 183                 'uploader': 'BFM BUSINESS',
 184             },
 185             'params': {
 186                 'skip_download': True,
 187             },
 188         },
 189         {
 190             # https://github.com/rg3/youtube-dl/issues/2253
 191             'url': 'http://bcove.me/i6nfkrc3',
 192             'md5': '0ba9446db037002366bab3b3eb30c88c',
 193             'info_dict': {
 194                 'id': '3101154703001',
 195                 'ext': 'mp4',
 196                 'title': 'Still no power',
 197                 'uploader': 'thestar.com',
 198                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 199             },
 200             'add_ie': ['Brightcove'],
 201         },
 202         {
 203             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 204             'md5': 'fb973ecf6e4a78a67453647444222983',
 205             'info_dict': {
 206                 'id': '3414141473001',
 207                 'ext': 'mp4',
 208                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 209                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 210                 'uploader': 'Championat',
 211             },
 212         },
 213         {
 214             # https://github.com/rg3/youtube-dl/issues/3541
 215             'add_ie': ['Brightcove'],
 216             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 217             'info_dict': {
 218                 'id': '3866516442001',
 219                 'ext': 'mp4',
 220                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 221                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 222                 'uploader': 'SBS Broadcasting',
 223             },
 224             'skip': 'Restricted to Netherlands',
 225             'params': {
 226                 'skip_download': True,  # m3u8 download
 227             },
 228         },
 229         # ooyala video
 230         {
 231             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 232             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 233             'info_dict': {
 234                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 235                 'ext': 'mp4',
 236                 'title': '2cc213299525360.mov',  # that's what we get
 237             },
 238             'add_ie': ['Ooyala'],
 239         },
 240         # multiple ooyala embeds on SBN network websites
 241         {
 242             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 243             'info_dict': {
 244                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 245                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 246             },
 247             'playlist_mincount': 3,
 248             'params': {
 249                 'skip_download': True,
 250             },
 251             'add_ie': ['Ooyala'],
 252         },
 253         # embed.ly video
 254         {
 255             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 256             'info_dict': {
 257                 'id': '9ODmcdjQcHQ',
 258                 'ext': 'mp4',
 259                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 260                 'upload_date': '20140225',
 261                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 262                 'uploader': 'Tested',
 263                 'uploader_id': 'testedcom',
 264             },
 265             # No need to test YoutubeIE here
 266             'params': {
 267                 'skip_download': True,
 268             },
 269         },
 270         # funnyordie embed
 271         {
 272             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 273             'info_dict': {
 274                 'id': '18e820ec3f',
 275                 'ext': 'mp4',
 276                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 277                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 278             },
 279         },
 280         # BBC iPlayer embeds
 281         {
 282             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 283             'info_dict': {
 284                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 285             },
 286             'playlist_mincount': 18,
 287         },
 288         # RUTV embed
 289         {
 290             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 291             'info_dict': {
 292                 'id': '776940',
 293                 'ext': 'mp4',
 294                 'title': 'Охотское море стало целиком российским',
 295                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 296             },
 297             'params': {
 298                 # m3u8 download
 299                 'skip_download': True,
 300             },
 301         },
 302         # TVC embed
 303         {
 304             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 305             'info_dict': {
 306                 'id': '55304',
 307                 'ext': 'mp4',
 308                 'title': 'Дошкольное воспитание',
 309             },
 310         },
 311         # SportBox embed
 312         {
 313             'url': 'http://www.vestifinance.ru/articles/25753',
 314             'info_dict': {
 315                 'id': '25753',
 316                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 317             },
 318             'playlist': [{
 319                 'info_dict': {
 320                     'id': '370908',
 321                     'title': 'Госзаказ. День 3',
 322                     'ext': 'mp4',
 323                 }
 324             }, {
 325                 'info_dict': {
 326                     'id': '370905',
 327                     'title': 'Госзаказ. День 2',
 328                     'ext': 'mp4',
 329                 }
 330             }, {
 331                 'info_dict': {
 332                     'id': '370902',
 333                     'title': 'Госзаказ. День 1',
 334                     'ext': 'mp4',
 335                 }
 336             }],
 337             'params': {
 338                 # m3u8 download
 339                 'skip_download': True,
 340             },
 341         },
 342         # Myvi.ru embed
 343         {
 344             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 345             'info_dict': {
 346                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 347                 'ext': 'mp4',
 348                 'title': 'Ужастики, русский трейлер (2015)',
 349                 'thumbnail': 're:^https?://.*\.jpg$',
 350                 'duration': 153,
 351             }
 352         },
 353         # XHamster embed
 354         {
 355             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 356             'info_dict': {
 357                 'id': 'showthread',
 358                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 359             },
 360             'playlist_mincount': 7,
 361         },
 362         # Embedded TED video
 363         {
 364             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 365             'md5': '65fdff94098e4a607385a60c5177c638',
 366             'info_dict': {
 367                 'id': '1969',
 368                 'ext': 'mp4',
 369                 'title': 'Hidden miracles of the natural world',
 370                 'uploader': 'Louie Schwartzberg',
 371                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 372             }
 373         },
  374         # Embedded Ustream video
 375         {
 376             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 377             'md5': '27b99cdb639c9b12a79bca876a073417',
 378             'info_dict': {
 379                 'id': '45734260',
 380                 'ext': 'flv',
 381                 'uploader': 'AU SPA:  The NSA and Privacy',
 382                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 383             }
 384         },
 385         # nowvideo embed hidden behind percent encoding
 386         {
 387             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 388             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 389             'info_dict': {
 390                 'id': '06e53103ca9aa',
 391                 'ext': 'flv',
 392                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 393                 'description': 'No description',
 394             },
 395         },
 396         # arte embed
 397         {
 398             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 399             'md5': '7653032cbb25bf6c80d80f217055fa43',
 400             'info_dict': {
 401                 'id': '048195-004_PLUS7-F',
 402                 'ext': 'flv',
 403                 'title': 'X:enius',
 404                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 405                 'upload_date': '20140320',
 406             },
 407             'params': {
 408                 'skip_download': 'Requires rtmpdump'
 409             }
 410         },
 411         # Condé Nast embed
 412         {
 413             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 414             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 415             'info_dict': {
 416                 'id': '53501be369702d3275860000',
 417                 'ext': 'mp4',
 418                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 419             }
 420         },
 421         # Dailymotion embed
 422         {
 423             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 424             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 425             'info_dict': {
 426                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 427                 'ext': 'mp4',
 428                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 429                 'uploader': 'Spi0n',
 430             },
 431             'add_ie': ['Dailymotion'],
 432         },
 433         # YouTube embed
 434         {
 435             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 436             'info_dict': {
 437                 'id': 'FXRb4ykk4S0',
 438                 'ext': 'mp4',
 439                 'title': 'The NBL Auction 2014',
 440                 'uploader': 'BADMINTON England',
 441                 'uploader_id': 'BADMINTONEvents',
 442                 'upload_date': '20140603',
 443                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 444             },
 445             'add_ie': ['Youtube'],
 446             'params': {
 447                 'skip_download': True,
 448             }
 449         },
  450         # MTVServices embed
 451         {
 452             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 453             'md5': '35727f82f58c76d996fc188f9755b0d5',
 454             'info_dict': {
 455                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 456                 'ext': 'mp4',
 457                 'title': 'Review',
 458                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 459             },
 460         },
 461         # YouTube embed via <data-embed-url="">
 462         {
 463             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 464             'info_dict': {
 465                 'id': '4vAffPZIT44',
 466                 'ext': 'mp4',
 467                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 468                 'uploader': 'Gameloft',
 469                 'uploader_id': 'gameloft',
 470                 'upload_date': '20140828',
 471                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 472             },
 473             'params': {
 474                 'skip_download': True,
 475             }
 476         },
 477         # Camtasia studio
 478         {
 479             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 480             'playlist': [{
 481                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 482                 'info_dict': {
 483                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 484                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 485                     'ext': 'flv',
 486                     'duration': 2235.90,
 487                 }
 488             }, {
 489                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 490                 'info_dict': {
 491                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 492                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 493                     'ext': 'flv',
 494                     'duration': 2235.93,
 495                 }
 496             }],
 497             'info_dict': {
 498                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 499             }
 500         },
 501         # Flowplayer
 502         {
 503             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 504             'md5': '9d65602bf31c6e20014319c7d07fba27',
 505             'info_dict': {
 506                 'id': '5123ea6d5e5a7',
 507                 'ext': 'mp4',
 508                 'age_limit': 18,
 509                 'uploader': 'www.handjobhub.com',
 510                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 511             }
 512         },
 513         # Multiple brightcove videos
 514         # https://github.com/rg3/youtube-dl/issues/2283
 515         {
 516             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 517             'info_dict': {
 518                 'id': 'always-never',
 519                 'title': 'Always / Never - The New Yorker',
 520             },
 521             'playlist_count': 3,
 522             'params': {
 523                 'extract_flat': False,
 524                 'skip_download': True,
 525             }
 526         },
 527         # MLB embed
 528         {
 529             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 530             'md5': '96f09a37e44da40dd083e12d9a683327',
 531             'info_dict': {
 532                 'id': '33322633',
 533                 'ext': 'mp4',
 534                 'title': 'Ump changes call to ball',
 535                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 536                 'duration': 48,
 537                 'timestamp': 1401537900,
 538                 'upload_date': '20140531',
 539                 'thumbnail': 're:^https?://.*\.jpg$',
 540             },
 541         },
 542         # Wistia embed
 543         {
 544             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 545             'md5': '8788b683c777a5cf25621eaf286d0c23',
 546             'info_dict': {
 547                 'id': '1cfaf6b7ea',
 548                 'ext': 'mov',
 549                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 550                 'duration': 643.0,
 551                 'filesize': 182808282,
 552                 'uploader': 'education-portal.com',
 553             },
 554         },
 555         {
 556             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 557             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 558             'info_dict': {
 559                 'id': 'uxjb0lwrcz',
 560                 'ext': 'mp4',
 561                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 562                 'duration': 1715.0,
 563                 'uploader': 'thoughtworks.wistia.com',
 564             },
 565         },
 566         # Soundcloud embed
 567         {
 568             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 569             'info_dict': {
 570                 'id': '174391317',
 571                 'ext': 'mp3',
 572                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 573                 'uploader': 'Sophos Security',
 574                 'title': 'Chet Chat 171 - Oct 29, 2014',
 575                 'upload_date': '20141029',
 576             }
 577         },
 578         # Livestream embed
 579         {
 580             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 581             'info_dict': {
 582                 'id': '67864563',
 583                 'ext': 'flv',
 584                 'upload_date': '20141112',
 585                 'title': 'Rosetta #CometLanding webcast HL 10',
 586             }
 587         },
 588         # LazyYT
 589         {
 590             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 591             'info_dict': {
 592                 'id': '1986',
 593                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 594             },
 595             'playlist_mincount': 2,
 596         },
 597         # Cinchcast embed
 598         {
 599             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 600             'info_dict': {
 601                 'id': '7141703',
 602                 'ext': 'mp3',
 603                 'upload_date': '20141126',
 604                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 605             }
 606         },
 607         # Cinerama player
 608         {
 609             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 610             'info_dict': {
 611                 'id': '730m_DandD_1901_512k',
 612                 'ext': 'mp4',
 613                 'uploader': 'www.abc.net.au',
 614                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 615             }
 616         },
 617         # embedded viddler video
 618         {
 619             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 620             'info_dict': {
 621                 'id': '4d03aad9',
 622                 'ext': 'mp4',
 623                 'uploader': 'deadspin',
 624                 'title': 'WALL-TO-GORTAT',
 625                 'timestamp': 1422285291,
 626                 'upload_date': '20150126',
 627             },
 628             'add_ie': ['Viddler'],
 629         },
 630         # Libsyn embed
 631         {
 632             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 633             'info_dict': {
 634                 'id': '3377616',
 635                 'ext': 'mp3',
 636                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 637                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 638                 'upload_date': '20150220',
 639             },
 640         },
 641         # jwplayer YouTube
 642         {
 643             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 644             'info_dict': {
 645                 'id': 'Mrj4DVp2zeA',
 646                 'ext': 'mp4',
 647                 'upload_date': '20150212',
 648                 'uploader': 'The National Archives UK',
 649                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 650                 'uploader_id': 'NationalArchives08',
 651                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 652             },
 653         },
 654         # rtl.nl embed
 655         {
 656             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 657             'playlist_mincount': 5,
 658             'info_dict': {
 659                 'id': 'aanslagen-kopenhagen',
 660                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 661             }
 662         },
 663         # Zapiks embed
 664         {
 665             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 666             'info_dict': {
 667                 'id': '118046',
 668                 'ext': 'mp4',
 669                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 670             }
 671         },
 672         # Kaltura embed
 673         {
 674             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 675             'info_dict': {
 676                 'id': '1_eergr3h1',
 677                 'ext': 'mp4',
 678                 'upload_date': '20150226',
 679                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 680                 'timestamp': int,
 681                 'title': 'John Carlson Postgame 2/25/15',
 682             },
 683         },
 684         # Kaltura embed (different embed code)
 685         {
 686             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 687             'info_dict': {
 688                 'id': '1_a52wc67y',
 689                 'ext': 'flv',
 690                 'upload_date': '20150127',
 691                 'uploader_id': 'PremierMedia',
 692                 'timestamp': int,
 693                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 694             },
 695         },
 696         # Eagle.Platform embed (generic URL)
 697         {
 698             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 699             'info_dict': {
 700                 'id': '227304',
 701                 'ext': 'mp4',
 702                 'title': 'Навальный вышел на свободу',
 703                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 704                 'thumbnail': 're:^https?://.*\.jpg$',
 705                 'duration': 87,
 706                 'view_count': int,
 707                 'age_limit': 0,
 708             },
 709         },
 710         # ClipYou (Eagle.Platform) embed (custom URL)
 711         {
 712             'url': 'http://muz-tv.ru/play/7129/',
 713             'info_dict': {
 714                 'id': '12820',
 715                 'ext': 'mp4',
 716                 'title': "'O Sole Mio",
 717                 'thumbnail': 're:^https?://.*\.jpg$',
 718                 'duration': 216,
 719                 'view_count': int,
 720             },
 721         },
 722         # Pladform embed
 723         {
 724             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 725             'info_dict': {
 726                 'id': '100183293',
 727                 'ext': 'mp4',
 728                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 729                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 730                 'thumbnail': 're:^https?://.*\.jpg$',
 731                 'duration': 694,
 732                 'age_limit': 0,
 733             },
 734         },
 735         # Playwire embed
 736         {
 737             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 738             'info_dict': {
 739                 'id': '3519514',
 740                 'ext': 'mp4',
 741                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 742                 'thumbnail': 're:^https?://.*\.png$',
 743                 'duration': 45.115,
 744             },
 745         },
 746         # 5min embed
 747         {
 748             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 749             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 750             'info_dict': {
 751                 'id': '518726732',
 752                 'ext': 'mp4',
 753                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 754             },
 755         },
 756         # SVT embed
 757         {
 758             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 759             'info_dict': {
 760                 'id': '2900353',
 761                 'ext': 'flv',
 762                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 763                 'duration': 27,
 764                 'age_limit': 0,
 765             },
 766         },
 767         # Crooks and Liars embed
 768         {
 769             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 770             'info_dict': {
 771                 'id': '8RUoRhRi',
 772                 'ext': 'mp4',
 773                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 774                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 775                 'timestamp': 1428207000,
 776                 'upload_date': '20150405',
 777                 'uploader': 'Heather',
 778             },
 779         },
 780         # Crooks and Liars external embed
 781         {
 782             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 783             'info_dict': {
 784                 'id': 'MTE3MjUtMzQ2MzA',
 785                 'ext': 'mp4',
 786                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 787                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 788                 'timestamp': 1265032391,
 789                 'upload_date': '20100201',
 790                 'uploader': 'Heather',
 791             },
 792         },
 793         # NBC Sports vplayer embed
 794         {
 795             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 796             'info_dict': {
 797                 'id': 'ln7x1qSThw4k',
 798                 'ext': 'flv',
 799                 'title': "PFT Live: New leader in the 'new-look' defense",
 800                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 801             },
 802         },
 803         # UDN embed
 804         {
 805             'url': 'http://www.udn.com/news/story/7314/822787',
 806             'md5': 'fd2060e988c326991037b9aff9df21a6',
 807             'info_dict': {
 808                 'id': '300346',
 809                 'ext': 'mp4',
 810                 'title': '中一中男師變性 全校師生力挺',
 811                 'thumbnail': 're:^https?://.*\.jpg$',
 812             }
 813         },
 814         # Ooyala embed
 815         {
 816             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 817             'info_dict': {
 818                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 819                 'ext': 'mp4',
 820                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 821                 'title': 'This is what separates the Excel masters from the wannabes',
 822             },
 823             'params': {
 824                 # m3u8 downloads
 825                 'skip_download': True,
 826             }
 827         },
 828         # Contains a SMIL manifest
 829         {
 830             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 831             'info_dict': {
 832                 'id': 'file',
 833                 'ext': 'flv',
 834                 'title': '+ Football: Lottery Champions League Europe',
 835                 'uploader': 'www.telewebion.com',
 836             },
 837             'params': {
 838                 # rtmpe downloads
 839                 'skip_download': True,
 840             }
 841         },
 842         # Brightcove URL in single quotes
 843         {
 844             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 845             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 846             'info_dict': {
 847                 'id': '4255764656001',
 848                 'ext': 'mp4',
 849                 'title': 'SN Presents: Russell Martin, World Citizen',
 850                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 851                 'uploader': 'Rogers Sportsnet',
 852             },
 853         },
 854         # Dailymotion Cloud video
 855         {
 856             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
 857             'md5': '49444254273501a64675a7e68c502681',
 858             'info_dict': {
 859                 'id': '5585de919473990de4bee11b',
 860                 'ext': 'mp4',
 861                 'title': 'Le débat',
 862                 'thumbnail': 're:^https?://.*\.jpe?g$',
 863             }
 864         },
 865         # OnionStudios embed
 866         {
 867             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
 868             'info_dict': {
 869                 'id': '2855',
 870                 'ext': 'mp4',
 871                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
 872                 'thumbnail': 're:^https?://.*\.jpe?g$',
 873                 'uploader': 'ClickHole',
 874                 'uploader_id': 'clickhole',
 875             }
 876         },
 877         # SnagFilms embed
 878         {
 879             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
 880             'info_dict': {
 881                 'id': '74849a00-85a9-11e1-9660-123139220831',
 882                 'ext': 'mp4',
 883                 'title': '#whilewewatch',
 884             }
 885         },
 886         # AdobeTVVideo embed
 887         {
 888             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
 889             'md5': '43662b577c018ad707a63766462b1e87',
 890             'info_dict': {
 891                 'id': '2456',
 892                 'ext': 'mp4',
 893                 'title': 'New experience with Acrobat DC',
 894                 'description': 'New experience with Acrobat DC',
 895                 'duration': 248.667,
 896             },
 897         }
 898     ]
 899
 900     def report_following_redirect(self, new_url):
 901         """Report information extraction."""
 902         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 903
 904     def _extract_rss(self, url, video_id, doc):
 905         playlist_title = doc.find('./channel/title').text
 906         playlist_desc_el = doc.find('./channel/description')
 907         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 908
 909         entries = []
 910         for it in doc.findall('./channel/item'):
 911             next_url = xpath_text(it, 'link', fatal=False)
 912             if not next_url:
 913                 enclosure_nodes = it.findall('./enclosure')
 914                 for e in enclosure_nodes:
 915                     next_url = e.attrib.get('url')
 916                     if next_url:
 917                         break
 918
 919             if not next_url:
 920                 continue
 921
 922             entries.append({
 923                 '_type': 'url',
 924                 'url': next_url,
 925                 'title': it.find('title').text,
 926             })
 927
 928         return {
 929             '_type': 'playlist',
 930             'id': url,
 931             'title': playlist_title,
 932             'description': playlist_desc,
 933             'entries': entries,
 934         }
 935
 936     def _extract_camtasia(self, url, video_id, webpage):
 937         """ Returns None if no camtasia video can be found. """
 938
 939         camtasia_cfg = self._search_regex(
 940             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 941             webpage, 'camtasia configuration file', default=None)
 942         if camtasia_cfg is None:
 943             return None
 944
 945         title = self._html_search_meta('DC.title', webpage, fatal=True)
 946
 947         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 948         camtasia_cfg = self._download_xml(
 949             camtasia_url, video_id,
 950             note='Downloading camtasia configuration',
 951             errnote='Failed to download camtasia configuration')
 952         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 953
 954         entries = []
 955         for n in fileset_node.getchildren():
 956             url_n = n.find('./uri')
 957             if url_n is None:
 958                 continue
 959
 960             entries.append({
 961                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 962                 'title': '%s - %s' % (title, n.tag),
 963                 'url': compat_urlparse.urljoin(url, url_n.text),
 964                 'duration': float_or_none(n.find('./duration').text),
 965             })
 966
 967         return {
 968             '_type': 'playlist',
 969             'entries': entries,
 970             'title': title,
 971         }
 972
 973     def _real_extract(self, url):
 974         if url.startswith('//'):
 975             return {
 976                 '_type': 'url',
 977                 'url': self.http_scheme() + url,
 978             }
 979
 980         parsed_url = compat_urlparse.urlparse(url)
 981         if not parsed_url.scheme:
 982             default_search = self._downloader.params.get('default_search')
 983             if default_search is None:
 984                 default_search = 'fixup_error'
 985
 986             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 987                 if '/' in url:
 988                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 989                     return self.url_result('http://' + url)
 990                 elif default_search != 'fixup_error':
 991                     if default_search == 'auto_warning':
 992                         if re.match(r'^(?:url|URL)$', url):
 993                             raise ExtractorError(
 994                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 995                                 expected=True)
 996                         else:
 997                             self._downloader.report_warning(
 998                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 999                     return self.url_result('ytsearch:' + url)
1000
1001             if default_search in ('error', 'fixup_error'):
1002                 raise ExtractorError(
1003                     '%r is not a valid URL. '
1004                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1005                     % (url, url), expected=True)
1006             else:
1007                 if ':' not in default_search:
1008                     default_search += ':'
1009                 return self.url_result(default_search + url)
1010
1011         url, smuggled_data = unsmuggle_url(url)
1012         force_videoid = None
1013         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1014         if smuggled_data and 'force_videoid' in smuggled_data:
1015             force_videoid = smuggled_data['force_videoid']
1016             video_id = force_videoid
1017         else:
1018             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1019
1020         self.to_screen('%s: Requesting header' % video_id)
1021
1022         head_req = HEADRequest(url)
1023         head_response = self._request_webpage(
1024             head_req, video_id,
1025             note=False, errnote='Could not send HEAD request to %s' % url,
1026             fatal=False)
1027
1028         if head_response is not False:
1029             # Check for redirect
1030             new_url = head_response.geturl()
1031             if url != new_url:
1032                 self.report_following_redirect(new_url)
1033                 if force_videoid:
1034                     new_url = smuggle_url(
1035                         new_url, {'force_videoid': force_videoid})
1036                 return self.url_result(new_url)
1037
1038         full_response = None
1039         if head_response is False:
1040             request = compat_urllib_request.Request(url)
1041             request.add_header('Accept-Encoding', '*')
1042             full_response = self._request_webpage(request, video_id)
1043             head_response = full_response
1044
1045         # Check for direct link to a video
1046         content_type = head_response.headers.get('Content-Type', '')
1047         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1048         if m:
1049             upload_date = unified_strdate(
1050                 head_response.headers.get('Last-Modified'))
1051             return {
1052                 'id': video_id,
1053                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1054                 'direct': True,
1055                 'formats': [{
1056                     'format_id': m.group('format_id'),
1057                     'url': url,
1058                     'vcodec': 'none' if m.group('type') == 'audio' else None
1059                 }],
1060                 'upload_date': upload_date,
1061             }
1062
1063         if not self._downloader.params.get('test', False) and not is_intentional:
1064             force = self._downloader.params.get('force_generic_extractor', False)
1065             self._downloader.report_warning(
1066                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1067
1068         if not full_response:
1069             request = compat_urllib_request.Request(url)
1070             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1071             # making it impossible to download only chunk of the file (yet we need only 512kB to
1072             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1073             # that will always result in downloading the whole file that is not desirable.
1074             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1075             # to accept raw bytes and being able to download only a chunk.
1076             # It may probably better to solve this by checking Content-Type for application/octet-stream
1077             # after HEAD request finishes, but not sure if we can rely on this.
1078             request.add_header('Accept-Encoding', '*')
1079             full_response = self._request_webpage(request, video_id)
1080
1081         # Maybe it's a direct link to a video?
1082         # Be careful not to download the whole thing!
1083         first_bytes = full_response.read(512)
1084         if not is_html(first_bytes):
1085             self._downloader.report_warning(
1086                 'URL could be a direct video link, returning it as such.')
1087             upload_date = unified_strdate(
1088                 head_response.headers.get('Last-Modified'))
1089             return {
1090                 'id': video_id,
1091                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1092                 'direct': True,
1093                 'url': url,
1094                 'upload_date': upload_date,
1095             }
1096
1097         webpage = self._webpage_read_content(
1098             full_response, url, video_id, prefix=first_bytes)
1099
1100         self.report_extraction(video_id)
1101
1102         # Is it an RSS feed?
1103         try:
1104             doc = parse_xml(webpage)
1105             if doc.tag == 'rss':
1106                 return self._extract_rss(url, video_id, doc)
1107         except compat_xml_parse_error:
1108             pass
1109
1110         # Is it a Camtasia project?
1111         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1112         if camtasia_res is not None:
1113             return camtasia_res
1114
1115         # Sometimes embedded video player is hidden behind percent encoding
1116         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1117         # Unescaping the whole page allows to handle those cases in a generic way
1118         webpage = compat_urllib_parse_unquote(webpage)
1119
1120         # it's tempting to parse this further, but you would
1121         # have to take into account all the variations like
1122         #   Video Title - Site Name
1123         #   Site Name | Video Title
1124         #   Video Title - Tagline | Site Name
1125         # and so on and so forth; it's just not practical
1126         video_title = self._html_search_regex(
1127             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1128             default='video')
1129
1130         # Try to detect age limit automatically
1131         age_limit = self._rta_search(webpage)
1132         # And then there are the jokers who advertise that they use RTA,
1133         # but actually don't.
1134         AGE_LIMIT_MARKERS = [
1135             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1136         ]
1137         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1138             age_limit = 18
1139
1140         # video uploader is domain name
1141         video_uploader = self._search_regex(
1142             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1143
1144         # Helper method
1145         def _playlist_from_matches(matches, getter=None, ie=None):
1146             urlrs = orderedSet(
1147                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1148                 for m in matches)
1149             return self.playlist_result(
1150                 urlrs, playlist_id=video_id, playlist_title=video_title)
1151
1152         # Look for BrightCove:
1153         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1154         if bc_urls:
1155             self.to_screen('Brightcove video detected.')
1156             entries = [{
1157                 '_type': 'url',
1158                 'url': smuggle_url(bc_url, {'Referer': url}),
1159                 'ie_key': 'Brightcove'
1160             } for bc_url in bc_urls]
1161
1162             return {
1163                 '_type': 'playlist',
1164                 'title': video_title,
1165                 'id': video_id,
1166                 'entries': entries,
1167             }
1168
1169         # Look for embedded rtl.nl player
1170         matches = re.findall(
1171             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1172             webpage)
1173         if matches:
1174             return _playlist_from_matches(matches, ie='RtlNl')
1175
1176         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1177         if vimeo_url is not None:
1178             return self.url_result(vimeo_url)
1179
1180         # Look for embedded YouTube player
1181         matches = re.findall(r'''(?x)
1182             (?:
1183                 <iframe[^>]+?src=|
1184                 data-video-url=|
1185                 <embed[^>]+?src=|
1186                 embedSWF\(?:\s*|
1187                 new\s+SWFObject\(
1188             )
1189             (["\'])
1190                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1191                 (?:embed|v|p)/.+?)
1192             \1''', webpage)
1193         if matches:
1194             return _playlist_from_matches(
1195                 matches, lambda m: unescapeHTML(m[1]))
1196
1197         # Look for lazyYT YouTube embed
1198         matches = re.findall(
1199             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1200         if matches:
1201             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1202
1203         # Look for embedded Dailymotion player
1204         matches = re.findall(
1205             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1206         if matches:
1207             return _playlist_from_matches(
1208                 matches, lambda m: unescapeHTML(m[1]))
1209
1210         # Look for embedded Dailymotion playlist player (#3822)
1211         m = re.search(
1212             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1213         if m:
1214             playlists = re.findall(
1215                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1216             if playlists:
1217                 return _playlist_from_matches(
1218                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1219
1220         # Look for embedded Wistia player
1221         match = re.search(
1222             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1223         if match:
1224             embed_url = self._proto_relative_url(
1225                 unescapeHTML(match.group('url')))
1226             return {
1227                 '_type': 'url_transparent',
1228                 'url': embed_url,
1229                 'ie_key': 'Wistia',
1230                 'uploader': video_uploader,
1231                 'title': video_title,
1232                 'id': video_id,
1233             }
1234
1235         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1236         if match:
1237             return {
1238                 '_type': 'url_transparent',
1239                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1240                 'ie_key': 'Wistia',
1241                 'uploader': video_uploader,
1242                 'title': video_title,
1243                 'id': match.group('id')
1244             }
1245
1246         # Look for embedded blip.tv player
1247         bliptv_url = BlipTVIE._extract_url(webpage)
1248         if bliptv_url:
1249             return self.url_result(bliptv_url, 'BlipTV')
1250
1251         # Look for SVT player
1252         svt_url = SVTIE._extract_url(webpage)
1253         if svt_url:
1254             return self.url_result(svt_url, 'SVT')
1255
1256         # Look for embedded condenast player
1257         matches = re.findall(
1258             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1259             webpage)
1260         if matches:
1261             return {
1262                 '_type': 'playlist',
1263                 'entries': [{
1264                     '_type': 'url',
1265                     'ie_key': 'CondeNast',
1266                     'url': ma,
1267                 } for ma in matches],
1268                 'title': video_title,
1269                 'id': video_id,
1270             }
1271
1272         # Look for Bandcamp pages with custom domain
1273         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1274         if mobj is not None:
1275             burl = unescapeHTML(mobj.group(1))
1276             # Don't set the extractor because it can be a track url or an album
1277             return self.url_result(burl)
1278
1279         # Look for embedded Vevo player
1280         mobj = re.search(
1281             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1282         if mobj is not None:
1283             return self.url_result(mobj.group('url'))
1284
1285         # Look for embedded Viddler player
1286         mobj = re.search(
1287             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1288             webpage)
1289         if mobj is not None:
1290             return self.url_result(mobj.group('url'))
1291
1292         # Look for NYTimes player
1293         mobj = re.search(
1294             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1295             webpage)
1296         if mobj is not None:
1297             return self.url_result(mobj.group('url'))
1298
1299         # Look for Libsyn player
1300         mobj = re.search(
1301             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1302         if mobj is not None:
1303             return self.url_result(mobj.group('url'))
1304
1305         # Look for Ooyala videos
1306         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1307                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1308                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1309                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1310         if mobj is not None:
1311             return OoyalaIE._build_url_result(mobj.group('ec'))
1312
1313         # Look for multiple Ooyala embeds on SBN network websites
1314         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1315         if mobj is not None:
1316             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1317             if embeds:
1318                 return _playlist_from_matches(
1319                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1320
1321         # Look for Aparat videos
1322         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1323         if mobj is not None:
1324             return self.url_result(mobj.group(1), 'Aparat')
1325
1326         # Look for MPORA videos
1327         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1328         if mobj is not None:
1329             return self.url_result(mobj.group(1), 'Mpora')
1330
1331         # Look for embedded NovaMov-based player
1332         mobj = re.search(
1333             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1334                     (?P<url>http://(?:(?:embed|www)\.)?
1335                         (?:novamov\.com|
1336                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1337                            videoweed\.(?:es|com)|
1338                            movshare\.(?:net|sx|ag)|
1339                            divxstage\.(?:eu|net|ch|co|at|ag))
1340                         /embed\.php.+?)\1''', webpage)
1341         if mobj is not None:
1342             return self.url_result(mobj.group('url'))
1343
1344         # Look for embedded Facebook player
1345         mobj = re.search(
1346             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1347         if mobj is not None:
1348             return self.url_result(mobj.group('url'), 'Facebook')
1349
1350         # Look for embedded VK player
1351         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1352         if mobj is not None:
1353             return self.url_result(mobj.group('url'), 'VK')
1354
1355         # Look for embedded ivi player
1356         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1357         if mobj is not None:
1358             return self.url_result(mobj.group('url'), 'Ivi')
1359
1360         # Look for embedded Huffington Post player
1361         mobj = re.search(
1362             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1363         if mobj is not None:
1364             return self.url_result(mobj.group('url'), 'HuffPost')
1365
1366         # Look for embed.ly
1367         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1368         if mobj is not None:
1369             return self.url_result(mobj.group('url'))
1370         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1371         if mobj is not None:
1372             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1373
1374         # Look for funnyordie embed
1375         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1376         if matches:
1377             return _playlist_from_matches(
1378                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1379
1380         # Look for BBC iPlayer embed
1381         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1382         if matches:
1383             return _playlist_from_matches(matches, ie='BBCCoUk')
1384
1385         # Look for embedded RUTV player
1386         rutv_url = RUTVIE._extract_url(webpage)
1387         if rutv_url:
1388             return self.url_result(rutv_url, 'RUTV')
1389
1390         # Look for embedded TVC player
1391         tvc_url = TVCIE._extract_url(webpage)
1392         if tvc_url:
1393             return self.url_result(tvc_url, 'TVC')
1394
1395         # Look for embedded SportBox player
1396         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1397         if sportbox_urls:
1398             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1399
1400         # Look for embedded PornHub player
1401         pornhub_url = PornHubIE._extract_url(webpage)
1402         if pornhub_url:
1403             return self.url_result(pornhub_url, 'PornHub')
1404
1405         # Look for embedded XHamster player
1406         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1407         if xhamster_urls:
1408             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1409
1410         # Look for embedded Tvigle player
1411         mobj = re.search(
1412             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1413         if mobj is not None:
1414             return self.url_result(mobj.group('url'), 'Tvigle')
1415
1416         # Look for embedded TED player
1417         mobj = re.search(
1418             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1419         if mobj is not None:
1420             return self.url_result(mobj.group('url'), 'TED')
1421
1422         # Look for embedded Ustream videos
1423         mobj = re.search(
1424             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1425         if mobj is not None:
1426             return self.url_result(mobj.group('url'), 'Ustream')
1427
1428         # Look for embedded arte.tv player
1429         mobj = re.search(
1430             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1431             webpage)
1432         if mobj is not None:
1433             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1434
1435         # Look for embedded smotri.com player
1436         smotri_url = SmotriIE._extract_url(webpage)
1437         if smotri_url:
1438             return self.url_result(smotri_url, 'Smotri')
1439
1440         # Look for embedded Myvi.ru player
1441         myvi_url = MyviIE._extract_url(webpage)
1442         if myvi_url:
1443             return self.url_result(myvi_url)
1444
1445         # Look for embedded soundcloud player
1446         mobj = re.search(
1447             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1448             webpage)
1449         if mobj is not None:
1450             url = unescapeHTML(mobj.group('url'))
1451             return self.url_result(url)
1452
1453         # Look for embedded vulture.com player
1454         mobj = re.search(
1455             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1456             webpage)
1457         if mobj is not None:
1458             url = unescapeHTML(mobj.group('url'))
1459             return self.url_result(url, ie='Vulture')
1460
1461         # Look for embedded mtvservices player
1462         mobj = re.search(
1463             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1464             webpage)
1465         if mobj is not None:
1466             url = unescapeHTML(mobj.group('url'))
1467             return self.url_result(url, ie='MTVServicesEmbedded')
1468
1469         # Look for embedded yahoo player
1470         mobj = re.search(
1471             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1472             webpage)
1473         if mobj is not None:
1474             return self.url_result(mobj.group('url'), 'Yahoo')
1475
1476         # Look for embedded sbs.com.au player
1477         mobj = re.search(
1478             r'''(?x)
1479             (?:
1480                 <meta\s+property="og:video"\s+content=|
1481                 <iframe[^>]+?src=
1482             )
1483             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1484             webpage)
1485         if mobj is not None:
1486             return self.url_result(mobj.group('url'), 'SBS')
1487
1488         # Look for embedded Cinchcast player
1489         mobj = re.search(
1490             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1491             webpage)
1492         if mobj is not None:
1493             return self.url_result(mobj.group('url'), 'Cinchcast')
1494
1495         mobj = re.search(
1496             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1497             webpage)
1498         if not mobj:
1499             mobj = re.search(
1500                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1501                 webpage)
1502         if mobj is not None:
1503             return self.url_result(mobj.group('url'), 'MLB')
1504
1505         mobj = re.search(
1506             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1507             webpage)
1508         if mobj is not None:
1509             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1510
1511         mobj = re.search(
1512             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1513             webpage)
1514         if mobj is not None:
1515             return self.url_result(mobj.group('url'), 'Livestream')
1516
1517         # Look for Zapiks embed
1518         mobj = re.search(
1519             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1520         if mobj is not None:
1521             return self.url_result(mobj.group('url'), 'Zapiks')
1522
1523         # Look for Kaltura embeds
1524         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
1525                 re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
1526         if mobj is not None:
1527             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1528
1529         # Look for Eagle.Platform embeds
1530         mobj = re.search(
1531             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1532         if mobj is not None:
1533             return self.url_result(mobj.group('url'), 'EaglePlatform')
1534
1535         # Look for ClipYou (uses Eagle.Platform) embeds
1536         mobj = re.search(
1537             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1538         if mobj is not None:
1539             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1540
1541         # Look for Pladform embeds
1542         mobj = re.search(
1543             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1544         if mobj is not None:
1545             return self.url_result(mobj.group('url'), 'Pladform')
1546
1547         # Look for Playwire embeds
1548         mobj = re.search(
1549             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1550         if mobj is not None:
1551             return self.url_result(mobj.group('url'))
1552
1553         # Look for 5min embeds
1554         mobj = re.search(
1555             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1556         if mobj is not None:
1557             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1558
1559         # Look for Crooks and Liars embeds
1560         mobj = re.search(
1561             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1562         if mobj is not None:
1563             return self.url_result(mobj.group('url'))
1564
1565         # Look for NBC Sports VPlayer embeds
1566         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1567         if nbc_sports_url:
1568             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1569
1570         # Look for UDN embeds
1571         mobj = re.search(
1572             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1573         if mobj is not None:
1574             return self.url_result(
1575                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1576
1577         # Look for Senate ISVP iframe
1578         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1579         if senate_isvp_url:
1580             return self.url_result(senate_isvp_url, 'SenateISVP')
1581
1582         # Look for Dailymotion Cloud videos
1583         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1584         if dmcloud_url:
1585             return self.url_result(dmcloud_url, 'DailymotionCloud')
1586
1587         # Look for OnionStudios embeds
1588         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1589         if onionstudios_url:
1590             return self.url_result(onionstudios_url)
1591
1592         # Look for SnagFilms embeds
1593         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1594         if snagfilms_url:
1595             return self.url_result(snagfilms_url)
1596
1597         # Look for AdobeTVVideo embeds
1598         mobj = re.search(
1599             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1600             webpage)
1601         if mobj is not None:
1602             return self.url_result(
1603                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1604                 'AdobeTVVideo')
1605
1606         def check_video(vurl):
1607             if YoutubeIE.suitable(vurl):
1608                 return True
1609             vpath = compat_urlparse.urlparse(vurl).path
1610             vext = determine_ext(vpath)
1611             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1612
1613         def filter_video(urls):
1614             return list(filter(check_video, urls))
1615
1616         # Start with something easy: JW Player in SWFObject
1617         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1618         if not found:
1619             # Look for gorilla-vid style embedding
1620             found = filter_video(re.findall(r'''(?sx)
1621                 (?:
1622                     jw_plugins|
1623                     JWPlayerOptions|
1624                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1625                 )
1626                 .*?
1627                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1628         if not found:
1629             # Broaden the search a little bit
1630             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1631         if not found:
1632             # Broaden the findall a little bit: JWPlayer JS loader
1633             found = filter_video(re.findall(
1634                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1635         if not found:
1636             # Flow player
1637             found = filter_video(re.findall(r'''(?xs)
1638                 flowplayer\("[^"]+",\s*
1639                     \{[^}]+?\}\s*,
1640                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1641                         ["']?url["']?\s*:\s*["']([^"']+)["']
1642             ''', webpage))
1643         if not found:
1644             # Cinerama player
1645             found = re.findall(
1646                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1647         if not found:
1648             # Try to find twitter cards info
1649             found = filter_video(re.findall(
1650                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1651         if not found:
1652             # We look for Open Graph info:
1653             # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
1654             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1655             # Intended to look in og:video only if the MIME type is a video, not a Flash player (NOTE: re.findall never returns None, so the check below always passes):
1656             if m_video_type is not None:
1657                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1658         if not found:
1659             # HTML5 video
1660             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1661         if not found:
1662             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1663             found = re.search(
1664                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1665                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1666                 webpage)
1667             if not found:
1668                 # Look also in Refresh HTTP header
1669                 refresh_header = head_response.headers.get('Refresh')
1670                 if refresh_header:
1671                     found = re.search(REDIRECT_REGEX, refresh_header)
1672             if found:
1673                 new_url = compat_urlparse.urljoin(url, found.group(1))
1674                 self.report_following_redirect(new_url)
1675                 return {
1676                     '_type': 'url',
1677                     'url': new_url,
1678                 }
1679         if not found:
1680             raise UnsupportedError(url)
1681
1682         entries = []
1683         for video_url in found:
1684             video_url = compat_urlparse.urljoin(url, video_url)
1685             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1686
1687             # Sometimes, jwplayer extraction will result in a YouTube URL
1688             if YoutubeIE.suitable(video_url):
1689                 entries.append(self.url_result(video_url, 'Youtube'))
1690                 continue
1691
1692             # Drop the file extension from the basename to get the video id:
1693             video_id = os.path.splitext(video_id)[0]
1694
1695             if determine_ext(video_url) == 'smil':
1696                 entries.append({
1697                     'id': video_id,
1698                     'formats': self._extract_smil_formats(video_url, video_id),
1699                     'uploader': video_uploader,
1700                     'title': video_title,
1701                     'age_limit': age_limit,
1702                 })
1703             else:
1704                 entries.append({
1705                     'id': video_id,
1706                     'url': video_url,
1707                     'uploader': video_uploader,
1708                     'title': video_title,
1709                     'age_limit': age_limit,
1710                 })
1711
1712         if len(entries) == 1:
1713             return entries[0]
1714         else:
1715             for num, e in enumerate(entries, start=1):
1716                 # 'url' results don't have a title
1717                 if e.get('title') is not None:
1718                     e['title'] = '%s (%d)' % (e['title'], num)
1719             return {
1720                 '_type': 'playlist',
1721                 'entries': entries,
1722             }