_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse_unquote,
  12     compat_urllib_request,
  13     compat_urlparse,
  14     compat_xml_parse_error,
  15 )
  16 from ..utils import (
  17     determine_ext,
  18     ExtractorError,
  19     float_or_none,
  20     HEADRequest,
  21     is_html,
  22     orderedSet,
  23     parse_xml,
  24     smuggle_url,
  25     unescapeHTML,
  26     unified_strdate,
  27     unsmuggle_url,
  28     UnsupportedError,
  29     url_basename,
  30     xpath_text,
  31 )
  32 from .brightcove import BrightcoveIE
  33 from .nbc import NBCSportsVPlayerIE
  34 from .ooyala import OoyalaIE
  35 from .rutv import RUTVIE
  36 from .tvc import TVCIE
  37 from .sportbox import SportBoxEmbedIE
  38 from .smotri import SmotriIE
  39 from .myvi import MyviIE
  40 from .condenast import CondeNastIE
  41 from .udn import UDNEmbedIE
  42 from .senateisvp import SenateISVPIE
  43 from .bliptv import BlipTVIE
  44 from .svt import SVTIE
  45 from .pornhub import PornHubIE
  46 from .xhamster import XHamsterEmbedIE
  47 from .vimeo import VimeoIE
  48 from .dailymotion import DailymotionCloudIE
  49 from .onionstudios import OnionStudiosIE
  50 from .snagfilms import SnagFilmsEmbedIE
  51
  52
  53 class GenericIE(InfoExtractor):
  54     IE_DESC = 'Generic downloader that works on some sites'
  55     _VALID_URL = r'.*'
  56     IE_NAME = 'generic'
  57     _TESTS = [
  58         # Direct link to a video
  59         {
  60             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  61             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  62             'info_dict': {
  63                 'id': 'trailer',
  64                 'ext': 'mp4',
  65                 'title': 'trailer',
  66                 'upload_date': '20100513',
  67             }
  68         },
  69         # Direct link to media delivered compressed (until Accept-Encoding is *)
  70         {
  71             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  72             'md5': '128c42e68b13950268b648275386fc74',
  73             'info_dict': {
  74                 'id': 'FictionJunction-Parallel_Hearts',
  75                 'ext': 'flac',
  76                 'title': 'FictionJunction-Parallel_Hearts',
  77                 'upload_date': '20140522',
  78             },
  79             'expected_warnings': [
  80                 'URL could be a direct video link, returning it as such.'
  81             ]
  82         },
  83         # Direct download with broken HEAD
  84         {
  85             'url': 'http://ai-radio.org:8000/radio.opus',
  86             'info_dict': {
  87                 'id': 'radio',
  88                 'ext': 'opus',
  89                 'title': 'radio',
  90             },
  91             'params': {
  92                 'skip_download': True,  # infinite live stream
  93             },
  94             'expected_warnings': [
  95                 r'501.*Not Implemented'
  96             ],
  97         },
  98         # Direct link with incorrect MIME type
  99         {
 100             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 101             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 102             'info_dict': {
 103                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 104                 'id': '5_Lennart_Poettering_-_Systemd',
 105                 'ext': 'webm',
 106                 'title': '5_Lennart_Poettering_-_Systemd',
 107                 'upload_date': '20141120',
 108             },
 109             'expected_warnings': [
 110                 'URL could be a direct video link, returning it as such.'
 111             ]
 112         },
 113         # RSS feed
 114         {
 115             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 116             'info_dict': {
 117                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 118                 'title': 'Zero Punctuation',
 119                 'description': 're:.*groundbreaking video review series.*'
 120             },
 121             'playlist_mincount': 11,
 122         },
 123         # RSS feed with enclosure
 124         {
 125             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 126             'info_dict': {
 127                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 128                 'ext': 'm4v',
 129                 'upload_date': '20150228',
 130                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 131             }
 132         },
 133         # google redirect
 134         {
 135             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 136             'info_dict': {
 137                 'id': 'cmQHVoWB5FY',
 138                 'ext': 'mp4',
 139                 'upload_date': '20130224',
 140                 'uploader_id': 'TheVerge',
 141                 'description': 're:^Chris Ziegler takes a look at the\.*',
 142                 'uploader': 'The Verge',
 143                 'title': 'First Firefox OS phones side-by-side',
 144             },
 145             'params': {
 146                 'skip_download': False,
 147             }
 148         },
 149         {
 150             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 151             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 152             'info_dict': {
 153                 'id': '13601338388002',
 154                 'ext': 'mp4',
 155                 'uploader': 'www.hodiho.fr',
 156                 'title': 'R\u00e9gis plante sa Jeep',
 157             }
 158         },
 159         # bandcamp page with custom domain
 160         {
 161             'add_ie': ['Bandcamp'],
 162             'url': 'http://bronyrock.com/track/the-pony-mash',
 163             'info_dict': {
 164                 'id': '3235767654',
 165                 'ext': 'mp3',
 166                 'title': 'The Pony Mash',
 167                 'uploader': 'M_Pallante',
 168             },
 169             'skip': 'There is a limit of 200 free downloads / month for the test song',
 170         },
 171         # embedded brightcove video
 172         # it also tests brightcove videos that need to set the 'Referer' in the
 173         # http requests
 174         {
 175             'add_ie': ['Brightcove'],
 176             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 177             'info_dict': {
 178                 'id': '2765128793001',
 179                 'ext': 'mp4',
 180                 'title': 'Le cours de bourse : l’analyse technique',
 181                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 182                 'uploader': 'BFM BUSINESS',
 183             },
 184             'params': {
 185                 'skip_download': True,
 186             },
 187         },
 188         {
 189             # https://github.com/rg3/youtube-dl/issues/2253
 190             'url': 'http://bcove.me/i6nfkrc3',
 191             'md5': '0ba9446db037002366bab3b3eb30c88c',
 192             'info_dict': {
 193                 'id': '3101154703001',
 194                 'ext': 'mp4',
 195                 'title': 'Still no power',
 196                 'uploader': 'thestar.com',
 197                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 198             },
 199             'add_ie': ['Brightcove'],
 200         },
 201         {
 202             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 203             'md5': 'fb973ecf6e4a78a67453647444222983',
 204             'info_dict': {
 205                 'id': '3414141473001',
 206                 'ext': 'mp4',
 207                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 208                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 209                 'uploader': 'Championat',
 210             },
 211         },
 212         {
 213             # https://github.com/rg3/youtube-dl/issues/3541
 214             'add_ie': ['Brightcove'],
 215             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 216             'info_dict': {
 217                 'id': '3866516442001',
 218                 'ext': 'mp4',
 219                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 220                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 221                 'uploader': 'SBS Broadcasting',
 222             },
 223             'skip': 'Restricted to Netherlands',
 224             'params': {
 225                 'skip_download': True,  # m3u8 download
 226             },
 227         },
 228         # ooyala video
 229         {
 230             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 231             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 232             'info_dict': {
 233                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 234                 'ext': 'mp4',
 235                 'title': '2cc213299525360.mov',  # that's what we get
 236             },
 237             'add_ie': ['Ooyala'],
 238         },
 239         # multiple ooyala embeds on SBN network websites
 240         {
 241             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 242             'info_dict': {
 243                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 244                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 245             },
 246             'playlist_mincount': 3,
 247             'params': {
 248                 'skip_download': True,
 249             },
 250             'add_ie': ['Ooyala'],
 251         },
 252         # embed.ly video
 253         {
 254             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 255             'info_dict': {
 256                 'id': '9ODmcdjQcHQ',
 257                 'ext': 'mp4',
 258                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 259                 'upload_date': '20140225',
 260                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 261                 'uploader': 'Tested',
 262                 'uploader_id': 'testedcom',
 263             },
 264             # No need to test YoutubeIE here
 265             'params': {
 266                 'skip_download': True,
 267             },
 268         },
 269         # funnyordie embed
 270         {
 271             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 272             'info_dict': {
 273                 'id': '18e820ec3f',
 274                 'ext': 'mp4',
 275                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 276                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 277             },
 278         },
 279         # BBC iPlayer embeds
 280         {
 281             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 282             'info_dict': {
 283                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 284             },
 285             'playlist_mincount': 18,
 286         },
 287         # RUTV embed
 288         {
 289             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 290             'info_dict': {
 291                 'id': '776940',
 292                 'ext': 'mp4',
 293                 'title': 'Охотское море стало целиком российским',
 294                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 295             },
 296             'params': {
 297                 # m3u8 download
 298                 'skip_download': True,
 299             },
 300         },
 301         # TVC embed
 302         {
 303             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 304             'info_dict': {
 305                 'id': '55304',
 306                 'ext': 'mp4',
 307                 'title': 'Дошкольное воспитание',
 308             },
 309         },
 310         # SportBox embed
 311         {
 312             'url': 'http://www.vestifinance.ru/articles/25753',
 313             'info_dict': {
 314                 'id': '25753',
 315                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 316             },
 317             'playlist': [{
 318                 'info_dict': {
 319                     'id': '370908',
 320                     'title': 'Госзаказ. День 3',
 321                     'ext': 'mp4',
 322                 }
 323             }, {
 324                 'info_dict': {
 325                     'id': '370905',
 326                     'title': 'Госзаказ. День 2',
 327                     'ext': 'mp4',
 328                 }
 329             }, {
 330                 'info_dict': {
 331                     'id': '370902',
 332                     'title': 'Госзаказ. День 1',
 333                     'ext': 'mp4',
 334                 }
 335             }],
 336             'params': {
 337                 # m3u8 download
 338                 'skip_download': True,
 339             },
 340         },
 341         # Myvi.ru embed
 342         {
 343             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 344             'info_dict': {
 345                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 346                 'ext': 'mp4',
 347                 'title': 'Ужастики, русский трейлер (2015)',
 348                 'thumbnail': 're:^https?://.*\.jpg$',
 349                 'duration': 153,
 350             }
 351         },
 352         # XHamster embed
 353         {
 354             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 355             'info_dict': {
 356                 'id': 'showthread',
 357                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 358             },
 359             'playlist_mincount': 7,
 360         },
 361         # Embedded TED video
 362         {
 363             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 364             'md5': '65fdff94098e4a607385a60c5177c638',
 365             'info_dict': {
 366                 'id': '1969',
 367                 'ext': 'mp4',
 368                 'title': 'Hidden miracles of the natural world',
 369                 'uploader': 'Louie Schwartzberg',
 370                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 371             }
 372         },
 373         # Embedded Ustream video
 374         {
 375             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 376             'md5': '27b99cdb639c9b12a79bca876a073417',
 377             'info_dict': {
 378                 'id': '45734260',
 379                 'ext': 'flv',
 380                 'uploader': 'AU SPA:  The NSA and Privacy',
 381                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 382             }
 383         },
 384         # nowvideo embed hidden behind percent encoding
 385         {
 386             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 387             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 388             'info_dict': {
 389                 'id': '06e53103ca9aa',
 390                 'ext': 'flv',
 391                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 392                 'description': 'No description',
 393             },
 394         },
 395         # arte embed
 396         {
 397             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 398             'md5': '7653032cbb25bf6c80d80f217055fa43',
 399             'info_dict': {
 400                 'id': '048195-004_PLUS7-F',
 401                 'ext': 'flv',
 402                 'title': 'X:enius',
 403                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 404                 'upload_date': '20140320',
 405             },
 406             'params': {
 407                 'skip_download': 'Requires rtmpdump'
 408             }
 409         },
 410         # francetv embed
 411         {
 412             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 413             'info_dict': {
 414                 'id': 'EV_30231',
 415                 'ext': 'mp4',
 416                 'title': 'Alcaline, le concert avec Calogero',
 417                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 418                 'upload_date': '20150226',
 419                 'timestamp': 1424989860,
 420                 'duration': 5400,
 421             },
 422             'params': {
 423                 # m3u8 downloads
 424                 'skip_download': True,
 425             },
 426             'expected_warnings': [
 427                 'Forbidden'
 428             ]
 429         },
 430         # Condé Nast embed
 431         {
 432             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 433             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 434             'info_dict': {
 435                 'id': '53501be369702d3275860000',
 436                 'ext': 'mp4',
 437                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 438             }
 439         },
 440         # Dailymotion embed
 441         {
 442             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 443             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 444             'info_dict': {
 445                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 446                 'ext': 'mp4',
 447                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 448                 'uploader': 'Spi0n',
 449             },
 450             'add_ie': ['Dailymotion'],
 451         },
 452         # YouTube embed
 453         {
 454             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 455             'info_dict': {
 456                 'id': 'FXRb4ykk4S0',
 457                 'ext': 'mp4',
 458                 'title': 'The NBL Auction 2014',
 459                 'uploader': 'BADMINTON England',
 460                 'uploader_id': 'BADMINTONEvents',
 461                 'upload_date': '20140603',
 462                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 463             },
 464             'add_ie': ['Youtube'],
 465             'params': {
 466                 'skip_download': True,
 467             }
 468         },
 469         # MTVServices embed
 470         {
 471             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 472             'md5': '35727f82f58c76d996fc188f9755b0d5',
 473             'info_dict': {
 474                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 475                 'ext': 'mp4',
 476                 'title': 'Review',
 477                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 478             },
 479         },
 480         # YouTube embed via <data-embed-url="">
 481         {
 482             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 483             'info_dict': {
 484                 'id': '4vAffPZIT44',
 485                 'ext': 'mp4',
 486                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 487                 'uploader': 'Gameloft',
 488                 'uploader_id': 'gameloft',
 489                 'upload_date': '20140828',
 490                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 491             },
 492             'params': {
 493                 'skip_download': True,
 494             }
 495         },
 496         # Camtasia studio
 497         {
 498             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 499             'playlist': [{
 500                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 501                 'info_dict': {
 502                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 503                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 504                     'ext': 'flv',
 505                     'duration': 2235.90,
 506                 }
 507             }, {
 508                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 509                 'info_dict': {
 510                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 511                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 512                     'ext': 'flv',
 513                     'duration': 2235.93,
 514                 }
 515             }],
 516             'info_dict': {
 517                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 518             }
 519         },
 520         # Flowplayer
 521         {
 522             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 523             'md5': '9d65602bf31c6e20014319c7d07fba27',
 524             'info_dict': {
 525                 'id': '5123ea6d5e5a7',
 526                 'ext': 'mp4',
 527                 'age_limit': 18,
 528                 'uploader': 'www.handjobhub.com',
 529                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 530             }
 531         },
 532         # Multiple brightcove videos
 533         # https://github.com/rg3/youtube-dl/issues/2283
 534         {
 535             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 536             'info_dict': {
 537                 'id': 'always-never',
 538                 'title': 'Always / Never - The New Yorker',
 539             },
 540             'playlist_count': 3,
 541             'params': {
 542                 'extract_flat': False,
 543                 'skip_download': True,
 544             }
 545         },
 546         # MLB embed
 547         {
 548             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 549             'md5': '96f09a37e44da40dd083e12d9a683327',
 550             'info_dict': {
 551                 'id': '33322633',
 552                 'ext': 'mp4',
 553                 'title': 'Ump changes call to ball',
 554                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 555                 'duration': 48,
 556                 'timestamp': 1401537900,
 557                 'upload_date': '20140531',
 558                 'thumbnail': 're:^https?://.*\.jpg$',
 559             },
 560         },
 561         # Wistia embed
 562         {
 563             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 564             'md5': '8788b683c777a5cf25621eaf286d0c23',
 565             'info_dict': {
 566                 'id': '1cfaf6b7ea',
 567                 'ext': 'mov',
 568                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 569                 'duration': 643.0,
 570                 'filesize': 182808282,
 571                 'uploader': 'education-portal.com',
 572             },
 573         },
 574         {
 575             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 576             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 577             'info_dict': {
 578                 'id': 'uxjb0lwrcz',
 579                 'ext': 'mp4',
 580                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 581                 'duration': 1715.0,
 582                 'uploader': 'thoughtworks.wistia.com',
 583             },
 584         },
 585         # Soundcloud embed
 586         {
 587             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 588             'info_dict': {
 589                 'id': '174391317',
 590                 'ext': 'mp3',
 591                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 592                 'uploader': 'Sophos Security',
 593                 'title': 'Chet Chat 171 - Oct 29, 2014',
 594                 'upload_date': '20141029',
 595             }
 596         },
 597         # Livestream embed
 598         {
 599             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 600             'info_dict': {
 601                 'id': '67864563',
 602                 'ext': 'flv',
 603                 'upload_date': '20141112',
 604                 'title': 'Rosetta #CometLanding webcast HL 10',
 605             }
 606         },
 607         # LazyYT
 608         {
 609             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 610             'info_dict': {
 611                 'id': '1986',
 612                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 613             },
 614             'playlist_mincount': 2,
 615         },
 616         # Cinchcast embed
 617         {
 618             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 619             'info_dict': {
 620                 'id': '7141703',
 621                 'ext': 'mp3',
 622                 'upload_date': '20141126',
 623                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 624             }
 625         },
 626         # Cinerama player
 627         {
 628             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 629             'info_dict': {
 630                 'id': '730m_DandD_1901_512k',
 631                 'ext': 'mp4',
 632                 'uploader': 'www.abc.net.au',
 633                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 634             }
 635         },
 636         # embedded viddler video
 637         {
 638             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 639             'info_dict': {
 640                 'id': '4d03aad9',
 641                 'ext': 'mp4',
 642                 'uploader': 'deadspin',
 643                 'title': 'WALL-TO-GORTAT',
 644                 'timestamp': 1422285291,
 645                 'upload_date': '20150126',
 646             },
 647             'add_ie': ['Viddler'],
 648         },
 649         # Libsyn embed
 650         {
 651             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 652             'info_dict': {
 653                 'id': '3377616',
 654                 'ext': 'mp3',
 655                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 656                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 657                 'upload_date': '20150220',
 658             },
 659         },
 660         # jwplayer YouTube
 661         {
 662             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 663             'info_dict': {
 664                 'id': 'Mrj4DVp2zeA',
 665                 'ext': 'mp4',
 666                 'upload_date': '20150212',
 667                 'uploader': 'The National Archives UK',
 668                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 669                 'uploader_id': 'NationalArchives08',
 670                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 671             },
 672         },
 673         # rtl.nl embed
 674         {
 675             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 676             'playlist_mincount': 5,
 677             'info_dict': {
 678                 'id': 'aanslagen-kopenhagen',
 679                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 680             }
 681         },
 682         # Zapiks embed
 683         {
 684             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 685             'info_dict': {
 686                 'id': '118046',
 687                 'ext': 'mp4',
 688                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 689             }
 690         },
 691         # Kaltura embed
 692         {
 693             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 694             'info_dict': {
 695                 'id': '1_eergr3h1',
 696                 'ext': 'mp4',
 697                 'upload_date': '20150226',
 698                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 699                 'timestamp': int,
 700                 'title': 'John Carlson Postgame 2/25/15',
 701             },
 702         },
 703         # Kaltura embed (different embed code)
 704         {
 705             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 706             'info_dict': {
 707                 'id': '1_a52wc67y',
 708                 'ext': 'flv',
 709                 'upload_date': '20150127',
 710                 'uploader_id': 'PremierMedia',
 711                 'timestamp': int,
 712                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 713             },
 714         },
 715         # Eagle.Platform embed (generic URL)
 716         {
 717             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 718             'info_dict': {
 719                 'id': '227304',
 720                 'ext': 'mp4',
 721                 'title': 'Навальный вышел на свободу',
 722                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 723                 'thumbnail': 're:^https?://.*\.jpg$',
 724                 'duration': 87,
 725                 'view_count': int,
 726                 'age_limit': 0,
 727             },
 728         },
 729         # ClipYou (Eagle.Platform) embed (custom URL)
 730         {
 731             'url': 'http://muz-tv.ru/play/7129/',
 732             'info_dict': {
 733                 'id': '12820',
 734                 'ext': 'mp4',
 735                 'title': "'O Sole Mio",
 736                 'thumbnail': 're:^https?://.*\.jpg$',
 737                 'duration': 216,
 738                 'view_count': int,
 739             },
 740         },
 741         # Pladform embed
 742         {
 743             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 744             'info_dict': {
 745                 'id': '100183293',
 746                 'ext': 'mp4',
 747                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 748                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 749                 'thumbnail': 're:^https?://.*\.jpg$',
 750                 'duration': 694,
 751                 'age_limit': 0,
 752             },
 753         },
 754         # Playwire embed
 755         {
 756             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 757             'info_dict': {
 758                 'id': '3519514',
 759                 'ext': 'mp4',
 760                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 761                 'thumbnail': 're:^https?://.*\.png$',
 762                 'duration': 45.115,
 763             },
 764         },
 765         # 5min embed
 766         {
 767             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 768             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 769             'info_dict': {
 770                 'id': '518726732',
 771                 'ext': 'mp4',
 772                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 773             },
 774         },
 775         # SVT embed
 776         {
 777             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 778             'info_dict': {
 779                 'id': '2900353',
 780                 'ext': 'flv',
 781                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 782                 'duration': 27,
 783                 'age_limit': 0,
 784             },
 785         },
 786         # Crooks and Liars embed
 787         {
 788             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 789             'info_dict': {
 790                 'id': '8RUoRhRi',
 791                 'ext': 'mp4',
 792                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 793                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 794                 'timestamp': 1428207000,
 795                 'upload_date': '20150405',
 796                 'uploader': 'Heather',
 797             },
 798         },
 799         # Crooks and Liars external embed
 800         {
 801             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 802             'info_dict': {
 803                 'id': 'MTE3MjUtMzQ2MzA',
 804                 'ext': 'mp4',
 805                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 806                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 807                 'timestamp': 1265032391,
 808                 'upload_date': '20100201',
 809                 'uploader': 'Heather',
 810             },
 811         },
 812         # NBC Sports vplayer embed
 813         {
 814             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 815             'info_dict': {
 816                 'id': 'ln7x1qSThw4k',
 817                 'ext': 'flv',
 818                 'title': "PFT Live: New leader in the 'new-look' defense",
 819                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 820             },
 821         },
 822         # UDN embed
 823         {
 824             'url': 'http://www.udn.com/news/story/7314/822787',
 825             'md5': 'fd2060e988c326991037b9aff9df21a6',
 826             'info_dict': {
 827                 'id': '300346',
 828                 'ext': 'mp4',
 829                 'title': '中一中男師變性 全校師生力挺',
 830                 'thumbnail': 're:^https?://.*\.jpg$',
 831             }
 832         },
 833         # Ooyala embed
 834         {
 835             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 836             'info_dict': {
 837                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 838                 'ext': 'mp4',
 839                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 840                 'title': 'This is what separates the Excel masters from the wannabes',
 841             },
 842             'params': {
 843                 # m3u8 downloads
 844                 'skip_download': True,
 845             }
 846         },
 847         # Contains a SMIL manifest
 848         {
 849             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 850             'info_dict': {
 851                 'id': 'file',
 852                 'ext': 'flv',
 853                 'title': '+ Football: Lottery Champions League Europe',
 854                 'uploader': 'www.telewebion.com',
 855             },
 856             'params': {
 857                 # rtmpe downloads
 858                 'skip_download': True,
 859             }
 860         },
 861         # Brightcove URL in single quotes
 862         {
 863             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 864             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 865             'info_dict': {
 866                 'id': '4255764656001',
 867                 'ext': 'mp4',
 868                 'title': 'SN Presents: Russell Martin, World Citizen',
 869                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 870                 'uploader': 'Rogers Sportsnet',
 871             },
 872         },
 873         # Dailymotion Cloud video
 874         {
 875             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
 876             'md5': '49444254273501a64675a7e68c502681',
 877             'info_dict': {
 878                 'id': '5585de919473990de4bee11b',
 879                 'ext': 'mp4',
 880                 'title': 'Le débat',
 881                 'thumbnail': 're:^https?://.*\.jpe?g$',
 882             }
 883         },
 884         # OnionStudios embed
 885         {
 886             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
 887             'info_dict': {
 888                 'id': '2855',
 889                 'ext': 'mp4',
 890                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
 891                 'thumbnail': 're:^https?://.*\.jpe?g$',
 892                 'uploader': 'ClickHole',
 893                 'uploader_id': 'clickhole',
 894             }
 895         },
 896         # SnagFilms embed
 897         {
 898             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
 899             'info_dict': {
 900                 'id': '74849a00-85a9-11e1-9660-123139220831',
 901                 'ext': 'mp4',
 902                 'title': '#whilewewatch',
 903             }
 904         },
 905         # AdobeTVVideo embed
 906         {
 907             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
 908             'md5': '43662b577c018ad707a63766462b1e87',
 909             'info_dict': {
 910                 'id': '2456',
 911                 'ext': 'mp4',
 912                 'title': 'New experience with Acrobat DC',
 913                 'description': 'New experience with Acrobat DC',
 914                 'duration': 248.667,
 915             },
 916         }
 917     ]
 918
 919     def report_following_redirect(self, new_url):
 920         """Report information extraction."""
 921         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 922
 923     def _extract_rss(self, url, video_id, doc):
 924         playlist_title = doc.find('./channel/title').text
 925         playlist_desc_el = doc.find('./channel/description')
 926         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 927
 928         entries = []
 929         for it in doc.findall('./channel/item'):
 930             next_url = xpath_text(it, 'link', fatal=False)
 931             if not next_url:
 932                 enclosure_nodes = it.findall('./enclosure')
 933                 for e in enclosure_nodes:
 934                     next_url = e.attrib.get('url')
 935                     if next_url:
 936                         break
 937
 938             if not next_url:
 939                 continue
 940
 941             entries.append({
 942                 '_type': 'url',
 943                 'url': next_url,
 944                 'title': it.find('title').text,
 945             })
 946
 947         return {
 948             '_type': 'playlist',
 949             'id': url,
 950             'title': playlist_title,
 951             'description': playlist_desc,
 952             'entries': entries,
 953         }
 954
 955     def _extract_camtasia(self, url, video_id, webpage):
 956         """ Returns None if no camtasia video can be found. """
 957
 958         camtasia_cfg = self._search_regex(
 959             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 960             webpage, 'camtasia configuration file', default=None)
 961         if camtasia_cfg is None:
 962             return None
 963
 964         title = self._html_search_meta('DC.title', webpage, fatal=True)
 965
 966         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 967         camtasia_cfg = self._download_xml(
 968             camtasia_url, video_id,
 969             note='Downloading camtasia configuration',
 970             errnote='Failed to download camtasia configuration')
 971         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 972
 973         entries = []
 974         for n in fileset_node.getchildren():
 975             url_n = n.find('./uri')
 976             if url_n is None:
 977                 continue
 978
 979             entries.append({
 980                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 981                 'title': '%s - %s' % (title, n.tag),
 982                 'url': compat_urlparse.urljoin(url, url_n.text),
 983                 'duration': float_or_none(n.find('./duration').text),
 984             })
 985
 986         return {
 987             '_type': 'playlist',
 988             'entries': entries,
 989             'title': title,
 990         }
 991
 992     def _real_extract(self, url):
 993         if url.startswith('//'):
 994             return {
 995                 '_type': 'url',
 996                 'url': self.http_scheme() + url,
 997             }
 998
 999         parsed_url = compat_urlparse.urlparse(url)
1000         if not parsed_url.scheme:
1001             default_search = self._downloader.params.get('default_search')
1002             if default_search is None:
1003                 default_search = 'fixup_error'
1004
1005             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1006                 if '/' in url:
1007                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1008                     return self.url_result('http://' + url)
1009                 elif default_search != 'fixup_error':
1010                     if default_search == 'auto_warning':
1011                         if re.match(r'^(?:url|URL)$', url):
1012                             raise ExtractorError(
1013                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1014                                 expected=True)
1015                         else:
1016                             self._downloader.report_warning(
1017                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1018                     return self.url_result('ytsearch:' + url)
1019
1020             if default_search in ('error', 'fixup_error'):
1021                 raise ExtractorError(
1022                     '%r is not a valid URL. '
1023                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1024                     % (url, url), expected=True)
1025             else:
1026                 if ':' not in default_search:
1027                     default_search += ':'
1028                 return self.url_result(default_search + url)
1029
1030         url, smuggled_data = unsmuggle_url(url)
1031         force_videoid = None
1032         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1033         if smuggled_data and 'force_videoid' in smuggled_data:
1034             force_videoid = smuggled_data['force_videoid']
1035             video_id = force_videoid
1036         else:
1037             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1038
1039         self.to_screen('%s: Requesting header' % video_id)
1040
1041         head_req = HEADRequest(url)
1042         head_response = self._request_webpage(
1043             head_req, video_id,
1044             note=False, errnote='Could not send HEAD request to %s' % url,
1045             fatal=False)
1046
1047         if head_response is not False:
1048             # Check for redirect
1049             new_url = head_response.geturl()
1050             if url != new_url:
1051                 self.report_following_redirect(new_url)
1052                 if force_videoid:
1053                     new_url = smuggle_url(
1054                         new_url, {'force_videoid': force_videoid})
1055                 return self.url_result(new_url)
1056
1057         full_response = None
1058         if head_response is False:
1059             request = compat_urllib_request.Request(url)
1060             request.add_header('Accept-Encoding', '*')
1061             full_response = self._request_webpage(request, video_id)
1062             head_response = full_response
1063
1064         # Check for direct link to a video
1065         content_type = head_response.headers.get('Content-Type', '')
1066         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1067         if m:
1068             upload_date = unified_strdate(
1069                 head_response.headers.get('Last-Modified'))
1070             return {
1071                 'id': video_id,
1072                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1073                 'direct': True,
1074                 'formats': [{
1075                     'format_id': m.group('format_id'),
1076                     'url': url,
1077                     'vcodec': 'none' if m.group('type') == 'audio' else None
1078                 }],
1079                 'upload_date': upload_date,
1080             }
1081
1082         if not self._downloader.params.get('test', False) and not is_intentional:
1083             force = self._downloader.params.get('force_generic_extractor', False)
1084             self._downloader.report_warning(
1085                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1086
1087         if not full_response:
1088             request = compat_urllib_request.Request(url)
1089             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1090             # making it impossible to download only chunk of the file (yet we need only 512kB to
1091             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1092             # that will always result in downloading the whole file that is not desirable.
1093             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1094             # to accept raw bytes and being able to download only a chunk.
1095             # It may probably better to solve this by checking Content-Type for application/octet-stream
1096             # after HEAD request finishes, but not sure if we can rely on this.
1097             request.add_header('Accept-Encoding', '*')
1098             full_response = self._request_webpage(request, video_id)
1099
1100         # Maybe it's a direct link to a video?
1101         # Be careful not to download the whole thing!
1102         first_bytes = full_response.read(512)
1103         if not is_html(first_bytes):
1104             self._downloader.report_warning(
1105                 'URL could be a direct video link, returning it as such.')
1106             upload_date = unified_strdate(
1107                 head_response.headers.get('Last-Modified'))
1108             return {
1109                 'id': video_id,
1110                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1111                 'direct': True,
1112                 'url': url,
1113                 'upload_date': upload_date,
1114             }
1115
1116         webpage = self._webpage_read_content(
1117             full_response, url, video_id, prefix=first_bytes)
1118
1119         self.report_extraction(video_id)
1120
1121         # Is it an RSS feed?
1122         try:
1123             doc = parse_xml(webpage)
1124             if doc.tag == 'rss':
1125                 return self._extract_rss(url, video_id, doc)
1126         except compat_xml_parse_error:
1127             pass
1128
1129         # Is it a Camtasia project?
1130         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1131         if camtasia_res is not None:
1132             return camtasia_res
1133
1134         # Sometimes embedded video player is hidden behind percent encoding
1135         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1136         # Unescaping the whole page allows to handle those cases in a generic way
1137         webpage = compat_urllib_parse_unquote(webpage)
1138
1139         # it's tempting to parse this further, but you would
1140         # have to take into account all the variations like
1141         #   Video Title - Site Name
1142         #   Site Name | Video Title
1143         #   Video Title - Tagline | Site Name
1144         # and so on and so forth; it's just not practical
1145         video_title = self._html_search_regex(
1146             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1147             default='video')
1148
1149         # Try to detect age limit automatically
1150         age_limit = self._rta_search(webpage)
1151         # And then there are the jokers who advertise that they use RTA,
1152         # but actually don't.
1153         AGE_LIMIT_MARKERS = [
1154             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1155         ]
1156         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1157             age_limit = 18
1158
1159         # video uploader is domain name
1160         video_uploader = self._search_regex(
1161             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1162
1163         # Helper method
1164         def _playlist_from_matches(matches, getter=None, ie=None):
1165             urlrs = orderedSet(
1166                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1167                 for m in matches)
1168             return self.playlist_result(
1169                 urlrs, playlist_id=video_id, playlist_title=video_title)
1170
1171         # Look for BrightCove:
1172         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1173         if bc_urls:
1174             self.to_screen('Brightcove video detected.')
1175             entries = [{
1176                 '_type': 'url',
1177                 'url': smuggle_url(bc_url, {'Referer': url}),
1178                 'ie_key': 'Brightcove'
1179             } for bc_url in bc_urls]
1180
1181             return {
1182                 '_type': 'playlist',
1183                 'title': video_title,
1184                 'id': video_id,
1185                 'entries': entries,
1186             }
1187
1188         # Look for embedded rtl.nl player
1189         matches = re.findall(
1190             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1191             webpage)
1192         if matches:
1193             return _playlist_from_matches(matches, ie='RtlNl')
1194
1195         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1196         if vimeo_url is not None:
1197             return self.url_result(vimeo_url)
1198
1199         vid_me_embed_url = self._search_regex(
1200             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1201             webpage, 'vid.me embed', default=None)
1202         if vid_me_embed_url is not None:
1203             return self.url_result(vid_me_embed_url, 'Vidme')
1204
1205         # Look for embedded YouTube player
1206         matches = re.findall(r'''(?x)
1207             (?:
1208                 <iframe[^>]+?src=|
1209                 data-video-url=|
1210                 <embed[^>]+?src=|
1211                 embedSWF\(?:\s*|
1212                 new\s+SWFObject\(
1213             )
1214             (["\'])
1215                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1216                 (?:embed|v|p)/.+?)
1217             \1''', webpage)
1218         if matches:
1219             return _playlist_from_matches(
1220                 matches, lambda m: unescapeHTML(m[1]))
1221
1222         # Look for lazyYT YouTube embed
1223         matches = re.findall(
1224             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1225         if matches:
1226             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1227
1228         # Look for embedded Dailymotion player
1229         matches = re.findall(
1230             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1231         if matches:
1232             return _playlist_from_matches(
1233                 matches, lambda m: unescapeHTML(m[1]))
1234
1235         # Look for embedded Dailymotion playlist player (#3822)
1236         m = re.search(
1237             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1238         if m:
1239             playlists = re.findall(
1240                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1241             if playlists:
1242                 return _playlist_from_matches(
1243                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1244
1245         # Look for embedded Wistia player
1246         match = re.search(
1247             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1248         if match:
1249             embed_url = self._proto_relative_url(
1250                 unescapeHTML(match.group('url')))
1251             return {
1252                 '_type': 'url_transparent',
1253                 'url': embed_url,
1254                 'ie_key': 'Wistia',
1255                 'uploader': video_uploader,
1256                 'title': video_title,
1257                 'id': video_id,
1258             }
1259
1260         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1261         if match:
1262             return {
1263                 '_type': 'url_transparent',
1264                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1265                 'ie_key': 'Wistia',
1266                 'uploader': video_uploader,
1267                 'title': video_title,
1268                 'id': match.group('id')
1269             }
1270
1271         # Look for embedded blip.tv player
1272         bliptv_url = BlipTVIE._extract_url(webpage)
1273         if bliptv_url:
1274             return self.url_result(bliptv_url, 'BlipTV')
1275
1276         # Look for SVT player
1277         svt_url = SVTIE._extract_url(webpage)
1278         if svt_url:
1279             return self.url_result(svt_url, 'SVT')
1280
1281         # Look for embedded condenast player
1282         matches = re.findall(
1283             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1284             webpage)
1285         if matches:
1286             return {
1287                 '_type': 'playlist',
1288                 'entries': [{
1289                     '_type': 'url',
1290                     'ie_key': 'CondeNast',
1291                     'url': ma,
1292                 } for ma in matches],
1293                 'title': video_title,
1294                 'id': video_id,
1295             }
1296
1297         # Look for Bandcamp pages with custom domain
1298         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1299         if mobj is not None:
1300             burl = unescapeHTML(mobj.group(1))
1301             # Don't set the extractor because it can be a track url or an album
1302             return self.url_result(burl)
1303
1304         # Look for embedded Vevo player
1305         mobj = re.search(
1306             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1307         if mobj is not None:
1308             return self.url_result(mobj.group('url'))
1309
1310         # Look for embedded Viddler player
1311         mobj = re.search(
1312             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1313             webpage)
1314         if mobj is not None:
1315             return self.url_result(mobj.group('url'))
1316
1317         # Look for NYTimes player
1318         mobj = re.search(
1319             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1320             webpage)
1321         if mobj is not None:
1322             return self.url_result(mobj.group('url'))
1323
1324         # Look for Libsyn player
1325         mobj = re.search(
1326             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1327         if mobj is not None:
1328             return self.url_result(mobj.group('url'))
1329
1330         # Look for Ooyala videos
1331         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1332                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1333                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1334                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1335         if mobj is not None:
1336             return OoyalaIE._build_url_result(mobj.group('ec'))
1337
1338         # Look for multiple Ooyala embeds on SBN network websites
1339         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1340         if mobj is not None:
1341             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1342             if embeds:
1343                 return _playlist_from_matches(
1344                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1345
1346         # Look for Aparat videos
1347         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1348         if mobj is not None:
1349             return self.url_result(mobj.group(1), 'Aparat')
1350
1351         # Look for MPORA videos
1352         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1353         if mobj is not None:
1354             return self.url_result(mobj.group(1), 'Mpora')
1355
1356         # Look for embedded NovaMov-based player
1357         mobj = re.search(
1358             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1359                     (?P<url>http://(?:(?:embed|www)\.)?
1360                         (?:novamov\.com|
1361                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1362                            videoweed\.(?:es|com)|
1363                            movshare\.(?:net|sx|ag)|
1364                            divxstage\.(?:eu|net|ch|co|at|ag))
1365                         /embed\.php.+?)\1''', webpage)
1366         if mobj is not None:
1367             return self.url_result(mobj.group('url'))
1368
1369         # Look for embedded Facebook player
1370         mobj = re.search(
1371             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1372         if mobj is not None:
1373             return self.url_result(mobj.group('url'), 'Facebook')
1374
1375         # Look for embedded VK player
1376         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1377         if mobj is not None:
1378             return self.url_result(mobj.group('url'), 'VK')
1379
1380         # Look for embedded ivi player
1381         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1382         if mobj is not None:
1383             return self.url_result(mobj.group('url'), 'Ivi')
1384
1385         # Look for embedded Huffington Post player
1386         mobj = re.search(
1387             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1388         if mobj is not None:
1389             return self.url_result(mobj.group('url'), 'HuffPost')
1390
1391         # Look for embed.ly
1392         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1393         if mobj is not None:
1394             return self.url_result(mobj.group('url'))
1395         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1396         if mobj is not None:
1397             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1398
1399         # Look for funnyordie embed
1400         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1401         if matches:
1402             return _playlist_from_matches(
1403                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1404
1405         # Look for BBC iPlayer embed
1406         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1407         if matches:
1408             return _playlist_from_matches(matches, ie='BBCCoUk')
1409
1410         # Look for embedded RUTV player
1411         rutv_url = RUTVIE._extract_url(webpage)
1412         if rutv_url:
1413             return self.url_result(rutv_url, 'RUTV')
1414
1415         # Look for embedded TVC player
1416         tvc_url = TVCIE._extract_url(webpage)
1417         if tvc_url:
1418             return self.url_result(tvc_url, 'TVC')
1419
1420         # Look for embedded SportBox player
1421         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1422         if sportbox_urls:
1423             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1424
1425         # Look for embedded PornHub player
1426         pornhub_url = PornHubIE._extract_url(webpage)
1427         if pornhub_url:
1428             return self.url_result(pornhub_url, 'PornHub')
1429
1430         # Look for embedded XHamster player
1431         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1432         if xhamster_urls:
1433             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1434
1435         # Look for embedded Tvigle player
1436         mobj = re.search(
1437             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1438         if mobj is not None:
1439             return self.url_result(mobj.group('url'), 'Tvigle')
1440
1441         # Look for embedded TED player
1442         mobj = re.search(
1443             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1444         if mobj is not None:
1445             return self.url_result(mobj.group('url'), 'TED')
1446
1447         # Look for embedded Ustream videos
1448         mobj = re.search(
1449             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1450         if mobj is not None:
1451             return self.url_result(mobj.group('url'), 'Ustream')
1452
1453         # Look for embedded arte.tv player
1454         mobj = re.search(
1455             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1456             webpage)
1457         if mobj is not None:
1458             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1459
1460         # Look for embedded francetv player
1461         mobj = re.search(
1462             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1463             webpage)
1464         if mobj is not None:
1465             return self.url_result(mobj.group('url'))
1466
1467         # Look for embedded smotri.com player
1468         smotri_url = SmotriIE._extract_url(webpage)
1469         if smotri_url:
1470             return self.url_result(smotri_url, 'Smotri')
1471
1472         # Look for embedded Myvi.ru player
1473         myvi_url = MyviIE._extract_url(webpage)
1474         if myvi_url:
1475             return self.url_result(myvi_url)
1476
1477         # Look for embedded soundcloud player
1478         mobj = re.search(
1479             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1480             webpage)
1481         if mobj is not None:
1482             url = unescapeHTML(mobj.group('url'))
1483             return self.url_result(url)
1484
1485         # Look for embedded vulture.com player
1486         mobj = re.search(
1487             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1488             webpage)
1489         if mobj is not None:
1490             url = unescapeHTML(mobj.group('url'))
1491             return self.url_result(url, ie='Vulture')
1492
1493         # Look for embedded mtvservices player
1494         mobj = re.search(
1495             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1496             webpage)
1497         if mobj is not None:
1498             url = unescapeHTML(mobj.group('url'))
1499             return self.url_result(url, ie='MTVServicesEmbedded')
1500
1501         # Look for embedded yahoo player
1502         mobj = re.search(
1503             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1504             webpage)
1505         if mobj is not None:
1506             return self.url_result(mobj.group('url'), 'Yahoo')
1507
1508         # Look for embedded sbs.com.au player
1509         mobj = re.search(
1510             r'''(?x)
1511             (?:
1512                 <meta\s+property="og:video"\s+content=|
1513                 <iframe[^>]+?src=
1514             )
1515             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1516             webpage)
1517         if mobj is not None:
1518             return self.url_result(mobj.group('url'), 'SBS')
1519
1520         # Look for embedded Cinchcast player
1521         mobj = re.search(
1522             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1523             webpage)
1524         if mobj is not None:
1525             return self.url_result(mobj.group('url'), 'Cinchcast')
1526
1527         mobj = re.search(
1528             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1529             webpage)
1530         if not mobj:
1531             mobj = re.search(
1532                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1533                 webpage)
1534         if mobj is not None:
1535             return self.url_result(mobj.group('url'), 'MLB')
1536
1537         mobj = re.search(
1538             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1539             webpage)
1540         if mobj is not None:
1541             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1542
1543         mobj = re.search(
1544             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1545             webpage)
1546         if mobj is not None:
1547             return self.url_result(mobj.group('url'), 'Livestream')
1548
1549         # Look for Zapiks embed
1550         mobj = re.search(
1551             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1552         if mobj is not None:
1553             return self.url_result(mobj.group('url'), 'Zapiks')
1554
1555         # Look for Kaltura embeds
1556         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
1557                 re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
1558         if mobj is not None:
1559             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1560
1561         # Look for Eagle.Platform embeds
1562         mobj = re.search(
1563             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1564         if mobj is not None:
1565             return self.url_result(mobj.group('url'), 'EaglePlatform')
1566
1567         # Look for ClipYou (uses Eagle.Platform) embeds
1568         mobj = re.search(
1569             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1570         if mobj is not None:
1571             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1572
1573         # Look for Pladform embeds
1574         mobj = re.search(
1575             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1576         if mobj is not None:
1577             return self.url_result(mobj.group('url'), 'Pladform')
1578
1579         # Look for Playwire embeds
1580         mobj = re.search(
1581             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1582         if mobj is not None:
1583             return self.url_result(mobj.group('url'))
1584
1585         # Look for 5min embeds
1586         mobj = re.search(
1587             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1588         if mobj is not None:
1589             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1590
1591         # Look for Crooks and Liars embeds
1592         mobj = re.search(
1593             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1594         if mobj is not None:
1595             return self.url_result(mobj.group('url'))
1596
1597         # Look for NBC Sports VPlayer embeds
1598         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1599         if nbc_sports_url:
1600             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1601
1602         # Look for UDN embeds
1603         mobj = re.search(
1604             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1605         if mobj is not None:
1606             return self.url_result(
1607                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1608
1609         # Look for Senate ISVP iframe
1610         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1611         if senate_isvp_url:
1612             return self.url_result(senate_isvp_url, 'SenateISVP')
1613
1614         # Look for Dailymotion Cloud videos
1615         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1616         if dmcloud_url:
1617             return self.url_result(dmcloud_url, 'DailymotionCloud')
1618
1619         # Look for OnionStudios embeds
1620         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1621         if onionstudios_url:
1622             return self.url_result(onionstudios_url)
1623
1624         # Look for SnagFilms embeds
1625         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1626         if snagfilms_url:
1627             return self.url_result(snagfilms_url)
1628
1629         # Look for AdobeTVVideo embeds
1630         mobj = re.search(
1631             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1632             webpage)
1633         if mobj is not None:
1634             return self.url_result(
1635                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1636                 'AdobeTVVideo')
1637
1638         def check_video(vurl):
1639             if YoutubeIE.suitable(vurl):
1640                 return True
1641             vpath = compat_urlparse.urlparse(vurl).path
1642             vext = determine_ext(vpath)
1643             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1644
1645         def filter_video(urls):
1646             return list(filter(check_video, urls))
1647
1648         # Start with something easy: JW Player in SWFObject
1649         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1650         if not found:
1651             # Look for gorilla-vid style embedding
1652             found = filter_video(re.findall(r'''(?sx)
1653                 (?:
1654                     jw_plugins|
1655                     JWPlayerOptions|
1656                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1657                 )
1658                 .*?
1659                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1660         if not found:
1661             # Broaden the search a little bit
1662             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1663         if not found:
1664             # Broaden the findall a little bit: JWPlayer JS loader
1665             found = filter_video(re.findall(
1666                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1667         if not found:
1668             # Flow player
1669             found = filter_video(re.findall(r'''(?xs)
1670                 flowplayer\("[^"]+",\s*
1671                     \{[^}]+?\}\s*,
1672                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1673                         ["']?url["']?\s*:\s*["']([^"']+)["']
1674             ''', webpage))
1675         if not found:
1676             # Cinerama player
1677             found = re.findall(
1678                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1679         if not found:
1680             # Try to find twitter cards info
1681             found = filter_video(re.findall(
1682                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1683         if not found:
1684             # We look for Open Graph info:
1685             # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
1686             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1687             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1688             if m_video_type is not None:
1689                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1690         if not found:
1691             # HTML5 video
1692             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1693         if not found:
1694             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1695             found = re.search(
1696                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1697                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1698                 webpage)
1699             if not found:
1700                 # Look also in Refresh HTTP header
1701                 refresh_header = head_response.headers.get('Refresh')
1702                 if refresh_header:
1703                     found = re.search(REDIRECT_REGEX, refresh_header)
1704             if found:
1705                 new_url = compat_urlparse.urljoin(url, found.group(1))
1706                 self.report_following_redirect(new_url)
1707                 return {
1708                     '_type': 'url',
1709                     'url': new_url,
1710                 }
1711         if not found:
1712             raise UnsupportedError(url)
1713
1714         entries = []
1715         for video_url in found:
1716             video_url = compat_urlparse.urljoin(url, video_url)
1717             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1718
1719             # Sometimes, jwplayer extraction will result in a YouTube URL
1720             if YoutubeIE.suitable(video_url):
1721                 entries.append(self.url_result(video_url, 'Youtube'))
1722                 continue
1723
1724             # here's a fun little line of code for you:
1725             video_id = os.path.splitext(video_id)[0]
1726
1727             if determine_ext(video_url) == 'smil':
1728                 entries.append({
1729                     'id': video_id,
1730                     'formats': self._extract_smil_formats(video_url, video_id),
1731                     'uploader': video_uploader,
1732                     'title': video_title,
1733                     'age_limit': age_limit,
1734                 })
1735             else:
1736                 entries.append({
1737                     'id': video_id,
1738                     'url': video_url,
1739                     'uploader': video_uploader,
1740                     'title': video_title,
1741                     'age_limit': age_limit,
1742                 })
1743
1744         if len(entries) == 1:
1745             return entries[0]
1746         else:
1747             for num, e in enumerate(entries, start=1):
1748                 # 'url' results don't have a title
1749                 if e.get('title') is not None:
1750                     e['title'] = '%s (%d)' % (e['title'], num)
1751             return {
1752                 '_type': 'playlist',
1753                 'entries': entries,
1754             }