_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse_unquote,
  12     compat_urllib_request,
  13     compat_urlparse,
  14     compat_xml_parse_error,
  15 )
  16 from ..utils import (
  17     determine_ext,
  18     ExtractorError,
  19     float_or_none,
  20     HEADRequest,
  21     is_html,
  22     orderedSet,
  23     parse_xml,
  24     smuggle_url,
  25     unescapeHTML,
  26     unified_strdate,
  27     unsmuggle_url,
  28     UnsupportedError,
  29     url_basename,
  30     xpath_text,
  31 )
  32 from .brightcove import BrightcoveIE
  33 from .nbc import NBCSportsVPlayerIE
  34 from .ooyala import OoyalaIE
  35 from .rutv import RUTVIE
  36 from .tvc import TVCIE
  37 from .sportbox import SportBoxEmbedIE
  38 from .smotri import SmotriIE
  39 from .myvi import MyviIE
  40 from .condenast import CondeNastIE
  41 from .udn import UDNEmbedIE
  42 from .senateisvp import SenateISVPIE
  43 from .bliptv import BlipTVIE
  44 from .svt import SVTIE
  45 from .pornhub import PornHubIE
  46 from .xhamster import XHamsterEmbedIE
  47 from .vimeo import VimeoIE
  48 from .dailymotion import DailymotionCloudIE
  49 from .onionstudios import OnionStudiosIE
  50 from .snagfilms import SnagFilmsEmbedIE
  51
  52
  53 class GenericIE(InfoExtractor):
  54     IE_DESC = 'Generic downloader that works on some sites'
  55     _VALID_URL = r'.*'
  56     IE_NAME = 'generic'
  57     _TESTS = [
  58         # Direct link to a video
  59         {
  60             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  61             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  62             'info_dict': {
  63                 'id': 'trailer',
  64                 'ext': 'mp4',
  65                 'title': 'trailer',
  66                 'upload_date': '20100513',
  67             }
  68         },
  69         # Direct link to media delivered compressed (until Accept-Encoding is *)
  70         {
  71             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  72             'md5': '128c42e68b13950268b648275386fc74',
  73             'info_dict': {
  74                 'id': 'FictionJunction-Parallel_Hearts',
  75                 'ext': 'flac',
  76                 'title': 'FictionJunction-Parallel_Hearts',
  77                 'upload_date': '20140522',
  78             },
  79             'expected_warnings': [
  80                 'URL could be a direct video link, returning it as such.'
  81             ]
  82         },
  83         # Direct download with broken HEAD
  84         {
  85             'url': 'http://ai-radio.org:8000/radio.opus',
  86             'info_dict': {
  87                 'id': 'radio',
  88                 'ext': 'opus',
  89                 'title': 'radio',
  90             },
  91             'params': {
  92                 'skip_download': True,  # infinite live stream
  93             },
  94             'expected_warnings': [
  95                 r'501.*Not Implemented'
  96             ],
  97         },
  98         # Direct link with incorrect MIME type
  99         {
 100             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 101             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 102             'info_dict': {
 103                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 104                 'id': '5_Lennart_Poettering_-_Systemd',
 105                 'ext': 'webm',
 106                 'title': '5_Lennart_Poettering_-_Systemd',
 107                 'upload_date': '20141120',
 108             },
 109             'expected_warnings': [
 110                 'URL could be a direct video link, returning it as such.'
 111             ]
 112         },
 113         # RSS feed
 114         {
 115             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 116             'info_dict': {
 117                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 118                 'title': 'Zero Punctuation',
 119                 'description': 're:.*groundbreaking video review series.*'
 120             },
 121             'playlist_mincount': 11,
 122         },
 123         # RSS feed with enclosure
 124         {
 125             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 126             'info_dict': {
 127                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 128                 'ext': 'm4v',
 129                 'upload_date': '20150228',
 130                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 131             }
 132         },
 133         # google redirect
 134         {
 135             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 136             'info_dict': {
 137                 'id': 'cmQHVoWB5FY',
 138                 'ext': 'mp4',
 139                 'upload_date': '20130224',
 140                 'uploader_id': 'TheVerge',
 141                 'description': 're:^Chris Ziegler takes a look at the\.*',
 142                 'uploader': 'The Verge',
 143                 'title': 'First Firefox OS phones side-by-side',
 144             },
 145             'params': {
 146                 'skip_download': False,
 147             }
 148         },
 149         {
 150             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 151             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 152             'info_dict': {
 153                 'id': '13601338388002',
 154                 'ext': 'mp4',
 155                 'uploader': 'www.hodiho.fr',
 156                 'title': 'R\u00e9gis plante sa Jeep',
 157             }
 158         },
 159         # bandcamp page with custom domain
 160         {
 161             'add_ie': ['Bandcamp'],
 162             'url': 'http://bronyrock.com/track/the-pony-mash',
 163             'info_dict': {
 164                 'id': '3235767654',
 165                 'ext': 'mp3',
 166                 'title': 'The Pony Mash',
 167                 'uploader': 'M_Pallante',
 168             },
 169             'skip': 'There is a limit of 200 free downloads / month for the test song',
 170         },
 171         # embedded brightcove video
 172         # it also tests brightcove videos that need to set the 'Referer' in the
 173         # http requests
 174         {
 175             'add_ie': ['Brightcove'],
 176             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 177             'info_dict': {
 178                 'id': '2765128793001',
 179                 'ext': 'mp4',
 180                 'title': 'Le cours de bourse : l’analyse technique',
 181                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 182                 'uploader': 'BFM BUSINESS',
 183             },
 184             'params': {
 185                 'skip_download': True,
 186             },
 187         },
 188         {
 189             # https://github.com/rg3/youtube-dl/issues/2253
 190             'url': 'http://bcove.me/i6nfkrc3',
 191             'md5': '0ba9446db037002366bab3b3eb30c88c',
 192             'info_dict': {
 193                 'id': '3101154703001',
 194                 'ext': 'mp4',
 195                 'title': 'Still no power',
 196                 'uploader': 'thestar.com',
 197                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 198             },
 199             'add_ie': ['Brightcove'],
 200         },
 201         {
 202             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 203             'md5': 'fb973ecf6e4a78a67453647444222983',
 204             'info_dict': {
 205                 'id': '3414141473001',
 206                 'ext': 'mp4',
 207                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 208                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 209                 'uploader': 'Championat',
 210             },
 211         },
 212         {
 213             # https://github.com/rg3/youtube-dl/issues/3541
 214             'add_ie': ['Brightcove'],
 215             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 216             'info_dict': {
 217                 'id': '3866516442001',
 218                 'ext': 'mp4',
 219                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 220                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 221                 'uploader': 'SBS Broadcasting',
 222             },
 223             'skip': 'Restricted to Netherlands',
 224             'params': {
 225                 'skip_download': True,  # m3u8 download
 226             },
 227         },
 228         # ooyala video
 229         {
 230             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 231             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 232             'info_dict': {
 233                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 234                 'ext': 'mp4',
 235                 'title': '2cc213299525360.mov',  # that's what we get
 236             },
 237             'add_ie': ['Ooyala'],
 238         },
 239         # multiple ooyala embeds on SBN network websites
 240         {
 241             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 242             'info_dict': {
 243                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 244                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 245             },
 246             'playlist_mincount': 3,
 247             'params': {
 248                 'skip_download': True,
 249             },
 250             'add_ie': ['Ooyala'],
 251         },
 252         # embed.ly video
 253         {
 254             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 255             'info_dict': {
 256                 'id': '9ODmcdjQcHQ',
 257                 'ext': 'mp4',
 258                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 259                 'upload_date': '20140225',
 260                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 261                 'uploader': 'Tested',
 262                 'uploader_id': 'testedcom',
 263             },
 264             # No need to test YoutubeIE here
 265             'params': {
 266                 'skip_download': True,
 267             },
 268         },
 269         # funnyordie embed
 270         {
 271             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 272             'info_dict': {
 273                 'id': '18e820ec3f',
 274                 'ext': 'mp4',
 275                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 276                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 277             },
 278         },
 279         # RUTV embed
 280         {
 281             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 282             'info_dict': {
 283                 'id': '776940',
 284                 'ext': 'mp4',
 285                 'title': 'Охотское море стало целиком российским',
 286                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 287             },
 288             'params': {
 289                 # m3u8 download
 290                 'skip_download': True,
 291             },
 292         },
 293         # TVC embed
 294         {
 295             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 296             'info_dict': {
 297                 'id': '55304',
 298                 'ext': 'mp4',
 299                 'title': 'Дошкольное воспитание',
 300             },
 301         },
 302         # SportBox embed
 303         {
 304             'url': 'http://www.vestifinance.ru/articles/25753',
 305             'info_dict': {
 306                 'id': '25753',
 307                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 308             },
 309             'playlist': [{
 310                 'info_dict': {
 311                     'id': '370908',
 312                     'title': 'Госзаказ. День 3',
 313                     'ext': 'mp4',
 314                 }
 315             }, {
 316                 'info_dict': {
 317                     'id': '370905',
 318                     'title': 'Госзаказ. День 2',
 319                     'ext': 'mp4',
 320                 }
 321             }, {
 322                 'info_dict': {
 323                     'id': '370902',
 324                     'title': 'Госзаказ. День 1',
 325                     'ext': 'mp4',
 326                 }
 327             }],
 328             'params': {
 329                 # m3u8 download
 330                 'skip_download': True,
 331             },
 332         },
 333         # Myvi.ru embed
 334         {
 335             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 336             'info_dict': {
 337                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 338                 'ext': 'mp4',
 339                 'title': 'Ужастики, русский трейлер (2015)',
 340                 'thumbnail': 're:^https?://.*\.jpg$',
 341                 'duration': 153,
 342             }
 343         },
 344         # XHamster embed
 345         {
 346             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 347             'info_dict': {
 348                 'id': 'showthread',
 349                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 350             },
 351             'playlist_mincount': 7,
 352         },
 353         # Embedded TED video
 354         {
 355             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 356             'md5': '65fdff94098e4a607385a60c5177c638',
 357             'info_dict': {
 358                 'id': '1969',
 359                 'ext': 'mp4',
 360                 'title': 'Hidden miracles of the natural world',
 361                 'uploader': 'Louie Schwartzberg',
 362                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 363             }
 364         },
 365         # Embedded Ustream video
 366         {
 367             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 368             'md5': '27b99cdb639c9b12a79bca876a073417',
 369             'info_dict': {
 370                 'id': '45734260',
 371                 'ext': 'flv',
 372                 'uploader': 'AU SPA:  The NSA and Privacy',
 373                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 374             }
 375         },
 376         # nowvideo embed hidden behind percent encoding
 377         {
 378             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 379             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 380             'info_dict': {
 381                 'id': '06e53103ca9aa',
 382                 'ext': 'flv',
 383                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 384                 'description': 'No description',
 385             },
 386         },
 387         # arte embed
 388         {
 389             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 390             'md5': '7653032cbb25bf6c80d80f217055fa43',
 391             'info_dict': {
 392                 'id': '048195-004_PLUS7-F',
 393                 'ext': 'flv',
 394                 'title': 'X:enius',
 395                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 396                 'upload_date': '20140320',
 397             },
 398             'params': {
 399                 'skip_download': 'Requires rtmpdump'
 400             }
 401         },
 402         # francetv embed
 403         {
 404             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 405             'info_dict': {
 406                 'id': 'EV_30231',
 407                 'ext': 'mp4',
 408                 'title': 'Alcaline, le concert avec Calogero',
 409                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 410                 'upload_date': '20150226',
 411                 'timestamp': 1424989860,
 412                 'duration': 5400,
 413             },
 414             'params': {
 415                 # m3u8 downloads
 416                 'skip_download': True,
 417             },
 418             'expected_warnings': [
 419                 'Forbidden'
 420             ]
 421         },
 422         # Condé Nast embed
 423         {
 424             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 425             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 426             'info_dict': {
 427                 'id': '53501be369702d3275860000',
 428                 'ext': 'mp4',
 429                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 430             }
 431         },
 432         # Dailymotion embed
 433         {
 434             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 435             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 436             'info_dict': {
 437                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 438                 'ext': 'mp4',
 439                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 440                 'uploader': 'Spi0n',
 441             },
 442             'add_ie': ['Dailymotion'],
 443         },
 444         # YouTube embed
 445         {
 446             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 447             'info_dict': {
 448                 'id': 'FXRb4ykk4S0',
 449                 'ext': 'mp4',
 450                 'title': 'The NBL Auction 2014',
 451                 'uploader': 'BADMINTON England',
 452                 'uploader_id': 'BADMINTONEvents',
 453                 'upload_date': '20140603',
 454                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 455             },
 456             'add_ie': ['Youtube'],
 457             'params': {
 458                 'skip_download': True,
 459             }
 460         },
 461         # MTVServices embed
 462         {
 463             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 464             'md5': '35727f82f58c76d996fc188f9755b0d5',
 465             'info_dict': {
 466                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 467                 'ext': 'mp4',
 468                 'title': 'Review',
 469                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 470             },
 471         },
 472         # YouTube embed via <data-embed-url="">
 473         {
 474             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 475             'info_dict': {
 476                 'id': '4vAffPZIT44',
 477                 'ext': 'mp4',
 478                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 479                 'uploader': 'Gameloft',
 480                 'uploader_id': 'gameloft',
 481                 'upload_date': '20140828',
 482                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 483             },
 484             'params': {
 485                 'skip_download': True,
 486             }
 487         },
 488         # Camtasia studio
 489         {
 490             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 491             'playlist': [{
 492                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 493                 'info_dict': {
 494                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 495                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 496                     'ext': 'flv',
 497                     'duration': 2235.90,
 498                 }
 499             }, {
 500                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 501                 'info_dict': {
 502                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 503                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 504                     'ext': 'flv',
 505                     'duration': 2235.93,
 506                 }
 507             }],
 508             'info_dict': {
 509                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 510             }
 511         },
 512         # Flowplayer
 513         {
 514             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 515             'md5': '9d65602bf31c6e20014319c7d07fba27',
 516             'info_dict': {
 517                 'id': '5123ea6d5e5a7',
 518                 'ext': 'mp4',
 519                 'age_limit': 18,
 520                 'uploader': 'www.handjobhub.com',
 521                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 522             }
 523         },
 524         # Multiple brightcove videos
 525         # https://github.com/rg3/youtube-dl/issues/2283
 526         {
 527             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 528             'info_dict': {
 529                 'id': 'always-never',
 530                 'title': 'Always / Never - The New Yorker',
 531             },
 532             'playlist_count': 3,
 533             'params': {
 534                 'extract_flat': False,
 535                 'skip_download': True,
 536             }
 537         },
 538         # MLB embed
 539         {
 540             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 541             'md5': '96f09a37e44da40dd083e12d9a683327',
 542             'info_dict': {
 543                 'id': '33322633',
 544                 'ext': 'mp4',
 545                 'title': 'Ump changes call to ball',
 546                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 547                 'duration': 48,
 548                 'timestamp': 1401537900,
 549                 'upload_date': '20140531',
 550                 'thumbnail': 're:^https?://.*\.jpg$',
 551             },
 552         },
 553         # Wistia embed
 554         {
 555             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 556             'md5': '8788b683c777a5cf25621eaf286d0c23',
 557             'info_dict': {
 558                 'id': '1cfaf6b7ea',
 559                 'ext': 'mov',
 560                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 561                 'duration': 643.0,
 562                 'filesize': 182808282,
 563                 'uploader': 'education-portal.com',
 564             },
 565         },
 566         {
 567             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 568             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 569             'info_dict': {
 570                 'id': 'uxjb0lwrcz',
 571                 'ext': 'mp4',
 572                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 573                 'duration': 1715.0,
 574                 'uploader': 'thoughtworks.wistia.com',
 575             },
 576         },
 577         # Soundcloud embed
 578         {
 579             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 580             'info_dict': {
 581                 'id': '174391317',
 582                 'ext': 'mp3',
 583                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 584                 'uploader': 'Sophos Security',
 585                 'title': 'Chet Chat 171 - Oct 29, 2014',
 586                 'upload_date': '20141029',
 587             }
 588         },
 589         # Livestream embed
 590         {
 591             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 592             'info_dict': {
 593                 'id': '67864563',
 594                 'ext': 'flv',
 595                 'upload_date': '20141112',
 596                 'title': 'Rosetta #CometLanding webcast HL 10',
 597             }
 598         },
 599         # LazyYT
 600         {
 601             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 602             'info_dict': {
 603                 'id': '1986',
 604                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 605             },
 606             'playlist_mincount': 2,
 607         },
 608         # Cinchcast embed
 609         {
 610             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 611             'info_dict': {
 612                 'id': '7141703',
 613                 'ext': 'mp3',
 614                 'upload_date': '20141126',
 615                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 616             }
 617         },
 618         # Cinerama player
 619         {
 620             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 621             'info_dict': {
 622                 'id': '730m_DandD_1901_512k',
 623                 'ext': 'mp4',
 624                 'uploader': 'www.abc.net.au',
 625                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 626             }
 627         },
 628         # embedded viddler video
 629         {
 630             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 631             'info_dict': {
 632                 'id': '4d03aad9',
 633                 'ext': 'mp4',
 634                 'uploader': 'deadspin',
 635                 'title': 'WALL-TO-GORTAT',
 636                 'timestamp': 1422285291,
 637                 'upload_date': '20150126',
 638             },
 639             'add_ie': ['Viddler'],
 640         },
 641         # Libsyn embed
 642         {
 643             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 644             'info_dict': {
 645                 'id': '3377616',
 646                 'ext': 'mp3',
 647                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 648                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 649                 'upload_date': '20150220',
 650             },
 651         },
 652         # jwplayer YouTube
 653         {
 654             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 655             'info_dict': {
 656                 'id': 'Mrj4DVp2zeA',
 657                 'ext': 'mp4',
 658                 'upload_date': '20150212',
 659                 'uploader': 'The National Archives UK',
 660                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 661                 'uploader_id': 'NationalArchives08',
 662                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 663             },
 664         },
 665         # rtl.nl embed
 666         {
 667             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 668             'playlist_mincount': 5,
 669             'info_dict': {
 670                 'id': 'aanslagen-kopenhagen',
 671                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 672             }
 673         },
 674         # Zapiks embed
 675         {
 676             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 677             'info_dict': {
 678                 'id': '118046',
 679                 'ext': 'mp4',
 680                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 681             }
 682         },
 683         # Kaltura embed
 684         {
 685             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 686             'info_dict': {
 687                 'id': '1_eergr3h1',
 688                 'ext': 'mp4',
 689                 'upload_date': '20150226',
 690                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 691                 'timestamp': int,
 692                 'title': 'John Carlson Postgame 2/25/15',
 693             },
 694         },
 695         # Kaltura embed (different embed code)
 696         {
 697             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 698             'info_dict': {
 699                 'id': '1_a52wc67y',
 700                 'ext': 'flv',
 701                 'upload_date': '20150127',
 702                 'uploader_id': 'PremierMedia',
 703                 'timestamp': int,
 704                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 705             },
 706         },
 707         # Eagle.Platform embed (generic URL)
 708         {
 709             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 710             'info_dict': {
 711                 'id': '227304',
 712                 'ext': 'mp4',
 713                 'title': 'Навальный вышел на свободу',
 714                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 715                 'thumbnail': 're:^https?://.*\.jpg$',
 716                 'duration': 87,
 717                 'view_count': int,
 718                 'age_limit': 0,
 719             },
 720         },
 721         # ClipYou (Eagle.Platform) embed (custom URL)
 722         {
 723             'url': 'http://muz-tv.ru/play/7129/',
 724             'info_dict': {
 725                 'id': '12820',
 726                 'ext': 'mp4',
 727                 'title': "'O Sole Mio",
 728                 'thumbnail': 're:^https?://.*\.jpg$',
 729                 'duration': 216,
 730                 'view_count': int,
 731             },
 732         },
 733         # Pladform embed
 734         {
 735             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 736             'info_dict': {
 737                 'id': '100183293',
 738                 'ext': 'mp4',
 739                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 740                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 741                 'thumbnail': 're:^https?://.*\.jpg$',
 742                 'duration': 694,
 743                 'age_limit': 0,
 744             },
 745         },
 746         # Playwire embed
 747         {
 748             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 749             'info_dict': {
 750                 'id': '3519514',
 751                 'ext': 'mp4',
 752                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 753                 'thumbnail': 're:^https?://.*\.png$',
 754                 'duration': 45.115,
 755             },
 756         },
 757         # 5min embed
 758         {
 759             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 760             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 761             'info_dict': {
 762                 'id': '518726732',
 763                 'ext': 'mp4',
 764                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 765             },
 766         },
 767         # SVT embed
 768         {
 769             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 770             'info_dict': {
 771                 'id': '2900353',
 772                 'ext': 'flv',
 773                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 774                 'duration': 27,
 775                 'age_limit': 0,
 776             },
 777         },
 778         # Crooks and Liars embed
 779         {
 780             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 781             'info_dict': {
 782                 'id': '8RUoRhRi',
 783                 'ext': 'mp4',
 784                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 785                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 786                 'timestamp': 1428207000,
 787                 'upload_date': '20150405',
 788                 'uploader': 'Heather',
 789             },
 790         },
 791         # Crooks and Liars external embed
 792         {
 793             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 794             'info_dict': {
 795                 'id': 'MTE3MjUtMzQ2MzA',
 796                 'ext': 'mp4',
 797                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 798                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 799                 'timestamp': 1265032391,
 800                 'upload_date': '20100201',
 801                 'uploader': 'Heather',
 802             },
 803         },
 804         # NBC Sports vplayer embed
 805         {
 806             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 807             'info_dict': {
 808                 'id': 'ln7x1qSThw4k',
 809                 'ext': 'flv',
 810                 'title': "PFT Live: New leader in the 'new-look' defense",
 811                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 812             },
 813         },
 814         # UDN embed
 815         {
 816             'url': 'http://www.udn.com/news/story/7314/822787',
 817             'md5': 'fd2060e988c326991037b9aff9df21a6',
 818             'info_dict': {
 819                 'id': '300346',
 820                 'ext': 'mp4',
 821                 'title': '中一中男師變性 全校師生力挺',
 822                 'thumbnail': 're:^https?://.*\.jpg$',
 823             }
 824         },
 825         # Ooyala embed
 826         {
 827             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 828             'info_dict': {
 829                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 830                 'ext': 'mp4',
 831                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 832                 'title': 'This is what separates the Excel masters from the wannabes',
 833             },
 834             'params': {
 835                 # m3u8 downloads
 836                 'skip_download': True,
 837             }
 838         },
 839         # Contains a SMIL manifest
 840         {
 841             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 842             'info_dict': {
 843                 'id': 'file',
 844                 'ext': 'flv',
 845                 'title': '+ Football: Lottery Champions League Europe',
 846                 'uploader': 'www.telewebion.com',
 847             },
 848             'params': {
 849                 # rtmpe downloads
 850                 'skip_download': True,
 851             }
 852         },
 853         # Brightcove URL in single quotes
 854         {
 855             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 856             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 857             'info_dict': {
 858                 'id': '4255764656001',
 859                 'ext': 'mp4',
 860                 'title': 'SN Presents: Russell Martin, World Citizen',
 861                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 862                 'uploader': 'Rogers Sportsnet',
 863             },
 864         },
 865         # Dailymotion Cloud video
 866         {
 867             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
 868             'md5': '49444254273501a64675a7e68c502681',
 869             'info_dict': {
 870                 'id': '5585de919473990de4bee11b',
 871                 'ext': 'mp4',
 872                 'title': 'Le débat',
 873                 'thumbnail': 're:^https?://.*\.jpe?g$',
 874             }
 875         },
 876         # OnionStudios embed
 877         {
 878             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
 879             'info_dict': {
 880                 'id': '2855',
 881                 'ext': 'mp4',
 882                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
 883                 'thumbnail': 're:^https?://.*\.jpe?g$',
 884                 'uploader': 'ClickHole',
 885                 'uploader_id': 'clickhole',
 886             }
 887         },
 888         # SnagFilms embed
 889         {
 890             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
 891             'info_dict': {
 892                 'id': '74849a00-85a9-11e1-9660-123139220831',
 893                 'ext': 'mp4',
 894                 'title': '#whilewewatch',
 895             }
 896         },
 897         # AdobeTVVideo embed
 898         {
 899             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
 900             'md5': '43662b577c018ad707a63766462b1e87',
 901             'info_dict': {
 902                 'id': '2456',
 903                 'ext': 'mp4',
 904                 'title': 'New experience with Acrobat DC',
 905                 'description': 'New experience with Acrobat DC',
 906                 'duration': 248.667,
 907             },
 908         }
 909     ]
 910
 911     def report_following_redirect(self, new_url):
 912         """Report information extraction."""
 913         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 914
 915     def _extract_rss(self, url, video_id, doc):
 916         playlist_title = doc.find('./channel/title').text
 917         playlist_desc_el = doc.find('./channel/description')
 918         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 919
 920         entries = []
 921         for it in doc.findall('./channel/item'):
 922             next_url = xpath_text(it, 'link', fatal=False)
 923             if not next_url:
 924                 enclosure_nodes = it.findall('./enclosure')
 925                 for e in enclosure_nodes:
 926                     next_url = e.attrib.get('url')
 927                     if next_url:
 928                         break
 929
 930             if not next_url:
 931                 continue
 932
 933             entries.append({
 934                 '_type': 'url',
 935                 'url': next_url,
 936                 'title': it.find('title').text,
 937             })
 938
 939         return {
 940             '_type': 'playlist',
 941             'id': url,
 942             'title': playlist_title,
 943             'description': playlist_desc,
 944             'entries': entries,
 945         }
 946
 947     def _extract_camtasia(self, url, video_id, webpage):
 948         """ Returns None if no camtasia video can be found. """
 949
 950         camtasia_cfg = self._search_regex(
 951             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 952             webpage, 'camtasia configuration file', default=None)
 953         if camtasia_cfg is None:
 954             return None
 955
 956         title = self._html_search_meta('DC.title', webpage, fatal=True)
 957
 958         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 959         camtasia_cfg = self._download_xml(
 960             camtasia_url, video_id,
 961             note='Downloading camtasia configuration',
 962             errnote='Failed to download camtasia configuration')
 963         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 964
 965         entries = []
 966         for n in fileset_node.getchildren():
 967             url_n = n.find('./uri')
 968             if url_n is None:
 969                 continue
 970
 971             entries.append({
 972                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 973                 'title': '%s - %s' % (title, n.tag),
 974                 'url': compat_urlparse.urljoin(url, url_n.text),
 975                 'duration': float_or_none(n.find('./duration').text),
 976             })
 977
 978         return {
 979             '_type': 'playlist',
 980             'entries': entries,
 981             'title': title,
 982         }
 983
 984     def _real_extract(self, url):
 985         if url.startswith('//'):
 986             return {
 987                 '_type': 'url',
 988                 'url': self.http_scheme() + url,
 989             }
 990
 991         parsed_url = compat_urlparse.urlparse(url)
 992         if not parsed_url.scheme:
 993             default_search = self._downloader.params.get('default_search')
 994             if default_search is None:
 995                 default_search = 'fixup_error'
 996
 997             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 998                 if '/' in url:
 999                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1000                     return self.url_result('http://' + url)
1001                 elif default_search != 'fixup_error':
1002                     if default_search == 'auto_warning':
1003                         if re.match(r'^(?:url|URL)$', url):
1004                             raise ExtractorError(
1005                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1006                                 expected=True)
1007                         else:
1008                             self._downloader.report_warning(
1009                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1010                     return self.url_result('ytsearch:' + url)
1011
1012             if default_search in ('error', 'fixup_error'):
1013                 raise ExtractorError(
1014                     '%r is not a valid URL. '
1015                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1016                     % (url, url), expected=True)
1017             else:
1018                 if ':' not in default_search:
1019                     default_search += ':'
1020                 return self.url_result(default_search + url)
1021
1022         url, smuggled_data = unsmuggle_url(url)
1023         force_videoid = None
1024         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1025         if smuggled_data and 'force_videoid' in smuggled_data:
1026             force_videoid = smuggled_data['force_videoid']
1027             video_id = force_videoid
1028         else:
1029             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1030
1031         self.to_screen('%s: Requesting header' % video_id)
1032
1033         head_req = HEADRequest(url)
1034         head_response = self._request_webpage(
1035             head_req, video_id,
1036             note=False, errnote='Could not send HEAD request to %s' % url,
1037             fatal=False)
1038
1039         if head_response is not False:
1040             # Check for redirect
1041             new_url = head_response.geturl()
1042             if url != new_url:
1043                 self.report_following_redirect(new_url)
1044                 if force_videoid:
1045                     new_url = smuggle_url(
1046                         new_url, {'force_videoid': force_videoid})
1047                 return self.url_result(new_url)
1048
1049         full_response = None
1050         if head_response is False:
1051             request = compat_urllib_request.Request(url)
1052             request.add_header('Accept-Encoding', '*')
1053             full_response = self._request_webpage(request, video_id)
1054             head_response = full_response
1055
1056         # Check for direct link to a video
1057         content_type = head_response.headers.get('Content-Type', '')
1058         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1059         if m:
1060             upload_date = unified_strdate(
1061                 head_response.headers.get('Last-Modified'))
1062             return {
1063                 'id': video_id,
1064                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1065                 'direct': True,
1066                 'formats': [{
1067                     'format_id': m.group('format_id'),
1068                     'url': url,
1069                     'vcodec': 'none' if m.group('type') == 'audio' else None
1070                 }],
1071                 'upload_date': upload_date,
1072             }
1073
1074         if not self._downloader.params.get('test', False) and not is_intentional:
1075             force = self._downloader.params.get('force_generic_extractor', False)
1076             self._downloader.report_warning(
1077                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1078
1079         if not full_response:
1080             request = compat_urllib_request.Request(url)
1081             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1082             # making it impossible to download only chunk of the file (yet we need only 512kB to
1083             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1084             # that will always result in downloading the whole file that is not desirable.
1085             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1086             # to accept raw bytes and being able to download only a chunk.
1087             # It may probably better to solve this by checking Content-Type for application/octet-stream
1088             # after HEAD request finishes, but not sure if we can rely on this.
1089             request.add_header('Accept-Encoding', '*')
1090             full_response = self._request_webpage(request, video_id)
1091
1092         # Maybe it's a direct link to a video?
1093         # Be careful not to download the whole thing!
1094         first_bytes = full_response.read(512)
1095         if not is_html(first_bytes):
1096             self._downloader.report_warning(
1097                 'URL could be a direct video link, returning it as such.')
1098             upload_date = unified_strdate(
1099                 head_response.headers.get('Last-Modified'))
1100             return {
1101                 'id': video_id,
1102                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1103                 'direct': True,
1104                 'url': url,
1105                 'upload_date': upload_date,
1106             }
1107
1108         webpage = self._webpage_read_content(
1109             full_response, url, video_id, prefix=first_bytes)
1110
1111         self.report_extraction(video_id)
1112
1113         # Is it an RSS feed or a SMIL file?
1114         try:
1115             doc = parse_xml(webpage)
1116             if doc.tag == 'rss':
1117                 return self._extract_rss(url, video_id, doc)
1118             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1119                 return self._parse_smil(doc, url, video_id)
1120         except compat_xml_parse_error:
1121             pass
1122
1123         # Is it a Camtasia project?
1124         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1125         if camtasia_res is not None:
1126             return camtasia_res
1127
1128         # Sometimes embedded video player is hidden behind percent encoding
1129         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1130         # Unescaping the whole page allows to handle those cases in a generic way
1131         webpage = compat_urllib_parse_unquote(webpage)
1132
1133         # it's tempting to parse this further, but you would
1134         # have to take into account all the variations like
1135         #   Video Title - Site Name
1136         #   Site Name | Video Title
1137         #   Video Title - Tagline | Site Name
1138         # and so on and so forth; it's just not practical
1139         video_title = self._html_search_regex(
1140             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1141             default='video')
1142
1143         # Try to detect age limit automatically
1144         age_limit = self._rta_search(webpage)
1145         # And then there are the jokers who advertise that they use RTA,
1146         # but actually don't.
1147         AGE_LIMIT_MARKERS = [
1148             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1149         ]
1150         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1151             age_limit = 18
1152
1153         # video uploader is domain name
1154         video_uploader = self._search_regex(
1155             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1156
1157         # Helper method
1158         def _playlist_from_matches(matches, getter=None, ie=None):
1159             urlrs = orderedSet(
1160                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1161                 for m in matches)
1162             return self.playlist_result(
1163                 urlrs, playlist_id=video_id, playlist_title=video_title)
1164
1165         # Look for BrightCove:
1166         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1167         if bc_urls:
1168             self.to_screen('Brightcove video detected.')
1169             entries = [{
1170                 '_type': 'url',
1171                 'url': smuggle_url(bc_url, {'Referer': url}),
1172                 'ie_key': 'Brightcove'
1173             } for bc_url in bc_urls]
1174
1175             return {
1176                 '_type': 'playlist',
1177                 'title': video_title,
1178                 'id': video_id,
1179                 'entries': entries,
1180             }
1181
1182         # Look for embedded rtl.nl player
1183         matches = re.findall(
1184             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1185             webpage)
1186         if matches:
1187             return _playlist_from_matches(matches, ie='RtlNl')
1188
1189         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1190         if vimeo_url is not None:
1191             return self.url_result(vimeo_url)
1192
1193         vid_me_embed_url = self._search_regex(
1194             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1195             webpage, 'vid.me embed', default=None)
1196         if vid_me_embed_url is not None:
1197             return self.url_result(vid_me_embed_url, 'Vidme')
1198
1199         # Look for embedded YouTube player
1200         matches = re.findall(r'''(?x)
1201             (?:
1202                 <iframe[^>]+?src=|
1203                 data-video-url=|
1204                 <embed[^>]+?src=|
1205                 embedSWF\(?:\s*|
1206                 new\s+SWFObject\(
1207             )
1208             (["\'])
1209                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1210                 (?:embed|v|p)/.+?)
1211             \1''', webpage)
1212         if matches:
1213             return _playlist_from_matches(
1214                 matches, lambda m: unescapeHTML(m[1]))
1215
1216         # Look for lazyYT YouTube embed
1217         matches = re.findall(
1218             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1219         if matches:
1220             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1221
1222         # Look for embedded Dailymotion player
1223         matches = re.findall(
1224             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1225         if matches:
1226             return _playlist_from_matches(
1227                 matches, lambda m: unescapeHTML(m[1]))
1228
1229         # Look for embedded Dailymotion playlist player (#3822)
1230         m = re.search(
1231             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1232         if m:
1233             playlists = re.findall(
1234                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1235             if playlists:
1236                 return _playlist_from_matches(
1237                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1238
1239         # Look for embedded Wistia player
1240         match = re.search(
1241             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1242         if match:
1243             embed_url = self._proto_relative_url(
1244                 unescapeHTML(match.group('url')))
1245             return {
1246                 '_type': 'url_transparent',
1247                 'url': embed_url,
1248                 'ie_key': 'Wistia',
1249                 'uploader': video_uploader,
1250                 'title': video_title,
1251                 'id': video_id,
1252             }
1253
1254         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1255         if match:
1256             return {
1257                 '_type': 'url_transparent',
1258                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1259                 'ie_key': 'Wistia',
1260                 'uploader': video_uploader,
1261                 'title': video_title,
1262                 'id': match.group('id')
1263             }
1264
1265         # Look for embedded blip.tv player
1266         bliptv_url = BlipTVIE._extract_url(webpage)
1267         if bliptv_url:
1268             return self.url_result(bliptv_url, 'BlipTV')
1269
1270         # Look for SVT player
1271         svt_url = SVTIE._extract_url(webpage)
1272         if svt_url:
1273             return self.url_result(svt_url, 'SVT')
1274
1275         # Look for embedded condenast player
1276         matches = re.findall(
1277             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1278             webpage)
1279         if matches:
1280             return {
1281                 '_type': 'playlist',
1282                 'entries': [{
1283                     '_type': 'url',
1284                     'ie_key': 'CondeNast',
1285                     'url': ma,
1286                 } for ma in matches],
1287                 'title': video_title,
1288                 'id': video_id,
1289             }
1290
1291         # Look for Bandcamp pages with custom domain
1292         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1293         if mobj is not None:
1294             burl = unescapeHTML(mobj.group(1))
1295             # Don't set the extractor because it can be a track url or an album
1296             return self.url_result(burl)
1297
1298         # Look for embedded Vevo player
1299         mobj = re.search(
1300             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1301         if mobj is not None:
1302             return self.url_result(mobj.group('url'))
1303
1304         # Look for embedded Viddler player
1305         mobj = re.search(
1306             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1307             webpage)
1308         if mobj is not None:
1309             return self.url_result(mobj.group('url'))
1310
1311         # Look for NYTimes player
1312         mobj = re.search(
1313             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1314             webpage)
1315         if mobj is not None:
1316             return self.url_result(mobj.group('url'))
1317
1318         # Look for Libsyn player
1319         mobj = re.search(
1320             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1321         if mobj is not None:
1322             return self.url_result(mobj.group('url'))
1323
1324         # Look for Ooyala videos
1325         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1326                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1327                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1328                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1329         if mobj is not None:
1330             return OoyalaIE._build_url_result(mobj.group('ec'))
1331
1332         # Look for multiple Ooyala embeds on SBN network websites
1333         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1334         if mobj is not None:
1335             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1336             if embeds:
1337                 return _playlist_from_matches(
1338                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1339
1340         # Look for Aparat videos
1341         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1342         if mobj is not None:
1343             return self.url_result(mobj.group(1), 'Aparat')
1344
1345         # Look for MPORA videos
1346         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1347         if mobj is not None:
1348             return self.url_result(mobj.group(1), 'Mpora')
1349
1350         # Look for embedded NovaMov-based player
1351         mobj = re.search(
1352             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1353                     (?P<url>http://(?:(?:embed|www)\.)?
1354                         (?:novamov\.com|
1355                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1356                            videoweed\.(?:es|com)|
1357                            movshare\.(?:net|sx|ag)|
1358                            divxstage\.(?:eu|net|ch|co|at|ag))
1359                         /embed\.php.+?)\1''', webpage)
1360         if mobj is not None:
1361             return self.url_result(mobj.group('url'))
1362
1363         # Look for embedded Facebook player
1364         mobj = re.search(
1365             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1366         if mobj is not None:
1367             return self.url_result(mobj.group('url'), 'Facebook')
1368
1369         # Look for embedded VK player
1370         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1371         if mobj is not None:
1372             return self.url_result(mobj.group('url'), 'VK')
1373
1374         # Look for embedded ivi player
1375         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1376         if mobj is not None:
1377             return self.url_result(mobj.group('url'), 'Ivi')
1378
1379         # Look for embedded Huffington Post player
1380         mobj = re.search(
1381             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1382         if mobj is not None:
1383             return self.url_result(mobj.group('url'), 'HuffPost')
1384
1385         # Look for embed.ly
1386         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1387         if mobj is not None:
1388             return self.url_result(mobj.group('url'))
1389         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1390         if mobj is not None:
1391             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1392
1393         # Look for funnyordie embed
1394         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1395         if matches:
1396             return _playlist_from_matches(
1397                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1398
1399         # Look for BBC iPlayer embed
1400         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1401         if matches:
1402             return _playlist_from_matches(matches, ie='BBCCoUk')
1403
1404         # Look for embedded RUTV player
1405         rutv_url = RUTVIE._extract_url(webpage)
1406         if rutv_url:
1407             return self.url_result(rutv_url, 'RUTV')
1408
1409         # Look for embedded TVC player
1410         tvc_url = TVCIE._extract_url(webpage)
1411         if tvc_url:
1412             return self.url_result(tvc_url, 'TVC')
1413
1414         # Look for embedded SportBox player
1415         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1416         if sportbox_urls:
1417             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1418
1419         # Look for embedded PornHub player
1420         pornhub_url = PornHubIE._extract_url(webpage)
1421         if pornhub_url:
1422             return self.url_result(pornhub_url, 'PornHub')
1423
1424         # Look for embedded XHamster player
1425         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1426         if xhamster_urls:
1427             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1428
1429         # Look for embedded Tvigle player
1430         mobj = re.search(
1431             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1432         if mobj is not None:
1433             return self.url_result(mobj.group('url'), 'Tvigle')
1434
1435         # Look for embedded TED player
1436         mobj = re.search(
1437             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1438         if mobj is not None:
1439             return self.url_result(mobj.group('url'), 'TED')
1440
1441         # Look for embedded Ustream videos
1442         mobj = re.search(
1443             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1444         if mobj is not None:
1445             return self.url_result(mobj.group('url'), 'Ustream')
1446
1447         # Look for embedded arte.tv player
1448         mobj = re.search(
1449             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1450             webpage)
1451         if mobj is not None:
1452             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1453
1454         # Look for embedded francetv player
1455         mobj = re.search(
1456             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1457             webpage)
1458         if mobj is not None:
1459             return self.url_result(mobj.group('url'))
1460
1461         # Look for embedded smotri.com player
1462         smotri_url = SmotriIE._extract_url(webpage)
1463         if smotri_url:
1464             return self.url_result(smotri_url, 'Smotri')
1465
1466         # Look for embedded Myvi.ru player
1467         myvi_url = MyviIE._extract_url(webpage)
1468         if myvi_url:
1469             return self.url_result(myvi_url)
1470
1471         # Look for embedded soundcloud player
1472         mobj = re.search(
1473             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1474             webpage)
1475         if mobj is not None:
1476             url = unescapeHTML(mobj.group('url'))
1477             return self.url_result(url)
1478
1479         # Look for embedded vulture.com player
1480         mobj = re.search(
1481             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1482             webpage)
1483         if mobj is not None:
1484             url = unescapeHTML(mobj.group('url'))
1485             return self.url_result(url, ie='Vulture')
1486
1487         # Look for embedded mtvservices player
1488         mobj = re.search(
1489             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1490             webpage)
1491         if mobj is not None:
1492             url = unescapeHTML(mobj.group('url'))
1493             return self.url_result(url, ie='MTVServicesEmbedded')
1494
1495         # Look for embedded yahoo player
1496         mobj = re.search(
1497             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1498             webpage)
1499         if mobj is not None:
1500             return self.url_result(mobj.group('url'), 'Yahoo')
1501
1502         # Look for embedded sbs.com.au player
1503         mobj = re.search(
1504             r'''(?x)
1505             (?:
1506                 <meta\s+property="og:video"\s+content=|
1507                 <iframe[^>]+?src=
1508             )
1509             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1510             webpage)
1511         if mobj is not None:
1512             return self.url_result(mobj.group('url'), 'SBS')
1513
1514         # Look for embedded Cinchcast player
1515         mobj = re.search(
1516             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1517             webpage)
1518         if mobj is not None:
1519             return self.url_result(mobj.group('url'), 'Cinchcast')
1520
1521         mobj = re.search(
1522             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1523             webpage)
1524         if not mobj:
1525             mobj = re.search(
1526                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1527                 webpage)
1528         if mobj is not None:
1529             return self.url_result(mobj.group('url'), 'MLB')
1530
1531         mobj = re.search(
1532             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1533             webpage)
1534         if mobj is not None:
1535             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1536
1537         mobj = re.search(
1538             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1539             webpage)
1540         if mobj is not None:
1541             return self.url_result(mobj.group('url'), 'Livestream')
1542
1543         # Look for Zapiks embed
1544         mobj = re.search(
1545             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1546         if mobj is not None:
1547             return self.url_result(mobj.group('url'), 'Zapiks')
1548
1549         # Look for Kaltura embeds
1550         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
1551                 re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
1552         if mobj is not None:
1553             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1554
1555         # Look for Eagle.Platform embeds
1556         mobj = re.search(
1557             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1558         if mobj is not None:
1559             return self.url_result(mobj.group('url'), 'EaglePlatform')
1560
1561         # Look for ClipYou (uses Eagle.Platform) embeds
1562         mobj = re.search(
1563             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1564         if mobj is not None:
1565             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1566
1567         # Look for Pladform embeds
1568         mobj = re.search(
1569             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1570         if mobj is not None:
1571             return self.url_result(mobj.group('url'), 'Pladform')
1572
1573         # Look for Playwire embeds
1574         mobj = re.search(
1575             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1576         if mobj is not None:
1577             return self.url_result(mobj.group('url'))
1578
1579         # Look for 5min embeds
1580         mobj = re.search(
1581             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1582         if mobj is not None:
1583             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1584
1585         # Look for Crooks and Liars embeds
1586         mobj = re.search(
1587             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1588         if mobj is not None:
1589             return self.url_result(mobj.group('url'))
1590
1591         # Look for NBC Sports VPlayer embeds
1592         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1593         if nbc_sports_url:
1594             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1595
1596         # Look for UDN embeds
1597         mobj = re.search(
1598             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1599         if mobj is not None:
1600             return self.url_result(
1601                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1602
1603         # Look for Senate ISVP iframe
1604         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1605         if senate_isvp_url:
1606             return self.url_result(senate_isvp_url, 'SenateISVP')
1607
1608         # Look for Dailymotion Cloud videos
1609         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1610         if dmcloud_url:
1611             return self.url_result(dmcloud_url, 'DailymotionCloud')
1612
1613         # Look for OnionStudios embeds
1614         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1615         if onionstudios_url:
1616             return self.url_result(onionstudios_url)
1617
1618         # Look for SnagFilms embeds
1619         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1620         if snagfilms_url:
1621             return self.url_result(snagfilms_url)
1622
1623         # Look for AdobeTVVideo embeds
1624         mobj = re.search(
1625             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1626             webpage)
1627         if mobj is not None:
1628             return self.url_result(
1629                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1630                 'AdobeTVVideo')
1631
1632         def check_video(vurl):
1633             if YoutubeIE.suitable(vurl):
1634                 return True
1635             vpath = compat_urlparse.urlparse(vurl).path
1636             vext = determine_ext(vpath)
1637             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1638
1639         def filter_video(urls):
1640             return list(filter(check_video, urls))
1641
1642         # Start with something easy: JW Player in SWFObject
1643         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1644         if not found:
1645             # Look for gorilla-vid style embedding
1646             found = filter_video(re.findall(r'''(?sx)
1647                 (?:
1648                     jw_plugins|
1649                     JWPlayerOptions|
1650                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1651                 )
1652                 .*?
1653                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1654         if not found:
1655             # Broaden the search a little bit
1656             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1657         if not found:
1658             # Broaden the findall a little bit: JWPlayer JS loader
1659             found = filter_video(re.findall(
1660                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1661         if not found:
1662             # Flow player
1663             found = filter_video(re.findall(r'''(?xs)
1664                 flowplayer\("[^"]+",\s*
1665                     \{[^}]+?\}\s*,
1666                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1667                         ["']?url["']?\s*:\s*["']([^"']+)["']
1668             ''', webpage))
1669         if not found:
1670             # Cinerama player
1671             found = re.findall(
1672                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1673         if not found:
1674             # Try to find twitter cards info
1675             found = filter_video(re.findall(
1676                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1677         if not found:
1678             # We look for Open Graph info:
1679             # We have to match any number of spaces between elements, some sites try to align them (eg.: statigr.am)
1680             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1681             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1682             if m_video_type is not None:
1683                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1684         if not found:
1685             # HTML5 video
1686             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1687         if not found:
1688             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1689             found = re.search(
1690                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1691                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1692                 webpage)
1693             if not found:
1694                 # Look also in Refresh HTTP header
1695                 refresh_header = head_response.headers.get('Refresh')
1696                 if refresh_header:
1697                     found = re.search(REDIRECT_REGEX, refresh_header)
1698             if found:
1699                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1700                 self.report_following_redirect(new_url)
1701                 return {
1702                     '_type': 'url',
1703                     'url': new_url,
1704                 }
1705         if not found:
1706             raise UnsupportedError(url)
1707
1708         entries = []
1709         for video_url in found:
1710             video_url = compat_urlparse.urljoin(url, video_url)
1711             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1712
1713             # Sometimes, jwplayer extraction will result in a YouTube URL
1714             if YoutubeIE.suitable(video_url):
1715                 entries.append(self.url_result(video_url, 'Youtube'))
1716                 continue
1717
1718             # here's a fun little line of code for you:
1719             video_id = os.path.splitext(video_id)[0]
1720
1721             if determine_ext(video_url) == 'smil':
1722                 entries.append({
1723                     'id': video_id,
1724                     'formats': self._extract_smil_formats(video_url, video_id),
1725                     'uploader': video_uploader,
1726                     'title': video_title,
1727                     'age_limit': age_limit,
1728                 })
1729             else:
1730                 entries.append({
1731                     'id': video_id,
1732                     'url': video_url,
1733                     'uploader': video_uploader,
1734                     'title': video_title,
1735                     'age_limit': age_limit,
1736                 })
1737
1738         if len(entries) == 1:
1739             return entries[0]
1740         else:
1741             for num, e in enumerate(entries, start=1):
1742                 # 'url' results don't have a title
1743                 if e.get('title') is not None:
1744                     e['title'] = '%s (%d)' % (e['title'], num)
1745             return {
1746                 '_type': 'playlist',
1747                 'entries': entries,
1748             }