git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     sanitized_Request,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import (
  34     BrightcoveLegacyIE,
  35     BrightcoveNewIE,
  36 )
  37 from .nbc import NBCSportsVPlayerIE
  38 from .ooyala import OoyalaIE
  39 from .rutv import RUTVIE
  40 from .tvc import TVCIE
  41 from .sportbox import SportBoxEmbedIE
  42 from .smotri import SmotriIE
  43 from .myvi import MyviIE
  44 from .condenast import CondeNastIE
  45 from .udn import UDNEmbedIE
  46 from .senateisvp import SenateISVPIE
  47 from .svt import SVTIE
  48 from .pornhub import PornHubIE
  49 from .xhamster import XHamsterEmbedIE
  50 from .tnaflix import TNAFlixNetworkEmbedIE
  51 from .vimeo import VimeoIE
  52 from .dailymotion import (
  53     DailymotionIE,
  54     DailymotionCloudIE,
  55 )
  56 from .onionstudios import OnionStudiosIE
  57 from .viewlift import ViewLiftEmbedIE
  58 from .screenwavemedia import ScreenwaveMediaIE
  59 from .mtv import MTVServicesEmbeddedIE
  60 from .pladform import PladformIE
  61 from .videomore import VideomoreIE
  62 from .googledrive import GoogleDriveIE
  63 from .jwplatform import JWPlatformIE
  64 from .digiteka import DigitekaIE
  65 from .arkena import ArkenaIE
  66 from .instagram import InstagramIE
  67 from .liveleak import LiveLeakIE
  68 from .threeqsdn import ThreeQSDNIE
  69 from .theplatform import ThePlatformIE
  70 from .vessel import VesselIE
  71 from .kaltura import KalturaIE
  72 from .eagleplatform import EaglePlatformIE
  73 from .facebook import FacebookIE
  74 from .soundcloud import SoundcloudIE
  75
  76
  77 class GenericIE(InfoExtractor):
  78     IE_DESC = 'Generic downloader that works on some sites'
  79     _VALID_URL = r'.*'
  80     IE_NAME = 'generic'
  81     _TESTS = [
  82         # Direct link to a video
  83         {
  84             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  85             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  86             'info_dict': {
  87                 'id': 'trailer',
  88                 'ext': 'mp4',
  89                 'title': 'trailer',
  90                 'upload_date': '20100513',
  91             }
  92         },
  93         # Direct link to media delivered compressed (until Accept-Encoding is *)
  94         {
  95             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  96             'md5': '128c42e68b13950268b648275386fc74',
  97             'info_dict': {
  98                 'id': 'FictionJunction-Parallel_Hearts',
  99                 'ext': 'flac',
 100                 'title': 'FictionJunction-Parallel_Hearts',
 101                 'upload_date': '20140522',
 102             },
 103             'expected_warnings': [
 104                 'URL could be a direct video link, returning it as such.'
 105             ]
 106         },
 107         # Direct download with broken HEAD
 108         {
 109             'url': 'http://ai-radio.org:8000/radio.opus',
 110             'info_dict': {
 111                 'id': 'radio',
 112                 'ext': 'opus',
 113                 'title': 'radio',
 114             },
 115             'params': {
 116                 'skip_download': True,  # infinite live stream
 117             },
 118             'expected_warnings': [
 119                 r'501.*Not Implemented',
 120                 r'400.*Bad Request',
 121             ],
 122         },
 123         # Direct link with incorrect MIME type
 124         {
 125             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 126             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 127             'info_dict': {
 128                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 129                 'id': '5_Lennart_Poettering_-_Systemd',
 130                 'ext': 'webm',
 131                 'title': '5_Lennart_Poettering_-_Systemd',
 132                 'upload_date': '20141120',
 133             },
 134             'expected_warnings': [
 135                 'URL could be a direct video link, returning it as such.'
 136             ]
 137         },
 138         # RSS feed
 139         {
 140             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 141             'info_dict': {
 142                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 143                 'title': 'Zero Punctuation',
 144                 'description': 're:.*groundbreaking video review series.*'
 145             },
 146             'playlist_mincount': 11,
 147         },
 148         # RSS feed with enclosure
 149         {
 150             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 151             'info_dict': {
 152                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 153                 'ext': 'm4v',
 154                 'upload_date': '20150228',
 155                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 156             }
 157         },
 158         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 159         {
 160             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 161             'info_dict': {
 162                 'id': 'smil',
 163                 'ext': 'mp4',
 164                 'title': 'Automatics, robotics and biocybernetics',
 165                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 166                 'upload_date': '20130627',
 167                 'formats': 'mincount:16',
 168                 'subtitles': 'mincount:1',
 169             },
 170             'params': {
 171                 'force_generic_extractor': True,
 172                 'skip_download': True,
 173             },
 174         },
 175         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 176         {
 177             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 178             'info_dict': {
 179                 'id': 'hds',
 180                 'ext': 'flv',
 181                 'title': 'hds',
 182                 'formats': 'mincount:1',
 183             },
 184             'params': {
 185                 'skip_download': True,
 186             },
 187         },
 188         # SMIL from https://www.restudy.dk/video/play/id/1637
 189         {
 190             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 191             'info_dict': {
 192                 'id': 'video_1637',
 193                 'ext': 'flv',
 194                 'title': 'video_1637',
 195                 'formats': 'mincount:3',
 196             },
 197             'params': {
 198                 'skip_download': True,
 199             },
 200         },
 201         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 202         {
 203             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 204             'info_dict': {
 205                 'id': 'smil-service',
 206                 'ext': 'flv',
 207                 'title': 'smil-service',
 208                 'formats': 'mincount:1',
 209             },
 210             'params': {
 211                 'skip_download': True,
 212             },
 213         },
 214         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 215         {
 216             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 217             'info_dict': {
 218                 'id': '4719370',
 219                 'ext': 'mp4',
 220                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 221                 'formats': 'mincount:3',
 222             },
 223             'params': {
 224                 'skip_download': True,
 225             },
 226         },
 227         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 228         {
 229             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 230             'info_dict': {
 231                 'id': 'mZlp2ctYIUEB',
 232                 'ext': 'mp4',
 233                 'title': 'Tikibad ontruimd wegens brand',
 234                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 235                 'thumbnail': 're:^https?://.*\.jpg$',
 236                 'duration': 33,
 237             },
 238             'params': {
 239                 'skip_download': True,
 240             },
 241         },
 242         # MPD from http://dash-mse-test.appspot.com/media.html
 243         {
 244             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 245             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 246             'info_dict': {
 247                 'id': 'car-20120827-manifest',
 248                 'ext': 'mp4',
 249                 'title': 'car-20120827-manifest',
 250                 'formats': 'mincount:9',
 251                 'upload_date': '20130904',
 252             },
 253             'params': {
 254                 'format': 'bestvideo',
 255             },
 256         },
 257         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 258         {
 259             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 260             'info_dict': {
 261                 'id': 'content',
 262                 'ext': 'mp4',
 263                 'title': 'content',
 264                 'formats': 'mincount:8',
 265             },
 266             'params': {
 267                 # m3u8 downloads
 268                 'skip_download': True,
 269             }
 270         },
 271         # m3u8 served with Content-Type: text/plain
 272         {
 273             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
 274             'info_dict': {
 275                 'id': 'index',
 276                 'ext': 'mp4',
 277                 'title': 'index',
 278                 'upload_date': '20140720',
 279                 'formats': 'mincount:11',
 280             },
 281             'params': {
 282                 # m3u8 downloads
 283                 'skip_download': True,
 284             }
 285         },
 286         # google redirect
 287         {
 288             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 289             'info_dict': {
 290                 'id': 'cmQHVoWB5FY',
 291                 'ext': 'mp4',
 292                 'upload_date': '20130224',
 293                 'uploader_id': 'TheVerge',
 294                 'description': 're:^Chris Ziegler takes a look at the\.*',
 295                 'uploader': 'The Verge',
 296                 'title': 'First Firefox OS phones side-by-side',
 297             },
 298             'params': {
 299                 'skip_download': False,
 300             }
 301         },
 302         {
 303             # redirect in Refresh HTTP header
 304             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 305             'info_dict': {
 306                 'id': 'pO8h3EaFRdo',
 307                 'ext': 'mp4',
 308                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 309                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 310                 'upload_date': '20150917',
 311                 'uploader_id': 'brtvofficial',
 312                 'uploader': 'Boiler Room',
 313             },
 314             'params': {
 315                 'skip_download': False,
 316             },
 317         },
 318         {
 319             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 320             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 321             'info_dict': {
 322                 'id': '13601338388002',
 323                 'ext': 'mp4',
 324                 'uploader': 'www.hodiho.fr',
 325                 'title': 'R\u00e9gis plante sa Jeep',
 326             }
 327         },
 328         # bandcamp page with custom domain
 329         {
 330             'add_ie': ['Bandcamp'],
 331             'url': 'http://bronyrock.com/track/the-pony-mash',
 332             'info_dict': {
 333                 'id': '3235767654',
 334                 'ext': 'mp3',
 335                 'title': 'The Pony Mash',
 336                 'uploader': 'M_Pallante',
 337             },
 338             'skip': 'There is a limit of 200 free downloads / month for the test song',
 339         },
 340         # embedded brightcove video
 341         # it also tests brightcove videos that need to set the 'Referer' in the
 342         # http requests
 343         {
 344             'add_ie': ['BrightcoveLegacy'],
 345             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 346             'info_dict': {
 347                 'id': '2765128793001',
 348                 'ext': 'mp4',
 349                 'title': 'Le cours de bourse : l’analyse technique',
 350                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 351                 'uploader': 'BFM BUSINESS',
 352             },
 353             'params': {
 354                 'skip_download': True,
 355             },
 356         },
 357         {
 358             # https://github.com/rg3/youtube-dl/issues/2253
 359             'url': 'http://bcove.me/i6nfkrc3',
 360             'md5': '0ba9446db037002366bab3b3eb30c88c',
 361             'info_dict': {
 362                 'id': '3101154703001',
 363                 'ext': 'mp4',
 364                 'title': 'Still no power',
 365                 'uploader': 'thestar.com',
 366                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 367             },
 368             'add_ie': ['BrightcoveLegacy'],
 369         },
 370         {
 371             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 372             'md5': 'fb973ecf6e4a78a67453647444222983',
 373             'info_dict': {
 374                 'id': '3414141473001',
 375                 'ext': 'mp4',
 376                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 377                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 378                 'uploader': 'Championat',
 379             },
 380         },
 381         {
 382             # https://github.com/rg3/youtube-dl/issues/3541
 383             'add_ie': ['BrightcoveLegacy'],
 384             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 385             'info_dict': {
 386                 'id': '3866516442001',
 387                 'ext': 'mp4',
 388                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 389                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 390                 'uploader': 'SBS Broadcasting',
 391             },
 392             'skip': 'Restricted to Netherlands',
 393             'params': {
 394                 'skip_download': True,  # m3u8 download
 395             },
 396         },
 397         # ooyala video
 398         {
 399             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 400             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 401             'info_dict': {
 402                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 403                 'ext': 'mp4',
 404                 'title': '2cc213299525360.mov',  # that's what we get
 405                 'duration': 238.231,
 406             },
 407             'add_ie': ['Ooyala'],
 408         },
 409         {
 410             # ooyala video embedded with http://player.ooyala.com/iframe.js
 411             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 412             'info_dict': {
 413                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 414                 'ext': 'mp4',
 415                 'title': '"Steve Jobs: Man in the Machine" trailer',
 416                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 417                 'duration': 135.427,
 418             },
 419             'params': {
 420                 'skip_download': True,
 421             },
 422         },
 423         # embed.ly video
 424         {
 425             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 426             'info_dict': {
 427                 'id': '9ODmcdjQcHQ',
 428                 'ext': 'mp4',
 429                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 430                 'upload_date': '20140225',
 431                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 432                 'uploader': 'Tested',
 433                 'uploader_id': 'testedcom',
 434             },
 435             # No need to test YoutubeIE here
 436             'params': {
 437                 'skip_download': True,
 438             },
 439         },
 440         # funnyordie embed
 441         {
 442             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 443             'info_dict': {
 444                 'id': '18e820ec3f',
 445                 'ext': 'mp4',
 446                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 447                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 448             },
 449         },
 450         # RUTV embed
 451         {
 452             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 453             'info_dict': {
 454                 'id': '776940',
 455                 'ext': 'mp4',
 456                 'title': 'Охотское море стало целиком российским',
 457                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 458             },
 459             'params': {
 460                 # m3u8 download
 461                 'skip_download': True,
 462             },
 463         },
 464         # TVC embed
 465         {
 466             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 467             'info_dict': {
 468                 'id': '55304',
 469                 'ext': 'mp4',
 470                 'title': 'Дошкольное воспитание',
 471             },
 472         },
 473         # SportBox embed
 474         {
 475             'url': 'http://www.vestifinance.ru/articles/25753',
 476             'info_dict': {
 477                 'id': '25753',
 478                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 479             },
 480             'playlist': [{
 481                 'info_dict': {
 482                     'id': '370908',
 483                     'title': 'Госзаказ. День 3',
 484                     'ext': 'mp4',
 485                 }
 486             }, {
 487                 'info_dict': {
 488                     'id': '370905',
 489                     'title': 'Госзаказ. День 2',
 490                     'ext': 'mp4',
 491                 }
 492             }, {
 493                 'info_dict': {
 494                     'id': '370902',
 495                     'title': 'Госзаказ. День 1',
 496                     'ext': 'mp4',
 497                 }
 498             }],
 499             'params': {
 500                 # m3u8 download
 501                 'skip_download': True,
 502             },
 503         },
 504         # Myvi.ru embed
 505         {
 506             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 507             'info_dict': {
 508                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 509                 'ext': 'mp4',
 510                 'title': 'Ужастики, русский трейлер (2015)',
 511                 'thumbnail': 're:^https?://.*\.jpg$',
 512                 'duration': 153,
 513             }
 514         },
 515         # XHamster embed
 516         {
 517             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 518             'info_dict': {
 519                 'id': 'showthread',
 520                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 521             },
 522             'playlist_mincount': 7,
 523         },
 524         # Embedded TED video
 525         {
 526             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 527             'md5': '65fdff94098e4a607385a60c5177c638',
 528             'info_dict': {
 529                 'id': '1969',
 530                 'ext': 'mp4',
 531                 'title': 'Hidden miracles of the natural world',
 532                 'uploader': 'Louie Schwartzberg',
 533                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 534             }
 535         },
 536         # Embedded Ustream video
 537         {
 538             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 539             'md5': '27b99cdb639c9b12a79bca876a073417',
 540             'info_dict': {
 541                 'id': '45734260',
 542                 'ext': 'flv',
 543                 'uploader': 'AU SPA:  The NSA and Privacy',
 544                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 545             }
 546         },
 547         # nowvideo embed hidden behind percent encoding
 548         {
 549             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 550             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 551             'info_dict': {
 552                 'id': '06e53103ca9aa',
 553                 'ext': 'flv',
 554                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 555                 'description': 'No description',
 556             },
 557         },
 558         # arte embed
 559         {
 560             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 561             'md5': '7653032cbb25bf6c80d80f217055fa43',
 562             'info_dict': {
 563                 'id': '048195-004_PLUS7-F',
 564                 'ext': 'flv',
 565                 'title': 'X:enius',
 566                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 567                 'upload_date': '20140320',
 568             },
 569             'params': {
 570                 'skip_download': 'Requires rtmpdump'
 571             }
 572         },
 573         # francetv embed
 574         {
 575             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 576             'info_dict': {
 577                 'id': 'EV_30231',
 578                 'ext': 'mp4',
 579                 'title': 'Alcaline, le concert avec Calogero',
 580                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 581                 'upload_date': '20150226',
 582                 'timestamp': 1424989860,
 583                 'duration': 5400,
 584             },
 585             'params': {
 586                 # m3u8 downloads
 587                 'skip_download': True,
 588             },
 589             'expected_warnings': [
 590                 'Forbidden'
 591             ]
 592         },
 593         # Condé Nast embed
 594         {
 595             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 596             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 597             'info_dict': {
 598                 'id': '53501be369702d3275860000',
 599                 'ext': 'mp4',
 600                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 601             }
 602         },
 603         # Dailymotion embed
 604         {
 605             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 606             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 607             'info_dict': {
 608                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 609                 'ext': 'mp4',
 610                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 611                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
 612                 'uploader': 'Spi0n',
 613                 'uploader_id': 'xgditw',
 614                 'upload_date': '20140425',
 615                 'timestamp': 1398441542,
 616             },
 617             'add_ie': ['Dailymotion'],
 618         },
 619         # YouTube embed
 620         {
 621             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 622             'info_dict': {
 623                 'id': 'FXRb4ykk4S0',
 624                 'ext': 'mp4',
 625                 'title': 'The NBL Auction 2014',
 626                 'uploader': 'BADMINTON England',
 627                 'uploader_id': 'BADMINTONEvents',
 628                 'upload_date': '20140603',
 629                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 630             },
 631             'add_ie': ['Youtube'],
 632             'params': {
 633                 'skip_download': True,
 634             }
 635         },
 636         # MTVServices embed
 637         {
 638             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
 639             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
 640             'info_dict': {
 641                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
 642                 'ext': 'mp4',
 643                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
 644                 'description': 'Two valets share their love for movie star Liam Neesons.',
 645                 'timestamp': 1349922600,
 646                 'upload_date': '20121011',
 647             },
 648         },
 649         # YouTube embed via <data-embed-url="">
 650         {
 651             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 652             'info_dict': {
 653                 'id': '4vAffPZIT44',
 654                 'ext': 'mp4',
 655                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 656                 'uploader': 'Gameloft',
 657                 'uploader_id': 'gameloft',
 658                 'upload_date': '20140828',
 659                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 660             },
 661             'params': {
 662                 'skip_download': True,
 663             }
 664         },
 665         # Camtasia studio
 666         {
 667             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 668             'playlist': [{
 669                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 670                 'info_dict': {
 671                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 672                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 673                     'ext': 'flv',
 674                     'duration': 2235.90,
 675                 }
 676             }, {
 677                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 678                 'info_dict': {
 679                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 680                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 681                     'ext': 'flv',
 682                     'duration': 2235.93,
 683                 }
 684             }],
 685             'info_dict': {
 686                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 687             }
 688         },
 689         # Flowplayer
 690         {
 691             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 692             'md5': '9d65602bf31c6e20014319c7d07fba27',
 693             'info_dict': {
 694                 'id': '5123ea6d5e5a7',
 695                 'ext': 'mp4',
 696                 'age_limit': 18,
 697                 'uploader': 'www.handjobhub.com',
 698                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 699             }
 700         },
 701         # Multiple brightcove videos
 702         # https://github.com/rg3/youtube-dl/issues/2283
 703         {
 704             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 705             'info_dict': {
 706                 'id': 'always-never',
 707                 'title': 'Always / Never - The New Yorker',
 708             },
 709             'playlist_count': 3,
 710             'params': {
 711                 'extract_flat': False,
 712                 'skip_download': True,
 713             }
 714         },
 715         # MLB embed
 716         {
 717             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 718             'md5': '96f09a37e44da40dd083e12d9a683327',
 719             'info_dict': {
 720                 'id': '33322633',
 721                 'ext': 'mp4',
 722                 'title': 'Ump changes call to ball',
 723                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 724                 'duration': 48,
 725                 'timestamp': 1401537900,
 726                 'upload_date': '20140531',
 727                 'thumbnail': 're:^https?://.*\.jpg$',
 728             },
 729         },
 730         # Wistia embed
 731         {
 732             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 733             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
 734             'info_dict': {
 735                 'id': '6e2wtrbdaf',
 736                 'ext': 'mov',
 737                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
 738                 'description': 'a Paywall Videos video from Remilon',
 739                 'duration': 644.072,
 740                 'uploader': 'study.com',
 741                 'timestamp': 1459678540,
 742                 'upload_date': '20160403',
 743                 'filesize': 24687186,
 744             },
 745         },
 746         {
 747             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 748             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 749             'info_dict': {
 750                 'id': 'uxjb0lwrcz',
 751                 'ext': 'mp4',
 752                 'title': 'Conversation about Hexagonal Rails Part 1',
 753                 'description': 'a Martin Fowler video from ThoughtWorks',
 754                 'duration': 1715.0,
 755                 'uploader': 'thoughtworks.wistia.com',
 756                 'timestamp': 1401832161,
 757                 'upload_date': '20140603',
 758             },
 759         },
 760         # Wistia standard embed (async)
 761         {
 762             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
 763             'info_dict': {
 764                 'id': '807fafadvk',
 765                 'ext': 'mp4',
 766                 'title': 'Drip Brennan Dunn Workshop',
 767                 'description': 'a JV Webinars video from getdrip-1',
 768                 'duration': 4986.95,
 769                 'timestamp': 1463607249,
 770                 'upload_date': '20160518',
 771             },
 772             'params': {
 773                 'skip_download': True,
 774             }
 775         },
 776         # Soundcloud embed
 777         {
 778             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 779             'info_dict': {
 780                 'id': '174391317',
 781                 'ext': 'mp3',
 782                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 783                 'uploader': 'Sophos Security',
 784                 'title': 'Chet Chat 171 - Oct 29, 2014',
 785                 'upload_date': '20141029',
 786             }
 787         },
 788         # Livestream embed
 789         {
 790             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 791             'info_dict': {
 792                 'id': '67864563',
 793                 'ext': 'flv',
 794                 'upload_date': '20141112',
 795                 'title': 'Rosetta #CometLanding webcast HL 10',
 796             }
 797         },
 798         # Another Livestream embed, without 'new.' in URL
 799         {
 800             'url': 'https://www.freespeech.org/',
 801             'info_dict': {
 802                 'id': '123537347',
 803                 'ext': 'mp4',
 804                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 805             },
 806             'params': {
 807                 # Live stream
 808                 'skip_download': True,
 809             },
 810         },
 811         # LazyYT
 812         {
 813             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 814             'info_dict': {
 815                 'id': '1986',
 816                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 817             },
 818             'playlist_mincount': 2,
 819         },
 820         # Cinchcast embed
 821         {
 822             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 823             'info_dict': {
 824                 'id': '7141703',
 825                 'ext': 'mp3',
 826                 'upload_date': '20141126',
 827                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 828             }
 829         },
 830         # Cinerama player
 831         {
 832             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 833             'info_dict': {
 834                 'id': '730m_DandD_1901_512k',
 835                 'ext': 'mp4',
 836                 'uploader': 'www.abc.net.au',
 837                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 838             }
 839         },
 840         # embedded viddler video
 841         {
 842             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 843             'info_dict': {
 844                 'id': '4d03aad9',
 845                 'ext': 'mp4',
 846                 'uploader': 'deadspin',
 847                 'title': 'WALL-TO-GORTAT',
 848                 'timestamp': 1422285291,
 849                 'upload_date': '20150126',
 850             },
 851             'add_ie': ['Viddler'],
 852         },
 853         # Libsyn embed
 854         {
 855             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 856             'info_dict': {
 857                 'id': '3377616',
 858                 'ext': 'mp3',
 859                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 860                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 861                 'upload_date': '20150220',
 862             },
 863             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
 864         },
 865         # jwplayer YouTube
 866         {
 867             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 868             'info_dict': {
 869                 'id': 'Mrj4DVp2zeA',
 870                 'ext': 'mp4',
 871                 'upload_date': '20150212',
 872                 'uploader': 'The National Archives UK',
 873                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 874                 'uploader_id': 'NationalArchives08',
 875                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 876             },
 877         },
 878         # rtl.nl embed
 879         {
 880             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 881             'playlist_mincount': 5,
 882             'info_dict': {
 883                 'id': 'aanslagen-kopenhagen',
 884                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 885             }
 886         },
 887         # Zapiks embed
 888         {
 889             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 890             'info_dict': {
 891                 'id': '118046',
 892                 'ext': 'mp4',
 893                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 894             }
 895         },
 896         # Kaltura embed (different embed code)
 897         {
 898             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 899             'info_dict': {
 900                 'id': '1_a52wc67y',
 901                 'ext': 'flv',
 902                 'upload_date': '20150127',
 903                 'uploader_id': 'PremierMedia',
 904                 'timestamp': int,
 905                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 906             },
 907         },
 908         # Kaltura embed protected with referrer
 909         {
 910             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
 911             'info_dict': {
 912                 'id': '1_g4fbemnq',
 913                 'ext': 'mp4',
 914                 'title': 'Violetta - Achter De Schermen - Ruggero',
 915                 'description': 'Achter de schermen met Ruggero',
 916                 'timestamp': 1435133761,
 917                 'upload_date': '20150624',
 918                 'uploader_id': 'echojecka',
 919             },
 920         },
 921         # Kaltura embed with single quotes
 922         {
 923             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
 924             'info_dict': {
 925                 'id': '0_izeg5utt',
 926                 'ext': 'mp4',
 927                 'title': '35871',
 928                 'timestamp': 1355743100,
 929                 'upload_date': '20121217',
 930                 'uploader_id': 'batchUser',
 931             },
 932             'add_ie': ['Kaltura'],
 933         },
 934         {
 935             # Kaltura embedded via quoted entry_id
 936             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
 937             'info_dict': {
 938                 'id': '0_utuok90b',
 939                 'ext': 'mp4',
 940                 'title': '06_matthew_brender_raj_dutt',
 941                 'timestamp': 1466638791,
 942                 'upload_date': '20160622',
 943             },
 944             'add_ie': ['Kaltura'],
 945             'expected_warnings': [
 946                 'Could not send HEAD request'
 947             ],
 948             'params': {
 949                 'skip_download': True,
 950             }
 951         },
 952         # Eagle.Platform embed (generic URL)
 953         {
 954             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 955             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
 956             'info_dict': {
 957                 'id': '227304',
 958                 'ext': 'mp4',
 959                 'title': 'Навальный вышел на свободу',
 960                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 961                 'thumbnail': 're:^https?://.*\.jpg$',
 962                 'duration': 87,
 963                 'view_count': int,
 964                 'age_limit': 0,
 965             },
 966         },
 967         # ClipYou (Eagle.Platform) embed (custom URL)
 968         {
 969             'url': 'http://muz-tv.ru/play/7129/',
 970             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
 971             'info_dict': {
 972                 'id': '12820',
 973                 'ext': 'mp4',
 974                 'title': "'O Sole Mio",
 975                 'thumbnail': 're:^https?://.*\.jpg$',
 976                 'duration': 216,
 977                 'view_count': int,
 978             },
 979         },
 980         # Pladform embed
 981         {
 982             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 983             'info_dict': {
 984                 'id': '100183293',
 985                 'ext': 'mp4',
 986                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 987                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 988                 'thumbnail': 're:^https?://.*\.jpg$',
 989                 'duration': 694,
 990                 'age_limit': 0,
 991             },
 992         },
 993         # Playwire embed
 994         {
 995             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 996             'info_dict': {
 997                 'id': '3519514',
 998                 'ext': 'mp4',
 999                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1000                 'thumbnail': 're:^https?://.*\.png$',
1001                 'duration': 45.115,
1002             },
1003         },
1004         # 5min embed
1005         {
1006             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1007             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1008             'info_dict': {
1009                 'id': '518726732',
1010                 'ext': 'mp4',
1011                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1012             },
1013         },
1014         # SVT embed
1015         {
1016             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1017             'info_dict': {
1018                 'id': '2900353',
1019                 'ext': 'flv',
1020                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1021                 'duration': 27,
1022                 'age_limit': 0,
1023             },
1024         },
1025         # Crooks and Liars embed
1026         {
1027             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1028             'info_dict': {
1029                 'id': '8RUoRhRi',
1030                 'ext': 'mp4',
1031                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1032                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1033                 'timestamp': 1428207000,
1034                 'upload_date': '20150405',
1035                 'uploader': 'Heather',
1036             },
1037         },
1038         # Crooks and Liars external embed
1039         {
1040             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1041             'info_dict': {
1042                 'id': 'MTE3MjUtMzQ2MzA',
1043                 'ext': 'mp4',
1044                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1045                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1046                 'timestamp': 1265032391,
1047                 'upload_date': '20100201',
1048                 'uploader': 'Heather',
1049             },
1050         },
1051         # NBC Sports vplayer embed
1052         {
1053             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1054             'info_dict': {
1055                 'id': 'ln7x1qSThw4k',
1056                 'ext': 'flv',
1057                 'title': "PFT Live: New leader in the 'new-look' defense",
1058                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1059                 'uploader': 'NBCU-SPORTS',
1060                 'upload_date': '20140107',
1061                 'timestamp': 1389118457,
1062             },
1063         },
1064         # NBC News embed
1065         {
1066             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1067             'md5': '1aa589c675898ae6d37a17913cf68d66',
1068             'info_dict': {
1069                 'id': '701714499682',
1070                 'ext': 'mp4',
1071                 'title': 'PREVIEW: On Assignment: David Letterman',
1072                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1073             },
1074         },
1075         # UDN embed
1076         {
1077             'url': 'https://video.udn.com/news/300346',
1078             'md5': 'fd2060e988c326991037b9aff9df21a6',
1079             'info_dict': {
1080                 'id': '300346',
1081                 'ext': 'mp4',
1082                 'title': '中一中男師變性 全校師生力挺',
1083                 'thumbnail': 're:^https?://.*\.jpg$',
1084             },
1085             'params': {
1086                 # m3u8 download
1087                 'skip_download': True,
1088             },
1089         },
1090         # Ooyala embed
1091         {
1092             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1093             'info_dict': {
1094                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1095                 'ext': 'mp4',
1096                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1097                 'title': 'This is what separates the Excel masters from the wannabes',
1098                 'duration': 191.933,
1099             },
1100             'params': {
1101                 # m3u8 downloads
1102                 'skip_download': True,
1103             }
1104         },
1105         # Brightcove URL in single quotes
1106         {
1107             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1108             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1109             'info_dict': {
1110                 'id': '4255764656001',
1111                 'ext': 'mp4',
1112                 'title': 'SN Presents: Russell Martin, World Citizen',
1113                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1114                 'uploader': 'Rogers Sportsnet',
1115                 'uploader_id': '1704050871',
1116                 'upload_date': '20150525',
1117                 'timestamp': 1432570283,
1118             },
1119         },
1120         # Dailymotion Cloud video
1121         {
1122             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1123             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1124             'info_dict': {
1125                 'id': 'x2uy8t3',
1126                 'ext': 'mp4',
1127                 'title': 'Sauvons les abeilles ! - Le débat',
1128                 'description': 'md5:d9082128b1c5277987825d684939ca26',
1129                 'thumbnail': 're:^https?://.*\.jpe?g$',
1130                 'timestamp': 1434970506,
1131                 'upload_date': '20150622',
1132                 'uploader': 'Public Sénat',
1133                 'uploader_id': 'xa9gza',
1134             }
1135         },
1136         # OnionStudios embed
1137         {
1138             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1139             'info_dict': {
1140                 'id': '2855',
1141                 'ext': 'mp4',
1142                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1143                 'thumbnail': 're:^https?://.*\.jpe?g$',
1144                 'uploader': 'ClickHole',
1145                 'uploader_id': 'clickhole',
1146             }
1147         },
1148         # SnagFilms embed
1149         {
1150             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1151             'info_dict': {
1152                 'id': '74849a00-85a9-11e1-9660-123139220831',
1153                 'ext': 'mp4',
1154                 'title': '#whilewewatch',
1155             }
1156         },
1157         # AdobeTVVideo embed
1158         {
1159             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1160             'md5': '43662b577c018ad707a63766462b1e87',
1161             'info_dict': {
1162                 'id': '2456',
1163                 'ext': 'mp4',
1164                 'title': 'New experience with Acrobat DC',
1165                 'description': 'New experience with Acrobat DC',
1166                 'duration': 248.667,
1167             },
1168         },
1169         # ScreenwaveMedia embed
1170         {
1171             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1172             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1173             'info_dict': {
1174                 'id': 'cinemasnob-55d26273809dd',
1175                 'ext': 'mp4',
1176                 'title': 'cinemasnob',
1177             },
1178         },
1179         # BrightcoveInPageEmbed embed
1180         {
1181             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1182             'info_dict': {
1183                 'id': '4238694884001',
1184                 'ext': 'flv',
1185                 'title': 'Tabletop: Dread, Last Thoughts',
1186                 'description': 'Tabletop: Dread, Last Thoughts',
1187                 'duration': 51690,
1188             },
1189         },
1190         # JWPlayer with M3U8
1191         {
1192             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1193             'info_dict': {
1194                 'id': 'playlist',
1195                 'ext': 'mp4',
1196                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1197                 'uploader': 'ren.tv',
1198             },
1199             'params': {
1200                 # m3u8 downloads
1201                 'skip_download': True,
1202             }
1203         },
1204         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1205         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1206         {
1207             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1208             'info_dict': {
1209                 'id': '4785848093001',
1210                 'ext': 'mp4',
1211                 'title': 'The Cardinal Pell Interview',
1212                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1213                 'uploader': 'GlobeCast Australia - GlobeStream',
1214                 'uploader_id': '2733773828001',
1215                 'upload_date': '20160304',
1216                 'timestamp': 1457083087,
1217             },
1218             'params': {
1219                 # m3u8 downloads
1220                 'skip_download': True,
1221             },
1222         },
1223         # Another form of arte.tv embed
1224         {
1225             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1226             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1227             'info_dict': {
1228                 'id': '030273-562_PLUS7-F',
1229                 'ext': 'mp4',
1230                 'title': 'ARTE Reportage - Nulle part, en France',
1231                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1232                 'upload_date': '20160409',
1233             },
1234         },
1235         # LiveLeak embed
1236         {
1237             'url': 'http://www.wykop.pl/link/3088787/',
1238             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1239             'info_dict': {
1240                 'id': '874_1459135191',
1241                 'ext': 'mp4',
1242                 'title': 'Man shows poor quality of new apartment building',
1243                 'description': 'The wall is like a sand pile.',
1244                 'uploader': 'Lake8737',
1245             }
1246         },
1247         # Duplicated embedded video URLs
1248         {
1249             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1250             'info_dict': {
1251                 'id': '149298443_480_16c25b74_2',
1252                 'ext': 'mp4',
1253                 'title': 'vs. Blue Orange Spring Game',
1254                 'uploader': 'www.hudl.com',
1255             },
1256         },
1257         # twitter:player:stream embed
1258         {
1259             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1260             'info_dict': {
1261                 'id': 'master',
1262                 'ext': 'mp4',
1263                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1264                 'uploader': 'www.rtl.be',
1265             },
1266             'params': {
1267                 # m3u8 downloads
1268                 'skip_download': True,
1269             },
1270         },
1271         # twitter:player embed
1272         {
1273             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1274             'md5': 'a3e0df96369831de324f0778e126653c',
1275             'info_dict': {
1276                 'id': '4909620399001',
1277                 'ext': 'mp4',
1278                 'title': 'What Do Black Holes Sound Like?',
1279                 'description': 'what do black holes sound like',
1280                 'upload_date': '20160524',
1281                 'uploader_id': '29913724001',
1282                 'timestamp': 1464107587,
1283                 'uploader': 'TheAtlantic',
1284             },
1285             'add_ie': ['BrightcoveLegacy'],
1286         },
1287         # Facebook <iframe> embed
1288         {
1289             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1290             'md5': 'fbcde74f534176ecb015849146dd3aee',
1291             'info_dict': {
1292                 'id': '599637780109885',
1293                 'ext': 'mp4',
1294                 'title': 'Facebook video #599637780109885',
1295             },
1296         },
1297         # Facebook API embed
1298         {
1299             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1300             'md5': 'a47372ee61b39a7b90287094d447d94e',
1301             'info_dict': {
1302                 'id': '10153467542406923',
1303                 'ext': 'mp4',
1304                 'title': 'Facebook video #10153467542406923',
1305             },
1306         },
1307         # Wordpress "YouTube Video Importer" plugin
1308         {
1309             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1310             'md5': 'd16797741b560b485194eddda8121b48',
1311             'info_dict': {
1312                 'id': 'HNTXWDXV9Is',
1313                 'ext': 'mp4',
1314                 'title': 'Blue Devils Drumline Stanford lot 2016',
1315                 'upload_date': '20160627',
1316                 'uploader_id': 'GENOCIDE8GENERAL10',
1317                 'uploader': 'cylus cyrus',
1318             },
1319         },
1320         {
1321             # video stored on custom kaltura server
1322             'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1323             'md5': '537617d06e64dfed891fa1593c4b30cc',
1324             'info_dict': {
1325                 'id': '0_1iotm5bh',
1326                 'ext': 'mp4',
1327                 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1328                 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1329                 'uploader_id': 'videos.expansion@el-mundo.net',
1330                 'upload_date': '20150429',
1331                 'timestamp': 1430303472,
1332             },
1333             'add_ie': ['Kaltura'],
1334         },
1335         {
1336             # Non-standard Vimeo embed
1337             'url': 'https://openclassrooms.com/courses/understanding-the-web',
1338             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1339             'info_dict': {
1340                 'id': '148867247',
1341                 'ext': 'mp4',
1342                 'title': 'Understanding the web - Teaser',
1343                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1344                 'upload_date': '20151214',
1345                 'uploader': 'OpenClassrooms',
1346                 'uploader_id': 'openclassrooms',
1347             },
1348             'add_ie': ['Vimeo'],
1349         },
1350         {
1351             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1352             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1353             'info_dict': {
1354                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1355                 'ext': 'mp4',
1356                 'title': 'Big Buck Bunny',
1357                 'description': 'Royalty free test video',
1358                 'timestamp': 1432816365,
1359                 'upload_date': '20150528',
1360                 'is_live': False,
1361             },
1362             'params': {
1363                 'skip_download': True,
1364             },
1365             'add_ie': [ArkenaIE.ie_key()],
1366         },
1367         # {
1368         #     # TODO: find another test
1369         #     # http://schema.org/VideoObject
1370         #     'url': 'https://flipagram.com/f/nyvTSJMKId',
1371         #     'md5': '888dcf08b7ea671381f00fab74692755',
1372         #     'info_dict': {
1373         #         'id': 'nyvTSJMKId',
1374         #         'ext': 'mp4',
1375         #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1376         #         'description': '#love for cats.',
1377         #         'timestamp': 1461244995,
1378         #         'upload_date': '20160421',
1379         #     },
1380         #     'params': {
1381         #         'force_generic_extractor': True,
1382         #     },
1383         # }
1384     ]
1385
1386     def report_following_redirect(self, new_url):
1387         """Report information extraction."""
1388         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1389
1390     def _extract_rss(self, url, video_id, doc):
1391         playlist_title = doc.find('./channel/title').text
1392         playlist_desc_el = doc.find('./channel/description')
1393         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1394
1395         entries = []
1396         for it in doc.findall('./channel/item'):
1397             next_url = xpath_text(it, 'link', fatal=False)
1398             if not next_url:
1399                 enclosure_nodes = it.findall('./enclosure')
1400                 for e in enclosure_nodes:
1401                     next_url = e.attrib.get('url')
1402                     if next_url:
1403                         break
1404
1405             if not next_url:
1406                 continue
1407
1408             entries.append({
1409                 '_type': 'url',
1410                 'url': next_url,
1411                 'title': it.find('title').text,
1412             })
1413
1414         return {
1415             '_type': 'playlist',
1416             'id': url,
1417             'title': playlist_title,
1418             'description': playlist_desc,
1419             'entries': entries,
1420         }
1421
1422     def _extract_camtasia(self, url, video_id, webpage):
1423         """ Returns None if no camtasia video can be found. """
1424
1425         camtasia_cfg = self._search_regex(
1426             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1427             webpage, 'camtasia configuration file', default=None)
1428         if camtasia_cfg is None:
1429             return None
1430
1431         title = self._html_search_meta('DC.title', webpage, fatal=True)
1432
1433         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1434         camtasia_cfg = self._download_xml(
1435             camtasia_url, video_id,
1436             note='Downloading camtasia configuration',
1437             errnote='Failed to download camtasia configuration')
1438         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1439
1440         entries = []
1441         for n in fileset_node.getchildren():
1442             url_n = n.find('./uri')
1443             if url_n is None:
1444                 continue
1445
1446             entries.append({
1447                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1448                 'title': '%s - %s' % (title, n.tag),
1449                 'url': compat_urlparse.urljoin(url, url_n.text),
1450                 'duration': float_or_none(n.find('./duration').text),
1451             })
1452
1453         return {
1454             '_type': 'playlist',
1455             'entries': entries,
1456             'title': title,
1457         }
1458
1459     def _real_extract(self, url):
1460         if url.startswith('//'):
1461             return {
1462                 '_type': 'url',
1463                 'url': self.http_scheme() + url,
1464             }
1465
1466         parsed_url = compat_urlparse.urlparse(url)
1467         if not parsed_url.scheme:
1468             default_search = self._downloader.params.get('default_search')
1469             if default_search is None:
1470                 default_search = 'fixup_error'
1471
1472             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1473                 if '/' in url:
1474                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1475                     return self.url_result('http://' + url)
1476                 elif default_search != 'fixup_error':
1477                     if default_search == 'auto_warning':
1478                         if re.match(r'^(?:url|URL)$', url):
1479                             raise ExtractorError(
1480                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1481                                 expected=True)
1482                         else:
1483                             self._downloader.report_warning(
1484                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1485                     return self.url_result('ytsearch:' + url)
1486
1487             if default_search in ('error', 'fixup_error'):
1488                 raise ExtractorError(
1489                     '%r is not a valid URL. '
1490                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1491                     % (url, url), expected=True)
1492             else:
1493                 if ':' not in default_search:
1494                     default_search += ':'
1495                 return self.url_result(default_search + url)
1496
1497         url, smuggled_data = unsmuggle_url(url)
1498         force_videoid = None
1499         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1500         if smuggled_data and 'force_videoid' in smuggled_data:
1501             force_videoid = smuggled_data['force_videoid']
1502             video_id = force_videoid
1503         else:
1504             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1505
1506         self.to_screen('%s: Requesting header' % video_id)
1507
1508         head_req = HEADRequest(url)
1509         head_response = self._request_webpage(
1510             head_req, video_id,
1511             note=False, errnote='Could not send HEAD request to %s' % url,
1512             fatal=False)
1513
1514         if head_response is not False:
1515             # Check for redirect
1516             new_url = head_response.geturl()
1517             if url != new_url:
1518                 self.report_following_redirect(new_url)
1519                 if force_videoid:
1520                     new_url = smuggle_url(
1521                         new_url, {'force_videoid': force_videoid})
1522                 return self.url_result(new_url)
1523
1524         full_response = None
1525         if head_response is False:
1526             request = sanitized_Request(url)
1527             request.add_header('Accept-Encoding', '*')
1528             full_response = self._request_webpage(request, video_id)
1529             head_response = full_response
1530
1531         info_dict = {
1532             'id': video_id,
1533             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1534             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1535         }
1536
1537         # Check for direct link to a video
1538         content_type = head_response.headers.get('Content-Type', '').lower()
1539         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1540         if m:
1541             format_id = m.group('format_id')
1542             if format_id.endswith('mpegurl'):
1543                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1544             elif format_id == 'f4m':
1545                 formats = self._extract_f4m_formats(url, video_id)
1546             else:
1547                 formats = [{
1548                     'format_id': m.group('format_id'),
1549                     'url': url,
1550                     'vcodec': 'none' if m.group('type') == 'audio' else None
1551                 }]
1552                 info_dict['direct'] = True
1553             self._sort_formats(formats)
1554             info_dict['formats'] = formats
1555             return info_dict
1556
1557         if not self._downloader.params.get('test', False) and not is_intentional:
1558             force = self._downloader.params.get('force_generic_extractor', False)
1559             self._downloader.report_warning(
1560                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1561
1562         if not full_response:
1563             request = sanitized_Request(url)
1564             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1565             # making it impossible to download only chunk of the file (yet we need only 512kB to
1566             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1567             # that will always result in downloading the whole file that is not desirable.
1568             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1569             # to accept raw bytes and being able to download only a chunk.
1570             # It may probably better to solve this by checking Content-Type for application/octet-stream
1571             # after HEAD request finishes, but not sure if we can rely on this.
1572             request.add_header('Accept-Encoding', '*')
1573             full_response = self._request_webpage(request, video_id)
1574
1575         first_bytes = full_response.read(512)
1576
1577         # Is it an M3U playlist?
1578         if first_bytes.startswith(b'#EXTM3U'):
1579             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1580             self._sort_formats(info_dict['formats'])
1581             return info_dict
1582
1583         # Maybe it's a direct link to a video?
1584         # Be careful not to download the whole thing!
1585         if not is_html(first_bytes):
1586             self._downloader.report_warning(
1587                 'URL could be a direct video link, returning it as such.')
1588             info_dict.update({
1589                 'direct': True,
1590                 'url': url,
1591             })
1592             return info_dict
1593
1594         webpage = self._webpage_read_content(
1595             full_response, url, video_id, prefix=first_bytes)
1596
1597         self.report_extraction(video_id)
1598
1599         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1600         try:
1601             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1602             if doc.tag == 'rss':
1603                 return self._extract_rss(url, video_id, doc)
1604             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1605                 smil = self._parse_smil(doc, url, video_id)
1606                 self._sort_formats(smil['formats'])
1607                 return smil
1608             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1609                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1610             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1611                 info_dict['formats'] = self._parse_mpd_formats(
1612                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1613                 self._sort_formats(info_dict['formats'])
1614                 return info_dict
1615             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1616                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1617                 self._sort_formats(info_dict['formats'])
1618                 return info_dict
1619         except compat_xml_parse_error:
1620             pass
1621
1622         # Is it a Camtasia project?
1623         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1624         if camtasia_res is not None:
1625             return camtasia_res
1626
1627         # Sometimes embedded video player is hidden behind percent encoding
1628         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1629         # Unescaping the whole page allows to handle those cases in a generic way
1630         webpage = compat_urllib_parse_unquote(webpage)
1631
1632         # it's tempting to parse this further, but you would
1633         # have to take into account all the variations like
1634         #   Video Title - Site Name
1635         #   Site Name | Video Title
1636         #   Video Title - Tagline | Site Name
1637         # and so on and so forth; it's just not practical
1638         video_title = self._og_search_title(
1639             webpage, default=None) or self._html_search_regex(
1640             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1641             default='video')
1642
1643         # Try to detect age limit automatically
1644         age_limit = self._rta_search(webpage)
1645         # And then there are the jokers who advertise that they use RTA,
1646         # but actually don't.
1647         AGE_LIMIT_MARKERS = [
1648             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1649         ]
1650         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1651             age_limit = 18
1652
1653         # video uploader is domain name
1654         video_uploader = self._search_regex(
1655             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1656
1657         video_description = self._og_search_description(webpage, default=None)
1658         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1659
1660         # Helper method
1661         def _playlist_from_matches(matches, getter=None, ie=None):
1662             urlrs = orderedSet(
1663                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1664                 for m in matches)
1665             return self.playlist_result(
1666                 urlrs, playlist_id=video_id, playlist_title=video_title)
1667
1668         # Look for Brightcove Legacy Studio embeds
1669         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1670         if bc_urls:
1671             self.to_screen('Brightcove video detected.')
1672             entries = [{
1673                 '_type': 'url',
1674                 'url': smuggle_url(bc_url, {'Referer': url}),
1675                 'ie_key': 'BrightcoveLegacy'
1676             } for bc_url in bc_urls]
1677
1678             return {
1679                 '_type': 'playlist',
1680                 'title': video_title,
1681                 'id': video_id,
1682                 'entries': entries,
1683             }
1684
1685         # Look for Brightcove New Studio embeds
1686         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1687         if bc_urls:
1688             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1689
1690         # Look for ThePlatform embeds
1691         tp_urls = ThePlatformIE._extract_urls(webpage)
1692         if tp_urls:
1693             return _playlist_from_matches(tp_urls, ie='ThePlatform')
1694
1695         # Look for Vessel embeds
1696         vessel_urls = VesselIE._extract_urls(webpage)
1697         if vessel_urls:
1698             return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
1699
1700         # Look for embedded rtl.nl player
1701         matches = re.findall(
1702             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1703             webpage)
1704         if matches:
1705             return _playlist_from_matches(matches, ie='RtlNl')
1706
1707         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1708         if vimeo_url is not None:
1709             return self.url_result(vimeo_url)
1710
1711         vid_me_embed_url = self._search_regex(
1712             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1713             webpage, 'vid.me embed', default=None)
1714         if vid_me_embed_url is not None:
1715             return self.url_result(vid_me_embed_url, 'Vidme')
1716
1717         # Look for embedded YouTube player
1718         matches = re.findall(r'''(?x)
1719             (?:
1720                 <iframe[^>]+?src=|
1721                 data-video-url=|
1722                 <embed[^>]+?src=|
1723                 embedSWF\(?:\s*|
1724                 new\s+SWFObject\(
1725             )
1726             (["\'])
1727                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1728                 (?:embed|v|p)/.+?)
1729             \1''', webpage)
1730         if matches:
1731             return _playlist_from_matches(
1732                 matches, lambda m: unescapeHTML(m[1]))
1733
1734         # Look for lazyYT YouTube embed
1735         matches = re.findall(
1736             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1737         if matches:
1738             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1739
1740         # Look for Wordpress "YouTube Video Importer" plugin
1741         matches = re.findall(r'''(?x)<div[^>]+
1742             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1743             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1744         if matches:
1745             return _playlist_from_matches(matches, lambda m: m[-1])
1746
1747         matches = DailymotionIE._extract_urls(webpage)
1748         if matches:
1749             return _playlist_from_matches(matches)
1750
1751         # Look for embedded Dailymotion playlist player (#3822)
1752         m = re.search(
1753             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1754         if m:
1755             playlists = re.findall(
1756                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1757             if playlists:
1758                 return _playlist_from_matches(
1759                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1760
1761         # Look for embedded Wistia player
1762         match = re.search(
1763             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1764         if match:
1765             embed_url = self._proto_relative_url(
1766                 unescapeHTML(match.group('url')))
1767             return {
1768                 '_type': 'url_transparent',
1769                 'url': embed_url,
1770                 'ie_key': 'Wistia',
1771                 'uploader': video_uploader,
1772             }
1773
1774         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1775         if match:
1776             return {
1777                 '_type': 'url_transparent',
1778                 'url': 'wistia:%s' % match.group('id'),
1779                 'ie_key': 'Wistia',
1780                 'uploader': video_uploader,
1781             }
1782
1783         match = re.search(
1784             r'''(?sx)
1785                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1786                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1787             ''', webpage)
1788         if match:
1789             return self.url_result(self._proto_relative_url(
1790                 'wistia:%s' % match.group('id')), 'Wistia')
1791
1792         # Look for SVT player
1793         svt_url = SVTIE._extract_url(webpage)
1794         if svt_url:
1795             return self.url_result(svt_url, 'SVT')
1796
1797         # Look for embedded condenast player
1798         matches = re.findall(
1799             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1800             webpage)
1801         if matches:
1802             return {
1803                 '_type': 'playlist',
1804                 'entries': [{
1805                     '_type': 'url',
1806                     'ie_key': 'CondeNast',
1807                     'url': ma,
1808                 } for ma in matches],
1809                 'title': video_title,
1810                 'id': video_id,
1811             }
1812
1813         # Look for Bandcamp pages with custom domain
1814         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1815         if mobj is not None:
1816             burl = unescapeHTML(mobj.group(1))
1817             # Don't set the extractor because it can be a track url or an album
1818             return self.url_result(burl)
1819
1820         # Look for embedded Vevo player
1821         mobj = re.search(
1822             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1823         if mobj is not None:
1824             return self.url_result(mobj.group('url'))
1825
1826         # Look for embedded Viddler player
1827         mobj = re.search(
1828             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1829             webpage)
1830         if mobj is not None:
1831             return self.url_result(mobj.group('url'))
1832
1833         # Look for NYTimes player
1834         mobj = re.search(
1835             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1836             webpage)
1837         if mobj is not None:
1838             return self.url_result(mobj.group('url'))
1839
1840         # Look for Libsyn player
1841         mobj = re.search(
1842             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1843         if mobj is not None:
1844             return self.url_result(mobj.group('url'))
1845
1846         # Look for Ooyala videos
1847         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1848                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1849                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1850                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1851         if mobj is not None:
1852             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1853
1854         # Look for multiple Ooyala embeds on SBN network websites
1855         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1856         if mobj is not None:
1857             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1858             if embeds:
1859                 return _playlist_from_matches(
1860                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1861
1862         # Look for Aparat videos
1863         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1864         if mobj is not None:
1865             return self.url_result(mobj.group(1), 'Aparat')
1866
1867         # Look for MPORA videos
1868         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1869         if mobj is not None:
1870             return self.url_result(mobj.group(1), 'Mpora')
1871
1872         # Look for embedded NovaMov-based player
1873         mobj = re.search(
1874             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1875                     (?P<url>http://(?:(?:embed|www)\.)?
1876                         (?:novamov\.com|
1877                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1878                            videoweed\.(?:es|com)|
1879                            movshare\.(?:net|sx|ag)|
1880                            divxstage\.(?:eu|net|ch|co|at|ag))
1881                         /embed\.php.+?)\1''', webpage)
1882         if mobj is not None:
1883             return self.url_result(mobj.group('url'))
1884
1885         # Look for embedded Facebook player
1886         facebook_url = FacebookIE._extract_url(webpage)
1887         if facebook_url is not None:
1888             return self.url_result(facebook_url, 'Facebook')
1889
1890         # Look for embedded VK player
1891         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1892         if mobj is not None:
1893             return self.url_result(mobj.group('url'), 'VK')
1894
1895         # Look for embedded Odnoklassniki player
1896         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1897         if mobj is not None:
1898             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1899
1900         # Look for embedded ivi player
1901         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1902         if mobj is not None:
1903             return self.url_result(mobj.group('url'), 'Ivi')
1904
1905         # Look for embedded Huffington Post player
1906         mobj = re.search(
1907             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1908         if mobj is not None:
1909             return self.url_result(mobj.group('url'), 'HuffPost')
1910
1911         # Look for embed.ly
1912         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1913         if mobj is not None:
1914             return self.url_result(mobj.group('url'))
1915         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1916         if mobj is not None:
1917             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1918
1919         # Look for funnyordie embed
1920         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1921         if matches:
1922             return _playlist_from_matches(
1923                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1924
1925         # Look for BBC iPlayer embed
1926         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1927         if matches:
1928             return _playlist_from_matches(matches, ie='BBCCoUk')
1929
1930         # Look for embedded RUTV player
1931         rutv_url = RUTVIE._extract_url(webpage)
1932         if rutv_url:
1933             return self.url_result(rutv_url, 'RUTV')
1934
1935         # Look for embedded TVC player
1936         tvc_url = TVCIE._extract_url(webpage)
1937         if tvc_url:
1938             return self.url_result(tvc_url, 'TVC')
1939
1940         # Look for embedded SportBox player
1941         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1942         if sportbox_urls:
1943             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1944
1945         # Look for embedded PornHub player
1946         pornhub_url = PornHubIE._extract_url(webpage)
1947         if pornhub_url:
1948             return self.url_result(pornhub_url, 'PornHub')
1949
1950         # Look for embedded XHamster player
1951         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1952         if xhamster_urls:
1953             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1954
1955         # Look for embedded TNAFlixNetwork player
1956         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1957         if tnaflix_urls:
1958             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1959
1960         # Look for embedded Tvigle player
1961         mobj = re.search(
1962             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1963         if mobj is not None:
1964             return self.url_result(mobj.group('url'), 'Tvigle')
1965
1966         # Look for embedded TED player
1967         mobj = re.search(
1968             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1969         if mobj is not None:
1970             return self.url_result(mobj.group('url'), 'TED')
1971
1972         # Look for embedded Ustream videos
1973         mobj = re.search(
1974             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1975         if mobj is not None:
1976             return self.url_result(mobj.group('url'), 'Ustream')
1977
1978         # Look for embedded arte.tv player
1979         mobj = re.search(
1980             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
1981             webpage)
1982         if mobj is not None:
1983             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1984
1985         # Look for embedded francetv player
1986         mobj = re.search(
1987             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1988             webpage)
1989         if mobj is not None:
1990             return self.url_result(mobj.group('url'))
1991
1992         # Look for embedded smotri.com player
1993         smotri_url = SmotriIE._extract_url(webpage)
1994         if smotri_url:
1995             return self.url_result(smotri_url, 'Smotri')
1996
1997         # Look for embedded Myvi.ru player
1998         myvi_url = MyviIE._extract_url(webpage)
1999         if myvi_url:
2000             return self.url_result(myvi_url)
2001
2002         # Look for embedded soundcloud player
2003         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2004         if soundcloud_urls:
2005             return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2006
2007         # Look for embedded mtvservices player
2008         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2009         if mtvservices_url:
2010             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2011
2012         # Look for embedded yahoo player
2013         mobj = re.search(
2014             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2015             webpage)
2016         if mobj is not None:
2017             return self.url_result(mobj.group('url'), 'Yahoo')
2018
2019         # Look for embedded sbs.com.au player
2020         mobj = re.search(
2021             r'''(?x)
2022             (?:
2023                 <meta\s+property="og:video"\s+content=|
2024                 <iframe[^>]+?src=
2025             )
2026             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2027             webpage)
2028         if mobj is not None:
2029             return self.url_result(mobj.group('url'), 'SBS')
2030
2031         # Look for embedded Cinchcast player
2032         mobj = re.search(
2033             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2034             webpage)
2035         if mobj is not None:
2036             return self.url_result(mobj.group('url'), 'Cinchcast')
2037
2038         mobj = re.search(
2039             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2040             webpage)
2041         if not mobj:
2042             mobj = re.search(
2043                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2044                 webpage)
2045         if mobj is not None:
2046             return self.url_result(mobj.group('url'), 'MLB')
2047
2048         mobj = re.search(
2049             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2050             webpage)
2051         if mobj is not None:
2052             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2053
2054         mobj = re.search(
2055             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2056             webpage)
2057         if mobj is not None:
2058             return self.url_result(mobj.group('url'), 'Livestream')
2059
2060         # Look for Zapiks embed
2061         mobj = re.search(
2062             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2063         if mobj is not None:
2064             return self.url_result(mobj.group('url'), 'Zapiks')
2065
2066         # Look for Kaltura embeds
2067         kaltura_url = KalturaIE._extract_url(webpage)
2068         if kaltura_url:
2069             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2070
2071         # Look for Eagle.Platform embeds
2072         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2073         if eagleplatform_url:
2074             return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
2075
2076         # Look for ClipYou (uses Eagle.Platform) embeds
2077         mobj = re.search(
2078             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2079         if mobj is not None:
2080             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2081
2082         # Look for Pladform embeds
2083         pladform_url = PladformIE._extract_url(webpage)
2084         if pladform_url:
2085             return self.url_result(pladform_url)
2086
2087         # Look for Videomore embeds
2088         videomore_url = VideomoreIE._extract_url(webpage)
2089         if videomore_url:
2090             return self.url_result(videomore_url)
2091
2092         # Look for Playwire embeds
2093         mobj = re.search(
2094             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2095         if mobj is not None:
2096             return self.url_result(mobj.group('url'))
2097
2098         # Look for 5min embeds
2099         mobj = re.search(
2100             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2101         if mobj is not None:
2102             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2103
2104         # Look for Crooks and Liars embeds
2105         mobj = re.search(
2106             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2107         if mobj is not None:
2108             return self.url_result(mobj.group('url'))
2109
2110         # Look for NBC Sports VPlayer embeds
2111         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2112         if nbc_sports_url:
2113             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2114
2115         # Look for NBC News embeds
2116         nbc_news_embed_url = re.search(
2117             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2118         if nbc_news_embed_url:
2119             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2120
2121         # Look for Google Drive embeds
2122         google_drive_url = GoogleDriveIE._extract_url(webpage)
2123         if google_drive_url:
2124             return self.url_result(google_drive_url, 'GoogleDrive')
2125
2126         # Look for UDN embeds
2127         mobj = re.search(
2128             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2129         if mobj is not None:
2130             return self.url_result(
2131                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2132
2133         # Look for Senate ISVP iframe
2134         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2135         if senate_isvp_url:
2136             return self.url_result(senate_isvp_url, 'SenateISVP')
2137
2138         # Look for Dailymotion Cloud videos
2139         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2140         if dmcloud_url:
2141             return self.url_result(dmcloud_url, 'DailymotionCloud')
2142
2143         # Look for OnionStudios embeds
2144         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2145         if onionstudios_url:
2146             return self.url_result(onionstudios_url)
2147
2148         # Look for ViewLift embeds
2149         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2150         if viewlift_url:
2151             return self.url_result(viewlift_url)
2152
2153         # Look for JWPlatform embeds
2154         jwplatform_url = JWPlatformIE._extract_url(webpage)
2155         if jwplatform_url:
2156             return self.url_result(jwplatform_url, 'JWPlatform')
2157
2158         # Look for ScreenwaveMedia embeds
2159         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
2160         if mobj is not None:
2161             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
2162
2163         # Look for Digiteka embeds
2164         digiteka_url = DigitekaIE._extract_url(webpage)
2165         if digiteka_url:
2166             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2167
2168         # Look for Arkena embeds
2169         arkena_url = ArkenaIE._extract_url(webpage)
2170         if arkena_url:
2171             return self.url_result(arkena_url, ArkenaIE.ie_key())
2172
2173         # Look for Limelight embeds
2174         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2175         if mobj:
2176             lm = {
2177                 'Media': 'media',
2178                 'Channel': 'channel',
2179                 'ChannelList': 'channel_list',
2180             }
2181             return self.url_result('limelight:%s:%s' % (
2182                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
2183
2184         # Look for AdobeTVVideo embeds
2185         mobj = re.search(
2186             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2187             webpage)
2188         if mobj is not None:
2189             return self.url_result(
2190                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2191                 'AdobeTVVideo')
2192
2193         # Look for Vine embeds
2194         mobj = re.search(
2195             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2196             webpage)
2197         if mobj is not None:
2198             return self.url_result(
2199                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2200
2201         # Look for Instagram embeds
2202         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2203         if instagram_embed_url is not None:
2204             return self.url_result(
2205                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2206
2207         # Look for LiveLeak embeds
2208         liveleak_url = LiveLeakIE._extract_url(webpage)
2209         if liveleak_url:
2210             return self.url_result(liveleak_url, 'LiveLeak')
2211
2212         # Look for 3Q SDN embeds
2213         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2214         if threeqsdn_url:
2215             return {
2216                 '_type': 'url_transparent',
2217                 'ie_key': ThreeQSDNIE.ie_key(),
2218                 'url': self._proto_relative_url(threeqsdn_url),
2219                 'title': video_title,
2220                 'description': video_description,
2221                 'thumbnail': video_thumbnail,
2222                 'uploader': video_uploader,
2223             }
2224
2225         # Looking for http://schema.org/VideoObject
2226         json_ld = self._search_json_ld(
2227             webpage, video_id, default=None, expected_type='VideoObject')
2228         if json_ld and json_ld.get('url'):
2229             info_dict.update({
2230                 'title': video_title or info_dict['title'],
2231                 'description': video_description,
2232                 'thumbnail': video_thumbnail,
2233                 'age_limit': age_limit
2234             })
2235             info_dict.update(json_ld)
2236             return info_dict
2237
2238         def check_video(vurl):
2239             if YoutubeIE.suitable(vurl):
2240                 return True
2241             vpath = compat_urlparse.urlparse(vurl).path
2242             vext = determine_ext(vpath)
2243             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
2244
2245         def filter_video(urls):
2246             return list(filter(check_video, urls))
2247
2248         # Start with something easy: JW Player in SWFObject
2249         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2250         if not found:
2251             # Look for gorilla-vid style embedding
2252             found = filter_video(re.findall(r'''(?sx)
2253                 (?:
2254                     jw_plugins|
2255                     JWPlayerOptions|
2256                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2257                 )
2258                 .*?
2259                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2260         if not found:
2261             # Broaden the search a little bit
2262             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2263         if not found:
2264             # Broaden the findall a little bit: JWPlayer JS loader
2265             found = filter_video(re.findall(
2266                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2267         if not found:
2268             # Flow player
2269             found = filter_video(re.findall(r'''(?xs)
2270                 flowplayer\("[^"]+",\s*
2271                     \{[^}]+?\}\s*,
2272                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2273                         ["']?url["']?\s*:\s*["']([^"']+)["']
2274             ''', webpage))
2275         if not found:
2276             # Cinerama player
2277             found = re.findall(
2278                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2279         if not found:
2280             # Try to find twitter cards info
2281             # twitter:player:stream should be checked before twitter:player since
2282             # it is expected to contain a raw stream (see
2283             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2284             found = filter_video(re.findall(
2285                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2286         if not found:
2287             # We look for Open Graph info:
2288             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
2289             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2290             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2291             if m_video_type is not None:
2292                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2293         if not found:
2294             # HTML5 video
2295             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
2296         if not found:
2297             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2298             found = re.search(
2299                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2300                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2301                 webpage)
2302             if not found:
2303                 # Look also in Refresh HTTP header
2304                 refresh_header = head_response.headers.get('Refresh')
2305                 if refresh_header:
2306                     # In python 2 response HTTP headers are bytestrings
2307                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2308                         refresh_header = refresh_header.decode('iso-8859-1')
2309                     found = re.search(REDIRECT_REGEX, refresh_header)
2310             if found:
2311                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2312                 self.report_following_redirect(new_url)
2313                 return {
2314                     '_type': 'url',
2315                     'url': new_url,
2316                 }
2317
2318         if not found:
2319             # twitter:player is a https URL to iframe player that may or may not
2320             # be supported by youtube-dl thus this is checked the very last (see
2321             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2322             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
2323             if embed_url:
2324                 return self.url_result(embed_url)
2325
2326         if not found:
2327             raise UnsupportedError(url)
2328
2329         entries = []
2330         for video_url in orderedSet(found):
2331             video_url = unescapeHTML(video_url)
2332             video_url = video_url.replace('\\/', '/')
2333             video_url = compat_urlparse.urljoin(url, video_url)
2334             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2335
2336             # Sometimes, jwplayer extraction will result in a YouTube URL
2337             if YoutubeIE.suitable(video_url):
2338                 entries.append(self.url_result(video_url, 'Youtube'))
2339                 continue
2340
2341             # here's a fun little line of code for you:
2342             video_id = os.path.splitext(video_id)[0]
2343
2344             entry_info_dict = {
2345                 'id': video_id,
2346                 'uploader': video_uploader,
2347                 'title': video_title,
2348                 'age_limit': age_limit,
2349             }
2350
2351             ext = determine_ext(video_url)
2352             if ext == 'smil':
2353                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2354             elif ext == 'xspf':
2355                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2356             elif ext == 'm3u8':
2357                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2358             elif ext == 'mpd':
2359                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2360             elif ext == 'f4m':
2361                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2362             else:
2363                 entry_info_dict['url'] = video_url
2364
2365             if entry_info_dict.get('formats'):
2366                 self._sort_formats(entry_info_dict['formats'])
2367
2368             entries.append(entry_info_dict)
2369
2370         if len(entries) == 1:
2371             return entries[0]
2372         else:
2373             for num, e in enumerate(entries, start=1):
2374                 # 'url' results don't have a title
2375                 if e.get('title') is not None:
2376                     e['title'] = '%s (%d)' % (e['title'], num)
2377             return {
2378                 '_type': 'playlist',
2379                 'entries': entries,
2380             }