_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     js_to_json,
  24     orderedSet,
  25     sanitized_Request,
  26     smuggle_url,
  27     unescapeHTML,
  28     unified_strdate,
  29     unsmuggle_url,
  30     UnsupportedError,
  31     xpath_text,
  32 )
  33 from .commonprotocols import RtmpIE
  34 from .brightcove import (
  35     BrightcoveLegacyIE,
  36     BrightcoveNewIE,
  37 )
  38 from .nbc import NBCSportsVPlayerIE
  39 from .ooyala import OoyalaIE
  40 from .rutv import RUTVIE
  41 from .tvc import TVCIE
  42 from .sportbox import SportBoxEmbedIE
  43 from .smotri import SmotriIE
  44 from .myvi import MyviIE
  45 from .condenast import CondeNastIE
  46 from .udn import UDNEmbedIE
  47 from .senateisvp import SenateISVPIE
  48 from .svt import SVTIE
  49 from .pornhub import PornHubIE
  50 from .xhamster import XHamsterEmbedIE
  51 from .tnaflix import TNAFlixNetworkEmbedIE
  52 from .drtuber import DrTuberIE
  53 from .redtube import RedTubeIE
  54 from .vimeo import VimeoIE
  55 from .dailymotion import (
  56     DailymotionIE,
  57     DailymotionCloudIE,
  58 )
  59 from .onionstudios import OnionStudiosIE
  60 from .viewlift import ViewLiftEmbedIE
  61 from .mtv import MTVServicesEmbeddedIE
  62 from .pladform import PladformIE
  63 from .videomore import VideomoreIE
  64 from .webcaster import WebcasterFeedIE
  65 from .googledrive import GoogleDriveIE
  66 from .jwplatform import JWPlatformIE
  67 from .digiteka import DigitekaIE
  68 from .arkena import ArkenaIE
  69 from .instagram import InstagramIE
  70 from .liveleak import LiveLeakIE
  71 from .threeqsdn import ThreeQSDNIE
  72 from .theplatform import ThePlatformIE
  73 from .vessel import VesselIE
  74 from .kaltura import KalturaIE
  75 from .eagleplatform import EaglePlatformIE
  76 from .facebook import FacebookIE
  77 from .soundcloud import SoundcloudIE
  78 from .tunein import TuneInBaseIE
  79 from .vbox7 import Vbox7IE
  80 from .dbtv import DBTVIE
  81 from .piksel import PikselIE
  82 from .videa import VideaIE
  83 from .twentymin import TwentyMinutenIE
  84 from .ustream import UstreamIE
  85 from .openload import OpenloadIE
  86 from .videopress import VideoPressIE
  87 from .rutube import RutubeIE
  88
  89
  90 class GenericIE(InfoExtractor):
  91     IE_DESC = 'Generic downloader that works on some sites'
  92     _VALID_URL = r'.*'
  93     IE_NAME = 'generic'
  94     _TESTS = [
  95         # Direct link to a video
  96         {
  97             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  98             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  99             'info_dict': {
 100                 'id': 'trailer',
 101                 'ext': 'mp4',
 102                 'title': 'trailer',
 103                 'upload_date': '20100513',
 104             }
 105         },
 106         # Direct link to media delivered compressed (until Accept-Encoding is *)
 107         {
 108             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
 109             'md5': '128c42e68b13950268b648275386fc74',
 110             'info_dict': {
 111                 'id': 'FictionJunction-Parallel_Hearts',
 112                 'ext': 'flac',
 113                 'title': 'FictionJunction-Parallel_Hearts',
 114                 'upload_date': '20140522',
 115             },
 116             'expected_warnings': [
 117                 'URL could be a direct video link, returning it as such.'
 118             ],
 119             'skip': 'URL invalid',
 120         },
 121         # Direct download with broken HEAD
 122         {
 123             'url': 'http://ai-radio.org:8000/radio.opus',
 124             'info_dict': {
 125                 'id': 'radio',
 126                 'ext': 'opus',
 127                 'title': 'radio',
 128             },
 129             'params': {
 130                 'skip_download': True,  # infinite live stream
 131             },
 132             'expected_warnings': [
 133                 r'501.*Not Implemented',
 134                 r'400.*Bad Request',
 135             ],
 136         },
 137         # Direct link with incorrect MIME type
 138         {
 139             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 140             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 141             'info_dict': {
 142                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 143                 'id': '5_Lennart_Poettering_-_Systemd',
 144                 'ext': 'webm',
 145                 'title': '5_Lennart_Poettering_-_Systemd',
 146                 'upload_date': '20141120',
 147             },
 148             'expected_warnings': [
 149                 'URL could be a direct video link, returning it as such.'
 150             ]
 151         },
 152         # RSS feed
 153         {
 154             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 155             'info_dict': {
 156                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 157                 'title': 'Zero Punctuation',
 158                 'description': 're:.*groundbreaking video review series.*'
 159             },
 160             'playlist_mincount': 11,
 161         },
 162         # RSS feed with enclosure
 163         {
 164             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 165             'info_dict': {
 166                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 167                 'ext': 'm4v',
 168                 'upload_date': '20150228',
 169                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 170             }
 171         },
 172         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 173         {
 174             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 175             'info_dict': {
 176                 'id': 'smil',
 177                 'ext': 'mp4',
 178                 'title': 'Automatics, robotics and biocybernetics',
 179                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 180                 'upload_date': '20130627',
 181                 'formats': 'mincount:16',
 182                 'subtitles': 'mincount:1',
 183             },
 184             'params': {
 185                 'force_generic_extractor': True,
 186                 'skip_download': True,
 187             },
 188         },
 189         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 190         {
 191             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 192             'info_dict': {
 193                 'id': 'hds',
 194                 'ext': 'flv',
 195                 'title': 'hds',
 196                 'formats': 'mincount:1',
 197             },
 198             'params': {
 199                 'skip_download': True,
 200             },
 201         },
 202         # SMIL from https://www.restudy.dk/video/play/id/1637
 203         {
 204             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 205             'info_dict': {
 206                 'id': 'video_1637',
 207                 'ext': 'flv',
 208                 'title': 'video_1637',
 209                 'formats': 'mincount:3',
 210             },
 211             'params': {
 212                 'skip_download': True,
 213             },
 214         },
 215         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 216         {
 217             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 218             'info_dict': {
 219                 'id': 'smil-service',
 220                 'ext': 'flv',
 221                 'title': 'smil-service',
 222                 'formats': 'mincount:1',
 223             },
 224             'params': {
 225                 'skip_download': True,
 226             },
 227         },
 228         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 229         {
 230             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 231             'info_dict': {
 232                 'id': '4719370',
 233                 'ext': 'mp4',
 234                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 235                 'formats': 'mincount:3',
 236             },
 237             'params': {
 238                 'skip_download': True,
 239             },
 240         },
 241         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 242         {
 243             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 244             'info_dict': {
 245                 'id': 'mZlp2ctYIUEB',
 246                 'ext': 'mp4',
 247                 'title': 'Tikibad ontruimd wegens brand',
 248                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 249                 'thumbnail': r're:^https?://.*\.jpg$',
 250                 'duration': 33,
 251             },
 252             'params': {
 253                 'skip_download': True,
 254             },
 255         },
 256         # MPD from http://dash-mse-test.appspot.com/media.html
 257         {
 258             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 259             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 260             'info_dict': {
 261                 'id': 'car-20120827-manifest',
 262                 'ext': 'mp4',
 263                 'title': 'car-20120827-manifest',
 264                 'formats': 'mincount:9',
 265                 'upload_date': '20130904',
 266             },
 267             'params': {
 268                 'format': 'bestvideo',
 269             },
 270         },
 271         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 272         {
 273             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 274             'info_dict': {
 275                 'id': 'content',
 276                 'ext': 'mp4',
 277                 'title': 'content',
 278                 'formats': 'mincount:8',
 279             },
 280             'params': {
 281                 # m3u8 downloads
 282                 'skip_download': True,
 283             },
 284             'skip': 'video gone',
 285         },
 286         # m3u8 served with Content-Type: text/plain
 287         {
 288             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
 289             'info_dict': {
 290                 'id': 'index',
 291                 'ext': 'mp4',
 292                 'title': 'index',
 293                 'upload_date': '20140720',
 294                 'formats': 'mincount:11',
 295             },
 296             'params': {
 297                 # m3u8 downloads
 298                 'skip_download': True,
 299             },
 300             'skip': 'video gone',
 301         },
 302         # google redirect
 303         {
 304             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 305             'info_dict': {
 306                 'id': 'cmQHVoWB5FY',
 307                 'ext': 'mp4',
 308                 'upload_date': '20130224',
 309                 'uploader_id': 'TheVerge',
 310                 'description': r're:^Chris Ziegler takes a look at the\.*',
 311                 'uploader': 'The Verge',
 312                 'title': 'First Firefox OS phones side-by-side',
 313             },
 314             'params': {
 315                 'skip_download': False,
 316             }
 317         },
 318         {
 319             # redirect in Refresh HTTP header
 320             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 321             'info_dict': {
 322                 'id': 'pO8h3EaFRdo',
 323                 'ext': 'mp4',
 324                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 325                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 326                 'upload_date': '20150917',
 327                 'uploader_id': 'brtvofficial',
 328                 'uploader': 'Boiler Room',
 329             },
 330             'params': {
 331                 'skip_download': False,
 332             },
 333         },
 334         {
 335             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 336             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 337             'info_dict': {
 338                 'id': '13601338388002',
 339                 'ext': 'mp4',
 340                 'uploader': 'www.hodiho.fr',
 341                 'title': 'R\u00e9gis plante sa Jeep',
 342             }
 343         },
 344         # bandcamp page with custom domain
 345         {
 346             'add_ie': ['Bandcamp'],
 347             'url': 'http://bronyrock.com/track/the-pony-mash',
 348             'info_dict': {
 349                 'id': '3235767654',
 350                 'ext': 'mp3',
 351                 'title': 'The Pony Mash',
 352                 'uploader': 'M_Pallante',
 353             },
 354             'skip': 'There is a limit of 200 free downloads / month for the test song',
 355         },
 356         {
 357             # embedded brightcove video
 358             # it also tests brightcove videos that need to set the 'Referer'
 359             # in the http requests
 360             'add_ie': ['BrightcoveLegacy'],
 361             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 362             'info_dict': {
 363                 'id': '2765128793001',
 364                 'ext': 'mp4',
 365                 'title': 'Le cours de bourse : l’analyse technique',
 366                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 367                 'uploader': 'BFM BUSINESS',
 368             },
 369             'params': {
 370                 'skip_download': True,
 371             },
 372         },
 373         {
 374             # embedded with itemprop embedURL and video id spelled as `idVideo`
 375             'add_id': ['BrightcoveLegacy'],
 376             'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
 377             'info_dict': {
 378                 'id': '5255628253001',
 379                 'ext': 'mp4',
 380                 'title': 'md5:37c519b1128915607601e75a87995fc0',
 381                 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
 382                 'uploader': 'BFM BUSINESS',
 383                 'uploader_id': '876450612001',
 384                 'timestamp': 1482255315,
 385                 'upload_date': '20161220',
 386             },
 387             'params': {
 388                 'skip_download': True,
 389             },
 390         },
 391         {
 392             # https://github.com/rg3/youtube-dl/issues/2253
 393             'url': 'http://bcove.me/i6nfkrc3',
 394             'md5': '0ba9446db037002366bab3b3eb30c88c',
 395             'info_dict': {
 396                 'id': '3101154703001',
 397                 'ext': 'mp4',
 398                 'title': 'Still no power',
 399                 'uploader': 'thestar.com',
 400                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 401             },
 402             'add_ie': ['BrightcoveLegacy'],
 403             'skip': 'video gone',
 404         },
 405         {
 406             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 407             'md5': 'fb973ecf6e4a78a67453647444222983',
 408             'info_dict': {
 409                 'id': '3414141473001',
 410                 'ext': 'mp4',
 411                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 412                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 413                 'uploader': 'Championat',
 414             },
 415         },
 416         {
 417             # https://github.com/rg3/youtube-dl/issues/3541
 418             'add_ie': ['BrightcoveLegacy'],
 419             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 420             'info_dict': {
 421                 'id': '3866516442001',
 422                 'ext': 'mp4',
 423                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 424                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 425                 'uploader': 'SBS Broadcasting',
 426             },
 427             'skip': 'Restricted to Netherlands',
 428             'params': {
 429                 'skip_download': True,  # m3u8 download
 430             },
 431         },
 432         {
 433             # Brightcove with alternative playerID key
 434             'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
 435             'info_dict': {
 436                 'id': 'nmeth.2062_SV1',
 437                 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
 438             },
 439             'playlist': [{
 440                 'info_dict': {
 441                     'id': '2228375078001',
 442                     'ext': 'mp4',
 443                     'title': 'nmeth.2062-sv1',
 444                     'description': 'nmeth.2062-sv1',
 445                     'timestamp': 1363357591,
 446                     'upload_date': '20130315',
 447                     'uploader': 'Nature Publishing Group',
 448                     'uploader_id': '1964492299001',
 449                 },
 450             }],
 451         },
 452         {
 453             # Brightcove with UUID in videoPlayer
 454             'url': 'http://www8.hp.com/cn/zh/home.html',
 455             'info_dict': {
 456                 'id': '5255815316001',
 457                 'ext': 'mp4',
 458                 'title': 'Sprocket Video - China',
 459                 'description': 'Sprocket Video - China',
 460                 'uploader': 'HP-Video Gallery',
 461                 'timestamp': 1482263210,
 462                 'upload_date': '20161220',
 463                 'uploader_id': '1107601872001',
 464             },
 465             'params': {
 466                 'skip_download': True,  # m3u8 download
 467             },
 468         },
 469         # ooyala video
 470         {
 471             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 472             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 473             'info_dict': {
 474                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 475                 'ext': 'mp4',
 476                 'title': '2cc213299525360.mov',  # that's what we get
 477                 'duration': 238.231,
 478             },
 479             'add_ie': ['Ooyala'],
 480         },
 481         {
 482             # ooyala video embedded with http://player.ooyala.com/iframe.js
 483             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 484             'info_dict': {
 485                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 486                 'ext': 'mp4',
 487                 'title': '"Steve Jobs: Man in the Machine" trailer',
 488                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 489                 'duration': 135.427,
 490             },
 491             'params': {
 492                 'skip_download': True,
 493             },
 494             'skip': 'movie expired',
 495         },
 496         # embed.ly video
 497         {
 498             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 499             'info_dict': {
 500                 'id': '9ODmcdjQcHQ',
 501                 'ext': 'mp4',
 502                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 503                 'upload_date': '20140225',
 504                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 505                 'uploader': 'Tested',
 506                 'uploader_id': 'testedcom',
 507             },
 508             # No need to test YoutubeIE here
 509             'params': {
 510                 'skip_download': True,
 511             },
 512         },
 513         # funnyordie embed
 514         {
 515             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 516             'info_dict': {
 517                 'id': '18e820ec3f',
 518                 'ext': 'mp4',
 519                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 520                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 521             },
 522             # HEAD requests lead to endless 301, while GET is OK
 523             'expected_warnings': ['301'],
 524         },
 525         # RUTV embed
 526         {
 527             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 528             'info_dict': {
 529                 'id': '776940',
 530                 'ext': 'mp4',
 531                 'title': 'Охотское море стало целиком российским',
 532                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 533             },
 534             'params': {
 535                 # m3u8 download
 536                 'skip_download': True,
 537             },
 538         },
 539         # TVC embed
 540         {
 541             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 542             'info_dict': {
 543                 'id': '55304',
 544                 'ext': 'mp4',
 545                 'title': 'Дошкольное воспитание',
 546             },
 547         },
 548         # SportBox embed
 549         {
 550             'url': 'http://www.vestifinance.ru/articles/25753',
 551             'info_dict': {
 552                 'id': '25753',
 553                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 554             },
 555             'playlist': [{
 556                 'info_dict': {
 557                     'id': '370908',
 558                     'title': 'Госзаказ. День 3',
 559                     'ext': 'mp4',
 560                 }
 561             }, {
 562                 'info_dict': {
 563                     'id': '370905',
 564                     'title': 'Госзаказ. День 2',
 565                     'ext': 'mp4',
 566                 }
 567             }, {
 568                 'info_dict': {
 569                     'id': '370902',
 570                     'title': 'Госзаказ. День 1',
 571                     'ext': 'mp4',
 572                 }
 573             }],
 574             'params': {
 575                 # m3u8 download
 576                 'skip_download': True,
 577             },
 578         },
 579         # Myvi.ru embed
 580         {
 581             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 582             'info_dict': {
 583                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 584                 'ext': 'mp4',
 585                 'title': 'Ужастики, русский трейлер (2015)',
 586                 'thumbnail': r're:^https?://.*\.jpg$',
 587                 'duration': 153,
 588             }
 589         },
 590         # XHamster embed
 591         {
 592             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 593             'info_dict': {
 594                 'id': 'showthread',
 595                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 596             },
 597             'playlist_mincount': 7,
 598             # This forum does not allow <iframe> syntaxes anymore
 599             # Now HTML tags are displayed as-is
 600             'skip': 'No videos on this page',
 601         },
 602         # Embedded TED video
 603         {
 604             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 605             'md5': '65fdff94098e4a607385a60c5177c638',
 606             'info_dict': {
 607                 'id': '1969',
 608                 'ext': 'mp4',
 609                 'title': 'Hidden miracles of the natural world',
 610                 'uploader': 'Louie Schwartzberg',
 611                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 612             }
 613         },
 614         # nowvideo embed hidden behind percent encoding
 615         {
 616             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 617             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 618             'info_dict': {
 619                 'id': '06e53103ca9aa',
 620                 'ext': 'flv',
 621                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 622                 'description': 'No description',
 623             },
 624         },
 625         # arte embed
 626         {
 627             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 628             'md5': '7653032cbb25bf6c80d80f217055fa43',
 629             'info_dict': {
 630                 'id': '048195-004_PLUS7-F',
 631                 'ext': 'flv',
 632                 'title': 'X:enius',
 633                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 634                 'upload_date': '20140320',
 635             },
 636             'params': {
 637                 'skip_download': 'Requires rtmpdump'
 638             },
 639             'skip': 'video gone',
 640         },
 641         # francetv embed
 642         {
 643             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 644             'info_dict': {
 645                 'id': 'EV_30231',
 646                 'ext': 'mp4',
 647                 'title': 'Alcaline, le concert avec Calogero',
 648                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 649                 'upload_date': '20150226',
 650                 'timestamp': 1424989860,
 651                 'duration': 5400,
 652             },
 653             'params': {
 654                 # m3u8 downloads
 655                 'skip_download': True,
 656             },
 657             'expected_warnings': [
 658                 'Forbidden'
 659             ]
 660         },
 661         # Condé Nast embed
 662         {
 663             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 664             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 665             'info_dict': {
 666                 'id': '53501be369702d3275860000',
 667                 'ext': 'mp4',
 668                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 669             }
 670         },
 671         # Dailymotion embed
 672         {
 673             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 674             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 675             'info_dict': {
 676                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 677                 'ext': 'mp4',
 678                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 679                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
 680                 'uploader': 'Spi0n',
 681                 'uploader_id': 'xgditw',
 682                 'upload_date': '20140425',
 683                 'timestamp': 1398441542,
 684             },
 685             'add_ie': ['Dailymotion'],
 686         },
 687         # YouTube embed
 688         {
 689             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 690             'info_dict': {
 691                 'id': 'FXRb4ykk4S0',
 692                 'ext': 'mp4',
 693                 'title': 'The NBL Auction 2014',
 694                 'uploader': 'BADMINTON England',
 695                 'uploader_id': 'BADMINTONEvents',
 696                 'upload_date': '20140603',
 697                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 698             },
 699             'add_ie': ['Youtube'],
 700             'params': {
 701                 'skip_download': True,
 702             }
 703         },
  704         # MTVServices embed
 705         {
 706             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
 707             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
 708             'info_dict': {
 709                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
 710                 'ext': 'mp4',
 711                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
 712                 'description': 'Two valets share their love for movie star Liam Neesons.',
 713                 'timestamp': 1349922600,
 714                 'upload_date': '20121011',
 715             },
 716         },
 717         # YouTube embed via <data-embed-url="">
 718         {
 719             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 720             'info_dict': {
 721                 'id': '4vAffPZIT44',
 722                 'ext': 'mp4',
 723                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 724                 'uploader': 'Gameloft',
 725                 'uploader_id': 'gameloft',
 726                 'upload_date': '20140828',
 727                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 728             },
 729             'params': {
 730                 'skip_download': True,
 731             }
 732         },
 733         # Camtasia studio
 734         {
 735             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 736             'playlist': [{
 737                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 738                 'info_dict': {
 739                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 740                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 741                     'ext': 'flv',
 742                     'duration': 2235.90,
 743                 }
 744             }, {
 745                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 746                 'info_dict': {
 747                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 748                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 749                     'ext': 'flv',
 750                     'duration': 2235.93,
 751                 }
 752             }],
 753             'info_dict': {
 754                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 755             }
 756         },
 757         # Flowplayer
 758         {
 759             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 760             'md5': '9d65602bf31c6e20014319c7d07fba27',
 761             'info_dict': {
 762                 'id': '5123ea6d5e5a7',
 763                 'ext': 'mp4',
 764                 'age_limit': 18,
 765                 'uploader': 'www.handjobhub.com',
 766                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 767             }
 768         },
 769         # Multiple brightcove videos
 770         # https://github.com/rg3/youtube-dl/issues/2283
 771         {
 772             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 773             'info_dict': {
 774                 'id': 'always-never',
 775                 'title': 'Always / Never - The New Yorker',
 776             },
 777             'playlist_count': 3,
 778             'params': {
 779                 'extract_flat': False,
 780                 'skip_download': True,
 781             }
 782         },
 783         # MLB embed
 784         {
 785             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 786             'md5': '96f09a37e44da40dd083e12d9a683327',
 787             'info_dict': {
 788                 'id': '33322633',
 789                 'ext': 'mp4',
 790                 'title': 'Ump changes call to ball',
 791                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 792                 'duration': 48,
 793                 'timestamp': 1401537900,
 794                 'upload_date': '20140531',
 795                 'thumbnail': r're:^https?://.*\.jpg$',
 796             },
 797         },
 798         # Wistia embed
 799         {
 800             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 801             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
 802             'info_dict': {
 803                 'id': '6e2wtrbdaf',
 804                 'ext': 'mov',
 805                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
 806                 'description': 'a Paywall Videos video from Remilon',
 807                 'duration': 644.072,
 808                 'uploader': 'study.com',
 809                 'timestamp': 1459678540,
 810                 'upload_date': '20160403',
 811                 'filesize': 24687186,
 812             },
 813         },
 814         {
 815             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 816             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 817             'info_dict': {
 818                 'id': 'uxjb0lwrcz',
 819                 'ext': 'mp4',
 820                 'title': 'Conversation about Hexagonal Rails Part 1',
 821                 'description': 'a Martin Fowler video from ThoughtWorks',
 822                 'duration': 1715.0,
 823                 'uploader': 'thoughtworks.wistia.com',
 824                 'timestamp': 1401832161,
 825                 'upload_date': '20140603',
 826             },
 827         },
 828         # Wistia standard embed (async)
 829         {
 830             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
 831             'info_dict': {
 832                 'id': '807fafadvk',
 833                 'ext': 'mp4',
 834                 'title': 'Drip Brennan Dunn Workshop',
 835                 'description': 'a JV Webinars video from getdrip-1',
 836                 'duration': 4986.95,
 837                 'timestamp': 1463607249,
 838                 'upload_date': '20160518',
 839             },
 840             'params': {
 841                 'skip_download': True,
 842             }
 843         },
 844         # Soundcloud embed
 845         {
 846             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 847             'info_dict': {
 848                 'id': '174391317',
 849                 'ext': 'mp3',
 850                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 851                 'uploader': 'Sophos Security',
 852                 'title': 'Chet Chat 171 - Oct 29, 2014',
 853                 'upload_date': '20141029',
 854             }
 855         },
 856         # Soundcloud multiple embeds
 857         {
 858             'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
 859             'info_dict': {
 860                 'id': '52809',
 861                 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
 862             },
 863             'playlist_mincount': 7,
 864         },
 865         # TuneIn station embed
 866         {
 867             'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
 868             'info_dict': {
 869                 'id': '204146',
 870                 'ext': 'mp3',
 871                 'title': 'CNRV',
 872                 'location': 'Paris, France',
 873                 'is_live': True,
 874             },
 875             'params': {
 876                 # Live stream
 877                 'skip_download': True,
 878             },
 879         },
 880         # Livestream embed
 881         {
 882             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 883             'info_dict': {
 884                 'id': '67864563',
 885                 'ext': 'flv',
 886                 'upload_date': '20141112',
 887                 'title': 'Rosetta #CometLanding webcast HL 10',
 888             }
 889         },
 890         # Another Livestream embed, without 'new.' in URL
 891         {
 892             'url': 'https://www.freespeech.org/',
 893             'info_dict': {
 894                 'id': '123537347',
 895                 'ext': 'mp4',
 896                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 897             },
 898             'params': {
 899                 # Live stream
 900                 'skip_download': True,
 901             },
 902         },
 903         # LazyYT
 904         {
 905             'url': 'https://skiplagged.com/',
 906             'info_dict': {
 907                 'id': 'skiplagged',
 908                 'title': 'Skiplagged: The smart way to find cheap flights',
 909             },
 910             'playlist_mincount': 1,
 911             'add_ie': ['Youtube'],
 912         },
 913         # Cinchcast embed
 914         {
 915             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 916             'info_dict': {
 917                 'id': '7141703',
 918                 'ext': 'mp3',
 919                 'upload_date': '20141126',
 920                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 921             }
 922         },
 923         # Cinerama player
 924         {
 925             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 926             'info_dict': {
 927                 'id': '730m_DandD_1901_512k',
 928                 'ext': 'mp4',
 929                 'uploader': 'www.abc.net.au',
 930                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 931             }
 932         },
 933         # embedded viddler video
 934         {
 935             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 936             'info_dict': {
 937                 'id': '4d03aad9',
 938                 'ext': 'mp4',
 939                 'uploader': 'deadspin',
 940                 'title': 'WALL-TO-GORTAT',
 941                 'timestamp': 1422285291,
 942                 'upload_date': '20150126',
 943             },
 944             'add_ie': ['Viddler'],
 945         },
 946         # Libsyn embed
 947         {
 948             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 949             'info_dict': {
 950                 'id': '3377616',
 951                 'ext': 'mp3',
 952                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 953                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 954                 'upload_date': '20150220',
 955             },
 956             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
 957         },
 958         # jwplayer YouTube
 959         {
 960             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 961             'info_dict': {
 962                 'id': 'Mrj4DVp2zeA',
 963                 'ext': 'mp4',
 964                 'upload_date': '20150212',
 965                 'uploader': 'The National Archives UK',
 966                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 967                 'uploader_id': 'NationalArchives08',
 968                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 969             },
 970         },
 971         # jwplayer rtmp
 972         {
 973             'url': 'http://www.suffolk.edu/sjc/',
 974             'info_dict': {
 975                 'id': 'sjclive',
 976                 'ext': 'flv',
 977                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
 978                 'uploader': 'www.suffolk.edu',
 979             },
 980             'params': {
 981                 'skip_download': True,
 982             }
 983         },
 984         # Complex jwplayer
 985         {
 986             'url': 'http://www.indiedb.com/games/king-machine/videos',
 987             'info_dict': {
 988                 'id': 'videos',
 989                 'ext': 'mp4',
 990                 'title': 'king machine trailer 1',
 991                 'thumbnail': r're:^https?://.*\.jpg$',
 992             },
 993         },
 994         # rtl.nl embed
 995         {
 996             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 997             'playlist_mincount': 5,
 998             'info_dict': {
 999                 'id': 'aanslagen-kopenhagen',
1000                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
1001             }
1002         },
1003         # Zapiks embed
1004         {
1005             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1006             'info_dict': {
1007                 'id': '118046',
1008                 'ext': 'mp4',
1009                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1010             }
1011         },
1012         # Kaltura embed (different embed code)
1013         {
1014             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1015             'info_dict': {
1016                 'id': '1_a52wc67y',
1017                 'ext': 'flv',
1018                 'upload_date': '20150127',
1019                 'uploader_id': 'PremierMedia',
1020                 'timestamp': int,
1021                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1022             },
1023         },
1024         # Kaltura embed with single quotes
1025         {
1026             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1027             'info_dict': {
1028                 'id': '0_izeg5utt',
1029                 'ext': 'mp4',
1030                 'title': '35871',
1031                 'timestamp': 1355743100,
1032                 'upload_date': '20121217',
1033                 'uploader_id': 'batchUser',
1034             },
1035             'add_ie': ['Kaltura'],
1036         },
1037         {
1038             # Kaltura embedded via quoted entry_id
1039             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1040             'info_dict': {
1041                 'id': '0_utuok90b',
1042                 'ext': 'mp4',
1043                 'title': '06_matthew_brender_raj_dutt',
1044                 'timestamp': 1466638791,
1045                 'upload_date': '20160622',
1046             },
1047             'add_ie': ['Kaltura'],
1048             'expected_warnings': [
1049                 'Could not send HEAD request'
1050             ],
1051             'params': {
1052                 'skip_download': True,
1053             }
1054         },
1055         {
1056             # Kaltura embedded, some fileExt broken (#11480)
1057             'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1058             'info_dict': {
1059                 'id': '1_sgtvehim',
1060                 'ext': 'mp4',
1061                 'title': 'Our "Standard Models" of particle physics and cosmology',
1062                 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1063                 'timestamp': 1321158993,
1064                 'upload_date': '20111113',
1065                 'uploader_id': 'kps1',
1066             },
1067             'add_ie': ['Kaltura'],
1068         },
1069         # Eagle.Platform embed (generic URL)
1070         {
1071             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
1072             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1073             'info_dict': {
1074                 'id': '227304',
1075                 'ext': 'mp4',
1076                 'title': 'Навальный вышел на свободу',
1077                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
1078                 'thumbnail': r're:^https?://.*\.jpg$',
1079                 'duration': 87,
1080                 'view_count': int,
1081                 'age_limit': 0,
1082             },
1083         },
1084         # ClipYou (Eagle.Platform) embed (custom URL)
1085         {
1086             'url': 'http://muz-tv.ru/play/7129/',
1087             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1088             'info_dict': {
1089                 'id': '12820',
1090                 'ext': 'mp4',
1091                 'title': "'O Sole Mio",
1092                 'thumbnail': r're:^https?://.*\.jpg$',
1093                 'duration': 216,
1094                 'view_count': int,
1095             },
1096         },
1097         # Pladform embed
1098         {
1099             'url': 'http://muz-tv.ru/kinozal/view/7400/',
1100             'info_dict': {
1101                 'id': '100183293',
1102                 'ext': 'mp4',
1103                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1104                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1105                 'thumbnail': r're:^https?://.*\.jpg$',
1106                 'duration': 694,
1107                 'age_limit': 0,
1108             },
1109         },
1110         # Playwire embed
1111         {
1112             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1113             'info_dict': {
1114                 'id': '3519514',
1115                 'ext': 'mp4',
1116                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1117                 'thumbnail': r're:^https?://.*\.png$',
1118                 'duration': 45.115,
1119             },
1120         },
1121         # 5min embed
1122         {
1123             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1124             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1125             'info_dict': {
1126                 'id': '518726732',
1127                 'ext': 'mp4',
1128                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1129             },
1130         },
1131         # SVT embed
1132         {
1133             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1134             'info_dict': {
1135                 'id': '2900353',
1136                 'ext': 'flv',
1137                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1138                 'duration': 27,
1139                 'age_limit': 0,
1140             },
1141         },
1142         # Crooks and Liars embed
1143         {
1144             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1145             'info_dict': {
1146                 'id': '8RUoRhRi',
1147                 'ext': 'mp4',
1148                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1149                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1150                 'timestamp': 1428207000,
1151                 'upload_date': '20150405',
1152                 'uploader': 'Heather',
1153             },
1154         },
1155         # Crooks and Liars external embed
1156         {
1157             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1158             'info_dict': {
1159                 'id': 'MTE3MjUtMzQ2MzA',
1160                 'ext': 'mp4',
1161                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1162                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1163                 'timestamp': 1265032391,
1164                 'upload_date': '20100201',
1165                 'uploader': 'Heather',
1166             },
1167         },
1168         # NBC Sports vplayer embed
1169         {
1170             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1171             'info_dict': {
1172                 'id': 'ln7x1qSThw4k',
1173                 'ext': 'flv',
1174                 'title': "PFT Live: New leader in the 'new-look' defense",
1175                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1176                 'uploader': 'NBCU-SPORTS',
1177                 'upload_date': '20140107',
1178                 'timestamp': 1389118457,
1179             },
1180         },
1181         # NBC News embed
1182         {
1183             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1184             'md5': '1aa589c675898ae6d37a17913cf68d66',
1185             'info_dict': {
1186                 'id': '701714499682',
1187                 'ext': 'mp4',
1188                 'title': 'PREVIEW: On Assignment: David Letterman',
1189                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1190             },
1191         },
1192         # UDN embed
1193         {
1194             'url': 'https://video.udn.com/news/300346',
1195             'md5': 'fd2060e988c326991037b9aff9df21a6',
1196             'info_dict': {
1197                 'id': '300346',
1198                 'ext': 'mp4',
1199                 'title': '中一中男師變性 全校師生力挺',
1200                 'thumbnail': r're:^https?://.*\.jpg$',
1201             },
1202             'params': {
1203                 # m3u8 download
1204                 'skip_download': True,
1205             },
1206         },
1207         # Ooyala embed
1208         {
1209             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1210             'info_dict': {
1211                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1212                 'ext': 'mp4',
1213                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1214                 'title': 'This is what separates the Excel masters from the wannabes',
1215                 'duration': 191.933,
1216             },
1217             'params': {
1218                 # m3u8 downloads
1219                 'skip_download': True,
1220             }
1221         },
1222         # Brightcove URL in single quotes
1223         {
1224             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1225             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1226             'info_dict': {
1227                 'id': '4255764656001',
1228                 'ext': 'mp4',
1229                 'title': 'SN Presents: Russell Martin, World Citizen',
1230                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1231                 'uploader': 'Rogers Sportsnet',
1232                 'uploader_id': '1704050871',
1233                 'upload_date': '20150525',
1234                 'timestamp': 1432570283,
1235             },
1236         },
1237         # Dailymotion Cloud video
1238         {
1239             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1240             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1241             'info_dict': {
1242                 'id': 'x2uy8t3',
1243                 'ext': 'mp4',
1244                 'title': 'Sauvons les abeilles ! - Le débat',
1245                 'description': 'md5:d9082128b1c5277987825d684939ca26',
1246                 'thumbnail': r're:^https?://.*\.jpe?g$',
1247                 'timestamp': 1434970506,
1248                 'upload_date': '20150622',
1249                 'uploader': 'Public Sénat',
1250                 'uploader_id': 'xa9gza',
1251             }
1252         },
1253         # OnionStudios embed
1254         {
1255             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1256             'info_dict': {
1257                 'id': '2855',
1258                 'ext': 'mp4',
1259                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1260                 'thumbnail': r're:^https?://.*\.jpe?g$',
1261                 'uploader': 'ClickHole',
1262                 'uploader_id': 'clickhole',
1263             }
1264         },
1265         # SnagFilms embed
1266         {
1267             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1268             'info_dict': {
1269                 'id': '74849a00-85a9-11e1-9660-123139220831',
1270                 'ext': 'mp4',
1271                 'title': '#whilewewatch',
1272             }
1273         },
1274         # AdobeTVVideo embed
1275         {
1276             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1277             'md5': '43662b577c018ad707a63766462b1e87',
1278             'info_dict': {
1279                 'id': '2456',
1280                 'ext': 'mp4',
1281                 'title': 'New experience with Acrobat DC',
1282                 'description': 'New experience with Acrobat DC',
1283                 'duration': 248.667,
1284             },
1285         },
1286         # BrightcoveInPageEmbed embed
1287         {
1288             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1289             'info_dict': {
1290                 'id': '4238694884001',
1291                 'ext': 'flv',
1292                 'title': 'Tabletop: Dread, Last Thoughts',
1293                 'description': 'Tabletop: Dread, Last Thoughts',
1294                 'duration': 51690,
1295             },
1296         },
1297         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1298         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1299         {
1300             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1301             'info_dict': {
1302                 'id': '4785848093001',
1303                 'ext': 'mp4',
1304                 'title': 'The Cardinal Pell Interview',
1305                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1306                 'uploader': 'GlobeCast Australia - GlobeStream',
1307                 'uploader_id': '2733773828001',
1308                 'upload_date': '20160304',
1309                 'timestamp': 1457083087,
1310             },
1311             'params': {
1312                 # m3u8 downloads
1313                 'skip_download': True,
1314             },
1315         },
1316         # Another form of arte.tv embed
1317         {
1318             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1319             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1320             'info_dict': {
1321                 'id': '030273-562_PLUS7-F',
1322                 'ext': 'mp4',
1323                 'title': 'ARTE Reportage - Nulle part, en France',
1324                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1325                 'upload_date': '20160409',
1326             },
1327         },
1328         # LiveLeak embed
1329         {
1330             'url': 'http://www.wykop.pl/link/3088787/',
1331             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1332             'info_dict': {
1333                 'id': '874_1459135191',
1334                 'ext': 'mp4',
1335                 'title': 'Man shows poor quality of new apartment building',
1336                 'description': 'The wall is like a sand pile.',
1337                 'uploader': 'Lake8737',
1338             }
1339         },
1340         # Duplicated embedded video URLs
1341         {
1342             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1343             'info_dict': {
1344                 'id': '149298443_480_16c25b74_2',
1345                 'ext': 'mp4',
1346                 'title': 'vs. Blue Orange Spring Game',
1347                 'uploader': 'www.hudl.com',
1348             },
1349         },
1350         # twitter:player:stream embed
1351         {
1352             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1353             'info_dict': {
1354                 'id': 'master',
1355                 'ext': 'mp4',
1356                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1357                 'uploader': 'www.rtl.be',
1358             },
1359             'params': {
1360                 # m3u8 downloads
1361                 'skip_download': True,
1362             },
1363         },
1364         # twitter:player embed
1365         {
1366             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1367             'md5': 'a3e0df96369831de324f0778e126653c',
1368             'info_dict': {
1369                 'id': '4909620399001',
1370                 'ext': 'mp4',
1371                 'title': 'What Do Black Holes Sound Like?',
1372                 'description': 'what do black holes sound like',
1373                 'upload_date': '20160524',
1374                 'uploader_id': '29913724001',
1375                 'timestamp': 1464107587,
1376                 'uploader': 'TheAtlantic',
1377             },
1378             'add_ie': ['BrightcoveLegacy'],
1379         },
1380         # Facebook <iframe> embed
1381         {
1382             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1383             'md5': 'fbcde74f534176ecb015849146dd3aee',
1384             'info_dict': {
1385                 'id': '599637780109885',
1386                 'ext': 'mp4',
1387                 'title': 'Facebook video #599637780109885',
1388             },
1389         },
1390         # Facebook API embed
1391         {
1392             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1393             'md5': 'a47372ee61b39a7b90287094d447d94e',
1394             'info_dict': {
1395                 'id': '10153467542406923',
1396                 'ext': 'mp4',
1397                 'title': 'Facebook video #10153467542406923',
1398             },
1399         },
1400         # Wordpress "YouTube Video Importer" plugin
1401         {
1402             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1403             'md5': 'd16797741b560b485194eddda8121b48',
1404             'info_dict': {
1405                 'id': 'HNTXWDXV9Is',
1406                 'ext': 'mp4',
1407                 'title': 'Blue Devils Drumline Stanford lot 2016',
1408                 'upload_date': '20160627',
1409                 'uploader_id': 'GENOCIDE8GENERAL10',
1410                 'uploader': 'cylus cyrus',
1411             },
1412         },
1413         {
1414             # video stored on custom kaltura server
1415             'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1416             'md5': '537617d06e64dfed891fa1593c4b30cc',
1417             'info_dict': {
1418                 'id': '0_1iotm5bh',
1419                 'ext': 'mp4',
1420                 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1421                 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1422                 'uploader_id': 'videos.expansion@el-mundo.net',
1423                 'upload_date': '20150429',
1424                 'timestamp': 1430303472,
1425             },
1426             'add_ie': ['Kaltura'],
1427         },
1428         {
1429             # Non-standard Vimeo embed
1430             'url': 'https://openclassrooms.com/courses/understanding-the-web',
1431             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1432             'info_dict': {
1433                 'id': '148867247',
1434                 'ext': 'mp4',
1435                 'title': 'Understanding the web - Teaser',
1436                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1437                 'upload_date': '20151214',
1438                 'uploader': 'OpenClassrooms',
1439                 'uploader_id': 'openclassrooms',
1440             },
1441             'add_ie': ['Vimeo'],
1442         },
1443         {
1444             # generic vimeo embed that requires original URL passed as Referer
1445             'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1446             'only_matching': True,
1447         },
1448         {
1449             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1450             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1451             'info_dict': {
1452                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1453                 'ext': 'mp4',
1454                 'title': 'Big Buck Bunny',
1455                 'description': 'Royalty free test video',
1456                 'timestamp': 1432816365,
1457                 'upload_date': '20150528',
1458                 'is_live': False,
1459             },
1460             'params': {
1461                 'skip_download': True,
1462             },
1463             'add_ie': [ArkenaIE.ie_key()],
1464         },
1465         {
1466             'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1467             'info_dict': {
1468                 'id': '1c7141f46c',
1469                 'ext': 'mp4',
1470                 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1471             },
1472             'params': {
1473                 'skip_download': True,
1474             },
1475             'add_ie': [Vbox7IE.ie_key()],
1476         },
1477         {
1478             # DBTV embeds
1479             'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1480             'info_dict': {
1481                 'id': '43254897',
1482                 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1483             },
1484             'playlist_mincount': 3,
1485         },
1486         {
1487             # Videa embeds
1488             'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1489             'info_dict': {
1490                 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1491                 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1492             },
1493             'playlist_mincount': 2,
1494         },
1495         {
1496             # 20 minuten embed
1497             'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1498             'info_dict': {
1499                 'id': '523629',
1500                 'ext': 'mp4',
1501                 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1502                 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1503             },
1504             'params': {
1505                 'skip_download': True,
1506             },
1507             'add_ie': [TwentyMinutenIE.ie_key()],
1508         },
1509         {
1510             # VideoPress embed
1511             'url': 'https://en.support.wordpress.com/videopress/',
1512             'info_dict': {
1513                 'id': 'OcobLTqC',
1514                 'ext': 'm4v',
1515                 'title': 'IMG_5786',
1516                 'timestamp': 1435711927,
1517                 'upload_date': '20150701',
1518             },
1519             'params': {
1520                 'skip_download': True,
1521             },
1522             'add_ie': [VideoPressIE.ie_key()],
1523         },
1524         {
1525             # Rutube embed
1526             'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1527             'info_dict': {
1528                 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1529                 'ext': 'flv',
1530                 'title': 'Магаззино: Казань 2',
1531                 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1532                 'uploader': 'Магаззино',
1533                 'upload_date': '20170228',
1534                 'uploader_id': '996642',
1535             },
1536             'params': {
1537                 'skip_download': True,
1538             },
1539             'add_ie': [RutubeIE.ie_key()],
1540         },
1541         {
1542             # ThePlatform embedded with whitespaces in URLs
1543             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1544             'only_matching': True,
1545         },
1546         {
1547             # Senate ISVP iframe https
1548             'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1549             'md5': 'fb8c70b0b515e5037981a2492099aab8',
1550             'info_dict': {
1551                 'id': 'govtaff020316',
1552                 'ext': 'mp4',
1553                 'title': 'Integrated Senate Video Player',
1554             },
1555             'add_ie': [SenateISVPIE.ie_key()],
1556         },
1557         # {
1558         #     # TODO: find another test
1559         #     # http://schema.org/VideoObject
1560         #     'url': 'https://flipagram.com/f/nyvTSJMKId',
1561         #     'md5': '888dcf08b7ea671381f00fab74692755',
1562         #     'info_dict': {
1563         #         'id': 'nyvTSJMKId',
1564         #         'ext': 'mp4',
1565         #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1566         #         'description': '#love for cats.',
1567         #         'timestamp': 1461244995,
1568         #         'upload_date': '20160421',
1569         #     },
1570         #     'params': {
1571         #         'force_generic_extractor': True,
1572         #     },
1573         # }
1574     ]
1575
1576     def report_following_redirect(self, new_url):
1577         """Report information extraction."""
1578         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1579
1580     def _extract_rss(self, url, video_id, doc):
1581         playlist_title = doc.find('./channel/title').text
1582         playlist_desc_el = doc.find('./channel/description')
1583         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1584
1585         entries = []
1586         for it in doc.findall('./channel/item'):
1587             next_url = xpath_text(it, 'link', fatal=False)
1588             if not next_url:
1589                 enclosure_nodes = it.findall('./enclosure')
1590                 for e in enclosure_nodes:
1591                     next_url = e.attrib.get('url')
1592                     if next_url:
1593                         break
1594
1595             if not next_url:
1596                 continue
1597
1598             entries.append({
1599                 '_type': 'url',
1600                 'url': next_url,
1601                 'title': it.find('title').text,
1602             })
1603
1604         return {
1605             '_type': 'playlist',
1606             'id': url,
1607             'title': playlist_title,
1608             'description': playlist_desc,
1609             'entries': entries,
1610         }
1611
1612     def _extract_camtasia(self, url, video_id, webpage):
1613         """ Returns None if no camtasia video can be found. """
1614
1615         camtasia_cfg = self._search_regex(
1616             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1617             webpage, 'camtasia configuration file', default=None)
1618         if camtasia_cfg is None:
1619             return None
1620
1621         title = self._html_search_meta('DC.title', webpage, fatal=True)
1622
1623         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1624         camtasia_cfg = self._download_xml(
1625             camtasia_url, video_id,
1626             note='Downloading camtasia configuration',
1627             errnote='Failed to download camtasia configuration')
1628         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1629
1630         entries = []
1631         for n in fileset_node.getchildren():
1632             url_n = n.find('./uri')
1633             if url_n is None:
1634                 continue
1635
1636             entries.append({
1637                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1638                 'title': '%s - %s' % (title, n.tag),
1639                 'url': compat_urlparse.urljoin(url, url_n.text),
1640                 'duration': float_or_none(n.find('./duration').text),
1641             })
1642
1643         return {
1644             '_type': 'playlist',
1645             'entries': entries,
1646             'title': title,
1647         }
1648
1649     def _real_extract(self, url):
1650         if url.startswith('//'):
1651             return {
1652                 '_type': 'url',
1653                 'url': self.http_scheme() + url,
1654             }
1655
1656         parsed_url = compat_urlparse.urlparse(url)
1657         if not parsed_url.scheme:
1658             default_search = self._downloader.params.get('default_search')
1659             if default_search is None:
1660                 default_search = 'fixup_error'
1661
1662             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1663                 if '/' in url:
1664                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1665                     return self.url_result('http://' + url)
1666                 elif default_search != 'fixup_error':
1667                     if default_search == 'auto_warning':
1668                         if re.match(r'^(?:url|URL)$', url):
1669                             raise ExtractorError(
1670                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1671                                 expected=True)
1672                         else:
1673                             self._downloader.report_warning(
1674                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1675                     return self.url_result('ytsearch:' + url)
1676
1677             if default_search in ('error', 'fixup_error'):
1678                 raise ExtractorError(
1679                     '%r is not a valid URL. '
1680                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1681                     % (url, url), expected=True)
1682             else:
1683                 if ':' not in default_search:
1684                     default_search += ':'
1685                 return self.url_result(default_search + url)
1686
1687         url, smuggled_data = unsmuggle_url(url)
1688         force_videoid = None
1689         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1690         if smuggled_data and 'force_videoid' in smuggled_data:
1691             force_videoid = smuggled_data['force_videoid']
1692             video_id = force_videoid
1693         else:
1694             video_id = self._generic_id(url)
1695
1696         self.to_screen('%s: Requesting header' % video_id)
1697
1698         head_req = HEADRequest(url)
1699         head_response = self._request_webpage(
1700             head_req, video_id,
1701             note=False, errnote='Could not send HEAD request to %s' % url,
1702             fatal=False)
1703
1704         if head_response is not False:
1705             # Check for redirect
1706             new_url = head_response.geturl()
1707             if url != new_url:
1708                 self.report_following_redirect(new_url)
1709                 if force_videoid:
1710                     new_url = smuggle_url(
1711                         new_url, {'force_videoid': force_videoid})
1712                 return self.url_result(new_url)
1713
1714         full_response = None
1715         if head_response is False:
1716             request = sanitized_Request(url)
1717             request.add_header('Accept-Encoding', '*')
1718             full_response = self._request_webpage(request, video_id)
1719             head_response = full_response
1720
1721         info_dict = {
1722             'id': video_id,
1723             'title': self._generic_title(url),
1724             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1725         }
1726
1727         # Check for direct link to a video
1728         content_type = head_response.headers.get('Content-Type', '').lower()
1729         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1730         if m:
1731             format_id = m.group('format_id')
1732             if format_id.endswith('mpegurl'):
1733                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1734             elif format_id == 'f4m':
1735                 formats = self._extract_f4m_formats(url, video_id)
1736             else:
1737                 formats = [{
1738                     'format_id': m.group('format_id'),
1739                     'url': url,
1740                     'vcodec': 'none' if m.group('type') == 'audio' else None
1741                 }]
1742                 info_dict['direct'] = True
1743             self._sort_formats(formats)
1744             info_dict['formats'] = formats
1745             return info_dict
1746
1747         if not self._downloader.params.get('test', False) and not is_intentional:
1748             force = self._downloader.params.get('force_generic_extractor', False)
1749             self._downloader.report_warning(
1750                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1751
1752         if not full_response:
1753             request = sanitized_Request(url)
1754             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1755             # making it impossible to download only chunk of the file (yet we need only 512kB to
1756             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1757             # that will always result in downloading the whole file that is not desirable.
1758             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1759             # to accept raw bytes and being able to download only a chunk.
1760             # It may probably better to solve this by checking Content-Type for application/octet-stream
1761             # after HEAD request finishes, but not sure if we can rely on this.
1762             request.add_header('Accept-Encoding', '*')
1763             full_response = self._request_webpage(request, video_id)
1764
1765         first_bytes = full_response.read(512)
1766
1767         # Is it an M3U playlist?
1768         if first_bytes.startswith(b'#EXTM3U'):
1769             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1770             self._sort_formats(info_dict['formats'])
1771             return info_dict
1772
1773         # Maybe it's a direct link to a video?
1774         # Be careful not to download the whole thing!
1775         if not is_html(first_bytes):
1776             self._downloader.report_warning(
1777                 'URL could be a direct video link, returning it as such.')
1778             info_dict.update({
1779                 'direct': True,
1780                 'url': url,
1781             })
1782             return info_dict
1783
1784         webpage = self._webpage_read_content(
1785             full_response, url, video_id, prefix=first_bytes)
1786
1787         self.report_extraction(video_id)
1788
1789         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1790         try:
1791             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1792             if doc.tag == 'rss':
1793                 return self._extract_rss(url, video_id, doc)
1794             elif doc.tag == 'SmoothStreamingMedia':
1795                 info_dict['formats'] = self._parse_ism_formats(doc, url)
1796                 self._sort_formats(info_dict['formats'])
1797                 return info_dict
1798             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1799                 smil = self._parse_smil(doc, url, video_id)
1800                 self._sort_formats(smil['formats'])
1801                 return smil
1802             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1803                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1804             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1805                 info_dict['formats'] = self._parse_mpd_formats(
1806                     doc, video_id,
1807                     mpd_base_url=full_response.geturl().rpartition('/')[0],
1808                     mpd_url=url)
1809                 self._sort_formats(info_dict['formats'])
1810                 return info_dict
1811             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1812                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1813                 self._sort_formats(info_dict['formats'])
1814                 return info_dict
1815         except compat_xml_parse_error:
1816             pass
1817
1818         # Is it a Camtasia project?
1819         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1820         if camtasia_res is not None:
1821             return camtasia_res
1822
1823         # Sometimes embedded video player is hidden behind percent encoding
1824         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1825         # Unescaping the whole page allows to handle those cases in a generic way
1826         webpage = compat_urllib_parse_unquote(webpage)
1827
1828         # it's tempting to parse this further, but you would
1829         # have to take into account all the variations like
1830         #   Video Title - Site Name
1831         #   Site Name | Video Title
1832         #   Video Title - Tagline | Site Name
1833         # and so on and so forth; it's just not practical
1834         video_title = self._og_search_title(
1835             webpage, default=None) or self._html_search_regex(
1836             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1837             default='video')
1838
1839         # Try to detect age limit automatically
1840         age_limit = self._rta_search(webpage)
1841         # And then there are the jokers who advertise that they use RTA,
1842         # but actually don't.
1843         AGE_LIMIT_MARKERS = [
1844             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1845         ]
1846         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1847             age_limit = 18
1848
1849         # video uploader is domain name
1850         video_uploader = self._search_regex(
1851             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1852
1853         video_description = self._og_search_description(webpage, default=None)
1854         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1855
1856         # Look for Brightcove Legacy Studio embeds
1857         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1858         if bc_urls:
1859             self.to_screen('Brightcove video detected.')
1860             entries = [{
1861                 '_type': 'url',
1862                 'url': smuggle_url(bc_url, {'Referer': url}),
1863                 'ie_key': 'BrightcoveLegacy'
1864             } for bc_url in bc_urls]
1865
1866             return {
1867                 '_type': 'playlist',
1868                 'title': video_title,
1869                 'id': video_id,
1870                 'entries': entries,
1871             }
1872
1873         # Look for Brightcove New Studio embeds
1874         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1875         if bc_urls:
1876             return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
1877
1878         # Look for ThePlatform embeds
1879         tp_urls = ThePlatformIE._extract_urls(webpage)
1880         if tp_urls:
1881             return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
1882
1883         # Look for Vessel embeds
1884         vessel_urls = VesselIE._extract_urls(webpage)
1885         if vessel_urls:
1886             return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
1887
1888         # Look for embedded rtl.nl player
1889         matches = re.findall(
1890             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1891             webpage)
1892         if matches:
1893             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
1894
1895         vimeo_urls = VimeoIE._extract_urls(url, webpage)
1896         if vimeo_urls:
1897             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
1898
1899         vid_me_embed_url = self._search_regex(
1900             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1901             webpage, 'vid.me embed', default=None)
1902         if vid_me_embed_url is not None:
1903             return self.url_result(vid_me_embed_url, 'Vidme')
1904
1905         # Look for embedded YouTube player
1906         matches = re.findall(r'''(?x)
1907             (?:
1908                 <iframe[^>]+?src=|
1909                 data-video-url=|
1910                 <embed[^>]+?src=|
1911                 embedSWF\(?:\s*|
1912                 new\s+SWFObject\(
1913             )
1914             (["\'])
1915                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1916                 (?:embed|v|p)/.+?)
1917             \1''', webpage)
1918         if matches:
1919             return self.playlist_from_matches(
1920                 matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
1921
1922         # Look for lazyYT YouTube embed
1923         matches = re.findall(
1924             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1925         if matches:
1926             return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
1927
1928         # Look for Wordpress "YouTube Video Importer" plugin
1929         matches = re.findall(r'''(?x)<div[^>]+
1930             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1931             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1932         if matches:
1933             return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
1934
1935         matches = DailymotionIE._extract_urls(webpage)
1936         if matches:
1937             return self.playlist_from_matches(matches, video_id, video_title)
1938
1939         # Look for embedded Dailymotion playlist player (#3822)
1940         m = re.search(
1941             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1942         if m:
1943             playlists = re.findall(
1944                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1945             if playlists:
1946                 return self.playlist_from_matches(
1947                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
1948
1949         # Look for embedded Wistia player
1950         match = re.search(
1951             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1952         if match:
1953             embed_url = self._proto_relative_url(
1954                 unescapeHTML(match.group('url')))
1955             return {
1956                 '_type': 'url_transparent',
1957                 'url': embed_url,
1958                 'ie_key': 'Wistia',
1959                 'uploader': video_uploader,
1960             }
1961
1962         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1963         if match:
1964             return {
1965                 '_type': 'url_transparent',
1966                 'url': 'wistia:%s' % match.group('id'),
1967                 'ie_key': 'Wistia',
1968                 'uploader': video_uploader,
1969             }
1970
1971         match = re.search(
1972             r'''(?sx)
1973                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1974                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1975             ''', webpage)
1976         if match:
1977             return self.url_result(self._proto_relative_url(
1978                 'wistia:%s' % match.group('id')), 'Wistia')
1979
1980         # Look for SVT player
1981         svt_url = SVTIE._extract_url(webpage)
1982         if svt_url:
1983             return self.url_result(svt_url, 'SVT')
1984
1985         # Look for embedded condenast player
1986         matches = re.findall(
1987             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1988             webpage)
1989         if matches:
1990             return {
1991                 '_type': 'playlist',
1992                 'entries': [{
1993                     '_type': 'url',
1994                     'ie_key': 'CondeNast',
1995                     'url': ma,
1996                 } for ma in matches],
1997                 'title': video_title,
1998                 'id': video_id,
1999             }
2000
2001         # Look for Bandcamp pages with custom domain
2002         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
2003         if mobj is not None:
2004             burl = unescapeHTML(mobj.group(1))
2005             # Don't set the extractor because it can be a track url or an album
2006             return self.url_result(burl)
2007
2008         # Look for embedded Vevo player
2009         mobj = re.search(
2010             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
2011         if mobj is not None:
2012             return self.url_result(mobj.group('url'))
2013
2014         # Look for embedded Viddler player
2015         mobj = re.search(
2016             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
2017             webpage)
2018         if mobj is not None:
2019             return self.url_result(mobj.group('url'))
2020
2021         # Look for NYTimes player
2022         mobj = re.search(
2023             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
2024             webpage)
2025         if mobj is not None:
2026             return self.url_result(mobj.group('url'))
2027
2028         # Look for Libsyn player
2029         mobj = re.search(
2030             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
2031         if mobj is not None:
2032             return self.url_result(mobj.group('url'))
2033
2034         # Look for Ooyala videos
2035         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
2036                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2037                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
2038                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
2039         if mobj is not None:
2040             embed_token = self._search_regex(
2041                 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2042                 webpage, 'ooyala embed token', default=None)
2043             return OoyalaIE._build_url_result(smuggle_url(
2044                 mobj.group('ec'), {
2045                     'domain': url,
2046                     'embed_token': embed_token,
2047                 }))
2048
2049         # Look for multiple Ooyala embeds on SBN network websites
2050         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2051         if mobj is not None:
2052             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2053             if embeds:
2054                 return self.playlist_from_matches(
2055                     embeds, video_id, video_title,
2056                     getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
2057
2058         # Look for Aparat videos
2059         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
2060         if mobj is not None:
2061             return self.url_result(mobj.group(1), 'Aparat')
2062
2063         # Look for MPORA videos
2064         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2065         if mobj is not None:
2066             return self.url_result(mobj.group(1), 'Mpora')
2067
2068         # Look for embedded NovaMov-based player
2069         mobj = re.search(
2070             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2071                     (?P<url>http://(?:(?:embed|www)\.)?
2072                         (?:novamov\.com|
2073                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
2074                            videoweed\.(?:es|com)|
2075                            movshare\.(?:net|sx|ag)|
2076                            divxstage\.(?:eu|net|ch|co|at|ag))
2077                         /embed\.php.+?)\1''', webpage)
2078         if mobj is not None:
2079             return self.url_result(mobj.group('url'))
2080
2081         # Look for embedded Facebook player
2082         facebook_url = FacebookIE._extract_url(webpage)
2083         if facebook_url is not None:
2084             return self.url_result(facebook_url, 'Facebook')
2085
2086         # Look for embedded VK player
2087         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2088         if mobj is not None:
2089             return self.url_result(mobj.group('url'), 'VK')
2090
2091         # Look for embedded Odnoklassniki player
2092         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2093         if mobj is not None:
2094             return self.url_result(mobj.group('url'), 'Odnoklassniki')
2095
2096         # Look for embedded ivi player
2097         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2098         if mobj is not None:
2099             return self.url_result(mobj.group('url'), 'Ivi')
2100
2101         # Look for embedded Huffington Post player
2102         mobj = re.search(
2103             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2104         if mobj is not None:
2105             return self.url_result(mobj.group('url'), 'HuffPost')
2106
2107         # Look for embed.ly
2108         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2109         if mobj is not None:
2110             return self.url_result(mobj.group('url'))
2111         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2112         if mobj is not None:
2113             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2114
2115         # Look for funnyordie embed
2116         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2117         if matches:
2118             return self.playlist_from_matches(
2119                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
2120
2121         # Look for BBC iPlayer embed
2122         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2123         if matches:
2124             return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
2125
2126         # Look for embedded RUTV player
2127         rutv_url = RUTVIE._extract_url(webpage)
2128         if rutv_url:
2129             return self.url_result(rutv_url, 'RUTV')
2130
2131         # Look for embedded TVC player
2132         tvc_url = TVCIE._extract_url(webpage)
2133         if tvc_url:
2134             return self.url_result(tvc_url, 'TVC')
2135
2136         # Look for embedded SportBox player
2137         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2138         if sportbox_urls:
2139             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
2140
2141         # Look for embedded XHamster player
2142         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2143         if xhamster_urls:
2144             return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2145
2146         # Look for embedded TNAFlixNetwork player
2147         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2148         if tnaflix_urls:
2149             return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2150
2151         # Look for embedded PornHub player
2152         pornhub_urls = PornHubIE._extract_urls(webpage)
2153         if pornhub_urls:
2154             return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
2155
2156         # Look for embedded DrTuber player
2157         drtuber_urls = DrTuberIE._extract_urls(webpage)
2158         if drtuber_urls:
2159             return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
2160
2161         # Look for embedded RedTube player
2162         redtube_urls = RedTubeIE._extract_urls(webpage)
2163         if redtube_urls:
2164             return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
2165
2166         # Look for embedded Tvigle player
2167         mobj = re.search(
2168             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2169         if mobj is not None:
2170             return self.url_result(mobj.group('url'), 'Tvigle')
2171
2172         # Look for embedded TED player
2173         mobj = re.search(
2174             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2175         if mobj is not None:
2176             return self.url_result(mobj.group('url'), 'TED')
2177
2178         # Look for embedded Ustream videos
2179         ustream_url = UstreamIE._extract_url(webpage)
2180         if ustream_url:
2181             return self.url_result(ustream_url, UstreamIE.ie_key())
2182
2183         # Look for embedded arte.tv player
2184         mobj = re.search(
2185             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2186             webpage)
2187         if mobj is not None:
2188             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2189
2190         # Look for embedded francetv player
2191         mobj = re.search(
2192             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2193             webpage)
2194         if mobj is not None:
2195             return self.url_result(mobj.group('url'))
2196
2197         # Look for embedded smotri.com player
2198         smotri_url = SmotriIE._extract_url(webpage)
2199         if smotri_url:
2200             return self.url_result(smotri_url, 'Smotri')
2201
2202         # Look for embedded Myvi.ru player
2203         myvi_url = MyviIE._extract_url(webpage)
2204         if myvi_url:
2205             return self.url_result(myvi_url)
2206
2207         # Look for embedded soundcloud player
2208         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2209         if soundcloud_urls:
2210             return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2211
2212         # Look for tunein player
2213         tunein_urls = TuneInBaseIE._extract_urls(webpage)
2214         if tunein_urls:
2215             return self.playlist_from_matches(tunein_urls, video_id, video_title)
2216
2217         # Look for embedded mtvservices player
2218         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2219         if mtvservices_url:
2220             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2221
2222         # Look for embedded yahoo player
2223         mobj = re.search(
2224             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2225             webpage)
2226         if mobj is not None:
2227             return self.url_result(mobj.group('url'), 'Yahoo')
2228
2229         # Look for embedded sbs.com.au player
2230         mobj = re.search(
2231             r'''(?x)
2232             (?:
2233                 <meta\s+property="og:video"\s+content=|
2234                 <iframe[^>]+?src=
2235             )
2236             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2237             webpage)
2238         if mobj is not None:
2239             return self.url_result(mobj.group('url'), 'SBS')
2240
2241         # Look for embedded Cinchcast player
2242         mobj = re.search(
2243             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2244             webpage)
2245         if mobj is not None:
2246             return self.url_result(mobj.group('url'), 'Cinchcast')
2247
2248         mobj = re.search(
2249             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2250             webpage)
2251         if not mobj:
2252             mobj = re.search(
2253                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2254                 webpage)
2255         if mobj is not None:
2256             return self.url_result(mobj.group('url'), 'MLB')
2257
2258         mobj = re.search(
2259             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2260             webpage)
2261         if mobj is not None:
2262             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2263
2264         mobj = re.search(
2265             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2266             webpage)
2267         if mobj is not None:
2268             return self.url_result(mobj.group('url'), 'Livestream')
2269
2270         # Look for Zapiks embed
2271         mobj = re.search(
2272             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2273         if mobj is not None:
2274             return self.url_result(mobj.group('url'), 'Zapiks')
2275
2276         # Look for Kaltura embeds
2277         kaltura_url = KalturaIE._extract_url(webpage)
2278         if kaltura_url:
2279             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2280
2281         # Look for Eagle.Platform embeds
2282         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2283         if eagleplatform_url:
2284             return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
2285
2286         # Look for ClipYou (uses Eagle.Platform) embeds
2287         mobj = re.search(
2288             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2289         if mobj is not None:
2290             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2291
2292         # Look for Pladform embeds
2293         pladform_url = PladformIE._extract_url(webpage)
2294         if pladform_url:
2295             return self.url_result(pladform_url)
2296
2297         # Look for Videomore embeds
2298         videomore_url = VideomoreIE._extract_url(webpage)
2299         if videomore_url:
2300             return self.url_result(videomore_url)
2301
2302         # Look for Webcaster embeds
2303         webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2304         if webcaster_url:
2305             return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2306
2307         # Look for Playwire embeds
2308         mobj = re.search(
2309             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2310         if mobj is not None:
2311             return self.url_result(mobj.group('url'))
2312
2313         # Look for 5min embeds
2314         mobj = re.search(
2315             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2316         if mobj is not None:
2317             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2318
2319         # Look for Crooks and Liars embeds
2320         mobj = re.search(
2321             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2322         if mobj is not None:
2323             return self.url_result(mobj.group('url'))
2324
2325         # Look for NBC Sports VPlayer embeds
2326         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2327         if nbc_sports_url:
2328             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2329
2330         # Look for NBC News embeds
2331         nbc_news_embed_url = re.search(
2332             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2333         if nbc_news_embed_url:
2334             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2335
2336         # Look for Google Drive embeds
2337         google_drive_url = GoogleDriveIE._extract_url(webpage)
2338         if google_drive_url:
2339             return self.url_result(google_drive_url, 'GoogleDrive')
2340
2341         # Look for UDN embeds
2342         mobj = re.search(
2343             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2344         if mobj is not None:
2345             return self.url_result(
2346                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2347
2348         # Look for Senate ISVP iframe
2349         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2350         if senate_isvp_url:
2351             return self.url_result(senate_isvp_url, 'SenateISVP')
2352
2353         # Look for Dailymotion Cloud videos
2354         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2355         if dmcloud_url:
2356             return self.url_result(dmcloud_url, 'DailymotionCloud')
2357
2358         # Look for OnionStudios embeds
2359         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2360         if onionstudios_url:
2361             return self.url_result(onionstudios_url)
2362
2363         # Look for ViewLift embeds
2364         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2365         if viewlift_url:
2366             return self.url_result(viewlift_url)
2367
2368         # Look for JWPlatform embeds
2369         jwplatform_url = JWPlatformIE._extract_url(webpage)
2370         if jwplatform_url:
2371             return self.url_result(jwplatform_url, 'JWPlatform')
2372
2373         # Look for Digiteka embeds
2374         digiteka_url = DigitekaIE._extract_url(webpage)
2375         if digiteka_url:
2376             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2377
2378         # Look for Arkena embeds
2379         arkena_url = ArkenaIE._extract_url(webpage)
2380         if arkena_url:
2381             return self.url_result(arkena_url, ArkenaIE.ie_key())
2382
2383         # Look for Piksel embeds
2384         piksel_url = PikselIE._extract_url(webpage)
2385         if piksel_url:
2386             return self.url_result(piksel_url, PikselIE.ie_key())
2387
2388         # Look for Limelight embeds
2389         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2390         if mobj:
2391             lm = {
2392                 'Media': 'media',
2393                 'Channel': 'channel',
2394                 'ChannelList': 'channel_list',
2395             }
2396             return self.url_result(smuggle_url('limelight:%s:%s' % (
2397                 lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
2398                 'Limelight%s' % mobj.group(1), mobj.group(2))
2399
2400         mobj = re.search(
2401             r'''(?sx)
2402                 <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
2403                     <param[^>]+
2404                         name=(["\'])flashVars\2[^>]+
2405                         value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
2406             ''', webpage)
2407         if mobj:
2408             return self.url_result(smuggle_url(
2409                 'limelight:media:%s' % mobj.group('id'),
2410                 {'source_url': url}), 'LimelightMedia', mobj.group('id'))
2411
2412         # Look for AdobeTVVideo embeds
2413         mobj = re.search(
2414             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2415             webpage)
2416         if mobj is not None:
2417             return self.url_result(
2418                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2419                 'AdobeTVVideo')
2420
2421         # Look for Vine embeds
2422         mobj = re.search(
2423             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2424             webpage)
2425         if mobj is not None:
2426             return self.url_result(
2427                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2428
2429         # Look for VODPlatform embeds
2430         mobj = re.search(
2431             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2432             webpage)
2433         if mobj is not None:
2434             return self.url_result(
2435                 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2436
2437         # Look for Mangomolo embeds
2438         mobj = re.search(
2439             r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2440                 (?:
2441                     video\?.*?\bid=(?P<video_id>\d+)|
2442                     index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2443                 ).+?)\1''', webpage)
2444         if mobj is not None:
2445             info = {
2446                 '_type': 'url_transparent',
2447                 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2448                 'title': video_title,
2449                 'description': video_description,
2450                 'thumbnail': video_thumbnail,
2451                 'uploader': video_uploader,
2452             }
2453             video_id = mobj.group('video_id')
2454             if video_id:
2455                 info.update({
2456                     'ie_key': 'MangomoloVideo',
2457                     'id': video_id,
2458                 })
2459             else:
2460                 info.update({
2461                     'ie_key': 'MangomoloLive',
2462                     'id': mobj.group('channel_id'),
2463                 })
2464             return info
2465
2466         # Look for Instagram embeds
2467         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2468         if instagram_embed_url is not None:
2469             return self.url_result(
2470                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2471
2472         # Look for LiveLeak embeds
2473         liveleak_url = LiveLeakIE._extract_url(webpage)
2474         if liveleak_url:
2475             return self.url_result(liveleak_url, 'LiveLeak')
2476
2477         # Look for 3Q SDN embeds
2478         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2479         if threeqsdn_url:
2480             return {
2481                 '_type': 'url_transparent',
2482                 'ie_key': ThreeQSDNIE.ie_key(),
2483                 'url': self._proto_relative_url(threeqsdn_url),
2484                 'title': video_title,
2485                 'description': video_description,
2486                 'thumbnail': video_thumbnail,
2487                 'uploader': video_uploader,
2488             }
2489
2490         # Look for VBOX7 embeds
2491         vbox7_url = Vbox7IE._extract_url(webpage)
2492         if vbox7_url:
2493             return self.url_result(vbox7_url, Vbox7IE.ie_key())
2494
2495         # Look for DBTV embeds
2496         dbtv_urls = DBTVIE._extract_urls(webpage)
2497         if dbtv_urls:
2498             return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
2499
2500         # Look for Videa embeds
2501         videa_urls = VideaIE._extract_urls(webpage)
2502         if videa_urls:
2503             return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
2504
2505         # Look for 20 minuten embeds
2506         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2507         if twentymin_urls:
2508             return self.playlist_from_matches(
2509                 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
2510
2511         # Look for Openload embeds
2512         openload_urls = OpenloadIE._extract_urls(webpage)
2513         if openload_urls:
2514             return self.playlist_from_matches(
2515                 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
2516
2517         # Look for VideoPress embeds
2518         videopress_urls = VideoPressIE._extract_urls(webpage)
2519         if videopress_urls:
2520             return self.playlist_from_matches(
2521                 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
2522
2523         # Look for Rutube embeds
2524         rutube_urls = RutubeIE._extract_urls(webpage)
2525         if rutube_urls:
2526             return self.playlist_from_matches(
2527                 rutube_urls, ie=RutubeIE.ie_key())
2528
2529         # Looking for http://schema.org/VideoObject
2530         json_ld = self._search_json_ld(
2531             webpage, video_id, default={}, expected_type='VideoObject')
2532         if json_ld.get('url'):
2533             info_dict.update({
2534                 'title': video_title or info_dict['title'],
2535                 'description': video_description,
2536                 'thumbnail': video_thumbnail,
2537                 'age_limit': age_limit
2538             })
2539             info_dict.update(json_ld)
2540             return info_dict
2541
2542         # Look for HTML5 media
2543         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
2544         if entries:
2545             for entry in entries:
2546                 entry.update({
2547                     'id': video_id,
2548                     'title': video_title,
2549                 })
2550                 self._sort_formats(entry['formats'])
2551             return self.playlist_result(entries)
2552
2553         jwplayer_data_str = self._find_jwplayer_data(webpage)
2554         if jwplayer_data_str:
2555             try:
2556                 jwplayer_data = self._parse_json(
2557                     jwplayer_data_str, video_id, transform_source=js_to_json)
2558                 info = self._parse_jwplayer_data(
2559                     jwplayer_data, video_id, require_title=False)
2560                 if not info.get('title'):
2561                     info['title'] = video_title
2562                 return info
2563             except ExtractorError:
2564                 pass
2565
def check_video(vurl):
    """Tell whether *vurl* should be kept as a candidate media URL.

    The URL is accepted straight away when either ``YoutubeIE`` or
    ``RtmpIE`` reports it as suitable.  Otherwise the path component of
    the URL must contain a dot, and the extension ``determine_ext``
    derives from that path must not be one of the rejected extensions
    listed below.

    Returns a bool.
    """
    if YoutubeIE.suitable(vurl) or RtmpIE.suitable(vurl):
        return True

    url_path = compat_urlparse.urlparse(vurl).path
    extension = determine_ext(url_path)
    # Extensions rejected outright (Flash object, images, subtitle
    # formats, JavaScript).
    rejected_exts = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')

    # A path without any dot is never accepted.
    if '.' not in url_path:
        return False
    return extension not in rejected_exts
2574
def filter_video(urls):
    """Return a new list with the items of *urls* that ``check_video`` accepts.

    The relative order of the retained items is unchanged.
    """
    return [candidate for candidate in urls if check_video(candidate)]
2577
2578         # Start with something easy: JW Player in SWFObject
2579         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2580         if not found:
2581             # Look for gorilla-vid style embedding
2582             found = filter_video(re.findall(r'''(?sx)
2583                 (?:
2584                     jw_plugins|
2585                     JWPlayerOptions|
2586                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2587                 )
2588                 .*?
2589                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2590         if not found:
2591             # Broaden the search a little bit
2592             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2593         if not found:
2594             # Broaden the findall a little bit: JWPlayer JS loader
2595             found = filter_video(re.findall(
2596                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2597         if not found:
2598             # Flow player
2599             found = filter_video(re.findall(r'''(?xs)
2600                 flowplayer\("[^"]+",\s*
2601                     \{[^}]+?\}\s*,
2602                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2603                         ["']?url["']?\s*:\s*["']([^"']+)["']
2604             ''', webpage))
2605         if not found:
2606             # Cinerama player
2607             found = re.findall(
2608                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2609         if not found:
2610             # Try to find twitter cards info
2611             # twitter:player:stream should be checked before twitter:player since
2612             # it is expected to contain a raw stream (see
2613             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2614             found = filter_video(re.findall(
2615                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2616         if not found:
2617             # We look for Open Graph info:
2618             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
2619             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2620             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2621             if m_video_type is not None:
2622                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2623         if not found:
2624             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2625             found = re.search(
2626                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2627                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2628                 webpage)
2629             if not found:
2630                 # Look also in Refresh HTTP header
2631                 refresh_header = head_response.headers.get('Refresh')
2632                 if refresh_header:
2633                     # In python 2 response HTTP headers are bytestrings
2634                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2635                         refresh_header = refresh_header.decode('iso-8859-1')
2636                     found = re.search(REDIRECT_REGEX, refresh_header)
2637             if found:
2638                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2639                 if new_url != url:
2640                     self.report_following_redirect(new_url)
2641                     return {
2642                         '_type': 'url',
2643                         'url': new_url,
2644                     }
2645                 else:
2646                     found = None
2647
2648         if not found:
2649             # twitter:player is a https URL to iframe player that may or may not
2650             # be supported by youtube-dl thus this is checked the very last (see
2651             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2652             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
2653             if embed_url:
2654                 return self.url_result(embed_url)
2655
2656         if not found:
2657             raise UnsupportedError(url)
2658
2659         entries = []
2660         for video_url in orderedSet(found):
2661             video_url = unescapeHTML(video_url)
2662             video_url = video_url.replace('\\/', '/')
2663             video_url = compat_urlparse.urljoin(url, video_url)
2664             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2665
2666             # Sometimes, jwplayer extraction will result in a YouTube URL
2667             if YoutubeIE.suitable(video_url):
2668                 entries.append(self.url_result(video_url, 'Youtube'))
2669                 continue
2670
2671             # here's a fun little line of code for you:
2672             video_id = os.path.splitext(video_id)[0]
2673
2674             entry_info_dict = {
2675                 'id': video_id,
2676                 'uploader': video_uploader,
2677                 'title': video_title,
2678                 'age_limit': age_limit,
2679             }
2680
2681             if RtmpIE.suitable(video_url):
2682                 entry_info_dict.update({
2683                     '_type': 'url_transparent',
2684                     'ie_key': RtmpIE.ie_key(),
2685                     'url': video_url,
2686                 })
2687                 entries.append(entry_info_dict)
2688                 continue
2689
2690             ext = determine_ext(video_url)
2691             if ext == 'smil':
2692                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2693             elif ext == 'xspf':
2694                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2695             elif ext == 'm3u8':
2696                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2697             elif ext == 'mpd':
2698                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2699             elif ext == 'f4m':
2700                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2701             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
2702                 # Just matching .ism/manifest is not enough to be reliably sure
2703                 # whether it's actually an ISM manifest or some other streaming
2704                 # manifest since there are various streaming URL formats
2705                 # possible (see [1]) as well as some other shenanigans like
2706                 # .smil/manifest URLs that actually serve an ISM (see [2]) and
2707                 # so on.
2708                 # Thus the most reasonable way to solve this is to delegate
2709                 # to generic extractor in order to look into the contents of
2710                 # the manifest itself.
2711                 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
2712                 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
2713                 entry_info_dict = self.url_result(
2714                     smuggle_url(video_url, {'to_generic': True}),
2715                     GenericIE.ie_key())
2716             else:
2717                 entry_info_dict['url'] = video_url
2718
2719             if entry_info_dict.get('formats'):
2720                 self._sort_formats(entry_info_dict['formats'])
2721
2722             entries.append(entry_info_dict)
2723
2724         if len(entries) == 1:
2725             return entries[0]
2726         else:
2727             for num, e in enumerate(entries, start=1):
2728                 # 'url' results don't have a title
2729                 if e.get('title') is not None:
2730                     e['title'] = '%s (%d)' % (e['title'], num)
2731             return {
2732                 '_type': 'playlist',
2733                 'entries': entries,
2734             }