_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     js_to_json,
  24     orderedSet,
  25     sanitized_Request,
  26     smuggle_url,
  27     unescapeHTML,
  28     unified_strdate,
  29     unsmuggle_url,
  30     UnsupportedError,
  31     xpath_text,
  32 )
  33 from .commonprotocols import RtmpIE
  34 from .brightcove import (
  35     BrightcoveLegacyIE,
  36     BrightcoveNewIE,
  37 )
  38 from .nbc import NBCSportsVPlayerIE
  39 from .ooyala import OoyalaIE
  40 from .rutv import RUTVIE
  41 from .tvc import TVCIE
  42 from .sportbox import SportBoxEmbedIE
  43 from .smotri import SmotriIE
  44 from .myvi import MyviIE
  45 from .condenast import CondeNastIE
  46 from .udn import UDNEmbedIE
  47 from .senateisvp import SenateISVPIE
  48 from .svt import SVTIE
  49 from .pornhub import PornHubIE
  50 from .xhamster import XHamsterEmbedIE
  51 from .tnaflix import TNAFlixNetworkEmbedIE
  52 from .drtuber import DrTuberIE
  53 from .redtube import RedTubeIE
  54 from .vimeo import VimeoIE
  55 from .dailymotion import (
  56     DailymotionIE,
  57     DailymotionCloudIE,
  58 )
  59 from .onionstudios import OnionStudiosIE
  60 from .viewlift import ViewLiftEmbedIE
  61 from .mtv import MTVServicesEmbeddedIE
  62 from .pladform import PladformIE
  63 from .videomore import VideomoreIE
  64 from .webcaster import WebcasterFeedIE
  65 from .googledrive import GoogleDriveIE
  66 from .jwplatform import JWPlatformIE
  67 from .digiteka import DigitekaIE
  68 from .arkena import ArkenaIE
  69 from .instagram import InstagramIE
  70 from .liveleak import LiveLeakIE
  71 from .threeqsdn import ThreeQSDNIE
  72 from .theplatform import ThePlatformIE
  73 from .vessel import VesselIE
  74 from .kaltura import KalturaIE
  75 from .eagleplatform import EaglePlatformIE
  76 from .facebook import FacebookIE
  77 from .soundcloud import SoundcloudIE
  78 from .tunein import TuneInBaseIE
  79 from .vbox7 import Vbox7IE
  80 from .dbtv import DBTVIE
  81 from .piksel import PikselIE
  82 from .videa import VideaIE
  83 from .twentymin import TwentyMinutenIE
  84 from .ustream import UstreamIE
  85 from .openload import OpenloadIE
  86 from .videopress import VideoPressIE
  87 from .rutube import RutubeIE
  88
  89
  90 class GenericIE(InfoExtractor):
  91     IE_DESC = 'Generic downloader that works on some sites'
  92     _VALID_URL = r'.*'
  93     IE_NAME = 'generic'
  94     _TESTS = [
  95         # Direct link to a video
  96         {
  97             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  98             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  99             'info_dict': {
 100                 'id': 'trailer',
 101                 'ext': 'mp4',
 102                 'title': 'trailer',
 103                 'upload_date': '20100513',
 104             }
 105         },
 106         # Direct link to media delivered compressed (until Accept-Encoding is *)
 107         {
 108             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
 109             'md5': '128c42e68b13950268b648275386fc74',
 110             'info_dict': {
 111                 'id': 'FictionJunction-Parallel_Hearts',
 112                 'ext': 'flac',
 113                 'title': 'FictionJunction-Parallel_Hearts',
 114                 'upload_date': '20140522',
 115             },
 116             'expected_warnings': [
 117                 'URL could be a direct video link, returning it as such.'
 118             ],
 119             'skip': 'URL invalid',
 120         },
 121         # Direct download with broken HEAD
 122         {
 123             'url': 'http://ai-radio.org:8000/radio.opus',
 124             'info_dict': {
 125                 'id': 'radio',
 126                 'ext': 'opus',
 127                 'title': 'radio',
 128             },
 129             'params': {
 130                 'skip_download': True,  # infinite live stream
 131             },
 132             'expected_warnings': [
 133                 r'501.*Not Implemented',
 134                 r'400.*Bad Request',
 135             ],
 136         },
 137         # Direct link with incorrect MIME type
 138         {
 139             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 140             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 141             'info_dict': {
 142                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 143                 'id': '5_Lennart_Poettering_-_Systemd',
 144                 'ext': 'webm',
 145                 'title': '5_Lennart_Poettering_-_Systemd',
 146                 'upload_date': '20141120',
 147             },
 148             'expected_warnings': [
 149                 'URL could be a direct video link, returning it as such.'
 150             ]
 151         },
 152         # RSS feed
 153         {
 154             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 155             'info_dict': {
 156                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 157                 'title': 'Zero Punctuation',
 158                 'description': 're:.*groundbreaking video review series.*'
 159             },
 160             'playlist_mincount': 11,
 161         },
 162         # RSS feed with enclosure
 163         {
 164             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 165             'info_dict': {
 166                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 167                 'ext': 'm4v',
 168                 'upload_date': '20150228',
 169                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 170             }
 171         },
 172         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 173         {
 174             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 175             'info_dict': {
 176                 'id': 'smil',
 177                 'ext': 'mp4',
 178                 'title': 'Automatics, robotics and biocybernetics',
 179                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 180                 'upload_date': '20130627',
 181                 'formats': 'mincount:16',
 182                 'subtitles': 'mincount:1',
 183             },
 184             'params': {
 185                 'force_generic_extractor': True,
 186                 'skip_download': True,
 187             },
 188         },
 189         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 190         {
 191             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 192             'info_dict': {
 193                 'id': 'hds',
 194                 'ext': 'flv',
 195                 'title': 'hds',
 196                 'formats': 'mincount:1',
 197             },
 198             'params': {
 199                 'skip_download': True,
 200             },
 201         },
 202         # SMIL from https://www.restudy.dk/video/play/id/1637
 203         {
 204             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 205             'info_dict': {
 206                 'id': 'video_1637',
 207                 'ext': 'flv',
 208                 'title': 'video_1637',
 209                 'formats': 'mincount:3',
 210             },
 211             'params': {
 212                 'skip_download': True,
 213             },
 214         },
 215         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 216         {
 217             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 218             'info_dict': {
 219                 'id': 'smil-service',
 220                 'ext': 'flv',
 221                 'title': 'smil-service',
 222                 'formats': 'mincount:1',
 223             },
 224             'params': {
 225                 'skip_download': True,
 226             },
 227         },
 228         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 229         {
 230             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 231             'info_dict': {
 232                 'id': '4719370',
 233                 'ext': 'mp4',
 234                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 235                 'formats': 'mincount:3',
 236             },
 237             'params': {
 238                 'skip_download': True,
 239             },
 240         },
 241         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 242         {
 243             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 244             'info_dict': {
 245                 'id': 'mZlp2ctYIUEB',
 246                 'ext': 'mp4',
 247                 'title': 'Tikibad ontruimd wegens brand',
 248                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 249                 'thumbnail': r're:^https?://.*\.jpg$',
 250                 'duration': 33,
 251             },
 252             'params': {
 253                 'skip_download': True,
 254             },
 255         },
 256         # MPD from http://dash-mse-test.appspot.com/media.html
 257         {
 258             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 259             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 260             'info_dict': {
 261                 'id': 'car-20120827-manifest',
 262                 'ext': 'mp4',
 263                 'title': 'car-20120827-manifest',
 264                 'formats': 'mincount:9',
 265                 'upload_date': '20130904',
 266             },
 267             'params': {
 268                 'format': 'bestvideo',
 269             },
 270         },
 271         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 272         {
 273             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 274             'info_dict': {
 275                 'id': 'content',
 276                 'ext': 'mp4',
 277                 'title': 'content',
 278                 'formats': 'mincount:8',
 279             },
 280             'params': {
 281                 # m3u8 downloads
 282                 'skip_download': True,
 283             },
 284             'skip': 'video gone',
 285         },
 286         # m3u8 served with Content-Type: text/plain
 287         {
 288             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
 289             'info_dict': {
 290                 'id': 'index',
 291                 'ext': 'mp4',
 292                 'title': 'index',
 293                 'upload_date': '20140720',
 294                 'formats': 'mincount:11',
 295             },
 296             'params': {
 297                 # m3u8 downloads
 298                 'skip_download': True,
 299             },
 300             'skip': 'video gone',
 301         },
 302         # google redirect
 303         {
 304             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 305             'info_dict': {
 306                 'id': 'cmQHVoWB5FY',
 307                 'ext': 'mp4',
 308                 'upload_date': '20130224',
 309                 'uploader_id': 'TheVerge',
 310                 'description': r're:^Chris Ziegler takes a look at the\.*',
 311                 'uploader': 'The Verge',
 312                 'title': 'First Firefox OS phones side-by-side',
 313             },
 314             'params': {
 315                 'skip_download': False,
 316             }
 317         },
 318         {
 319             # redirect in Refresh HTTP header
 320             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 321             'info_dict': {
 322                 'id': 'pO8h3EaFRdo',
 323                 'ext': 'mp4',
 324                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 325                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 326                 'upload_date': '20150917',
 327                 'uploader_id': 'brtvofficial',
 328                 'uploader': 'Boiler Room',
 329             },
 330             'params': {
 331                 'skip_download': False,
 332             },
 333         },
 334         {
 335             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 336             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 337             'info_dict': {
 338                 'id': '13601338388002',
 339                 'ext': 'mp4',
 340                 'uploader': 'www.hodiho.fr',
 341                 'title': 'R\u00e9gis plante sa Jeep',
 342             }
 343         },
 344         # bandcamp page with custom domain
 345         {
 346             'add_ie': ['Bandcamp'],
 347             'url': 'http://bronyrock.com/track/the-pony-mash',
 348             'info_dict': {
 349                 'id': '3235767654',
 350                 'ext': 'mp3',
 351                 'title': 'The Pony Mash',
 352                 'uploader': 'M_Pallante',
 353             },
 354             'skip': 'There is a limit of 200 free downloads / month for the test song',
 355         },
 356         {
 357             # embedded brightcove video
 358             # it also tests brightcove videos that need to set the 'Referer'
 359             # in the http requests
 360             'add_ie': ['BrightcoveLegacy'],
 361             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 362             'info_dict': {
 363                 'id': '2765128793001',
 364                 'ext': 'mp4',
 365                 'title': 'Le cours de bourse : l’analyse technique',
 366                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 367                 'uploader': 'BFM BUSINESS',
 368             },
 369             'params': {
 370                 'skip_download': True,
 371             },
 372         },
 373         {
 374             # embedded with itemprop embedURL and video id spelled as `idVideo`
 375             'add_id': ['BrightcoveLegacy'],
 376             'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
 377             'info_dict': {
 378                 'id': '5255628253001',
 379                 'ext': 'mp4',
 380                 'title': 'md5:37c519b1128915607601e75a87995fc0',
 381                 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
 382                 'uploader': 'BFM BUSINESS',
 383                 'uploader_id': '876450612001',
 384                 'timestamp': 1482255315,
 385                 'upload_date': '20161220',
 386             },
 387             'params': {
 388                 'skip_download': True,
 389             },
 390         },
 391         {
 392             # https://github.com/rg3/youtube-dl/issues/2253
 393             'url': 'http://bcove.me/i6nfkrc3',
 394             'md5': '0ba9446db037002366bab3b3eb30c88c',
 395             'info_dict': {
 396                 'id': '3101154703001',
 397                 'ext': 'mp4',
 398                 'title': 'Still no power',
 399                 'uploader': 'thestar.com',
 400                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 401             },
 402             'add_ie': ['BrightcoveLegacy'],
 403             'skip': 'video gone',
 404         },
 405         {
 406             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 407             'md5': 'fb973ecf6e4a78a67453647444222983',
 408             'info_dict': {
 409                 'id': '3414141473001',
 410                 'ext': 'mp4',
 411                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 412                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 413                 'uploader': 'Championat',
 414             },
 415         },
 416         {
 417             # https://github.com/rg3/youtube-dl/issues/3541
 418             'add_ie': ['BrightcoveLegacy'],
 419             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 420             'info_dict': {
 421                 'id': '3866516442001',
 422                 'ext': 'mp4',
 423                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 424                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 425                 'uploader': 'SBS Broadcasting',
 426             },
 427             'skip': 'Restricted to Netherlands',
 428             'params': {
 429                 'skip_download': True,  # m3u8 download
 430             },
 431         },
 432         {
 433             # Brightcove with alternative playerID key
 434             'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
 435             'info_dict': {
 436                 'id': 'nmeth.2062_SV1',
 437                 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
 438             },
 439             'playlist': [{
 440                 'info_dict': {
 441                     'id': '2228375078001',
 442                     'ext': 'mp4',
 443                     'title': 'nmeth.2062-sv1',
 444                     'description': 'nmeth.2062-sv1',
 445                     'timestamp': 1363357591,
 446                     'upload_date': '20130315',
 447                     'uploader': 'Nature Publishing Group',
 448                     'uploader_id': '1964492299001',
 449                 },
 450             }],
 451         },
 452         {
 453             # Brightcove with UUID in videoPlayer
 454             'url': 'http://www8.hp.com/cn/zh/home.html',
 455             'info_dict': {
 456                 'id': '5255815316001',
 457                 'ext': 'mp4',
 458                 'title': 'Sprocket Video - China',
 459                 'description': 'Sprocket Video - China',
 460                 'uploader': 'HP-Video Gallery',
 461                 'timestamp': 1482263210,
 462                 'upload_date': '20161220',
 463                 'uploader_id': '1107601872001',
 464             },
 465             'params': {
 466                 'skip_download': True,  # m3u8 download
 467             },
 468         },
 469         # ooyala video
 470         {
 471             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 472             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 473             'info_dict': {
 474                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 475                 'ext': 'mp4',
 476                 'title': '2cc213299525360.mov',  # that's what we get
 477                 'duration': 238.231,
 478             },
 479             'add_ie': ['Ooyala'],
 480         },
 481         {
 482             # ooyala video embedded with http://player.ooyala.com/iframe.js
 483             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 484             'info_dict': {
 485                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 486                 'ext': 'mp4',
 487                 'title': '"Steve Jobs: Man in the Machine" trailer',
 488                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 489                 'duration': 135.427,
 490             },
 491             'params': {
 492                 'skip_download': True,
 493             },
 494             'skip': 'movie expired',
 495         },
 496         # embed.ly video
 497         {
 498             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 499             'info_dict': {
 500                 'id': '9ODmcdjQcHQ',
 501                 'ext': 'mp4',
 502                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 503                 'upload_date': '20140225',
 504                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 505                 'uploader': 'Tested',
 506                 'uploader_id': 'testedcom',
 507             },
 508             # No need to test YoutubeIE here
 509             'params': {
 510                 'skip_download': True,
 511             },
 512         },
 513         # funnyordie embed
 514         {
 515             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 516             'info_dict': {
 517                 'id': '18e820ec3f',
 518                 'ext': 'mp4',
 519                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 520                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 521             },
 522             # HEAD requests lead to endless 301, while GET is OK
 523             'expected_warnings': ['301'],
 524         },
 525         # RUTV embed
 526         {
 527             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 528             'info_dict': {
 529                 'id': '776940',
 530                 'ext': 'mp4',
 531                 'title': 'Охотское море стало целиком российским',
 532                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 533             },
 534             'params': {
 535                 # m3u8 download
 536                 'skip_download': True,
 537             },
 538         },
 539         # TVC embed
 540         {
 541             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 542             'info_dict': {
 543                 'id': '55304',
 544                 'ext': 'mp4',
 545                 'title': 'Дошкольное воспитание',
 546             },
 547         },
 548         # SportBox embed
 549         {
 550             'url': 'http://www.vestifinance.ru/articles/25753',
 551             'info_dict': {
 552                 'id': '25753',
 553                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 554             },
 555             'playlist': [{
 556                 'info_dict': {
 557                     'id': '370908',
 558                     'title': 'Госзаказ. День 3',
 559                     'ext': 'mp4',
 560                 }
 561             }, {
 562                 'info_dict': {
 563                     'id': '370905',
 564                     'title': 'Госзаказ. День 2',
 565                     'ext': 'mp4',
 566                 }
 567             }, {
 568                 'info_dict': {
 569                     'id': '370902',
 570                     'title': 'Госзаказ. День 1',
 571                     'ext': 'mp4',
 572                 }
 573             }],
 574             'params': {
 575                 # m3u8 download
 576                 'skip_download': True,
 577             },
 578         },
 579         # Myvi.ru embed
 580         {
 581             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 582             'info_dict': {
 583                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 584                 'ext': 'mp4',
 585                 'title': 'Ужастики, русский трейлер (2015)',
 586                 'thumbnail': r're:^https?://.*\.jpg$',
 587                 'duration': 153,
 588             }
 589         },
 590         # XHamster embed
 591         {
 592             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 593             'info_dict': {
 594                 'id': 'showthread',
 595                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 596             },
 597             'playlist_mincount': 7,
 598             # This forum does not allow <iframe> syntaxes anymore
 599             # Now HTML tags are displayed as-is
 600             'skip': 'No videos on this page',
 601         },
 602         # Embedded TED video
 603         {
 604             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 605             'md5': '65fdff94098e4a607385a60c5177c638',
 606             'info_dict': {
 607                 'id': '1969',
 608                 'ext': 'mp4',
 609                 'title': 'Hidden miracles of the natural world',
 610                 'uploader': 'Louie Schwartzberg',
 611                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 612             }
 613         },
 614         # nowvideo embed hidden behind percent encoding
 615         {
 616             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 617             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 618             'info_dict': {
 619                 'id': '06e53103ca9aa',
 620                 'ext': 'flv',
 621                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 622                 'description': 'No description',
 623             },
 624         },
 625         # arte embed
 626         {
 627             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 628             'md5': '7653032cbb25bf6c80d80f217055fa43',
 629             'info_dict': {
 630                 'id': '048195-004_PLUS7-F',
 631                 'ext': 'flv',
 632                 'title': 'X:enius',
 633                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 634                 'upload_date': '20140320',
 635             },
 636             'params': {
 637                 'skip_download': 'Requires rtmpdump'
 638             },
 639             'skip': 'video gone',
 640         },
 641         # francetv embed
 642         {
 643             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 644             'info_dict': {
 645                 'id': 'EV_30231',
 646                 'ext': 'mp4',
 647                 'title': 'Alcaline, le concert avec Calogero',
 648                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 649                 'upload_date': '20150226',
 650                 'timestamp': 1424989860,
 651                 'duration': 5400,
 652             },
 653             'params': {
 654                 # m3u8 downloads
 655                 'skip_download': True,
 656             },
 657             'expected_warnings': [
 658                 'Forbidden'
 659             ]
 660         },
 661         # Condé Nast embed
 662         {
 663             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 664             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 665             'info_dict': {
 666                 'id': '53501be369702d3275860000',
 667                 'ext': 'mp4',
 668                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 669             }
 670         },
 671         # Dailymotion embed
 672         {
 673             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 674             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 675             'info_dict': {
 676                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 677                 'ext': 'mp4',
 678                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 679                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
 680                 'uploader': 'Spi0n',
 681                 'uploader_id': 'xgditw',
 682                 'upload_date': '20140425',
 683                 'timestamp': 1398441542,
 684             },
 685             'add_ie': ['Dailymotion'],
 686         },
 687         # YouTube embed
 688         {
 689             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 690             'info_dict': {
 691                 'id': 'FXRb4ykk4S0',
 692                 'ext': 'mp4',
 693                 'title': 'The NBL Auction 2014',
 694                 'uploader': 'BADMINTON England',
 695                 'uploader_id': 'BADMINTONEvents',
 696                 'upload_date': '20140603',
 697                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 698             },
 699             'add_ie': ['Youtube'],
 700             'params': {
 701                 'skip_download': True,
 702             }
 703         },
 704         # MTVServices embed
 705         {
 706             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
 707             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
 708             'info_dict': {
 709                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
 710                 'ext': 'mp4',
 711                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
 712                 'description': 'Two valets share their love for movie star Liam Neesons.',
 713                 'timestamp': 1349922600,
 714                 'upload_date': '20121011',
 715             },
 716         },
 717         # YouTube embed via <data-embed-url="">
 718         {
 719             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 720             'info_dict': {
 721                 'id': '4vAffPZIT44',
 722                 'ext': 'mp4',
 723                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 724                 'uploader': 'Gameloft',
 725                 'uploader_id': 'gameloft',
 726                 'upload_date': '20140828',
 727                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 728             },
 729             'params': {
 730                 'skip_download': True,
 731             }
 732         },
 733         # YouTube <object> embed
 734         {
 735             'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
 736             'md5': '516718101ec834f74318df76259fb3cc',
 737             'info_dict': {
 738                 'id': 'msN87y-iEx0',
 739                 'ext': 'webm',
 740                 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
 741                 'upload_date': '20080526',
 742                 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
 743                 'uploader': 'Christopher Sykes',
 744                 'uploader_id': 'ChristopherJSykes',
 745             },
 746             'add_ie': ['Youtube'],
 747         },
 748         # Camtasia studio
 749         {
 750             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 751             'playlist': [{
 752                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 753                 'info_dict': {
 754                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 755                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 756                     'ext': 'flv',
 757                     'duration': 2235.90,
 758                 }
 759             }, {
 760                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 761                 'info_dict': {
 762                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 763                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 764                     'ext': 'flv',
 765                     'duration': 2235.93,
 766                 }
 767             }],
 768             'info_dict': {
 769                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 770             }
 771         },
 772         # Flowplayer
 773         {
 774             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 775             'md5': '9d65602bf31c6e20014319c7d07fba27',
 776             'info_dict': {
 777                 'id': '5123ea6d5e5a7',
 778                 'ext': 'mp4',
 779                 'age_limit': 18,
 780                 'uploader': 'www.handjobhub.com',
 781                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 782             }
 783         },
 784         # Multiple brightcove videos
 785         # https://github.com/rg3/youtube-dl/issues/2283
 786         {
 787             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 788             'info_dict': {
 789                 'id': 'always-never',
 790                 'title': 'Always / Never - The New Yorker',
 791             },
 792             'playlist_count': 3,
 793             'params': {
 794                 'extract_flat': False,
 795                 'skip_download': True,
 796             }
 797         },
 798         # MLB embed
 799         {
 800             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 801             'md5': '96f09a37e44da40dd083e12d9a683327',
 802             'info_dict': {
 803                 'id': '33322633',
 804                 'ext': 'mp4',
 805                 'title': 'Ump changes call to ball',
 806                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 807                 'duration': 48,
 808                 'timestamp': 1401537900,
 809                 'upload_date': '20140531',
 810                 'thumbnail': r're:^https?://.*\.jpg$',
 811             },
 812         },
 813         # Wistia embed
 814         {
 815             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 816             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
 817             'info_dict': {
 818                 'id': '6e2wtrbdaf',
 819                 'ext': 'mov',
 820                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
 821                 'description': 'a Paywall Videos video from Remilon',
 822                 'duration': 644.072,
 823                 'uploader': 'study.com',
 824                 'timestamp': 1459678540,
 825                 'upload_date': '20160403',
 826                 'filesize': 24687186,
 827             },
 828         },
 829         {
 830             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 831             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 832             'info_dict': {
 833                 'id': 'uxjb0lwrcz',
 834                 'ext': 'mp4',
 835                 'title': 'Conversation about Hexagonal Rails Part 1',
 836                 'description': 'a Martin Fowler video from ThoughtWorks',
 837                 'duration': 1715.0,
 838                 'uploader': 'thoughtworks.wistia.com',
 839                 'timestamp': 1401832161,
 840                 'upload_date': '20140603',
 841             },
 842         },
 843         # Wistia standard embed (async)
 844         {
 845             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
 846             'info_dict': {
 847                 'id': '807fafadvk',
 848                 'ext': 'mp4',
 849                 'title': 'Drip Brennan Dunn Workshop',
 850                 'description': 'a JV Webinars video from getdrip-1',
 851                 'duration': 4986.95,
 852                 'timestamp': 1463607249,
 853                 'upload_date': '20160518',
 854             },
 855             'params': {
 856                 'skip_download': True,
 857             }
 858         },
 859         # Soundcloud embed
 860         {
 861             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 862             'info_dict': {
 863                 'id': '174391317',
 864                 'ext': 'mp3',
 865                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 866                 'uploader': 'Sophos Security',
 867                 'title': 'Chet Chat 171 - Oct 29, 2014',
 868                 'upload_date': '20141029',
 869             }
 870         },
 871         # Soundcloud multiple embeds
 872         {
 873             'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
 874             'info_dict': {
 875                 'id': '52809',
 876                 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
 877             },
 878             'playlist_mincount': 7,
 879         },
 880         # TuneIn station embed
 881         {
 882             'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
 883             'info_dict': {
 884                 'id': '204146',
 885                 'ext': 'mp3',
 886                 'title': 'CNRV',
 887                 'location': 'Paris, France',
 888                 'is_live': True,
 889             },
 890             'params': {
 891                 # Live stream
 892                 'skip_download': True,
 893             },
 894         },
 895         # Livestream embed
 896         {
 897             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 898             'info_dict': {
 899                 'id': '67864563',
 900                 'ext': 'flv',
 901                 'upload_date': '20141112',
 902                 'title': 'Rosetta #CometLanding webcast HL 10',
 903             }
 904         },
 905         # Another Livestream embed, without 'new.' in URL
 906         {
 907             'url': 'https://www.freespeech.org/',
 908             'info_dict': {
 909                 'id': '123537347',
 910                 'ext': 'mp4',
 911                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 912             },
 913             'params': {
 914                 # Live stream
 915                 'skip_download': True,
 916             },
 917         },
 918         # LazyYT
 919         {
 920             'url': 'https://skiplagged.com/',
 921             'info_dict': {
 922                 'id': 'skiplagged',
 923                 'title': 'Skiplagged: The smart way to find cheap flights',
 924             },
 925             'playlist_mincount': 1,
 926             'add_ie': ['Youtube'],
 927         },
 928         # Cinchcast embed
 929         {
 930             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 931             'info_dict': {
 932                 'id': '7141703',
 933                 'ext': 'mp3',
 934                 'upload_date': '20141126',
 935                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 936             }
 937         },
 938         # Cinerama player
 939         {
 940             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 941             'info_dict': {
 942                 'id': '730m_DandD_1901_512k',
 943                 'ext': 'mp4',
 944                 'uploader': 'www.abc.net.au',
 945                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 946             }
 947         },
 948         # embedded viddler video
 949         {
 950             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 951             'info_dict': {
 952                 'id': '4d03aad9',
 953                 'ext': 'mp4',
 954                 'uploader': 'deadspin',
 955                 'title': 'WALL-TO-GORTAT',
 956                 'timestamp': 1422285291,
 957                 'upload_date': '20150126',
 958             },
 959             'add_ie': ['Viddler'],
 960         },
 961         # Libsyn embed
 962         {
 963             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 964             'info_dict': {
 965                 'id': '3377616',
 966                 'ext': 'mp3',
 967                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 968                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 969                 'upload_date': '20150220',
 970             },
 971             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
 972         },
 973         # jwplayer YouTube
 974         {
 975             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 976             'info_dict': {
 977                 'id': 'Mrj4DVp2zeA',
 978                 'ext': 'mp4',
 979                 'upload_date': '20150212',
 980                 'uploader': 'The National Archives UK',
 981                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 982                 'uploader_id': 'NationalArchives08',
 983                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 984             },
 985         },
 986         # jwplayer rtmp
 987         {
 988             'url': 'http://www.suffolk.edu/sjc/',
 989             'info_dict': {
 990                 'id': 'sjclive',
 991                 'ext': 'flv',
 992                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
 993                 'uploader': 'www.suffolk.edu',
 994             },
 995             'params': {
 996                 'skip_download': True,
 997             }
 998         },
 999         # Complex jwplayer
1000         {
1001             'url': 'http://www.indiedb.com/games/king-machine/videos',
1002             'info_dict': {
1003                 'id': 'videos',
1004                 'ext': 'mp4',
1005                 'title': 'king machine trailer 1',
1006                 'thumbnail': r're:^https?://.*\.jpg$',
1007             },
1008         },
1009         {
1010             # JWPlayer config passed as variable
1011             'url': 'http://www.txxx.com/videos/3326530/ariele/',
1012             'info_dict': {
1013                 'id': '3326530_hq',
1014                 'ext': 'mp4',
1015                 'title': 'ARIELE | Tube Cup',
1016                 'uploader': 'www.txxx.com',
1017                 'age_limit': 18,
1018             },
1019             'params': {
1020                 'skip_download': True,
1021             }
1022         },
1023         # rtl.nl embed
1024         {
1025             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
1026             'playlist_mincount': 5,
1027             'info_dict': {
1028                 'id': 'aanslagen-kopenhagen',
1029                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
1030             }
1031         },
1032         # Zapiks embed
1033         {
1034             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1035             'info_dict': {
1036                 'id': '118046',
1037                 'ext': 'mp4',
1038                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1039             }
1040         },
1041         # Kaltura embed (different embed code)
1042         {
1043             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1044             'info_dict': {
1045                 'id': '1_a52wc67y',
1046                 'ext': 'flv',
1047                 'upload_date': '20150127',
1048                 'uploader_id': 'PremierMedia',
1049                 'timestamp': int,
1050                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1051             },
1052         },
1053         # Kaltura embed with single quotes
1054         {
1055             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1056             'info_dict': {
1057                 'id': '0_izeg5utt',
1058                 'ext': 'mp4',
1059                 'title': '35871',
1060                 'timestamp': 1355743100,
1061                 'upload_date': '20121217',
1062                 'uploader_id': 'batchUser',
1063             },
1064             'add_ie': ['Kaltura'],
1065         },
1066         {
1067             # Kaltura embedded via quoted entry_id
1068             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1069             'info_dict': {
1070                 'id': '0_utuok90b',
1071                 'ext': 'mp4',
1072                 'title': '06_matthew_brender_raj_dutt',
1073                 'timestamp': 1466638791,
1074                 'upload_date': '20160622',
1075             },
1076             'add_ie': ['Kaltura'],
1077             'expected_warnings': [
1078                 'Could not send HEAD request'
1079             ],
1080             'params': {
1081                 'skip_download': True,
1082             }
1083         },
1084         {
1085             # Kaltura embedded, some fileExt broken (#11480)
1086             'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1087             'info_dict': {
1088                 'id': '1_sgtvehim',
1089                 'ext': 'mp4',
1090                 'title': 'Our "Standard Models" of particle physics and cosmology',
1091                 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1092                 'timestamp': 1321158993,
1093                 'upload_date': '20111113',
1094                 'uploader_id': 'kps1',
1095             },
1096             'add_ie': ['Kaltura'],
1097         },
1098         {
1099             # Kaltura iframe embed
1100             'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
1101             'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
1102             'info_dict': {
1103                 'id': '0_f2cfbpwy',
1104                 'ext': 'mp4',
1105                 'title': 'I. M. Pei: A Centennial Celebration',
1106                 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
1107                 'upload_date': '20170403',
1108                 'uploader_id': 'batchUser',
1109                 'timestamp': 1491232186,
1110             },
1111             'add_ie': ['Kaltura'],
1112         },
1113         # Eagle.Platform embed (generic URL)
1114         {
1115             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
1116             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1117             'info_dict': {
1118                 'id': '227304',
1119                 'ext': 'mp4',
1120                 'title': 'Навальный вышел на свободу',
1121                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
1122                 'thumbnail': r're:^https?://.*\.jpg$',
1123                 'duration': 87,
1124                 'view_count': int,
1125                 'age_limit': 0,
1126             },
1127         },
1128         # ClipYou (Eagle.Platform) embed (custom URL)
1129         {
1130             'url': 'http://muz-tv.ru/play/7129/',
1131             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1132             'info_dict': {
1133                 'id': '12820',
1134                 'ext': 'mp4',
1135                 'title': "'O Sole Mio",
1136                 'thumbnail': r're:^https?://.*\.jpg$',
1137                 'duration': 216,
1138                 'view_count': int,
1139             },
1140         },
1141         # Pladform embed
1142         {
1143             'url': 'http://muz-tv.ru/kinozal/view/7400/',
1144             'info_dict': {
1145                 'id': '100183293',
1146                 'ext': 'mp4',
1147                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1148                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1149                 'thumbnail': r're:^https?://.*\.jpg$',
1150                 'duration': 694,
1151                 'age_limit': 0,
1152             },
1153         },
1154         # Playwire embed
1155         {
1156             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1157             'info_dict': {
1158                 'id': '3519514',
1159                 'ext': 'mp4',
1160                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1161                 'thumbnail': r're:^https?://.*\.png$',
1162                 'duration': 45.115,
1163             },
1164         },
1165         # 5min embed
1166         {
1167             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1168             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1169             'info_dict': {
1170                 'id': '518726732',
1171                 'ext': 'mp4',
1172                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1173             },
1174         },
1175         # SVT embed
1176         {
1177             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1178             'info_dict': {
1179                 'id': '2900353',
1180                 'ext': 'flv',
1181                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1182                 'duration': 27,
1183                 'age_limit': 0,
1184             },
1185         },
1186         # Crooks and Liars embed
1187         {
1188             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1189             'info_dict': {
1190                 'id': '8RUoRhRi',
1191                 'ext': 'mp4',
1192                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1193                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1194                 'timestamp': 1428207000,
1195                 'upload_date': '20150405',
1196                 'uploader': 'Heather',
1197             },
1198         },
1199         # Crooks and Liars external embed
1200         {
1201             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1202             'info_dict': {
1203                 'id': 'MTE3MjUtMzQ2MzA',
1204                 'ext': 'mp4',
1205                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1206                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1207                 'timestamp': 1265032391,
1208                 'upload_date': '20100201',
1209                 'uploader': 'Heather',
1210             },
1211         },
1212         # NBC Sports vplayer embed
1213         {
1214             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1215             'info_dict': {
1216                 'id': 'ln7x1qSThw4k',
1217                 'ext': 'flv',
1218                 'title': "PFT Live: New leader in the 'new-look' defense",
1219                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1220                 'uploader': 'NBCU-SPORTS',
1221                 'upload_date': '20140107',
1222                 'timestamp': 1389118457,
1223             },
1224         },
1225         # NBC News embed
1226         {
1227             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1228             'md5': '1aa589c675898ae6d37a17913cf68d66',
1229             'info_dict': {
1230                 'id': '701714499682',
1231                 'ext': 'mp4',
1232                 'title': 'PREVIEW: On Assignment: David Letterman',
1233                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1234             },
1235         },
1236         # UDN embed
1237         {
1238             'url': 'https://video.udn.com/news/300346',
1239             'md5': 'fd2060e988c326991037b9aff9df21a6',
1240             'info_dict': {
1241                 'id': '300346',
1242                 'ext': 'mp4',
1243                 'title': '中一中男師變性 全校師生力挺',
1244                 'thumbnail': r're:^https?://.*\.jpg$',
1245             },
1246             'params': {
1247                 # m3u8 download
1248                 'skip_download': True,
1249             },
1250         },
1251         # Ooyala embed
1252         {
1253             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1254             'info_dict': {
1255                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1256                 'ext': 'mp4',
1257                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1258                 'title': 'This is what separates the Excel masters from the wannabes',
1259                 'duration': 191.933,
1260             },
1261             'params': {
1262                 # m3u8 downloads
1263                 'skip_download': True,
1264             }
1265         },
1266         # Brightcove URL in single quotes
1267         {
1268             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1269             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1270             'info_dict': {
1271                 'id': '4255764656001',
1272                 'ext': 'mp4',
1273                 'title': 'SN Presents: Russell Martin, World Citizen',
1274                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1275                 'uploader': 'Rogers Sportsnet',
1276                 'uploader_id': '1704050871',
1277                 'upload_date': '20150525',
1278                 'timestamp': 1432570283,
1279             },
1280         },
1281         # Dailymotion Cloud video
1282         {
1283             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1284             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1285             'info_dict': {
1286                 'id': 'x2uy8t3',
1287                 'ext': 'mp4',
1288                 'title': 'Sauvons les abeilles ! - Le débat',
1289                 'description': 'md5:d9082128b1c5277987825d684939ca26',
1290                 'thumbnail': r're:^https?://.*\.jpe?g$',
1291                 'timestamp': 1434970506,
1292                 'upload_date': '20150622',
1293                 'uploader': 'Public Sénat',
1294                 'uploader_id': 'xa9gza',
1295             }
1296         },
1297         # OnionStudios embed
1298         {
1299             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1300             'info_dict': {
1301                 'id': '2855',
1302                 'ext': 'mp4',
1303                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1304                 'thumbnail': r're:^https?://.*\.jpe?g$',
1305                 'uploader': 'ClickHole',
1306                 'uploader_id': 'clickhole',
1307             }
1308         },
1309         # SnagFilms embed
1310         {
1311             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1312             'info_dict': {
1313                 'id': '74849a00-85a9-11e1-9660-123139220831',
1314                 'ext': 'mp4',
1315                 'title': '#whilewewatch',
1316             }
1317         },
1318         # AdobeTVVideo embed
1319         {
1320             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1321             'md5': '43662b577c018ad707a63766462b1e87',
1322             'info_dict': {
1323                 'id': '2456',
1324                 'ext': 'mp4',
1325                 'title': 'New experience with Acrobat DC',
1326                 'description': 'New experience with Acrobat DC',
1327                 'duration': 248.667,
1328             },
1329         },
1330         # BrightcoveInPageEmbed embed
1331         {
1332             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1333             'info_dict': {
1334                 'id': '4238694884001',
1335                 'ext': 'flv',
1336                 'title': 'Tabletop: Dread, Last Thoughts',
1337                 'description': 'Tabletop: Dread, Last Thoughts',
1338                 'duration': 51690,
1339             },
1340         },
1341         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1342         # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
1343         {
1344             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1345             'info_dict': {
1346                 'id': '4785848093001',
1347                 'ext': 'mp4',
1348                 'title': 'The Cardinal Pell Interview',
1349                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1350                 'uploader': 'GlobeCast Australia - GlobeStream',
1351                 'uploader_id': '2733773828001',
1352                 'upload_date': '20160304',
1353                 'timestamp': 1457083087,
1354             },
1355             'params': {
1356                 # m3u8 downloads
1357                 'skip_download': True,
1358             },
1359         },
1360         # Another form of arte.tv embed
1361         {
1362             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1363             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1364             'info_dict': {
1365                 'id': '030273-562_PLUS7-F',
1366                 'ext': 'mp4',
1367                 'title': 'ARTE Reportage - Nulle part, en France',
1368                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1369                 'upload_date': '20160409',
1370             },
1371         },
1372         # LiveLeak embed
1373         {
1374             'url': 'http://www.wykop.pl/link/3088787/',
1375             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1376             'info_dict': {
1377                 'id': '874_1459135191',
1378                 'ext': 'mp4',
1379                 'title': 'Man shows poor quality of new apartment building',
1380                 'description': 'The wall is like a sand pile.',
1381                 'uploader': 'Lake8737',
1382             }
1383         },
1384         # Duplicated embedded video URLs
1385         {
1386             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1387             'info_dict': {
1388                 'id': '149298443_480_16c25b74_2',
1389                 'ext': 'mp4',
1390                 'title': 'vs. Blue Orange Spring Game',
1391                 'uploader': 'www.hudl.com',
1392             },
1393         },
1394         # twitter:player:stream embed
1395         {
1396             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1397             'info_dict': {
1398                 'id': 'master',
1399                 'ext': 'mp4',
1400                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1401                 'uploader': 'www.rtl.be',
1402             },
1403             'params': {
1404                 # m3u8 downloads
1405                 'skip_download': True,
1406             },
1407         },
1408         # twitter:player embed
1409         {
1410             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1411             'md5': 'a3e0df96369831de324f0778e126653c',
1412             'info_dict': {
1413                 'id': '4909620399001',
1414                 'ext': 'mp4',
1415                 'title': 'What Do Black Holes Sound Like?',
1416                 'description': 'what do black holes sound like',
1417                 'upload_date': '20160524',
1418                 'uploader_id': '29913724001',
1419                 'timestamp': 1464107587,
1420                 'uploader': 'TheAtlantic',
1421             },
1422             'add_ie': ['BrightcoveLegacy'],
1423         },
1424         # Facebook <iframe> embed
1425         {
1426             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1427             'md5': 'fbcde74f534176ecb015849146dd3aee',
1428             'info_dict': {
1429                 'id': '599637780109885',
1430                 'ext': 'mp4',
1431                 'title': 'Facebook video #599637780109885',
1432             },
1433         },
1434         # Facebook API embed
1435         {
1436             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1437             'md5': 'a47372ee61b39a7b90287094d447d94e',
1438             'info_dict': {
1439                 'id': '10153467542406923',
1440                 'ext': 'mp4',
1441                 'title': 'Facebook video #10153467542406923',
1442             },
1443         },
1444         # Wordpress "YouTube Video Importer" plugin
1445         {
1446             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1447             'md5': 'd16797741b560b485194eddda8121b48',
1448             'info_dict': {
1449                 'id': 'HNTXWDXV9Is',
1450                 'ext': 'mp4',
1451                 'title': 'Blue Devils Drumline Stanford lot 2016',
1452                 'upload_date': '20160627',
1453                 'uploader_id': 'GENOCIDE8GENERAL10',
1454                 'uploader': 'cylus cyrus',
1455             },
1456         },
1457         {
1458             # video stored on custom kaltura server
1459             'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1460             'md5': '537617d06e64dfed891fa1593c4b30cc',
1461             'info_dict': {
1462                 'id': '0_1iotm5bh',
1463                 'ext': 'mp4',
1464                 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1465                 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1466                 'uploader_id': 'videos.expansion@el-mundo.net',
1467                 'upload_date': '20150429',
1468                 'timestamp': 1430303472,
1469             },
1470             'add_ie': ['Kaltura'],
1471         },
1472         {
1473             # Non-standard Vimeo embed
1474             'url': 'https://openclassrooms.com/courses/understanding-the-web',
1475             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1476             'info_dict': {
1477                 'id': '148867247',
1478                 'ext': 'mp4',
1479                 'title': 'Understanding the web - Teaser',
1480                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1481                 'upload_date': '20151214',
1482                 'uploader': 'OpenClassrooms',
1483                 'uploader_id': 'openclassrooms',
1484             },
1485             'add_ie': ['Vimeo'],
1486         },
1487         {
1488             # generic vimeo embed that requires original URL passed as Referer
1489             'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1490             'only_matching': True,
1491         },
1492         {
1493             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1494             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1495             'info_dict': {
1496                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1497                 'ext': 'mp4',
1498                 'title': 'Big Buck Bunny',
1499                 'description': 'Royalty free test video',
1500                 'timestamp': 1432816365,
1501                 'upload_date': '20150528',
1502                 'is_live': False,
1503             },
1504             'params': {
1505                 'skip_download': True,
1506             },
1507             'add_ie': [ArkenaIE.ie_key()],
1508         },
1509         {
1510             'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1511             'info_dict': {
1512                 'id': '1c7141f46c',
1513                 'ext': 'mp4',
1514                 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1515             },
1516             'params': {
1517                 'skip_download': True,
1518             },
1519             'add_ie': [Vbox7IE.ie_key()],
1520         },
1521         {
1522             # DBTV embeds
1523             'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1524             'info_dict': {
1525                 'id': '43254897',
1526                 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1527             },
1528             'playlist_mincount': 3,
1529         },
1530         {
1531             # Videa embeds
1532             'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1533             'info_dict': {
1534                 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1535                 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1536             },
1537             'playlist_mincount': 2,
1538         },
1539         {
1540             # 20 minuten embed
1541             'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1542             'info_dict': {
1543                 'id': '523629',
1544                 'ext': 'mp4',
1545                 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1546                 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1547             },
1548             'params': {
1549                 'skip_download': True,
1550             },
1551             'add_ie': [TwentyMinutenIE.ie_key()],
1552         },
1553         {
1554             # VideoPress embed
1555             'url': 'https://en.support.wordpress.com/videopress/',
1556             'info_dict': {
1557                 'id': 'OcobLTqC',
1558                 'ext': 'm4v',
1559                 'title': 'IMG_5786',
1560                 'timestamp': 1435711927,
1561                 'upload_date': '20150701',
1562             },
1563             'params': {
1564                 'skip_download': True,
1565             },
1566             'add_ie': [VideoPressIE.ie_key()],
1567         },
1568         {
1569             # Rutube embed
1570             'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1571             'info_dict': {
1572                 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1573                 'ext': 'flv',
1574                 'title': 'Магаззино: Казань 2',
1575                 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1576                 'uploader': 'Магаззино',
1577                 'upload_date': '20170228',
1578                 'uploader_id': '996642',
1579             },
1580             'params': {
1581                 'skip_download': True,
1582             },
1583             'add_ie': [RutubeIE.ie_key()],
1584         },
1585         {
1586             # ThePlatform embedded with whitespaces in URLs
1587             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1588             'only_matching': True,
1589         },
1590         {
1591             # Senate ISVP iframe https
1592             'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1593             'md5': 'fb8c70b0b515e5037981a2492099aab8',
1594             'info_dict': {
1595                 'id': 'govtaff020316',
1596                 'ext': 'mp4',
1597                 'title': 'Integrated Senate Video Player',
1598             },
1599             'add_ie': [SenateISVPIE.ie_key()],
1600         },
1601         # {
1602         #     # TODO: find another test
1603         #     # http://schema.org/VideoObject
1604         #     'url': 'https://flipagram.com/f/nyvTSJMKId',
1605         #     'md5': '888dcf08b7ea671381f00fab74692755',
1606         #     'info_dict': {
1607         #         'id': 'nyvTSJMKId',
1608         #         'ext': 'mp4',
1609         #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1610         #         'description': '#love for cats.',
1611         #         'timestamp': 1461244995,
1612         #         'upload_date': '20160421',
1613         #     },
1614         #     'params': {
1615         #         'force_generic_extractor': True,
1616         #     },
1617         # }
1618     ]
1619
1620     def report_following_redirect(self, new_url):
1621         """Report information extraction."""
1622         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1623
1624     def _extract_rss(self, url, video_id, doc):
1625         playlist_title = doc.find('./channel/title').text
1626         playlist_desc_el = doc.find('./channel/description')
1627         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1628
1629         entries = []
1630         for it in doc.findall('./channel/item'):
1631             next_url = xpath_text(it, 'link', fatal=False)
1632             if not next_url:
1633                 enclosure_nodes = it.findall('./enclosure')
1634                 for e in enclosure_nodes:
1635                     next_url = e.attrib.get('url')
1636                     if next_url:
1637                         break
1638
1639             if not next_url:
1640                 continue
1641
1642             entries.append({
1643                 '_type': 'url',
1644                 'url': next_url,
1645                 'title': it.find('title').text,
1646             })
1647
1648         return {
1649             '_type': 'playlist',
1650             'id': url,
1651             'title': playlist_title,
1652             'description': playlist_desc,
1653             'entries': entries,
1654         }
1655
1656     def _extract_camtasia(self, url, video_id, webpage):
1657         """ Returns None if no camtasia video can be found. """
1658
1659         camtasia_cfg = self._search_regex(
1660             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1661             webpage, 'camtasia configuration file', default=None)
1662         if camtasia_cfg is None:
1663             return None
1664
1665         title = self._html_search_meta('DC.title', webpage, fatal=True)
1666
1667         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1668         camtasia_cfg = self._download_xml(
1669             camtasia_url, video_id,
1670             note='Downloading camtasia configuration',
1671             errnote='Failed to download camtasia configuration')
1672         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1673
1674         entries = []
1675         for n in fileset_node.getchildren():
1676             url_n = n.find('./uri')
1677             if url_n is None:
1678                 continue
1679
1680             entries.append({
1681                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1682                 'title': '%s - %s' % (title, n.tag),
1683                 'url': compat_urlparse.urljoin(url, url_n.text),
1684                 'duration': float_or_none(n.find('./duration').text),
1685             })
1686
1687         return {
1688             '_type': 'playlist',
1689             'entries': entries,
1690             'title': title,
1691         }
1692
1693     def _real_extract(self, url):
1694         if url.startswith('//'):
1695             return {
1696                 '_type': 'url',
1697                 'url': self.http_scheme() + url,
1698             }
1699
1700         parsed_url = compat_urlparse.urlparse(url)
1701         if not parsed_url.scheme:
1702             default_search = self._downloader.params.get('default_search')
1703             if default_search is None:
1704                 default_search = 'fixup_error'
1705
1706             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1707                 if '/' in url:
1708                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1709                     return self.url_result('http://' + url)
1710                 elif default_search != 'fixup_error':
1711                     if default_search == 'auto_warning':
1712                         if re.match(r'^(?:url|URL)$', url):
1713                             raise ExtractorError(
1714                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1715                                 expected=True)
1716                         else:
1717                             self._downloader.report_warning(
1718                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1719                     return self.url_result('ytsearch:' + url)
1720
1721             if default_search in ('error', 'fixup_error'):
1722                 raise ExtractorError(
1723                     '%r is not a valid URL. '
1724                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1725                     % (url, url), expected=True)
1726             else:
1727                 if ':' not in default_search:
1728                     default_search += ':'
1729                 return self.url_result(default_search + url)
1730
1731         url, smuggled_data = unsmuggle_url(url)
1732         force_videoid = None
1733         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1734         if smuggled_data and 'force_videoid' in smuggled_data:
1735             force_videoid = smuggled_data['force_videoid']
1736             video_id = force_videoid
1737         else:
1738             video_id = self._generic_id(url)
1739
1740         self.to_screen('%s: Requesting header' % video_id)
1741
1742         head_req = HEADRequest(url)
1743         head_response = self._request_webpage(
1744             head_req, video_id,
1745             note=False, errnote='Could not send HEAD request to %s' % url,
1746             fatal=False)
1747
1748         if head_response is not False:
1749             # Check for redirect
1750             new_url = head_response.geturl()
1751             if url != new_url:
1752                 self.report_following_redirect(new_url)
1753                 if force_videoid:
1754                     new_url = smuggle_url(
1755                         new_url, {'force_videoid': force_videoid})
1756                 return self.url_result(new_url)
1757
1758         full_response = None
1759         if head_response is False:
1760             request = sanitized_Request(url)
1761             request.add_header('Accept-Encoding', '*')
1762             full_response = self._request_webpage(request, video_id)
1763             head_response = full_response
1764
1765         info_dict = {
1766             'id': video_id,
1767             'title': self._generic_title(url),
1768             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1769         }
1770
1771         # Check for direct link to a video
1772         content_type = head_response.headers.get('Content-Type', '').lower()
1773         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1774         if m:
1775             format_id = m.group('format_id')
1776             if format_id.endswith('mpegurl'):
1777                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1778             elif format_id == 'f4m':
1779                 formats = self._extract_f4m_formats(url, video_id)
1780             else:
1781                 formats = [{
1782                     'format_id': m.group('format_id'),
1783                     'url': url,
1784                     'vcodec': 'none' if m.group('type') == 'audio' else None
1785                 }]
1786                 info_dict['direct'] = True
1787             self._sort_formats(formats)
1788             info_dict['formats'] = formats
1789             return info_dict
1790
1791         if not self._downloader.params.get('test', False) and not is_intentional:
1792             force = self._downloader.params.get('force_generic_extractor', False)
1793             self._downloader.report_warning(
1794                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1795
1796         if not full_response:
1797             request = sanitized_Request(url)
1798             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1799             # making it impossible to download only chunk of the file (yet we need only 512kB to
1800             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1801             # that will always result in downloading the whole file that is not desirable.
1802             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1803             # to accept raw bytes and being able to download only a chunk.
1804             # It may probably better to solve this by checking Content-Type for application/octet-stream
1805             # after HEAD request finishes, but not sure if we can rely on this.
1806             request.add_header('Accept-Encoding', '*')
1807             full_response = self._request_webpage(request, video_id)
1808
1809         first_bytes = full_response.read(512)
1810
1811         # Is it an M3U playlist?
1812         if first_bytes.startswith(b'#EXTM3U'):
1813             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1814             self._sort_formats(info_dict['formats'])
1815             return info_dict
1816
1817         # Maybe it's a direct link to a video?
1818         # Be careful not to download the whole thing!
1819         if not is_html(first_bytes):
1820             self._downloader.report_warning(
1821                 'URL could be a direct video link, returning it as such.')
1822             info_dict.update({
1823                 'direct': True,
1824                 'url': url,
1825             })
1826             return info_dict
1827
1828         webpage = self._webpage_read_content(
1829             full_response, url, video_id, prefix=first_bytes)
1830
1831         self.report_extraction(video_id)
1832
1833         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1834         try:
1835             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1836             if doc.tag == 'rss':
1837                 return self._extract_rss(url, video_id, doc)
1838             elif doc.tag == 'SmoothStreamingMedia':
1839                 info_dict['formats'] = self._parse_ism_formats(doc, url)
1840                 self._sort_formats(info_dict['formats'])
1841                 return info_dict
1842             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1843                 smil = self._parse_smil(doc, url, video_id)
1844                 self._sort_formats(smil['formats'])
1845                 return smil
1846             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1847                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1848             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1849                 info_dict['formats'] = self._parse_mpd_formats(
1850                     doc, video_id,
1851                     mpd_base_url=full_response.geturl().rpartition('/')[0],
1852                     mpd_url=url)
1853                 self._sort_formats(info_dict['formats'])
1854                 return info_dict
1855             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1856                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1857                 self._sort_formats(info_dict['formats'])
1858                 return info_dict
1859         except compat_xml_parse_error:
1860             pass
1861
1862         # Is it a Camtasia project?
1863         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1864         if camtasia_res is not None:
1865             return camtasia_res
1866
1867         # Sometimes embedded video player is hidden behind percent encoding
1868         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1869         # Unescaping the whole page allows to handle those cases in a generic way
1870         webpage = compat_urllib_parse_unquote(webpage)
1871
1872         # it's tempting to parse this further, but you would
1873         # have to take into account all the variations like
1874         #   Video Title - Site Name
1875         #   Site Name | Video Title
1876         #   Video Title - Tagline | Site Name
1877         # and so on and so forth; it's just not practical
1878         video_title = self._og_search_title(
1879             webpage, default=None) or self._html_search_regex(
1880             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1881             default='video')
1882
1883         # Try to detect age limit automatically
1884         age_limit = self._rta_search(webpage)
1885         # And then there are the jokers who advertise that they use RTA,
1886         # but actually don't.
1887         AGE_LIMIT_MARKERS = [
1888             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1889         ]
1890         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1891             age_limit = 18
1892
1893         # video uploader is domain name
1894         video_uploader = self._search_regex(
1895             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1896
1897         video_description = self._og_search_description(webpage, default=None)
1898         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1899
1900         # Look for Brightcove Legacy Studio embeds
1901         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1902         if bc_urls:
1903             self.to_screen('Brightcove video detected.')
1904             entries = [{
1905                 '_type': 'url',
1906                 'url': smuggle_url(bc_url, {'Referer': url}),
1907                 'ie_key': 'BrightcoveLegacy'
1908             } for bc_url in bc_urls]
1909
1910             return {
1911                 '_type': 'playlist',
1912                 'title': video_title,
1913                 'id': video_id,
1914                 'entries': entries,
1915             }
1916
1917         # Look for Brightcove New Studio embeds
1918         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1919         if bc_urls:
1920             return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
1921
1922         # Look for ThePlatform embeds
1923         tp_urls = ThePlatformIE._extract_urls(webpage)
1924         if tp_urls:
1925             return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
1926
1927         # Look for Vessel embeds
1928         vessel_urls = VesselIE._extract_urls(webpage)
1929         if vessel_urls:
1930             return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
1931
1932         # Look for embedded rtl.nl player
1933         matches = re.findall(
1934             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1935             webpage)
1936         if matches:
1937             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
1938
1939         vimeo_urls = VimeoIE._extract_urls(url, webpage)
1940         if vimeo_urls:
1941             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
1942
1943         vid_me_embed_url = self._search_regex(
1944             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1945             webpage, 'vid.me embed', default=None)
1946         if vid_me_embed_url is not None:
1947             return self.url_result(vid_me_embed_url, 'Vidme')
1948
1949         # Look for embedded YouTube player
1950         matches = re.findall(r'''(?x)
1951             (?:
1952                 <iframe[^>]+?src=|
1953                 data-video-url=|
1954                 <embed[^>]+?src=|
1955                 embedSWF\(?:\s*|
1956                 <object[^>]+data=|
1957                 new\s+SWFObject\(
1958             )
1959             (["\'])
1960                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1961                 (?:embed|v|p)/.+?)
1962             \1''', webpage)
1963         if matches:
1964             return self.playlist_from_matches(
1965                 matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
1966
1967         # Look for lazyYT YouTube embed
1968         matches = re.findall(
1969             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1970         if matches:
1971             return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
1972
1973         # Look for Wordpress "YouTube Video Importer" plugin
1974         matches = re.findall(r'''(?x)<div[^>]+
1975             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1976             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1977         if matches:
1978             return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
1979
1980         matches = DailymotionIE._extract_urls(webpage)
1981         if matches:
1982             return self.playlist_from_matches(matches, video_id, video_title)
1983
1984         # Look for embedded Dailymotion playlist player (#3822)
1985         m = re.search(
1986             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1987         if m:
1988             playlists = re.findall(
1989                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1990             if playlists:
1991                 return self.playlist_from_matches(
1992                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
1993
1994         # Look for embedded Wistia player
1995         match = re.search(
1996             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1997         if match:
1998             embed_url = self._proto_relative_url(
1999                 unescapeHTML(match.group('url')))
2000             return {
2001                 '_type': 'url_transparent',
2002                 'url': embed_url,
2003                 'ie_key': 'Wistia',
2004                 'uploader': video_uploader,
2005             }
2006
2007         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
2008         if match:
2009             return {
2010                 '_type': 'url_transparent',
2011                 'url': 'wistia:%s' % match.group('id'),
2012                 'ie_key': 'Wistia',
2013                 'uploader': video_uploader,
2014             }
2015
2016         match = re.search(
2017             r'''(?sx)
2018                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
2019                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
2020             ''', webpage)
2021         if match:
2022             return self.url_result(self._proto_relative_url(
2023                 'wistia:%s' % match.group('id')), 'Wistia')
2024
2025         # Look for SVT player
2026         svt_url = SVTIE._extract_url(webpage)
2027         if svt_url:
2028             return self.url_result(svt_url, 'SVT')
2029
2030         # Look for embedded condenast player
2031         matches = re.findall(
2032             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
2033             webpage)
2034         if matches:
2035             return {
2036                 '_type': 'playlist',
2037                 'entries': [{
2038                     '_type': 'url',
2039                     'ie_key': 'CondeNast',
2040                     'url': ma,
2041                 } for ma in matches],
2042                 'title': video_title,
2043                 'id': video_id,
2044             }
2045
2046         # Look for Bandcamp pages with custom domain
2047         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
2048         if mobj is not None:
2049             burl = unescapeHTML(mobj.group(1))
2050             # Don't set the extractor because it can be a track url or an album
2051             return self.url_result(burl)
2052
2053         # Look for embedded Vevo player
2054         mobj = re.search(
2055             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
2056         if mobj is not None:
2057             return self.url_result(mobj.group('url'))
2058
2059         # Look for embedded Viddler player
2060         mobj = re.search(
2061             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
2062             webpage)
2063         if mobj is not None:
2064             return self.url_result(mobj.group('url'))
2065
2066         # Look for NYTimes player
2067         mobj = re.search(
2068             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
2069             webpage)
2070         if mobj is not None:
2071             return self.url_result(mobj.group('url'))
2072
2073         # Look for Libsyn player
2074         mobj = re.search(
2075             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
2076         if mobj is not None:
2077             return self.url_result(mobj.group('url'))
2078
2079         # Look for Ooyala videos
2080         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
2081                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2082                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
2083                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
2084         if mobj is not None:
2085             embed_token = self._search_regex(
2086                 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2087                 webpage, 'ooyala embed token', default=None)
2088             return OoyalaIE._build_url_result(smuggle_url(
2089                 mobj.group('ec'), {
2090                     'domain': url,
2091                     'embed_token': embed_token,
2092                 }))
2093
2094         # Look for multiple Ooyala embeds on SBN network websites
2095         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2096         if mobj is not None:
2097             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2098             if embeds:
2099                 return self.playlist_from_matches(
2100                     embeds, video_id, video_title,
2101                     getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
2102
2103         # Look for Aparat videos
2104         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
2105         if mobj is not None:
2106             return self.url_result(mobj.group(1), 'Aparat')
2107
2108         # Look for MPORA videos
2109         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2110         if mobj is not None:
2111             return self.url_result(mobj.group(1), 'Mpora')
2112
2113         # Look for embedded NovaMov-based player
2114         mobj = re.search(
2115             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2116                     (?P<url>http://(?:(?:embed|www)\.)?
2117                         (?:novamov\.com|
2118                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
2119                            videoweed\.(?:es|com)|
2120                            movshare\.(?:net|sx|ag)|
2121                            divxstage\.(?:eu|net|ch|co|at|ag))
2122                         /embed\.php.+?)\1''', webpage)
2123         if mobj is not None:
2124             return self.url_result(mobj.group('url'))
2125
2126         # Look for embedded Facebook player
2127         facebook_url = FacebookIE._extract_url(webpage)
2128         if facebook_url is not None:
2129             return self.url_result(facebook_url, 'Facebook')
2130
2131         # Look for embedded VK player
2132         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2133         if mobj is not None:
2134             return self.url_result(mobj.group('url'), 'VK')
2135
2136         # Look for embedded Odnoklassniki player
2137         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2138         if mobj is not None:
2139             return self.url_result(mobj.group('url'), 'Odnoklassniki')
2140
2141         # Look for embedded ivi player
2142         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2143         if mobj is not None:
2144             return self.url_result(mobj.group('url'), 'Ivi')
2145
2146         # Look for embedded Huffington Post player
2147         mobj = re.search(
2148             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2149         if mobj is not None:
2150             return self.url_result(mobj.group('url'), 'HuffPost')
2151
2152         # Look for embed.ly
2153         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2154         if mobj is not None:
2155             return self.url_result(mobj.group('url'))
2156         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2157         if mobj is not None:
2158             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2159
2160         # Look for funnyordie embed
2161         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2162         if matches:
2163             return self.playlist_from_matches(
2164                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
2165
2166         # Look for BBC iPlayer embed
2167         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2168         if matches:
2169             return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
2170
2171         # Look for embedded RUTV player
2172         rutv_url = RUTVIE._extract_url(webpage)
2173         if rutv_url:
2174             return self.url_result(rutv_url, 'RUTV')
2175
2176         # Look for embedded TVC player
2177         tvc_url = TVCIE._extract_url(webpage)
2178         if tvc_url:
2179             return self.url_result(tvc_url, 'TVC')
2180
2181         # Look for embedded SportBox player
2182         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2183         if sportbox_urls:
2184             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
2185
2186         # Look for embedded XHamster player
2187         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2188         if xhamster_urls:
2189             return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2190
2191         # Look for embedded TNAFlixNetwork player
2192         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2193         if tnaflix_urls:
2194             return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2195
2196         # Look for embedded PornHub player
2197         pornhub_urls = PornHubIE._extract_urls(webpage)
2198         if pornhub_urls:
2199             return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
2200
2201         # Look for embedded DrTuber player
2202         drtuber_urls = DrTuberIE._extract_urls(webpage)
2203         if drtuber_urls:
2204             return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
2205
2206         # Look for embedded RedTube player
2207         redtube_urls = RedTubeIE._extract_urls(webpage)
2208         if redtube_urls:
2209             return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
2210
2211         # Look for embedded Tvigle player
2212         mobj = re.search(
2213             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2214         if mobj is not None:
2215             return self.url_result(mobj.group('url'), 'Tvigle')
2216
2217         # Look for embedded TED player
2218         mobj = re.search(
2219             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2220         if mobj is not None:
2221             return self.url_result(mobj.group('url'), 'TED')
2222
2223         # Look for embedded Ustream videos
2224         ustream_url = UstreamIE._extract_url(webpage)
2225         if ustream_url:
2226             return self.url_result(ustream_url, UstreamIE.ie_key())
2227
2228         # Look for embedded arte.tv player
2229         mobj = re.search(
2230             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2231             webpage)
2232         if mobj is not None:
2233             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2234
2235         # Look for embedded francetv player
2236         mobj = re.search(
2237             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2238             webpage)
2239         if mobj is not None:
2240             return self.url_result(mobj.group('url'))
2241
2242         # Look for embedded smotri.com player
2243         smotri_url = SmotriIE._extract_url(webpage)
2244         if smotri_url:
2245             return self.url_result(smotri_url, 'Smotri')
2246
2247         # Look for embedded Myvi.ru player
2248         myvi_url = MyviIE._extract_url(webpage)
2249         if myvi_url:
2250             return self.url_result(myvi_url)
2251
2252         # Look for embedded soundcloud player
2253         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2254         if soundcloud_urls:
2255             return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2256
2257         # Look for tunein player
2258         tunein_urls = TuneInBaseIE._extract_urls(webpage)
2259         if tunein_urls:
2260             return self.playlist_from_matches(tunein_urls, video_id, video_title)
2261
2262         # Look for embedded mtvservices player
2263         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2264         if mtvservices_url:
2265             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2266
2267         # Look for embedded yahoo player
2268         mobj = re.search(
2269             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2270             webpage)
2271         if mobj is not None:
2272             return self.url_result(mobj.group('url'), 'Yahoo')
2273
2274         # Look for embedded sbs.com.au player
2275         mobj = re.search(
2276             r'''(?x)
2277             (?:
2278                 <meta\s+property="og:video"\s+content=|
2279                 <iframe[^>]+?src=
2280             )
2281             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2282             webpage)
2283         if mobj is not None:
2284             return self.url_result(mobj.group('url'), 'SBS')
2285
2286         # Look for embedded Cinchcast player
2287         mobj = re.search(
2288             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2289             webpage)
2290         if mobj is not None:
2291             return self.url_result(mobj.group('url'), 'Cinchcast')
2292
2293         mobj = re.search(
2294             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2295             webpage)
2296         if not mobj:
2297             mobj = re.search(
2298                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2299                 webpage)
2300         if mobj is not None:
2301             return self.url_result(mobj.group('url'), 'MLB')
2302
2303         mobj = re.search(
2304             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2305             webpage)
2306         if mobj is not None:
2307             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2308
2309         mobj = re.search(
2310             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2311             webpage)
2312         if mobj is not None:
2313             return self.url_result(mobj.group('url'), 'Livestream')
2314
2315         # Look for Zapiks embed
2316         mobj = re.search(
2317             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2318         if mobj is not None:
2319             return self.url_result(mobj.group('url'), 'Zapiks')
2320
2321         # Look for Kaltura embeds
2322         kaltura_url = KalturaIE._extract_url(webpage)
2323         if kaltura_url:
2324             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2325
2326         # Look for Eagle.Platform embeds
2327         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2328         if eagleplatform_url:
2329             return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
2330
2331         # Look for ClipYou (uses Eagle.Platform) embeds
2332         mobj = re.search(
2333             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2334         if mobj is not None:
2335             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2336
2337         # Look for Pladform embeds
2338         pladform_url = PladformIE._extract_url(webpage)
2339         if pladform_url:
2340             return self.url_result(pladform_url)
2341
2342         # Look for Videomore embeds
2343         videomore_url = VideomoreIE._extract_url(webpage)
2344         if videomore_url:
2345             return self.url_result(videomore_url)
2346
2347         # Look for Webcaster embeds
2348         webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2349         if webcaster_url:
2350             return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2351
2352         # Look for Playwire embeds
2353         mobj = re.search(
2354             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2355         if mobj is not None:
2356             return self.url_result(mobj.group('url'))
2357
2358         # Look for 5min embeds
2359         mobj = re.search(
2360             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2361         if mobj is not None:
2362             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2363
2364         # Look for Crooks and Liars embeds
2365         mobj = re.search(
2366             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2367         if mobj is not None:
2368             return self.url_result(mobj.group('url'))
2369
2370         # Look for NBC Sports VPlayer embeds
2371         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2372         if nbc_sports_url:
2373             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2374
2375         # Look for NBC News embeds
2376         nbc_news_embed_url = re.search(
2377             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2378         if nbc_news_embed_url:
2379             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2380
2381         # Look for Google Drive embeds
2382         google_drive_url = GoogleDriveIE._extract_url(webpage)
2383         if google_drive_url:
2384             return self.url_result(google_drive_url, 'GoogleDrive')
2385
2386         # Look for UDN embeds
2387         mobj = re.search(
2388             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2389         if mobj is not None:
2390             return self.url_result(
2391                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2392
2393         # Look for Senate ISVP iframe
2394         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2395         if senate_isvp_url:
2396             return self.url_result(senate_isvp_url, 'SenateISVP')
2397
2398         # Look for Dailymotion Cloud videos
2399         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2400         if dmcloud_url:
2401             return self.url_result(dmcloud_url, 'DailymotionCloud')
2402
2403         # Look for OnionStudios embeds
2404         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2405         if onionstudios_url:
2406             return self.url_result(onionstudios_url)
2407
2408         # Look for ViewLift embeds
2409         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2410         if viewlift_url:
2411             return self.url_result(viewlift_url)
2412
2413         # Look for JWPlatform embeds
2414         jwplatform_url = JWPlatformIE._extract_url(webpage)
2415         if jwplatform_url:
2416             return self.url_result(jwplatform_url, 'JWPlatform')
2417
2418         # Look for Digiteka embeds
2419         digiteka_url = DigitekaIE._extract_url(webpage)
2420         if digiteka_url:
2421             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2422
2423         # Look for Arkena embeds
2424         arkena_url = ArkenaIE._extract_url(webpage)
2425         if arkena_url:
2426             return self.url_result(arkena_url, ArkenaIE.ie_key())
2427
2428         # Look for Piksel embeds
2429         piksel_url = PikselIE._extract_url(webpage)
2430         if piksel_url:
2431             return self.url_result(piksel_url, PikselIE.ie_key())
2432
2433         # Look for Limelight embeds
2434         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2435         if mobj:
2436             lm = {
2437                 'Media': 'media',
2438                 'Channel': 'channel',
2439                 'ChannelList': 'channel_list',
2440             }
2441             return self.url_result(smuggle_url('limelight:%s:%s' % (
2442                 lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
2443                 'Limelight%s' % mobj.group(1), mobj.group(2))
2444
2445         mobj = re.search(
2446             r'''(?sx)
2447                 <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
2448                     <param[^>]+
2449                         name=(["\'])flashVars\2[^>]+
2450                         value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
2451             ''', webpage)
2452         if mobj:
2453             return self.url_result(smuggle_url(
2454                 'limelight:media:%s' % mobj.group('id'),
2455                 {'source_url': url}), 'LimelightMedia', mobj.group('id'))
2456
2457         # Look for AdobeTVVideo embeds
2458         mobj = re.search(
2459             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2460             webpage)
2461         if mobj is not None:
2462             return self.url_result(
2463                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2464                 'AdobeTVVideo')
2465
2466         # Look for Vine embeds
2467         mobj = re.search(
2468             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2469             webpage)
2470         if mobj is not None:
2471             return self.url_result(
2472                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2473
2474         # Look for VODPlatform embeds
2475         mobj = re.search(
2476             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2477             webpage)
2478         if mobj is not None:
2479             return self.url_result(
2480                 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2481
2482         # Look for Mangomolo embeds
2483         mobj = re.search(
2484             r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2485                 (?:
2486                     video\?.*?\bid=(?P<video_id>\d+)|
2487                     index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2488                 ).+?)\1''', webpage)
2489         if mobj is not None:
2490             info = {
2491                 '_type': 'url_transparent',
2492                 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2493                 'title': video_title,
2494                 'description': video_description,
2495                 'thumbnail': video_thumbnail,
2496                 'uploader': video_uploader,
2497             }
2498             video_id = mobj.group('video_id')
2499             if video_id:
2500                 info.update({
2501                     'ie_key': 'MangomoloVideo',
2502                     'id': video_id,
2503                 })
2504             else:
2505                 info.update({
2506                     'ie_key': 'MangomoloLive',
2507                     'id': mobj.group('channel_id'),
2508                 })
2509             return info
2510
2511         # Look for Instagram embeds
2512         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2513         if instagram_embed_url is not None:
2514             return self.url_result(
2515                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2516
2517         # Look for LiveLeak embeds
2518         liveleak_url = LiveLeakIE._extract_url(webpage)
2519         if liveleak_url:
2520             return self.url_result(liveleak_url, 'LiveLeak')
2521
2522         # Look for 3Q SDN embeds
2523         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2524         if threeqsdn_url:
2525             return {
2526                 '_type': 'url_transparent',
2527                 'ie_key': ThreeQSDNIE.ie_key(),
2528                 'url': self._proto_relative_url(threeqsdn_url),
2529                 'title': video_title,
2530                 'description': video_description,
2531                 'thumbnail': video_thumbnail,
2532                 'uploader': video_uploader,
2533             }
2534
2535         # Look for VBOX7 embeds
2536         vbox7_url = Vbox7IE._extract_url(webpage)
2537         if vbox7_url:
2538             return self.url_result(vbox7_url, Vbox7IE.ie_key())
2539
2540         # Look for DBTV embeds
2541         dbtv_urls = DBTVIE._extract_urls(webpage)
2542         if dbtv_urls:
2543             return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
2544
2545         # Look for Videa embeds
2546         videa_urls = VideaIE._extract_urls(webpage)
2547         if videa_urls:
2548             return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
2549
2550         # Look for 20 minuten embeds
2551         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2552         if twentymin_urls:
2553             return self.playlist_from_matches(
2554                 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
2555
2556         # Look for Openload embeds
2557         openload_urls = OpenloadIE._extract_urls(webpage)
2558         if openload_urls:
2559             return self.playlist_from_matches(
2560                 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
2561
2562         # Look for VideoPress embeds
2563         videopress_urls = VideoPressIE._extract_urls(webpage)
2564         if videopress_urls:
2565             return self.playlist_from_matches(
2566                 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
2567
2568         # Look for Rutube embeds
2569         rutube_urls = RutubeIE._extract_urls(webpage)
2570         if rutube_urls:
2571             return self.playlist_from_matches(
2572                 rutube_urls, ie=RutubeIE.ie_key())
2573
2574         # Looking for http://schema.org/VideoObject
2575         json_ld = self._search_json_ld(
2576             webpage, video_id, default={}, expected_type='VideoObject')
2577         if json_ld.get('url'):
2578             info_dict.update({
2579                 'title': video_title or info_dict['title'],
2580                 'description': video_description,
2581                 'thumbnail': video_thumbnail,
2582                 'age_limit': age_limit
2583             })
2584             info_dict.update(json_ld)
2585             return info_dict
2586
2587         # Look for HTML5 media
2588         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
2589         if entries:
2590             for entry in entries:
2591                 entry.update({
2592                     'id': video_id,
2593                     'title': video_title,
2594                 })
2595                 self._sort_formats(entry['formats'])
2596             return self.playlist_result(entries)
2597
2598         jwplayer_data = self._find_jwplayer_data(
2599             webpage, video_id, transform_source=js_to_json)
2600         if jwplayer_data:
2601             info = self._parse_jwplayer_data(
2602                 jwplayer_data, video_id, require_title=False, base_url=url)
2603             if not info.get('title'):
2604                 info['title'] = video_title
2605             return info
2606
2607         def check_video(vurl):
2608             if YoutubeIE.suitable(vurl):
2609                 return True
2610             if RtmpIE.suitable(vurl):
2611                 return True
2612             vpath = compat_urlparse.urlparse(vurl).path
2613             vext = determine_ext(vpath)
2614             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
2615
2616         def filter_video(urls):
2617             return list(filter(check_video, urls))
2618
2619         # Start with something easy: JW Player in SWFObject
2620         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2621         if not found:
2622             # Look for gorilla-vid style embedding
2623             found = filter_video(re.findall(r'''(?sx)
2624                 (?:
2625                     jw_plugins|
2626                     JWPlayerOptions|
2627                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2628                 )
2629                 .*?
2630                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2631         if not found:
2632             # Broaden the search a little bit
2633             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2634         if not found:
2635             # Broaden the findall a little bit: JWPlayer JS loader
2636             found = filter_video(re.findall(
2637                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2638         if not found:
2639             # Flow player
2640             found = filter_video(re.findall(r'''(?xs)
2641                 flowplayer\("[^"]+",\s*
2642                     \{[^}]+?\}\s*,
2643                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2644                         ["']?url["']?\s*:\s*["']([^"']+)["']
2645             ''', webpage))
2646         if not found:
2647             # Cinerama player
2648             found = re.findall(
2649                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2650         if not found:
2651             # Try to find twitter cards info
2652             # twitter:player:stream should be checked before twitter:player since
2653             # it is expected to contain a raw stream (see
2654             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2655             found = filter_video(re.findall(
2656                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2657         if not found:
2658             # We look for Open Graph info:
2659             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
2660             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2661             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2662             if m_video_type is not None:
2663                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2664         if not found:
2665             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2666             found = re.search(
2667                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2668                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2669                 webpage)
2670             if not found:
2671                 # Look also in Refresh HTTP header
2672                 refresh_header = head_response.headers.get('Refresh')
2673                 if refresh_header:
2674                     # In python 2 response HTTP headers are bytestrings
2675                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2676                         refresh_header = refresh_header.decode('iso-8859-1')
2677                     found = re.search(REDIRECT_REGEX, refresh_header)
2678             if found:
2679                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2680                 if new_url != url:
2681                     self.report_following_redirect(new_url)
2682                     return {
2683                         '_type': 'url',
2684                         'url': new_url,
2685                     }
2686                 else:
2687                     found = None
2688
2689         if not found:
2690             # twitter:player is a https URL to iframe player that may or may not
2691             # be supported by youtube-dl thus this is checked the very last (see
2692             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2693             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
2694             if embed_url:
2695                 return self.url_result(embed_url)
2696
2697         if not found:
2698             raise UnsupportedError(url)
2699
2700         entries = []
2701         for video_url in orderedSet(found):
2702             video_url = unescapeHTML(video_url)
2703             video_url = video_url.replace('\\/', '/')
2704             video_url = compat_urlparse.urljoin(url, video_url)
2705             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2706
2707             # Sometimes, jwplayer extraction will result in a YouTube URL
2708             if YoutubeIE.suitable(video_url):
2709                 entries.append(self.url_result(video_url, 'Youtube'))
2710                 continue
2711
2712             # here's a fun little line of code for you:
2713             video_id = os.path.splitext(video_id)[0]
2714
2715             entry_info_dict = {
2716                 'id': video_id,
2717                 'uploader': video_uploader,
2718                 'title': video_title,
2719                 'age_limit': age_limit,
2720             }
2721
2722             if RtmpIE.suitable(video_url):
2723                 entry_info_dict.update({
2724                     '_type': 'url_transparent',
2725                     'ie_key': RtmpIE.ie_key(),
2726                     'url': video_url,
2727                 })
2728                 entries.append(entry_info_dict)
2729                 continue
2730
2731             ext = determine_ext(video_url)
2732             if ext == 'smil':
2733                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2734             elif ext == 'xspf':
2735                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2736             elif ext == 'm3u8':
2737                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2738             elif ext == 'mpd':
2739                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2740             elif ext == 'f4m':
2741                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2742             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
2743                 # Just matching .ism/manifest is not enough to be reliably sure
2744                 # whether it's actually an ISM manifest or some other streaming
2745                 # manifest since there are various streaming URL formats
2746                 # possible (see [1]) as well as some other shenanigans like
2747                 # .smil/manifest URLs that actually serve an ISM (see [2]) and
2748                 # so on.
2749                 # Thus the most reasonable way to solve this is to delegate
2750                 # to generic extractor in order to look into the contents of
2751                 # the manifest itself.
2752                 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
2753                 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
2754                 entry_info_dict = self.url_result(
2755                     smuggle_url(video_url, {'to_generic': True}),
2756                     GenericIE.ie_key())
2757             else:
2758                 entry_info_dict['url'] = video_url
2759
2760             if entry_info_dict.get('formats'):
2761                 self._sort_formats(entry_info_dict['formats'])
2762
2763             entries.append(entry_info_dict)
2764
2765         if len(entries) == 1:
2766             return entries[0]
2767         else:
2768             for num, e in enumerate(entries, start=1):
2769                 # 'url' results don't have a title
2770                 if e.get('title') is not None:
2771                     e['title'] = '%s (%d)' % (e['title'], num)
2772             return {
2773                 '_type': 'playlist',
2774                 'entries': entries,
2775             }