_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     js_to_json,
  24     orderedSet,
  25     sanitized_Request,
  26     smuggle_url,
  27     unescapeHTML,
  28     unified_strdate,
  29     unsmuggle_url,
  30     UnsupportedError,
  31     xpath_text,
  32 )
  33 from .commonprotocols import RtmpIE
  34 from .brightcove import (
  35     BrightcoveLegacyIE,
  36     BrightcoveNewIE,
  37 )
  38 from .nbc import NBCSportsVPlayerIE
  39 from .ooyala import OoyalaIE
  40 from .rutv import RUTVIE
  41 from .tvc import TVCIE
  42 from .sportbox import SportBoxEmbedIE
  43 from .smotri import SmotriIE
  44 from .myvi import MyviIE
  45 from .condenast import CondeNastIE
  46 from .udn import UDNEmbedIE
  47 from .senateisvp import SenateISVPIE
  48 from .svt import SVTIE
  49 from .pornhub import PornHubIE
  50 from .xhamster import XHamsterEmbedIE
  51 from .tnaflix import TNAFlixNetworkEmbedIE
  52 from .drtuber import DrTuberIE
  53 from .redtube import RedTubeIE
  54 from .vimeo import VimeoIE
  55 from .dailymotion import (
  56     DailymotionIE,
  57     DailymotionCloudIE,
  58 )
  59 from .onionstudios import OnionStudiosIE
  60 from .viewlift import ViewLiftEmbedIE
  61 from .mtv import MTVServicesEmbeddedIE
  62 from .pladform import PladformIE
  63 from .videomore import VideomoreIE
  64 from .webcaster import WebcasterFeedIE
  65 from .googledrive import GoogleDriveIE
  66 from .jwplatform import JWPlatformIE
  67 from .digiteka import DigitekaIE
  68 from .arkena import ArkenaIE
  69 from .instagram import InstagramIE
  70 from .liveleak import LiveLeakIE
  71 from .threeqsdn import ThreeQSDNIE
  72 from .theplatform import ThePlatformIE
  73 from .vessel import VesselIE
  74 from .kaltura import KalturaIE
  75 from .eagleplatform import EaglePlatformIE
  76 from .facebook import FacebookIE
  77 from .soundcloud import SoundcloudIE
  78 from .tunein import TuneInBaseIE
  79 from .vbox7 import Vbox7IE
  80 from .dbtv import DBTVIE
  81 from .piksel import PikselIE
  82 from .videa import VideaIE
  83 from .twentymin import TwentyMinutenIE
  84 from .ustream import UstreamIE
  85 from .openload import OpenloadIE
  86 from .videopress import VideoPressIE
  87
  88
  89 class GenericIE(InfoExtractor):
  90     IE_DESC = 'Generic downloader that works on some sites'
  91     _VALID_URL = r'.*'
  92     IE_NAME = 'generic'
  93     _TESTS = [
  94         # Direct link to a video
  95         {
  96             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  97             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  98             'info_dict': {
  99                 'id': 'trailer',
 100                 'ext': 'mp4',
 101                 'title': 'trailer',
 102                 'upload_date': '20100513',
 103             }
 104         },
 105         # Direct link to media delivered compressed (until Accept-Encoding is *)
 106         {
 107             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
 108             'md5': '128c42e68b13950268b648275386fc74',
 109             'info_dict': {
 110                 'id': 'FictionJunction-Parallel_Hearts',
 111                 'ext': 'flac',
 112                 'title': 'FictionJunction-Parallel_Hearts',
 113                 'upload_date': '20140522',
 114             },
 115             'expected_warnings': [
 116                 'URL could be a direct video link, returning it as such.'
 117             ],
 118             'skip': 'URL invalid',
 119         },
 120         # Direct download with broken HEAD
 121         {
 122             'url': 'http://ai-radio.org:8000/radio.opus',
 123             'info_dict': {
 124                 'id': 'radio',
 125                 'ext': 'opus',
 126                 'title': 'radio',
 127             },
 128             'params': {
 129                 'skip_download': True,  # infinite live stream
 130             },
 131             'expected_warnings': [
 132                 r'501.*Not Implemented',
 133                 r'400.*Bad Request',
 134             ],
 135         },
 136         # Direct link with incorrect MIME type
 137         {
 138             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 139             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 140             'info_dict': {
 141                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 142                 'id': '5_Lennart_Poettering_-_Systemd',
 143                 'ext': 'webm',
 144                 'title': '5_Lennart_Poettering_-_Systemd',
 145                 'upload_date': '20141120',
 146             },
 147             'expected_warnings': [
 148                 'URL could be a direct video link, returning it as such.'
 149             ]
 150         },
 151         # RSS feed
 152         {
 153             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 154             'info_dict': {
 155                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 156                 'title': 'Zero Punctuation',
 157                 'description': 're:.*groundbreaking video review series.*'
 158             },
 159             'playlist_mincount': 11,
 160         },
 161         # RSS feed with enclosure
 162         {
 163             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 164             'info_dict': {
 165                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 166                 'ext': 'm4v',
 167                 'upload_date': '20150228',
 168                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 169             }
 170         },
 171         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 172         {
 173             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 174             'info_dict': {
 175                 'id': 'smil',
 176                 'ext': 'mp4',
 177                 'title': 'Automatics, robotics and biocybernetics',
 178                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 179                 'upload_date': '20130627',
 180                 'formats': 'mincount:16',
 181                 'subtitles': 'mincount:1',
 182             },
 183             'params': {
 184                 'force_generic_extractor': True,
 185                 'skip_download': True,
 186             },
 187         },
 188         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 189         {
 190             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 191             'info_dict': {
 192                 'id': 'hds',
 193                 'ext': 'flv',
 194                 'title': 'hds',
 195                 'formats': 'mincount:1',
 196             },
 197             'params': {
 198                 'skip_download': True,
 199             },
 200         },
 201         # SMIL from https://www.restudy.dk/video/play/id/1637
 202         {
 203             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 204             'info_dict': {
 205                 'id': 'video_1637',
 206                 'ext': 'flv',
 207                 'title': 'video_1637',
 208                 'formats': 'mincount:3',
 209             },
 210             'params': {
 211                 'skip_download': True,
 212             },
 213         },
 214         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 215         {
 216             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 217             'info_dict': {
 218                 'id': 'smil-service',
 219                 'ext': 'flv',
 220                 'title': 'smil-service',
 221                 'formats': 'mincount:1',
 222             },
 223             'params': {
 224                 'skip_download': True,
 225             },
 226         },
 227         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 228         {
 229             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 230             'info_dict': {
 231                 'id': '4719370',
 232                 'ext': 'mp4',
 233                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 234                 'formats': 'mincount:3',
 235             },
 236             'params': {
 237                 'skip_download': True,
 238             },
 239         },
 240         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 241         {
 242             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 243             'info_dict': {
 244                 'id': 'mZlp2ctYIUEB',
 245                 'ext': 'mp4',
 246                 'title': 'Tikibad ontruimd wegens brand',
 247                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 248                 'thumbnail': r're:^https?://.*\.jpg$',
 249                 'duration': 33,
 250             },
 251             'params': {
 252                 'skip_download': True,
 253             },
 254         },
 255         # MPD from http://dash-mse-test.appspot.com/media.html
 256         {
 257             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 258             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 259             'info_dict': {
 260                 'id': 'car-20120827-manifest',
 261                 'ext': 'mp4',
 262                 'title': 'car-20120827-manifest',
 263                 'formats': 'mincount:9',
 264                 'upload_date': '20130904',
 265             },
 266             'params': {
 267                 'format': 'bestvideo',
 268             },
 269         },
 270         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 271         {
 272             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 273             'info_dict': {
 274                 'id': 'content',
 275                 'ext': 'mp4',
 276                 'title': 'content',
 277                 'formats': 'mincount:8',
 278             },
 279             'params': {
 280                 # m3u8 downloads
 281                 'skip_download': True,
 282             },
 283             'skip': 'video gone',
 284         },
 285         # m3u8 served with Content-Type: text/plain
 286         {
 287             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
 288             'info_dict': {
 289                 'id': 'index',
 290                 'ext': 'mp4',
 291                 'title': 'index',
 292                 'upload_date': '20140720',
 293                 'formats': 'mincount:11',
 294             },
 295             'params': {
 296                 # m3u8 downloads
 297                 'skip_download': True,
 298             },
 299             'skip': 'video gone',
 300         },
 301         # google redirect
 302         {
 303             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 304             'info_dict': {
 305                 'id': 'cmQHVoWB5FY',
 306                 'ext': 'mp4',
 307                 'upload_date': '20130224',
 308                 'uploader_id': 'TheVerge',
 309                 'description': r're:^Chris Ziegler takes a look at the\.*',
 310                 'uploader': 'The Verge',
 311                 'title': 'First Firefox OS phones side-by-side',
 312             },
 313             'params': {
 314                 'skip_download': False,
 315             }
 316         },
 317         {
 318             # redirect in Refresh HTTP header
 319             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 320             'info_dict': {
 321                 'id': 'pO8h3EaFRdo',
 322                 'ext': 'mp4',
 323                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 324                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 325                 'upload_date': '20150917',
 326                 'uploader_id': 'brtvofficial',
 327                 'uploader': 'Boiler Room',
 328             },
 329             'params': {
 330                 'skip_download': False,
 331             },
 332         },
 333         {
 334             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 335             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 336             'info_dict': {
 337                 'id': '13601338388002',
 338                 'ext': 'mp4',
 339                 'uploader': 'www.hodiho.fr',
 340                 'title': 'R\u00e9gis plante sa Jeep',
 341             }
 342         },
 343         # bandcamp page with custom domain
 344         {
 345             'add_ie': ['Bandcamp'],
 346             'url': 'http://bronyrock.com/track/the-pony-mash',
 347             'info_dict': {
 348                 'id': '3235767654',
 349                 'ext': 'mp3',
 350                 'title': 'The Pony Mash',
 351                 'uploader': 'M_Pallante',
 352             },
 353             'skip': 'There is a limit of 200 free downloads / month for the test song',
 354         },
 355         {
 356             # embedded brightcove video
 357             # it also tests brightcove videos that need to set the 'Referer'
 358             # in the http requests
 359             'add_ie': ['BrightcoveLegacy'],
 360             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 361             'info_dict': {
 362                 'id': '2765128793001',
 363                 'ext': 'mp4',
 364                 'title': 'Le cours de bourse : l’analyse technique',
 365                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 366                 'uploader': 'BFM BUSINESS',
 367             },
 368             'params': {
 369                 'skip_download': True,
 370             },
 371         },
 372         {
 373             # embedded with itemprop embedURL and video id spelled as `idVideo`
 374             'add_id': ['BrightcoveLegacy'],
 375             'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
 376             'info_dict': {
 377                 'id': '5255628253001',
 378                 'ext': 'mp4',
 379                 'title': 'md5:37c519b1128915607601e75a87995fc0',
 380                 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
 381                 'uploader': 'BFM BUSINESS',
 382                 'uploader_id': '876450612001',
 383                 'timestamp': 1482255315,
 384                 'upload_date': '20161220',
 385             },
 386             'params': {
 387                 'skip_download': True,
 388             },
 389         },
 390         {
 391             # https://github.com/rg3/youtube-dl/issues/2253
 392             'url': 'http://bcove.me/i6nfkrc3',
 393             'md5': '0ba9446db037002366bab3b3eb30c88c',
 394             'info_dict': {
 395                 'id': '3101154703001',
 396                 'ext': 'mp4',
 397                 'title': 'Still no power',
 398                 'uploader': 'thestar.com',
 399                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 400             },
 401             'add_ie': ['BrightcoveLegacy'],
 402             'skip': 'video gone',
 403         },
 404         {
 405             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 406             'md5': 'fb973ecf6e4a78a67453647444222983',
 407             'info_dict': {
 408                 'id': '3414141473001',
 409                 'ext': 'mp4',
 410                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 411                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 412                 'uploader': 'Championat',
 413             },
 414         },
 415         {
 416             # https://github.com/rg3/youtube-dl/issues/3541
 417             'add_ie': ['BrightcoveLegacy'],
 418             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 419             'info_dict': {
 420                 'id': '3866516442001',
 421                 'ext': 'mp4',
 422                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 423                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 424                 'uploader': 'SBS Broadcasting',
 425             },
 426             'skip': 'Restricted to Netherlands',
 427             'params': {
 428                 'skip_download': True,  # m3u8 download
 429             },
 430         },
 431         {
 432             # Brightcove with alternative playerID key
 433             'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
 434             'info_dict': {
 435                 'id': 'nmeth.2062_SV1',
 436                 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
 437             },
 438             'playlist': [{
 439                 'info_dict': {
 440                     'id': '2228375078001',
 441                     'ext': 'mp4',
 442                     'title': 'nmeth.2062-sv1',
 443                     'description': 'nmeth.2062-sv1',
 444                     'timestamp': 1363357591,
 445                     'upload_date': '20130315',
 446                     'uploader': 'Nature Publishing Group',
 447                     'uploader_id': '1964492299001',
 448                 },
 449             }],
 450         },
 451         # ooyala video
 452         {
 453             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 454             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 455             'info_dict': {
 456                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 457                 'ext': 'mp4',
 458                 'title': '2cc213299525360.mov',  # that's what we get
 459                 'duration': 238.231,
 460             },
 461             'add_ie': ['Ooyala'],
 462         },
 463         {
 464             # ooyala video embedded with http://player.ooyala.com/iframe.js
 465             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 466             'info_dict': {
 467                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 468                 'ext': 'mp4',
 469                 'title': '"Steve Jobs: Man in the Machine" trailer',
 470                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 471                 'duration': 135.427,
 472             },
 473             'params': {
 474                 'skip_download': True,
 475             },
 476             'skip': 'movie expired',
 477         },
 478         # embed.ly video
 479         {
 480             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 481             'info_dict': {
 482                 'id': '9ODmcdjQcHQ',
 483                 'ext': 'mp4',
 484                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 485                 'upload_date': '20140225',
 486                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 487                 'uploader': 'Tested',
 488                 'uploader_id': 'testedcom',
 489             },
 490             # No need to test YoutubeIE here
 491             'params': {
 492                 'skip_download': True,
 493             },
 494         },
 495         # funnyordie embed
 496         {
 497             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 498             'info_dict': {
 499                 'id': '18e820ec3f',
 500                 'ext': 'mp4',
 501                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 502                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 503             },
 504             # HEAD requests lead to endless 301, while GET is OK
 505             'expected_warnings': ['301'],
 506         },
 507         # RUTV embed
 508         {
 509             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 510             'info_dict': {
 511                 'id': '776940',
 512                 'ext': 'mp4',
 513                 'title': 'Охотское море стало целиком российским',
 514                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 515             },
 516             'params': {
 517                 # m3u8 download
 518                 'skip_download': True,
 519             },
 520         },
 521         # TVC embed
 522         {
 523             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 524             'info_dict': {
 525                 'id': '55304',
 526                 'ext': 'mp4',
 527                 'title': 'Дошкольное воспитание',
 528             },
 529         },
 530         # SportBox embed
 531         {
 532             'url': 'http://www.vestifinance.ru/articles/25753',
 533             'info_dict': {
 534                 'id': '25753',
 535                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 536             },
 537             'playlist': [{
 538                 'info_dict': {
 539                     'id': '370908',
 540                     'title': 'Госзаказ. День 3',
 541                     'ext': 'mp4',
 542                 }
 543             }, {
 544                 'info_dict': {
 545                     'id': '370905',
 546                     'title': 'Госзаказ. День 2',
 547                     'ext': 'mp4',
 548                 }
 549             }, {
 550                 'info_dict': {
 551                     'id': '370902',
 552                     'title': 'Госзаказ. День 1',
 553                     'ext': 'mp4',
 554                 }
 555             }],
 556             'params': {
 557                 # m3u8 download
 558                 'skip_download': True,
 559             },
 560         },
 561         # Myvi.ru embed
 562         {
 563             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 564             'info_dict': {
 565                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 566                 'ext': 'mp4',
 567                 'title': 'Ужастики, русский трейлер (2015)',
 568                 'thumbnail': r're:^https?://.*\.jpg$',
 569                 'duration': 153,
 570             }
 571         },
 572         # XHamster embed
 573         {
 574             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 575             'info_dict': {
 576                 'id': 'showthread',
 577                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 578             },
 579             'playlist_mincount': 7,
 580             # This forum does not allow <iframe> syntaxes anymore
 581             # Now HTML tags are displayed as-is
 582             'skip': 'No videos on this page',
 583         },
 584         # Embedded TED video
 585         {
 586             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 587             'md5': '65fdff94098e4a607385a60c5177c638',
 588             'info_dict': {
 589                 'id': '1969',
 590                 'ext': 'mp4',
 591                 'title': 'Hidden miracles of the natural world',
 592                 'uploader': 'Louie Schwartzberg',
 593                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 594             }
 595         },
 596         # nowvideo embed hidden behind percent encoding
 597         {
 598             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 599             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 600             'info_dict': {
 601                 'id': '06e53103ca9aa',
 602                 'ext': 'flv',
 603                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 604                 'description': 'No description',
 605             },
 606         },
 607         # arte embed
 608         {
 609             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 610             'md5': '7653032cbb25bf6c80d80f217055fa43',
 611             'info_dict': {
 612                 'id': '048195-004_PLUS7-F',
 613                 'ext': 'flv',
 614                 'title': 'X:enius',
 615                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 616                 'upload_date': '20140320',
 617             },
 618             'params': {
 619                 'skip_download': 'Requires rtmpdump'
 620             },
 621             'skip': 'video gone',
 622         },
 623         # francetv embed
 624         {
 625             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 626             'info_dict': {
 627                 'id': 'EV_30231',
 628                 'ext': 'mp4',
 629                 'title': 'Alcaline, le concert avec Calogero',
 630                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 631                 'upload_date': '20150226',
 632                 'timestamp': 1424989860,
 633                 'duration': 5400,
 634             },
 635             'params': {
 636                 # m3u8 downloads
 637                 'skip_download': True,
 638             },
 639             'expected_warnings': [
 640                 'Forbidden'
 641             ]
 642         },
 643         # Condé Nast embed
 644         {
 645             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 646             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 647             'info_dict': {
 648                 'id': '53501be369702d3275860000',
 649                 'ext': 'mp4',
 650                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 651             }
 652         },
 653         # Dailymotion embed
 654         {
 655             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 656             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 657             'info_dict': {
 658                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 659                 'ext': 'mp4',
 660                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 661                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
 662                 'uploader': 'Spi0n',
 663                 'uploader_id': 'xgditw',
 664                 'upload_date': '20140425',
 665                 'timestamp': 1398441542,
 666             },
 667             'add_ie': ['Dailymotion'],
 668         },
 669         # YouTube embed
 670         {
 671             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 672             'info_dict': {
 673                 'id': 'FXRb4ykk4S0',
 674                 'ext': 'mp4',
 675                 'title': 'The NBL Auction 2014',
 676                 'uploader': 'BADMINTON England',
 677                 'uploader_id': 'BADMINTONEvents',
 678                 'upload_date': '20140603',
 679                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 680             },
 681             'add_ie': ['Youtube'],
 682             'params': {
 683                 'skip_download': True,
 684             }
 685         },
  686         # MTVServices embed
 687         {
 688             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
 689             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
 690             'info_dict': {
 691                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
 692                 'ext': 'mp4',
 693                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
 694                 'description': 'Two valets share their love for movie star Liam Neesons.',
 695                 'timestamp': 1349922600,
 696                 'upload_date': '20121011',
 697             },
 698         },
 699         # YouTube embed via <data-embed-url="">
 700         {
 701             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 702             'info_dict': {
 703                 'id': '4vAffPZIT44',
 704                 'ext': 'mp4',
 705                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 706                 'uploader': 'Gameloft',
 707                 'uploader_id': 'gameloft',
 708                 'upload_date': '20140828',
 709                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 710             },
 711             'params': {
 712                 'skip_download': True,
 713             }
 714         },
 715         # Camtasia studio
 716         {
 717             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 718             'playlist': [{
 719                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 720                 'info_dict': {
 721                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 722                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 723                     'ext': 'flv',
 724                     'duration': 2235.90,
 725                 }
 726             }, {
 727                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 728                 'info_dict': {
 729                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 730                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 731                     'ext': 'flv',
 732                     'duration': 2235.93,
 733                 }
 734             }],
 735             'info_dict': {
 736                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 737             }
 738         },
 739         # Flowplayer
 740         {
 741             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 742             'md5': '9d65602bf31c6e20014319c7d07fba27',
 743             'info_dict': {
 744                 'id': '5123ea6d5e5a7',
 745                 'ext': 'mp4',
 746                 'age_limit': 18,
 747                 'uploader': 'www.handjobhub.com',
 748                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 749             }
 750         },
 751         # Multiple brightcove videos
 752         # https://github.com/rg3/youtube-dl/issues/2283
 753         {
 754             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 755             'info_dict': {
 756                 'id': 'always-never',
 757                 'title': 'Always / Never - The New Yorker',
 758             },
 759             'playlist_count': 3,
 760             'params': {
 761                 'extract_flat': False,
 762                 'skip_download': True,
 763             }
 764         },
 765         # MLB embed
 766         {
 767             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 768             'md5': '96f09a37e44da40dd083e12d9a683327',
 769             'info_dict': {
 770                 'id': '33322633',
 771                 'ext': 'mp4',
 772                 'title': 'Ump changes call to ball',
 773                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 774                 'duration': 48,
 775                 'timestamp': 1401537900,
 776                 'upload_date': '20140531',
 777                 'thumbnail': r're:^https?://.*\.jpg$',
 778             },
 779         },
 780         # Wistia embed
 781         {
 782             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 783             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
 784             'info_dict': {
 785                 'id': '6e2wtrbdaf',
 786                 'ext': 'mov',
 787                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
 788                 'description': 'a Paywall Videos video from Remilon',
 789                 'duration': 644.072,
 790                 'uploader': 'study.com',
 791                 'timestamp': 1459678540,
 792                 'upload_date': '20160403',
 793                 'filesize': 24687186,
 794             },
 795         },
 796         {
 797             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 798             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 799             'info_dict': {
 800                 'id': 'uxjb0lwrcz',
 801                 'ext': 'mp4',
 802                 'title': 'Conversation about Hexagonal Rails Part 1',
 803                 'description': 'a Martin Fowler video from ThoughtWorks',
 804                 'duration': 1715.0,
 805                 'uploader': 'thoughtworks.wistia.com',
 806                 'timestamp': 1401832161,
 807                 'upload_date': '20140603',
 808             },
 809         },
 810         # Wistia standard embed (async)
 811         {
 812             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
 813             'info_dict': {
 814                 'id': '807fafadvk',
 815                 'ext': 'mp4',
 816                 'title': 'Drip Brennan Dunn Workshop',
 817                 'description': 'a JV Webinars video from getdrip-1',
 818                 'duration': 4986.95,
 819                 'timestamp': 1463607249,
 820                 'upload_date': '20160518',
 821             },
 822             'params': {
 823                 'skip_download': True,
 824             }
 825         },
 826         # Soundcloud embed
 827         {
 828             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 829             'info_dict': {
 830                 'id': '174391317',
 831                 'ext': 'mp3',
 832                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 833                 'uploader': 'Sophos Security',
 834                 'title': 'Chet Chat 171 - Oct 29, 2014',
 835                 'upload_date': '20141029',
 836             }
 837         },
 838         # Soundcloud multiple embeds
 839         {
 840             'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
 841             'info_dict': {
 842                 'id': '52809',
 843                 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
 844             },
 845             'playlist_mincount': 7,
 846         },
 847         # TuneIn station embed
 848         {
 849             'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
 850             'info_dict': {
 851                 'id': '204146',
 852                 'ext': 'mp3',
 853                 'title': 'CNRV',
 854                 'location': 'Paris, France',
 855                 'is_live': True,
 856             },
 857             'params': {
 858                 # Live stream
 859                 'skip_download': True,
 860             },
 861         },
 862         # Livestream embed
 863         {
 864             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 865             'info_dict': {
 866                 'id': '67864563',
 867                 'ext': 'flv',
 868                 'upload_date': '20141112',
 869                 'title': 'Rosetta #CometLanding webcast HL 10',
 870             }
 871         },
 872         # Another Livestream embed, without 'new.' in URL
 873         {
 874             'url': 'https://www.freespeech.org/',
 875             'info_dict': {
 876                 'id': '123537347',
 877                 'ext': 'mp4',
 878                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 879             },
 880             'params': {
 881                 # Live stream
 882                 'skip_download': True,
 883             },
 884         },
 885         # LazyYT
 886         {
 887             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 888             'info_dict': {
 889                 'id': '1986',
 890                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 891             },
 892             'playlist_mincount': 2,
 893         },
 894         # Cinchcast embed
 895         {
 896             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 897             'info_dict': {
 898                 'id': '7141703',
 899                 'ext': 'mp3',
 900                 'upload_date': '20141126',
 901                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 902             }
 903         },
 904         # Cinerama player
 905         {
 906             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 907             'info_dict': {
 908                 'id': '730m_DandD_1901_512k',
 909                 'ext': 'mp4',
 910                 'uploader': 'www.abc.net.au',
 911                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 912             }
 913         },
 914         # embedded viddler video
 915         {
 916             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 917             'info_dict': {
 918                 'id': '4d03aad9',
 919                 'ext': 'mp4',
 920                 'uploader': 'deadspin',
 921                 'title': 'WALL-TO-GORTAT',
 922                 'timestamp': 1422285291,
 923                 'upload_date': '20150126',
 924             },
 925             'add_ie': ['Viddler'],
 926         },
 927         # Libsyn embed
 928         {
 929             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 930             'info_dict': {
 931                 'id': '3377616',
 932                 'ext': 'mp3',
 933                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 934                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 935                 'upload_date': '20150220',
 936             },
 937             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
 938         },
 939         # jwplayer YouTube
 940         {
 941             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 942             'info_dict': {
 943                 'id': 'Mrj4DVp2zeA',
 944                 'ext': 'mp4',
 945                 'upload_date': '20150212',
 946                 'uploader': 'The National Archives UK',
 947                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 948                 'uploader_id': 'NationalArchives08',
 949                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 950             },
 951         },
 952         # jwplayer rtmp
 953         {
 954             'url': 'http://www.suffolk.edu/sjc/',
 955             'info_dict': {
 956                 'id': 'sjclive',
 957                 'ext': 'flv',
 958                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
 959                 'uploader': 'www.suffolk.edu',
 960             },
 961             'params': {
 962                 'skip_download': True,
 963             }
 964         },
 965         # Complex jwplayer
 966         {
 967             'url': 'http://www.indiedb.com/games/king-machine/videos',
 968             'info_dict': {
 969                 'id': 'videos',
 970                 'ext': 'mp4',
 971                 'title': 'king machine trailer 1',
 972                 'thumbnail': r're:^https?://.*\.jpg$',
 973             },
 974         },
 975         # rtl.nl embed
 976         {
 977             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 978             'playlist_mincount': 5,
 979             'info_dict': {
 980                 'id': 'aanslagen-kopenhagen',
 981                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 982             }
 983         },
 984         # Zapiks embed
 985         {
 986             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 987             'info_dict': {
 988                 'id': '118046',
 989                 'ext': 'mp4',
 990                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 991             }
 992         },
 993         # Kaltura embed (different embed code)
 994         {
 995             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 996             'info_dict': {
 997                 'id': '1_a52wc67y',
 998                 'ext': 'flv',
 999                 'upload_date': '20150127',
1000                 'uploader_id': 'PremierMedia',
1001                 'timestamp': int,
1002                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1003             },
1004         },
1005         # Kaltura embed with single quotes
1006         {
1007             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1008             'info_dict': {
1009                 'id': '0_izeg5utt',
1010                 'ext': 'mp4',
1011                 'title': '35871',
1012                 'timestamp': 1355743100,
1013                 'upload_date': '20121217',
1014                 'uploader_id': 'batchUser',
1015             },
1016             'add_ie': ['Kaltura'],
1017         },
1018         {
1019             # Kaltura embedded via quoted entry_id
1020             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1021             'info_dict': {
1022                 'id': '0_utuok90b',
1023                 'ext': 'mp4',
1024                 'title': '06_matthew_brender_raj_dutt',
1025                 'timestamp': 1466638791,
1026                 'upload_date': '20160622',
1027             },
1028             'add_ie': ['Kaltura'],
1029             'expected_warnings': [
1030                 'Could not send HEAD request'
1031             ],
1032             'params': {
1033                 'skip_download': True,
1034             }
1035         },
1036         {
1037             # Kaltura embedded, some fileExt broken (#11480)
1038             'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1039             'info_dict': {
1040                 'id': '1_sgtvehim',
1041                 'ext': 'mp4',
1042                 'title': 'Our "Standard Models" of particle physics and cosmology',
1043                 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1044                 'timestamp': 1321158993,
1045                 'upload_date': '20111113',
1046                 'uploader_id': 'kps1',
1047             },
1048             'add_ie': ['Kaltura'],
1049         },
1050         # Eagle.Platform embed (generic URL)
1051         {
1052             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
1053             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1054             'info_dict': {
1055                 'id': '227304',
1056                 'ext': 'mp4',
1057                 'title': 'Навальный вышел на свободу',
1058                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
1059                 'thumbnail': r're:^https?://.*\.jpg$',
1060                 'duration': 87,
1061                 'view_count': int,
1062                 'age_limit': 0,
1063             },
1064         },
1065         # ClipYou (Eagle.Platform) embed (custom URL)
1066         {
1067             'url': 'http://muz-tv.ru/play/7129/',
1068             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1069             'info_dict': {
1070                 'id': '12820',
1071                 'ext': 'mp4',
1072                 'title': "'O Sole Mio",
1073                 'thumbnail': r're:^https?://.*\.jpg$',
1074                 'duration': 216,
1075                 'view_count': int,
1076             },
1077         },
1078         # Pladform embed
1079         {
1080             'url': 'http://muz-tv.ru/kinozal/view/7400/',
1081             'info_dict': {
1082                 'id': '100183293',
1083                 'ext': 'mp4',
1084                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1085                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1086                 'thumbnail': r're:^https?://.*\.jpg$',
1087                 'duration': 694,
1088                 'age_limit': 0,
1089             },
1090         },
1091         # Playwire embed
1092         {
1093             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1094             'info_dict': {
1095                 'id': '3519514',
1096                 'ext': 'mp4',
1097                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1098                 'thumbnail': r're:^https?://.*\.png$',
1099                 'duration': 45.115,
1100             },
1101         },
1102         # 5min embed
1103         {
1104             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1105             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1106             'info_dict': {
1107                 'id': '518726732',
1108                 'ext': 'mp4',
1109                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1110             },
1111         },
1112         # SVT embed
1113         {
1114             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1115             'info_dict': {
1116                 'id': '2900353',
1117                 'ext': 'flv',
1118                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1119                 'duration': 27,
1120                 'age_limit': 0,
1121             },
1122         },
1123         # Crooks and Liars embed
1124         {
1125             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1126             'info_dict': {
1127                 'id': '8RUoRhRi',
1128                 'ext': 'mp4',
1129                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1130                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1131                 'timestamp': 1428207000,
1132                 'upload_date': '20150405',
1133                 'uploader': 'Heather',
1134             },
1135         },
1136         # Crooks and Liars external embed
1137         {
1138             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1139             'info_dict': {
1140                 'id': 'MTE3MjUtMzQ2MzA',
1141                 'ext': 'mp4',
1142                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1143                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1144                 'timestamp': 1265032391,
1145                 'upload_date': '20100201',
1146                 'uploader': 'Heather',
1147             },
1148         },
1149         # NBC Sports vplayer embed
1150         {
1151             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1152             'info_dict': {
1153                 'id': 'ln7x1qSThw4k',
1154                 'ext': 'flv',
1155                 'title': "PFT Live: New leader in the 'new-look' defense",
1156                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1157                 'uploader': 'NBCU-SPORTS',
1158                 'upload_date': '20140107',
1159                 'timestamp': 1389118457,
1160             },
1161         },
1162         # NBC News embed
1163         {
1164             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1165             'md5': '1aa589c675898ae6d37a17913cf68d66',
1166             'info_dict': {
1167                 'id': '701714499682',
1168                 'ext': 'mp4',
1169                 'title': 'PREVIEW: On Assignment: David Letterman',
1170                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1171             },
1172         },
1173         # UDN embed
1174         {
1175             'url': 'https://video.udn.com/news/300346',
1176             'md5': 'fd2060e988c326991037b9aff9df21a6',
1177             'info_dict': {
1178                 'id': '300346',
1179                 'ext': 'mp4',
1180                 'title': '中一中男師變性 全校師生力挺',
1181                 'thumbnail': r're:^https?://.*\.jpg$',
1182             },
1183             'params': {
1184                 # m3u8 download
1185                 'skip_download': True,
1186             },
1187         },
1188         # Ooyala embed
1189         {
1190             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1191             'info_dict': {
1192                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1193                 'ext': 'mp4',
1194                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1195                 'title': 'This is what separates the Excel masters from the wannabes',
1196                 'duration': 191.933,
1197             },
1198             'params': {
1199                 # m3u8 downloads
1200                 'skip_download': True,
1201             }
1202         },
1203         # Brightcove URL in single quotes
1204         {
1205             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1206             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1207             'info_dict': {
1208                 'id': '4255764656001',
1209                 'ext': 'mp4',
1210                 'title': 'SN Presents: Russell Martin, World Citizen',
1211                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1212                 'uploader': 'Rogers Sportsnet',
1213                 'uploader_id': '1704050871',
1214                 'upload_date': '20150525',
1215                 'timestamp': 1432570283,
1216             },
1217         },
1218         # Dailymotion Cloud video
1219         {
1220             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1221             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1222             'info_dict': {
1223                 'id': 'x2uy8t3',
1224                 'ext': 'mp4',
1225                 'title': 'Sauvons les abeilles ! - Le débat',
1226                 'description': 'md5:d9082128b1c5277987825d684939ca26',
1227                 'thumbnail': r're:^https?://.*\.jpe?g$',
1228                 'timestamp': 1434970506,
1229                 'upload_date': '20150622',
1230                 'uploader': 'Public Sénat',
1231                 'uploader_id': 'xa9gza',
1232             }
1233         },
1234         # OnionStudios embed
1235         {
1236             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1237             'info_dict': {
1238                 'id': '2855',
1239                 'ext': 'mp4',
1240                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1241                 'thumbnail': r're:^https?://.*\.jpe?g$',
1242                 'uploader': 'ClickHole',
1243                 'uploader_id': 'clickhole',
1244             }
1245         },
1246         # SnagFilms embed
1247         {
1248             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1249             'info_dict': {
1250                 'id': '74849a00-85a9-11e1-9660-123139220831',
1251                 'ext': 'mp4',
1252                 'title': '#whilewewatch',
1253             }
1254         },
1255         # AdobeTVVideo embed
1256         {
1257             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1258             'md5': '43662b577c018ad707a63766462b1e87',
1259             'info_dict': {
1260                 'id': '2456',
1261                 'ext': 'mp4',
1262                 'title': 'New experience with Acrobat DC',
1263                 'description': 'New experience with Acrobat DC',
1264                 'duration': 248.667,
1265             },
1266         },
1267         # BrightcoveInPageEmbed embed
1268         {
1269             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1270             'info_dict': {
1271                 'id': '4238694884001',
1272                 'ext': 'flv',
1273                 'title': 'Tabletop: Dread, Last Thoughts',
1274                 'description': 'Tabletop: Dread, Last Thoughts',
1275                 'duration': 51690,
1276             },
1277         },
1278         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1279         # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
1280         {
1281             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1282             'info_dict': {
1283                 'id': '4785848093001',
1284                 'ext': 'mp4',
1285                 'title': 'The Cardinal Pell Interview',
1286                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1287                 'uploader': 'GlobeCast Australia - GlobeStream',
1288                 'uploader_id': '2733773828001',
1289                 'upload_date': '20160304',
1290                 'timestamp': 1457083087,
1291             },
1292             'params': {
1293                 # m3u8 downloads
1294                 'skip_download': True,
1295             },
1296         },
1297         # Another form of arte.tv embed
1298         {
1299             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1300             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1301             'info_dict': {
1302                 'id': '030273-562_PLUS7-F',
1303                 'ext': 'mp4',
1304                 'title': 'ARTE Reportage - Nulle part, en France',
1305                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1306                 'upload_date': '20160409',
1307             },
1308         },
1309         # LiveLeak embed
1310         {
1311             'url': 'http://www.wykop.pl/link/3088787/',
1312             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1313             'info_dict': {
1314                 'id': '874_1459135191',
1315                 'ext': 'mp4',
1316                 'title': 'Man shows poor quality of new apartment building',
1317                 'description': 'The wall is like a sand pile.',
1318                 'uploader': 'Lake8737',
1319             }
1320         },
1321         # Duplicated embedded video URLs
1322         {
1323             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1324             'info_dict': {
1325                 'id': '149298443_480_16c25b74_2',
1326                 'ext': 'mp4',
1327                 'title': 'vs. Blue Orange Spring Game',
1328                 'uploader': 'www.hudl.com',
1329             },
1330         },
1331         # twitter:player:stream embed
1332         {
1333             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1334             'info_dict': {
1335                 'id': 'master',
1336                 'ext': 'mp4',
1337                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1338                 'uploader': 'www.rtl.be',
1339             },
1340             'params': {
1341                 # m3u8 downloads
1342                 'skip_download': True,
1343             },
1344         },
1345         # twitter:player embed
1346         {
1347             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1348             'md5': 'a3e0df96369831de324f0778e126653c',
1349             'info_dict': {
1350                 'id': '4909620399001',
1351                 'ext': 'mp4',
1352                 'title': 'What Do Black Holes Sound Like?',
1353                 'description': 'what do black holes sound like',
1354                 'upload_date': '20160524',
1355                 'uploader_id': '29913724001',
1356                 'timestamp': 1464107587,
1357                 'uploader': 'TheAtlantic',
1358             },
1359             'add_ie': ['BrightcoveLegacy'],
1360         },
1361         # Facebook <iframe> embed
1362         {
1363             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1364             'md5': 'fbcde74f534176ecb015849146dd3aee',
1365             'info_dict': {
1366                 'id': '599637780109885',
1367                 'ext': 'mp4',
1368                 'title': 'Facebook video #599637780109885',
1369             },
1370         },
1371         # Facebook API embed
1372         {
1373             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1374             'md5': 'a47372ee61b39a7b90287094d447d94e',
1375             'info_dict': {
1376                 'id': '10153467542406923',
1377                 'ext': 'mp4',
1378                 'title': 'Facebook video #10153467542406923',
1379             },
1380         },
1381         # Wordpress "YouTube Video Importer" plugin
1382         {
1383             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1384             'md5': 'd16797741b560b485194eddda8121b48',
1385             'info_dict': {
1386                 'id': 'HNTXWDXV9Is',
1387                 'ext': 'mp4',
1388                 'title': 'Blue Devils Drumline Stanford lot 2016',
1389                 'upload_date': '20160627',
1390                 'uploader_id': 'GENOCIDE8GENERAL10',
1391                 'uploader': 'cylus cyrus',
1392             },
1393         },
1394         {
1395             # video stored on custom kaltura server
1396             'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1397             'md5': '537617d06e64dfed891fa1593c4b30cc',
1398             'info_dict': {
1399                 'id': '0_1iotm5bh',
1400                 'ext': 'mp4',
1401                 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1402                 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1403                 'uploader_id': 'videos.expansion@el-mundo.net',
1404                 'upload_date': '20150429',
1405                 'timestamp': 1430303472,
1406             },
1407             'add_ie': ['Kaltura'],
1408         },
1409         {
1410             # Non-standard Vimeo embed
1411             'url': 'https://openclassrooms.com/courses/understanding-the-web',
1412             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1413             'info_dict': {
1414                 'id': '148867247',
1415                 'ext': 'mp4',
1416                 'title': 'Understanding the web - Teaser',
1417                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1418                 'upload_date': '20151214',
1419                 'uploader': 'OpenClassrooms',
1420                 'uploader_id': 'openclassrooms',
1421             },
1422             'add_ie': ['Vimeo'],
1423         },
1424         {
1425             # generic vimeo embed that requires original URL passed as Referer
1426             'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1427             'only_matching': True,
1428         },
1429         {
1430             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1431             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1432             'info_dict': {
1433                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1434                 'ext': 'mp4',
1435                 'title': 'Big Buck Bunny',
1436                 'description': 'Royalty free test video',
1437                 'timestamp': 1432816365,
1438                 'upload_date': '20150528',
1439                 'is_live': False,
1440             },
1441             'params': {
1442                 'skip_download': True,
1443             },
1444             'add_ie': [ArkenaIE.ie_key()],
1445         },
1446         {
1447             'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1448             'info_dict': {
1449                 'id': '1c7141f46c',
1450                 'ext': 'mp4',
1451                 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1452             },
1453             'params': {
1454                 'skip_download': True,
1455             },
1456             'add_ie': [Vbox7IE.ie_key()],
1457         },
1458         {
1459             # DBTV embeds
1460             'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1461             'info_dict': {
1462                 'id': '43254897',
1463                 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1464             },
1465             'playlist_mincount': 3,
1466         },
1467         {
1468             # Videa embeds
1469             'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1470             'info_dict': {
1471                 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1472                 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1473             },
1474             'playlist_mincount': 2,
1475         },
1476         {
1477             # 20 minuten embed
1478             'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1479             'info_dict': {
1480                 'id': '523629',
1481                 'ext': 'mp4',
1482                 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1483                 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1484             },
1485             'params': {
1486                 'skip_download': True,
1487             },
1488             'add_ie': [TwentyMinutenIE.ie_key()],
1489         },
1490         {
1491             # VideoPress embed
1492             'url': 'https://en.support.wordpress.com/videopress/',
1493             'info_dict': {
1494                 'id': 'OcobLTqC',
1495                 'ext': 'm4v',
1496                 'title': 'IMG_5786',
1497                 'timestamp': 1435711927,
1498                 'upload_date': '20150701',
1499             },
1500             'params': {
1501                 'skip_download': True,
1502             },
1503             'add_ie': [VideoPressIE.ie_key()],
1504         },
1505         {
1506             # ThePlatform embedded with whitespaces in URLs
1507             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1508             'only_matching': True,
1509         },
1510         # {
1511         #     # TODO: find another test
1512         #     # http://schema.org/VideoObject
1513         #     'url': 'https://flipagram.com/f/nyvTSJMKId',
1514         #     'md5': '888dcf08b7ea671381f00fab74692755',
1515         #     'info_dict': {
1516         #         'id': 'nyvTSJMKId',
1517         #         'ext': 'mp4',
1518         #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1519         #         'description': '#love for cats.',
1520         #         'timestamp': 1461244995,
1521         #         'upload_date': '20160421',
1522         #     },
1523         #     'params': {
1524         #         'force_generic_extractor': True,
1525         #     },
1526         # }
1527     ]
1528
1529     def report_following_redirect(self, new_url):
1530         """Report information extraction."""
1531         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1532
1533     def _extract_rss(self, url, video_id, doc):
1534         playlist_title = doc.find('./channel/title').text
1535         playlist_desc_el = doc.find('./channel/description')
1536         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1537
1538         entries = []
1539         for it in doc.findall('./channel/item'):
1540             next_url = xpath_text(it, 'link', fatal=False)
1541             if not next_url:
1542                 enclosure_nodes = it.findall('./enclosure')
1543                 for e in enclosure_nodes:
1544                     next_url = e.attrib.get('url')
1545                     if next_url:
1546                         break
1547
1548             if not next_url:
1549                 continue
1550
1551             entries.append({
1552                 '_type': 'url',
1553                 'url': next_url,
1554                 'title': it.find('title').text,
1555             })
1556
1557         return {
1558             '_type': 'playlist',
1559             'id': url,
1560             'title': playlist_title,
1561             'description': playlist_desc,
1562             'entries': entries,
1563         }
1564
1565     def _extract_camtasia(self, url, video_id, webpage):
1566         """ Returns None if no camtasia video can be found. """
1567
1568         camtasia_cfg = self._search_regex(
1569             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1570             webpage, 'camtasia configuration file', default=None)
1571         if camtasia_cfg is None:
1572             return None
1573
1574         title = self._html_search_meta('DC.title', webpage, fatal=True)
1575
1576         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1577         camtasia_cfg = self._download_xml(
1578             camtasia_url, video_id,
1579             note='Downloading camtasia configuration',
1580             errnote='Failed to download camtasia configuration')
1581         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1582
1583         entries = []
1584         for n in fileset_node.getchildren():
1585             url_n = n.find('./uri')
1586             if url_n is None:
1587                 continue
1588
1589             entries.append({
1590                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1591                 'title': '%s - %s' % (title, n.tag),
1592                 'url': compat_urlparse.urljoin(url, url_n.text),
1593                 'duration': float_or_none(n.find('./duration').text),
1594             })
1595
1596         return {
1597             '_type': 'playlist',
1598             'entries': entries,
1599             'title': title,
1600         }
1601
1602     def _real_extract(self, url):
1603         if url.startswith('//'):
1604             return {
1605                 '_type': 'url',
1606                 'url': self.http_scheme() + url,
1607             }
1608
1609         parsed_url = compat_urlparse.urlparse(url)
1610         if not parsed_url.scheme:
1611             default_search = self._downloader.params.get('default_search')
1612             if default_search is None:
1613                 default_search = 'fixup_error'
1614
1615             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1616                 if '/' in url:
1617                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1618                     return self.url_result('http://' + url)
1619                 elif default_search != 'fixup_error':
1620                     if default_search == 'auto_warning':
1621                         if re.match(r'^(?:url|URL)$', url):
1622                             raise ExtractorError(
1623                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1624                                 expected=True)
1625                         else:
1626                             self._downloader.report_warning(
1627                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1628                     return self.url_result('ytsearch:' + url)
1629
1630             if default_search in ('error', 'fixup_error'):
1631                 raise ExtractorError(
1632                     '%r is not a valid URL. '
1633                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1634                     % (url, url), expected=True)
1635             else:
1636                 if ':' not in default_search:
1637                     default_search += ':'
1638                 return self.url_result(default_search + url)
1639
1640         url, smuggled_data = unsmuggle_url(url)
1641         force_videoid = None
1642         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1643         if smuggled_data and 'force_videoid' in smuggled_data:
1644             force_videoid = smuggled_data['force_videoid']
1645             video_id = force_videoid
1646         else:
1647             video_id = self._generic_id(url)
1648
1649         self.to_screen('%s: Requesting header' % video_id)
1650
1651         head_req = HEADRequest(url)
1652         head_response = self._request_webpage(
1653             head_req, video_id,
1654             note=False, errnote='Could not send HEAD request to %s' % url,
1655             fatal=False)
1656
1657         if head_response is not False:
1658             # Check for redirect
1659             new_url = head_response.geturl()
1660             if url != new_url:
1661                 self.report_following_redirect(new_url)
1662                 if force_videoid:
1663                     new_url = smuggle_url(
1664                         new_url, {'force_videoid': force_videoid})
1665                 return self.url_result(new_url)
1666
1667         full_response = None
1668         if head_response is False:
1669             request = sanitized_Request(url)
1670             request.add_header('Accept-Encoding', '*')
1671             full_response = self._request_webpage(request, video_id)
1672             head_response = full_response
1673
1674         info_dict = {
1675             'id': video_id,
1676             'title': self._generic_title(url),
1677             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1678         }
1679
1680         # Check for direct link to a video
1681         content_type = head_response.headers.get('Content-Type', '').lower()
1682         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1683         if m:
1684             format_id = m.group('format_id')
1685             if format_id.endswith('mpegurl'):
1686                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1687             elif format_id == 'f4m':
1688                 formats = self._extract_f4m_formats(url, video_id)
1689             else:
1690                 formats = [{
1691                     'format_id': m.group('format_id'),
1692                     'url': url,
1693                     'vcodec': 'none' if m.group('type') == 'audio' else None
1694                 }]
1695                 info_dict['direct'] = True
1696             self._sort_formats(formats)
1697             info_dict['formats'] = formats
1698             return info_dict
1699
1700         if not self._downloader.params.get('test', False) and not is_intentional:
1701             force = self._downloader.params.get('force_generic_extractor', False)
1702             self._downloader.report_warning(
1703                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1704
1705         if not full_response:
1706             request = sanitized_Request(url)
1707             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1708             # making it impossible to download only chunk of the file (yet we need only 512kB to
1709             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1710             # that will always result in downloading the whole file that is not desirable.
1711             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1712             # to accept raw bytes and being able to download only a chunk.
1713             # It may probably better to solve this by checking Content-Type for application/octet-stream
1714             # after HEAD request finishes, but not sure if we can rely on this.
1715             request.add_header('Accept-Encoding', '*')
1716             full_response = self._request_webpage(request, video_id)
1717
1718         first_bytes = full_response.read(512)
1719
1720         # Is it an M3U playlist?
1721         if first_bytes.startswith(b'#EXTM3U'):
1722             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1723             self._sort_formats(info_dict['formats'])
1724             return info_dict
1725
1726         # Maybe it's a direct link to a video?
1727         # Be careful not to download the whole thing!
1728         if not is_html(first_bytes):
1729             self._downloader.report_warning(
1730                 'URL could be a direct video link, returning it as such.')
1731             info_dict.update({
1732                 'direct': True,
1733                 'url': url,
1734             })
1735             return info_dict
1736
1737         webpage = self._webpage_read_content(
1738             full_response, url, video_id, prefix=first_bytes)
1739
1740         self.report_extraction(video_id)
1741
1742         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1743         try:
1744             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1745             if doc.tag == 'rss':
1746                 return self._extract_rss(url, video_id, doc)
1747             elif doc.tag == 'SmoothStreamingMedia':
1748                 info_dict['formats'] = self._parse_ism_formats(doc, url)
1749                 self._sort_formats(info_dict['formats'])
1750                 return info_dict
1751             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1752                 smil = self._parse_smil(doc, url, video_id)
1753                 self._sort_formats(smil['formats'])
1754                 return smil
1755             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1756                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1757             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1758                 info_dict['formats'] = self._parse_mpd_formats(
1759                     doc, video_id,
1760                     mpd_base_url=full_response.geturl().rpartition('/')[0],
1761                     mpd_url=url)
1762                 self._sort_formats(info_dict['formats'])
1763                 return info_dict
1764             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1765                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1766                 self._sort_formats(info_dict['formats'])
1767                 return info_dict
1768         except compat_xml_parse_error:
1769             pass
1770
1771         # Is it a Camtasia project?
1772         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1773         if camtasia_res is not None:
1774             return camtasia_res
1775
1776         # Sometimes embedded video player is hidden behind percent encoding
1777         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1778         # Unescaping the whole page allows to handle those cases in a generic way
1779         webpage = compat_urllib_parse_unquote(webpage)
1780
1781         # it's tempting to parse this further, but you would
1782         # have to take into account all the variations like
1783         #   Video Title - Site Name
1784         #   Site Name | Video Title
1785         #   Video Title - Tagline | Site Name
1786         # and so on and so forth; it's just not practical
1787         video_title = self._og_search_title(
1788             webpage, default=None) or self._html_search_regex(
1789             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1790             default='video')
1791
1792         # Try to detect age limit automatically
1793         age_limit = self._rta_search(webpage)
1794         # And then there are the jokers who advertise that they use RTA,
1795         # but actually don't.
1796         AGE_LIMIT_MARKERS = [
1797             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1798         ]
1799         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1800             age_limit = 18
1801
1802         # video uploader is domain name
1803         video_uploader = self._search_regex(
1804             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1805
1806         video_description = self._og_search_description(webpage, default=None)
1807         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1808
1809         # Helper method
1810         def _playlist_from_matches(matches, getter=None, ie=None):
1811             urlrs = orderedSet(
1812                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1813                 for m in matches)
1814             return self.playlist_result(
1815                 urlrs, playlist_id=video_id, playlist_title=video_title)
1816
1817         # Look for Brightcove Legacy Studio embeds
1818         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1819         if bc_urls:
1820             self.to_screen('Brightcove video detected.')
1821             entries = [{
1822                 '_type': 'url',
1823                 'url': smuggle_url(bc_url, {'Referer': url}),
1824                 'ie_key': 'BrightcoveLegacy'
1825             } for bc_url in bc_urls]
1826
1827             return {
1828                 '_type': 'playlist',
1829                 'title': video_title,
1830                 'id': video_id,
1831                 'entries': entries,
1832             }
1833
1834         # Look for Brightcove New Studio embeds
1835         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1836         if bc_urls:
1837             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1838
1839         # Look for ThePlatform embeds
1840         tp_urls = ThePlatformIE._extract_urls(webpage)
1841         if tp_urls:
1842             return _playlist_from_matches(tp_urls, ie='ThePlatform')
1843
1844         # Look for Vessel embeds
1845         vessel_urls = VesselIE._extract_urls(webpage)
1846         if vessel_urls:
1847             return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
1848
1849         # Look for embedded rtl.nl player
1850         matches = re.findall(
1851             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1852             webpage)
1853         if matches:
1854             return _playlist_from_matches(matches, ie='RtlNl')
1855
1856         vimeo_urls = VimeoIE._extract_urls(url, webpage)
1857         if vimeo_urls:
1858             return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
1859
1860         vid_me_embed_url = self._search_regex(
1861             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1862             webpage, 'vid.me embed', default=None)
1863         if vid_me_embed_url is not None:
1864             return self.url_result(vid_me_embed_url, 'Vidme')
1865
1866         # Look for embedded YouTube player
1867         matches = re.findall(r'''(?x)
1868             (?:
1869                 <iframe[^>]+?src=|
1870                 data-video-url=|
1871                 <embed[^>]+?src=|
1872                 embedSWF\(?:\s*|
1873                 new\s+SWFObject\(
1874             )
1875             (["\'])
1876                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1877                 (?:embed|v|p)/.+?)
1878             \1''', webpage)
1879         if matches:
1880             return _playlist_from_matches(
1881                 matches, lambda m: unescapeHTML(m[1]))
1882
1883         # Look for lazyYT YouTube embed
1884         matches = re.findall(
1885             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1886         if matches:
1887             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1888
1889         # Look for Wordpress "YouTube Video Importer" plugin
1890         matches = re.findall(r'''(?x)<div[^>]+
1891             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1892             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1893         if matches:
1894             return _playlist_from_matches(matches, lambda m: m[-1])
1895
1896         matches = DailymotionIE._extract_urls(webpage)
1897         if matches:
1898             return _playlist_from_matches(matches)
1899
1900         # Look for embedded Dailymotion playlist player (#3822)
1901         m = re.search(
1902             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1903         if m:
1904             playlists = re.findall(
1905                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1906             if playlists:
1907                 return _playlist_from_matches(
1908                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1909
1910         # Look for embedded Wistia player
1911         match = re.search(
1912             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1913         if match:
1914             embed_url = self._proto_relative_url(
1915                 unescapeHTML(match.group('url')))
1916             return {
1917                 '_type': 'url_transparent',
1918                 'url': embed_url,
1919                 'ie_key': 'Wistia',
1920                 'uploader': video_uploader,
1921             }
1922
1923         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1924         if match:
1925             return {
1926                 '_type': 'url_transparent',
1927                 'url': 'wistia:%s' % match.group('id'),
1928                 'ie_key': 'Wistia',
1929                 'uploader': video_uploader,
1930             }
1931
1932         match = re.search(
1933             r'''(?sx)
1934                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1935                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1936             ''', webpage)
1937         if match:
1938             return self.url_result(self._proto_relative_url(
1939                 'wistia:%s' % match.group('id')), 'Wistia')
1940
1941         # Look for SVT player
1942         svt_url = SVTIE._extract_url(webpage)
1943         if svt_url:
1944             return self.url_result(svt_url, 'SVT')
1945
1946         # Look for embedded condenast player
1947         matches = re.findall(
1948             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1949             webpage)
1950         if matches:
1951             return {
1952                 '_type': 'playlist',
1953                 'entries': [{
1954                     '_type': 'url',
1955                     'ie_key': 'CondeNast',
1956                     'url': ma,
1957                 } for ma in matches],
1958                 'title': video_title,
1959                 'id': video_id,
1960             }
1961
1962         # Look for Bandcamp pages with custom domain
1963         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1964         if mobj is not None:
1965             burl = unescapeHTML(mobj.group(1))
1966             # Don't set the extractor because it can be a track url or an album
1967             return self.url_result(burl)
1968
1969         # Look for embedded Vevo player
1970         mobj = re.search(
1971             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1972         if mobj is not None:
1973             return self.url_result(mobj.group('url'))
1974
1975         # Look for embedded Viddler player
1976         mobj = re.search(
1977             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1978             webpage)
1979         if mobj is not None:
1980             return self.url_result(mobj.group('url'))
1981
1982         # Look for NYTimes player
1983         mobj = re.search(
1984             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1985             webpage)
1986         if mobj is not None:
1987             return self.url_result(mobj.group('url'))
1988
1989         # Look for Libsyn player
1990         mobj = re.search(
1991             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1992         if mobj is not None:
1993             return self.url_result(mobj.group('url'))
1994
1995         # Look for Ooyala videos
1996         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1997                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1998                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1999                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
2000         if mobj is not None:
2001             embed_token = self._search_regex(
2002                 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2003                 webpage, 'ooyala embed token', default=None)
2004             return OoyalaIE._build_url_result(smuggle_url(
2005                 mobj.group('ec'), {
2006                     'domain': url,
2007                     'embed_token': embed_token,
2008                 }))
2009
2010         # Look for multiple Ooyala embeds on SBN network websites
2011         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2012         if mobj is not None:
2013             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2014             if embeds:
2015                 return _playlist_from_matches(
2016                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
2017
2018         # Look for Aparat videos
2019         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
2020         if mobj is not None:
2021             return self.url_result(mobj.group(1), 'Aparat')
2022
2023         # Look for MPORA videos
2024         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2025         if mobj is not None:
2026             return self.url_result(mobj.group(1), 'Mpora')
2027
2028         # Look for embedded NovaMov-based player
2029         mobj = re.search(
2030             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2031                     (?P<url>http://(?:(?:embed|www)\.)?
2032                         (?:novamov\.com|
2033                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
2034                            videoweed\.(?:es|com)|
2035                            movshare\.(?:net|sx|ag)|
2036                            divxstage\.(?:eu|net|ch|co|at|ag))
2037                         /embed\.php.+?)\1''', webpage)
2038         if mobj is not None:
2039             return self.url_result(mobj.group('url'))
2040
2041         # Look for embedded Facebook player
2042         facebook_url = FacebookIE._extract_url(webpage)
2043         if facebook_url is not None:
2044             return self.url_result(facebook_url, 'Facebook')
2045
2046         # Look for embedded VK player
2047         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2048         if mobj is not None:
2049             return self.url_result(mobj.group('url'), 'VK')
2050
2051         # Look for embedded Odnoklassniki player
2052         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2053         if mobj is not None:
2054             return self.url_result(mobj.group('url'), 'Odnoklassniki')
2055
2056         # Look for embedded ivi player
2057         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2058         if mobj is not None:
2059             return self.url_result(mobj.group('url'), 'Ivi')
2060
2061         # Look for embedded Huffington Post player
2062         mobj = re.search(
2063             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2064         if mobj is not None:
2065             return self.url_result(mobj.group('url'), 'HuffPost')
2066
2067         # Look for embed.ly
2068         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2069         if mobj is not None:
2070             return self.url_result(mobj.group('url'))
2071         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2072         if mobj is not None:
2073             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2074
2075         # Look for funnyordie embed
2076         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2077         if matches:
2078             return _playlist_from_matches(
2079                 matches, getter=unescapeHTML, ie='FunnyOrDie')
2080
2081         # Look for BBC iPlayer embed
2082         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2083         if matches:
2084             return _playlist_from_matches(matches, ie='BBCCoUk')
2085
2086         # Look for embedded RUTV player
2087         rutv_url = RUTVIE._extract_url(webpage)
2088         if rutv_url:
2089             return self.url_result(rutv_url, 'RUTV')
2090
2091         # Look for embedded TVC player
2092         tvc_url = TVCIE._extract_url(webpage)
2093         if tvc_url:
2094             return self.url_result(tvc_url, 'TVC')
2095
2096         # Look for embedded SportBox player
2097         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2098         if sportbox_urls:
2099             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
2100
2101         # Look for embedded XHamster player
2102         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2103         if xhamster_urls:
2104             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
2105
2106         # Look for embedded TNAFlixNetwork player
2107         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2108         if tnaflix_urls:
2109             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
2110
2111         # Look for embedded PornHub player
2112         pornhub_urls = PornHubIE._extract_urls(webpage)
2113         if pornhub_urls:
2114             return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
2115
2116         # Look for embedded DrTuber player
2117         drtuber_urls = DrTuberIE._extract_urls(webpage)
2118         if drtuber_urls:
2119             return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
2120
2121         # Look for embedded RedTube player
2122         redtube_urls = RedTubeIE._extract_urls(webpage)
2123         if redtube_urls:
2124             return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
2125
2126         # Look for embedded Tvigle player
2127         mobj = re.search(
2128             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2129         if mobj is not None:
2130             return self.url_result(mobj.group('url'), 'Tvigle')
2131
2132         # Look for embedded TED player
2133         mobj = re.search(
2134             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2135         if mobj is not None:
2136             return self.url_result(mobj.group('url'), 'TED')
2137
2138         # Look for embedded Ustream videos
2139         ustream_url = UstreamIE._extract_url(webpage)
2140         if ustream_url:
2141             return self.url_result(ustream_url, UstreamIE.ie_key())
2142
2143         # Look for embedded arte.tv player
2144         mobj = re.search(
2145             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2146             webpage)
2147         if mobj is not None:
2148             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2149
2150         # Look for embedded francetv player
2151         mobj = re.search(
2152             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2153             webpage)
2154         if mobj is not None:
2155             return self.url_result(mobj.group('url'))
2156
2157         # Look for embedded smotri.com player
2158         smotri_url = SmotriIE._extract_url(webpage)
2159         if smotri_url:
2160             return self.url_result(smotri_url, 'Smotri')
2161
2162         # Look for embedded Myvi.ru player
2163         myvi_url = MyviIE._extract_url(webpage)
2164         if myvi_url:
2165             return self.url_result(myvi_url)
2166
2167         # Look for embedded soundcloud player
2168         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2169         if soundcloud_urls:
2170             return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2171
2172         # Look for tunein player
2173         tunein_urls = TuneInBaseIE._extract_urls(webpage)
2174         if tunein_urls:
2175             return _playlist_from_matches(tunein_urls)
2176
2177         # Look for embedded mtvservices player
2178         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2179         if mtvservices_url:
2180             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2181
2182         # Look for embedded yahoo player
2183         mobj = re.search(
2184             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2185             webpage)
2186         if mobj is not None:
2187             return self.url_result(mobj.group('url'), 'Yahoo')
2188
2189         # Look for embedded sbs.com.au player
2190         mobj = re.search(
2191             r'''(?x)
2192             (?:
2193                 <meta\s+property="og:video"\s+content=|
2194                 <iframe[^>]+?src=
2195             )
2196             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2197             webpage)
2198         if mobj is not None:
2199             return self.url_result(mobj.group('url'), 'SBS')
2200
2201         # Look for embedded Cinchcast player
2202         mobj = re.search(
2203             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2204             webpage)
2205         if mobj is not None:
2206             return self.url_result(mobj.group('url'), 'Cinchcast')
2207
2208         mobj = re.search(
2209             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2210             webpage)
2211         if not mobj:
2212             mobj = re.search(
2213                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2214                 webpage)
2215         if mobj is not None:
2216             return self.url_result(mobj.group('url'), 'MLB')
2217
2218         mobj = re.search(
2219             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2220             webpage)
2221         if mobj is not None:
2222             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2223
2224         mobj = re.search(
2225             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2226             webpage)
2227         if mobj is not None:
2228             return self.url_result(mobj.group('url'), 'Livestream')
2229
2230         # Look for Zapiks embed
2231         mobj = re.search(
2232             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2233         if mobj is not None:
2234             return self.url_result(mobj.group('url'), 'Zapiks')
2235
2236         # Look for Kaltura embeds
2237         kaltura_url = KalturaIE._extract_url(webpage)
2238         if kaltura_url:
2239             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2240
2241         # Look for Eagle.Platform embeds
2242         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2243         if eagleplatform_url:
2244             return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
2245
2246         # Look for ClipYou (uses Eagle.Platform) embeds
2247         mobj = re.search(
2248             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2249         if mobj is not None:
2250             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2251
2252         # Look for Pladform embeds
2253         pladform_url = PladformIE._extract_url(webpage)
2254         if pladform_url:
2255             return self.url_result(pladform_url)
2256
2257         # Look for Videomore embeds
2258         videomore_url = VideomoreIE._extract_url(webpage)
2259         if videomore_url:
2260             return self.url_result(videomore_url)
2261
2262         # Look for Webcaster embeds
2263         webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2264         if webcaster_url:
2265             return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2266
2267         # Look for Playwire embeds
2268         mobj = re.search(
2269             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2270         if mobj is not None:
2271             return self.url_result(mobj.group('url'))
2272
2273         # Look for 5min embeds
2274         mobj = re.search(
2275             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2276         if mobj is not None:
2277             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2278
2279         # Look for Crooks and Liars embeds
2280         mobj = re.search(
2281             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2282         if mobj is not None:
2283             return self.url_result(mobj.group('url'))
2284
2285         # Look for NBC Sports VPlayer embeds
2286         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2287         if nbc_sports_url:
2288             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2289
2290         # Look for NBC News embeds
2291         nbc_news_embed_url = re.search(
2292             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2293         if nbc_news_embed_url:
2294             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2295
2296         # Look for Google Drive embeds
2297         google_drive_url = GoogleDriveIE._extract_url(webpage)
2298         if google_drive_url:
2299             return self.url_result(google_drive_url, 'GoogleDrive')
2300
2301         # Look for UDN embeds
2302         mobj = re.search(
2303             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2304         if mobj is not None:
2305             return self.url_result(
2306                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2307
2308         # Look for Senate ISVP iframe
2309         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2310         if senate_isvp_url:
2311             return self.url_result(senate_isvp_url, 'SenateISVP')
2312
2313         # Look for Dailymotion Cloud videos
2314         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2315         if dmcloud_url:
2316             return self.url_result(dmcloud_url, 'DailymotionCloud')
2317
2318         # Look for OnionStudios embeds
2319         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2320         if onionstudios_url:
2321             return self.url_result(onionstudios_url)
2322
2323         # Look for ViewLift embeds
2324         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2325         if viewlift_url:
2326             return self.url_result(viewlift_url)
2327
2328         # Look for JWPlatform embeds
2329         jwplatform_url = JWPlatformIE._extract_url(webpage)
2330         if jwplatform_url:
2331             return self.url_result(jwplatform_url, 'JWPlatform')
2332
2333         # Look for Digiteka embeds
2334         digiteka_url = DigitekaIE._extract_url(webpage)
2335         if digiteka_url:
2336             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2337
2338         # Look for Arkena embeds
2339         arkena_url = ArkenaIE._extract_url(webpage)
2340         if arkena_url:
2341             return self.url_result(arkena_url, ArkenaIE.ie_key())
2342
2343         # Look for Piksel embeds
2344         piksel_url = PikselIE._extract_url(webpage)
2345         if piksel_url:
2346             return self.url_result(piksel_url, PikselIE.ie_key())
2347
2348         # Look for Limelight embeds
2349         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2350         if mobj:
2351             lm = {
2352                 'Media': 'media',
2353                 'Channel': 'channel',
2354                 'ChannelList': 'channel_list',
2355             }
2356             return self.url_result(smuggle_url('limelight:%s:%s' % (
2357                 lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
2358                 'Limelight%s' % mobj.group(1), mobj.group(2))
2359
2360         mobj = re.search(
2361             r'''(?sx)
2362                 <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
2363                     <param[^>]+
2364                         name=(["\'])flashVars\2[^>]+
2365                         value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
2366             ''', webpage)
2367         if mobj:
2368             return self.url_result(smuggle_url(
2369                 'limelight:media:%s' % mobj.group('id'),
2370                 {'source_url': url}), 'LimelightMedia', mobj.group('id'))
2371
2372         # Look for AdobeTVVideo embeds
2373         mobj = re.search(
2374             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2375             webpage)
2376         if mobj is not None:
2377             return self.url_result(
2378                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2379                 'AdobeTVVideo')
2380
2381         # Look for Vine embeds
2382         mobj = re.search(
2383             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2384             webpage)
2385         if mobj is not None:
2386             return self.url_result(
2387                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2388
2389         # Look for VODPlatform embeds
2390         mobj = re.search(
2391             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2392             webpage)
2393         if mobj is not None:
2394             return self.url_result(
2395                 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2396
2397         # Look for Mangomolo embeds
2398         mobj = re.search(
2399             r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2400                 (?:
2401                     video\?.*?\bid=(?P<video_id>\d+)|
2402                     index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2403                 ).+?)\1''', webpage)
2404         if mobj is not None:
2405             info = {
2406                 '_type': 'url_transparent',
2407                 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2408                 'title': video_title,
2409                 'description': video_description,
2410                 'thumbnail': video_thumbnail,
2411                 'uploader': video_uploader,
2412             }
2413             video_id = mobj.group('video_id')
2414             if video_id:
2415                 info.update({
2416                     'ie_key': 'MangomoloVideo',
2417                     'id': video_id,
2418                 })
2419             else:
2420                 info.update({
2421                     'ie_key': 'MangomoloLive',
2422                     'id': mobj.group('channel_id'),
2423                 })
2424             return info
2425
2426         # Look for Instagram embeds
2427         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2428         if instagram_embed_url is not None:
2429             return self.url_result(
2430                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2431
2432         # Look for LiveLeak embeds
2433         liveleak_url = LiveLeakIE._extract_url(webpage)
2434         if liveleak_url:
2435             return self.url_result(liveleak_url, 'LiveLeak')
2436
2437         # Look for 3Q SDN embeds
2438         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2439         if threeqsdn_url:
2440             return {
2441                 '_type': 'url_transparent',
2442                 'ie_key': ThreeQSDNIE.ie_key(),
2443                 'url': self._proto_relative_url(threeqsdn_url),
2444                 'title': video_title,
2445                 'description': video_description,
2446                 'thumbnail': video_thumbnail,
2447                 'uploader': video_uploader,
2448             }
2449
2450         # Look for VBOX7 embeds
2451         vbox7_url = Vbox7IE._extract_url(webpage)
2452         if vbox7_url:
2453             return self.url_result(vbox7_url, Vbox7IE.ie_key())
2454
2455         # Look for DBTV embeds
2456         dbtv_urls = DBTVIE._extract_urls(webpage)
2457         if dbtv_urls:
2458             return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
2459
2460         # Look for Videa embeds
2461         videa_urls = VideaIE._extract_urls(webpage)
2462         if videa_urls:
2463             return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
2464
2465         # Look for 20 minuten embeds
2466         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2467         if twentymin_urls:
2468             return _playlist_from_matches(
2469                 twentymin_urls, ie=TwentyMinutenIE.ie_key())
2470
2471         # Look for Openload embeds
2472         openload_urls = OpenloadIE._extract_urls(webpage)
2473         if openload_urls:
2474             return _playlist_from_matches(
2475                 openload_urls, ie=OpenloadIE.ie_key())
2476
2477         # Look for VideoPress embeds
2478         videopress_urls = VideoPressIE._extract_urls(webpage)
2479         if videopress_urls:
2480             return _playlist_from_matches(
2481                 videopress_urls, ie=VideoPressIE.ie_key())
2482
2483         # Looking for http://schema.org/VideoObject
2484         json_ld = self._search_json_ld(
2485             webpage, video_id, default={}, expected_type='VideoObject')
2486         if json_ld.get('url'):
2487             info_dict.update({
2488                 'title': video_title or info_dict['title'],
2489                 'description': video_description,
2490                 'thumbnail': video_thumbnail,
2491                 'age_limit': age_limit
2492             })
2493             info_dict.update(json_ld)
2494             return info_dict
2495
2496         # Look for HTML5 media
2497         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
2498         if entries:
2499             for entry in entries:
2500                 entry.update({
2501                     'id': video_id,
2502                     'title': video_title,
2503                 })
2504                 self._sort_formats(entry['formats'])
2505             return self.playlist_result(entries)
2506
2507         jwplayer_data_str = self._find_jwplayer_data(webpage)
2508         if jwplayer_data_str:
2509             try:
2510                 jwplayer_data = self._parse_json(
2511                     jwplayer_data_str, video_id, transform_source=js_to_json)
2512                 return self._parse_jwplayer_data(jwplayer_data, video_id)
2513             except ExtractorError:
2514                 pass
2515
2516         def check_video(vurl):
2517             if YoutubeIE.suitable(vurl):
2518                 return True
2519             if RtmpIE.suitable(vurl):
2520                 return True
2521             vpath = compat_urlparse.urlparse(vurl).path
2522             vext = determine_ext(vpath)
2523             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
2524
2525         def filter_video(urls):
2526             return list(filter(check_video, urls))
2527
2528         # Start with something easy: JW Player in SWFObject
2529         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2530         if not found:
2531             # Look for gorilla-vid style embedding
2532             found = filter_video(re.findall(r'''(?sx)
2533                 (?:
2534                     jw_plugins|
2535                     JWPlayerOptions|
2536                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2537                 )
2538                 .*?
2539                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2540         if not found:
2541             # Broaden the search a little bit
2542             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2543         if not found:
2544             # Broaden the findall a little bit: JWPlayer JS loader
2545             found = filter_video(re.findall(
2546                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2547         if not found:
2548             # Flow player
2549             found = filter_video(re.findall(r'''(?xs)
2550                 flowplayer\("[^"]+",\s*
2551                     \{[^}]+?\}\s*,
2552                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2553                         ["']?url["']?\s*:\s*["']([^"']+)["']
2554             ''', webpage))
2555         if not found:
2556             # Cinerama player
2557             found = re.findall(
2558                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2559         if not found:
2560             # Try to find twitter cards info
2561             # twitter:player:stream should be checked before twitter:player since
2562             # it is expected to contain a raw stream (see
2563             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2564             found = filter_video(re.findall(
2565                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2566         if not found:
2567             # We look for Open Graph info:
2568             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
2569             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2570             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2571             if m_video_type is not None:
2572                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2573         if not found:
2574             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2575             found = re.search(
2576                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2577                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2578                 webpage)
2579             if not found:
2580                 # Look also in Refresh HTTP header
2581                 refresh_header = head_response.headers.get('Refresh')
2582                 if refresh_header:
2583                     # In python 2 response HTTP headers are bytestrings
2584                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2585                         refresh_header = refresh_header.decode('iso-8859-1')
2586                     found = re.search(REDIRECT_REGEX, refresh_header)
2587             if found:
2588                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2589                 self.report_following_redirect(new_url)
2590                 return {
2591                     '_type': 'url',
2592                     'url': new_url,
2593                 }
2594
2595         if not found:
2596             # twitter:player is a https URL to iframe player that may or may not
2597             # be supported by youtube-dl thus this is checked the very last (see
2598             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2599             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
2600             if embed_url:
2601                 return self.url_result(embed_url)
2602
2603         if not found:
2604             raise UnsupportedError(url)
2605
2606         entries = []
2607         for video_url in orderedSet(found):
2608             video_url = unescapeHTML(video_url)
2609             video_url = video_url.replace('\\/', '/')
2610             video_url = compat_urlparse.urljoin(url, video_url)
2611             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2612
2613             # Sometimes, jwplayer extraction will result in a YouTube URL
2614             if YoutubeIE.suitable(video_url):
2615                 entries.append(self.url_result(video_url, 'Youtube'))
2616                 continue
2617
2618             # here's a fun little line of code for you:
2619             video_id = os.path.splitext(video_id)[0]
2620
2621             entry_info_dict = {
2622                 'id': video_id,
2623                 'uploader': video_uploader,
2624                 'title': video_title,
2625                 'age_limit': age_limit,
2626             }
2627
2628             if RtmpIE.suitable(video_url):
2629                 entry_info_dict.update({
2630                     '_type': 'url_transparent',
2631                     'ie_key': RtmpIE.ie_key(),
2632                     'url': video_url,
2633                 })
2634                 entries.append(entry_info_dict)
2635                 continue
2636
2637             ext = determine_ext(video_url)
2638             if ext == 'smil':
2639                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2640             elif ext == 'xspf':
2641                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2642             elif ext == 'm3u8':
2643                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2644             elif ext == 'mpd':
2645                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2646             elif ext == 'f4m':
2647                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2648             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
2649                 # Just matching .ism/manifest is not enough to be reliably sure
2650                 # whether it's actually an ISM manifest or some other streaming
2651                 # manifest since there are various streaming URL formats
2652                 # possible (see [1]) as well as some other shenanigans like
2653                 # .smil/manifest URLs that actually serve an ISM (see [2]) and
2654                 # so on.
2655                 # Thus the most reasonable way to solve this is to delegate
2656                 # to generic extractor in order to look into the contents of
2657                 # the manifest itself.
2658                 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
2659                 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
2660                 entry_info_dict = self.url_result(
2661                     smuggle_url(video_url, {'to_generic': True}),
2662                     GenericIE.ie_key())
2663             else:
2664                 entry_info_dict['url'] = video_url
2665
2666             if entry_info_dict.get('formats'):
2667                 self._sort_formats(entry_info_dict['formats'])
2668
2669             entries.append(entry_info_dict)
2670
2671         if len(entries) == 1:
2672             return entries[0]
2673         else:
2674             for num, e in enumerate(entries, start=1):
2675                 # 'url' results don't have a title
2676                 if e.get('title') is not None:
2677                     e['title'] = '%s (%d)' % (e['title'], num)
2678             return {
2679                 '_type': 'playlist',
2680                 'entries': entries,
2681             }