_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     js_to_json,
  24     orderedSet,
  25     sanitized_Request,
  26     smuggle_url,
  27     unescapeHTML,
  28     unified_strdate,
  29     unsmuggle_url,
  30     UnsupportedError,
  31     xpath_text,
  32 )
  33 from .commonprotocols import RtmpIE
  34 from .brightcove import (
  35     BrightcoveLegacyIE,
  36     BrightcoveNewIE,
  37 )
  38 from .nbc import NBCSportsVPlayerIE
  39 from .ooyala import OoyalaIE
  40 from .rutv import RUTVIE
  41 from .tvc import TVCIE
  42 from .sportbox import SportBoxEmbedIE
  43 from .smotri import SmotriIE
  44 from .myvi import MyviIE
  45 from .condenast import CondeNastIE
  46 from .udn import UDNEmbedIE
  47 from .senateisvp import SenateISVPIE
  48 from .svt import SVTIE
  49 from .pornhub import PornHubIE
  50 from .xhamster import XHamsterEmbedIE
  51 from .tnaflix import TNAFlixNetworkEmbedIE
  52 from .drtuber import DrTuberIE
  53 from .redtube import RedTubeIE
  54 from .vimeo import VimeoIE
  55 from .dailymotion import (
  56     DailymotionIE,
  57     DailymotionCloudIE,
  58 )
  59 from .onionstudios import OnionStudiosIE
  60 from .viewlift import ViewLiftEmbedIE
  61 from .mtv import MTVServicesEmbeddedIE
  62 from .pladform import PladformIE
  63 from .videomore import VideomoreIE
  64 from .webcaster import WebcasterFeedIE
  65 from .googledrive import GoogleDriveIE
  66 from .jwplatform import JWPlatformIE
  67 from .digiteka import DigitekaIE
  68 from .arkena import ArkenaIE
  69 from .instagram import InstagramIE
  70 from .liveleak import LiveLeakIE
  71 from .threeqsdn import ThreeQSDNIE
  72 from .theplatform import ThePlatformIE
  73 from .vessel import VesselIE
  74 from .kaltura import KalturaIE
  75 from .eagleplatform import EaglePlatformIE
  76 from .facebook import FacebookIE
  77 from .soundcloud import SoundcloudIE
  78 from .tunein import TuneInBaseIE
  79 from .vbox7 import Vbox7IE
  80 from .dbtv import DBTVIE
  81 from .piksel import PikselIE
  82 from .videa import VideaIE
  83 from .twentymin import TwentyMinutenIE
  84 from .ustream import UstreamIE
  85 from .openload import OpenloadIE
  86 from .videopress import VideoPressIE
  87
  88
  89 class GenericIE(InfoExtractor):
  90     IE_DESC = 'Generic downloader that works on some sites'
  91     _VALID_URL = r'.*'
  92     IE_NAME = 'generic'
  93     _TESTS = [
  94         # Direct link to a video
  95         {
  96             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  97             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  98             'info_dict': {
  99                 'id': 'trailer',
 100                 'ext': 'mp4',
 101                 'title': 'trailer',
 102                 'upload_date': '20100513',
 103             }
 104         },
 105         # Direct link to media delivered compressed (until Accept-Encoding is *)
 106         {
 107             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
 108             'md5': '128c42e68b13950268b648275386fc74',
 109             'info_dict': {
 110                 'id': 'FictionJunction-Parallel_Hearts',
 111                 'ext': 'flac',
 112                 'title': 'FictionJunction-Parallel_Hearts',
 113                 'upload_date': '20140522',
 114             },
 115             'expected_warnings': [
 116                 'URL could be a direct video link, returning it as such.'
 117             ],
 118             'skip': 'URL invalid',
 119         },
 120         # Direct download with broken HEAD
 121         {
 122             'url': 'http://ai-radio.org:8000/radio.opus',
 123             'info_dict': {
 124                 'id': 'radio',
 125                 'ext': 'opus',
 126                 'title': 'radio',
 127             },
 128             'params': {
 129                 'skip_download': True,  # infinite live stream
 130             },
 131             'expected_warnings': [
 132                 r'501.*Not Implemented',
 133                 r'400.*Bad Request',
 134             ],
 135         },
 136         # Direct link with incorrect MIME type
 137         {
 138             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 139             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 140             'info_dict': {
 141                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 142                 'id': '5_Lennart_Poettering_-_Systemd',
 143                 'ext': 'webm',
 144                 'title': '5_Lennart_Poettering_-_Systemd',
 145                 'upload_date': '20141120',
 146             },
 147             'expected_warnings': [
 148                 'URL could be a direct video link, returning it as such.'
 149             ]
 150         },
 151         # RSS feed
 152         {
 153             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 154             'info_dict': {
 155                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 156                 'title': 'Zero Punctuation',
 157                 'description': 're:.*groundbreaking video review series.*'
 158             },
 159             'playlist_mincount': 11,
 160         },
 161         # RSS feed with enclosure
 162         {
 163             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 164             'info_dict': {
 165                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 166                 'ext': 'm4v',
 167                 'upload_date': '20150228',
 168                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 169             }
 170         },
 171         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 172         {
 173             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 174             'info_dict': {
 175                 'id': 'smil',
 176                 'ext': 'mp4',
 177                 'title': 'Automatics, robotics and biocybernetics',
 178                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 179                 'upload_date': '20130627',
 180                 'formats': 'mincount:16',
 181                 'subtitles': 'mincount:1',
 182             },
 183             'params': {
 184                 'force_generic_extractor': True,
 185                 'skip_download': True,
 186             },
 187         },
 188         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 189         {
 190             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 191             'info_dict': {
 192                 'id': 'hds',
 193                 'ext': 'flv',
 194                 'title': 'hds',
 195                 'formats': 'mincount:1',
 196             },
 197             'params': {
 198                 'skip_download': True,
 199             },
 200         },
 201         # SMIL from https://www.restudy.dk/video/play/id/1637
 202         {
 203             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 204             'info_dict': {
 205                 'id': 'video_1637',
 206                 'ext': 'flv',
 207                 'title': 'video_1637',
 208                 'formats': 'mincount:3',
 209             },
 210             'params': {
 211                 'skip_download': True,
 212             },
 213         },
 214         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 215         {
 216             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 217             'info_dict': {
 218                 'id': 'smil-service',
 219                 'ext': 'flv',
 220                 'title': 'smil-service',
 221                 'formats': 'mincount:1',
 222             },
 223             'params': {
 224                 'skip_download': True,
 225             },
 226         },
 227         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 228         {
 229             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 230             'info_dict': {
 231                 'id': '4719370',
 232                 'ext': 'mp4',
 233                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 234                 'formats': 'mincount:3',
 235             },
 236             'params': {
 237                 'skip_download': True,
 238             },
 239         },
 240         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 241         {
 242             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 243             'info_dict': {
 244                 'id': 'mZlp2ctYIUEB',
 245                 'ext': 'mp4',
 246                 'title': 'Tikibad ontruimd wegens brand',
 247                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 248                 'thumbnail': r're:^https?://.*\.jpg$',
 249                 'duration': 33,
 250             },
 251             'params': {
 252                 'skip_download': True,
 253             },
 254         },
 255         # MPD from http://dash-mse-test.appspot.com/media.html
 256         {
 257             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 258             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 259             'info_dict': {
 260                 'id': 'car-20120827-manifest',
 261                 'ext': 'mp4',
 262                 'title': 'car-20120827-manifest',
 263                 'formats': 'mincount:9',
 264                 'upload_date': '20130904',
 265             },
 266             'params': {
 267                 'format': 'bestvideo',
 268             },
 269         },
 270         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 271         {
 272             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 273             'info_dict': {
 274                 'id': 'content',
 275                 'ext': 'mp4',
 276                 'title': 'content',
 277                 'formats': 'mincount:8',
 278             },
 279             'params': {
 280                 # m3u8 downloads
 281                 'skip_download': True,
 282             },
 283             'skip': 'video gone',
 284         },
 285         # m3u8 served with Content-Type: text/plain
 286         {
 287             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
 288             'info_dict': {
 289                 'id': 'index',
 290                 'ext': 'mp4',
 291                 'title': 'index',
 292                 'upload_date': '20140720',
 293                 'formats': 'mincount:11',
 294             },
 295             'params': {
 296                 # m3u8 downloads
 297                 'skip_download': True,
 298             },
 299             'skip': 'video gone',
 300         },
 301         # google redirect
 302         {
 303             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 304             'info_dict': {
 305                 'id': 'cmQHVoWB5FY',
 306                 'ext': 'mp4',
 307                 'upload_date': '20130224',
 308                 'uploader_id': 'TheVerge',
 309                 'description': r're:^Chris Ziegler takes a look at the\.*',
 310                 'uploader': 'The Verge',
 311                 'title': 'First Firefox OS phones side-by-side',
 312             },
 313             'params': {
 314                 'skip_download': False,
 315             }
 316         },
 317         {
 318             # redirect in Refresh HTTP header
 319             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 320             'info_dict': {
 321                 'id': 'pO8h3EaFRdo',
 322                 'ext': 'mp4',
 323                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 324                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 325                 'upload_date': '20150917',
 326                 'uploader_id': 'brtvofficial',
 327                 'uploader': 'Boiler Room',
 328             },
 329             'params': {
 330                 'skip_download': False,
 331             },
 332         },
 333         {
 334             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 335             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 336             'info_dict': {
 337                 'id': '13601338388002',
 338                 'ext': 'mp4',
 339                 'uploader': 'www.hodiho.fr',
 340                 'title': 'R\u00e9gis plante sa Jeep',
 341             }
 342         },
 343         # bandcamp page with custom domain
 344         {
 345             'add_ie': ['Bandcamp'],
 346             'url': 'http://bronyrock.com/track/the-pony-mash',
 347             'info_dict': {
 348                 'id': '3235767654',
 349                 'ext': 'mp3',
 350                 'title': 'The Pony Mash',
 351                 'uploader': 'M_Pallante',
 352             },
 353             'skip': 'There is a limit of 200 free downloads / month for the test song',
 354         },
 355         {
 356             # embedded brightcove video
 357             # it also tests brightcove videos that need to set the 'Referer'
 358             # in the http requests
 359             'add_ie': ['BrightcoveLegacy'],
 360             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 361             'info_dict': {
 362                 'id': '2765128793001',
 363                 'ext': 'mp4',
 364                 'title': 'Le cours de bourse : l’analyse technique',
 365                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 366                 'uploader': 'BFM BUSINESS',
 367             },
 368             'params': {
 369                 'skip_download': True,
 370             },
 371         },
 372         {
 373             # embedded with itemprop embedURL and video id spelled as `idVideo`
 374             'add_id': ['BrightcoveLegacy'],
 375             'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
 376             'info_dict': {
 377                 'id': '5255628253001',
 378                 'ext': 'mp4',
 379                 'title': 'md5:37c519b1128915607601e75a87995fc0',
 380                 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
 381                 'uploader': 'BFM BUSINESS',
 382                 'uploader_id': '876450612001',
 383                 'timestamp': 1482255315,
 384                 'upload_date': '20161220',
 385             },
 386             'params': {
 387                 'skip_download': True,
 388             },
 389         },
 390         {
 391             # https://github.com/rg3/youtube-dl/issues/2253
 392             'url': 'http://bcove.me/i6nfkrc3',
 393             'md5': '0ba9446db037002366bab3b3eb30c88c',
 394             'info_dict': {
 395                 'id': '3101154703001',
 396                 'ext': 'mp4',
 397                 'title': 'Still no power',
 398                 'uploader': 'thestar.com',
 399                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 400             },
 401             'add_ie': ['BrightcoveLegacy'],
 402             'skip': 'video gone',
 403         },
 404         {
 405             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 406             'md5': 'fb973ecf6e4a78a67453647444222983',
 407             'info_dict': {
 408                 'id': '3414141473001',
 409                 'ext': 'mp4',
 410                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 411                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 412                 'uploader': 'Championat',
 413             },
 414         },
 415         {
 416             # https://github.com/rg3/youtube-dl/issues/3541
 417             'add_ie': ['BrightcoveLegacy'],
 418             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 419             'info_dict': {
 420                 'id': '3866516442001',
 421                 'ext': 'mp4',
 422                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 423                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 424                 'uploader': 'SBS Broadcasting',
 425             },
 426             'skip': 'Restricted to Netherlands',
 427             'params': {
 428                 'skip_download': True,  # m3u8 download
 429             },
 430         },
 431         {
 432             # Brightcove with alternative playerID key
 433             'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
 434             'info_dict': {
 435                 'id': 'nmeth.2062_SV1',
 436                 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
 437             },
 438             'playlist': [{
 439                 'info_dict': {
 440                     'id': '2228375078001',
 441                     'ext': 'mp4',
 442                     'title': 'nmeth.2062-sv1',
 443                     'description': 'nmeth.2062-sv1',
 444                     'timestamp': 1363357591,
 445                     'upload_date': '20130315',
 446                     'uploader': 'Nature Publishing Group',
 447                     'uploader_id': '1964492299001',
 448                 },
 449             }],
 450         },
 451         # ooyala video
 452         {
 453             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 454             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 455             'info_dict': {
 456                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 457                 'ext': 'mp4',
 458                 'title': '2cc213299525360.mov',  # that's what we get
 459                 'duration': 238.231,
 460             },
 461             'add_ie': ['Ooyala'],
 462         },
 463         {
 464             # ooyala video embedded with http://player.ooyala.com/iframe.js
 465             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 466             'info_dict': {
 467                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 468                 'ext': 'mp4',
 469                 'title': '"Steve Jobs: Man in the Machine" trailer',
 470                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 471                 'duration': 135.427,
 472             },
 473             'params': {
 474                 'skip_download': True,
 475             },
 476             'skip': 'movie expired',
 477         },
 478         # embed.ly video
 479         {
 480             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 481             'info_dict': {
 482                 'id': '9ODmcdjQcHQ',
 483                 'ext': 'mp4',
 484                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 485                 'upload_date': '20140225',
 486                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 487                 'uploader': 'Tested',
 488                 'uploader_id': 'testedcom',
 489             },
 490             # No need to test YoutubeIE here
 491             'params': {
 492                 'skip_download': True,
 493             },
 494         },
 495         # funnyordie embed
 496         {
 497             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 498             'info_dict': {
 499                 'id': '18e820ec3f',
 500                 'ext': 'mp4',
 501                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 502                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 503             },
 504             # HEAD requests lead to endless 301, while GET is OK
 505             'expected_warnings': ['301'],
 506         },
 507         # RUTV embed
 508         {
 509             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 510             'info_dict': {
 511                 'id': '776940',
 512                 'ext': 'mp4',
 513                 'title': 'Охотское море стало целиком российским',
 514                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 515             },
 516             'params': {
 517                 # m3u8 download
 518                 'skip_download': True,
 519             },
 520         },
 521         # TVC embed
 522         {
 523             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 524             'info_dict': {
 525                 'id': '55304',
 526                 'ext': 'mp4',
 527                 'title': 'Дошкольное воспитание',
 528             },
 529         },
 530         # SportBox embed
 531         {
 532             'url': 'http://www.vestifinance.ru/articles/25753',
 533             'info_dict': {
 534                 'id': '25753',
 535                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 536             },
 537             'playlist': [{
 538                 'info_dict': {
 539                     'id': '370908',
 540                     'title': 'Госзаказ. День 3',
 541                     'ext': 'mp4',
 542                 }
 543             }, {
 544                 'info_dict': {
 545                     'id': '370905',
 546                     'title': 'Госзаказ. День 2',
 547                     'ext': 'mp4',
 548                 }
 549             }, {
 550                 'info_dict': {
 551                     'id': '370902',
 552                     'title': 'Госзаказ. День 1',
 553                     'ext': 'mp4',
 554                 }
 555             }],
 556             'params': {
 557                 # m3u8 download
 558                 'skip_download': True,
 559             },
 560         },
 561         # Myvi.ru embed
 562         {
 563             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 564             'info_dict': {
 565                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 566                 'ext': 'mp4',
 567                 'title': 'Ужастики, русский трейлер (2015)',
 568                 'thumbnail': r're:^https?://.*\.jpg$',
 569                 'duration': 153,
 570             }
 571         },
 572         # XHamster embed
 573         {
 574             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 575             'info_dict': {
 576                 'id': 'showthread',
 577                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 578             },
 579             'playlist_mincount': 7,
 580             # This forum no longer allows <iframe> syntax
 581             # Now HTML tags are displayed as-is
 582             'skip': 'No videos on this page',
 583         },
 584         # Embedded TED video
 585         {
 586             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 587             'md5': '65fdff94098e4a607385a60c5177c638',
 588             'info_dict': {
 589                 'id': '1969',
 590                 'ext': 'mp4',
 591                 'title': 'Hidden miracles of the natural world',
 592                 'uploader': 'Louie Schwartzberg',
 593                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 594             }
 595         },
 596         # nowvideo embed hidden behind percent encoding
 597         {
 598             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 599             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 600             'info_dict': {
 601                 'id': '06e53103ca9aa',
 602                 'ext': 'flv',
 603                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 604                 'description': 'No description',
 605             },
 606         },
 607         # arte embed
 608         {
 609             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 610             'md5': '7653032cbb25bf6c80d80f217055fa43',
 611             'info_dict': {
 612                 'id': '048195-004_PLUS7-F',
 613                 'ext': 'flv',
 614                 'title': 'X:enius',
 615                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 616                 'upload_date': '20140320',
 617             },
 618             'params': {
 619                 'skip_download': 'Requires rtmpdump'
 620             },
 621             'skip': 'video gone',
 622         },
 623         # francetv embed
 624         {
 625             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 626             'info_dict': {
 627                 'id': 'EV_30231',
 628                 'ext': 'mp4',
 629                 'title': 'Alcaline, le concert avec Calogero',
 630                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 631                 'upload_date': '20150226',
 632                 'timestamp': 1424989860,
 633                 'duration': 5400,
 634             },
 635             'params': {
 636                 # m3u8 downloads
 637                 'skip_download': True,
 638             },
 639             'expected_warnings': [
 640                 'Forbidden'
 641             ]
 642         },
 643         # Condé Nast embed
 644         {
 645             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 646             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 647             'info_dict': {
 648                 'id': '53501be369702d3275860000',
 649                 'ext': 'mp4',
 650                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 651             }
 652         },
 653         # Dailymotion embed
 654         {
 655             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 656             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 657             'info_dict': {
 658                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 659                 'ext': 'mp4',
 660                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 661                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
 662                 'uploader': 'Spi0n',
 663                 'uploader_id': 'xgditw',
 664                 'upload_date': '20140425',
 665                 'timestamp': 1398441542,
 666             },
 667             'add_ie': ['Dailymotion'],
 668         },
 669         # YouTube embed
 670         {
 671             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 672             'info_dict': {
 673                 'id': 'FXRb4ykk4S0',
 674                 'ext': 'mp4',
 675                 'title': 'The NBL Auction 2014',
 676                 'uploader': 'BADMINTON England',
 677                 'uploader_id': 'BADMINTONEvents',
 678                 'upload_date': '20140603',
 679                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 680             },
 681             'add_ie': ['Youtube'],
 682             'params': {
 683                 'skip_download': True,
 684             }
 685         },
 686         # MTVServices embed
 687         {
 688             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
 689             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
 690             'info_dict': {
 691                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
 692                 'ext': 'mp4',
 693                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
 694                 'description': 'Two valets share their love for movie star Liam Neesons.',
 695                 'timestamp': 1349922600,
 696                 'upload_date': '20121011',
 697             },
 698         },
 699         # YouTube embed via <data-embed-url="">
 700         {
 701             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 702             'info_dict': {
 703                 'id': '4vAffPZIT44',
 704                 'ext': 'mp4',
 705                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 706                 'uploader': 'Gameloft',
 707                 'uploader_id': 'gameloft',
 708                 'upload_date': '20140828',
 709                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 710             },
 711             'params': {
 712                 'skip_download': True,
 713             }
 714         },
 715         # Camtasia studio
 716         {
 717             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 718             'playlist': [{
 719                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 720                 'info_dict': {
 721                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 722                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 723                     'ext': 'flv',
 724                     'duration': 2235.90,
 725                 }
 726             }, {
 727                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 728                 'info_dict': {
 729                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 730                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 731                     'ext': 'flv',
 732                     'duration': 2235.93,
 733                 }
 734             }],
 735             'info_dict': {
 736                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 737             }
 738         },
 739         # Flowplayer
 740         {
 741             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 742             'md5': '9d65602bf31c6e20014319c7d07fba27',
 743             'info_dict': {
 744                 'id': '5123ea6d5e5a7',
 745                 'ext': 'mp4',
 746                 'age_limit': 18,
 747                 'uploader': 'www.handjobhub.com',
 748                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 749             }
 750         },
 751         # Multiple brightcove videos
 752         # https://github.com/rg3/youtube-dl/issues/2283
 753         {
 754             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 755             'info_dict': {
 756                 'id': 'always-never',
 757                 'title': 'Always / Never - The New Yorker',
 758             },
 759             'playlist_count': 3,
 760             'params': {
 761                 'extract_flat': False,
 762                 'skip_download': True,
 763             }
 764         },
 765         # MLB embed
 766         {
 767             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 768             'md5': '96f09a37e44da40dd083e12d9a683327',
 769             'info_dict': {
 770                 'id': '33322633',
 771                 'ext': 'mp4',
 772                 'title': 'Ump changes call to ball',
 773                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 774                 'duration': 48,
 775                 'timestamp': 1401537900,
 776                 'upload_date': '20140531',
 777                 'thumbnail': r're:^https?://.*\.jpg$',
 778             },
 779         },
 780         # Wistia embed
 781         {
 782             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 783             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
 784             'info_dict': {
 785                 'id': '6e2wtrbdaf',
 786                 'ext': 'mov',
 787                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
 788                 'description': 'a Paywall Videos video from Remilon',
 789                 'duration': 644.072,
 790                 'uploader': 'study.com',
 791                 'timestamp': 1459678540,
 792                 'upload_date': '20160403',
 793                 'filesize': 24687186,
 794             },
 795         },
 796         {
 797             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 798             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 799             'info_dict': {
 800                 'id': 'uxjb0lwrcz',
 801                 'ext': 'mp4',
 802                 'title': 'Conversation about Hexagonal Rails Part 1',
 803                 'description': 'a Martin Fowler video from ThoughtWorks',
 804                 'duration': 1715.0,
 805                 'uploader': 'thoughtworks.wistia.com',
 806                 'timestamp': 1401832161,
 807                 'upload_date': '20140603',
 808             },
 809         },
 810         # Wistia standard embed (async)
 811         {
 812             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
 813             'info_dict': {
 814                 'id': '807fafadvk',
 815                 'ext': 'mp4',
 816                 'title': 'Drip Brennan Dunn Workshop',
 817                 'description': 'a JV Webinars video from getdrip-1',
 818                 'duration': 4986.95,
 819                 'timestamp': 1463607249,
 820                 'upload_date': '20160518',
 821             },
 822             'params': {
 823                 'skip_download': True,
 824             }
 825         },
 826         # Soundcloud embed
 827         {
 828             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 829             'info_dict': {
 830                 'id': '174391317',
 831                 'ext': 'mp3',
 832                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 833                 'uploader': 'Sophos Security',
 834                 'title': 'Chet Chat 171 - Oct 29, 2014',
 835                 'upload_date': '20141029',
 836             }
 837         },
 838         # Soundcloud multiple embeds
 839         {
 840             'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
 841             'info_dict': {
 842                 'id': '52809',
 843                 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
 844             },
 845             'playlist_mincount': 7,
 846         },
 847         # TuneIn station embed
 848         {
 849             'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
 850             'info_dict': {
 851                 'id': '204146',
 852                 'ext': 'mp3',
 853                 'title': 'CNRV',
 854                 'location': 'Paris, France',
 855                 'is_live': True,
 856             },
 857             'params': {
 858                 # Live stream
 859                 'skip_download': True,
 860             },
 861         },
 862         # Livestream embed
 863         {
 864             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 865             'info_dict': {
 866                 'id': '67864563',
 867                 'ext': 'flv',
 868                 'upload_date': '20141112',
 869                 'title': 'Rosetta #CometLanding webcast HL 10',
 870             }
 871         },
 872         # Another Livestream embed, without 'new.' in URL
 873         {
 874             'url': 'https://www.freespeech.org/',
 875             'info_dict': {
 876                 'id': '123537347',
 877                 'ext': 'mp4',
 878                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 879             },
 880             'params': {
 881                 # Live stream
 882                 'skip_download': True,
 883             },
 884         },
 885         # LazyYT
 886         {
 887             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 888             'info_dict': {
 889                 'id': '1986',
 890                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 891             },
 892             'playlist_mincount': 2,
 893         },
 894         # Cinchcast embed
 895         {
 896             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 897             'info_dict': {
 898                 'id': '7141703',
 899                 'ext': 'mp3',
 900                 'upload_date': '20141126',
 901                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 902             }
 903         },
 904         # Cinerama player
 905         {
 906             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 907             'info_dict': {
 908                 'id': '730m_DandD_1901_512k',
 909                 'ext': 'mp4',
 910                 'uploader': 'www.abc.net.au',
 911                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 912             }
 913         },
  914         # Embedded Viddler video
 915         {
 916             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 917             'info_dict': {
 918                 'id': '4d03aad9',
 919                 'ext': 'mp4',
 920                 'uploader': 'deadspin',
 921                 'title': 'WALL-TO-GORTAT',
 922                 'timestamp': 1422285291,
 923                 'upload_date': '20150126',
 924             },
 925             'add_ie': ['Viddler'],
 926         },
 927         # Libsyn embed
 928         {
 929             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 930             'info_dict': {
 931                 'id': '3377616',
 932                 'ext': 'mp3',
 933                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 934                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 935                 'upload_date': '20150220',
 936             },
 937             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
 938         },
 939         # jwplayer YouTube
 940         {
 941             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 942             'info_dict': {
 943                 'id': 'Mrj4DVp2zeA',
 944                 'ext': 'mp4',
 945                 'upload_date': '20150212',
 946                 'uploader': 'The National Archives UK',
 947                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 948                 'uploader_id': 'NationalArchives08',
 949                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 950             },
 951         },
 952         # jwplayer rtmp
 953         {
 954             'url': 'http://www.suffolk.edu/sjc/',
 955             'info_dict': {
 956                 'id': 'sjclive',
 957                 'ext': 'flv',
 958                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
 959                 'uploader': 'www.suffolk.edu',
 960             },
 961             'params': {
 962                 'skip_download': True,
 963             }
 964         },
 965         # Complex jwplayer
 966         {
 967             'url': 'http://www.indiedb.com/games/king-machine/videos',
 968             'info_dict': {
 969                 'id': 'videos',
 970                 'ext': 'mp4',
 971                 'title': 'king machine trailer 1',
 972                 'thumbnail': r're:^https?://.*\.jpg$',
 973             },
 974         },
 975         # rtl.nl embed
 976         {
 977             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 978             'playlist_mincount': 5,
 979             'info_dict': {
 980                 'id': 'aanslagen-kopenhagen',
 981                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 982             }
 983         },
 984         # Zapiks embed
 985         {
 986             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 987             'info_dict': {
 988                 'id': '118046',
 989                 'ext': 'mp4',
 990                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 991             }
 992         },
 993         # Kaltura embed (different embed code)
 994         {
 995             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 996             'info_dict': {
 997                 'id': '1_a52wc67y',
 998                 'ext': 'flv',
 999                 'upload_date': '20150127',
1000                 'uploader_id': 'PremierMedia',
1001                 'timestamp': int,
1002                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1003             },
1004         },
1005         # Kaltura embed with single quotes
1006         {
1007             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1008             'info_dict': {
1009                 'id': '0_izeg5utt',
1010                 'ext': 'mp4',
1011                 'title': '35871',
1012                 'timestamp': 1355743100,
1013                 'upload_date': '20121217',
1014                 'uploader_id': 'batchUser',
1015             },
1016             'add_ie': ['Kaltura'],
1017         },
1018         {
1019             # Kaltura embedded via quoted entry_id
1020             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1021             'info_dict': {
1022                 'id': '0_utuok90b',
1023                 'ext': 'mp4',
1024                 'title': '06_matthew_brender_raj_dutt',
1025                 'timestamp': 1466638791,
1026                 'upload_date': '20160622',
1027             },
1028             'add_ie': ['Kaltura'],
1029             'expected_warnings': [
1030                 'Could not send HEAD request'
1031             ],
1032             'params': {
1033                 'skip_download': True,
1034             }
1035         },
1036         {
1037             # Kaltura embedded, some fileExt broken (#11480)
1038             'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1039             'info_dict': {
1040                 'id': '1_sgtvehim',
1041                 'ext': 'mp4',
1042                 'title': 'Our "Standard Models" of particle physics and cosmology',
1043                 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1044                 'timestamp': 1321158993,
1045                 'upload_date': '20111113',
1046                 'uploader_id': 'kps1',
1047             },
1048             'add_ie': ['Kaltura'],
1049         },
1050         # Eagle.Platform embed (generic URL)
1051         {
1052             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
1053             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1054             'info_dict': {
1055                 'id': '227304',
1056                 'ext': 'mp4',
1057                 'title': 'Навальный вышел на свободу',
1058                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
1059                 'thumbnail': r're:^https?://.*\.jpg$',
1060                 'duration': 87,
1061                 'view_count': int,
1062                 'age_limit': 0,
1063             },
1064         },
1065         # ClipYou (Eagle.Platform) embed (custom URL)
1066         {
1067             'url': 'http://muz-tv.ru/play/7129/',
1068             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1069             'info_dict': {
1070                 'id': '12820',
1071                 'ext': 'mp4',
1072                 'title': "'O Sole Mio",
1073                 'thumbnail': r're:^https?://.*\.jpg$',
1074                 'duration': 216,
1075                 'view_count': int,
1076             },
1077         },
1078         # Pladform embed
1079         {
1080             'url': 'http://muz-tv.ru/kinozal/view/7400/',
1081             'info_dict': {
1082                 'id': '100183293',
1083                 'ext': 'mp4',
1084                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1085                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1086                 'thumbnail': r're:^https?://.*\.jpg$',
1087                 'duration': 694,
1088                 'age_limit': 0,
1089             },
1090         },
1091         # Playwire embed
1092         {
1093             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1094             'info_dict': {
1095                 'id': '3519514',
1096                 'ext': 'mp4',
1097                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1098                 'thumbnail': r're:^https?://.*\.png$',
1099                 'duration': 45.115,
1100             },
1101         },
1102         # 5min embed
1103         {
1104             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1105             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1106             'info_dict': {
1107                 'id': '518726732',
1108                 'ext': 'mp4',
1109                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1110             },
1111         },
1112         # SVT embed
1113         {
1114             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1115             'info_dict': {
1116                 'id': '2900353',
1117                 'ext': 'flv',
1118                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1119                 'duration': 27,
1120                 'age_limit': 0,
1121             },
1122         },
1123         # Crooks and Liars embed
1124         {
1125             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1126             'info_dict': {
1127                 'id': '8RUoRhRi',
1128                 'ext': 'mp4',
1129                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1130                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1131                 'timestamp': 1428207000,
1132                 'upload_date': '20150405',
1133                 'uploader': 'Heather',
1134             },
1135         },
1136         # Crooks and Liars external embed
1137         {
1138             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1139             'info_dict': {
1140                 'id': 'MTE3MjUtMzQ2MzA',
1141                 'ext': 'mp4',
1142                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1143                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1144                 'timestamp': 1265032391,
1145                 'upload_date': '20100201',
1146                 'uploader': 'Heather',
1147             },
1148         },
1149         # NBC Sports vplayer embed
1150         {
1151             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1152             'info_dict': {
1153                 'id': 'ln7x1qSThw4k',
1154                 'ext': 'flv',
1155                 'title': "PFT Live: New leader in the 'new-look' defense",
1156                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1157                 'uploader': 'NBCU-SPORTS',
1158                 'upload_date': '20140107',
1159                 'timestamp': 1389118457,
1160             },
1161         },
1162         # NBC News embed
1163         {
1164             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1165             'md5': '1aa589c675898ae6d37a17913cf68d66',
1166             'info_dict': {
1167                 'id': '701714499682',
1168                 'ext': 'mp4',
1169                 'title': 'PREVIEW: On Assignment: David Letterman',
1170                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1171             },
1172         },
1173         # UDN embed
1174         {
1175             'url': 'https://video.udn.com/news/300346',
1176             'md5': 'fd2060e988c326991037b9aff9df21a6',
1177             'info_dict': {
1178                 'id': '300346',
1179                 'ext': 'mp4',
1180                 'title': '中一中男師變性 全校師生力挺',
1181                 'thumbnail': r're:^https?://.*\.jpg$',
1182             },
1183             'params': {
1184                 # m3u8 download
1185                 'skip_download': True,
1186             },
1187         },
1188         # Ooyala embed
1189         {
1190             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1191             'info_dict': {
1192                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1193                 'ext': 'mp4',
1194                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1195                 'title': 'This is what separates the Excel masters from the wannabes',
1196                 'duration': 191.933,
1197             },
1198             'params': {
1199                 # m3u8 downloads
1200                 'skip_download': True,
1201             }
1202         },
1203         # Brightcove URL in single quotes
1204         {
1205             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1206             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1207             'info_dict': {
1208                 'id': '4255764656001',
1209                 'ext': 'mp4',
1210                 'title': 'SN Presents: Russell Martin, World Citizen',
1211                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1212                 'uploader': 'Rogers Sportsnet',
1213                 'uploader_id': '1704050871',
1214                 'upload_date': '20150525',
1215                 'timestamp': 1432570283,
1216             },
1217         },
1218         # Dailymotion Cloud video
1219         {
1220             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1221             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1222             'info_dict': {
1223                 'id': 'x2uy8t3',
1224                 'ext': 'mp4',
1225                 'title': 'Sauvons les abeilles ! - Le débat',
1226                 'description': 'md5:d9082128b1c5277987825d684939ca26',
1227                 'thumbnail': r're:^https?://.*\.jpe?g$',
1228                 'timestamp': 1434970506,
1229                 'upload_date': '20150622',
1230                 'uploader': 'Public Sénat',
1231                 'uploader_id': 'xa9gza',
1232             }
1233         },
1234         # OnionStudios embed
1235         {
1236             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1237             'info_dict': {
1238                 'id': '2855',
1239                 'ext': 'mp4',
1240                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1241                 'thumbnail': r're:^https?://.*\.jpe?g$',
1242                 'uploader': 'ClickHole',
1243                 'uploader_id': 'clickhole',
1244             }
1245         },
1246         # SnagFilms embed
1247         {
1248             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1249             'info_dict': {
1250                 'id': '74849a00-85a9-11e1-9660-123139220831',
1251                 'ext': 'mp4',
1252                 'title': '#whilewewatch',
1253             }
1254         },
1255         # AdobeTVVideo embed
1256         {
1257             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1258             'md5': '43662b577c018ad707a63766462b1e87',
1259             'info_dict': {
1260                 'id': '2456',
1261                 'ext': 'mp4',
1262                 'title': 'New experience with Acrobat DC',
1263                 'description': 'New experience with Acrobat DC',
1264                 'duration': 248.667,
1265             },
1266         },
1267         # BrightcoveInPageEmbed embed
1268         {
1269             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1270             'info_dict': {
1271                 'id': '4238694884001',
1272                 'ext': 'flv',
1273                 'title': 'Tabletop: Dread, Last Thoughts',
1274                 'description': 'Tabletop: Dread, Last Thoughts',
1275                 'duration': 51690,
1276             },
1277         },
1278         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
 1279         # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
1280         {
1281             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1282             'info_dict': {
1283                 'id': '4785848093001',
1284                 'ext': 'mp4',
1285                 'title': 'The Cardinal Pell Interview',
1286                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1287                 'uploader': 'GlobeCast Australia - GlobeStream',
1288                 'uploader_id': '2733773828001',
1289                 'upload_date': '20160304',
1290                 'timestamp': 1457083087,
1291             },
1292             'params': {
1293                 # m3u8 downloads
1294                 'skip_download': True,
1295             },
1296         },
1297         # Another form of arte.tv embed
1298         {
1299             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1300             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1301             'info_dict': {
1302                 'id': '030273-562_PLUS7-F',
1303                 'ext': 'mp4',
1304                 'title': 'ARTE Reportage - Nulle part, en France',
1305                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1306                 'upload_date': '20160409',
1307             },
1308         },
1309         # LiveLeak embed
1310         {
1311             'url': 'http://www.wykop.pl/link/3088787/',
1312             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1313             'info_dict': {
1314                 'id': '874_1459135191',
1315                 'ext': 'mp4',
1316                 'title': 'Man shows poor quality of new apartment building',
1317                 'description': 'The wall is like a sand pile.',
1318                 'uploader': 'Lake8737',
1319             }
1320         },
1321         # Duplicated embedded video URLs
1322         {
1323             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1324             'info_dict': {
1325                 'id': '149298443_480_16c25b74_2',
1326                 'ext': 'mp4',
1327                 'title': 'vs. Blue Orange Spring Game',
1328                 'uploader': 'www.hudl.com',
1329             },
1330         },
1331         # twitter:player:stream embed
1332         {
1333             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1334             'info_dict': {
1335                 'id': 'master',
1336                 'ext': 'mp4',
1337                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1338                 'uploader': 'www.rtl.be',
1339             },
1340             'params': {
1341                 # m3u8 downloads
1342                 'skip_download': True,
1343             },
1344         },
1345         # twitter:player embed
1346         {
1347             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1348             'md5': 'a3e0df96369831de324f0778e126653c',
1349             'info_dict': {
1350                 'id': '4909620399001',
1351                 'ext': 'mp4',
1352                 'title': 'What Do Black Holes Sound Like?',
1353                 'description': 'what do black holes sound like',
1354                 'upload_date': '20160524',
1355                 'uploader_id': '29913724001',
1356                 'timestamp': 1464107587,
1357                 'uploader': 'TheAtlantic',
1358             },
1359             'add_ie': ['BrightcoveLegacy'],
1360         },
1361         # Facebook <iframe> embed
1362         {
1363             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1364             'md5': 'fbcde74f534176ecb015849146dd3aee',
1365             'info_dict': {
1366                 'id': '599637780109885',
1367                 'ext': 'mp4',
1368                 'title': 'Facebook video #599637780109885',
1369             },
1370         },
1371         # Facebook API embed
1372         {
1373             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1374             'md5': 'a47372ee61b39a7b90287094d447d94e',
1375             'info_dict': {
1376                 'id': '10153467542406923',
1377                 'ext': 'mp4',
1378                 'title': 'Facebook video #10153467542406923',
1379             },
1380         },
1381         # Wordpress "YouTube Video Importer" plugin
1382         {
1383             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1384             'md5': 'd16797741b560b485194eddda8121b48',
1385             'info_dict': {
1386                 'id': 'HNTXWDXV9Is',
1387                 'ext': 'mp4',
1388                 'title': 'Blue Devils Drumline Stanford lot 2016',
1389                 'upload_date': '20160627',
1390                 'uploader_id': 'GENOCIDE8GENERAL10',
1391                 'uploader': 'cylus cyrus',
1392             },
1393         },
1394         {
1395             # video stored on custom kaltura server
1396             'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1397             'md5': '537617d06e64dfed891fa1593c4b30cc',
1398             'info_dict': {
1399                 'id': '0_1iotm5bh',
1400                 'ext': 'mp4',
1401                 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1402                 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1403                 'uploader_id': 'videos.expansion@el-mundo.net',
1404                 'upload_date': '20150429',
1405                 'timestamp': 1430303472,
1406             },
1407             'add_ie': ['Kaltura'],
1408         },
1409         {
1410             # Non-standard Vimeo embed
1411             'url': 'https://openclassrooms.com/courses/understanding-the-web',
1412             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1413             'info_dict': {
1414                 'id': '148867247',
1415                 'ext': 'mp4',
1416                 'title': 'Understanding the web - Teaser',
1417                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1418                 'upload_date': '20151214',
1419                 'uploader': 'OpenClassrooms',
1420                 'uploader_id': 'openclassrooms',
1421             },
1422             'add_ie': ['Vimeo'],
1423         },
1424         {
1425             # generic vimeo embed that requires original URL passed as Referer
1426             'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1427             'only_matching': True,
1428         },
1429         {
1430             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1431             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1432             'info_dict': {
1433                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1434                 'ext': 'mp4',
1435                 'title': 'Big Buck Bunny',
1436                 'description': 'Royalty free test video',
1437                 'timestamp': 1432816365,
1438                 'upload_date': '20150528',
1439                 'is_live': False,
1440             },
1441             'params': {
1442                 'skip_download': True,
1443             },
1444             'add_ie': [ArkenaIE.ie_key()],
1445         },
1446         {
1447             'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1448             'info_dict': {
1449                 'id': '1c7141f46c',
1450                 'ext': 'mp4',
1451                 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1452             },
1453             'params': {
1454                 'skip_download': True,
1455             },
1456             'add_ie': [Vbox7IE.ie_key()],
1457         },
1458         {
1459             # DBTV embeds
1460             'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1461             'info_dict': {
1462                 'id': '43254897',
1463                 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1464             },
1465             'playlist_mincount': 3,
1466         },
1467         {
1468             # Videa embeds
1469             'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1470             'info_dict': {
1471                 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1472                 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1473             },
1474             'playlist_mincount': 2,
1475         },
1476         {
1477             # 20 minuten embed
1478             'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1479             'info_dict': {
1480                 'id': '523629',
1481                 'ext': 'mp4',
1482                 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1483                 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1484             },
1485             'params': {
1486                 'skip_download': True,
1487             },
1488             'add_ie': [TwentyMinutenIE.ie_key()],
1489         },
1490         {
1491             # VideoPress embed
1492             'url': 'https://en.support.wordpress.com/videopress/',
1493             'info_dict': {
1494                 'id': 'OcobLTqC',
1495                 'ext': 'm4v',
1496                 'title': 'IMG_5786',
1497                 'timestamp': 1435711927,
1498                 'upload_date': '20150701',
1499             },
1500             'params': {
1501                 'skip_download': True,
1502             },
1503             'add_ie': [VideoPressIE.ie_key()],
1504         }
1505         # {
1506         #     # TODO: find another test
1507         #     # http://schema.org/VideoObject
1508         #     'url': 'https://flipagram.com/f/nyvTSJMKId',
1509         #     'md5': '888dcf08b7ea671381f00fab74692755',
1510         #     'info_dict': {
1511         #         'id': 'nyvTSJMKId',
1512         #         'ext': 'mp4',
1513         #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1514         #         'description': '#love for cats.',
1515         #         'timestamp': 1461244995,
1516         #         'upload_date': '20160421',
1517         #     },
1518         #     'params': {
1519         #         'force_generic_extractor': True,
1520         #     },
1521         # }
1522     ]
1523
def report_following_redirect(self, new_url):
    """Send a '[redirect]' notice naming new_url to the downloader's screen output."""
    notice = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(notice)
1527
def _extract_rss(self, url, video_id, doc):
    """Build a playlist result from a parsed RSS document.

    doc is the parsed feed element whose <channel> child is inspected.
    Every <item> under the channel contributes one 'url' entry pointing at
    its <link> text or, when there is no usable link, at the first
    <enclosure> that carries a url attribute. Items offering neither are
    skipped.

    The playlist id is the feed URL itself; video_id is accepted for
    signature compatibility but is not used here.

    A missing <title> (on the channel or on an item) results in a None
    title instead of an AttributeError.
    """
    entries = []
    for item in doc.findall('./channel/item'):
        next_url = xpath_text(item, 'link', fatal=False)
        if not next_url:
            # No usable <link>: fall back to the first enclosure with a URL
            for enclosure in item.findall('./enclosure'):
                next_url = enclosure.attrib.get('url')
                if next_url:
                    break

        if not next_url:
            continue

        entries.append({
            '_type': 'url',
            'url': next_url,
            # RSS 2.0 allows items without a <title>, so look it up
            # non-fatally rather than dereferencing find() directly
            'title': xpath_text(item, 'title', fatal=False),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': xpath_text(doc, './channel/title', fatal=False),
        'description': xpath_text(doc, './channel/description', fatal=False),
        'entries': entries,
    }
1559
def _extract_camtasia(self, url, video_id, webpage):
    """Extract a playlist from a page embedding a Camtasia player.

    Searches webpage for the csConfigFile variable, downloads the XML
    configuration it names (resolved relative to url) and turns every child
    of the first ./playlist/array/fileset node that has a non-empty <uri>
    into a playlist entry. Entry titles combine the page's DC.title meta
    value with the child's tag name.

    Returns None if no camtasia video can be found, i.e. when the page has
    no csConfigFile variable or the configuration contains no fileset.
    """
    camtasia_cfg = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if camtasia_cfg is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')
    if fileset_node is None:
        # Configuration without a fileset: nothing to extract
        return None

    entries = []
    # Iterate the element directly: Element.getchildren() is deprecated
    # and was removed in Python 3.9
    for n in fileset_node:
        uri = xpath_text(n, './uri', fatal=False)
        if not uri:
            continue

        entries.append({
            'id': os.path.splitext(uri.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, uri),
            # <duration> may be absent; float_or_none receives None then
            'duration': float_or_none(xpath_text(n, './duration', fatal=False)),
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
1596
1597     def _real_extract(self, url):
1598         if url.startswith('//'):
1599             return {
1600                 '_type': 'url',
1601                 'url': self.http_scheme() + url,
1602             }
1603
1604         parsed_url = compat_urlparse.urlparse(url)
1605         if not parsed_url.scheme:
1606             default_search = self._downloader.params.get('default_search')
1607             if default_search is None:
1608                 default_search = 'fixup_error'
1609
1610             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1611                 if '/' in url:
1612                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1613                     return self.url_result('http://' + url)
1614                 elif default_search != 'fixup_error':
1615                     if default_search == 'auto_warning':
1616                         if re.match(r'^(?:url|URL)$', url):
1617                             raise ExtractorError(
1618                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1619                                 expected=True)
1620                         else:
1621                             self._downloader.report_warning(
1622                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1623                     return self.url_result('ytsearch:' + url)
1624
1625             if default_search in ('error', 'fixup_error'):
1626                 raise ExtractorError(
1627                     '%r is not a valid URL. '
1628                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1629                     % (url, url), expected=True)
1630             else:
1631                 if ':' not in default_search:
1632                     default_search += ':'
1633                 return self.url_result(default_search + url)
1634
1635         url, smuggled_data = unsmuggle_url(url)
1636         force_videoid = None
1637         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1638         if smuggled_data and 'force_videoid' in smuggled_data:
1639             force_videoid = smuggled_data['force_videoid']
1640             video_id = force_videoid
1641         else:
1642             video_id = self._generic_id(url)
1643
1644         self.to_screen('%s: Requesting header' % video_id)
1645
1646         head_req = HEADRequest(url)
1647         head_response = self._request_webpage(
1648             head_req, video_id,
1649             note=False, errnote='Could not send HEAD request to %s' % url,
1650             fatal=False)
1651
1652         if head_response is not False:
1653             # Check for redirect
1654             new_url = head_response.geturl()
1655             if url != new_url:
1656                 self.report_following_redirect(new_url)
1657                 if force_videoid:
1658                     new_url = smuggle_url(
1659                         new_url, {'force_videoid': force_videoid})
1660                 return self.url_result(new_url)
1661
1662         full_response = None
1663         if head_response is False:
1664             request = sanitized_Request(url)
1665             request.add_header('Accept-Encoding', '*')
1666             full_response = self._request_webpage(request, video_id)
1667             head_response = full_response
1668
1669         info_dict = {
1670             'id': video_id,
1671             'title': self._generic_title(url),
1672             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1673         }
1674
1675         # Check for direct link to a video
1676         content_type = head_response.headers.get('Content-Type', '').lower()
1677         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1678         if m:
1679             format_id = m.group('format_id')
1680             if format_id.endswith('mpegurl'):
1681                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1682             elif format_id == 'f4m':
1683                 formats = self._extract_f4m_formats(url, video_id)
1684             else:
1685                 formats = [{
1686                     'format_id': m.group('format_id'),
1687                     'url': url,
1688                     'vcodec': 'none' if m.group('type') == 'audio' else None
1689                 }]
1690                 info_dict['direct'] = True
1691             self._sort_formats(formats)
1692             info_dict['formats'] = formats
1693             return info_dict
1694
1695         if not self._downloader.params.get('test', False) and not is_intentional:
1696             force = self._downloader.params.get('force_generic_extractor', False)
1697             self._downloader.report_warning(
1698                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1699
1700         if not full_response:
1701             request = sanitized_Request(url)
1702             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1703             # making it impossible to download only chunk of the file (yet we need only 512kB to
1704             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1705             # that will always result in downloading the whole file that is not desirable.
1706             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1707             # to accept raw bytes and being able to download only a chunk.
1708             # It may probably better to solve this by checking Content-Type for application/octet-stream
1709             # after HEAD request finishes, but not sure if we can rely on this.
1710             request.add_header('Accept-Encoding', '*')
1711             full_response = self._request_webpage(request, video_id)
1712
1713         first_bytes = full_response.read(512)
1714
1715         # Is it an M3U playlist?
1716         if first_bytes.startswith(b'#EXTM3U'):
1717             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1718             self._sort_formats(info_dict['formats'])
1719             return info_dict
1720
1721         # Maybe it's a direct link to a video?
1722         # Be careful not to download the whole thing!
1723         if not is_html(first_bytes):
1724             self._downloader.report_warning(
1725                 'URL could be a direct video link, returning it as such.')
1726             info_dict.update({
1727                 'direct': True,
1728                 'url': url,
1729             })
1730             return info_dict
1731
1732         webpage = self._webpage_read_content(
1733             full_response, url, video_id, prefix=first_bytes)
1734
1735         self.report_extraction(video_id)
1736
1737         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1738         try:
1739             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1740             if doc.tag == 'rss':
1741                 return self._extract_rss(url, video_id, doc)
1742             elif doc.tag == 'SmoothStreamingMedia':
1743                 info_dict['formats'] = self._parse_ism_formats(doc, url)
1744                 self._sort_formats(info_dict['formats'])
1745                 return info_dict
1746             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1747                 smil = self._parse_smil(doc, url, video_id)
1748                 self._sort_formats(smil['formats'])
1749                 return smil
1750             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1751                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1752             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1753                 info_dict['formats'] = self._parse_mpd_formats(
1754                     doc, video_id,
1755                     mpd_base_url=full_response.geturl().rpartition('/')[0],
1756                     mpd_url=url)
1757                 self._sort_formats(info_dict['formats'])
1758                 return info_dict
1759             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1760                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1761                 self._sort_formats(info_dict['formats'])
1762                 return info_dict
1763         except compat_xml_parse_error:
1764             pass
1765
1766         # Is it a Camtasia project?
1767         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1768         if camtasia_res is not None:
1769             return camtasia_res
1770
1771         # Sometimes embedded video player is hidden behind percent encoding
1772         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1773         # Unescaping the whole page allows to handle those cases in a generic way
1774         webpage = compat_urllib_parse_unquote(webpage)
1775
1776         # it's tempting to parse this further, but you would
1777         # have to take into account all the variations like
1778         #   Video Title - Site Name
1779         #   Site Name | Video Title
1780         #   Video Title - Tagline | Site Name
1781         # and so on and so forth; it's just not practical
1782         video_title = self._og_search_title(
1783             webpage, default=None) or self._html_search_regex(
1784             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1785             default='video')
1786
1787         # Try to detect age limit automatically
1788         age_limit = self._rta_search(webpage)
1789         # And then there are the jokers who advertise that they use RTA,
1790         # but actually don't.
1791         AGE_LIMIT_MARKERS = [
1792             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1793         ]
1794         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1795             age_limit = 18
1796
1797         # video uploader is domain name
1798         video_uploader = self._search_regex(
1799             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1800
1801         video_description = self._og_search_description(webpage, default=None)
1802         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1803
1804         # Helper method
1805         def _playlist_from_matches(matches, getter=None, ie=None):
1806             urlrs = orderedSet(
1807                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1808                 for m in matches)
1809             return self.playlist_result(
1810                 urlrs, playlist_id=video_id, playlist_title=video_title)
1811
1812         # Look for Brightcove Legacy Studio embeds
1813         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1814         if bc_urls:
1815             self.to_screen('Brightcove video detected.')
1816             entries = [{
1817                 '_type': 'url',
1818                 'url': smuggle_url(bc_url, {'Referer': url}),
1819                 'ie_key': 'BrightcoveLegacy'
1820             } for bc_url in bc_urls]
1821
1822             return {
1823                 '_type': 'playlist',
1824                 'title': video_title,
1825                 'id': video_id,
1826                 'entries': entries,
1827             }
1828
1829         # Look for Brightcove New Studio embeds
1830         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1831         if bc_urls:
1832             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1833
1834         # Look for ThePlatform embeds
1835         tp_urls = ThePlatformIE._extract_urls(webpage)
1836         if tp_urls:
1837             return _playlist_from_matches(tp_urls, ie='ThePlatform')
1838
1839         # Look for Vessel embeds
1840         vessel_urls = VesselIE._extract_urls(webpage)
1841         if vessel_urls:
1842             return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
1843
1844         # Look for embedded rtl.nl player
1845         matches = re.findall(
1846             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1847             webpage)
1848         if matches:
1849             return _playlist_from_matches(matches, ie='RtlNl')
1850
1851         vimeo_urls = VimeoIE._extract_urls(url, webpage)
1852         if vimeo_urls:
1853             return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
1854
1855         vid_me_embed_url = self._search_regex(
1856             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1857             webpage, 'vid.me embed', default=None)
1858         if vid_me_embed_url is not None:
1859             return self.url_result(vid_me_embed_url, 'Vidme')
1860
1861         # Look for embedded YouTube player
1862         matches = re.findall(r'''(?x)
1863             (?:
1864                 <iframe[^>]+?src=|
1865                 data-video-url=|
1866                 <embed[^>]+?src=|
1867                 embedSWF\(?:\s*|
1868                 new\s+SWFObject\(
1869             )
1870             (["\'])
1871                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1872                 (?:embed|v|p)/.+?)
1873             \1''', webpage)
1874         if matches:
1875             return _playlist_from_matches(
1876                 matches, lambda m: unescapeHTML(m[1]))
1877
1878         # Look for lazyYT YouTube embed
1879         matches = re.findall(
1880             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1881         if matches:
1882             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1883
1884         # Look for Wordpress "YouTube Video Importer" plugin
1885         matches = re.findall(r'''(?x)<div[^>]+
1886             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1887             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1888         if matches:
1889             return _playlist_from_matches(matches, lambda m: m[-1])
1890
1891         matches = DailymotionIE._extract_urls(webpage)
1892         if matches:
1893             return _playlist_from_matches(matches)
1894
1895         # Look for embedded Dailymotion playlist player (#3822)
1896         m = re.search(
1897             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1898         if m:
1899             playlists = re.findall(
1900                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1901             if playlists:
1902                 return _playlist_from_matches(
1903                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1904
1905         # Look for embedded Wistia player
1906         match = re.search(
1907             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1908         if match:
1909             embed_url = self._proto_relative_url(
1910                 unescapeHTML(match.group('url')))
1911             return {
1912                 '_type': 'url_transparent',
1913                 'url': embed_url,
1914                 'ie_key': 'Wistia',
1915                 'uploader': video_uploader,
1916             }
1917
1918         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1919         if match:
1920             return {
1921                 '_type': 'url_transparent',
1922                 'url': 'wistia:%s' % match.group('id'),
1923                 'ie_key': 'Wistia',
1924                 'uploader': video_uploader,
1925             }
1926
1927         match = re.search(
1928             r'''(?sx)
1929                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1930                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1931             ''', webpage)
1932         if match:
1933             return self.url_result(self._proto_relative_url(
1934                 'wistia:%s' % match.group('id')), 'Wistia')
1935
1936         # Look for SVT player
1937         svt_url = SVTIE._extract_url(webpage)
1938         if svt_url:
1939             return self.url_result(svt_url, 'SVT')
1940
1941         # Look for embedded condenast player
1942         matches = re.findall(
1943             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1944             webpage)
1945         if matches:
1946             return {
1947                 '_type': 'playlist',
1948                 'entries': [{
1949                     '_type': 'url',
1950                     'ie_key': 'CondeNast',
1951                     'url': ma,
1952                 } for ma in matches],
1953                 'title': video_title,
1954                 'id': video_id,
1955             }
1956
1957         # Look for Bandcamp pages with custom domain
1958         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1959         if mobj is not None:
1960             burl = unescapeHTML(mobj.group(1))
1961             # Don't set the extractor because it can be a track url or an album
1962             return self.url_result(burl)
1963
1964         # Look for embedded Vevo player
1965         mobj = re.search(
1966             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1967         if mobj is not None:
1968             return self.url_result(mobj.group('url'))
1969
1970         # Look for embedded Viddler player
1971         mobj = re.search(
1972             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1973             webpage)
1974         if mobj is not None:
1975             return self.url_result(mobj.group('url'))
1976
1977         # Look for NYTimes player
1978         mobj = re.search(
1979             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1980             webpage)
1981         if mobj is not None:
1982             return self.url_result(mobj.group('url'))
1983
1984         # Look for Libsyn player
1985         mobj = re.search(
1986             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1987         if mobj is not None:
1988             return self.url_result(mobj.group('url'))
1989
1990         # Look for Ooyala videos
1991         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1992                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1993                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1994                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1995         if mobj is not None:
1996             embed_token = self._search_regex(
1997                 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
1998                 webpage, 'ooyala embed token', default=None)
1999             return OoyalaIE._build_url_result(smuggle_url(
2000                 mobj.group('ec'), {
2001                     'domain': url,
2002                     'embed_token': embed_token,
2003                 }))
2004
2005         # Look for multiple Ooyala embeds on SBN network websites
2006         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2007         if mobj is not None:
2008             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2009             if embeds:
2010                 return _playlist_from_matches(
2011                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
2012
2013         # Look for Aparat videos
2014         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
2015         if mobj is not None:
2016             return self.url_result(mobj.group(1), 'Aparat')
2017
2018         # Look for MPORA videos
2019         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2020         if mobj is not None:
2021             return self.url_result(mobj.group(1), 'Mpora')
2022
2023         # Look for embedded NovaMov-based player
2024         mobj = re.search(
2025             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2026                     (?P<url>http://(?:(?:embed|www)\.)?
2027                         (?:novamov\.com|
2028                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
2029                            videoweed\.(?:es|com)|
2030                            movshare\.(?:net|sx|ag)|
2031                            divxstage\.(?:eu|net|ch|co|at|ag))
2032                         /embed\.php.+?)\1''', webpage)
2033         if mobj is not None:
2034             return self.url_result(mobj.group('url'))
2035
2036         # Look for embedded Facebook player
2037         facebook_url = FacebookIE._extract_url(webpage)
2038         if facebook_url is not None:
2039             return self.url_result(facebook_url, 'Facebook')
2040
2041         # Look for embedded VK player
2042         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2043         if mobj is not None:
2044             return self.url_result(mobj.group('url'), 'VK')
2045
2046         # Look for embedded Odnoklassniki player
2047         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2048         if mobj is not None:
2049             return self.url_result(mobj.group('url'), 'Odnoklassniki')
2050
2051         # Look for embedded ivi player
2052         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2053         if mobj is not None:
2054             return self.url_result(mobj.group('url'), 'Ivi')
2055
2056         # Look for embedded Huffington Post player
2057         mobj = re.search(
2058             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2059         if mobj is not None:
2060             return self.url_result(mobj.group('url'), 'HuffPost')
2061
2062         # Look for embed.ly
2063         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2064         if mobj is not None:
2065             return self.url_result(mobj.group('url'))
2066         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2067         if mobj is not None:
2068             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2069
2070         # Look for funnyordie embed
2071         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2072         if matches:
2073             return _playlist_from_matches(
2074                 matches, getter=unescapeHTML, ie='FunnyOrDie')
2075
2076         # Look for BBC iPlayer embed
2077         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2078         if matches:
2079             return _playlist_from_matches(matches, ie='BBCCoUk')
2080
2081         # Look for embedded RUTV player
2082         rutv_url = RUTVIE._extract_url(webpage)
2083         if rutv_url:
2084             return self.url_result(rutv_url, 'RUTV')
2085
2086         # Look for embedded TVC player
2087         tvc_url = TVCIE._extract_url(webpage)
2088         if tvc_url:
2089             return self.url_result(tvc_url, 'TVC')
2090
2091         # Look for embedded SportBox player
2092         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2093         if sportbox_urls:
2094             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
2095
2096         # Look for embedded XHamster player
2097         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2098         if xhamster_urls:
2099             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
2100
2101         # Look for embedded TNAFlixNetwork player
2102         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2103         if tnaflix_urls:
2104             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
2105
2106         # Look for embedded PornHub player
2107         pornhub_urls = PornHubIE._extract_urls(webpage)
2108         if pornhub_urls:
2109             return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
2110
2111         # Look for embedded DrTuber player
2112         drtuber_urls = DrTuberIE._extract_urls(webpage)
2113         if drtuber_urls:
2114             return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
2115
2116         # Look for embedded RedTube player
2117         redtube_urls = RedTubeIE._extract_urls(webpage)
2118         if redtube_urls:
2119             return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
2120
2121         # Look for embedded Tvigle player
2122         mobj = re.search(
2123             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2124         if mobj is not None:
2125             return self.url_result(mobj.group('url'), 'Tvigle')
2126
2127         # Look for embedded TED player
2128         mobj = re.search(
2129             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2130         if mobj is not None:
2131             return self.url_result(mobj.group('url'), 'TED')
2132
2133         # Look for embedded Ustream videos
2134         ustream_url = UstreamIE._extract_url(webpage)
2135         if ustream_url:
2136             return self.url_result(ustream_url, UstreamIE.ie_key())
2137
2138         # Look for embedded arte.tv player
2139         mobj = re.search(
2140             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2141             webpage)
2142         if mobj is not None:
2143             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2144
2145         # Look for embedded francetv player
2146         mobj = re.search(
2147             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2148             webpage)
2149         if mobj is not None:
2150             return self.url_result(mobj.group('url'))
2151
2152         # Look for embedded smotri.com player
2153         smotri_url = SmotriIE._extract_url(webpage)
2154         if smotri_url:
2155             return self.url_result(smotri_url, 'Smotri')
2156
2157         # Look for embedded Myvi.ru player
2158         myvi_url = MyviIE._extract_url(webpage)
2159         if myvi_url:
2160             return self.url_result(myvi_url)
2161
2162         # Look for embedded soundcloud player
2163         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2164         if soundcloud_urls:
2165             return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2166
2167         # Look for tunein player
2168         tunein_urls = TuneInBaseIE._extract_urls(webpage)
2169         if tunein_urls:
2170             return _playlist_from_matches(tunein_urls)
2171
2172         # Look for embedded mtvservices player
2173         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2174         if mtvservices_url:
2175             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2176
2177         # Look for embedded yahoo player
2178         mobj = re.search(
2179             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2180             webpage)
2181         if mobj is not None:
2182             return self.url_result(mobj.group('url'), 'Yahoo')
2183
2184         # Look for embedded sbs.com.au player
2185         mobj = re.search(
2186             r'''(?x)
2187             (?:
2188                 <meta\s+property="og:video"\s+content=|
2189                 <iframe[^>]+?src=
2190             )
2191             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2192             webpage)
2193         if mobj is not None:
2194             return self.url_result(mobj.group('url'), 'SBS')
2195
2196         # Look for embedded Cinchcast player
2197         mobj = re.search(
2198             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2199             webpage)
2200         if mobj is not None:
2201             return self.url_result(mobj.group('url'), 'Cinchcast')
2202
2203         mobj = re.search(
2204             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2205             webpage)
2206         if not mobj:
2207             mobj = re.search(
2208                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2209                 webpage)
2210         if mobj is not None:
2211             return self.url_result(mobj.group('url'), 'MLB')
2212
2213         mobj = re.search(
2214             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2215             webpage)
2216         if mobj is not None:
2217             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2218
2219         mobj = re.search(
2220             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2221             webpage)
2222         if mobj is not None:
2223             return self.url_result(mobj.group('url'), 'Livestream')
2224
2225         # Look for Zapiks embed
2226         mobj = re.search(
2227             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2228         if mobj is not None:
2229             return self.url_result(mobj.group('url'), 'Zapiks')
2230
2231         # Look for Kaltura embeds
2232         kaltura_url = KalturaIE._extract_url(webpage)
2233         if kaltura_url:
2234             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2235
2236         # Look for Eagle.Platform embeds
2237         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2238         if eagleplatform_url:
2239             return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
2240
2241         # Look for ClipYou (uses Eagle.Platform) embeds
2242         mobj = re.search(
2243             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2244         if mobj is not None:
2245             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2246
2247         # Look for Pladform embeds
2248         pladform_url = PladformIE._extract_url(webpage)
2249         if pladform_url:
2250             return self.url_result(pladform_url)
2251
2252         # Look for Videomore embeds
2253         videomore_url = VideomoreIE._extract_url(webpage)
2254         if videomore_url:
2255             return self.url_result(videomore_url)
2256
2257         # Look for Webcaster embeds
2258         webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2259         if webcaster_url:
2260             return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2261
2262         # Look for Playwire embeds
2263         mobj = re.search(
2264             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2265         if mobj is not None:
2266             return self.url_result(mobj.group('url'))
2267
2268         # Look for 5min embeds
2269         mobj = re.search(
2270             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2271         if mobj is not None:
2272             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2273
2274         # Look for Crooks and Liars embeds
2275         mobj = re.search(
2276             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2277         if mobj is not None:
2278             return self.url_result(mobj.group('url'))
2279
2280         # Look for NBC Sports VPlayer embeds
2281         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2282         if nbc_sports_url:
2283             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2284
2285         # Look for NBC News embeds
2286         nbc_news_embed_url = re.search(
2287             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2288         if nbc_news_embed_url:
2289             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2290
2291         # Look for Google Drive embeds
2292         google_drive_url = GoogleDriveIE._extract_url(webpage)
2293         if google_drive_url:
2294             return self.url_result(google_drive_url, 'GoogleDrive')
2295
2296         # Look for UDN embeds
2297         mobj = re.search(
2298             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2299         if mobj is not None:
2300             return self.url_result(
2301                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2302
2303         # Look for Senate ISVP iframe
2304         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2305         if senate_isvp_url:
2306             return self.url_result(senate_isvp_url, 'SenateISVP')
2307
2308         # Look for Dailymotion Cloud videos
2309         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2310         if dmcloud_url:
2311             return self.url_result(dmcloud_url, 'DailymotionCloud')
2312
2313         # Look for OnionStudios embeds
2314         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2315         if onionstudios_url:
2316             return self.url_result(onionstudios_url)
2317
2318         # Look for ViewLift embeds
2319         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2320         if viewlift_url:
2321             return self.url_result(viewlift_url)
2322
2323         # Look for JWPlatform embeds
2324         jwplatform_url = JWPlatformIE._extract_url(webpage)
2325         if jwplatform_url:
2326             return self.url_result(jwplatform_url, 'JWPlatform')
2327
2328         # Look for Digiteka embeds
2329         digiteka_url = DigitekaIE._extract_url(webpage)
2330         if digiteka_url:
2331             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2332
2333         # Look for Arkena embeds
2334         arkena_url = ArkenaIE._extract_url(webpage)
2335         if arkena_url:
2336             return self.url_result(arkena_url, ArkenaIE.ie_key())
2337
2338         # Look for Piksel embeds
2339         piksel_url = PikselIE._extract_url(webpage)
2340         if piksel_url:
2341             return self.url_result(piksel_url, PikselIE.ie_key())
2342
2343         # Look for Limelight embeds
2344         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2345         if mobj:
2346             lm = {
2347                 'Media': 'media',
2348                 'Channel': 'channel',
2349                 'ChannelList': 'channel_list',
2350             }
2351             return self.url_result(smuggle_url('limelight:%s:%s' % (
2352                 lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
2353                 'Limelight%s' % mobj.group(1), mobj.group(2))
2354
2355         mobj = re.search(
2356             r'''(?sx)
2357                 <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
2358                     <param[^>]+
2359                         name=(["\'])flashVars\2[^>]+
2360                         value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
2361             ''', webpage)
2362         if mobj:
2363             return self.url_result(smuggle_url(
2364                 'limelight:media:%s' % mobj.group('id'),
2365                 {'source_url': url}), 'LimelightMedia', mobj.group('id'))
2366
2367         # Look for AdobeTVVideo embeds
2368         mobj = re.search(
2369             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2370             webpage)
2371         if mobj is not None:
2372             return self.url_result(
2373                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2374                 'AdobeTVVideo')
2375
2376         # Look for Vine embeds
2377         mobj = re.search(
2378             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2379             webpage)
2380         if mobj is not None:
2381             return self.url_result(
2382                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2383
2384         # Look for VODPlatform embeds
2385         mobj = re.search(
2386             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2387             webpage)
2388         if mobj is not None:
2389             return self.url_result(
2390                 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2391
2392         # Look for Mangomolo embeds
2393         mobj = re.search(
2394             r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2395                 (?:
2396                     video\?.*?\bid=(?P<video_id>\d+)|
2397                     index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2398                 ).+?)\1''', webpage)
2399         if mobj is not None:
2400             info = {
2401                 '_type': 'url_transparent',
2402                 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2403                 'title': video_title,
2404                 'description': video_description,
2405                 'thumbnail': video_thumbnail,
2406                 'uploader': video_uploader,
2407             }
2408             video_id = mobj.group('video_id')
2409             if video_id:
2410                 info.update({
2411                     'ie_key': 'MangomoloVideo',
2412                     'id': video_id,
2413                 })
2414             else:
2415                 info.update({
2416                     'ie_key': 'MangomoloLive',
2417                     'id': mobj.group('channel_id'),
2418                 })
2419             return info
2420
2421         # Look for Instagram embeds
2422         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2423         if instagram_embed_url is not None:
2424             return self.url_result(
2425                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2426
2427         # Look for LiveLeak embeds
2428         liveleak_url = LiveLeakIE._extract_url(webpage)
2429         if liveleak_url:
2430             return self.url_result(liveleak_url, 'LiveLeak')
2431
2432         # Look for 3Q SDN embeds
2433         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2434         if threeqsdn_url:
2435             return {
2436                 '_type': 'url_transparent',
2437                 'ie_key': ThreeQSDNIE.ie_key(),
2438                 'url': self._proto_relative_url(threeqsdn_url),
2439                 'title': video_title,
2440                 'description': video_description,
2441                 'thumbnail': video_thumbnail,
2442                 'uploader': video_uploader,
2443             }
2444
2445         # Look for VBOX7 embeds
2446         vbox7_url = Vbox7IE._extract_url(webpage)
2447         if vbox7_url:
2448             return self.url_result(vbox7_url, Vbox7IE.ie_key())
2449
2450         # Look for DBTV embeds
2451         dbtv_urls = DBTVIE._extract_urls(webpage)
2452         if dbtv_urls:
2453             return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
2454
2455         # Look for Videa embeds
2456         videa_urls = VideaIE._extract_urls(webpage)
2457         if videa_urls:
2458             return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
2459
2460         # Look for 20 minuten embeds
2461         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2462         if twentymin_urls:
2463             return _playlist_from_matches(
2464                 twentymin_urls, ie=TwentyMinutenIE.ie_key())
2465
2466         # Look for Openload embeds
2467         openload_urls = OpenloadIE._extract_urls(webpage)
2468         if openload_urls:
2469             return _playlist_from_matches(
2470                 openload_urls, ie=OpenloadIE.ie_key())
2471
2472         # Look for VideoPress embeds
2473         videopress_urls = VideoPressIE._extract_urls(webpage)
2474         if videopress_urls:
2475             return _playlist_from_matches(
2476                 videopress_urls, ie=VideoPressIE.ie_key())
2477
2478         # Looking for http://schema.org/VideoObject
2479         json_ld = self._search_json_ld(
2480             webpage, video_id, default={}, expected_type='VideoObject')
2481         if json_ld.get('url'):
2482             info_dict.update({
2483                 'title': video_title or info_dict['title'],
2484                 'description': video_description,
2485                 'thumbnail': video_thumbnail,
2486                 'age_limit': age_limit
2487             })
2488             info_dict.update(json_ld)
2489             return info_dict
2490
2491         # Look for HTML5 media
2492         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
2493         if entries:
2494             for entry in entries:
2495                 entry.update({
2496                     'id': video_id,
2497                     'title': video_title,
2498                 })
2499                 self._sort_formats(entry['formats'])
2500             return self.playlist_result(entries)
2501
2502         jwplayer_data_str = self._find_jwplayer_data(webpage)
2503         if jwplayer_data_str:
2504             try:
2505                 jwplayer_data = self._parse_json(
2506                     jwplayer_data_str, video_id, transform_source=js_to_json)
2507                 return self._parse_jwplayer_data(jwplayer_data, video_id)
2508             except ExtractorError:
2509                 pass
2510
2511         def check_video(vurl):
2512             if YoutubeIE.suitable(vurl):
2513                 return True
2514             if RtmpIE.suitable(vurl):
2515                 return True
2516             vpath = compat_urlparse.urlparse(vurl).path
2517             vext = determine_ext(vpath)
2518             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
2519
2520         def filter_video(urls):
2521             return list(filter(check_video, urls))
2522
2523         # Start with something easy: JW Player in SWFObject
2524         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2525         if not found:
2526             # Look for gorilla-vid style embedding
2527             found = filter_video(re.findall(r'''(?sx)
2528                 (?:
2529                     jw_plugins|
2530                     JWPlayerOptions|
2531                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2532                 )
2533                 .*?
2534                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2535         if not found:
2536             # Broaden the search a little bit
2537             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2538         if not found:
2539             # Broaden the findall a little bit: JWPlayer JS loader
2540             found = filter_video(re.findall(
2541                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2542         if not found:
2543             # Flow player
2544             found = filter_video(re.findall(r'''(?xs)
2545                 flowplayer\("[^"]+",\s*
2546                     \{[^}]+?\}\s*,
2547                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2548                         ["']?url["']?\s*:\s*["']([^"']+)["']
2549             ''', webpage))
2550         if not found:
2551             # Cinerama player
2552             found = re.findall(
2553                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2554         if not found:
2555             # Try to find twitter cards info
2556             # twitter:player:stream should be checked before twitter:player since
2557             # it is expected to contain a raw stream (see
2558             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2559             found = filter_video(re.findall(
2560                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2561         if not found:
2562             # We look for Open Graph info:
2563             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
2564             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2565             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2566             if m_video_type is not None:
2567                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2568         if not found:
2569             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2570             found = re.search(
2571                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2572                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2573                 webpage)
2574             if not found:
2575                 # Look also in Refresh HTTP header
2576                 refresh_header = head_response.headers.get('Refresh')
2577                 if refresh_header:
2578                     # In python 2 response HTTP headers are bytestrings
2579                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2580                         refresh_header = refresh_header.decode('iso-8859-1')
2581                     found = re.search(REDIRECT_REGEX, refresh_header)
2582             if found:
2583                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2584                 self.report_following_redirect(new_url)
2585                 return {
2586                     '_type': 'url',
2587                     'url': new_url,
2588                 }
2589
2590         if not found:
2591             # twitter:player is a https URL to iframe player that may or may not
2592             # be supported by youtube-dl thus this is checked the very last (see
2593             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2594             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
2595             if embed_url:
2596                 return self.url_result(embed_url)
2597
2598         if not found:
2599             raise UnsupportedError(url)
2600
2601         entries = []
2602         for video_url in orderedSet(found):
2603             video_url = unescapeHTML(video_url)
2604             video_url = video_url.replace('\\/', '/')
2605             video_url = compat_urlparse.urljoin(url, video_url)
2606             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2607
2608             # Sometimes, jwplayer extraction will result in a YouTube URL
2609             if YoutubeIE.suitable(video_url):
2610                 entries.append(self.url_result(video_url, 'Youtube'))
2611                 continue
2612
2613             # here's a fun little line of code for you:
2614             video_id = os.path.splitext(video_id)[0]
2615
2616             entry_info_dict = {
2617                 'id': video_id,
2618                 'uploader': video_uploader,
2619                 'title': video_title,
2620                 'age_limit': age_limit,
2621             }
2622
2623             if RtmpIE.suitable(video_url):
2624                 entry_info_dict.update({
2625                     '_type': 'url_transparent',
2626                     'ie_key': RtmpIE.ie_key(),
2627                     'url': video_url,
2628                 })
2629                 entries.append(entry_info_dict)
2630                 continue
2631
2632             ext = determine_ext(video_url)
2633             if ext == 'smil':
2634                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2635             elif ext == 'xspf':
2636                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2637             elif ext == 'm3u8':
2638                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2639             elif ext == 'mpd':
2640                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2641             elif ext == 'f4m':
2642                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2643             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
2644                 # Just matching .ism/manifest is not enough to be reliably sure
2645                 # whether it's actually an ISM manifest or some other streaming
2646                 # manifest since there are various streaming URL formats
2647                 # possible (see [1]) as well as some other shenanigans like
2648                 # .smil/manifest URLs that actually serve an ISM (see [2]) and
2649                 # so on.
2650                 # Thus the most reasonable way to solve this is to delegate
2651                 # to generic extractor in order to look into the contents of
2652                 # the manifest itself.
2653                 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
2654                 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
2655                 entry_info_dict = self.url_result(
2656                     smuggle_url(video_url, {'to_generic': True}),
2657                     GenericIE.ie_key())
2658             else:
2659                 entry_info_dict['url'] = video_url
2660
2661             if entry_info_dict.get('formats'):
2662                 self._sort_formats(entry_info_dict['formats'])
2663
2664             entries.append(entry_info_dict)
2665
2666         if len(entries) == 1:
2667             return entries[0]
2668         else:
2669             for num, e in enumerate(entries, start=1):
2670                 # 'url' results don't have a title
2671                 if e.get('title') is not None:
2672                     e['title'] = '%s (%d)' % (e['title'], num)
2673             return {
2674                 '_type': 'playlist',
2675                 'entries': entries,
2676             }