_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     sanitized_Request,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import (
  34     BrightcoveLegacyIE,
  35     BrightcoveNewIE,
  36 )
  37 from .nbc import NBCSportsVPlayerIE
  38 from .ooyala import OoyalaIE
  39 from .rutv import RUTVIE
  40 from .tvc import TVCIE
  41 from .sportbox import SportBoxEmbedIE
  42 from .smotri import SmotriIE
  43 from .myvi import MyviIE
  44 from .condenast import CondeNastIE
  45 from .udn import UDNEmbedIE
  46 from .senateisvp import SenateISVPIE
  47 from .svt import SVTIE
  48 from .pornhub import PornHubIE
  49 from .xhamster import XHamsterEmbedIE
  50 from .tnaflix import TNAFlixNetworkEmbedIE
  51 from .vimeo import VimeoIE
  52 from .dailymotion import DailymotionCloudIE
  53 from .onionstudios import OnionStudiosIE
  54 from .viewlift import ViewLiftEmbedIE
  55 from .screenwavemedia import ScreenwaveMediaIE
  56 from .mtv import MTVServicesEmbeddedIE
  57 from .pladform import PladformIE
  58 from .videomore import VideomoreIE
  59 from .googledrive import GoogleDriveIE
  60 from .jwplatform import JWPlatformIE
  61 from .digiteka import DigitekaIE
  62 from .instagram import InstagramIE
  63 from .liveleak import LiveLeakIE
  64 from .threeqsdn import ThreeQSDNIE
  65 from .theplatform import ThePlatformIE
  66
  67
  68 class GenericIE(InfoExtractor):
  69     IE_DESC = 'Generic downloader that works on some sites'
  70     _VALID_URL = r'.*'
  71     IE_NAME = 'generic'
  72     _TESTS = [
  73         # Direct link to a video
  74         {
  75             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  76             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  77             'info_dict': {
  78                 'id': 'trailer',
  79                 'ext': 'mp4',
  80                 'title': 'trailer',
  81                 'upload_date': '20100513',
  82             }
  83         },
  84         # Direct link to media delivered compressed (until Accept-Encoding is *)
  85         {
  86             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  87             'md5': '128c42e68b13950268b648275386fc74',
  88             'info_dict': {
  89                 'id': 'FictionJunction-Parallel_Hearts',
  90                 'ext': 'flac',
  91                 'title': 'FictionJunction-Parallel_Hearts',
  92                 'upload_date': '20140522',
  93             },
  94             'expected_warnings': [
  95                 'URL could be a direct video link, returning it as such.'
  96             ]
  97         },
  98         # Direct download with broken HEAD
  99         {
 100             'url': 'http://ai-radio.org:8000/radio.opus',
 101             'info_dict': {
 102                 'id': 'radio',
 103                 'ext': 'opus',
 104                 'title': 'radio',
 105             },
 106             'params': {
 107                 'skip_download': True,  # infinite live stream
 108             },
 109             'expected_warnings': [
 110                 r'501.*Not Implemented',
 111                 r'400.*Bad Request',
 112             ],
 113         },
 114         # Direct link with incorrect MIME type
 115         {
 116             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 117             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 118             'info_dict': {
 119                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 120                 'id': '5_Lennart_Poettering_-_Systemd',
 121                 'ext': 'webm',
 122                 'title': '5_Lennart_Poettering_-_Systemd',
 123                 'upload_date': '20141120',
 124             },
 125             'expected_warnings': [
 126                 'URL could be a direct video link, returning it as such.'
 127             ]
 128         },
 129         # RSS feed
 130         {
 131             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 132             'info_dict': {
 133                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 134                 'title': 'Zero Punctuation',
 135                 'description': 're:.*groundbreaking video review series.*'
 136             },
 137             'playlist_mincount': 11,
 138         },
 139         # RSS feed with enclosure
 140         {
 141             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 142             'info_dict': {
 143                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 144                 'ext': 'm4v',
 145                 'upload_date': '20150228',
 146                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 147             }
 148         },
 149         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 150         {
 151             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 152             'info_dict': {
 153                 'id': 'smil',
 154                 'ext': 'mp4',
 155                 'title': 'Automatics, robotics and biocybernetics',
 156                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 157                 'upload_date': '20130627',
 158                 'formats': 'mincount:16',
 159                 'subtitles': 'mincount:1',
 160             },
 161             'params': {
 162                 'force_generic_extractor': True,
 163                 'skip_download': True,
 164             },
 165         },
 166         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 167         {
 168             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 169             'info_dict': {
 170                 'id': 'hds',
 171                 'ext': 'flv',
 172                 'title': 'hds',
 173                 'formats': 'mincount:1',
 174             },
 175             'params': {
 176                 'skip_download': True,
 177             },
 178         },
 179         # SMIL from https://www.restudy.dk/video/play/id/1637
 180         {
 181             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 182             'info_dict': {
 183                 'id': 'video_1637',
 184                 'ext': 'flv',
 185                 'title': 'video_1637',
 186                 'formats': 'mincount:3',
 187             },
 188             'params': {
 189                 'skip_download': True,
 190             },
 191         },
 192         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 193         {
 194             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 195             'info_dict': {
 196                 'id': 'smil-service',
 197                 'ext': 'flv',
 198                 'title': 'smil-service',
 199                 'formats': 'mincount:1',
 200             },
 201             'params': {
 202                 'skip_download': True,
 203             },
 204         },
 205         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 206         {
 207             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 208             'info_dict': {
 209                 'id': '4719370',
 210                 'ext': 'mp4',
 211                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 212                 'formats': 'mincount:3',
 213             },
 214             'params': {
 215                 'skip_download': True,
 216             },
 217         },
 218         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 219         {
 220             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 221             'info_dict': {
 222                 'id': 'mZlp2ctYIUEB',
 223                 'ext': 'mp4',
 224                 'title': 'Tikibad ontruimd wegens brand',
 225                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 226                 'thumbnail': 're:^https?://.*\.jpg$',
 227                 'duration': 33,
 228             },
 229             'params': {
 230                 'skip_download': True,
 231             },
 232         },
 233         # MPD from http://dash-mse-test.appspot.com/media.html
 234         {
 235             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 236             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 237             'info_dict': {
 238                 'id': 'car-20120827-manifest',
 239                 'ext': 'mp4',
 240                 'title': 'car-20120827-manifest',
 241                 'formats': 'mincount:9',
 242                 'upload_date': '20130904',
 243             },
 244             'params': {
 245                 'format': 'bestvideo',
 246             },
 247         },
 248         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 249         {
 250             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 251             'info_dict': {
 252                 'id': 'content',
 253                 'ext': 'mp4',
 254                 'title': 'content',
 255                 'formats': 'mincount:8',
 256             },
 257             'params': {
 258                 # m3u8 downloads
 259                 'skip_download': True,
 260             }
 261         },
 262         # m3u8 served with Content-Type: text/plain
 263         {
 264             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
 265             'info_dict': {
 266                 'id': 'index',
 267                 'ext': 'mp4',
 268                 'title': 'index',
 269                 'upload_date': '20140720',
 270                 'formats': 'mincount:11',
 271             },
 272             'params': {
 273                 # m3u8 downloads
 274                 'skip_download': True,
 275             }
 276         },
 277         # google redirect
 278         {
 279             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 280             'info_dict': {
 281                 'id': 'cmQHVoWB5FY',
 282                 'ext': 'mp4',
 283                 'upload_date': '20130224',
 284                 'uploader_id': 'TheVerge',
 285                 'description': 're:^Chris Ziegler takes a look at the\.*',
 286                 'uploader': 'The Verge',
 287                 'title': 'First Firefox OS phones side-by-side',
 288             },
 289             'params': {
 290                 'skip_download': False,
 291             }
 292         },
 293         {
 294             # redirect in Refresh HTTP header
 295             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 296             'info_dict': {
 297                 'id': 'pO8h3EaFRdo',
 298                 'ext': 'mp4',
 299                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 300                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 301                 'upload_date': '20150917',
 302                 'uploader_id': 'brtvofficial',
 303                 'uploader': 'Boiler Room',
 304             },
 305             'params': {
 306                 'skip_download': False,
 307             },
 308         },
 309         {
 310             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 311             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 312             'info_dict': {
 313                 'id': '13601338388002',
 314                 'ext': 'mp4',
 315                 'uploader': 'www.hodiho.fr',
 316                 'title': 'R\u00e9gis plante sa Jeep',
 317             }
 318         },
 319         # bandcamp page with custom domain
 320         {
 321             'add_ie': ['Bandcamp'],
 322             'url': 'http://bronyrock.com/track/the-pony-mash',
 323             'info_dict': {
 324                 'id': '3235767654',
 325                 'ext': 'mp3',
 326                 'title': 'The Pony Mash',
 327                 'uploader': 'M_Pallante',
 328             },
 329             'skip': 'There is a limit of 200 free downloads / month for the test song',
 330         },
 331         # embedded brightcove video
 332         # it also tests brightcove videos that need to set the 'Referer' in the
 333         # http requests
 334         {
 335             'add_ie': ['BrightcoveLegacy'],
 336             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 337             'info_dict': {
 338                 'id': '2765128793001',
 339                 'ext': 'mp4',
 340                 'title': 'Le cours de bourse : l’analyse technique',
 341                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 342                 'uploader': 'BFM BUSINESS',
 343             },
 344             'params': {
 345                 'skip_download': True,
 346             },
 347         },
 348         {
 349             # https://github.com/rg3/youtube-dl/issues/2253
 350             'url': 'http://bcove.me/i6nfkrc3',
 351             'md5': '0ba9446db037002366bab3b3eb30c88c',
 352             'info_dict': {
 353                 'id': '3101154703001',
 354                 'ext': 'mp4',
 355                 'title': 'Still no power',
 356                 'uploader': 'thestar.com',
 357                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 358             },
 359             'add_ie': ['BrightcoveLegacy'],
 360         },
 361         {
 362             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 363             'md5': 'fb973ecf6e4a78a67453647444222983',
 364             'info_dict': {
 365                 'id': '3414141473001',
 366                 'ext': 'mp4',
 367                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 368                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 369                 'uploader': 'Championat',
 370             },
 371         },
 372         {
 373             # https://github.com/rg3/youtube-dl/issues/3541
 374             'add_ie': ['BrightcoveLegacy'],
 375             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 376             'info_dict': {
 377                 'id': '3866516442001',
 378                 'ext': 'mp4',
 379                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 380                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 381                 'uploader': 'SBS Broadcasting',
 382             },
 383             'skip': 'Restricted to Netherlands',
 384             'params': {
 385                 'skip_download': True,  # m3u8 download
 386             },
 387         },
 388         # ooyala video
 389         {
 390             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 391             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 392             'info_dict': {
 393                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 394                 'ext': 'mp4',
 395                 'title': '2cc213299525360.mov',  # that's what we get
 396                 'duration': 238.231,
 397             },
 398             'add_ie': ['Ooyala'],
 399         },
 400         {
 401             # ooyala video embedded with http://player.ooyala.com/iframe.js
 402             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 403             'info_dict': {
 404                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 405                 'ext': 'mp4',
 406                 'title': '"Steve Jobs: Man in the Machine" trailer',
 407                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 408                 'duration': 135.427,
 409             },
 410             'params': {
 411                 'skip_download': True,
 412             },
 413         },
 414         # embed.ly video
 415         {
 416             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 417             'info_dict': {
 418                 'id': '9ODmcdjQcHQ',
 419                 'ext': 'mp4',
 420                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 421                 'upload_date': '20140225',
 422                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 423                 'uploader': 'Tested',
 424                 'uploader_id': 'testedcom',
 425             },
 426             # No need to test YoutubeIE here
 427             'params': {
 428                 'skip_download': True,
 429             },
 430         },
 431         # funnyordie embed
 432         {
 433             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 434             'info_dict': {
 435                 'id': '18e820ec3f',
 436                 'ext': 'mp4',
 437                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 438                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 439             },
 440         },
 441         # RUTV embed
 442         {
 443             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 444             'info_dict': {
 445                 'id': '776940',
 446                 'ext': 'mp4',
 447                 'title': 'Охотское море стало целиком российским',
 448                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 449             },
 450             'params': {
 451                 # m3u8 download
 452                 'skip_download': True,
 453             },
 454         },
 455         # TVC embed
 456         {
 457             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 458             'info_dict': {
 459                 'id': '55304',
 460                 'ext': 'mp4',
 461                 'title': 'Дошкольное воспитание',
 462             },
 463         },
 464         # SportBox embed
 465         {
 466             'url': 'http://www.vestifinance.ru/articles/25753',
 467             'info_dict': {
 468                 'id': '25753',
 469                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 470             },
 471             'playlist': [{
 472                 'info_dict': {
 473                     'id': '370908',
 474                     'title': 'Госзаказ. День 3',
 475                     'ext': 'mp4',
 476                 }
 477             }, {
 478                 'info_dict': {
 479                     'id': '370905',
 480                     'title': 'Госзаказ. День 2',
 481                     'ext': 'mp4',
 482                 }
 483             }, {
 484                 'info_dict': {
 485                     'id': '370902',
 486                     'title': 'Госзаказ. День 1',
 487                     'ext': 'mp4',
 488                 }
 489             }],
 490             'params': {
 491                 # m3u8 download
 492                 'skip_download': True,
 493             },
 494         },
 495         # Myvi.ru embed
 496         {
 497             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 498             'info_dict': {
 499                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 500                 'ext': 'mp4',
 501                 'title': 'Ужастики, русский трейлер (2015)',
 502                 'thumbnail': 're:^https?://.*\.jpg$',
 503                 'duration': 153,
 504             }
 505         },
 506         # XHamster embed
 507         {
 508             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 509             'info_dict': {
 510                 'id': 'showthread',
 511                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 512             },
 513             'playlist_mincount': 7,
 514         },
 515         # Embedded TED video
 516         {
 517             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 518             'md5': '65fdff94098e4a607385a60c5177c638',
 519             'info_dict': {
 520                 'id': '1969',
 521                 'ext': 'mp4',
 522                 'title': 'Hidden miracles of the natural world',
 523                 'uploader': 'Louie Schwartzberg',
 524                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 525             }
 526         },
 527         # Embedded Ustream video
 528         {
 529             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 530             'md5': '27b99cdb639c9b12a79bca876a073417',
 531             'info_dict': {
 532                 'id': '45734260',
 533                 'ext': 'flv',
 534                 'uploader': 'AU SPA:  The NSA and Privacy',
 535                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 536             }
 537         },
 538         # nowvideo embed hidden behind percent encoding
 539         {
 540             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 541             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 542             'info_dict': {
 543                 'id': '06e53103ca9aa',
 544                 'ext': 'flv',
 545                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 546                 'description': 'No description',
 547             },
 548         },
 549         # arte embed
 550         {
 551             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 552             'md5': '7653032cbb25bf6c80d80f217055fa43',
 553             'info_dict': {
 554                 'id': '048195-004_PLUS7-F',
 555                 'ext': 'flv',
 556                 'title': 'X:enius',
 557                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 558                 'upload_date': '20140320',
 559             },
 560             'params': {
 561                 'skip_download': 'Requires rtmpdump'
 562             }
 563         },
 564         # francetv embed
 565         {
 566             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 567             'info_dict': {
 568                 'id': 'EV_30231',
 569                 'ext': 'mp4',
 570                 'title': 'Alcaline, le concert avec Calogero',
 571                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 572                 'upload_date': '20150226',
 573                 'timestamp': 1424989860,
 574                 'duration': 5400,
 575             },
 576             'params': {
 577                 # m3u8 downloads
 578                 'skip_download': True,
 579             },
 580             'expected_warnings': [
 581                 'Forbidden'
 582             ]
 583         },
 584         # Condé Nast embed
 585         {
 586             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 587             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 588             'info_dict': {
 589                 'id': '53501be369702d3275860000',
 590                 'ext': 'mp4',
 591                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 592             }
 593         },
 594         # Dailymotion embed
 595         {
 596             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 597             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 598             'info_dict': {
 599                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 600                 'ext': 'mp4',
 601                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 602                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
 603                 'uploader': 'Spi0n',
 604                 'uploader_id': 'xgditw',
 605                 'upload_date': '20140425',
 606                 'timestamp': 1398441542,
 607             },
 608             'add_ie': ['Dailymotion'],
 609         },
 610         # YouTube embed
 611         {
 612             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 613             'info_dict': {
 614                 'id': 'FXRb4ykk4S0',
 615                 'ext': 'mp4',
 616                 'title': 'The NBL Auction 2014',
 617                 'uploader': 'BADMINTON England',
 618                 'uploader_id': 'BADMINTONEvents',
 619                 'upload_date': '20140603',
 620                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 621             },
 622             'add_ie': ['Youtube'],
 623             'params': {
 624                 'skip_download': True,
 625             }
 626         },
 627         # MTVServices embed
 628         {
 629             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 630             'md5': '35727f82f58c76d996fc188f9755b0d5',
 631             'info_dict': {
 632                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 633                 'ext': 'mp4',
 634                 'title': 'Review',
 635                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 636             },
 637         },
 638         # YouTube embed via <data-embed-url="">
 639         {
 640             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 641             'info_dict': {
 642                 'id': '4vAffPZIT44',
 643                 'ext': 'mp4',
 644                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 645                 'uploader': 'Gameloft',
 646                 'uploader_id': 'gameloft',
 647                 'upload_date': '20140828',
 648                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 649             },
 650             'params': {
 651                 'skip_download': True,
 652             }
 653         },
 654         # Camtasia studio
 655         {
 656             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 657             'playlist': [{
 658                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 659                 'info_dict': {
 660                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 661                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 662                     'ext': 'flv',
 663                     'duration': 2235.90,
 664                 }
 665             }, {
 666                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 667                 'info_dict': {
 668                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 669                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 670                     'ext': 'flv',
 671                     'duration': 2235.93,
 672                 }
 673             }],
 674             'info_dict': {
 675                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 676             }
 677         },
 678         # Flowplayer
 679         {
 680             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 681             'md5': '9d65602bf31c6e20014319c7d07fba27',
 682             'info_dict': {
 683                 'id': '5123ea6d5e5a7',
 684                 'ext': 'mp4',
 685                 'age_limit': 18,
 686                 'uploader': 'www.handjobhub.com',
 687                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 688             }
 689         },
 690         # Multiple brightcove videos
 691         # https://github.com/rg3/youtube-dl/issues/2283
 692         {
 693             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 694             'info_dict': {
 695                 'id': 'always-never',
 696                 'title': 'Always / Never - The New Yorker',
 697             },
 698             'playlist_count': 3,
 699             'params': {
 700                 'extract_flat': False,
 701                 'skip_download': True,
 702             }
 703         },
 704         # MLB embed
 705         {
 706             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 707             'md5': '96f09a37e44da40dd083e12d9a683327',
 708             'info_dict': {
 709                 'id': '33322633',
 710                 'ext': 'mp4',
 711                 'title': 'Ump changes call to ball',
 712                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 713                 'duration': 48,
 714                 'timestamp': 1401537900,
 715                 'upload_date': '20140531',
 716                 'thumbnail': 're:^https?://.*\.jpg$',
 717             },
 718         },
 719         # Wistia embed
 720         {
 721             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 722             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
 723             'info_dict': {
 724                 'id': '6e2wtrbdaf',
 725                 'ext': 'mov',
 726                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
 727                 'description': 'a Paywall Videos video from Remilon',
 728                 'duration': 644.072,
 729                 'uploader': 'study.com',
 730                 'timestamp': 1459678540,
 731                 'upload_date': '20160403',
 732                 'filesize': 24687186,
 733             },
 734         },
 735         {
 736             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 737             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 738             'info_dict': {
 739                 'id': 'uxjb0lwrcz',
 740                 'ext': 'mp4',
 741                 'title': 'Conversation about Hexagonal Rails Part 1',
 742                 'description': 'a Martin Fowler video from ThoughtWorks',
 743                 'duration': 1715.0,
 744                 'uploader': 'thoughtworks.wistia.com',
 745                 'timestamp': 1401832161,
 746                 'upload_date': '20140603',
 747             },
 748         },
 749         # Wistia standard embed (async)
 750         {
 751             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
 752             'info_dict': {
 753                 'id': '807fafadvk',
 754                 'ext': 'mp4',
 755                 'title': 'Drip Brennan Dunn Workshop',
 756                 'description': 'a JV Webinars video from getdrip-1',
 757                 'duration': 4986.95,
 758                 'timestamp': 1463607249,
 759                 'upload_date': '20160518',
 760             },
 761             'params': {
 762                 'skip_download': True,
 763             }
 764         },
 765         # Soundcloud embed
 766         {
 767             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 768             'info_dict': {
 769                 'id': '174391317',
 770                 'ext': 'mp3',
 771                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 772                 'uploader': 'Sophos Security',
 773                 'title': 'Chet Chat 171 - Oct 29, 2014',
 774                 'upload_date': '20141029',
 775             }
 776         },
 777         # Livestream embed
 778         {
 779             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 780             'info_dict': {
 781                 'id': '67864563',
 782                 'ext': 'flv',
 783                 'upload_date': '20141112',
 784                 'title': 'Rosetta #CometLanding webcast HL 10',
 785             }
 786         },
 787         # Another Livestream embed, without 'new.' in URL
 788         {
 789             'url': 'https://www.freespeech.org/',
 790             'info_dict': {
 791                 'id': '123537347',
 792                 'ext': 'mp4',
 793                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 794             },
 795             'params': {
 796                 # Live stream
 797                 'skip_download': True,
 798             },
 799         },
 800         # LazyYT
 801         {
 802             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 803             'info_dict': {
 804                 'id': '1986',
 805                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 806             },
 807             'playlist_mincount': 2,
 808         },
 809         # Cinchcast embed
 810         {
 811             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 812             'info_dict': {
 813                 'id': '7141703',
 814                 'ext': 'mp3',
 815                 'upload_date': '20141126',
 816                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 817             }
 818         },
 819         # Cinerama player
 820         {
 821             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 822             'info_dict': {
 823                 'id': '730m_DandD_1901_512k',
 824                 'ext': 'mp4',
 825                 'uploader': 'www.abc.net.au',
 826                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 827             }
 828         },
 829         # embedded viddler video
 830         {
 831             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 832             'info_dict': {
 833                 'id': '4d03aad9',
 834                 'ext': 'mp4',
 835                 'uploader': 'deadspin',
 836                 'title': 'WALL-TO-GORTAT',
 837                 'timestamp': 1422285291,
 838                 'upload_date': '20150126',
 839             },
 840             'add_ie': ['Viddler'],
 841         },
 842         # Libsyn embed
 843         {
 844             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 845             'info_dict': {
 846                 'id': '3377616',
 847                 'ext': 'mp3',
 848                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 849                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 850                 'upload_date': '20150220',
 851             },
 852         },
 853         # jwplayer YouTube
 854         {
 855             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 856             'info_dict': {
 857                 'id': 'Mrj4DVp2zeA',
 858                 'ext': 'mp4',
 859                 'upload_date': '20150212',
 860                 'uploader': 'The National Archives UK',
 861                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 862                 'uploader_id': 'NationalArchives08',
 863                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 864             },
 865         },
 866         # rtl.nl embed
 867         {
 868             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 869             'playlist_mincount': 5,
 870             'info_dict': {
 871                 'id': 'aanslagen-kopenhagen',
 872                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 873             }
 874         },
 875         # Zapiks embed
 876         {
 877             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 878             'info_dict': {
 879                 'id': '118046',
 880                 'ext': 'mp4',
 881                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 882             }
 883         },
 884         # Kaltura embed (different embed code)
 885         {
 886             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 887             'info_dict': {
 888                 'id': '1_a52wc67y',
 889                 'ext': 'flv',
 890                 'upload_date': '20150127',
 891                 'uploader_id': 'PremierMedia',
 892                 'timestamp': int,
 893                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 894             },
 895         },
 896         # Kaltura embed protected with referrer
 897         {
 898             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
 899             'info_dict': {
 900                 'id': '1_g4fbemnq',
 901                 'ext': 'mp4',
 902                 'title': 'Violetta - Achter De Schermen - Ruggero',
 903                 'description': 'Achter de schermen met Ruggero',
 904                 'timestamp': 1435133761,
 905                 'upload_date': '20150624',
 906                 'uploader_id': 'echojecka',
 907             },
 908         },
 909         # Kaltura embed with single quotes
 910         {
 911             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
 912             'info_dict': {
 913                 'id': '0_izeg5utt',
 914                 'ext': 'mp4',
 915                 'title': '35871',
 916                 'timestamp': 1355743100,
 917                 'upload_date': '20121217',
 918                 'uploader_id': 'batchUser',
 919             },
 920             'add_ie': ['Kaltura'],
 921         },
 922         # Eagle.Platform embed (generic URL)
 923         {
 924             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 925             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
 926             'info_dict': {
 927                 'id': '227304',
 928                 'ext': 'mp4',
 929                 'title': 'Навальный вышел на свободу',
 930                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 931                 'thumbnail': 're:^https?://.*\.jpg$',
 932                 'duration': 87,
 933                 'view_count': int,
 934                 'age_limit': 0,
 935             },
 936         },
 937         # ClipYou (Eagle.Platform) embed (custom URL)
 938         {
 939             'url': 'http://muz-tv.ru/play/7129/',
 940             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
 941             'info_dict': {
 942                 'id': '12820',
 943                 'ext': 'mp4',
 944                 'title': "'O Sole Mio",
 945                 'thumbnail': 're:^https?://.*\.jpg$',
 946                 'duration': 216,
 947                 'view_count': int,
 948             },
 949         },
 950         # Pladform embed
 951         {
 952             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 953             'info_dict': {
 954                 'id': '100183293',
 955                 'ext': 'mp4',
 956                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 957                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 958                 'thumbnail': 're:^https?://.*\.jpg$',
 959                 'duration': 694,
 960                 'age_limit': 0,
 961             },
 962         },
 963         # Playwire embed
 964         {
 965             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 966             'info_dict': {
 967                 'id': '3519514',
 968                 'ext': 'mp4',
 969                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 970                 'thumbnail': 're:^https?://.*\.png$',
 971                 'duration': 45.115,
 972             },
 973         },
 974         # 5min embed
 975         {
 976             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 977             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 978             'info_dict': {
 979                 'id': '518726732',
 980                 'ext': 'mp4',
 981                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 982             },
 983         },
 984         # SVT embed
 985         {
 986             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 987             'info_dict': {
 988                 'id': '2900353',
 989                 'ext': 'flv',
 990                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 991                 'duration': 27,
 992                 'age_limit': 0,
 993             },
 994         },
 995         # Crooks and Liars embed
 996         {
 997             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 998             'info_dict': {
 999                 'id': '8RUoRhRi',
1000                 'ext': 'mp4',
1001                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1002                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1003                 'timestamp': 1428207000,
1004                 'upload_date': '20150405',
1005                 'uploader': 'Heather',
1006             },
1007         },
1008         # Crooks and Liars external embed
1009         {
1010             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1011             'info_dict': {
1012                 'id': 'MTE3MjUtMzQ2MzA',
1013                 'ext': 'mp4',
1014                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1015                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1016                 'timestamp': 1265032391,
1017                 'upload_date': '20100201',
1018                 'uploader': 'Heather',
1019             },
1020         },
1021         # NBC Sports vplayer embed
1022         {
1023             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1024             'info_dict': {
1025                 'id': 'ln7x1qSThw4k',
1026                 'ext': 'flv',
1027                 'title': "PFT Live: New leader in the 'new-look' defense",
1028                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1029                 'uploader': 'NBCU-SPORTS',
1030                 'upload_date': '20140107',
1031                 'timestamp': 1389118457,
1032             },
1033         },
1034         # UDN embed
1035         {
1036             'url': 'http://www.udn.com/news/story/7314/822787',
1037             'md5': 'fd2060e988c326991037b9aff9df21a6',
1038             'info_dict': {
1039                 'id': '300346',
1040                 'ext': 'mp4',
1041                 'title': '中一中男師變性 全校師生力挺',
1042                 'thumbnail': 're:^https?://.*\.jpg$',
1043             }
1044         },
1045         # Ooyala embed
1046         {
1047             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1048             'info_dict': {
1049                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1050                 'ext': 'mp4',
1051                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1052                 'title': 'This is what separates the Excel masters from the wannabes',
1053                 'duration': 191.933,
1054             },
1055             'params': {
1056                 # m3u8 downloads
1057                 'skip_download': True,
1058             }
1059         },
1060         # Contains a SMIL manifest
1061         {
1062             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1063             'info_dict': {
1064                 'id': 'file',
1065                 'ext': 'flv',
1066                 'title': '+ Football: Lottery Champions League Europe',
1067                 'uploader': 'www.telewebion.com',
1068             },
1069             'params': {
1070                 # rtmpe downloads
1071                 'skip_download': True,
1072             }
1073         },
1074         # Brightcove URL in single quotes
1075         {
1076             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1077             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1078             'info_dict': {
1079                 'id': '4255764656001',
1080                 'ext': 'mp4',
1081                 'title': 'SN Presents: Russell Martin, World Citizen',
1082                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1083                 'uploader': 'Rogers Sportsnet',
1084                 'uploader_id': '1704050871',
1085                 'upload_date': '20150525',
1086                 'timestamp': 1432570283,
1087             },
1088         },
1089         # Dailymotion Cloud video
1090         {
1091             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1092             'md5': '49444254273501a64675a7e68c502681',
1093             'info_dict': {
1094                 'id': '5585de919473990de4bee11b',
1095                 'ext': 'mp4',
1096                 'title': 'Le débat',
1097                 'thumbnail': 're:^https?://.*\.jpe?g$',
1098             }
1099         },
1100         # OnionStudios embed
1101         {
1102             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1103             'info_dict': {
1104                 'id': '2855',
1105                 'ext': 'mp4',
1106                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1107                 'thumbnail': 're:^https?://.*\.jpe?g$',
1108                 'uploader': 'ClickHole',
1109                 'uploader_id': 'clickhole',
1110             }
1111         },
1112         # SnagFilms embed
1113         {
1114             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1115             'info_dict': {
1116                 'id': '74849a00-85a9-11e1-9660-123139220831',
1117                 'ext': 'mp4',
1118                 'title': '#whilewewatch',
1119             }
1120         },
1121         # AdobeTVVideo embed
1122         {
1123             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1124             'md5': '43662b577c018ad707a63766462b1e87',
1125             'info_dict': {
1126                 'id': '2456',
1127                 'ext': 'mp4',
1128                 'title': 'New experience with Acrobat DC',
1129                 'description': 'New experience with Acrobat DC',
1130                 'duration': 248.667,
1131             },
1132         },
1133         # ScreenwaveMedia embed
1134         {
1135             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1136             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1137             'info_dict': {
1138                 'id': 'cinemasnob-55d26273809dd',
1139                 'ext': 'mp4',
1140                 'title': 'cinemasnob',
1141             },
1142         },
1143         # BrightcoveInPageEmbed embed
1144         {
1145             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1146             'info_dict': {
1147                 'id': '4238694884001',
1148                 'ext': 'flv',
1149                 'title': 'Tabletop: Dread, Last Thoughts',
1150                 'description': 'Tabletop: Dread, Last Thoughts',
1151                 'duration': 51690,
1152             },
1153         },
1154         # JWPlayer with M3U8
1155         {
1156             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1157             'info_dict': {
1158                 'id': 'playlist',
1159                 'ext': 'mp4',
1160                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1161                 'uploader': 'ren.tv',
1162             },
1163             'params': {
1164                 # m3u8 downloads
1165                 'skip_download': True,
1166             }
1167         },
1168         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1169         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1170         {
1171             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1172             'info_dict': {
1173                 'id': '4785848093001',
1174                 'ext': 'mp4',
1175                 'title': 'The Cardinal Pell Interview',
1176                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1177                 'uploader': 'GlobeCast Australia - GlobeStream',
1178                 'uploader_id': '2733773828001',
1179                 'upload_date': '20160304',
1180                 'timestamp': 1457083087,
1181             },
1182             'params': {
1183                 # m3u8 downloads
1184                 'skip_download': True,
1185             },
1186         },
1187         # Another form of arte.tv embed
1188         {
1189             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1190             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1191             'info_dict': {
1192                 'id': '030273-562_PLUS7-F',
1193                 'ext': 'mp4',
1194                 'title': 'ARTE Reportage - Nulle part, en France',
1195                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1196                 'upload_date': '20160409',
1197             },
1198         },
1199         # LiveLeak embed
1200         {
1201             'url': 'http://www.wykop.pl/link/3088787/',
1202             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1203             'info_dict': {
1204                 'id': '874_1459135191',
1205                 'ext': 'mp4',
1206                 'title': 'Man shows poor quality of new apartment building',
1207                 'description': 'The wall is like a sand pile.',
1208                 'uploader': 'Lake8737',
1209             }
1210         },
1211         # Duplicated embedded video URLs
1212         {
1213             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1214             'info_dict': {
1215                 'id': '149298443_480_16c25b74_2',
1216                 'ext': 'mp4',
1217                 'title': 'vs. Blue Orange Spring Game',
1218                 'uploader': 'www.hudl.com',
1219             },
1220         },
1221     ]
1222
1223     def report_following_redirect(self, new_url):
1224         """Report information extraction."""
1225         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1226
1227     def _extract_rss(self, url, video_id, doc):
1228         playlist_title = doc.find('./channel/title').text
1229         playlist_desc_el = doc.find('./channel/description')
1230         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1231
1232         entries = []
1233         for it in doc.findall('./channel/item'):
1234             next_url = xpath_text(it, 'link', fatal=False)
1235             if not next_url:
1236                 enclosure_nodes = it.findall('./enclosure')
1237                 for e in enclosure_nodes:
1238                     next_url = e.attrib.get('url')
1239                     if next_url:
1240                         break
1241
1242             if not next_url:
1243                 continue
1244
1245             entries.append({
1246                 '_type': 'url',
1247                 'url': next_url,
1248                 'title': it.find('title').text,
1249             })
1250
1251         return {
1252             '_type': 'playlist',
1253             'id': url,
1254             'title': playlist_title,
1255             'description': playlist_desc,
1256             'entries': entries,
1257         }
1258
1259     def _extract_camtasia(self, url, video_id, webpage):
1260         """ Returns None if no camtasia video can be found. """
1261
1262         camtasia_cfg = self._search_regex(
1263             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1264             webpage, 'camtasia configuration file', default=None)
1265         if camtasia_cfg is None:
1266             return None
1267
1268         title = self._html_search_meta('DC.title', webpage, fatal=True)
1269
1270         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1271         camtasia_cfg = self._download_xml(
1272             camtasia_url, video_id,
1273             note='Downloading camtasia configuration',
1274             errnote='Failed to download camtasia configuration')
1275         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1276
1277         entries = []
1278         for n in fileset_node.getchildren():
1279             url_n = n.find('./uri')
1280             if url_n is None:
1281                 continue
1282
1283             entries.append({
1284                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1285                 'title': '%s - %s' % (title, n.tag),
1286                 'url': compat_urlparse.urljoin(url, url_n.text),
1287                 'duration': float_or_none(n.find('./duration').text),
1288             })
1289
1290         return {
1291             '_type': 'playlist',
1292             'entries': entries,
1293             'title': title,
1294         }
1295
1296     def _real_extract(self, url):
1297         if url.startswith('//'):
1298             return {
1299                 '_type': 'url',
1300                 'url': self.http_scheme() + url,
1301             }
1302
1303         parsed_url = compat_urlparse.urlparse(url)
1304         if not parsed_url.scheme:
1305             default_search = self._downloader.params.get('default_search')
1306             if default_search is None:
1307                 default_search = 'fixup_error'
1308
1309             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1310                 if '/' in url:
1311                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1312                     return self.url_result('http://' + url)
1313                 elif default_search != 'fixup_error':
1314                     if default_search == 'auto_warning':
1315                         if re.match(r'^(?:url|URL)$', url):
1316                             raise ExtractorError(
1317                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1318                                 expected=True)
1319                         else:
1320                             self._downloader.report_warning(
1321                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1322                     return self.url_result('ytsearch:' + url)
1323
1324             if default_search in ('error', 'fixup_error'):
1325                 raise ExtractorError(
1326                     '%r is not a valid URL. '
1327                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1328                     % (url, url), expected=True)
1329             else:
1330                 if ':' not in default_search:
1331                     default_search += ':'
1332                 return self.url_result(default_search + url)
1333
1334         url, smuggled_data = unsmuggle_url(url)
1335         force_videoid = None
1336         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1337         if smuggled_data and 'force_videoid' in smuggled_data:
1338             force_videoid = smuggled_data['force_videoid']
1339             video_id = force_videoid
1340         else:
1341             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1342
1343         self.to_screen('%s: Requesting header' % video_id)
1344
1345         head_req = HEADRequest(url)
1346         head_response = self._request_webpage(
1347             head_req, video_id,
1348             note=False, errnote='Could not send HEAD request to %s' % url,
1349             fatal=False)
1350
1351         if head_response is not False:
1352             # Check for redirect
1353             new_url = head_response.geturl()
1354             if url != new_url:
1355                 self.report_following_redirect(new_url)
1356                 if force_videoid:
1357                     new_url = smuggle_url(
1358                         new_url, {'force_videoid': force_videoid})
1359                 return self.url_result(new_url)
1360
1361         full_response = None
1362         if head_response is False:
1363             request = sanitized_Request(url)
1364             request.add_header('Accept-Encoding', '*')
1365             full_response = self._request_webpage(request, video_id)
1366             head_response = full_response
1367
1368         info_dict = {
1369             'id': video_id,
1370             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1371             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1372         }
1373
1374         # Check for direct link to a video
1375         content_type = head_response.headers.get('Content-Type', '').lower()
1376         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1377         if m:
1378             format_id = m.group('format_id')
1379             if format_id.endswith('mpegurl'):
1380                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1381             elif format_id == 'f4m':
1382                 formats = self._extract_f4m_formats(url, video_id)
1383             else:
1384                 formats = [{
1385                     'format_id': m.group('format_id'),
1386                     'url': url,
1387                     'vcodec': 'none' if m.group('type') == 'audio' else None
1388                 }]
1389                 info_dict['direct'] = True
1390             self._sort_formats(formats)
1391             info_dict['formats'] = formats
1392             return info_dict
1393
1394         if not self._downloader.params.get('test', False) and not is_intentional:
1395             force = self._downloader.params.get('force_generic_extractor', False)
1396             self._downloader.report_warning(
1397                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1398
1399         if not full_response:
1400             request = sanitized_Request(url)
1401             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1402             # making it impossible to download only chunk of the file (yet we need only 512kB to
1403             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1404             # that will always result in downloading the whole file that is not desirable.
1405             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1406             # to accept raw bytes and being able to download only a chunk.
1407             # It may probably better to solve this by checking Content-Type for application/octet-stream
1408             # after HEAD request finishes, but not sure if we can rely on this.
1409             request.add_header('Accept-Encoding', '*')
1410             full_response = self._request_webpage(request, video_id)
1411
1412         first_bytes = full_response.read(512)
1413
1414         # Is it an M3U playlist?
1415         if first_bytes.startswith(b'#EXTM3U'):
1416             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1417             self._sort_formats(info_dict['formats'])
1418             return info_dict
1419
1420         # Maybe it's a direct link to a video?
1421         # Be careful not to download the whole thing!
1422         if not is_html(first_bytes):
1423             self._downloader.report_warning(
1424                 'URL could be a direct video link, returning it as such.')
1425             info_dict.update({
1426                 'direct': True,
1427                 'url': url,
1428             })
1429             return info_dict
1430
1431         webpage = self._webpage_read_content(
1432             full_response, url, video_id, prefix=first_bytes)
1433
1434         self.report_extraction(video_id)
1435
1436         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1437         try:
1438             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1439             if doc.tag == 'rss':
1440                 return self._extract_rss(url, video_id, doc)
1441             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1442                 smil = self._parse_smil(doc, url, video_id)
1443                 self._sort_formats(smil['formats'])
1444                 return smil
1445             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1446                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1447             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1448                 info_dict['formats'] = self._parse_mpd_formats(
1449                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1450                 self._sort_formats(info_dict['formats'])
1451                 return info_dict
1452             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1453                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1454                 self._sort_formats(info_dict['formats'])
1455                 return info_dict
1456         except compat_xml_parse_error:
1457             pass
1458
1459         # Is it a Camtasia project?
1460         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1461         if camtasia_res is not None:
1462             return camtasia_res
1463
1464         # Sometimes embedded video player is hidden behind percent encoding
1465         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1466         # Unescaping the whole page allows us to handle those cases in a generic way
1467         webpage = compat_urllib_parse_unquote(webpage)
1468
1469         # it's tempting to parse this further, but you would
1470         # have to take into account all the variations like
1471         #   Video Title - Site Name
1472         #   Site Name | Video Title
1473         #   Video Title - Tagline | Site Name
1474         # and so on and so forth; it's just not practical
1475         video_title = self._og_search_title(
1476             webpage, default=None) or self._html_search_regex(
1477             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1478             default='video')
1479
1480         # Try to detect age limit automatically
1481         age_limit = self._rta_search(webpage)
1482         # And then there are the jokers who advertise that they use RTA,
1483         # but actually don't.
1484         AGE_LIMIT_MARKERS = [
1485             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1486         ]
1487         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1488             age_limit = 18
1489
1490         # video uploader is domain name
1491         video_uploader = self._search_regex(
1492             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1493
1494         video_description = self._og_search_description(webpage, default=None)
1495         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1496
1497         # Helper method
1498         def _playlist_from_matches(matches, getter=None, ie=None):
1499             urlrs = orderedSet(
1500                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1501                 for m in matches)
1502             return self.playlist_result(
1503                 urlrs, playlist_id=video_id, playlist_title=video_title)
1504
1505         # Look for Brightcove Legacy Studio embeds
1506         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1507         if bc_urls:
1508             self.to_screen('Brightcove video detected.')
1509             entries = [{
1510                 '_type': 'url',
1511                 'url': smuggle_url(bc_url, {'Referer': url}),
1512                 'ie_key': 'BrightcoveLegacy'
1513             } for bc_url in bc_urls]
1514
1515             return {
1516                 '_type': 'playlist',
1517                 'title': video_title,
1518                 'id': video_id,
1519                 'entries': entries,
1520             }
1521
1522         # Look for Brightcove New Studio embeds
1523         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1524         if bc_urls:
1525             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1526
1527         # Look for ThePlatform embeds
1528         tp_urls = ThePlatformIE._extract_urls(webpage)
1529         if tp_urls:
1530             return _playlist_from_matches(tp_urls, ie='ThePlatform')
1531
1532         # Look for embedded rtl.nl player
1533         matches = re.findall(
1534             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1535             webpage)
1536         if matches:
1537             return _playlist_from_matches(matches, ie='RtlNl')
1538
1539         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1540         if vimeo_url is not None:
1541             return self.url_result(vimeo_url)
1542
1543         vid_me_embed_url = self._search_regex(
1544             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1545             webpage, 'vid.me embed', default=None)
1546         if vid_me_embed_url is not None:
1547             return self.url_result(vid_me_embed_url, 'Vidme')
1548
1549         # Look for embedded YouTube player
1550         matches = re.findall(r'''(?x)
1551             (?:
1552                 <iframe[^>]+?src=|
1553                 data-video-url=|
1554                 <embed[^>]+?src=|
1555                 embedSWF\(?:\s*|
1556                 new\s+SWFObject\(
1557             )
1558             (["\'])
1559                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1560                 (?:embed|v|p)/.+?)
1561             \1''', webpage)
1562         if matches:
1563             return _playlist_from_matches(
1564                 matches, lambda m: unescapeHTML(m[1]))
1565
1566         # Look for lazyYT YouTube embed
1567         matches = re.findall(
1568             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1569         if matches:
1570             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1571
1572         # Look for embedded Dailymotion player
1573         matches = re.findall(
1574             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1575         if matches:
1576             return _playlist_from_matches(
1577                 matches, lambda m: unescapeHTML(m[1]))
1578
1579         # Look for embedded Dailymotion playlist player (#3822)
1580         m = re.search(
1581             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1582         if m:
1583             playlists = re.findall(
1584                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1585             if playlists:
1586                 return _playlist_from_matches(
1587                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1588
1589         # Look for embedded Wistia player
1590         match = re.search(
1591             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1592         if match:
1593             embed_url = self._proto_relative_url(
1594                 unescapeHTML(match.group('url')))
1595             return {
1596                 '_type': 'url_transparent',
1597                 'url': embed_url,
1598                 'ie_key': 'Wistia',
1599                 'uploader': video_uploader,
1600             }
1601
1602         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1603         if match:
1604             return {
1605                 '_type': 'url_transparent',
1606                 'url': 'wistia:%s' % match.group('id'),
1607                 'ie_key': 'Wistia',
1608                 'uploader': video_uploader,
1609             }
1610
1611         match = re.search(
1612             r'''(?sx)
1613                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1614                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1615             ''', webpage)
1616         if match:
1617             return self.url_result(self._proto_relative_url(
1618                 'wistia:%s' % match.group('id')), 'Wistia')
1619
1620         # Look for SVT player
1621         svt_url = SVTIE._extract_url(webpage)
1622         if svt_url:
1623             return self.url_result(svt_url, 'SVT')
1624
1625         # Look for embedded condenast player
1626         matches = re.findall(
1627             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1628             webpage)
1629         if matches:
1630             return {
1631                 '_type': 'playlist',
1632                 'entries': [{
1633                     '_type': 'url',
1634                     'ie_key': 'CondeNast',
1635                     'url': ma,
1636                 } for ma in matches],
1637                 'title': video_title,
1638                 'id': video_id,
1639             }
1640
1641         # Look for Bandcamp pages with custom domain
1642         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1643         if mobj is not None:
1644             burl = unescapeHTML(mobj.group(1))
1645             # Don't set the extractor because it can be a track url or an album
1646             return self.url_result(burl)
1647
1648         # Look for embedded Vevo player
1649         mobj = re.search(
1650             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1651         if mobj is not None:
1652             return self.url_result(mobj.group('url'))
1653
1654         # Look for embedded Viddler player
1655         mobj = re.search(
1656             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1657             webpage)
1658         if mobj is not None:
1659             return self.url_result(mobj.group('url'))
1660
1661         # Look for NYTimes player
1662         mobj = re.search(
1663             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1664             webpage)
1665         if mobj is not None:
1666             return self.url_result(mobj.group('url'))
1667
1668         # Look for Libsyn player
1669         mobj = re.search(
1670             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1671         if mobj is not None:
1672             return self.url_result(mobj.group('url'))
1673
1674         # Look for Ooyala videos
1675         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1676                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1677                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1678                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1679         if mobj is not None:
1680             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1681
1682         # Look for multiple Ooyala embeds on SBN network websites
1683         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1684         if mobj is not None:
1685             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1686             if embeds:
1687                 return _playlist_from_matches(
1688                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1689
1690         # Look for Aparat videos
1691         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1692         if mobj is not None:
1693             return self.url_result(mobj.group(1), 'Aparat')
1694
1695         # Look for MPORA videos
1696         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1697         if mobj is not None:
1698             return self.url_result(mobj.group(1), 'Mpora')
1699
1700         # Look for embedded NovaMov-based player
1701         mobj = re.search(
1702             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1703                     (?P<url>http://(?:(?:embed|www)\.)?
1704                         (?:novamov\.com|
1705                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1706                            videoweed\.(?:es|com)|
1707                            movshare\.(?:net|sx|ag)|
1708                            divxstage\.(?:eu|net|ch|co|at|ag))
1709                         /embed\.php.+?)\1''', webpage)
1710         if mobj is not None:
1711             return self.url_result(mobj.group('url'))
1712
1713         # Look for embedded Facebook player
1714         mobj = re.search(
1715             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1716         if mobj is not None:
1717             return self.url_result(mobj.group('url'), 'Facebook')
1718
1719         # Look for embedded VK player
1720         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1721         if mobj is not None:
1722             return self.url_result(mobj.group('url'), 'VK')
1723
1724         # Look for embedded Odnoklassniki player
1725         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1726         if mobj is not None:
1727             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1728
1729         # Look for embedded ivi player
1730         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1731         if mobj is not None:
1732             return self.url_result(mobj.group('url'), 'Ivi')
1733
1734         # Look for embedded Huffington Post player
1735         mobj = re.search(
1736             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1737         if mobj is not None:
1738             return self.url_result(mobj.group('url'), 'HuffPost')
1739
1740         # Look for embed.ly
1741         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1742         if mobj is not None:
1743             return self.url_result(mobj.group('url'))
1744         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1745         if mobj is not None:
1746             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1747
1748         # Look for funnyordie embed
1749         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1750         if matches:
1751             return _playlist_from_matches(
1752                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1753
1754         # Look for BBC iPlayer embed
1755         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1756         if matches:
1757             return _playlist_from_matches(matches, ie='BBCCoUk')
1758
1759         # Look for embedded RUTV player
1760         rutv_url = RUTVIE._extract_url(webpage)
1761         if rutv_url:
1762             return self.url_result(rutv_url, 'RUTV')
1763
1764         # Look for embedded TVC player
1765         tvc_url = TVCIE._extract_url(webpage)
1766         if tvc_url:
1767             return self.url_result(tvc_url, 'TVC')
1768
1769         # Look for embedded SportBox player
1770         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1771         if sportbox_urls:
1772             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1773
1774         # Look for embedded PornHub player
1775         pornhub_url = PornHubIE._extract_url(webpage)
1776         if pornhub_url:
1777             return self.url_result(pornhub_url, 'PornHub')
1778
1779         # Look for embedded XHamster player
1780         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1781         if xhamster_urls:
1782             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1783
1784         # Look for embedded TNAFlixNetwork player
1785         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1786         if tnaflix_urls:
1787             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1788
1789         # Look for embedded Tvigle player
1790         mobj = re.search(
1791             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1792         if mobj is not None:
1793             return self.url_result(mobj.group('url'), 'Tvigle')
1794
1795         # Look for embedded TED player
1796         mobj = re.search(
1797             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1798         if mobj is not None:
1799             return self.url_result(mobj.group('url'), 'TED')
1800
1801         # Look for embedded Ustream videos
1802         mobj = re.search(
1803             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1804         if mobj is not None:
1805             return self.url_result(mobj.group('url'), 'Ustream')
1806
1807         # Look for embedded arte.tv player
1808         mobj = re.search(
1809             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
1810             webpage)
1811         if mobj is not None:
1812             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1813
1814         # Look for embedded francetv player
1815         mobj = re.search(
1816             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1817             webpage)
1818         if mobj is not None:
1819             return self.url_result(mobj.group('url'))
1820
1821         # Look for embedded smotri.com player
1822         smotri_url = SmotriIE._extract_url(webpage)
1823         if smotri_url:
1824             return self.url_result(smotri_url, 'Smotri')
1825
1826         # Look for embedded Myvi.ru player
1827         myvi_url = MyviIE._extract_url(webpage)
1828         if myvi_url:
1829             return self.url_result(myvi_url)
1830
1831         # Look for embedded soundcloud player
1832         mobj = re.search(
1833             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1834             webpage)
1835         if mobj is not None:
1836             url = unescapeHTML(mobj.group('url'))
1837             return self.url_result(url)
1838
1839         # Look for embedded vulture.com player
1840         mobj = re.search(
1841             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1842             webpage)
1843         if mobj is not None:
1844             url = unescapeHTML(mobj.group('url'))
1845             return self.url_result(url, ie='Vulture')
1846
1847         # Look for embedded mtvservices player
1848         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1849         if mtvservices_url:
1850             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1851
1852         # Look for embedded yahoo player
1853         mobj = re.search(
1854             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1855             webpage)
1856         if mobj is not None:
1857             return self.url_result(mobj.group('url'), 'Yahoo')
1858
1859         # Look for embedded sbs.com.au player
1860         mobj = re.search(
1861             r'''(?x)
1862             (?:
1863                 <meta\s+property="og:video"\s+content=|
1864                 <iframe[^>]+?src=
1865             )
1866             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1867             webpage)
1868         if mobj is not None:
1869             return self.url_result(mobj.group('url'), 'SBS')
1870
1871         # Look for embedded Cinchcast player
1872         mobj = re.search(
1873             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1874             webpage)
1875         if mobj is not None:
1876             return self.url_result(mobj.group('url'), 'Cinchcast')
1877
1878         mobj = re.search(
1879             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1880             webpage)
1881         if not mobj:
1882             mobj = re.search(
1883                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1884                 webpage)
1885         if mobj is not None:
1886             return self.url_result(mobj.group('url'), 'MLB')
1887
1888         mobj = re.search(
1889             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1890             webpage)
1891         if mobj is not None:
1892             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1893
1894         mobj = re.search(
1895             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
1896             webpage)
1897         if mobj is not None:
1898             return self.url_result(mobj.group('url'), 'Livestream')
1899
1900         # Look for Zapiks embed
1901         mobj = re.search(
1902             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1903         if mobj is not None:
1904             return self.url_result(mobj.group('url'), 'Zapiks')
1905
1906         # Look for Kaltura embeds
1907         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
1908                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1909         if mobj is not None:
1910             return self.url_result(smuggle_url(
1911                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1912                 {'source_url': url}), 'Kaltura')
1913
1914         # Look for Eagle.Platform embeds
1915         mobj = re.search(
1916             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1917         if mobj is not None:
1918             return self.url_result(mobj.group('url'), 'EaglePlatform')
1919
1920         # Look for ClipYou (uses Eagle.Platform) embeds
1921         mobj = re.search(
1922             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1923         if mobj is not None:
1924             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1925
1926         # Look for Pladform embeds
1927         pladform_url = PladformIE._extract_url(webpage)
1928         if pladform_url:
1929             return self.url_result(pladform_url)
1930
1931         # Look for Videomore embeds
1932         videomore_url = VideomoreIE._extract_url(webpage)
1933         if videomore_url:
1934             return self.url_result(videomore_url)
1935
1936         # Look for Playwire embeds
1937         mobj = re.search(
1938             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1939         if mobj is not None:
1940             return self.url_result(mobj.group('url'))
1941
1942         # Look for 5min embeds
1943         mobj = re.search(
1944             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1945         if mobj is not None:
1946             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1947
1948         # Look for Crooks and Liars embeds
1949         mobj = re.search(
1950             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1951         if mobj is not None:
1952             return self.url_result(mobj.group('url'))
1953
1954         # Look for NBC Sports VPlayer embeds
1955         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1956         if nbc_sports_url:
1957             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1958
1959         # Look for Google Drive embeds
1960         google_drive_url = GoogleDriveIE._extract_url(webpage)
1961         if google_drive_url:
1962             return self.url_result(google_drive_url, 'GoogleDrive')
1963
1964         # Look for UDN embeds
1965         mobj = re.search(
1966             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1967         if mobj is not None:
1968             return self.url_result(
1969                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1970
1971         # Look for Senate ISVP iframe
1972         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1973         if senate_isvp_url:
1974             return self.url_result(senate_isvp_url, 'SenateISVP')
1975
1976         # Look for Dailymotion Cloud videos
1977         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1978         if dmcloud_url:
1979             return self.url_result(dmcloud_url, 'DailymotionCloud')
1980
1981         # Look for OnionStudios embeds
1982         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1983         if onionstudios_url:
1984             return self.url_result(onionstudios_url)
1985
1986         # Look for ViewLift embeds
1987         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
1988         if viewlift_url:
1989             return self.url_result(viewlift_url)
1990
1991         # Look for JWPlatform embeds
1992         jwplatform_url = JWPlatformIE._extract_url(webpage)
1993         if jwplatform_url:
1994             return self.url_result(jwplatform_url, 'JWPlatform')
1995
1996         # Look for ScreenwaveMedia embeds
1997         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1998         if mobj is not None:
1999             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
2000
2001         # Look for Digiteka embeds
2002         digiteka_url = DigitekaIE._extract_url(webpage)
2003         if digiteka_url:
2004             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2005
2006         # Look for Limelight embeds
2007         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2008         if mobj:
2009             lm = {
2010                 'Media': 'media',
2011                 'Channel': 'channel',
2012                 'ChannelList': 'channel_list',
2013             }
2014             return self.url_result('limelight:%s:%s' % (
2015                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
2016
2017         # Look for AdobeTVVideo embeds
2018         mobj = re.search(
2019             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2020             webpage)
2021         if mobj is not None:
2022             return self.url_result(
2023                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2024                 'AdobeTVVideo')
2025
2026         # Look for Vine embeds
2027         mobj = re.search(
2028             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2029             webpage)
2030         if mobj is not None:
2031             return self.url_result(
2032                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2033
2034         # Look for Instagram embeds
2035         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2036         if instagram_embed_url is not None:
2037             return self.url_result(
2038                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2039
2040         # Look for LiveLeak embeds
2041         liveleak_url = LiveLeakIE._extract_url(webpage)
2042         if liveleak_url:
2043             return self.url_result(liveleak_url, 'LiveLeak')
2044
2045         # Look for 3Q SDN embeds
2046         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2047         if threeqsdn_url:
2048             return {
2049                 '_type': 'url_transparent',
2050                 'ie_key': ThreeQSDNIE.ie_key(),
2051                 'url': self._proto_relative_url(threeqsdn_url),
2052                 'title': video_title,
2053                 'description': video_description,
2054                 'thumbnail': video_thumbnail,
2055                 'uploader': video_uploader,
2056             }
2057
2058         def check_video(vurl):
2059             if YoutubeIE.suitable(vurl):
2060                 return True
2061             vpath = compat_urlparse.urlparse(vurl).path
2062             vext = determine_ext(vpath)
2063             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
2064
2065         def filter_video(urls):
2066             return list(filter(check_video, urls))
2067
2068         # Start with something easy: JW Player in SWFObject
2069         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2070         if not found:
2071             # Look for gorilla-vid style embedding
2072             found = filter_video(re.findall(r'''(?sx)
2073                 (?:
2074                     jw_plugins|
2075                     JWPlayerOptions|
2076                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2077                 )
2078                 .*?
2079                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2080         if not found:
2081             # Broaden the search a little bit
2082             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2083         if not found:
2084             # Broaden the findall a little bit: JWPlayer JS loader
2085             found = filter_video(re.findall(
2086                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2087         if not found:
2088             # Flow player
2089             found = filter_video(re.findall(r'''(?xs)
2090                 flowplayer\("[^"]+",\s*
2091                     \{[^}]+?\}\s*,
2092                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2093                         ["']?url["']?\s*:\s*["']([^"']+)["']
2094             ''', webpage))
2095         if not found:
2096             # Cinerama player
2097             found = re.findall(
2098                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2099         if not found:
2100             # Try to find twitter cards info
2101             found = filter_video(re.findall(
2102                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2103         if not found:
2104             # We look for Open Graph info:
            # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
2106             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
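            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player.
            # NOTE(review): re.findall() always returns a list (never None), so the check
            # below is always true and og:video is searched regardless of og:video:type.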
2108             if m_video_type is not None:
2109                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2110         if not found:
2111             # HTML5 video
2112             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
2113         if not found:
2114             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2115             found = re.search(
2116                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2117                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2118                 webpage)
2119             if not found:
2120                 # Look also in Refresh HTTP header
2121                 refresh_header = head_response.headers.get('Refresh')
2122                 if refresh_header:
2123                     # In python 2 response HTTP headers are bytestrings
2124                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2125                         refresh_header = refresh_header.decode('iso-8859-1')
2126                     found = re.search(REDIRECT_REGEX, refresh_header)
2127             if found:
2128                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2129                 self.report_following_redirect(new_url)
2130                 return {
2131                     '_type': 'url',
2132                     'url': new_url,
2133                 }
2134         if not found:
2135             raise UnsupportedError(url)
2136
2137         entries = []
2138         for video_url in orderedSet(found):
2139             video_url = unescapeHTML(video_url)
2140             video_url = video_url.replace('\\/', '/')
2141             video_url = compat_urlparse.urljoin(url, video_url)
2142             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2143
2144             # Sometimes, jwplayer extraction will result in a YouTube URL
2145             if YoutubeIE.suitable(video_url):
2146                 entries.append(self.url_result(video_url, 'Youtube'))
2147                 continue
2148
            # Drop the file extension from the URL basename to get the video id
2150             video_id = os.path.splitext(video_id)[0]
2151
2152             entry_info_dict = {
2153                 'id': video_id,
2154                 'uploader': video_uploader,
2155                 'title': video_title,
2156                 'age_limit': age_limit,
2157             }
2158
2159             ext = determine_ext(video_url)
2160             if ext == 'smil':
2161                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2162             elif ext == 'xspf':
2163                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2164             elif ext == 'm3u8':
2165                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2166             elif ext == 'mpd':
2167                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2168             elif ext == 'f4m':
2169                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2170             else:
2171                 entry_info_dict['url'] = video_url
2172
2173             if entry_info_dict.get('formats'):
2174                 self._sort_formats(entry_info_dict['formats'])
2175
2176             entries.append(entry_info_dict)
2177
2178         if len(entries) == 1:
2179             return entries[0]
2180         else:
2181             for num, e in enumerate(entries, start=1):
2182                 # 'url' results don't have a title
2183                 if e.get('title') is not None:
2184                     e['title'] = '%s (%d)' % (e['title'], num)
2185             return {
2186                 '_type': 'playlist',
2187                 'entries': entries,
2188             }