_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7 import sys
   8
   9 from .common import InfoExtractor
  10 from .youtube import YoutubeIE
  11 from ..compat import (
  12     compat_etree_fromstring,
  13     compat_urllib_parse_unquote,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     sanitized_Request,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import (
  34     BrightcoveLegacyIE,
  35     BrightcoveNewIE,
  36 )
  37 from .nbc import NBCSportsVPlayerIE
  38 from .ooyala import OoyalaIE
  39 from .rutv import RUTVIE
  40 from .tvc import TVCIE
  41 from .sportbox import SportBoxEmbedIE
  42 from .smotri import SmotriIE
  43 from .myvi import MyviIE
  44 from .condenast import CondeNastIE
  45 from .udn import UDNEmbedIE
  46 from .senateisvp import SenateISVPIE
  47 from .svt import SVTIE
  48 from .pornhub import PornHubIE
  49 from .xhamster import XHamsterEmbedIE
  50 from .tnaflix import TNAFlixNetworkEmbedIE
  51 from .vimeo import VimeoIE
  52 from .dailymotion import DailymotionCloudIE
  53 from .onionstudios import OnionStudiosIE
  54 from .snagfilms import SnagFilmsEmbedIE
  55 from .screenwavemedia import ScreenwaveMediaIE
  56 from .mtv import MTVServicesEmbeddedIE
  57 from .pladform import PladformIE
  58 from .videomore import VideomoreIE
  59 from .googledrive import GoogleDriveIE
  60 from .jwplatform import JWPlatformIE
  61 from .digiteka import DigitekaIE
  62
  63
  64 class GenericIE(InfoExtractor):
  65     IE_DESC = 'Generic downloader that works on some sites'
  66     _VALID_URL = r'.*'
  67     IE_NAME = 'generic'
  68     _TESTS = [
  69         # Direct link to a video
  70         {
  71             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  72             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  73             'info_dict': {
  74                 'id': 'trailer',
  75                 'ext': 'mp4',
  76                 'title': 'trailer',
  77                 'upload_date': '20100513',
  78             }
  79         },
  80         # Direct link to media delivered compressed (until Accept-Encoding is *)
  81         {
  82             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  83             'md5': '128c42e68b13950268b648275386fc74',
  84             'info_dict': {
  85                 'id': 'FictionJunction-Parallel_Hearts',
  86                 'ext': 'flac',
  87                 'title': 'FictionJunction-Parallel_Hearts',
  88                 'upload_date': '20140522',
  89             },
  90             'expected_warnings': [
  91                 'URL could be a direct video link, returning it as such.'
  92             ]
  93         },
  94         # Direct download with broken HEAD
  95         {
  96             'url': 'http://ai-radio.org:8000/radio.opus',
  97             'info_dict': {
  98                 'id': 'radio',
  99                 'ext': 'opus',
 100                 'title': 'radio',
 101             },
 102             'params': {
 103                 'skip_download': True,  # infinite live stream
 104             },
 105             'expected_warnings': [
 106                 r'501.*Not Implemented'
 107             ],
 108         },
 109         # Direct link with incorrect MIME type
 110         {
 111             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 112             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 113             'info_dict': {
 114                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 115                 'id': '5_Lennart_Poettering_-_Systemd',
 116                 'ext': 'webm',
 117                 'title': '5_Lennart_Poettering_-_Systemd',
 118                 'upload_date': '20141120',
 119             },
 120             'expected_warnings': [
 121                 'URL could be a direct video link, returning it as such.'
 122             ]
 123         },
 124         # RSS feed
 125         {
 126             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 127             'info_dict': {
 128                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 129                 'title': 'Zero Punctuation',
 130                 'description': 're:.*groundbreaking video review series.*'
 131             },
 132             'playlist_mincount': 11,
 133         },
 134         # RSS feed with enclosure
 135         {
 136             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 137             'info_dict': {
 138                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 139                 'ext': 'm4v',
 140                 'upload_date': '20150228',
 141                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 142             }
 143         },
 144         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
 145         {
 146             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
 147             'info_dict': {
 148                 'id': 'smil',
 149                 'ext': 'mp4',
 150                 'title': 'Automatics, robotics and biocybernetics',
 151                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
 152                 'upload_date': '20130627',
 153                 'formats': 'mincount:16',
 154                 'subtitles': 'mincount:1',
 155             },
 156             'params': {
 157                 'force_generic_extractor': True,
 158                 'skip_download': True,
 159             },
 160         },
 161         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
 162         {
 163             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
 164             'info_dict': {
 165                 'id': 'hds',
 166                 'ext': 'flv',
 167                 'title': 'hds',
 168                 'formats': 'mincount:1',
 169             },
 170             'params': {
 171                 'skip_download': True,
 172             },
 173         },
 174         # SMIL from https://www.restudy.dk/video/play/id/1637
 175         {
 176             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
 177             'info_dict': {
 178                 'id': 'video_1637',
 179                 'ext': 'flv',
 180                 'title': 'video_1637',
 181                 'formats': 'mincount:3',
 182             },
 183             'params': {
 184                 'skip_download': True,
 185             },
 186         },
 187         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
 188         {
 189             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
 190             'info_dict': {
 191                 'id': 'smil-service',
 192                 'ext': 'flv',
 193                 'title': 'smil-service',
 194                 'formats': 'mincount:1',
 195             },
 196             'params': {
 197                 'skip_download': True,
 198             },
 199         },
 200         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
 201         {
 202             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
 203             'info_dict': {
 204                 'id': '4719370',
 205                 'ext': 'mp4',
 206                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
 207                 'formats': 'mincount:3',
 208             },
 209             'params': {
 210                 'skip_download': True,
 211             },
 212         },
 213         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
 214         {
 215             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
 216             'info_dict': {
 217                 'id': 'mZlp2ctYIUEB',
 218                 'ext': 'mp4',
 219                 'title': 'Tikibad ontruimd wegens brand',
 220                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
 221                 'thumbnail': 're:^https?://.*\.jpg$',
 222                 'duration': 33,
 223             },
 224             'params': {
 225                 'skip_download': True,
 226             },
 227         },
 228         # MPD from http://dash-mse-test.appspot.com/media.html
 229         {
 230             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
 231             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
 232             'info_dict': {
 233                 'id': 'car-20120827-manifest',
 234                 'ext': 'mp4',
 235                 'title': 'car-20120827-manifest',
 236                 'formats': 'mincount:9',
 237             },
 238             'params': {
 239                 'format': 'bestvideo',
 240             },
 241         },
 242         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
 243         {
 244             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
 245             'info_dict': {
 246                 'id': 'content',
 247                 'ext': 'mp4',
 248                 'title': 'content',
 249                 'formats': 'mincount:8',
 250             },
 251             'params': {
 252                 # m3u8 downloads
 253                 'skip_download': True,
 254             }
 255         },
 256         # google redirect
 257         {
 258             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 259             'info_dict': {
 260                 'id': 'cmQHVoWB5FY',
 261                 'ext': 'mp4',
 262                 'upload_date': '20130224',
 263                 'uploader_id': 'TheVerge',
 264                 'description': 're:^Chris Ziegler takes a look at the\.*',
 265                 'uploader': 'The Verge',
 266                 'title': 'First Firefox OS phones side-by-side',
 267             },
 268             'params': {
 269                 'skip_download': False,
 270             }
 271         },
 272         {
 273             # redirect in Refresh HTTP header
 274             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
 275             'info_dict': {
 276                 'id': 'pO8h3EaFRdo',
 277                 'ext': 'mp4',
 278                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
 279                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
 280                 'upload_date': '20150917',
 281                 'uploader_id': 'brtvofficial',
 282                 'uploader': 'Boiler Room',
 283             },
 284             'params': {
 285                 'skip_download': False,
 286             },
 287         },
 288         {
 289             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 290             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 291             'info_dict': {
 292                 'id': '13601338388002',
 293                 'ext': 'mp4',
 294                 'uploader': 'www.hodiho.fr',
 295                 'title': 'R\u00e9gis plante sa Jeep',
 296             }
 297         },
 298         # bandcamp page with custom domain
 299         {
 300             'add_ie': ['Bandcamp'],
 301             'url': 'http://bronyrock.com/track/the-pony-mash',
 302             'info_dict': {
 303                 'id': '3235767654',
 304                 'ext': 'mp3',
 305                 'title': 'The Pony Mash',
 306                 'uploader': 'M_Pallante',
 307             },
 308             'skip': 'There is a limit of 200 free downloads / month for the test song',
 309         },
 310         # embedded brightcove video
 311         # it also tests brightcove videos that need to set the 'Referer' in the
 312         # http requests
 313         {
 314             'add_ie': ['BrightcoveLegacy'],
 315             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 316             'info_dict': {
 317                 'id': '2765128793001',
 318                 'ext': 'mp4',
 319                 'title': 'Le cours de bourse : l’analyse technique',
 320                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 321                 'uploader': 'BFM BUSINESS',
 322             },
 323             'params': {
 324                 'skip_download': True,
 325             },
 326         },
 327         {
 328             # https://github.com/rg3/youtube-dl/issues/2253
 329             'url': 'http://bcove.me/i6nfkrc3',
 330             'md5': '0ba9446db037002366bab3b3eb30c88c',
 331             'info_dict': {
 332                 'id': '3101154703001',
 333                 'ext': 'mp4',
 334                 'title': 'Still no power',
 335                 'uploader': 'thestar.com',
 336                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 337             },
 338             'add_ie': ['BrightcoveLegacy'],
 339         },
 340         {
 341             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 342             'md5': 'fb973ecf6e4a78a67453647444222983',
 343             'info_dict': {
 344                 'id': '3414141473001',
 345                 'ext': 'mp4',
 346                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 347                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 348                 'uploader': 'Championat',
 349             },
 350         },
 351         {
 352             # https://github.com/rg3/youtube-dl/issues/3541
 353             'add_ie': ['BrightcoveLegacy'],
 354             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 355             'info_dict': {
 356                 'id': '3866516442001',
 357                 'ext': 'mp4',
 358                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 359                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 360                 'uploader': 'SBS Broadcasting',
 361             },
 362             'skip': 'Restricted to Netherlands',
 363             'params': {
 364                 'skip_download': True,  # m3u8 download
 365             },
 366         },
 367         # ooyala video
 368         {
 369             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 370             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 371             'info_dict': {
 372                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 373                 'ext': 'mp4',
 374                 'title': '2cc213299525360.mov',  # that's what we get
 375                 'duration': 238.231,
 376             },
 377             'add_ie': ['Ooyala'],
 378         },
 379         {
 380             # ooyala video embedded with http://player.ooyala.com/iframe.js
 381             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
 382             'info_dict': {
 383                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
 384                 'ext': 'mp4',
 385                 'title': '"Steve Jobs: Man in the Machine" trailer',
 386                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
 387                 'duration': 135.427,
 388             },
 389             'params': {
 390                 'skip_download': True,
 391             },
 392         },
 393         # multiple ooyala embeds on SBN network websites
 394         {
 395             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 396             'info_dict': {
 397                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 398                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 399             },
 400             'playlist_mincount': 3,
 401             'params': {
 402                 'skip_download': True,
 403             },
 404             'add_ie': ['Ooyala'],
 405         },
 406         # embed.ly video
 407         {
 408             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 409             'info_dict': {
 410                 'id': '9ODmcdjQcHQ',
 411                 'ext': 'mp4',
 412                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 413                 'upload_date': '20140225',
 414                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 415                 'uploader': 'Tested',
 416                 'uploader_id': 'testedcom',
 417             },
 418             # No need to test YoutubeIE here
 419             'params': {
 420                 'skip_download': True,
 421             },
 422         },
 423         # funnyordie embed
 424         {
 425             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 426             'info_dict': {
 427                 'id': '18e820ec3f',
 428                 'ext': 'mp4',
 429                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 430                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 431             },
 432         },
 433         # RUTV embed
 434         {
 435             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 436             'info_dict': {
 437                 'id': '776940',
 438                 'ext': 'mp4',
 439                 'title': 'Охотское море стало целиком российским',
 440                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 441             },
 442             'params': {
 443                 # m3u8 download
 444                 'skip_download': True,
 445             },
 446         },
 447         # TVC embed
 448         {
 449             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 450             'info_dict': {
 451                 'id': '55304',
 452                 'ext': 'mp4',
 453                 'title': 'Дошкольное воспитание',
 454             },
 455         },
 456         # SportBox embed
 457         {
 458             'url': 'http://www.vestifinance.ru/articles/25753',
 459             'info_dict': {
 460                 'id': '25753',
 461                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 462             },
 463             'playlist': [{
 464                 'info_dict': {
 465                     'id': '370908',
 466                     'title': 'Госзаказ. День 3',
 467                     'ext': 'mp4',
 468                 }
 469             }, {
 470                 'info_dict': {
 471                     'id': '370905',
 472                     'title': 'Госзаказ. День 2',
 473                     'ext': 'mp4',
 474                 }
 475             }, {
 476                 'info_dict': {
 477                     'id': '370902',
 478                     'title': 'Госзаказ. День 1',
 479                     'ext': 'mp4',
 480                 }
 481             }],
 482             'params': {
 483                 # m3u8 download
 484                 'skip_download': True,
 485             },
 486         },
 487         # Myvi.ru embed
 488         {
 489             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 490             'info_dict': {
 491                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 492                 'ext': 'mp4',
 493                 'title': 'Ужастики, русский трейлер (2015)',
 494                 'thumbnail': 're:^https?://.*\.jpg$',
 495                 'duration': 153,
 496             }
 497         },
 498         # XHamster embed
 499         {
 500             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 501             'info_dict': {
 502                 'id': 'showthread',
 503                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 504             },
 505             'playlist_mincount': 7,
 506         },
 507         # Embedded TED video
 508         {
 509             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 510             'md5': '65fdff94098e4a607385a60c5177c638',
 511             'info_dict': {
 512                 'id': '1969',
 513                 'ext': 'mp4',
 514                 'title': 'Hidden miracles of the natural world',
 515                 'uploader': 'Louie Schwartzberg',
 516                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 517             }
 518         },
 519         # Embedded Ustream video
 520         {
 521             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 522             'md5': '27b99cdb639c9b12a79bca876a073417',
 523             'info_dict': {
 524                 'id': '45734260',
 525                 'ext': 'flv',
 526                 'uploader': 'AU SPA:  The NSA and Privacy',
 527                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 528             }
 529         },
 530         # nowvideo embed hidden behind percent encoding
 531         {
 532             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 533             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 534             'info_dict': {
 535                 'id': '06e53103ca9aa',
 536                 'ext': 'flv',
 537                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 538                 'description': 'No description',
 539             },
 540         },
 541         # arte embed
 542         {
 543             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 544             'md5': '7653032cbb25bf6c80d80f217055fa43',
 545             'info_dict': {
 546                 'id': '048195-004_PLUS7-F',
 547                 'ext': 'flv',
 548                 'title': 'X:enius',
 549                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 550                 'upload_date': '20140320',
 551             },
 552             'params': {
 553                 'skip_download': 'Requires rtmpdump'
 554             }
 555         },
 556         # francetv embed
 557         {
 558             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 559             'info_dict': {
 560                 'id': 'EV_30231',
 561                 'ext': 'mp4',
 562                 'title': 'Alcaline, le concert avec Calogero',
 563                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 564                 'upload_date': '20150226',
 565                 'timestamp': 1424989860,
 566                 'duration': 5400,
 567             },
 568             'params': {
 569                 # m3u8 downloads
 570                 'skip_download': True,
 571             },
 572             'expected_warnings': [
 573                 'Forbidden'
 574             ]
 575         },
 576         # Condé Nast embed
 577         {
 578             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 579             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 580             'info_dict': {
 581                 'id': '53501be369702d3275860000',
 582                 'ext': 'mp4',
 583                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 584             }
 585         },
 586         # Dailymotion embed
 587         {
 588             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 589             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 590             'info_dict': {
 591                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 592                 'ext': 'mp4',
 593                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 594                 'uploader': 'Spi0n',
 595             },
 596             'add_ie': ['Dailymotion'],
 597         },
 598         # YouTube embed
 599         {
 600             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 601             'info_dict': {
 602                 'id': 'FXRb4ykk4S0',
 603                 'ext': 'mp4',
 604                 'title': 'The NBL Auction 2014',
 605                 'uploader': 'BADMINTON England',
 606                 'uploader_id': 'BADMINTONEvents',
 607                 'upload_date': '20140603',
 608                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 609             },
 610             'add_ie': ['Youtube'],
 611             'params': {
 612                 'skip_download': True,
 613             }
 614         },
  615         # MTVServices embed
 616         {
 617             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 618             'md5': '35727f82f58c76d996fc188f9755b0d5',
 619             'info_dict': {
 620                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 621                 'ext': 'mp4',
 622                 'title': 'Review',
 623                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 624             },
 625         },
 626         # YouTube embed via <data-embed-url="">
 627         {
 628             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 629             'info_dict': {
 630                 'id': '4vAffPZIT44',
 631                 'ext': 'mp4',
 632                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 633                 'uploader': 'Gameloft',
 634                 'uploader_id': 'gameloft',
 635                 'upload_date': '20140828',
 636                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 637             },
 638             'params': {
 639                 'skip_download': True,
 640             }
 641         },
 642         # Camtasia studio
 643         {
 644             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 645             'playlist': [{
 646                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 647                 'info_dict': {
 648                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 649                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 650                     'ext': 'flv',
 651                     'duration': 2235.90,
 652                 }
 653             }, {
 654                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 655                 'info_dict': {
 656                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 657                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 658                     'ext': 'flv',
 659                     'duration': 2235.93,
 660                 }
 661             }],
 662             'info_dict': {
 663                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 664             }
 665         },
 666         # Flowplayer
 667         {
 668             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 669             'md5': '9d65602bf31c6e20014319c7d07fba27',
 670             'info_dict': {
 671                 'id': '5123ea6d5e5a7',
 672                 'ext': 'mp4',
 673                 'age_limit': 18,
 674                 'uploader': 'www.handjobhub.com',
 675                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 676             }
 677         },
 678         # Multiple brightcove videos
 679         # https://github.com/rg3/youtube-dl/issues/2283
 680         {
 681             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 682             'info_dict': {
 683                 'id': 'always-never',
 684                 'title': 'Always / Never - The New Yorker',
 685             },
 686             'playlist_count': 3,
 687             'params': {
 688                 'extract_flat': False,
 689                 'skip_download': True,
 690             }
 691         },
 692         # MLB embed
 693         {
 694             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 695             'md5': '96f09a37e44da40dd083e12d9a683327',
 696             'info_dict': {
 697                 'id': '33322633',
 698                 'ext': 'mp4',
 699                 'title': 'Ump changes call to ball',
 700                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 701                 'duration': 48,
 702                 'timestamp': 1401537900,
 703                 'upload_date': '20140531',
 704                 'thumbnail': 're:^https?://.*\.jpg$',
 705             },
 706         },
 707         # Wistia embed
 708         {
 709             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 710             'md5': '8788b683c777a5cf25621eaf286d0c23',
 711             'info_dict': {
 712                 'id': '1cfaf6b7ea',
 713                 'ext': 'mov',
 714                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 715                 'duration': 643.0,
 716                 'filesize': 182808282,
 717                 'uploader': 'education-portal.com',
 718             },
 719         },
 720         {
 721             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 722             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 723             'info_dict': {
 724                 'id': 'uxjb0lwrcz',
 725                 'ext': 'mp4',
 726                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 727                 'duration': 1715.0,
 728                 'uploader': 'thoughtworks.wistia.com',
 729             },
 730         },
 731         # Soundcloud embed
 732         {
 733             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 734             'info_dict': {
 735                 'id': '174391317',
 736                 'ext': 'mp3',
 737                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 738                 'uploader': 'Sophos Security',
 739                 'title': 'Chet Chat 171 - Oct 29, 2014',
 740                 'upload_date': '20141029',
 741             }
 742         },
 743         # Livestream embed
 744         {
 745             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 746             'info_dict': {
 747                 'id': '67864563',
 748                 'ext': 'flv',
 749                 'upload_date': '20141112',
 750                 'title': 'Rosetta #CometLanding webcast HL 10',
 751             }
 752         },
 753         # LazyYT
 754         {
 755             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 756             'info_dict': {
 757                 'id': '1986',
 758                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 759             },
 760             'playlist_mincount': 2,
 761         },
 762         # Cinchcast embed
 763         {
 764             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 765             'info_dict': {
 766                 'id': '7141703',
 767                 'ext': 'mp3',
 768                 'upload_date': '20141126',
 769                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 770             }
 771         },
 772         # Cinerama player
 773         {
 774             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 775             'info_dict': {
 776                 'id': '730m_DandD_1901_512k',
 777                 'ext': 'mp4',
 778                 'uploader': 'www.abc.net.au',
 779                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 780             }
 781         },
 782         # embedded viddler video
 783         {
 784             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 785             'info_dict': {
 786                 'id': '4d03aad9',
 787                 'ext': 'mp4',
 788                 'uploader': 'deadspin',
 789                 'title': 'WALL-TO-GORTAT',
 790                 'timestamp': 1422285291,
 791                 'upload_date': '20150126',
 792             },
 793             'add_ie': ['Viddler'],
 794         },
 795         # Libsyn embed
 796         {
 797             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 798             'info_dict': {
 799                 'id': '3377616',
 800                 'ext': 'mp3',
 801                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 802                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 803                 'upload_date': '20150220',
 804             },
 805         },
 806         # jwplayer YouTube
 807         {
 808             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 809             'info_dict': {
 810                 'id': 'Mrj4DVp2zeA',
 811                 'ext': 'mp4',
 812                 'upload_date': '20150212',
 813                 'uploader': 'The National Archives UK',
 814                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 815                 'uploader_id': 'NationalArchives08',
 816                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 817             },
 818         },
 819         # rtl.nl embed
 820         {
 821             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 822             'playlist_mincount': 5,
 823             'info_dict': {
 824                 'id': 'aanslagen-kopenhagen',
 825                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 826             }
 827         },
 828         # Zapiks embed
 829         {
 830             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 831             'info_dict': {
 832                 'id': '118046',
 833                 'ext': 'mp4',
 834                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 835             }
 836         },
 837         # Kaltura embed
 838         {
 839             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 840             'info_dict': {
 841                 'id': '1_eergr3h1',
 842                 'ext': 'mp4',
 843                 'upload_date': '20150226',
 844                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 845                 'timestamp': int,
 846                 'title': 'John Carlson Postgame 2/25/15',
 847             },
 848         },
 849         # Kaltura embed (different embed code)
 850         {
 851             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 852             'info_dict': {
 853                 'id': '1_a52wc67y',
 854                 'ext': 'flv',
 855                 'upload_date': '20150127',
 856                 'uploader_id': 'PremierMedia',
 857                 'timestamp': int,
 858                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 859             },
 860         },
 861         # Kaltura embed protected with referrer
 862         {
 863             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
 864             'info_dict': {
 865                 'id': '1_g4fbemnq',
 866                 'ext': 'mp4',
 867                 'title': 'Violetta - Achter De Schermen - Ruggero',
 868                 'description': 'Achter de schermen met Ruggero',
 869                 'timestamp': 1435133761,
 870                 'upload_date': '20150624',
 871                 'uploader_id': 'echojecka',
 872             },
 873         },
 874         # Eagle.Platform embed (generic URL)
 875         {
 876             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 877             'info_dict': {
 878                 'id': '227304',
 879                 'ext': 'mp4',
 880                 'title': 'Навальный вышел на свободу',
 881                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 882                 'thumbnail': 're:^https?://.*\.jpg$',
 883                 'duration': 87,
 884                 'view_count': int,
 885                 'age_limit': 0,
 886             },
 887         },
 888         # ClipYou (Eagle.Platform) embed (custom URL)
 889         {
 890             'url': 'http://muz-tv.ru/play/7129/',
 891             'info_dict': {
 892                 'id': '12820',
 893                 'ext': 'mp4',
 894                 'title': "'O Sole Mio",
 895                 'thumbnail': 're:^https?://.*\.jpg$',
 896                 'duration': 216,
 897                 'view_count': int,
 898             },
 899         },
 900         # Pladform embed
 901         {
 902             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 903             'info_dict': {
 904                 'id': '100183293',
 905                 'ext': 'mp4',
 906                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 907                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 908                 'thumbnail': 're:^https?://.*\.jpg$',
 909                 'duration': 694,
 910                 'age_limit': 0,
 911             },
 912         },
 913         # Playwire embed
 914         {
 915             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 916             'info_dict': {
 917                 'id': '3519514',
 918                 'ext': 'mp4',
 919                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 920                 'thumbnail': 're:^https?://.*\.png$',
 921                 'duration': 45.115,
 922             },
 923         },
 924         # 5min embed
 925         {
 926             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 927             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 928             'info_dict': {
 929                 'id': '518726732',
 930                 'ext': 'mp4',
 931                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 932             },
 933         },
 934         # SVT embed
 935         {
 936             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 937             'info_dict': {
 938                 'id': '2900353',
 939                 'ext': 'flv',
 940                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 941                 'duration': 27,
 942                 'age_limit': 0,
 943             },
 944         },
 945         # Crooks and Liars embed
 946         {
 947             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 948             'info_dict': {
 949                 'id': '8RUoRhRi',
 950                 'ext': 'mp4',
 951                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 952                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 953                 'timestamp': 1428207000,
 954                 'upload_date': '20150405',
 955                 'uploader': 'Heather',
 956             },
 957         },
 958         # Crooks and Liars external embed
 959         {
 960             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 961             'info_dict': {
 962                 'id': 'MTE3MjUtMzQ2MzA',
 963                 'ext': 'mp4',
 964                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 965                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 966                 'timestamp': 1265032391,
 967                 'upload_date': '20100201',
 968                 'uploader': 'Heather',
 969             },
 970         },
 971         # NBC Sports vplayer embed
 972         {
 973             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 974             'info_dict': {
 975                 'id': 'ln7x1qSThw4k',
 976                 'ext': 'flv',
 977                 'title': "PFT Live: New leader in the 'new-look' defense",
 978                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 979             },
 980         },
 981         # UDN embed
 982         {
 983             'url': 'http://www.udn.com/news/story/7314/822787',
 984             'md5': 'fd2060e988c326991037b9aff9df21a6',
 985             'info_dict': {
 986                 'id': '300346',
 987                 'ext': 'mp4',
 988                 'title': '中一中男師變性 全校師生力挺',
 989                 'thumbnail': 're:^https?://.*\.jpg$',
 990             }
 991         },
 992         # Ooyala embed
 993         {
 994             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 995             'info_dict': {
 996                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 997                 'ext': 'mp4',
 998                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
 999                 'title': 'This is what separates the Excel masters from the wannabes',
1000                 'duration': 191.933,
1001             },
1002             'params': {
1003                 # m3u8 downloads
1004                 'skip_download': True,
1005             }
1006         },
1007         # Contains a SMIL manifest
1008         {
1009             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1010             'info_dict': {
1011                 'id': 'file',
1012                 'ext': 'flv',
1013                 'title': '+ Football: Lottery Champions League Europe',
1014                 'uploader': 'www.telewebion.com',
1015             },
1016             'params': {
1017                 # rtmpe downloads
1018                 'skip_download': True,
1019             }
1020         },
1021         # Brightcove URL in single quotes
1022         {
1023             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1024             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1025             'info_dict': {
1026                 'id': '4255764656001',
1027                 'ext': 'mp4',
1028                 'title': 'SN Presents: Russell Martin, World Citizen',
1029                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1030                 'uploader': 'Rogers Sportsnet',
1031             },
1032         },
1033         # Dailymotion Cloud video
1034         {
1035             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1036             'md5': '49444254273501a64675a7e68c502681',
1037             'info_dict': {
1038                 'id': '5585de919473990de4bee11b',
1039                 'ext': 'mp4',
1040                 'title': 'Le débat',
1041                 'thumbnail': 're:^https?://.*\.jpe?g$',
1042             }
1043         },
1044         # OnionStudios embed
1045         {
1046             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1047             'info_dict': {
1048                 'id': '2855',
1049                 'ext': 'mp4',
1050                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1051                 'thumbnail': 're:^https?://.*\.jpe?g$',
1052                 'uploader': 'ClickHole',
1053                 'uploader_id': 'clickhole',
1054             }
1055         },
1056         # SnagFilms embed
1057         {
1058             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1059             'info_dict': {
1060                 'id': '74849a00-85a9-11e1-9660-123139220831',
1061                 'ext': 'mp4',
1062                 'title': '#whilewewatch',
1063             }
1064         },
1065         # AdobeTVVideo embed
1066         {
1067             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1068             'md5': '43662b577c018ad707a63766462b1e87',
1069             'info_dict': {
1070                 'id': '2456',
1071                 'ext': 'mp4',
1072                 'title': 'New experience with Acrobat DC',
1073                 'description': 'New experience with Acrobat DC',
1074                 'duration': 248.667,
1075             },
1076         },
1077         # ScreenwaveMedia embed
1078         {
1079             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1080             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1081             'info_dict': {
1082                 'id': 'cinemasnob-55d26273809dd',
1083                 'ext': 'mp4',
1084                 'title': 'cinemasnob',
1085             },
1086         },
1087         # BrightcoveInPageEmbed embed
1088         {
1089             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1090             'info_dict': {
1091                 'id': '4238694884001',
1092                 'ext': 'flv',
1093                 'title': 'Tabletop: Dread, Last Thoughts',
1094                 'description': 'Tabletop: Dread, Last Thoughts',
1095                 'duration': 51690,
1096             },
1097         },
1098         # JWPlayer with M3U8
1099         {
1100             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1101             'info_dict': {
1102                 'id': 'playlist',
1103                 'ext': 'mp4',
1104                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1105                 'uploader': 'ren.tv',
1106             },
1107             'params': {
1108                 # m3u8 downloads
1109                 'skip_download': True,
1110             }
1111         }
1112     ]
1113
1114     def report_following_redirect(self, new_url):
1115         """Report information extraction."""
1116         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1117
1118     def _extract_rss(self, url, video_id, doc):
1119         playlist_title = doc.find('./channel/title').text
1120         playlist_desc_el = doc.find('./channel/description')
1121         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1122
1123         entries = []
1124         for it in doc.findall('./channel/item'):
1125             next_url = xpath_text(it, 'link', fatal=False)
1126             if not next_url:
1127                 enclosure_nodes = it.findall('./enclosure')
1128                 for e in enclosure_nodes:
1129                     next_url = e.attrib.get('url')
1130                     if next_url:
1131                         break
1132
1133             if not next_url:
1134                 continue
1135
1136             entries.append({
1137                 '_type': 'url',
1138                 'url': next_url,
1139                 'title': it.find('title').text,
1140             })
1141
1142         return {
1143             '_type': 'playlist',
1144             'id': url,
1145             'title': playlist_title,
1146             'description': playlist_desc,
1147             'entries': entries,
1148         }
1149
1150     def _extract_camtasia(self, url, video_id, webpage):
1151         """ Returns None if no camtasia video can be found. """
1152
1153         camtasia_cfg = self._search_regex(
1154             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1155             webpage, 'camtasia configuration file', default=None)
1156         if camtasia_cfg is None:
1157             return None
1158
1159         title = self._html_search_meta('DC.title', webpage, fatal=True)
1160
1161         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1162         camtasia_cfg = self._download_xml(
1163             camtasia_url, video_id,
1164             note='Downloading camtasia configuration',
1165             errnote='Failed to download camtasia configuration')
1166         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1167
1168         entries = []
1169         for n in fileset_node.getchildren():
1170             url_n = n.find('./uri')
1171             if url_n is None:
1172                 continue
1173
1174             entries.append({
1175                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1176                 'title': '%s - %s' % (title, n.tag),
1177                 'url': compat_urlparse.urljoin(url, url_n.text),
1178                 'duration': float_or_none(n.find('./duration').text),
1179             })
1180
1181         return {
1182             '_type': 'playlist',
1183             'entries': entries,
1184             'title': title,
1185         }
1186
1187     def _real_extract(self, url):
1188         if url.startswith('//'):
1189             return {
1190                 '_type': 'url',
1191                 'url': self.http_scheme() + url,
1192             }
1193
1194         parsed_url = compat_urlparse.urlparse(url)
1195         if not parsed_url.scheme:
1196             default_search = self._downloader.params.get('default_search')
1197             if default_search is None:
1198                 default_search = 'fixup_error'
1199
1200             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1201                 if '/' in url:
1202                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1203                     return self.url_result('http://' + url)
1204                 elif default_search != 'fixup_error':
1205                     if default_search == 'auto_warning':
1206                         if re.match(r'^(?:url|URL)$', url):
1207                             raise ExtractorError(
1208                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1209                                 expected=True)
1210                         else:
1211                             self._downloader.report_warning(
1212                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1213                     return self.url_result('ytsearch:' + url)
1214
1215             if default_search in ('error', 'fixup_error'):
1216                 raise ExtractorError(
1217                     '%r is not a valid URL. '
1218                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1219                     % (url, url), expected=True)
1220             else:
1221                 if ':' not in default_search:
1222                     default_search += ':'
1223                 return self.url_result(default_search + url)
1224
1225         url, smuggled_data = unsmuggle_url(url)
1226         force_videoid = None
1227         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1228         if smuggled_data and 'force_videoid' in smuggled_data:
1229             force_videoid = smuggled_data['force_videoid']
1230             video_id = force_videoid
1231         else:
1232             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1233
1234         self.to_screen('%s: Requesting header' % video_id)
1235
1236         head_req = HEADRequest(url)
1237         head_response = self._request_webpage(
1238             head_req, video_id,
1239             note=False, errnote='Could not send HEAD request to %s' % url,
1240             fatal=False)
1241
1242         if head_response is not False:
1243             # Check for redirect
1244             new_url = head_response.geturl()
1245             if url != new_url:
1246                 self.report_following_redirect(new_url)
1247                 if force_videoid:
1248                     new_url = smuggle_url(
1249                         new_url, {'force_videoid': force_videoid})
1250                 return self.url_result(new_url)
1251
1252         full_response = None
1253         if head_response is False:
1254             request = sanitized_Request(url)
1255             request.add_header('Accept-Encoding', '*')
1256             full_response = self._request_webpage(request, video_id)
1257             head_response = full_response
1258
1259         info_dict = {
1260             'id': video_id,
1261             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1262         }
1263
1264         # Check for direct link to a video
1265         content_type = head_response.headers.get('Content-Type', '').lower()
1266         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1267         if m:
1268             upload_date = unified_strdate(
1269                 head_response.headers.get('Last-Modified'))
1270             format_id = m.group('format_id')
1271             if format_id.endswith('mpegurl'):
1272                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1273             elif format_id == 'f4m':
1274                 formats = self._extract_f4m_formats(url, video_id)
1275             else:
1276                 formats = [{
1277                     'format_id': m.group('format_id'),
1278                     'url': url,
1279                     'vcodec': 'none' if m.group('type') == 'audio' else None
1280                 }]
1281             info_dict.update({
1282                 'direct': True,
1283                 'formats': formats,
1284                 'upload_date': upload_date,
1285             })
1286             return info_dict
1287
1288         if not self._downloader.params.get('test', False) and not is_intentional:
1289             force = self._downloader.params.get('force_generic_extractor', False)
1290             self._downloader.report_warning(
1291                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1292
1293         if not full_response:
1294             request = sanitized_Request(url)
1295             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1296             # making it impossible to download only chunk of the file (yet we need only 512kB to
1297             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1298             # that will always result in downloading the whole file that is not desirable.
1299             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1300             # to accept raw bytes and being able to download only a chunk.
1301             # It may probably better to solve this by checking Content-Type for application/octet-stream
1302             # after HEAD request finishes, but not sure if we can rely on this.
1303             request.add_header('Accept-Encoding', '*')
1304             full_response = self._request_webpage(request, video_id)
1305
1306         # Maybe it's a direct link to a video?
1307         # Be careful not to download the whole thing!
1308         first_bytes = full_response.read(512)
1309         if not is_html(first_bytes):
1310             self._downloader.report_warning(
1311                 'URL could be a direct video link, returning it as such.')
1312             upload_date = unified_strdate(
1313                 head_response.headers.get('Last-Modified'))
1314             info_dict.update({
1315                 'direct': True,
1316                 'url': url,
1317                 'upload_date': upload_date,
1318             })
1319             return info_dict
1320
1321         webpage = self._webpage_read_content(
1322             full_response, url, video_id, prefix=first_bytes)
1323
1324         self.report_extraction(video_id)
1325
1326         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1327         try:
1328             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1329             if doc.tag == 'rss':
1330                 return self._extract_rss(url, video_id, doc)
1331             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1332                 return self._parse_smil(doc, url, video_id)
1333             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1334                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1335             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1336                 info_dict['formats'] = self._parse_mpd_formats(
1337                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1338                 return info_dict
1339             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1340                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1341                 return info_dict
1342         except compat_xml_parse_error:
1343             pass
1344
1345         # Is it a Camtasia project?
1346         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1347         if camtasia_res is not None:
1348             return camtasia_res
1349
1350         # Sometimes embedded video player is hidden behind percent encoding
1351         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1352         # Unescaping the whole page allows to handle those cases in a generic way
1353         webpage = compat_urllib_parse_unquote(webpage)
1354
1355         # it's tempting to parse this further, but you would
1356         # have to take into account all the variations like
1357         #   Video Title - Site Name
1358         #   Site Name | Video Title
1359         #   Video Title - Tagline | Site Name
1360         # and so on and so forth; it's just not practical
1361         video_title = self._html_search_regex(
1362             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1363             default='video')
1364
1365         # Try to detect age limit automatically
1366         age_limit = self._rta_search(webpage)
1367         # And then there are the jokers who advertise that they use RTA,
1368         # but actually don't.
1369         AGE_LIMIT_MARKERS = [
1370             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1371         ]
1372         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1373             age_limit = 18
1374
1375         # video uploader is domain name
1376         video_uploader = self._search_regex(
1377             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1378
1379         # Helper method
1380         def _playlist_from_matches(matches, getter=None, ie=None):
1381             urlrs = orderedSet(
1382                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1383                 for m in matches)
1384             return self.playlist_result(
1385                 urlrs, playlist_id=video_id, playlist_title=video_title)
1386
1387         # Look for Brightcove Legacy Studio embeds
1388         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1389         if bc_urls:
1390             self.to_screen('Brightcove video detected.')
1391             entries = [{
1392                 '_type': 'url',
1393                 'url': smuggle_url(bc_url, {'Referer': url}),
1394                 'ie_key': 'BrightcoveLegacy'
1395             } for bc_url in bc_urls]
1396
1397             return {
1398                 '_type': 'playlist',
1399                 'title': video_title,
1400                 'id': video_id,
1401                 'entries': entries,
1402             }
1403
1404         # Look for Brightcove New Studio embeds
1405         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1406         if bc_urls:
1407             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1408
1409         # Look for embedded rtl.nl player
1410         matches = re.findall(
1411             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1412             webpage)
1413         if matches:
1414             return _playlist_from_matches(matches, ie='RtlNl')
1415
1416         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1417         if vimeo_url is not None:
1418             return self.url_result(vimeo_url)
1419
1420         vid_me_embed_url = self._search_regex(
1421             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1422             webpage, 'vid.me embed', default=None)
1423         if vid_me_embed_url is not None:
1424             return self.url_result(vid_me_embed_url, 'Vidme')
1425
1426         # Look for embedded YouTube player
1427         matches = re.findall(r'''(?x)
1428             (?:
1429                 <iframe[^>]+?src=|
1430                 data-video-url=|
1431                 <embed[^>]+?src=|
1432                 embedSWF\(?:\s*|
1433                 new\s+SWFObject\(
1434             )
1435             (["\'])
1436                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1437                 (?:embed|v|p)/.+?)
1438             \1''', webpage)
1439         if matches:
1440             return _playlist_from_matches(
1441                 matches, lambda m: unescapeHTML(m[1]))
1442
1443         # Look for lazyYT YouTube embed
1444         matches = re.findall(
1445             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1446         if matches:
1447             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1448
1449         # Look for embedded Dailymotion player
1450         matches = re.findall(
1451             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1452         if matches:
1453             return _playlist_from_matches(
1454                 matches, lambda m: unescapeHTML(m[1]))
1455
1456         # Look for embedded Dailymotion playlist player (#3822)
1457         m = re.search(
1458             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1459         if m:
1460             playlists = re.findall(
1461                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1462             if playlists:
1463                 return _playlist_from_matches(
1464                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1465
1466         # Look for embedded Wistia player
1467         match = re.search(
1468             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1469         if match:
1470             embed_url = self._proto_relative_url(
1471                 unescapeHTML(match.group('url')))
1472             return {
1473                 '_type': 'url_transparent',
1474                 'url': embed_url,
1475                 'ie_key': 'Wistia',
1476                 'uploader': video_uploader,
1477                 'title': video_title,
1478                 'id': video_id,
1479             }
1480
1481         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1482         if match:
1483             return {
1484                 '_type': 'url_transparent',
1485                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1486                 'ie_key': 'Wistia',
1487                 'uploader': video_uploader,
1488                 'title': video_title,
1489                 'id': match.group('id')
1490             }
1491
1492         # Look for SVT player
1493         svt_url = SVTIE._extract_url(webpage)
1494         if svt_url:
1495             return self.url_result(svt_url, 'SVT')
1496
1497         # Look for embedded condenast player
1498         matches = re.findall(
1499             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1500             webpage)
1501         if matches:
1502             return {
1503                 '_type': 'playlist',
1504                 'entries': [{
1505                     '_type': 'url',
1506                     'ie_key': 'CondeNast',
1507                     'url': ma,
1508                 } for ma in matches],
1509                 'title': video_title,
1510                 'id': video_id,
1511             }
1512
1513         # Look for Bandcamp pages with custom domain
1514         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1515         if mobj is not None:
1516             burl = unescapeHTML(mobj.group(1))
1517             # Don't set the extractor because it can be a track url or an album
1518             return self.url_result(burl)
1519
1520         # Look for embedded Vevo player
1521         mobj = re.search(
1522             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1523         if mobj is not None:
1524             return self.url_result(mobj.group('url'))
1525
1526         # Look for embedded Viddler player
1527         mobj = re.search(
1528             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1529             webpage)
1530         if mobj is not None:
1531             return self.url_result(mobj.group('url'))
1532
1533         # Look for NYTimes player
1534         mobj = re.search(
1535             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1536             webpage)
1537         if mobj is not None:
1538             return self.url_result(mobj.group('url'))
1539
1540         # Look for Libsyn player
1541         mobj = re.search(
1542             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1543         if mobj is not None:
1544             return self.url_result(mobj.group('url'))
1545
1546         # Look for Ooyala videos
1547         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1548                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1549                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1550                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1551         if mobj is not None:
1552             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1553
1554         # Look for multiple Ooyala embeds on SBN network websites
1555         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1556         if mobj is not None:
1557             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1558             if embeds:
1559                 return _playlist_from_matches(
1560                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1561
1562         # Look for Aparat videos
1563         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1564         if mobj is not None:
1565             return self.url_result(mobj.group(1), 'Aparat')
1566
1567         # Look for MPORA videos
1568         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1569         if mobj is not None:
1570             return self.url_result(mobj.group(1), 'Mpora')
1571
1572         # Look for embedded NovaMov-based player
1573         mobj = re.search(
1574             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1575                     (?P<url>http://(?:(?:embed|www)\.)?
1576                         (?:novamov\.com|
1577                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1578                            videoweed\.(?:es|com)|
1579                            movshare\.(?:net|sx|ag)|
1580                            divxstage\.(?:eu|net|ch|co|at|ag))
1581                         /embed\.php.+?)\1''', webpage)
1582         if mobj is not None:
1583             return self.url_result(mobj.group('url'))
1584
1585         # Look for embedded Facebook player
1586         mobj = re.search(
1587             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1588         if mobj is not None:
1589             return self.url_result(mobj.group('url'), 'Facebook')
1590
1591         # Look for embedded VK player
1592         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1593         if mobj is not None:
1594             return self.url_result(mobj.group('url'), 'VK')
1595
1596         # Look for embedded Odnoklassniki player
1597         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1598         if mobj is not None:
1599             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1600
1601         # Look for embedded ivi player
1602         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1603         if mobj is not None:
1604             return self.url_result(mobj.group('url'), 'Ivi')
1605
1606         # Look for embedded Huffington Post player
1607         mobj = re.search(
1608             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1609         if mobj is not None:
1610             return self.url_result(mobj.group('url'), 'HuffPost')
1611
1612         # Look for embed.ly
1613         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1614         if mobj is not None:
1615             return self.url_result(mobj.group('url'))
1616         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1617         if mobj is not None:
1618             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1619
1620         # Look for funnyordie embed
1621         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1622         if matches:
1623             return _playlist_from_matches(
1624                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1625
1626         # Look for BBC iPlayer embed
1627         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1628         if matches:
1629             return _playlist_from_matches(matches, ie='BBCCoUk')
1630
1631         # Look for embedded RUTV player
1632         rutv_url = RUTVIE._extract_url(webpage)
1633         if rutv_url:
1634             return self.url_result(rutv_url, 'RUTV')
1635
1636         # Look for embedded TVC player
1637         tvc_url = TVCIE._extract_url(webpage)
1638         if tvc_url:
1639             return self.url_result(tvc_url, 'TVC')
1640
1641         # Look for embedded SportBox player
1642         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1643         if sportbox_urls:
1644             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1645
1646         # Look for embedded PornHub player
1647         pornhub_url = PornHubIE._extract_url(webpage)
1648         if pornhub_url:
1649             return self.url_result(pornhub_url, 'PornHub')
1650
1651         # Look for embedded XHamster player
1652         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1653         if xhamster_urls:
1654             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1655
1656         # Look for embedded TNAFlixNetwork player
1657         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1658         if tnaflix_urls:
1659             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1660
1661         # Look for embedded Tvigle player
1662         mobj = re.search(
1663             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1664         if mobj is not None:
1665             return self.url_result(mobj.group('url'), 'Tvigle')
1666
1667         # Look for embedded TED player
1668         mobj = re.search(
1669             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1670         if mobj is not None:
1671             return self.url_result(mobj.group('url'), 'TED')
1672
1673         # Look for embedded Ustream videos
1674         mobj = re.search(
1675             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1676         if mobj is not None:
1677             return self.url_result(mobj.group('url'), 'Ustream')
1678
1679         # Look for embedded arte.tv player
1680         mobj = re.search(
1681             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1682             webpage)
1683         if mobj is not None:
1684             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1685
1686         # Look for embedded francetv player
1687         mobj = re.search(
1688             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1689             webpage)
1690         if mobj is not None:
1691             return self.url_result(mobj.group('url'))
1692
1693         # Look for embedded smotri.com player
1694         smotri_url = SmotriIE._extract_url(webpage)
1695         if smotri_url:
1696             return self.url_result(smotri_url, 'Smotri')
1697
1698         # Look for embedded Myvi.ru player
1699         myvi_url = MyviIE._extract_url(webpage)
1700         if myvi_url:
1701             return self.url_result(myvi_url)
1702
1703         # Look for embedded soundcloud player
1704         mobj = re.search(
1705             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1706             webpage)
1707         if mobj is not None:
1708             url = unescapeHTML(mobj.group('url'))
1709             return self.url_result(url)
1710
1711         # Look for embedded vulture.com player
1712         mobj = re.search(
1713             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1714             webpage)
1715         if mobj is not None:
1716             url = unescapeHTML(mobj.group('url'))
1717             return self.url_result(url, ie='Vulture')
1718
1719         # Look for embedded mtvservices player
1720         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1721         if mtvservices_url:
1722             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1723
1724         # Look for embedded yahoo player
1725         mobj = re.search(
1726             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1727             webpage)
1728         if mobj is not None:
1729             return self.url_result(mobj.group('url'), 'Yahoo')
1730
1731         # Look for embedded sbs.com.au player
1732         mobj = re.search(
1733             r'''(?x)
1734             (?:
1735                 <meta\s+property="og:video"\s+content=|
1736                 <iframe[^>]+?src=
1737             )
1738             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1739             webpage)
1740         if mobj is not None:
1741             return self.url_result(mobj.group('url'), 'SBS')
1742
1743         # Look for embedded Cinchcast player
1744         mobj = re.search(
1745             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1746             webpage)
1747         if mobj is not None:
1748             return self.url_result(mobj.group('url'), 'Cinchcast')
1749
1750         mobj = re.search(
1751             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1752             webpage)
1753         if not mobj:
1754             mobj = re.search(
1755                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1756                 webpage)
1757         if mobj is not None:
1758             return self.url_result(mobj.group('url'), 'MLB')
1759
1760         mobj = re.search(
1761             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1762             webpage)
1763         if mobj is not None:
1764             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1765
1766         mobj = re.search(
1767             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1768             webpage)
1769         if mobj is not None:
1770             return self.url_result(mobj.group('url'), 'Livestream')
1771
1772         # Look for Zapiks embed
1773         mobj = re.search(
1774             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1775         if mobj is not None:
1776             return self.url_result(mobj.group('url'), 'Zapiks')
1777
1778         # Look for Kaltura embeds
1779         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1780                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1781         if mobj is not None:
1782             return self.url_result(smuggle_url(
1783                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1784                 {'source_url': url}), 'Kaltura')
1785
1786         # Look for Eagle.Platform embeds
1787         mobj = re.search(
1788             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1789         if mobj is not None:
1790             return self.url_result(mobj.group('url'), 'EaglePlatform')
1791
1792         # Look for ClipYou (uses Eagle.Platform) embeds
1793         mobj = re.search(
1794             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1795         if mobj is not None:
1796             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1797
1798         # Look for Pladform embeds
1799         pladform_url = PladformIE._extract_url(webpage)
1800         if pladform_url:
1801             return self.url_result(pladform_url)
1802
1803         # Look for Videomore embeds
1804         videomore_url = VideomoreIE._extract_url(webpage)
1805         if videomore_url:
1806             return self.url_result(videomore_url)
1807
1808         # Look for Playwire embeds
1809         mobj = re.search(
1810             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1811         if mobj is not None:
1812             return self.url_result(mobj.group('url'))
1813
1814         # Look for 5min embeds
1815         mobj = re.search(
1816             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1817         if mobj is not None:
1818             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1819
1820         # Look for Crooks and Liars embeds
1821         mobj = re.search(
1822             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1823         if mobj is not None:
1824             return self.url_result(mobj.group('url'))
1825
1826         # Look for NBC Sports VPlayer embeds
1827         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1828         if nbc_sports_url:
1829             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1830
1831         # Look for Google Drive embeds
1832         google_drive_url = GoogleDriveIE._extract_url(webpage)
1833         if google_drive_url:
1834             return self.url_result(google_drive_url, 'GoogleDrive')
1835
1836         # Look for UDN embeds
1837         mobj = re.search(
1838             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1839         if mobj is not None:
1840             return self.url_result(
1841                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1842
1843         # Look for Senate ISVP iframe
1844         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1845         if senate_isvp_url:
1846             return self.url_result(senate_isvp_url, 'SenateISVP')
1847
1848         # Look for Dailymotion Cloud videos
1849         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1850         if dmcloud_url:
1851             return self.url_result(dmcloud_url, 'DailymotionCloud')
1852
1853         # Look for OnionStudios embeds
1854         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1855         if onionstudios_url:
1856             return self.url_result(onionstudios_url)
1857
1858         # Look for SnagFilms embeds
1859         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1860         if snagfilms_url:
1861             return self.url_result(snagfilms_url)
1862
1863         # Look for JWPlatform embeds
1864         jwplatform_url = JWPlatformIE._extract_url(webpage)
1865         if jwplatform_url:
1866             return self.url_result(jwplatform_url, 'JWPlatform')
1867
1868         # Look for ScreenwaveMedia embeds
1869         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1870         if mobj is not None:
1871             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1872
1873         # Look for Digiteka embeds
1874         digiteka_url = DigitekaIE._extract_url(webpage)
1875         if digiteka_url:
1876             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
1877
1878         # Look for Limelight embeds
1879         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
1880         if mobj:
1881             lm = {
1882                 'Media': 'media',
1883                 'Channel': 'channel',
1884                 'ChannelList': 'channel_list',
1885             }
1886             return self.url_result('limelight:%s:%s' % (
1887                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
1888
1889         # Look for AdobeTVVideo embeds
1890         mobj = re.search(
1891             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1892             webpage)
1893         if mobj is not None:
1894             return self.url_result(
1895                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1896                 'AdobeTVVideo')
1897
1898         def check_video(vurl):
1899             if YoutubeIE.suitable(vurl):
1900                 return True
1901             vpath = compat_urlparse.urlparse(vurl).path
1902             vext = determine_ext(vpath)
1903             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1904
1905         def filter_video(urls):
1906             return list(filter(check_video, urls))
1907
1908         # Start with something easy: JW Player in SWFObject
1909         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1910         if not found:
1911             # Look for gorilla-vid style embedding
1912             found = filter_video(re.findall(r'''(?sx)
1913                 (?:
1914                     jw_plugins|
1915                     JWPlayerOptions|
1916                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1917                 )
1918                 .*?
1919                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1920         if not found:
1921             # Broaden the search a little bit
1922             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1923         if not found:
1924             # Broaden the search a little bit: JWPlayer JS loader
1925             found = filter_video(re.findall(
1926                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1927         if not found:
1928             # Flow player
1929             found = filter_video(re.findall(r'''(?xs)
1930                 flowplayer\("[^"]+",\s*
1931                     \{[^}]+?\}\s*,
1932                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1933                         ["']?url["']?\s*:\s*["']([^"']+)["']
1934             ''', webpage))
1935         if not found:
1936             # Cinerama player
1937             found = re.findall(
1938                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1939         if not found:
1940             # Try to find twitter cards info
1941             found = filter_video(re.findall(
1942                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1943         if not found:
1944             # We look for Open Graph info:
1945             # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1946             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1947             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1948             if m_video_type is not None:
1949                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1950         if not found:
1951             # HTML5 video
1952             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1953         if not found:
1954             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1955             found = re.search(
1956                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1957                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1958                 webpage)
1959             if not found:
1960                 # Look also in Refresh HTTP header
1961                 refresh_header = head_response.headers.get('Refresh')
1962                 if refresh_header:
1963                     # In python 2 response HTTP headers are bytestrings
1964                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1965                         refresh_header = refresh_header.decode('iso-8859-1')
1966                     found = re.search(REDIRECT_REGEX, refresh_header)
1967             if found:
1968                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1969                 self.report_following_redirect(new_url)
1970                 return {
1971                     '_type': 'url',
1972                     'url': new_url,
1973                 }
1974         if not found:
1975             raise UnsupportedError(url)
1976
1977         entries = []
1978         for video_url in found:
1979             video_url = video_url.replace('\\/', '/')
1980             video_url = compat_urlparse.urljoin(url, video_url)
1981             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1982
1983             # Sometimes, jwplayer extraction will result in a YouTube URL
1984             if YoutubeIE.suitable(video_url):
1985                 entries.append(self.url_result(video_url, 'Youtube'))
1986                 continue
1987
1988             # here's a fun little line of code for you:
1989             video_id = os.path.splitext(video_id)[0]
1990
1991             entry_info_dict = {
1992                 'id': video_id,
1993                 'uploader': video_uploader,
1994                 'title': video_title,
1995                 'age_limit': age_limit,
1996             }
1997
1998             ext = determine_ext(video_url)
1999             if ext == 'smil':
2000                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2001             elif ext == 'xspf':
2002                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2003             elif ext == 'm3u8':
2004                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2005             elif ext == 'mpd':
2006                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2007             elif ext == 'f4m':
2008                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2009             else:
2010                 entry_info_dict['url'] = video_url
2011
2012             entries.append(entry_info_dict)
2013
2014         if len(entries) == 1:
2015             return entries[0]
2016         else:
2017             for num, e in enumerate(entries, start=1):
2018                 # 'url' results don't have a title
2019                 if e.get('title') is not None:
2020                     e['title'] = '%s (%d)' % (e['title'], num)
2021             return {
2022                 '_type': 'playlist',
2023                 'entries': entries,
2024             }