git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urllib_parse_unquote,
  13     compat_urllib_request,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     parse_xml,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import BrightcoveIE
  34 from .nbc import NBCSportsVPlayerIE
  35 from .ooyala import OoyalaIE
  36 from .rutv import RUTVIE
  37 from .tvc import TVCIE
  38 from .sportbox import SportBoxEmbedIE
  39 from .smotri import SmotriIE
  40 from .condenast import CondeNastIE
  41 from .udn import UDNEmbedIE
  42 from .senateisvp import SenateISVPIE
  43 from .bliptv import BlipTVIE
  44 from .svt import SVTIE
  45
  46
  47 class GenericIE(InfoExtractor):
  48     IE_DESC = 'Generic downloader that works on some sites'
  49     _VALID_URL = r'.*'
  50     IE_NAME = 'generic'
  51     _TESTS = [
  52         # Direct link to a video
  53         {
  54             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  55             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  56             'info_dict': {
  57                 'id': 'trailer',
  58                 'ext': 'mp4',
  59                 'title': 'trailer',
  60                 'upload_date': '20100513',
  61             }
  62         },
  63         # Direct link to media delivered compressed (until Accept-Encoding is *)
  64         {
  65             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  66             'md5': '128c42e68b13950268b648275386fc74',
  67             'info_dict': {
  68                 'id': 'FictionJunction-Parallel_Hearts',
  69                 'ext': 'flac',
  70                 'title': 'FictionJunction-Parallel_Hearts',
  71                 'upload_date': '20140522',
  72             },
  73             'expected_warnings': [
  74                 'URL could be a direct video link, returning it as such.'
  75             ]
  76         },
  77         # Direct download with broken HEAD
  78         {
  79             'url': 'http://ai-radio.org:8000/radio.opus',
  80             'info_dict': {
  81                 'id': 'radio',
  82                 'ext': 'opus',
  83                 'title': 'radio',
  84             },
  85             'params': {
  86                 'skip_download': True,  # infinite live stream
  87             },
  88             'expected_warnings': [
  89                 r'501.*Not Implemented'
  90             ],
  91         },
  92         # Direct link with incorrect MIME type
  93         {
  94             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
  95             'md5': '4ccbebe5f36706d85221f204d7eb5913',
  96             'info_dict': {
  97                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
  98                 'id': '5_Lennart_Poettering_-_Systemd',
  99                 'ext': 'webm',
 100                 'title': '5_Lennart_Poettering_-_Systemd',
 101                 'upload_date': '20141120',
 102             },
 103             'expected_warnings': [
 104                 'URL could be a direct video link, returning it as such.'
 105             ]
 106         },
 107         # RSS feed
 108         {
 109             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 110             'info_dict': {
 111                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 112                 'title': 'Zero Punctuation',
 113                 'description': 're:.*groundbreaking video review series.*'
 114             },
 115             'playlist_mincount': 11,
 116         },
 117         # RSS feed with enclosure
 118         {
 119             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 120             'info_dict': {
 121                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 122                 'ext': 'm4v',
 123                 'upload_date': '20150228',
 124                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 125             }
 126         },
 127         # google redirect
 128         {
 129             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 130             'info_dict': {
 131                 'id': 'cmQHVoWB5FY',
 132                 'ext': 'mp4',
 133                 'upload_date': '20130224',
 134                 'uploader_id': 'TheVerge',
 135                 'description': 're:^Chris Ziegler takes a look at the\.*',
 136                 'uploader': 'The Verge',
 137                 'title': 'First Firefox OS phones side-by-side',
 138             },
 139             'params': {
 140                 'skip_download': False,
 141             }
 142         },
 143         {
 144             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 145             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 146             'info_dict': {
 147                 'id': '13601338388002',
 148                 'ext': 'mp4',
 149                 'uploader': 'www.hodiho.fr',
 150                 'title': 'R\u00e9gis plante sa Jeep',
 151             }
 152         },
 153         # bandcamp page with custom domain
 154         {
 155             'add_ie': ['Bandcamp'],
 156             'url': 'http://bronyrock.com/track/the-pony-mash',
 157             'info_dict': {
 158                 'id': '3235767654',
 159                 'ext': 'mp3',
 160                 'title': 'The Pony Mash',
 161                 'uploader': 'M_Pallante',
 162             },
 163             'skip': 'There is a limit of 200 free downloads / month for the test song',
 164         },
 165         # embedded brightcove video
 166         # it also tests brightcove videos that need to set the 'Referer' in the
 167         # http requests
 168         {
 169             'add_ie': ['Brightcove'],
 170             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 171             'info_dict': {
 172                 'id': '2765128793001',
 173                 'ext': 'mp4',
 174                 'title': 'Le cours de bourse : l’analyse technique',
 175                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 176                 'uploader': 'BFM BUSINESS',
 177             },
 178             'params': {
 179                 'skip_download': True,
 180             },
 181         },
 182         {
 183             # https://github.com/rg3/youtube-dl/issues/2253
 184             'url': 'http://bcove.me/i6nfkrc3',
 185             'md5': '0ba9446db037002366bab3b3eb30c88c',
 186             'info_dict': {
 187                 'id': '3101154703001',
 188                 'ext': 'mp4',
 189                 'title': 'Still no power',
 190                 'uploader': 'thestar.com',
 191                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 192             },
 193             'add_ie': ['Brightcove'],
 194         },
 195         {
 196             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 197             'md5': 'fb973ecf6e4a78a67453647444222983',
 198             'info_dict': {
 199                 'id': '3414141473001',
 200                 'ext': 'mp4',
 201                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 202                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 203                 'uploader': 'Championat',
 204             },
 205         },
 206         {
 207             # https://github.com/rg3/youtube-dl/issues/3541
 208             'add_ie': ['Brightcove'],
 209             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 210             'info_dict': {
 211                 'id': '3866516442001',
 212                 'ext': 'mp4',
 213                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 214                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 215                 'uploader': 'SBS Broadcasting',
 216             },
 217             'skip': 'Restricted to Netherlands',
 218             'params': {
 219                 'skip_download': True,  # m3u8 download
 220             },
 221         },
 222         # ooyala video
 223         {
 224             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 225             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 226             'info_dict': {
 227                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 228                 'ext': 'mp4',
 229                 'title': '2cc213299525360.mov',  # that's what we get
 230             },
 231             'add_ie': ['Ooyala'],
 232         },
 233         # multiple ooyala embeds on SBN network websites
 234         {
 235             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 236             'info_dict': {
 237                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 238                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 239             },
 240             'playlist_mincount': 3,
 241             'params': {
 242                 'skip_download': True,
 243             },
 244             'add_ie': ['Ooyala'],
 245         },
 246         # embed.ly video
 247         {
 248             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 249             'info_dict': {
 250                 'id': '9ODmcdjQcHQ',
 251                 'ext': 'mp4',
 252                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 253                 'upload_date': '20140225',
 254                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 255                 'uploader': 'Tested',
 256                 'uploader_id': 'testedcom',
 257             },
 258             # No need to test YoutubeIE here
 259             'params': {
 260                 'skip_download': True,
 261             },
 262         },
 263         # funnyordie embed
 264         {
 265             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 266             'info_dict': {
 267                 'id': '18e820ec3f',
 268                 'ext': 'mp4',
 269                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 270                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 271             },
 272         },
 273         # BBC iPlayer embeds
 274         {
 275             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 276             'info_dict': {
 277                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 278             },
 279             'playlist_mincount': 18,
 280         },
 281         # RUTV embed
 282         {
 283             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 284             'info_dict': {
 285                 'id': '776940',
 286                 'ext': 'mp4',
 287                 'title': 'Охотское море стало целиком российским',
 288                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 289             },
 290             'params': {
 291                 # m3u8 download
 292                 'skip_download': True,
 293             },
 294         },
 295         # SportBox embed
 296         {
 297             'url': 'http://www.vestifinance.ru/articles/25753',
 298             'info_dict': {
 299                 'id': '25753',
 300                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 301             },
 302             'playlist': [{
 303                 'info_dict': {
 304                     'id': '370908',
 305                     'title': 'Госзаказ. День 3',
 306                     'ext': 'mp4',
 307                 }
 308             }, {
 309                 'info_dict': {
 310                     'id': '370905',
 311                     'title': 'Госзаказ. День 2',
 312                     'ext': 'mp4',
 313                 }
 314             }, {
 315                 'info_dict': {
 316                     'id': '370902',
 317                     'title': 'Госзаказ. День 1',
 318                     'ext': 'mp4',
 319                 }
 320             }],
 321             'params': {
 322                 # m3u8 download
 323                 'skip_download': True,
 324             },
 325         },
 326         # Embedded TED video
 327         {
 328             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 329             'md5': '65fdff94098e4a607385a60c5177c638',
 330             'info_dict': {
 331                 'id': '1969',
 332                 'ext': 'mp4',
 333                 'title': 'Hidden miracles of the natural world',
 334                 'uploader': 'Louie Schwartzberg',
 335                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 336             }
 337         },
 338         # Embedded Ustream video
 339         {
 340             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 341             'md5': '27b99cdb639c9b12a79bca876a073417',
 342             'info_dict': {
 343                 'id': '45734260',
 344                 'ext': 'flv',
 345                 'uploader': 'AU SPA:  The NSA and Privacy',
 346                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 347             }
 348         },
 349         # nowvideo embed hidden behind percent encoding
 350         {
 351             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 352             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 353             'info_dict': {
 354                 'id': '06e53103ca9aa',
 355                 'ext': 'flv',
 356                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 357                 'description': 'No description',
 358             },
 359         },
 360         # arte embed
 361         {
 362             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 363             'md5': '7653032cbb25bf6c80d80f217055fa43',
 364             'info_dict': {
 365                 'id': '048195-004_PLUS7-F',
 366                 'ext': 'flv',
 367                 'title': 'X:enius',
 368                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 369                 'upload_date': '20140320',
 370             },
 371             'params': {
 372                 'skip_download': 'Requires rtmpdump'
 373             }
 374         },
 375         # Condé Nast embed
 376         {
 377             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 378             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 379             'info_dict': {
 380                 'id': '53501be369702d3275860000',
 381                 'ext': 'mp4',
 382                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 383             }
 384         },
 385         # Dailymotion embed
 386         {
 387             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 388             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 389             'info_dict': {
 390                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 391                 'ext': 'mp4',
 392                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 393                 'uploader': 'Spi0n',
 394             },
 395             'add_ie': ['Dailymotion'],
 396         },
 397         # YouTube embed
 398         {
 399             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 400             'info_dict': {
 401                 'id': 'FXRb4ykk4S0',
 402                 'ext': 'mp4',
 403                 'title': 'The NBL Auction 2014',
 404                 'uploader': 'BADMINTON England',
 405                 'uploader_id': 'BADMINTONEvents',
 406                 'upload_date': '20140603',
 407                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 408             },
 409             'add_ie': ['Youtube'],
 410             'params': {
 411                 'skip_download': True,
 412             }
 413         },
 414         # MTVServices embed
 415         {
 416             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 417             'md5': '35727f82f58c76d996fc188f9755b0d5',
 418             'info_dict': {
 419                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 420                 'ext': 'mp4',
 421                 'title': 'Review',
 422                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 423             },
 424         },
 425         # YouTube embed via <data-embed-url="">
 426         {
 427             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 428             'info_dict': {
 429                 'id': '4vAffPZIT44',
 430                 'ext': 'mp4',
 431                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 432                 'uploader': 'Gameloft',
 433                 'uploader_id': 'gameloft',
 434                 'upload_date': '20140828',
 435                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 436             },
 437             'params': {
 438                 'skip_download': True,
 439             }
 440         },
 441         # Camtasia studio
 442         {
 443             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 444             'playlist': [{
 445                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 446                 'info_dict': {
 447                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 448                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 449                     'ext': 'flv',
 450                     'duration': 2235.90,
 451                 }
 452             }, {
 453                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 454                 'info_dict': {
 455                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 456                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 457                     'ext': 'flv',
 458                     'duration': 2235.93,
 459                 }
 460             }],
 461             'info_dict': {
 462                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 463             }
 464         },
 465         # Flowplayer
 466         {
 467             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 468             'md5': '9d65602bf31c6e20014319c7d07fba27',
 469             'info_dict': {
 470                 'id': '5123ea6d5e5a7',
 471                 'ext': 'mp4',
 472                 'age_limit': 18,
 473                 'uploader': 'www.handjobhub.com',
 474                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 475             }
 476         },
 477         # Multiple brightcove videos
 478         # https://github.com/rg3/youtube-dl/issues/2283
 479         {
 480             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 481             'info_dict': {
 482                 'id': 'always-never',
 483                 'title': 'Always / Never - The New Yorker',
 484             },
 485             'playlist_count': 3,
 486             'params': {
 487                 'extract_flat': False,
 488                 'skip_download': True,
 489             }
 490         },
 491         # MLB embed
 492         {
 493             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 494             'md5': '96f09a37e44da40dd083e12d9a683327',
 495             'info_dict': {
 496                 'id': '33322633',
 497                 'ext': 'mp4',
 498                 'title': 'Ump changes call to ball',
 499                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 500                 'duration': 48,
 501                 'timestamp': 1401537900,
 502                 'upload_date': '20140531',
 503                 'thumbnail': 're:^https?://.*\.jpg$',
 504             },
 505         },
 506         # Wistia embed
 507         {
 508             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 509             'md5': '8788b683c777a5cf25621eaf286d0c23',
 510             'info_dict': {
 511                 'id': '1cfaf6b7ea',
 512                 'ext': 'mov',
 513                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 514                 'duration': 643.0,
 515                 'filesize': 182808282,
 516                 'uploader': 'education-portal.com',
 517             },
 518         },
 519         {
 520             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 521             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 522             'info_dict': {
 523                 'id': 'uxjb0lwrcz',
 524                 'ext': 'mp4',
 525                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 526                 'duration': 1715.0,
 527                 'uploader': 'thoughtworks.wistia.com',
 528             },
 529         },
 530         # Soundcloud embed
 531         {
 532             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 533             'info_dict': {
 534                 'id': '174391317',
 535                 'ext': 'mp3',
 536                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 537                 'uploader': 'Sophos Security',
 538                 'title': 'Chet Chat 171 - Oct 29, 2014',
 539                 'upload_date': '20141029',
 540             }
 541         },
 542         # Livestream embed
 543         {
 544             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 545             'info_dict': {
 546                 'id': '67864563',
 547                 'ext': 'flv',
 548                 'upload_date': '20141112',
 549                 'title': 'Rosetta #CometLanding webcast HL 10',
 550             }
 551         },
 552         # LazyYT
 553         {
 554             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 555             'info_dict': {
 556                 'id': '1986',
 557                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 558             },
 559             'playlist_mincount': 2,
 560         },
 561         # Cinchcast embed
 562         {
 563             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 564             'info_dict': {
 565                 'id': '7141703',
 566                 'ext': 'mp3',
 567                 'upload_date': '20141126',
 568                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 569             }
 570         },
 571         # Cinerama player
 572         {
 573             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 574             'info_dict': {
 575                 'id': '730m_DandD_1901_512k',
 576                 'ext': 'mp4',
 577                 'uploader': 'www.abc.net.au',
 578                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 579             }
 580         },
 581         # embedded viddler video
 582         {
 583             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 584             'info_dict': {
 585                 'id': '4d03aad9',
 586                 'ext': 'mp4',
 587                 'uploader': 'deadspin',
 588                 'title': 'WALL-TO-GORTAT',
 589                 'timestamp': 1422285291,
 590                 'upload_date': '20150126',
 591             },
 592             'add_ie': ['Viddler'],
 593         },
 594         # Libsyn embed
 595         {
 596             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 597             'info_dict': {
 598                 'id': '3377616',
 599                 'ext': 'mp3',
 600                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 601                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 602                 'upload_date': '20150220',
 603             },
 604         },
 605         # jwplayer YouTube
 606         {
 607             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 608             'info_dict': {
 609                 'id': 'Mrj4DVp2zeA',
 610                 'ext': 'mp4',
 611                 'upload_date': '20150212',
 612                 'uploader': 'The National Archives UK',
 613                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 614                 'uploader_id': 'NationalArchives08',
 615                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 616             },
 617         },
 618         # rtl.nl embed
 619         {
 620             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 621             'playlist_mincount': 5,
 622             'info_dict': {
 623                 'id': 'aanslagen-kopenhagen',
 624                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 625             }
 626         },
 627         # Zapiks embed
 628         {
 629             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 630             'info_dict': {
 631                 'id': '118046',
 632                 'ext': 'mp4',
 633                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 634             }
 635         },
 636         # Kaltura embed
 637         {
 638             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 639             'info_dict': {
 640                 'id': '1_eergr3h1',
 641                 'ext': 'mp4',
 642                 'upload_date': '20150226',
 643                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 644                 'timestamp': int,
 645                 'title': 'John Carlson Postgame 2/25/15',
 646             },
 647         },
 648         # Eagle.Platform embed (generic URL)
 649         {
 650             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 651             'info_dict': {
 652                 'id': '227304',
 653                 'ext': 'mp4',
 654                 'title': 'Навальный вышел на свободу',
 655                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 656                 'thumbnail': 're:^https?://.*\.jpg$',
 657                 'duration': 87,
 658                 'view_count': int,
 659                 'age_limit': 0,
 660             },
 661         },
 662         # ClipYou (Eagle.Platform) embed (custom URL)
 663         {
 664             'url': 'http://muz-tv.ru/play/7129/',
 665             'info_dict': {
 666                 'id': '12820',
 667                 'ext': 'mp4',
 668                 'title': "'O Sole Mio",
 669                 'thumbnail': 're:^https?://.*\.jpg$',
 670                 'duration': 216,
 671                 'view_count': int,
 672             },
 673         },
 674         # Pladform embed
 675         {
 676             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 677             'info_dict': {
 678                 'id': '100183293',
 679                 'ext': 'mp4',
 680                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 681                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 682                 'thumbnail': 're:^https?://.*\.jpg$',
 683                 'duration': 694,
 684                 'age_limit': 0,
 685             },
 686         },
 687         # Playwire embed
 688         {
 689             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 690             'info_dict': {
 691                 'id': '3519514',
 692                 'ext': 'mp4',
 693                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 694                 'thumbnail': 're:^https?://.*\.png$',
 695                 'duration': 45.115,
 696             },
 697         },
 698         # 5min embed
 699         {
 700             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 701             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 702             'info_dict': {
 703                 'id': '518726732',
 704                 'ext': 'mp4',
 705                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 706             },
 707         },
 708         # SVT embed
 709         {
 710             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 711             'info_dict': {
 712                 'id': '2900353',
 713                 'ext': 'flv',
 714                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 715                 'duration': 27,
 716                 'age_limit': 0,
 717             },
 718         },
 719         # Crooks and Liars embed
 720         {
 721             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 722             'info_dict': {
 723                 'id': '8RUoRhRi',
 724                 'ext': 'mp4',
 725                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 726                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 727                 'timestamp': 1428207000,
 728                 'upload_date': '20150405',
 729                 'uploader': 'Heather',
 730             },
 731         },
 732         # Crooks and Liars external embed
 733         {
 734             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 735             'info_dict': {
 736                 'id': 'MTE3MjUtMzQ2MzA',
 737                 'ext': 'mp4',
 738                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 739                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 740                 'timestamp': 1265032391,
 741                 'upload_date': '20100201',
 742                 'uploader': 'Heather',
 743             },
 744         },
 745         # NBC Sports vplayer embed
 746         {
 747             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 748             'info_dict': {
 749                 'id': 'ln7x1qSThw4k',
 750                 'ext': 'flv',
 751                 'title': "PFT Live: New leader in the 'new-look' defense",
 752                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 753             },
 754         },
 755         # UDN embed
 756         {
 757             'url': 'http://www.udn.com/news/story/7314/822787',
 758             'md5': 'fd2060e988c326991037b9aff9df21a6',
 759             'info_dict': {
 760                 'id': '300346',
 761                 'ext': 'mp4',
 762                 'title': '中一中男師變性 全校師生力挺',
 763                 'thumbnail': 're:^https?://.*\.jpg$',
 764             }
 765         },
 766         # Ooyala embed
 767         {
 768             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 769             'info_dict': {
 770                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 771                 'ext': 'mp4',
 772                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 773                 'title': 'This is what separates the Excel masters from the wannabes',
 774             },
 775             'params': {
 776                 # m3u8 downloads
 777                 'skip_download': True,
 778             }
 779         },
 780         # Contains a SMIL manifest
 781         {
 782             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 783             'info_dict': {
 784                 'id': 'file',
 785                 'ext': 'flv',
 786                 'title': '+ Football: Lottery Champions League Europe',
 787                 'uploader': 'www.telewebion.com',
 788             },
 789             'params': {
 790                 # rtmpe downloads
 791                 'skip_download': True,
 792             }
 793         },
 794         # Brightcove URL in single quotes
 795         {
 796             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 797             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 798             'info_dict': {
 799                 'id': '4255764656001',
 800                 'ext': 'mp4',
 801                 'title': 'SN Presents: Russell Martin, World Citizen',
 802                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 803                 'uploader': 'Rogers Sportsnet',
 804             },
 805         }
 806     ]
 807
 808     def report_following_redirect(self, new_url):
 809         """Report information extraction."""
 810         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 811
 812     def _extract_rss(self, url, video_id, doc):
 813         playlist_title = doc.find('./channel/title').text
 814         playlist_desc_el = doc.find('./channel/description')
 815         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 816
 817         entries = []
 818         for it in doc.findall('./channel/item'):
 819             next_url = xpath_text(it, 'link', fatal=False)
 820             if not next_url:
 821                 enclosure_nodes = it.findall('./enclosure')
 822                 for e in enclosure_nodes:
 823                     next_url = e.attrib.get('url')
 824                     if next_url:
 825                         break
 826
 827             if not next_url:
 828                 continue
 829
 830             entries.append({
 831                 '_type': 'url',
 832                 'url': next_url,
 833                 'title': it.find('title').text,
 834             })
 835
 836         return {
 837             '_type': 'playlist',
 838             'id': url,
 839             'title': playlist_title,
 840             'description': playlist_desc,
 841             'entries': entries,
 842         }
 843
 844     def _extract_camtasia(self, url, video_id, webpage):
 845         """ Returns None if no camtasia video can be found. """
 846
 847         camtasia_cfg = self._search_regex(
 848             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 849             webpage, 'camtasia configuration file', default=None)
 850         if camtasia_cfg is None:
 851             return None
 852
 853         title = self._html_search_meta('DC.title', webpage, fatal=True)
 854
 855         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 856         camtasia_cfg = self._download_xml(
 857             camtasia_url, video_id,
 858             note='Downloading camtasia configuration',
 859             errnote='Failed to download camtasia configuration')
 860         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 861
 862         entries = []
 863         for n in fileset_node.getchildren():
 864             url_n = n.find('./uri')
 865             if url_n is None:
 866                 continue
 867
 868             entries.append({
 869                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 870                 'title': '%s - %s' % (title, n.tag),
 871                 'url': compat_urlparse.urljoin(url, url_n.text),
 872                 'duration': float_or_none(n.find('./duration').text),
 873             })
 874
 875         return {
 876             '_type': 'playlist',
 877             'entries': entries,
 878             'title': title,
 879         }
 880
 881     def _real_extract(self, url):
 882         if url.startswith('//'):
 883             return {
 884                 '_type': 'url',
 885                 'url': self.http_scheme() + url,
 886             }
 887
 888         parsed_url = compat_urlparse.urlparse(url)
 889         if not parsed_url.scheme:
 890             default_search = self._downloader.params.get('default_search')
 891             if default_search is None:
 892                 default_search = 'fixup_error'
 893
 894             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 895                 if '/' in url:
 896                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 897                     return self.url_result('http://' + url)
 898                 elif default_search != 'fixup_error':
 899                     if default_search == 'auto_warning':
 900                         if re.match(r'^(?:url|URL)$', url):
 901                             raise ExtractorError(
 902                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 903                                 expected=True)
 904                         else:
 905                             self._downloader.report_warning(
 906                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 907                     return self.url_result('ytsearch:' + url)
 908
 909             if default_search in ('error', 'fixup_error'):
 910                 raise ExtractorError(
 911                     '%r is not a valid URL. '
 912                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 913                     % (url, url), expected=True)
 914             else:
 915                 if ':' not in default_search:
 916                     default_search += ':'
 917                 return self.url_result(default_search + url)
 918
 919         url, smuggled_data = unsmuggle_url(url)
 920         force_videoid = None
 921         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 922         if smuggled_data and 'force_videoid' in smuggled_data:
 923             force_videoid = smuggled_data['force_videoid']
 924             video_id = force_videoid
 925         else:
 926             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
 927
 928         self.to_screen('%s: Requesting header' % video_id)
 929
 930         head_req = HEADRequest(url)
 931         head_response = self._request_webpage(
 932             head_req, video_id,
 933             note=False, errnote='Could not send HEAD request to %s' % url,
 934             fatal=False)
 935
 936         if head_response is not False:
 937             # Check for redirect
 938             new_url = head_response.geturl()
 939             if url != new_url:
 940                 self.report_following_redirect(new_url)
 941                 if force_videoid:
 942                     new_url = smuggle_url(
 943                         new_url, {'force_videoid': force_videoid})
 944                 return self.url_result(new_url)
 945
 946         full_response = None
 947         if head_response is False:
 948             request = compat_urllib_request.Request(url)
 949             request.add_header('Accept-Encoding', '*')
 950             full_response = self._request_webpage(request, video_id)
 951             head_response = full_response
 952
 953         # Check for direct link to a video
 954         content_type = head_response.headers.get('Content-Type', '')
 955         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 956         if m:
 957             upload_date = unified_strdate(
 958                 head_response.headers.get('Last-Modified'))
 959             return {
 960                 'id': video_id,
 961                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
 962                 'direct': True,
 963                 'formats': [{
 964                     'format_id': m.group('format_id'),
 965                     'url': url,
 966                     'vcodec': 'none' if m.group('type') == 'audio' else None
 967                 }],
 968                 'upload_date': upload_date,
 969             }
 970
 971         if not self._downloader.params.get('test', False) and not is_intentional:
 972             self._downloader.report_warning('Falling back on generic information extractor.')
 973
 974         if not full_response:
 975             request = compat_urllib_request.Request(url)
 976             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
 977             # making it impossible to download only chunk of the file (yet we need only 512kB to
 978             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
 979             # that will always result in downloading the whole file that is not desirable.
 980             # Therefore for extraction pass we have to override Accept-Encoding to any in order
 981             # to accept raw bytes and being able to download only a chunk.
 982             # It may probably better to solve this by checking Content-Type for application/octet-stream
 983             # after HEAD request finishes, but not sure if we can rely on this.
 984             request.add_header('Accept-Encoding', '*')
 985             full_response = self._request_webpage(request, video_id)
 986
 987         # Maybe it's a direct link to a video?
 988         # Be careful not to download the whole thing!
 989         first_bytes = full_response.read(512)
 990         if not is_html(first_bytes):
 991             self._downloader.report_warning(
 992                 'URL could be a direct video link, returning it as such.')
 993             upload_date = unified_strdate(
 994                 head_response.headers.get('Last-Modified'))
 995             return {
 996                 'id': video_id,
 997                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
 998                 'direct': True,
 999                 'url': url,
1000                 'upload_date': upload_date,
1001             }
1002
1003         webpage = self._webpage_read_content(
1004             full_response, url, video_id, prefix=first_bytes)
1005
1006         self.report_extraction(video_id)
1007
1008         # Is it an RSS feed?
1009         try:
1010             doc = parse_xml(webpage)
1011             if doc.tag == 'rss':
1012                 return self._extract_rss(url, video_id, doc)
1013         except compat_xml_parse_error:
1014             pass
1015
1016         # Is it a Camtasia project?
1017         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1018         if camtasia_res is not None:
1019             return camtasia_res
1020
1021         # Sometimes embedded video player is hidden behind percent encoding
1022         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1023         # Unescaping the whole page allows to handle those cases in a generic way
1024         webpage = compat_urllib_parse.unquote(webpage)
1025
1026         # it's tempting to parse this further, but you would
1027         # have to take into account all the variations like
1028         #   Video Title - Site Name
1029         #   Site Name | Video Title
1030         #   Video Title - Tagline | Site Name
1031         # and so on and so forth; it's just not practical
1032         video_title = self._html_search_regex(
1033             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1034             default='video')
1035
1036         # Try to detect age limit automatically
1037         age_limit = self._rta_search(webpage)
1038         # And then there are the jokers who advertise that they use RTA,
1039         # but actually don't.
1040         AGE_LIMIT_MARKERS = [
1041             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1042         ]
1043         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1044             age_limit = 18
1045
1046         # video uploader is domain name
1047         video_uploader = self._search_regex(
1048             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1049
1050         # Helper method
1051         def _playlist_from_matches(matches, getter=None, ie=None):
1052             urlrs = orderedSet(
1053                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1054                 for m in matches)
1055             return self.playlist_result(
1056                 urlrs, playlist_id=video_id, playlist_title=video_title)
1057
1058         # Look for BrightCove:
1059         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1060         if bc_urls:
1061             self.to_screen('Brightcove video detected.')
1062             entries = [{
1063                 '_type': 'url',
1064                 'url': smuggle_url(bc_url, {'Referer': url}),
1065                 'ie_key': 'Brightcove'
1066             } for bc_url in bc_urls]
1067
1068             return {
1069                 '_type': 'playlist',
1070                 'title': video_title,
1071                 'id': video_id,
1072                 'entries': entries,
1073             }
1074
1075         # Look for embedded rtl.nl player
1076         matches = re.findall(
1077             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1078             webpage)
1079         if matches:
1080             return _playlist_from_matches(matches, ie='RtlNl')
1081
1082         # Look for embedded (iframe) Vimeo player
1083         mobj = re.search(
1084             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1085         if mobj:
1086             player_url = unescapeHTML(mobj.group('url'))
1087             surl = smuggle_url(player_url, {'Referer': url})
1088             return self.url_result(surl)
1089         # Look for embedded (swf embed) Vimeo player
1090         mobj = re.search(
1091             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1092         if mobj:
1093             return self.url_result(mobj.group(1))
1094
1095         # Look for embedded YouTube player
1096         matches = re.findall(r'''(?x)
1097             (?:
1098                 <iframe[^>]+?src=|
1099                 data-video-url=|
1100                 <embed[^>]+?src=|
1101                 embedSWF\(?:\s*|
1102                 new\s+SWFObject\(
1103             )
1104             (["\'])
1105                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1106                 (?:embed|v|p)/.+?)
1107             \1''', webpage)
1108         if matches:
1109             return _playlist_from_matches(
1110                 matches, lambda m: unescapeHTML(m[1]))
1111
1112         # Look for lazyYT YouTube embed
1113         matches = re.findall(
1114             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1115         if matches:
1116             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1117
1118         # Look for embedded Dailymotion player
1119         matches = re.findall(
1120             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1121         if matches:
1122             return _playlist_from_matches(
1123                 matches, lambda m: unescapeHTML(m[1]))
1124
1125         # Look for embedded Dailymotion playlist player (#3822)
1126         m = re.search(
1127             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1128         if m:
1129             playlists = re.findall(
1130                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1131             if playlists:
1132                 return _playlist_from_matches(
1133                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1134
1135         # Look for embedded Wistia player
1136         match = re.search(
1137             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1138         if match:
1139             embed_url = self._proto_relative_url(
1140                 unescapeHTML(match.group('url')))
1141             return {
1142                 '_type': 'url_transparent',
1143                 'url': embed_url,
1144                 'ie_key': 'Wistia',
1145                 'uploader': video_uploader,
1146                 'title': video_title,
1147                 'id': video_id,
1148             }
1149
1150         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1151         if match:
1152             return {
1153                 '_type': 'url_transparent',
1154                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1155                 'ie_key': 'Wistia',
1156                 'uploader': video_uploader,
1157                 'title': video_title,
1158                 'id': match.group('id')
1159             }
1160
1161         # Look for embedded blip.tv player
1162         bliptv_url = BlipTVIE._extract_url(webpage)
1163         if bliptv_url:
1164             return self.url_result(bliptv_url, 'BlipTV')
1165
1166         # Look for SVT player
1167         svt_url = SVTIE._extract_url(webpage)
1168         if svt_url:
1169             return self.url_result(svt_url, 'SVT')
1170
1171         # Look for embedded condenast player
1172         matches = re.findall(
1173             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1174             webpage)
1175         if matches:
1176             return {
1177                 '_type': 'playlist',
1178                 'entries': [{
1179                     '_type': 'url',
1180                     'ie_key': 'CondeNast',
1181                     'url': ma,
1182                 } for ma in matches],
1183                 'title': video_title,
1184                 'id': video_id,
1185             }
1186
1187         # Look for Bandcamp pages with custom domain
1188         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1189         if mobj is not None:
1190             burl = unescapeHTML(mobj.group(1))
1191             # Don't set the extractor because it can be a track url or an album
1192             return self.url_result(burl)
1193
1194         # Look for embedded Vevo player
1195         mobj = re.search(
1196             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1197         if mobj is not None:
1198             return self.url_result(mobj.group('url'))
1199
1200         # Look for embedded Viddler player
1201         mobj = re.search(
1202             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1203             webpage)
1204         if mobj is not None:
1205             return self.url_result(mobj.group('url'))
1206
1207         # Look for NYTimes player
1208         mobj = re.search(
1209             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1210             webpage)
1211         if mobj is not None:
1212             return self.url_result(mobj.group('url'))
1213
1214         # Look for Libsyn player
1215         mobj = re.search(
1216             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1217         if mobj is not None:
1218             return self.url_result(mobj.group('url'))
1219
1220         # Look for Ooyala videos
1221         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1222                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1223                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1224                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1225         if mobj is not None:
1226             return OoyalaIE._build_url_result(mobj.group('ec'))
1227
1228         # Look for multiple Ooyala embeds on SBN network websites
1229         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1230         if mobj is not None:
1231             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1232             if embeds:
1233                 return _playlist_from_matches(
1234                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1235
1236         # Look for Aparat videos
1237         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1238         if mobj is not None:
1239             return self.url_result(mobj.group(1), 'Aparat')
1240
1241         # Look for MPORA videos
1242         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1243         if mobj is not None:
1244             return self.url_result(mobj.group(1), 'Mpora')
1245
1246         # Look for embedded NovaMov-based player
1247         mobj = re.search(
1248             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1249                     (?P<url>http://(?:(?:embed|www)\.)?
1250                         (?:novamov\.com|
1251                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1252                            videoweed\.(?:es|com)|
1253                            movshare\.(?:net|sx|ag)|
1254                            divxstage\.(?:eu|net|ch|co|at|ag))
1255                         /embed\.php.+?)\1''', webpage)
1256         if mobj is not None:
1257             return self.url_result(mobj.group('url'))
1258
1259         # Look for embedded Facebook player
1260         mobj = re.search(
1261             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1262         if mobj is not None:
1263             return self.url_result(mobj.group('url'), 'Facebook')
1264
1265         # Look for embedded VK player
1266         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1267         if mobj is not None:
1268             return self.url_result(mobj.group('url'), 'VK')
1269
1270         # Look for embedded ivi player
1271         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1272         if mobj is not None:
1273             return self.url_result(mobj.group('url'), 'Ivi')
1274
1275         # Look for embedded Huffington Post player
1276         mobj = re.search(
1277             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1278         if mobj is not None:
1279             return self.url_result(mobj.group('url'), 'HuffPost')
1280
1281         # Look for embed.ly
1282         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1283         if mobj is not None:
1284             return self.url_result(mobj.group('url'))
1285         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1286         if mobj is not None:
1287             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1288
1289         # Look for funnyordie embed
1290         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1291         if matches:
1292             return _playlist_from_matches(
1293                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1294
1295         # Look for BBC iPlayer embed
1296         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1297         if matches:
1298             return _playlist_from_matches(matches, ie='BBCCoUk')
1299
1300         # Look for embedded RUTV player
1301         rutv_url = RUTVIE._extract_url(webpage)
1302         if rutv_url:
1303             return self.url_result(rutv_url, 'RUTV')
1304
1305         # Look for embedded TVC player
1306         rutv_url = TVCIE._extract_url(webpage)
1307         if rutv_url:
1308             return self.url_result(rutv_url, 'TVC')
1309
1310         # Look for embedded SportBox player
1311         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1312         if sportbox_urls:
1313             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1314
1315         # Look for embedded TED player
1316         mobj = re.search(
1317             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1318         if mobj is not None:
1319             return self.url_result(mobj.group('url'), 'TED')
1320
1321         # Look for embedded Ustream videos
1322         mobj = re.search(
1323             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1324         if mobj is not None:
1325             return self.url_result(mobj.group('url'), 'Ustream')
1326
1327         # Look for embedded arte.tv player
1328         mobj = re.search(
1329             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1330             webpage)
1331         if mobj is not None:
1332             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1333
1334         # Look for embedded smotri.com player
1335         smotri_url = SmotriIE._extract_url(webpage)
1336         if smotri_url:
1337             return self.url_result(smotri_url, 'Smotri')
1338
1339         # Look for embeded soundcloud player
1340         mobj = re.search(
1341             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1342             webpage)
1343         if mobj is not None:
1344             url = unescapeHTML(mobj.group('url'))
1345             return self.url_result(url)
1346
1347         # Look for embedded vulture.com player
1348         mobj = re.search(
1349             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1350             webpage)
1351         if mobj is not None:
1352             url = unescapeHTML(mobj.group('url'))
1353             return self.url_result(url, ie='Vulture')
1354
1355         # Look for embedded mtvservices player
1356         mobj = re.search(
1357             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1358             webpage)
1359         if mobj is not None:
1360             url = unescapeHTML(mobj.group('url'))
1361             return self.url_result(url, ie='MTVServicesEmbedded')
1362
1363         # Look for embedded yahoo player
1364         mobj = re.search(
1365             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1366             webpage)
1367         if mobj is not None:
1368             return self.url_result(mobj.group('url'), 'Yahoo')
1369
1370         # Look for embedded sbs.com.au player
1371         mobj = re.search(
1372             r'''(?x)
1373             (?:
1374                 <meta\s+property="og:video"\s+content=|
1375                 <iframe[^>]+?src=
1376             )
1377             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1378             webpage)
1379         if mobj is not None:
1380             return self.url_result(mobj.group('url'), 'SBS')
1381
1382         # Look for embedded Cinchcast player
1383         mobj = re.search(
1384             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1385             webpage)
1386         if mobj is not None:
1387             return self.url_result(mobj.group('url'), 'Cinchcast')
1388
1389         mobj = re.search(
1390             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1391             webpage)
1392         if not mobj:
1393             mobj = re.search(
1394                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1395                 webpage)
1396         if mobj is not None:
1397             return self.url_result(mobj.group('url'), 'MLB')
1398
1399         mobj = re.search(
1400             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1401             webpage)
1402         if mobj is not None:
1403             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1404
1405         mobj = re.search(
1406             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1407             webpage)
1408         if mobj is not None:
1409             return self.url_result(mobj.group('url'), 'Livestream')
1410
1411         # Look for Zapiks embed
1412         mobj = re.search(
1413             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1414         if mobj is not None:
1415             return self.url_result(mobj.group('url'), 'Zapiks')
1416
1417         # Look for Kaltura embeds
1418         mobj = re.search(
1419             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1420         if mobj is not None:
1421             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1422
1423         # Look for Eagle.Platform embeds
1424         mobj = re.search(
1425             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1426         if mobj is not None:
1427             return self.url_result(mobj.group('url'), 'EaglePlatform')
1428
1429         # Look for ClipYou (uses Eagle.Platform) embeds
1430         mobj = re.search(
1431             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1432         if mobj is not None:
1433             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1434
1435         # Look for Pladform embeds
1436         mobj = re.search(
1437             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1438         if mobj is not None:
1439             return self.url_result(mobj.group('url'), 'Pladform')
1440
1441         # Look for Playwire embeds
1442         mobj = re.search(
1443             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1444         if mobj is not None:
1445             return self.url_result(mobj.group('url'))
1446
1447         # Look for 5min embeds
1448         mobj = re.search(
1449             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1450         if mobj is not None:
1451             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1452
1453         # Look for Crooks and Liars embeds
1454         mobj = re.search(
1455             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1456         if mobj is not None:
1457             return self.url_result(mobj.group('url'))
1458
1459         # Look for NBC Sports VPlayer embeds
1460         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1461         if nbc_sports_url:
1462             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1463
1464         # Look for UDN embeds
1465         mobj = re.search(
1466             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1467         if mobj is not None:
1468             return self.url_result(
1469                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1470
1471         # Look for Senate ISVP iframe
1472         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1473         if senate_isvp_url:
1474             return self.url_result(senate_isvp_url, 'SenateISVP')
1475
1476         def check_video(vurl):
1477             if YoutubeIE.suitable(vurl):
1478                 return True
1479             vpath = compat_urlparse.urlparse(vurl).path
1480             vext = determine_ext(vpath)
1481             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1482
1483         def filter_video(urls):
1484             return list(filter(check_video, urls))
1485
1486         # Start with something easy: JW Player in SWFObject
1487         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1488         if not found:
1489             # Look for gorilla-vid style embedding
1490             found = filter_video(re.findall(r'''(?sx)
1491                 (?:
1492                     jw_plugins|
1493                     JWPlayerOptions|
1494                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1495                 )
1496                 .*?
1497                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1498         if not found:
1499             # Broaden the search a little bit
1500             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1501         if not found:
1502             # Broaden the search a little bit: JWPlayer JS loader
1503             found = filter_video(re.findall(
1504                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1505         if not found:
1506             # Flow player
1507             found = filter_video(re.findall(r'''(?xs)
1508                 flowplayer\("[^"]+",\s*
1509                     \{[^}]+?\}\s*,
1510                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1511                         ["']?url["']?\s*:\s*["']([^"']+)["']
1512             ''', webpage))
1513         if not found:
1514             # Cinerama player
1515             found = re.findall(
1516                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1517         if not found:
1518             # Try to find twitter cards info
1519             found = filter_video(re.findall(
1520                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1521         if not found:
1522             # We look for Open Graph info:
1523             # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1524             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1525             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1526             if m_video_type is not None:
1527                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1528         if not found:
1529             # HTML5 video
1530             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1531         if not found:
1532             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1533             found = re.search(
1534                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1535                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1536                 webpage)
1537             if not found:
1538                 # Look also in Refresh HTTP header
1539                 refresh_header = head_response.headers.get('Refresh')
1540                 if refresh_header:
1541                     found = re.search(REDIRECT_REGEX, refresh_header)
1542             if found:
1543                 new_url = compat_urlparse.urljoin(url, found.group(1))
1544                 self.report_following_redirect(new_url)
1545                 return {
1546                     '_type': 'url',
1547                     'url': new_url,
1548                 }
1549         if not found:
1550             raise UnsupportedError(url)
1551
1552         entries = []
1553         for video_url in found:
1554             video_url = compat_urlparse.urljoin(url, video_url)
1555             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1556
1557             # Sometimes, jwplayer extraction will result in a YouTube URL
1558             if YoutubeIE.suitable(video_url):
1559                 entries.append(self.url_result(video_url, 'Youtube'))
1560                 continue
1561
1562             # here's a fun little line of code for you:
1563             video_id = os.path.splitext(video_id)[0]
1564
1565             if determine_ext(video_url) == 'smil':
1566                 entries.append({
1567                     'id': video_id,
1568                     'formats': self._extract_smil_formats(video_url, video_id),
1569                     'uploader': video_uploader,
1570                     'title': video_title,
1571                     'age_limit': age_limit,
1572                 })
1573             else:
1574                 entries.append({
1575                     'id': video_id,
1576                     'url': video_url,
1577                     'uploader': video_uploader,
1578                     'title': video_title,
1579                     'age_limit': age_limit,
1580                 })
1581
1582         if len(entries) == 1:
1583             return entries[0]
1584         else:
1585             for num, e in enumerate(entries, start=1):
1586                 # 'url' results don't have a title
1587                 if e.get('title') is not None:
1588                     e['title'] = '%s (%d)' % (e['title'], num)
1589             return {
1590                 '_type': 'playlist',
1591                 'entries': entries,
1592             }