git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urlparse,
  13     compat_xml_parse_error,
  14 )
  15 from ..utils import (
  16     determine_ext,
  17     ExtractorError,
  18     float_or_none,
  19     HEADRequest,
  20     is_html,
  21     orderedSet,
  22     parse_xml,
  23     smuggle_url,
  24     unescapeHTML,
  25     unified_strdate,
  26     unsmuggle_url,
  27     UnsupportedError,
  28     url_basename,
  29     xpath_text,
  30 )
  31 from .brightcove import BrightcoveIE
  32 from .nbc import NBCSportsVPlayerIE
  33 from .ooyala import OoyalaIE
  34 from .rutv import RUTVIE
  35 from .smotri import SmotriIE
  36 from .condenast import CondeNastIE
  37 from .udn import UDNEmbedIE
  38 from .senateisvp import SenateISVPIE
  39 from .bliptv import BlipTVIE
  40 from .svt import SVTIE
  41
  42
  43 class GenericIE(InfoExtractor):
  44     IE_DESC = 'Generic downloader that works on some sites'
  45     _VALID_URL = r'.*'
  46     IE_NAME = 'generic'
  47     _TESTS = [
  48         {
  49             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  50             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  51             'info_dict': {
  52                 'id': '13601338388002',
  53                 'ext': 'mp4',
  54                 'uploader': 'www.hodiho.fr',
  55                 'title': 'R\u00e9gis plante sa Jeep',
  56             }
  57         },
  58         # bandcamp page with custom domain
  59         {
  60             'add_ie': ['Bandcamp'],
  61             'url': 'http://bronyrock.com/track/the-pony-mash',
  62             'info_dict': {
  63                 'id': '3235767654',
  64                 'ext': 'mp3',
  65                 'title': 'The Pony Mash',
  66                 'uploader': 'M_Pallante',
  67             },
  68             'skip': 'There is a limit of 200 free downloads / month for the test song',
  69         },
  70         # embedded brightcove video
  71         # it also tests brightcove videos that need to set the 'Referer' in the
  72         # http requests
  73         {
  74             'add_ie': ['Brightcove'],
  75             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  76             'info_dict': {
  77                 'id': '2765128793001',
  78                 'ext': 'mp4',
  79                 'title': 'Le cours de bourse : l’analyse technique',
  80                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  81                 'uploader': 'BFM BUSINESS',
  82             },
  83             'params': {
  84                 'skip_download': True,
  85             },
  86         },
  87         {
  88             # https://github.com/rg3/youtube-dl/issues/2253
  89             'url': 'http://bcove.me/i6nfkrc3',
  90             'md5': '0ba9446db037002366bab3b3eb30c88c',
  91             'info_dict': {
  92                 'id': '3101154703001',
  93                 'ext': 'mp4',
  94                 'title': 'Still no power',
  95                 'uploader': 'thestar.com',
  96                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
  97             },
  98             'add_ie': ['Brightcove'],
  99         },
 100         {
 101             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 102             'md5': 'fb973ecf6e4a78a67453647444222983',
 103             'info_dict': {
 104                 'id': '3414141473001',
 105                 'ext': 'mp4',
 106                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 107                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 108                 'uploader': 'Championat',
 109             },
 110         },
 111         {
 112             # https://github.com/rg3/youtube-dl/issues/3541
 113             'add_ie': ['Brightcove'],
 114             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 115             'info_dict': {
 116                 'id': '3866516442001',
 117                 'ext': 'mp4',
 118                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 119                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 120                 'uploader': 'SBS Broadcasting',
 121             },
 122             'skip': 'Restricted to Netherlands',
 123             'params': {
 124                 'skip_download': True,  # m3u8 download
 125             },
 126         },
 127         # Direct link to a video
 128         {
 129             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 130             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 131             'info_dict': {
 132                 'id': 'trailer',
 133                 'ext': 'mp4',
 134                 'title': 'trailer',
 135                 'upload_date': '20100513',
 136             }
 137         },
 138         # ooyala video
 139         {
 140             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 141             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 142             'info_dict': {
 143                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 144                 'ext': 'mp4',
 145                 'title': '2cc213299525360.mov',  # that's what we get
 146             },
 147             'add_ie': ['Ooyala'],
 148         },
 149         # multiple ooyala embeds on SBN network websites
 150         {
 151             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 152             'info_dict': {
 153                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 154                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 155             },
 156             'playlist_mincount': 3,
 157             'params': {
 158                 'skip_download': True,
 159             },
 160             'add_ie': ['Ooyala'],
 161         },
 162         # google redirect
 163         {
 164             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 165             'info_dict': {
 166                 'id': 'cmQHVoWB5FY',
 167                 'ext': 'mp4',
 168                 'upload_date': '20130224',
 169                 'uploader_id': 'TheVerge',
 170                 'description': 're:^Chris Ziegler takes a look at the\.*',
 171                 'uploader': 'The Verge',
 172                 'title': 'First Firefox OS phones side-by-side',
 173             },
 174             'params': {
 175                 'skip_download': False,
 176             }
 177         },
 178         # embed.ly video
 179         {
 180             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 181             'info_dict': {
 182                 'id': '9ODmcdjQcHQ',
 183                 'ext': 'mp4',
 184                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 185                 'upload_date': '20140225',
 186                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 187                 'uploader': 'Tested',
 188                 'uploader_id': 'testedcom',
 189             },
 190             # No need to test YoutubeIE here
 191             'params': {
 192                 'skip_download': True,
 193             },
 194         },
 195         # funnyordie embed
 196         {
 197             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 198             'info_dict': {
 199                 'id': '18e820ec3f',
 200                 'ext': 'mp4',
 201                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 202                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 203             },
 204         },
 205         # BBC iPlayer embeds
 206         {
 207             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 208             'info_dict': {
 209                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 210             },
 211             'playlist_mincount': 18,
 212         },
 213         # RUTV embed
 214         {
 215             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 216             'info_dict': {
 217                 'id': '776940',
 218                 'ext': 'mp4',
 219                 'title': 'Охотское море стало целиком российским',
 220                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 221             },
 222             'params': {
 223                 # m3u8 download
 224                 'skip_download': True,
 225             },
 226         },
 227         # Embedded TED video
 228         {
 229             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 230             'md5': '65fdff94098e4a607385a60c5177c638',
 231             'info_dict': {
 232                 'id': '1969',
 233                 'ext': 'mp4',
 234                 'title': 'Hidden miracles of the natural world',
 235                 'uploader': 'Louie Schwartzberg',
 236                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 237             }
 238         },
  239         # Embedded Ustream video
 240         {
 241             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 242             'md5': '27b99cdb639c9b12a79bca876a073417',
 243             'info_dict': {
 244                 'id': '45734260',
 245                 'ext': 'flv',
 246                 'uploader': 'AU SPA:  The NSA and Privacy',
 247                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 248             }
 249         },
 250         # nowvideo embed hidden behind percent encoding
 251         {
 252             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 253             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 254             'info_dict': {
 255                 'id': '06e53103ca9aa',
 256                 'ext': 'flv',
 257                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 258                 'description': 'No description',
 259             },
 260         },
 261         # arte embed
 262         {
 263             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 264             'md5': '7653032cbb25bf6c80d80f217055fa43',
 265             'info_dict': {
 266                 'id': '048195-004_PLUS7-F',
 267                 'ext': 'flv',
 268                 'title': 'X:enius',
 269                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 270                 'upload_date': '20140320',
 271             },
 272             'params': {
 273                 'skip_download': 'Requires rtmpdump'
 274             }
 275         },
 276         # Condé Nast embed
 277         {
 278             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 279             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 280             'info_dict': {
 281                 'id': '53501be369702d3275860000',
 282                 'ext': 'mp4',
 283                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 284             }
 285         },
 286         # Dailymotion embed
 287         {
 288             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 289             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 290             'info_dict': {
 291                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 292                 'ext': 'mp4',
 293                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 294                 'uploader': 'Spi0n',
 295             },
 296             'add_ie': ['Dailymotion'],
 297         },
 298         # YouTube embed
 299         {
 300             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 301             'info_dict': {
 302                 'id': 'FXRb4ykk4S0',
 303                 'ext': 'mp4',
 304                 'title': 'The NBL Auction 2014',
 305                 'uploader': 'BADMINTON England',
 306                 'uploader_id': 'BADMINTONEvents',
 307                 'upload_date': '20140603',
 308                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 309             },
 310             'add_ie': ['Youtube'],
 311             'params': {
 312                 'skip_download': True,
 313             }
 314         },
  315         # MTVServices embed
 316         {
 317             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 318             'md5': '35727f82f58c76d996fc188f9755b0d5',
 319             'info_dict': {
 320                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 321                 'ext': 'mp4',
 322                 'title': 'Review',
 323                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 324             },
 325         },
 326         # YouTube embed via <data-embed-url="">
 327         {
 328             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 329             'info_dict': {
 330                 'id': '4vAffPZIT44',
 331                 'ext': 'mp4',
 332                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 333                 'uploader': 'Gameloft',
 334                 'uploader_id': 'gameloft',
 335                 'upload_date': '20140828',
 336                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 337             },
 338             'params': {
 339                 'skip_download': True,
 340             }
 341         },
 342         # Camtasia studio
 343         {
 344             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 345             'playlist': [{
 346                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 347                 'info_dict': {
 348                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 349                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 350                     'ext': 'flv',
 351                     'duration': 2235.90,
 352                 }
 353             }, {
 354                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 355                 'info_dict': {
 356                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 357                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 358                     'ext': 'flv',
 359                     'duration': 2235.93,
 360                 }
 361             }],
 362             'info_dict': {
 363                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 364             }
 365         },
 366         # Flowplayer
 367         {
 368             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 369             'md5': '9d65602bf31c6e20014319c7d07fba27',
 370             'info_dict': {
 371                 'id': '5123ea6d5e5a7',
 372                 'ext': 'mp4',
 373                 'age_limit': 18,
 374                 'uploader': 'www.handjobhub.com',
 375                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 376             }
 377         },
 378         # RSS feed
 379         {
 380             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 381             'info_dict': {
 382                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 383                 'title': 'Zero Punctuation',
 384                 'description': 're:.*groundbreaking video review series.*'
 385             },
 386             'playlist_mincount': 11,
 387         },
 388         # Multiple brightcove videos
 389         # https://github.com/rg3/youtube-dl/issues/2283
 390         {
 391             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 392             'info_dict': {
 393                 'id': 'always-never',
 394                 'title': 'Always / Never - The New Yorker',
 395             },
 396             'playlist_count': 3,
 397             'params': {
 398                 'extract_flat': False,
 399                 'skip_download': True,
 400             }
 401         },
 402         # MLB embed
 403         {
 404             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 405             'md5': '96f09a37e44da40dd083e12d9a683327',
 406             'info_dict': {
 407                 'id': '33322633',
 408                 'ext': 'mp4',
 409                 'title': 'Ump changes call to ball',
 410                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 411                 'duration': 48,
 412                 'timestamp': 1401537900,
 413                 'upload_date': '20140531',
 414                 'thumbnail': 're:^https?://.*\.jpg$',
 415             },
 416         },
 417         # MLB articles
 418         {
 419             'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
 420             'md5': 'b190e70141fb9a1552a85426b4da1b5d',
 421             'info_dict': {
 422                 'id': '75609783',
 423                 'ext': 'mp4',
 424                 'title': 'Must C: Pillar climbs for catch',
 425                 'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
 426                 'timestamp': 1429124820,
 427                 'upload_date': '20150415',
 428             }
 429         },
 430         # Wistia embed
 431         {
 432             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 433             'md5': '8788b683c777a5cf25621eaf286d0c23',
 434             'info_dict': {
 435                 'id': '1cfaf6b7ea',
 436                 'ext': 'mov',
 437                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 438                 'duration': 643.0,
 439                 'filesize': 182808282,
 440                 'uploader': 'education-portal.com',
 441             },
 442         },
 443         {
 444             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 445             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 446             'info_dict': {
 447                 'id': 'uxjb0lwrcz',
 448                 'ext': 'mp4',
 449                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 450                 'duration': 1715.0,
 451                 'uploader': 'thoughtworks.wistia.com',
 452             },
 453         },
 454         # Direct download with broken HEAD
 455         {
 456             'url': 'http://ai-radio.org:8000/radio.opus',
 457             'info_dict': {
 458                 'id': 'radio',
 459                 'ext': 'opus',
 460                 'title': 'radio',
 461             },
 462             'params': {
 463                 'skip_download': True,  # infinite live stream
 464             },
 465             'expected_warnings': [
 466                 r'501.*Not Implemented'
 467             ],
 468         },
 469         # Soundcloud embed
 470         {
 471             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 472             'info_dict': {
 473                 'id': '174391317',
 474                 'ext': 'mp3',
 475                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 476                 'uploader': 'Sophos Security',
 477                 'title': 'Chet Chat 171 - Oct 29, 2014',
 478                 'upload_date': '20141029',
 479             }
 480         },
 481         # Livestream embed
 482         {
 483             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 484             'info_dict': {
 485                 'id': '67864563',
 486                 'ext': 'flv',
 487                 'upload_date': '20141112',
 488                 'title': 'Rosetta #CometLanding webcast HL 10',
 489             }
 490         },
 491         # LazyYT
 492         {
 493             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 494             'info_dict': {
 495                 'id': '1986',
 496                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 497             },
 498             'playlist_mincount': 2,
 499         },
 500         # Direct link with incorrect MIME type
 501         {
 502             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 503             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 504             'info_dict': {
 505                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 506                 'id': '5_Lennart_Poettering_-_Systemd',
 507                 'ext': 'webm',
 508                 'title': '5_Lennart_Poettering_-_Systemd',
 509                 'upload_date': '20141120',
 510             },
 511             'expected_warnings': [
 512                 'URL could be a direct video link, returning it as such.'
 513             ]
 514         },
 515         # Cinchcast embed
 516         {
 517             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 518             'info_dict': {
 519                 'id': '7141703',
 520                 'ext': 'mp3',
 521                 'upload_date': '20141126',
 522                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 523             }
 524         },
 525         # Cinerama player
 526         {
 527             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 528             'info_dict': {
 529                 'id': '730m_DandD_1901_512k',
 530                 'ext': 'mp4',
 531                 'uploader': 'www.abc.net.au',
 532                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 533             }
 534         },
 535         # embedded viddler video
 536         {
 537             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 538             'info_dict': {
 539                 'id': '4d03aad9',
 540                 'ext': 'mp4',
 541                 'uploader': 'deadspin',
 542                 'title': 'WALL-TO-GORTAT',
 543                 'timestamp': 1422285291,
 544                 'upload_date': '20150126',
 545             },
 546             'add_ie': ['Viddler'],
 547         },
 548         # Libsyn embed
 549         {
 550             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 551             'info_dict': {
 552                 'id': '3377616',
 553                 'ext': 'mp3',
 554                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 555                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 556                 'upload_date': '20150220',
 557             },
 558         },
 559         # jwplayer YouTube
 560         {
 561             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 562             'info_dict': {
 563                 'id': 'Mrj4DVp2zeA',
 564                 'ext': 'mp4',
 565                 'upload_date': '20150212',
 566                 'uploader': 'The National Archives UK',
 567                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 568                 'uploader_id': 'NationalArchives08',
 569                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 570             },
 571         },
 572         # rtl.nl embed
 573         {
 574             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 575             'playlist_mincount': 5,
 576             'info_dict': {
 577                 'id': 'aanslagen-kopenhagen',
 578                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 579             }
 580         },
 581         # Zapiks embed
 582         {
 583             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 584             'info_dict': {
 585                 'id': '118046',
 586                 'ext': 'mp4',
 587                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 588             }
 589         },
 590         # Kaltura embed
 591         {
 592             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 593             'info_dict': {
 594                 'id': '1_eergr3h1',
 595                 'ext': 'mp4',
 596                 'upload_date': '20150226',
 597                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 598                 'timestamp': int,
 599                 'title': 'John Carlson Postgame 2/25/15',
 600             },
 601         },
 602         # Eagle.Platform embed (generic URL)
 603         {
 604             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 605             'info_dict': {
 606                 'id': '227304',
 607                 'ext': 'mp4',
 608                 'title': 'Навальный вышел на свободу',
 609                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 610                 'thumbnail': 're:^https?://.*\.jpg$',
 611                 'duration': 87,
 612                 'view_count': int,
 613                 'age_limit': 0,
 614             },
 615         },
 616         # ClipYou (Eagle.Platform) embed (custom URL)
 617         {
 618             'url': 'http://muz-tv.ru/play/7129/',
 619             'info_dict': {
 620                 'id': '12820',
 621                 'ext': 'mp4',
 622                 'title': "'O Sole Mio",
 623                 'thumbnail': 're:^https?://.*\.jpg$',
 624                 'duration': 216,
 625                 'view_count': int,
 626             },
 627         },
 628         # Pladform embed
 629         {
 630             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 631             'info_dict': {
 632                 'id': '100183293',
 633                 'ext': 'mp4',
 634                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 635                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 636                 'thumbnail': 're:^https?://.*\.jpg$',
 637                 'duration': 694,
 638                 'age_limit': 0,
 639             },
 640         },
 641         # Playwire embed
 642         {
 643             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 644             'info_dict': {
 645                 'id': '3519514',
 646                 'ext': 'mp4',
 647                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 648                 'thumbnail': 're:^https?://.*\.png$',
 649                 'duration': 45.115,
 650             },
 651         },
 652         # 5min embed
 653         {
 654             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 655             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 656             'info_dict': {
 657                 'id': '518726732',
 658                 'ext': 'mp4',
 659                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 660             },
 661         },
 662         # SVT embed
 663         {
 664             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 665             'info_dict': {
 666                 'id': '2900353',
 667                 'ext': 'flv',
 668                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 669                 'duration': 27,
 670                 'age_limit': 0,
 671             },
 672         },
 673         # RSS feed with enclosure
 674         {
 675             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 676             'info_dict': {
 677                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 678                 'ext': 'm4v',
 679                 'upload_date': '20150228',
 680                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 681             }
 682         },
 683         # Crooks and Liars embed
 684         {
 685             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 686             'info_dict': {
 687                 'id': '8RUoRhRi',
 688                 'ext': 'mp4',
 689                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 690                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 691                 'timestamp': 1428207000,
 692                 'upload_date': '20150405',
 693                 'uploader': 'Heather',
 694             },
 695         },
 696         # Crooks and Liars external embed
 697         {
 698             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 699             'info_dict': {
 700                 'id': 'MTE3MjUtMzQ2MzA',
 701                 'ext': 'mp4',
 702                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 703                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 704                 'timestamp': 1265032391,
 705                 'upload_date': '20100201',
 706                 'uploader': 'Heather',
 707             },
 708         },
 709         # NBC Sports vplayer embed
 710         {
 711             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 712             'info_dict': {
 713                 'id': 'ln7x1qSThw4k',
 714                 'ext': 'flv',
 715                 'title': "PFT Live: New leader in the 'new-look' defense",
 716                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 717             },
 718         },
 719         # UDN embed
 720         {
 721             'url': 'http://www.udn.com/news/story/7314/822787',
 722             'md5': 'fd2060e988c326991037b9aff9df21a6',
 723             'info_dict': {
 724                 'id': '300346',
 725                 'ext': 'mp4',
 726                 'title': '中一中男師變性 全校師生力挺',
 727                 'thumbnail': 're:^https?://.*\.jpg$',
 728             }
 729         },
 730         # Ooyala embed
 731         {
 732             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 733             'info_dict': {
 734                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 735                 'ext': 'mp4',
 736                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 737                 'title': 'This is what separates the Excel masters from the wannabes',
 738             },
 739             'params': {
 740                 # m3u8 downloads
 741                 'skip_download': True,
 742             }
 743         },
 744         # Contains a SMIL manifest
 745         {
 746             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 747             'info_dict': {
 748                 'id': 'file',
 749                 'ext': 'flv',
 750                 'title': '+ Football: Lottery Champions League Europe',
 751                 'uploader': 'www.telewebion.com',
 752             },
 753             'params': {
 754                 # rtmpe downloads
 755                 'skip_download': True,
 756             }
 757         }
 758     ]
 759
 760     def report_following_redirect(self, new_url):
 761         """Report information extraction."""
 762         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 763
 764     def _extract_rss(self, url, video_id, doc):
 765         playlist_title = doc.find('./channel/title').text
 766         playlist_desc_el = doc.find('./channel/description')
 767         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 768
 769         entries = []
 770         for it in doc.findall('./channel/item'):
 771             next_url = xpath_text(it, 'link', fatal=False)
 772             if not next_url:
 773                 enclosure_nodes = it.findall('./enclosure')
 774                 for e in enclosure_nodes:
 775                     next_url = e.attrib.get('url')
 776                     if next_url:
 777                         break
 778
 779             if not next_url:
 780                 continue
 781
 782             entries.append({
 783                 '_type': 'url',
 784                 'url': next_url,
 785                 'title': it.find('title').text,
 786             })
 787
 788         return {
 789             '_type': 'playlist',
 790             'id': url,
 791             'title': playlist_title,
 792             'description': playlist_desc,
 793             'entries': entries,
 794         }
 795
 796     def _extract_camtasia(self, url, video_id, webpage):
 797         """ Returns None if no camtasia video can be found. """
 798
 799         camtasia_cfg = self._search_regex(
 800             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 801             webpage, 'camtasia configuration file', default=None)
 802         if camtasia_cfg is None:
 803             return None
 804
 805         title = self._html_search_meta('DC.title', webpage, fatal=True)
 806
 807         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 808         camtasia_cfg = self._download_xml(
 809             camtasia_url, video_id,
 810             note='Downloading camtasia configuration',
 811             errnote='Failed to download camtasia configuration')
 812         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 813
 814         entries = []
 815         for n in fileset_node.getchildren():
 816             url_n = n.find('./uri')
 817             if url_n is None:
 818                 continue
 819
 820             entries.append({
 821                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 822                 'title': '%s - %s' % (title, n.tag),
 823                 'url': compat_urlparse.urljoin(url, url_n.text),
 824                 'duration': float_or_none(n.find('./duration').text),
 825             })
 826
 827         return {
 828             '_type': 'playlist',
 829             'entries': entries,
 830             'title': title,
 831         }
 832
 833     def _real_extract(self, url):
 834         if url.startswith('//'):
 835             return {
 836                 '_type': 'url',
 837                 'url': self.http_scheme() + url,
 838             }
 839
 840         parsed_url = compat_urlparse.urlparse(url)
 841         if not parsed_url.scheme:
 842             default_search = self._downloader.params.get('default_search')
 843             if default_search is None:
 844                 default_search = 'fixup_error'
 845
 846             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 847                 if '/' in url:
 848                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 849                     return self.url_result('http://' + url)
 850                 elif default_search != 'fixup_error':
 851                     if default_search == 'auto_warning':
 852                         if re.match(r'^(?:url|URL)$', url):
 853                             raise ExtractorError(
 854                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 855                                 expected=True)
 856                         else:
 857                             self._downloader.report_warning(
 858                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 859                     return self.url_result('ytsearch:' + url)
 860
 861             if default_search in ('error', 'fixup_error'):
 862                 raise ExtractorError(
 863                     '%r is not a valid URL. '
 864                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 865                     % (url, url), expected=True)
 866             else:
 867                 if ':' not in default_search:
 868                     default_search += ':'
 869                 return self.url_result(default_search + url)
 870
 871         url, smuggled_data = unsmuggle_url(url)
 872         force_videoid = None
 873         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 874         if smuggled_data and 'force_videoid' in smuggled_data:
 875             force_videoid = smuggled_data['force_videoid']
 876             video_id = force_videoid
 877         else:
 878             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 879
 880         self.to_screen('%s: Requesting header' % video_id)
 881
 882         head_req = HEADRequest(url)
 883         head_response = self._request_webpage(
 884             head_req, video_id,
 885             note=False, errnote='Could not send HEAD request to %s' % url,
 886             fatal=False)
 887
 888         if head_response is not False:
 889             # Check for redirect
 890             new_url = head_response.geturl()
 891             if url != new_url:
 892                 self.report_following_redirect(new_url)
 893                 if force_videoid:
 894                     new_url = smuggle_url(
 895                         new_url, {'force_videoid': force_videoid})
 896                 return self.url_result(new_url)
 897
 898         full_response = None
 899         if head_response is False:
 900             full_response = self._request_webpage(url, video_id)
 901             head_response = full_response
 902
 903         # Check for direct link to a video
 904         content_type = head_response.headers.get('Content-Type', '')
 905         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 906         if m:
 907             upload_date = unified_strdate(
 908                 head_response.headers.get('Last-Modified'))
 909             return {
 910                 'id': video_id,
 911                 'title': os.path.splitext(url_basename(url))[0],
 912                 'direct': True,
 913                 'formats': [{
 914                     'format_id': m.group('format_id'),
 915                     'url': url,
 916                     'vcodec': 'none' if m.group('type') == 'audio' else None
 917                 }],
 918                 'upload_date': upload_date,
 919             }
 920
 921         if not self._downloader.params.get('test', False) and not is_intentional:
 922             self._downloader.report_warning('Falling back on generic information extractor.')
 923
 924         if not full_response:
 925             full_response = self._request_webpage(url, video_id)
 926
 927         # Maybe it's a direct link to a video?
 928         # Be careful not to download the whole thing!
 929         first_bytes = full_response.read(512)
 930         if not is_html(first_bytes):
 931             self._downloader.report_warning(
 932                 'URL could be a direct video link, returning it as such.')
 933             upload_date = unified_strdate(
 934                 head_response.headers.get('Last-Modified'))
 935             return {
 936                 'id': video_id,
 937                 'title': os.path.splitext(url_basename(url))[0],
 938                 'direct': True,
 939                 'url': url,
 940                 'upload_date': upload_date,
 941             }
 942
 943         webpage = self._webpage_read_content(
 944             full_response, url, video_id, prefix=first_bytes)
 945
 946         self.report_extraction(video_id)
 947
 948         # Is it an RSS feed?
 949         try:
 950             doc = parse_xml(webpage)
 951             if doc.tag == 'rss':
 952                 return self._extract_rss(url, video_id, doc)
 953         except compat_xml_parse_error:
 954             pass
 955
 956         # Is it a Camtasia project?
 957         camtasia_res = self._extract_camtasia(url, video_id, webpage)
 958         if camtasia_res is not None:
 959             return camtasia_res
 960
 961         # Sometimes embedded video player is hidden behind percent encoding
 962         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 963         # Unescaping the whole page allows to handle those cases in a generic way
 964         webpage = compat_urllib_parse.unquote(webpage)
 965
 966         # it's tempting to parse this further, but you would
 967         # have to take into account all the variations like
 968         #   Video Title - Site Name
 969         #   Site Name | Video Title
 970         #   Video Title - Tagline | Site Name
 971         # and so on and so forth; it's just not practical
 972         video_title = self._html_search_regex(
 973             r'(?s)<title>(.*?)</title>', webpage, 'video title',
 974             default='video')
 975
 976         # Try to detect age limit automatically
 977         age_limit = self._rta_search(webpage)
 978         # And then there are the jokers who advertise that they use RTA,
 979         # but actually don't.
 980         AGE_LIMIT_MARKERS = [
 981             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 982         ]
 983         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 984             age_limit = 18
 985
 986         # video uploader is domain name
 987         video_uploader = self._search_regex(
 988             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 989
 990         # Helper method
 991         def _playlist_from_matches(matches, getter=None, ie=None):
 992             urlrs = orderedSet(
 993                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
 994                 for m in matches)
 995             return self.playlist_result(
 996                 urlrs, playlist_id=video_id, playlist_title=video_title)
 997
 998         # Look for BrightCove:
 999         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1000         if bc_urls:
1001             self.to_screen('Brightcove video detected.')
1002             entries = [{
1003                 '_type': 'url',
1004                 'url': smuggle_url(bc_url, {'Referer': url}),
1005                 'ie_key': 'Brightcove'
1006             } for bc_url in bc_urls]
1007
1008             return {
1009                 '_type': 'playlist',
1010                 'title': video_title,
1011                 'id': video_id,
1012                 'entries': entries,
1013             }
1014
1015         # Look for embedded rtl.nl player
1016         matches = re.findall(
1017             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1018             webpage)
1019         if matches:
1020             return _playlist_from_matches(matches, ie='RtlNl')
1021
1022         # Look for embedded (iframe) Vimeo player
1023         mobj = re.search(
1024             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1025         if mobj:
1026             player_url = unescapeHTML(mobj.group('url'))
1027             surl = smuggle_url(player_url, {'Referer': url})
1028             return self.url_result(surl)
1029         # Look for embedded (swf embed) Vimeo player
1030         mobj = re.search(
1031             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1032         if mobj:
1033             return self.url_result(mobj.group(1))
1034
1035         # Look for embedded YouTube player
1036         matches = re.findall(r'''(?x)
1037             (?:
1038                 <iframe[^>]+?src=|
1039                 data-video-url=|
1040                 <embed[^>]+?src=|
1041                 embedSWF\(?:\s*|
1042                 new\s+SWFObject\(
1043             )
1044             (["\'])
1045                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1046                 (?:embed|v|p)/.+?)
1047             \1''', webpage)
1048         if matches:
1049             return _playlist_from_matches(
1050                 matches, lambda m: unescapeHTML(m[1]))
1051
1052         # Look for lazyYT YouTube embed
1053         matches = re.findall(
1054             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1055         if matches:
1056             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1057
1058         # Look for embedded Dailymotion player
1059         matches = re.findall(
1060             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1061         if matches:
1062             return _playlist_from_matches(
1063                 matches, lambda m: unescapeHTML(m[1]))
1064
1065         # Look for embedded Dailymotion playlist player (#3822)
1066         m = re.search(
1067             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1068         if m:
1069             playlists = re.findall(
1070                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1071             if playlists:
1072                 return _playlist_from_matches(
1073                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1074
1075         # Look for embedded Wistia player
1076         match = re.search(
1077             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1078         if match:
1079             embed_url = self._proto_relative_url(
1080                 unescapeHTML(match.group('url')))
1081             return {
1082                 '_type': 'url_transparent',
1083                 'url': embed_url,
1084                 'ie_key': 'Wistia',
1085                 'uploader': video_uploader,
1086                 'title': video_title,
1087                 'id': video_id,
1088             }
1089
1090         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1091         if match:
1092             return {
1093                 '_type': 'url_transparent',
1094                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1095                 'ie_key': 'Wistia',
1096                 'uploader': video_uploader,
1097                 'title': video_title,
1098                 'id': match.group('id')
1099             }
1100
1101         # Look for embedded blip.tv player
1102         bliptv_url = BlipTVIE._extract_url(webpage)
1103         if bliptv_url:
1104             return self.url_result(bliptv_url, 'BlipTV')
1105
1106         # Look for SVT player
1107         svt_url = SVTIE._extract_url(webpage)
1108         if svt_url:
1109             return self.url_result(svt_url, 'SVT')
1110
1111         # Look for embedded condenast player
1112         matches = re.findall(
1113             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1114             webpage)
1115         if matches:
1116             return {
1117                 '_type': 'playlist',
1118                 'entries': [{
1119                     '_type': 'url',
1120                     'ie_key': 'CondeNast',
1121                     'url': ma,
1122                 } for ma in matches],
1123                 'title': video_title,
1124                 'id': video_id,
1125             }
1126
1127         # Look for Bandcamp pages with custom domain
1128         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1129         if mobj is not None:
1130             burl = unescapeHTML(mobj.group(1))
1131             # Don't set the extractor because it can be a track url or an album
1132             return self.url_result(burl)
1133
1134         # Look for embedded Vevo player
1135         mobj = re.search(
1136             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1137         if mobj is not None:
1138             return self.url_result(mobj.group('url'))
1139
1140         # Look for embedded Viddler player
1141         mobj = re.search(
1142             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1143             webpage)
1144         if mobj is not None:
1145             return self.url_result(mobj.group('url'))
1146
1147         # Look for NYTimes player
1148         mobj = re.search(
1149             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1150             webpage)
1151         if mobj is not None:
1152             return self.url_result(mobj.group('url'))
1153
1154         # Look for Libsyn player
1155         mobj = re.search(
1156             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1157         if mobj is not None:
1158             return self.url_result(mobj.group('url'))
1159
1160         # Look for Ooyala videos
1161         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1162                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1163                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1164                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1165         if mobj is not None:
1166             return OoyalaIE._build_url_result(mobj.group('ec'))
1167
1168         # Look for multiple Ooyala embeds on SBN network websites
1169         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1170         if mobj is not None:
1171             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1172             if embeds:
1173                 return _playlist_from_matches(
1174                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1175
1176         # Look for Aparat videos
1177         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1178         if mobj is not None:
1179             return self.url_result(mobj.group(1), 'Aparat')
1180
1181         # Look for MPORA videos
1182         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1183         if mobj is not None:
1184             return self.url_result(mobj.group(1), 'Mpora')
1185
1186         # Look for embedded NovaMov-based player
1187         mobj = re.search(
1188             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1189                     (?P<url>http://(?:(?:embed|www)\.)?
1190                         (?:novamov\.com|
1191                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1192                            videoweed\.(?:es|com)|
1193                            movshare\.(?:net|sx|ag)|
1194                            divxstage\.(?:eu|net|ch|co|at|ag))
1195                         /embed\.php.+?)\1''', webpage)
1196         if mobj is not None:
1197             return self.url_result(mobj.group('url'))
1198
1199         # Look for embedded Facebook player
1200         mobj = re.search(
1201             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1202         if mobj is not None:
1203             return self.url_result(mobj.group('url'), 'Facebook')
1204
1205         # Look for embedded VK player
1206         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1207         if mobj is not None:
1208             return self.url_result(mobj.group('url'), 'VK')
1209
1210         # Look for embedded ivi player
1211         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1212         if mobj is not None:
1213             return self.url_result(mobj.group('url'), 'Ivi')
1214
1215         # Look for embedded Huffington Post player
1216         mobj = re.search(
1217             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1218         if mobj is not None:
1219             return self.url_result(mobj.group('url'), 'HuffPost')
1220
1221         # Look for embed.ly
1222         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1223         if mobj is not None:
1224             return self.url_result(mobj.group('url'))
1225         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1226         if mobj is not None:
1227             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1228
1229         # Look for funnyordie embed
1230         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1231         if matches:
1232             return _playlist_from_matches(
1233                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1234
1235         # Look for BBC iPlayer embed
1236         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1237         if matches:
1238             return _playlist_from_matches(matches, ie='BBCCoUk')
1239
1240         # Look for embedded RUTV player
1241         rutv_url = RUTVIE._extract_url(webpage)
1242         if rutv_url:
1243             return self.url_result(rutv_url, 'RUTV')
1244
1245         # Look for embedded TED player
1246         mobj = re.search(
1247             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1248         if mobj is not None:
1249             return self.url_result(mobj.group('url'), 'TED')
1250
1251         # Look for embedded Ustream videos
1252         mobj = re.search(
1253             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1254         if mobj is not None:
1255             return self.url_result(mobj.group('url'), 'Ustream')
1256
1257         # Look for embedded arte.tv player
1258         mobj = re.search(
1259             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1260             webpage)
1261         if mobj is not None:
1262             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1263
1264         # Look for embedded smotri.com player
1265         smotri_url = SmotriIE._extract_url(webpage)
1266         if smotri_url:
1267             return self.url_result(smotri_url, 'Smotri')
1268
        # Look for embedded soundcloud player
1270         mobj = re.search(
1271             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1272             webpage)
1273         if mobj is not None:
1274             url = unescapeHTML(mobj.group('url'))
1275             return self.url_result(url)
1276
1277         # Look for embedded vulture.com player
1278         mobj = re.search(
1279             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1280             webpage)
1281         if mobj is not None:
1282             url = unescapeHTML(mobj.group('url'))
1283             return self.url_result(url, ie='Vulture')
1284
1285         # Look for embedded mtvservices player
1286         mobj = re.search(
1287             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1288             webpage)
1289         if mobj is not None:
1290             url = unescapeHTML(mobj.group('url'))
1291             return self.url_result(url, ie='MTVServicesEmbedded')
1292
1293         # Look for embedded yahoo player
1294         mobj = re.search(
1295             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1296             webpage)
1297         if mobj is not None:
1298             return self.url_result(mobj.group('url'), 'Yahoo')
1299
1300         # Look for embedded sbs.com.au player
1301         mobj = re.search(
1302             r'''(?x)
1303             (?:
1304                 <meta\s+property="og:video"\s+content=|
1305                 <iframe[^>]+?src=
1306             )
1307             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1308             webpage)
1309         if mobj is not None:
1310             return self.url_result(mobj.group('url'), 'SBS')
1311
1312         # Look for embedded Cinchcast player
1313         mobj = re.search(
1314             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1315             webpage)
1316         if mobj is not None:
1317             return self.url_result(mobj.group('url'), 'Cinchcast')
1318
1319         mobj = re.search(
1320             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1321             webpage)
1322         if not mobj:
1323             mobj = re.search(
1324                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1325                 webpage)
1326         if mobj is not None:
1327             return self.url_result(mobj.group('url'), 'MLB')
1328
1329         mobj = re.search(
1330             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1331             webpage)
1332         if mobj is not None:
1333             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1334
1335         mobj = re.search(
1336             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1337             webpage)
1338         if mobj is not None:
1339             return self.url_result(mobj.group('url'), 'Livestream')
1340
1341         # Look for Zapiks embed
1342         mobj = re.search(
1343             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1344         if mobj is not None:
1345             return self.url_result(mobj.group('url'), 'Zapiks')
1346
1347         # Look for Kaltura embeds
1348         mobj = re.search(
1349             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1350         if mobj is not None:
1351             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1352
1353         # Look for Eagle.Platform embeds
1354         mobj = re.search(
1355             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1356         if mobj is not None:
1357             return self.url_result(mobj.group('url'), 'EaglePlatform')
1358
1359         # Look for ClipYou (uses Eagle.Platform) embeds
1360         mobj = re.search(
1361             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1362         if mobj is not None:
1363             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1364
1365         # Look for Pladform embeds
1366         mobj = re.search(
1367             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1368         if mobj is not None:
1369             return self.url_result(mobj.group('url'), 'Pladform')
1370
1371         # Look for Playwire embeds
1372         mobj = re.search(
1373             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1374         if mobj is not None:
1375             return self.url_result(mobj.group('url'))
1376
1377         # Look for 5min embeds
1378         mobj = re.search(
1379             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1380         if mobj is not None:
1381             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1382
1383         # Look for Crooks and Liars embeds
1384         mobj = re.search(
1385             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1386         if mobj is not None:
1387             return self.url_result(mobj.group('url'))
1388
1389         # Look for NBC Sports VPlayer embeds
1390         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1391         if nbc_sports_url:
1392             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1393
1394         # Look for UDN embeds
1395         mobj = re.search(
1396             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1397         if mobj is not None:
1398             return self.url_result(
1399                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1400
1401         # Look for Senate ISVP iframe
1402         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1403         if senate_isvp_url:
1404             return self.url_result(surl, 'SenateISVP')
1405
1406         def check_video(vurl):
1407             if YoutubeIE.suitable(vurl):
1408                 return True
1409             vpath = compat_urlparse.urlparse(vurl).path
1410             vext = determine_ext(vpath)
1411             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1412
1413         def filter_video(urls):
1414             return list(filter(check_video, urls))
1415
1416         # Start with something easy: JW Player in SWFObject
1417         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1418         if not found:
1419             # Look for gorilla-vid style embedding
1420             found = filter_video(re.findall(r'''(?sx)
1421                 (?:
1422                     jw_plugins|
1423                     JWPlayerOptions|
1424                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1425                 )
1426                 .*?
1427                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1428         if not found:
1429             # Broaden the search a little bit
1430             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1431         if not found:
1432             # Broaden the findall a little bit: JWPlayer JS loader
1433             found = filter_video(re.findall(
1434                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1435         if not found:
1436             # Flow player
1437             found = filter_video(re.findall(r'''(?xs)
1438                 flowplayer\("[^"]+",\s*
1439                     \{[^}]+?\}\s*,
1440                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1441                         ["']?url["']?\s*:\s*["']([^"']+)["']
1442             ''', webpage))
1443         if not found:
1444             # Cinerama player
1445             found = re.findall(
1446                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1447         if not found:
1448             # Try to find twitter cards info
1449             found = filter_video(re.findall(
1450                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1451         if not found:
1452             # We look for Open Graph info:
1453             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1454             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1455             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1456             if m_video_type is not None:
1457                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1458         if not found:
1459             # HTML5 video
1460             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1461         if not found:
1462             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1463             found = re.search(
1464                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1465                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1466                 webpage)
1467             if not found:
1468                 # Look also in Refresh HTTP header
1469                 refresh_header = head_response.headers.get('Refresh')
1470                 if refresh_header:
1471                     found = re.search(REDIRECT_REGEX, refresh_header)
1472             if found:
1473                 new_url = compat_urlparse.urljoin(url, found.group(1))
1474                 self.report_following_redirect(new_url)
1475                 return {
1476                     '_type': 'url',
1477                     'url': new_url,
1478                 }
1479         if not found:
1480             raise UnsupportedError(url)
1481
1482         entries = []
1483         for video_url in found:
1484             video_url = compat_urlparse.urljoin(url, video_url)
1485             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1486
1487             # Sometimes, jwplayer extraction will result in a YouTube URL
1488             if YoutubeIE.suitable(video_url):
1489                 entries.append(self.url_result(video_url, 'Youtube'))
1490                 continue
1491
1492             # here's a fun little line of code for you:
1493             video_id = os.path.splitext(video_id)[0]
1494
1495             if determine_ext(video_url) == 'smil':
1496                 entries.append({
1497                     'id': video_id,
1498                     'formats': self._extract_smil_formats(video_url, video_id),
1499                     'uploader': video_uploader,
1500                     'title': video_title,
1501                     'age_limit': age_limit,
1502                 })
1503             else:
1504                 entries.append({
1505                     'id': video_id,
1506                     'url': video_url,
1507                     'uploader': video_uploader,
1508                     'title': video_title,
1509                     'age_limit': age_limit,
1510                 })
1511
1512         if len(entries) == 1:
1513             return entries[0]
1514         else:
1515             for num, e in enumerate(entries, start=1):
1516                 # 'url' results don't have a title
1517                 if e.get('title') is not None:
1518                     e['title'] = '%s (%d)' % (e['title'], num)
1519             return {
1520                 '_type': 'playlist',
1521                 'entries': entries,
1522             }