git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urlparse,
  13     compat_xml_parse_error,
  14 )
  15 from ..utils import (
  16     determine_ext,
  17     ExtractorError,
  18     float_or_none,
  19     HEADRequest,
  20     is_html,
  21     orderedSet,
  22     parse_xml,
  23     smuggle_url,
  24     unescapeHTML,
  25     unified_strdate,
  26     unsmuggle_url,
  27     UnsupportedError,
  28     url_basename,
  29     xpath_text,
  30 )
  31 from .brightcove import BrightcoveIE
  32 from .nbc import NBCSportsVPlayerIE
  33 from .ooyala import OoyalaIE
  34 from .rutv import RUTVIE
  35 from .smotri import SmotriIE
  36 from .condenast import CondeNastIE
  37 from .udn import UDNEmbedIE
  38 from .senateisvp import SenateISVPIE
  39 from .bliptv import BlipTVIE
  40 from .svt import SVTIE
  41
  42
  43 class GenericIE(InfoExtractor):
    # Human-readable description of this extractor.
    IE_DESC = 'Generic downloader that works on some sites'
    # Catch-all pattern: '.*' matches any input, so this extractor never
    # rejects a URL on pattern grounds.
    # NOTE(review): presumably this is tried only after the site-specific
    # extractors -- confirm against the extractor registry.
    _VALID_URL = r'.*'
    # Short identifier of this extractor.
    IE_NAME = 'generic'
  47     _TESTS = [
  48         {
  49             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  50             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  51             'info_dict': {
  52                 'id': '13601338388002',
  53                 'ext': 'mp4',
  54                 'uploader': 'www.hodiho.fr',
  55                 'title': 'R\u00e9gis plante sa Jeep',
  56             }
  57         },
  58         # bandcamp page with custom domain
  59         {
  60             'add_ie': ['Bandcamp'],
  61             'url': 'http://bronyrock.com/track/the-pony-mash',
  62             'info_dict': {
  63                 'id': '3235767654',
  64                 'ext': 'mp3',
  65                 'title': 'The Pony Mash',
  66                 'uploader': 'M_Pallante',
  67             },
  68             'skip': 'There is a limit of 200 free downloads / month for the test song',
  69         },
  70         # embedded brightcove video
  71         # it also tests brightcove videos that need to set the 'Referer' in the
  72         # http requests
  73         {
  74             'add_ie': ['Brightcove'],
  75             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
  76             'info_dict': {
  77                 'id': '2765128793001',
  78                 'ext': 'mp4',
  79                 'title': 'Le cours de bourse : l’analyse technique',
  80                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
  81                 'uploader': 'BFM BUSINESS',
  82             },
  83             'params': {
  84                 'skip_download': True,
  85             },
  86         },
  87         {
  88             # https://github.com/rg3/youtube-dl/issues/2253
  89             'url': 'http://bcove.me/i6nfkrc3',
  90             'md5': '0ba9446db037002366bab3b3eb30c88c',
  91             'info_dict': {
  92                 'id': '3101154703001',
  93                 'ext': 'mp4',
  94                 'title': 'Still no power',
  95                 'uploader': 'thestar.com',
  96                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
  97             },
  98             'add_ie': ['Brightcove'],
  99         },
 100         {
 101             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 102             'md5': 'fb973ecf6e4a78a67453647444222983',
 103             'info_dict': {
 104                 'id': '3414141473001',
 105                 'ext': 'mp4',
 106                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 107                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 108                 'uploader': 'Championat',
 109             },
 110         },
 111         {
 112             # https://github.com/rg3/youtube-dl/issues/3541
 113             'add_ie': ['Brightcove'],
 114             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 115             'info_dict': {
 116                 'id': '3866516442001',
 117                 'ext': 'mp4',
 118                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 119                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 120                 'uploader': 'SBS Broadcasting',
 121             },
 122             'skip': 'Restricted to Netherlands',
 123             'params': {
 124                 'skip_download': True,  # m3u8 download
 125             },
 126         },
 127         # Direct link to a video
 128         {
 129             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 130             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 131             'info_dict': {
 132                 'id': 'trailer',
 133                 'ext': 'mp4',
 134                 'title': 'trailer',
 135                 'upload_date': '20100513',
 136             }
 137         },
 138         # ooyala video
 139         {
 140             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 141             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 142             'info_dict': {
 143                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 144                 'ext': 'mp4',
 145                 'title': '2cc213299525360.mov',  # that's what we get
 146             },
 147             'add_ie': ['Ooyala'],
 148         },
 149         # multiple ooyala embeds on SBN network websites
 150         {
 151             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 152             'info_dict': {
 153                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 154                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 155             },
 156             'playlist_mincount': 3,
 157             'params': {
 158                 'skip_download': True,
 159             },
 160             'add_ie': ['Ooyala'],
 161         },
 162         # google redirect
 163         {
 164             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 165             'info_dict': {
 166                 'id': 'cmQHVoWB5FY',
 167                 'ext': 'mp4',
 168                 'upload_date': '20130224',
 169                 'uploader_id': 'TheVerge',
 170                 'description': 're:^Chris Ziegler takes a look at the\.*',
 171                 'uploader': 'The Verge',
 172                 'title': 'First Firefox OS phones side-by-side',
 173             },
 174             'params': {
 175                 'skip_download': False,
 176             }
 177         },
 178         # embed.ly video
 179         {
 180             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 181             'info_dict': {
 182                 'id': '9ODmcdjQcHQ',
 183                 'ext': 'mp4',
 184                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 185                 'upload_date': '20140225',
 186                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 187                 'uploader': 'Tested',
 188                 'uploader_id': 'testedcom',
 189             },
 190             # No need to test YoutubeIE here
 191             'params': {
 192                 'skip_download': True,
 193             },
 194         },
 195         # funnyordie embed
 196         {
 197             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 198             'info_dict': {
 199                 'id': '18e820ec3f',
 200                 'ext': 'mp4',
 201                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 202                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 203             },
 204         },
 205         # BBC iPlayer embeds
 206         {
 207             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 208             'info_dict': {
 209                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 210             },
 211             'playlist_mincount': 18,
 212         },
 213         # RUTV embed
 214         {
 215             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 216             'info_dict': {
 217                 'id': '776940',
 218                 'ext': 'mp4',
 219                 'title': 'Охотское море стало целиком российским',
 220                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 221             },
 222             'params': {
 223                 # m3u8 download
 224                 'skip_download': True,
 225             },
 226         },
 227         # Embedded TED video
 228         {
 229             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 230             'md5': '65fdff94098e4a607385a60c5177c638',
 231             'info_dict': {
 232                 'id': '1969',
 233                 'ext': 'mp4',
 234                 'title': 'Hidden miracles of the natural world',
 235                 'uploader': 'Louie Schwartzberg',
 236                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 237             }
 238         },
  239         # Embedded Ustream video
 240         {
 241             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 242             'md5': '27b99cdb639c9b12a79bca876a073417',
 243             'info_dict': {
 244                 'id': '45734260',
 245                 'ext': 'flv',
 246                 'uploader': 'AU SPA:  The NSA and Privacy',
 247                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 248             }
 249         },
 250         # nowvideo embed hidden behind percent encoding
 251         {
 252             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 253             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 254             'info_dict': {
 255                 'id': '06e53103ca9aa',
 256                 'ext': 'flv',
 257                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 258                 'description': 'No description',
 259             },
 260         },
 261         # arte embed
 262         {
 263             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 264             'md5': '7653032cbb25bf6c80d80f217055fa43',
 265             'info_dict': {
 266                 'id': '048195-004_PLUS7-F',
 267                 'ext': 'flv',
 268                 'title': 'X:enius',
 269                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 270                 'upload_date': '20140320',
 271             },
 272             'params': {
 273                 'skip_download': 'Requires rtmpdump'
 274             }
 275         },
 276         # Condé Nast embed
 277         {
 278             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 279             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 280             'info_dict': {
 281                 'id': '53501be369702d3275860000',
 282                 'ext': 'mp4',
 283                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 284             }
 285         },
 286         # Dailymotion embed
 287         {
 288             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 289             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 290             'info_dict': {
 291                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 292                 'ext': 'mp4',
 293                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 294                 'uploader': 'Spi0n',
 295             },
 296             'add_ie': ['Dailymotion'],
 297         },
 298         # YouTube embed
 299         {
 300             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 301             'info_dict': {
 302                 'id': 'FXRb4ykk4S0',
 303                 'ext': 'mp4',
 304                 'title': 'The NBL Auction 2014',
 305                 'uploader': 'BADMINTON England',
 306                 'uploader_id': 'BADMINTONEvents',
 307                 'upload_date': '20140603',
 308                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 309             },
 310             'add_ie': ['Youtube'],
 311             'params': {
 312                 'skip_download': True,
 313             }
 314         },
  315         # MTVServices embed
 316         {
 317             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 318             'md5': '35727f82f58c76d996fc188f9755b0d5',
 319             'info_dict': {
 320                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 321                 'ext': 'mp4',
 322                 'title': 'Review',
 323                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 324             },
 325         },
 326         # YouTube embed via <data-embed-url="">
 327         {
 328             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 329             'info_dict': {
 330                 'id': '4vAffPZIT44',
 331                 'ext': 'mp4',
 332                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 333                 'uploader': 'Gameloft',
 334                 'uploader_id': 'gameloft',
 335                 'upload_date': '20140828',
 336                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 337             },
 338             'params': {
 339                 'skip_download': True,
 340             }
 341         },
 342         # Camtasia studio
 343         {
 344             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 345             'playlist': [{
 346                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 347                 'info_dict': {
 348                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 349                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 350                     'ext': 'flv',
 351                     'duration': 2235.90,
 352                 }
 353             }, {
 354                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 355                 'info_dict': {
 356                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 357                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 358                     'ext': 'flv',
 359                     'duration': 2235.93,
 360                 }
 361             }],
 362             'info_dict': {
 363                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 364             }
 365         },
 366         # Flowplayer
 367         {
 368             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 369             'md5': '9d65602bf31c6e20014319c7d07fba27',
 370             'info_dict': {
 371                 'id': '5123ea6d5e5a7',
 372                 'ext': 'mp4',
 373                 'age_limit': 18,
 374                 'uploader': 'www.handjobhub.com',
 375                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 376             }
 377         },
 378         # RSS feed
 379         {
 380             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 381             'info_dict': {
 382                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 383                 'title': 'Zero Punctuation',
 384                 'description': 're:.*groundbreaking video review series.*'
 385             },
 386             'playlist_mincount': 11,
 387         },
 388         # Multiple brightcove videos
 389         # https://github.com/rg3/youtube-dl/issues/2283
 390         {
 391             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 392             'info_dict': {
 393                 'id': 'always-never',
 394                 'title': 'Always / Never - The New Yorker',
 395             },
 396             'playlist_count': 3,
 397             'params': {
 398                 'extract_flat': False,
 399                 'skip_download': True,
 400             }
 401         },
 402         # MLB embed
 403         {
 404             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 405             'md5': '96f09a37e44da40dd083e12d9a683327',
 406             'info_dict': {
 407                 'id': '33322633',
 408                 'ext': 'mp4',
 409                 'title': 'Ump changes call to ball',
 410                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 411                 'duration': 48,
 412                 'timestamp': 1401537900,
 413                 'upload_date': '20140531',
 414                 'thumbnail': 're:^https?://.*\.jpg$',
 415             },
 416         },
 417         # MLB articles
 418         {
 419             'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
 420             'md5': 'b190e70141fb9a1552a85426b4da1b5d',
 421             'info_dict': {
 422                 'id': '75609783',
 423                 'ext': 'mp4',
 424                 'title': 'Must C: Pillar climbs for catch',
 425                 'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
 426                 'timestamp': 1429124820,
 427                 'upload_date': '20150415',
 428             }
 429         },
 430         # Wistia embed
 431         {
 432             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 433             'md5': '8788b683c777a5cf25621eaf286d0c23',
 434             'info_dict': {
 435                 'id': '1cfaf6b7ea',
 436                 'ext': 'mov',
 437                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 438                 'duration': 643.0,
 439                 'filesize': 182808282,
 440                 'uploader': 'education-portal.com',
 441             },
 442         },
 443         {
 444             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 445             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 446             'info_dict': {
 447                 'id': 'uxjb0lwrcz',
 448                 'ext': 'mp4',
 449                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 450                 'duration': 1715.0,
 451                 'uploader': 'thoughtworks.wistia.com',
 452             },
 453         },
 454         # Direct download with broken HEAD
 455         {
 456             'url': 'http://ai-radio.org:8000/radio.opus',
 457             'info_dict': {
 458                 'id': 'radio',
 459                 'ext': 'opus',
 460                 'title': 'radio',
 461             },
 462             'params': {
 463                 'skip_download': True,  # infinite live stream
 464             },
 465             'expected_warnings': [
 466                 r'501.*Not Implemented'
 467             ],
 468         },
 469         # Soundcloud embed
 470         {
 471             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 472             'info_dict': {
 473                 'id': '174391317',
 474                 'ext': 'mp3',
 475                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 476                 'uploader': 'Sophos Security',
 477                 'title': 'Chet Chat 171 - Oct 29, 2014',
 478                 'upload_date': '20141029',
 479             }
 480         },
 481         # Livestream embed
 482         {
 483             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 484             'info_dict': {
 485                 'id': '67864563',
 486                 'ext': 'flv',
 487                 'upload_date': '20141112',
 488                 'title': 'Rosetta #CometLanding webcast HL 10',
 489             }
 490         },
 491         # LazyYT
 492         {
 493             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 494             'info_dict': {
 495                 'id': '1986',
 496                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 497             },
 498             'playlist_mincount': 2,
 499         },
 500         # Direct link with incorrect MIME type
 501         {
 502             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 503             'md5': '4ccbebe5f36706d85221f204d7eb5913',
 504             'info_dict': {
 505                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 506                 'id': '5_Lennart_Poettering_-_Systemd',
 507                 'ext': 'webm',
 508                 'title': '5_Lennart_Poettering_-_Systemd',
 509                 'upload_date': '20141120',
 510             },
 511             'expected_warnings': [
 512                 'URL could be a direct video link, returning it as such.'
 513             ]
 514         },
 515         # Cinchcast embed
 516         {
 517             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 518             'info_dict': {
 519                 'id': '7141703',
 520                 'ext': 'mp3',
 521                 'upload_date': '20141126',
 522                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 523             }
 524         },
 525         # Cinerama player
 526         {
 527             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 528             'info_dict': {
 529                 'id': '730m_DandD_1901_512k',
 530                 'ext': 'mp4',
 531                 'uploader': 'www.abc.net.au',
 532                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 533             }
 534         },
 535         # embedded viddler video
 536         {
 537             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 538             'info_dict': {
 539                 'id': '4d03aad9',
 540                 'ext': 'mp4',
 541                 'uploader': 'deadspin',
 542                 'title': 'WALL-TO-GORTAT',
 543                 'timestamp': 1422285291,
 544                 'upload_date': '20150126',
 545             },
 546             'add_ie': ['Viddler'],
 547         },
 548         # Libsyn embed
 549         {
 550             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 551             'info_dict': {
 552                 'id': '3377616',
 553                 'ext': 'mp3',
 554                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 555                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 556                 'upload_date': '20150220',
 557             },
 558         },
 559         # jwplayer YouTube
 560         {
 561             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 562             'info_dict': {
 563                 'id': 'Mrj4DVp2zeA',
 564                 'ext': 'mp4',
 565                 'upload_date': '20150212',
 566                 'uploader': 'The National Archives UK',
 567                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 568                 'uploader_id': 'NationalArchives08',
 569                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 570             },
 571         },
 572         # rtl.nl embed
 573         {
 574             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 575             'playlist_mincount': 5,
 576             'info_dict': {
 577                 'id': 'aanslagen-kopenhagen',
 578                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 579             }
 580         },
 581         # Zapiks embed
 582         {
 583             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 584             'info_dict': {
 585                 'id': '118046',
 586                 'ext': 'mp4',
 587                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 588             }
 589         },
 590         # Kaltura embed
 591         {
 592             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 593             'info_dict': {
 594                 'id': '1_eergr3h1',
 595                 'ext': 'mp4',
 596                 'upload_date': '20150226',
 597                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 598                 'timestamp': int,
 599                 'title': 'John Carlson Postgame 2/25/15',
 600             },
 601         },
 602         # Eagle.Platform embed (generic URL)
 603         {
 604             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 605             'info_dict': {
 606                 'id': '227304',
 607                 'ext': 'mp4',
 608                 'title': 'Навальный вышел на свободу',
 609                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 610                 'thumbnail': 're:^https?://.*\.jpg$',
 611                 'duration': 87,
 612                 'view_count': int,
 613                 'age_limit': 0,
 614             },
 615         },
 616         # ClipYou (Eagle.Platform) embed (custom URL)
 617         {
 618             'url': 'http://muz-tv.ru/play/7129/',
 619             'info_dict': {
 620                 'id': '12820',
 621                 'ext': 'mp4',
 622                 'title': "'O Sole Mio",
 623                 'thumbnail': 're:^https?://.*\.jpg$',
 624                 'duration': 216,
 625                 'view_count': int,
 626             },
 627         },
 628         # Pladform embed
 629         {
 630             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 631             'info_dict': {
 632                 'id': '100183293',
 633                 'ext': 'mp4',
 634                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 635                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 636                 'thumbnail': 're:^https?://.*\.jpg$',
 637                 'duration': 694,
 638                 'age_limit': 0,
 639             },
 640         },
 641         # Playwire embed
 642         {
 643             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 644             'info_dict': {
 645                 'id': '3519514',
 646                 'ext': 'mp4',
 647                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 648                 'thumbnail': 're:^https?://.*\.png$',
 649                 'duration': 45.115,
 650             },
 651         },
 652         # 5min embed
 653         {
 654             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 655             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 656             'info_dict': {
 657                 'id': '518726732',
 658                 'ext': 'mp4',
 659                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 660             },
 661         },
 662         # RSS feed with enclosure
 663         {
 664             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 665             'info_dict': {
 666                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 667                 'ext': 'm4v',
 668                 'upload_date': '20150228',
 669                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 670             }
 671         },
 672         # Crooks and Liars embed
 673         {
 674             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 675             'info_dict': {
 676                 'id': '8RUoRhRi',
 677                 'ext': 'mp4',
 678                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 679                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 680                 'timestamp': 1428207000,
 681                 'upload_date': '20150405',
 682                 'uploader': 'Heather',
 683             },
 684         },
 685         # Crooks and Liars external embed
 686         {
 687             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 688             'info_dict': {
 689                 'id': 'MTE3MjUtMzQ2MzA',
 690                 'ext': 'mp4',
 691                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 692                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 693                 'timestamp': 1265032391,
 694                 'upload_date': '20100201',
 695                 'uploader': 'Heather',
 696             },
 697         },
 698         # NBC Sports vplayer embed
 699         {
 700             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 701             'info_dict': {
 702                 'id': 'ln7x1qSThw4k',
 703                 'ext': 'flv',
 704                 'title': "PFT Live: New leader in the 'new-look' defense",
 705                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 706             },
 707         },
 708         # UDN embed
 709         {
 710             'url': 'http://www.udn.com/news/story/7314/822787',
 711             'md5': 'fd2060e988c326991037b9aff9df21a6',
 712             'info_dict': {
 713                 'id': '300346',
 714                 'ext': 'mp4',
 715                 'title': '中一中男師變性 全校師生力挺',
 716                 'thumbnail': 're:^https?://.*\.jpg$',
 717             }
 718         },
 719         # Ooyala embed
 720         {
 721             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 722             'info_dict': {
 723                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 724                 'ext': 'mp4',
 725                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 726                 'title': 'This is what separates the Excel masters from the wannabes',
 727             },
 728             'params': {
 729                 # m3u8 downloads
 730                 'skip_download': True,
 731             }
 732         },
 733         # Contains a SMIL manifest
 734         {
 735             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 736             'info_dict': {
 737                 'id': 'file',
 738                 'ext': 'flv',
 739                 'title': '+ Football: Lottery Champions League Europe',
 740                 'uploader': 'www.telewebion.com',
 741             },
 742             'params': {
 743                 # rtmpe downloads
 744                 'skip_download': True,
 745             }
 746         }
 747     ]
 748
 749     def report_following_redirect(self, new_url):
 750         """Report information extraction."""
 751         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 752
 753     def _extract_rss(self, url, video_id, doc):
 754         playlist_title = doc.find('./channel/title').text
 755         playlist_desc_el = doc.find('./channel/description')
 756         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 757
 758         entries = []
 759         for it in doc.findall('./channel/item'):
 760             next_url = xpath_text(it, 'link', fatal=False)
 761             if not next_url:
 762                 enclosure_nodes = it.findall('./enclosure')
 763                 for e in enclosure_nodes:
 764                     next_url = e.attrib.get('url')
 765                     if next_url:
 766                         break
 767
 768             if not next_url:
 769                 continue
 770
 771             entries.append({
 772                 '_type': 'url',
 773                 'url': next_url,
 774                 'title': it.find('title').text,
 775             })
 776
 777         return {
 778             '_type': 'playlist',
 779             'id': url,
 780             'title': playlist_title,
 781             'description': playlist_desc,
 782             'entries': entries,
 783         }
 784
 785     def _extract_camtasia(self, url, video_id, webpage):
 786         """ Returns None if no camtasia video can be found. """
 787
 788         camtasia_cfg = self._search_regex(
 789             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 790             webpage, 'camtasia configuration file', default=None)
 791         if camtasia_cfg is None:
 792             return None
 793
 794         title = self._html_search_meta('DC.title', webpage, fatal=True)
 795
 796         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 797         camtasia_cfg = self._download_xml(
 798             camtasia_url, video_id,
 799             note='Downloading camtasia configuration',
 800             errnote='Failed to download camtasia configuration')
 801         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 802
 803         entries = []
 804         for n in fileset_node.getchildren():
 805             url_n = n.find('./uri')
 806             if url_n is None:
 807                 continue
 808
 809             entries.append({
 810                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 811                 'title': '%s - %s' % (title, n.tag),
 812                 'url': compat_urlparse.urljoin(url, url_n.text),
 813                 'duration': float_or_none(n.find('./duration').text),
 814             })
 815
 816         return {
 817             '_type': 'playlist',
 818             'entries': entries,
 819             'title': title,
 820         }
 821
 822     def _real_extract(self, url):
 823         if url.startswith('//'):
 824             return {
 825                 '_type': 'url',
 826                 'url': self.http_scheme() + url,
 827             }
 828
 829         parsed_url = compat_urlparse.urlparse(url)
 830         if not parsed_url.scheme:
 831             default_search = self._downloader.params.get('default_search')
 832             if default_search is None:
 833                 default_search = 'fixup_error'
 834
 835             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 836                 if '/' in url:
 837                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 838                     return self.url_result('http://' + url)
 839                 elif default_search != 'fixup_error':
 840                     if default_search == 'auto_warning':
 841                         if re.match(r'^(?:url|URL)$', url):
 842                             raise ExtractorError(
 843                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 844                                 expected=True)
 845                         else:
 846                             self._downloader.report_warning(
 847                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 848                     return self.url_result('ytsearch:' + url)
 849
 850             if default_search in ('error', 'fixup_error'):
 851                 raise ExtractorError(
 852                     '%r is not a valid URL. '
 853                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 854                     % (url, url), expected=True)
 855             else:
 856                 if ':' not in default_search:
 857                     default_search += ':'
 858                 return self.url_result(default_search + url)
 859
 860         url, smuggled_data = unsmuggle_url(url)
 861         force_videoid = None
 862         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 863         if smuggled_data and 'force_videoid' in smuggled_data:
 864             force_videoid = smuggled_data['force_videoid']
 865             video_id = force_videoid
 866         else:
 867             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 868
 869         self.to_screen('%s: Requesting header' % video_id)
 870
 871         head_req = HEADRequest(url)
 872         head_response = self._request_webpage(
 873             head_req, video_id,
 874             note=False, errnote='Could not send HEAD request to %s' % url,
 875             fatal=False)
 876
 877         if head_response is not False:
 878             # Check for redirect
 879             new_url = head_response.geturl()
 880             if url != new_url:
 881                 self.report_following_redirect(new_url)
 882                 if force_videoid:
 883                     new_url = smuggle_url(
 884                         new_url, {'force_videoid': force_videoid})
 885                 return self.url_result(new_url)
 886
 887         full_response = None
 888         if head_response is False:
 889             full_response = self._request_webpage(url, video_id)
 890             head_response = full_response
 891
 892         # Check for direct link to a video
 893         content_type = head_response.headers.get('Content-Type', '')
 894         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 895         if m:
 896             upload_date = unified_strdate(
 897                 head_response.headers.get('Last-Modified'))
 898             return {
 899                 'id': video_id,
 900                 'title': os.path.splitext(url_basename(url))[0],
 901                 'direct': True,
 902                 'formats': [{
 903                     'format_id': m.group('format_id'),
 904                     'url': url,
 905                     'vcodec': 'none' if m.group('type') == 'audio' else None
 906                 }],
 907                 'upload_date': upload_date,
 908             }
 909
 910         if not self._downloader.params.get('test', False) and not is_intentional:
 911             self._downloader.report_warning('Falling back on generic information extractor.')
 912
 913         if not full_response:
 914             full_response = self._request_webpage(url, video_id)
 915
 916         # Maybe it's a direct link to a video?
 917         # Be careful not to download the whole thing!
 918         first_bytes = full_response.read(512)
 919         if not is_html(first_bytes):
 920             self._downloader.report_warning(
 921                 'URL could be a direct video link, returning it as such.')
 922             upload_date = unified_strdate(
 923                 head_response.headers.get('Last-Modified'))
 924             return {
 925                 'id': video_id,
 926                 'title': os.path.splitext(url_basename(url))[0],
 927                 'direct': True,
 928                 'url': url,
 929                 'upload_date': upload_date,
 930             }
 931
 932         webpage = self._webpage_read_content(
 933             full_response, url, video_id, prefix=first_bytes)
 934
 935         self.report_extraction(video_id)
 936
 937         # Is it an RSS feed?
 938         try:
 939             doc = parse_xml(webpage)
 940             if doc.tag == 'rss':
 941                 return self._extract_rss(url, video_id, doc)
 942         except compat_xml_parse_error:
 943             pass
 944
 945         # Is it a Camtasia project?
 946         camtasia_res = self._extract_camtasia(url, video_id, webpage)
 947         if camtasia_res is not None:
 948             return camtasia_res
 949
 950         # Sometimes embedded video player is hidden behind percent encoding
 951         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 952         # Unescaping the whole page allows to handle those cases in a generic way
 953         webpage = compat_urllib_parse.unquote(webpage)
 954
 955         # it's tempting to parse this further, but you would
 956         # have to take into account all the variations like
 957         #   Video Title - Site Name
 958         #   Site Name | Video Title
 959         #   Video Title - Tagline | Site Name
 960         # and so on and so forth; it's just not practical
 961         video_title = self._html_search_regex(
 962             r'(?s)<title>(.*?)</title>', webpage, 'video title',
 963             default='video')
 964
 965         # Try to detect age limit automatically
 966         age_limit = self._rta_search(webpage)
 967         # And then there are the jokers who advertise that they use RTA,
 968         # but actually don't.
 969         AGE_LIMIT_MARKERS = [
 970             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 971         ]
 972         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 973             age_limit = 18
 974
 975         # video uploader is domain name
 976         video_uploader = self._search_regex(
 977             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 978
 979         # Helper method
 980         def _playlist_from_matches(matches, getter=None, ie=None):
 981             urlrs = orderedSet(
 982                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
 983                 for m in matches)
 984             return self.playlist_result(
 985                 urlrs, playlist_id=video_id, playlist_title=video_title)
 986
 987         # Look for BrightCove:
 988         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 989         if bc_urls:
 990             self.to_screen('Brightcove video detected.')
 991             entries = [{
 992                 '_type': 'url',
 993                 'url': smuggle_url(bc_url, {'Referer': url}),
 994                 'ie_key': 'Brightcove'
 995             } for bc_url in bc_urls]
 996
 997             return {
 998                 '_type': 'playlist',
 999                 'title': video_title,
1000                 'id': video_id,
1001                 'entries': entries,
1002             }
1003
1004         # Look for embedded rtl.nl player
1005         matches = re.findall(
1006             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1007             webpage)
1008         if matches:
1009             return _playlist_from_matches(matches, ie='RtlNl')
1010
1011         # Look for embedded (iframe) Vimeo player
1012         mobj = re.search(
1013             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1014         if mobj:
1015             player_url = unescapeHTML(mobj.group('url'))
1016             surl = smuggle_url(player_url, {'Referer': url})
1017             return self.url_result(surl)
1018         # Look for embedded (swf embed) Vimeo player
1019         mobj = re.search(
1020             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1021         if mobj:
1022             return self.url_result(mobj.group(1))
1023
1024         # Look for embedded YouTube player
1025         matches = re.findall(r'''(?x)
1026             (?:
1027                 <iframe[^>]+?src=|
1028                 data-video-url=|
1029                 <embed[^>]+?src=|
1030                 embedSWF\(?:\s*|
1031                 new\s+SWFObject\(
1032             )
1033             (["\'])
1034                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1035                 (?:embed|v|p)/.+?)
1036             \1''', webpage)
1037         if matches:
1038             return _playlist_from_matches(
1039                 matches, lambda m: unescapeHTML(m[1]))
1040
1041         # Look for lazyYT YouTube embed
1042         matches = re.findall(
1043             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1044         if matches:
1045             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1046
1047         # Look for embedded Dailymotion player
1048         matches = re.findall(
1049             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1050         if matches:
1051             return _playlist_from_matches(
1052                 matches, lambda m: unescapeHTML(m[1]))
1053
1054         # Look for embedded Dailymotion playlist player (#3822)
1055         m = re.search(
1056             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1057         if m:
1058             playlists = re.findall(
1059                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1060             if playlists:
1061                 return _playlist_from_matches(
1062                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1063
1064         # Look for embedded Wistia player
1065         match = re.search(
1066             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1067         if match:
1068             embed_url = self._proto_relative_url(
1069                 unescapeHTML(match.group('url')))
1070             return {
1071                 '_type': 'url_transparent',
1072                 'url': embed_url,
1073                 'ie_key': 'Wistia',
1074                 'uploader': video_uploader,
1075                 'title': video_title,
1076                 'id': video_id,
1077             }
1078
1079         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1080         if match:
1081             return {
1082                 '_type': 'url_transparent',
1083                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1084                 'ie_key': 'Wistia',
1085                 'uploader': video_uploader,
1086                 'title': video_title,
1087                 'id': match.group('id')
1088             }
1089
1090         # Look for embedded blip.tv player
1091         bliptv_url = BlipTVIE._extract_url(webpage)
1092         if bliptv_url:
1093             return self.url_result(bliptv_url, 'BlipTV')
1094
1095         # Look for SVT player
1096         svt_url = SVTIE._extract_url(webpage)
1097         if svt_url:
1098             return self.url_result(svt_url, 'SVT')
1099
1100         # Look for embedded condenast player
1101         matches = re.findall(
1102             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1103             webpage)
1104         if matches:
1105             return {
1106                 '_type': 'playlist',
1107                 'entries': [{
1108                     '_type': 'url',
1109                     'ie_key': 'CondeNast',
1110                     'url': ma,
1111                 } for ma in matches],
1112                 'title': video_title,
1113                 'id': video_id,
1114             }
1115
1116         # Look for Bandcamp pages with custom domain
1117         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1118         if mobj is not None:
1119             burl = unescapeHTML(mobj.group(1))
1120             # Don't set the extractor because it can be a track url or an album
1121             return self.url_result(burl)
1122
1123         # Look for embedded Vevo player
1124         mobj = re.search(
1125             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1126         if mobj is not None:
1127             return self.url_result(mobj.group('url'))
1128
1129         # Look for embedded Viddler player
1130         mobj = re.search(
1131             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1132             webpage)
1133         if mobj is not None:
1134             return self.url_result(mobj.group('url'))
1135
1136         # Look for NYTimes player
1137         mobj = re.search(
1138             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1139             webpage)
1140         if mobj is not None:
1141             return self.url_result(mobj.group('url'))
1142
1143         # Look for Libsyn player
1144         mobj = re.search(
1145             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1146         if mobj is not None:
1147             return self.url_result(mobj.group('url'))
1148
1149         # Look for Ooyala videos
1150         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1151                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1152                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1153                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1154         if mobj is not None:
1155             return OoyalaIE._build_url_result(mobj.group('ec'))
1156
1157         # Look for multiple Ooyala embeds on SBN network websites
1158         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1159         if mobj is not None:
1160             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1161             if embeds:
1162                 return _playlist_from_matches(
1163                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1164
1165         # Look for Aparat videos
1166         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1167         if mobj is not None:
1168             return self.url_result(mobj.group(1), 'Aparat')
1169
1170         # Look for MPORA videos
1171         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1172         if mobj is not None:
1173             return self.url_result(mobj.group(1), 'Mpora')
1174
1175         # Look for embedded NovaMov-based player
1176         mobj = re.search(
1177             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1178                     (?P<url>http://(?:(?:embed|www)\.)?
1179                         (?:novamov\.com|
1180                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1181                            videoweed\.(?:es|com)|
1182                            movshare\.(?:net|sx|ag)|
1183                            divxstage\.(?:eu|net|ch|co|at|ag))
1184                         /embed\.php.+?)\1''', webpage)
1185         if mobj is not None:
1186             return self.url_result(mobj.group('url'))
1187
1188         # Look for embedded Facebook player
1189         mobj = re.search(
1190             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1191         if mobj is not None:
1192             return self.url_result(mobj.group('url'), 'Facebook')
1193
1194         # Look for embedded VK player
1195         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1196         if mobj is not None:
1197             return self.url_result(mobj.group('url'), 'VK')
1198
1199         # Look for embedded ivi player
1200         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1201         if mobj is not None:
1202             return self.url_result(mobj.group('url'), 'Ivi')
1203
1204         # Look for embedded Huffington Post player
1205         mobj = re.search(
1206             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1207         if mobj is not None:
1208             return self.url_result(mobj.group('url'), 'HuffPost')
1209
1210         # Look for embed.ly
1211         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1212         if mobj is not None:
1213             return self.url_result(mobj.group('url'))
1214         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1215         if mobj is not None:
1216             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1217
1218         # Look for funnyordie embed
1219         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1220         if matches:
1221             return _playlist_from_matches(
1222                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1223
1224         # Look for BBC iPlayer embed
1225         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1226         if matches:
1227             return _playlist_from_matches(matches, ie='BBCCoUk')
1228
1229         # Look for embedded RUTV player
1230         rutv_url = RUTVIE._extract_url(webpage)
1231         if rutv_url:
1232             return self.url_result(rutv_url, 'RUTV')
1233
1234         # Look for embedded TED player
1235         mobj = re.search(
1236             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1237         if mobj is not None:
1238             return self.url_result(mobj.group('url'), 'TED')
1239
1240         # Look for embedded Ustream videos
1241         mobj = re.search(
1242             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1243         if mobj is not None:
1244             return self.url_result(mobj.group('url'), 'Ustream')
1245
1246         # Look for embedded arte.tv player
1247         mobj = re.search(
1248             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1249             webpage)
1250         if mobj is not None:
1251             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1252
1253         # Look for embedded smotri.com player
1254         smotri_url = SmotriIE._extract_url(webpage)
1255         if smotri_url:
1256             return self.url_result(smotri_url, 'Smotri')
1257
1258         # Look for embeded soundcloud player
1259         mobj = re.search(
1260             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1261             webpage)
1262         if mobj is not None:
1263             url = unescapeHTML(mobj.group('url'))
1264             return self.url_result(url)
1265
1266         # Look for embedded vulture.com player
1267         mobj = re.search(
1268             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1269             webpage)
1270         if mobj is not None:
1271             url = unescapeHTML(mobj.group('url'))
1272             return self.url_result(url, ie='Vulture')
1273
1274         # Look for embedded mtvservices player
1275         mobj = re.search(
1276             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1277             webpage)
1278         if mobj is not None:
1279             url = unescapeHTML(mobj.group('url'))
1280             return self.url_result(url, ie='MTVServicesEmbedded')
1281
1282         # Look for embedded yahoo player
1283         mobj = re.search(
1284             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1285             webpage)
1286         if mobj is not None:
1287             return self.url_result(mobj.group('url'), 'Yahoo')
1288
1289         # Look for embedded sbs.com.au player
1290         mobj = re.search(
1291             r'''(?x)
1292             (?:
1293                 <meta\s+property="og:video"\s+content=|
1294                 <iframe[^>]+?src=
1295             )
1296             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1297             webpage)
1298         if mobj is not None:
1299             return self.url_result(mobj.group('url'), 'SBS')
1300
1301         # Look for embedded Cinchcast player
1302         mobj = re.search(
1303             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1304             webpage)
1305         if mobj is not None:
1306             return self.url_result(mobj.group('url'), 'Cinchcast')
1307
1308         mobj = re.search(
1309             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1310             webpage)
1311         if not mobj:
1312             mobj = re.search(
1313                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1314                 webpage)
1315         if mobj is not None:
1316             return self.url_result(mobj.group('url'), 'MLB')
1317
1318         mobj = re.search(
1319             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1320             webpage)
1321         if mobj is not None:
1322             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1323
1324         mobj = re.search(
1325             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1326             webpage)
1327         if mobj is not None:
1328             return self.url_result(mobj.group('url'), 'Livestream')
1329
1330         # Look for Zapiks embed
1331         mobj = re.search(
1332             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1333         if mobj is not None:
1334             return self.url_result(mobj.group('url'), 'Zapiks')
1335
1336         # Look for Kaltura embeds
1337         mobj = re.search(
1338             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1339         if mobj is not None:
1340             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1341
1342         # Look for Eagle.Platform embeds
1343         mobj = re.search(
1344             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1345         if mobj is not None:
1346             return self.url_result(mobj.group('url'), 'EaglePlatform')
1347
1348         # Look for ClipYou (uses Eagle.Platform) embeds
1349         mobj = re.search(
1350             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1351         if mobj is not None:
1352             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1353
1354         # Look for Pladform embeds
1355         mobj = re.search(
1356             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1357         if mobj is not None:
1358             return self.url_result(mobj.group('url'), 'Pladform')
1359
1360         # Look for Playwire embeds
1361         mobj = re.search(
1362             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1363         if mobj is not None:
1364             return self.url_result(mobj.group('url'))
1365
1366         # Look for 5min embeds
1367         mobj = re.search(
1368             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1369         if mobj is not None:
1370             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1371
1372         # Look for Crooks and Liars embeds
1373         mobj = re.search(
1374             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1375         if mobj is not None:
1376             return self.url_result(mobj.group('url'))
1377
1378         # Look for NBC Sports VPlayer embeds
1379         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1380         if nbc_sports_url:
1381             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1382
1383         # Look for UDN embeds
1384         mobj = re.search(
1385             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1386         if mobj is not None:
1387             return self.url_result(
1388                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1389
1390         # Look for Senate ISVP iframe
1391         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1392         if senate_isvp_url:
1393             return self.url_result(surl, 'SenateISVP')
1394
1395         def check_video(vurl):
1396             if YoutubeIE.suitable(vurl):
1397                 return True
1398             vpath = compat_urlparse.urlparse(vurl).path
1399             vext = determine_ext(vpath)
1400             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1401
1402         def filter_video(urls):
1403             return list(filter(check_video, urls))
1404
1405         # Start with something easy: JW Player in SWFObject
1406         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1407         if not found:
1408             # Look for gorilla-vid style embedding
1409             found = filter_video(re.findall(r'''(?sx)
1410                 (?:
1411                     jw_plugins|
1412                     JWPlayerOptions|
1413                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1414                 )
1415                 .*?
1416                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1417         if not found:
1418             # Broaden the search a little bit
1419             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1420         if not found:
1421             # Broaden the findall a little bit: JWPlayer JS loader
1422             found = filter_video(re.findall(
1423                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1424         if not found:
1425             # Flow player
1426             found = filter_video(re.findall(r'''(?xs)
1427                 flowplayer\("[^"]+",\s*
1428                     \{[^}]+?\}\s*,
1429                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1430                         ["']?url["']?\s*:\s*["']([^"']+)["']
1431             ''', webpage))
1432         if not found:
1433             # Cinerama player
1434             found = re.findall(
1435                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1436         if not found:
1437             # Try to find twitter cards info
1438             found = filter_video(re.findall(
1439                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1440         if not found:
1441             # We look for Open Graph info:
1442             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1443             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1444             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1445             if m_video_type is not None:
1446                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1447         if not found:
1448             # HTML5 video
1449             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1450         if not found:
1451             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1452             found = re.search(
1453                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1454                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1455                 webpage)
1456             if not found:
1457                 # Look also in Refresh HTTP header
1458                 refresh_header = head_response.headers.get('Refresh')
1459                 if refresh_header:
1460                     found = re.search(REDIRECT_REGEX, refresh_header)
1461             if found:
1462                 new_url = compat_urlparse.urljoin(url, found.group(1))
1463                 self.report_following_redirect(new_url)
1464                 return {
1465                     '_type': 'url',
1466                     'url': new_url,
1467                 }
1468         if not found:
1469             raise UnsupportedError(url)
1470
1471         entries = []
1472         for video_url in found:
1473             video_url = compat_urlparse.urljoin(url, video_url)
1474             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1475
1476             # Sometimes, jwplayer extraction will result in a YouTube URL
1477             if YoutubeIE.suitable(video_url):
1478                 entries.append(self.url_result(video_url, 'Youtube'))
1479                 continue
1480
1481             # here's a fun little line of code for you:
1482             video_id = os.path.splitext(video_id)[0]
1483
1484             if determine_ext(video_url) == 'smil':
1485                 entries.append({
1486                     'id': video_id,
1487                     'formats': self._extract_smil_formats(video_url, video_id),
1488                     'uploader': video_uploader,
1489                     'title': video_title,
1490                     'age_limit': age_limit,
1491                 })
1492             else:
1493                 entries.append({
1494                     'id': video_id,
1495                     'url': video_url,
1496                     'uploader': video_uploader,
1497                     'title': video_title,
1498                     'age_limit': age_limit,
1499                 })
1500
1501         if len(entries) == 1:
1502             return entries[0]
1503         else:
1504             for num, e in enumerate(entries, start=1):
1505                 # 'url' results don't have a title
1506                 if e.get('title') is not None:
1507                     e['title'] = '%s (%d)' % (e['title'], num)
1508             return {
1509                 '_type': 'playlist',
1510                 'entries': entries,
1511             }