_ Git - youtube-dl/blob - youtube_dl/extractor/generic.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import os
   6 import re
   7
   8 from .common import InfoExtractor
   9 from .youtube import YoutubeIE
  10 from ..compat import (
  11     compat_urllib_parse,
  12     compat_urllib_parse_unquote,
  13     compat_urllib_request,
  14     compat_urlparse,
  15     compat_xml_parse_error,
  16 )
  17 from ..utils import (
  18     determine_ext,
  19     ExtractorError,
  20     float_or_none,
  21     HEADRequest,
  22     is_html,
  23     orderedSet,
  24     parse_xml,
  25     smuggle_url,
  26     unescapeHTML,
  27     unified_strdate,
  28     unsmuggle_url,
  29     UnsupportedError,
  30     url_basename,
  31     xpath_text,
  32 )
  33 from .brightcove import BrightcoveIE
  34 from .nbc import NBCSportsVPlayerIE
  35 from .ooyala import OoyalaIE
  36 from .rutv import RUTVIE
  37 from .tvc import TVCIE
  38 from .sportbox import SportBoxEmbedIE
  39 from .smotri import SmotriIE
  40 from .condenast import CondeNastIE
  41 from .udn import UDNEmbedIE
  42 from .senateisvp import SenateISVPIE
  43 from .bliptv import BlipTVIE
  44 from .svt import SVTIE
  45 from .pornhub import PornHubIE
  46 from .vimeo import VimeoIE
  47
  48
  49 class GenericIE(InfoExtractor):
  50     IE_DESC = 'Generic downloader that works on some sites'
  51     _VALID_URL = r'.*'
  52     IE_NAME = 'generic'
  53     _TESTS = [
  54         # Direct link to a video
  55         {
  56             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  57             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  58             'info_dict': {
  59                 'id': 'trailer',
  60                 'ext': 'mp4',
  61                 'title': 'trailer',
  62                 'upload_date': '20100513',
  63             }
  64         },
  65         # Direct link to media delivered compressed (until Accept-Encoding is *)
  66         {
  67             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  68             'md5': '128c42e68b13950268b648275386fc74',
  69             'info_dict': {
  70                 'id': 'FictionJunction-Parallel_Hearts',
  71                 'ext': 'flac',
  72                 'title': 'FictionJunction-Parallel_Hearts',
  73                 'upload_date': '20140522',
  74             },
  75             'expected_warnings': [
  76                 'URL could be a direct video link, returning it as such.'
  77             ]
  78         },
  79         # Direct download with broken HEAD
  80         {
  81             'url': 'http://ai-radio.org:8000/radio.opus',
  82             'info_dict': {
  83                 'id': 'radio',
  84                 'ext': 'opus',
  85                 'title': 'radio',
  86             },
  87             'params': {
  88                 'skip_download': True,  # infinite live stream
  89             },
  90             'expected_warnings': [
  91                 r'501.*Not Implemented'
  92             ],
  93         },
  94         # Direct link with incorrect MIME type
  95         {
  96             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
  97             'md5': '4ccbebe5f36706d85221f204d7eb5913',
  98             'info_dict': {
  99                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 100                 'id': '5_Lennart_Poettering_-_Systemd',
 101                 'ext': 'webm',
 102                 'title': '5_Lennart_Poettering_-_Systemd',
 103                 'upload_date': '20141120',
 104             },
 105             'expected_warnings': [
 106                 'URL could be a direct video link, returning it as such.'
 107             ]
 108         },
 109         # RSS feed
 110         {
 111             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 112             'info_dict': {
 113                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 114                 'title': 'Zero Punctuation',
 115                 'description': 're:.*groundbreaking video review series.*'
 116             },
 117             'playlist_mincount': 11,
 118         },
 119         # RSS feed with enclosure
 120         {
 121             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 122             'info_dict': {
 123                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 124                 'ext': 'm4v',
 125                 'upload_date': '20150228',
 126                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 127             }
 128         },
 129         # google redirect
 130         {
 131             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 132             'info_dict': {
 133                 'id': 'cmQHVoWB5FY',
 134                 'ext': 'mp4',
 135                 'upload_date': '20130224',
 136                 'uploader_id': 'TheVerge',
 137                 'description': 're:^Chris Ziegler takes a look at the\.*',
 138                 'uploader': 'The Verge',
 139                 'title': 'First Firefox OS phones side-by-side',
 140             },
 141             'params': {
 142                 'skip_download': False,
 143             }
 144         },
 145         {
 146             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
 147             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
 148             'info_dict': {
 149                 'id': '13601338388002',
 150                 'ext': 'mp4',
 151                 'uploader': 'www.hodiho.fr',
 152                 'title': 'R\u00e9gis plante sa Jeep',
 153             }
 154         },
 155         # bandcamp page with custom domain
 156         {
 157             'add_ie': ['Bandcamp'],
 158             'url': 'http://bronyrock.com/track/the-pony-mash',
 159             'info_dict': {
 160                 'id': '3235767654',
 161                 'ext': 'mp3',
 162                 'title': 'The Pony Mash',
 163                 'uploader': 'M_Pallante',
 164             },
 165             'skip': 'There is a limit of 200 free downloads / month for the test song',
 166         },
 167         # embedded brightcove video
 168         # it also tests brightcove videos that need to set the 'Referer' in the
 169         # http requests
 170         {
 171             'add_ie': ['Brightcove'],
 172             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 173             'info_dict': {
 174                 'id': '2765128793001',
 175                 'ext': 'mp4',
 176                 'title': 'Le cours de bourse : l’analyse technique',
 177                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 178                 'uploader': 'BFM BUSINESS',
 179             },
 180             'params': {
 181                 'skip_download': True,
 182             },
 183         },
 184         {
 185             # https://github.com/rg3/youtube-dl/issues/2253
 186             'url': 'http://bcove.me/i6nfkrc3',
 187             'md5': '0ba9446db037002366bab3b3eb30c88c',
 188             'info_dict': {
 189                 'id': '3101154703001',
 190                 'ext': 'mp4',
 191                 'title': 'Still no power',
 192                 'uploader': 'thestar.com',
 193                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 194             },
 195             'add_ie': ['Brightcove'],
 196         },
 197         {
 198             'url': 'http://www.championat.com/video/football/v/87/87499.html',
 199             'md5': 'fb973ecf6e4a78a67453647444222983',
 200             'info_dict': {
 201                 'id': '3414141473001',
 202                 'ext': 'mp4',
 203                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 204                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 205                 'uploader': 'Championat',
 206             },
 207         },
 208         {
 209             # https://github.com/rg3/youtube-dl/issues/3541
 210             'add_ie': ['Brightcove'],
 211             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 212             'info_dict': {
 213                 'id': '3866516442001',
 214                 'ext': 'mp4',
 215                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
 216                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
 217                 'uploader': 'SBS Broadcasting',
 218             },
 219             'skip': 'Restricted to Netherlands',
 220             'params': {
 221                 'skip_download': True,  # m3u8 download
 222             },
 223         },
 224         # ooyala video
 225         {
 226             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 227             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
 228             'info_dict': {
 229                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 230                 'ext': 'mp4',
 231                 'title': '2cc213299525360.mov',  # that's what we get
 232             },
 233             'add_ie': ['Ooyala'],
 234         },
 235         # multiple ooyala embeds on SBN network websites
 236         {
 237             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 238             'info_dict': {
 239                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 240                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 241             },
 242             'playlist_mincount': 3,
 243             'params': {
 244                 'skip_download': True,
 245             },
 246             'add_ie': ['Ooyala'],
 247         },
 248         # embed.ly video
 249         {
 250             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 251             'info_dict': {
 252                 'id': '9ODmcdjQcHQ',
 253                 'ext': 'mp4',
 254                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 255                 'upload_date': '20140225',
 256                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 257                 'uploader': 'Tested',
 258                 'uploader_id': 'testedcom',
 259             },
 260             # No need to test YoutubeIE here
 261             'params': {
 262                 'skip_download': True,
 263             },
 264         },
 265         # funnyordie embed
 266         {
 267             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 268             'info_dict': {
 269                 'id': '18e820ec3f',
 270                 'ext': 'mp4',
 271                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 272                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
 273             },
 274         },
 275         # BBC iPlayer embeds
 276         {
 277             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 278             'info_dict': {
 279                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 280             },
 281             'playlist_mincount': 18,
 282         },
 283         # RUTV embed
 284         {
 285             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 286             'info_dict': {
 287                 'id': '776940',
 288                 'ext': 'mp4',
 289                 'title': 'Охотское море стало целиком российским',
 290                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 291             },
 292             'params': {
 293                 # m3u8 download
 294                 'skip_download': True,
 295             },
 296         },
 297         # TVC embed
 298         {
 299             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 300             'info_dict': {
 301                 'id': '55304',
 302                 'ext': 'mp4',
 303                 'title': 'Дошкольное воспитание',
 304             },
 305         },
 306         # SportBox embed
 307         {
 308             'url': 'http://www.vestifinance.ru/articles/25753',
 309             'info_dict': {
 310                 'id': '25753',
 311                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 312             },
 313             'playlist': [{
 314                 'info_dict': {
 315                     'id': '370908',
 316                     'title': 'Госзаказ. День 3',
 317                     'ext': 'mp4',
 318                 }
 319             }, {
 320                 'info_dict': {
 321                     'id': '370905',
 322                     'title': 'Госзаказ. День 2',
 323                     'ext': 'mp4',
 324                 }
 325             }, {
 326                 'info_dict': {
 327                     'id': '370902',
 328                     'title': 'Госзаказ. День 1',
 329                     'ext': 'mp4',
 330                 }
 331             }],
 332             'params': {
 333                 # m3u8 download
 334                 'skip_download': True,
 335             },
 336         },
 337         # Embedded TED video
 338         {
 339             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
 340             'md5': '65fdff94098e4a607385a60c5177c638',
 341             'info_dict': {
 342                 'id': '1969',
 343                 'ext': 'mp4',
 344                 'title': 'Hidden miracles of the natural world',
 345                 'uploader': 'Louie Schwartzberg',
 346                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
 347             }
 348         },
 349         # Embedded Ustream video
 350         {
 351             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 352             'md5': '27b99cdb639c9b12a79bca876a073417',
 353             'info_dict': {
 354                 'id': '45734260',
 355                 'ext': 'flv',
 356                 'uploader': 'AU SPA:  The NSA and Privacy',
 357                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 358             }
 359         },
 360         # nowvideo embed hidden behind percent encoding
 361         {
 362             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 363             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 364             'info_dict': {
 365                 'id': '06e53103ca9aa',
 366                 'ext': 'flv',
 367                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 368                 'description': 'No description',
 369             },
 370         },
 371         # arte embed
 372         {
 373             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 374             'md5': '7653032cbb25bf6c80d80f217055fa43',
 375             'info_dict': {
 376                 'id': '048195-004_PLUS7-F',
 377                 'ext': 'flv',
 378                 'title': 'X:enius',
 379                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 380                 'upload_date': '20140320',
 381             },
 382             'params': {
 383                 'skip_download': 'Requires rtmpdump'
 384             }
 385         },
 386         # Condé Nast embed
 387         {
 388             'url': 'http://www.wired.com/2014/04/honda-asimo/',
 389             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 390             'info_dict': {
 391                 'id': '53501be369702d3275860000',
 392                 'ext': 'mp4',
 393                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 394             }
 395         },
 396         # Dailymotion embed
 397         {
 398             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 399             'md5': '441aeeb82eb72c422c7f14ec533999cd',
 400             'info_dict': {
 401                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
 402                 'ext': 'mp4',
 403                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 404                 'uploader': 'Spi0n',
 405             },
 406             'add_ie': ['Dailymotion'],
 407         },
 408         # YouTube embed
 409         {
 410             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 411             'info_dict': {
 412                 'id': 'FXRb4ykk4S0',
 413                 'ext': 'mp4',
 414                 'title': 'The NBL Auction 2014',
 415                 'uploader': 'BADMINTON England',
 416                 'uploader_id': 'BADMINTONEvents',
 417                 'upload_date': '20140603',
 418                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 419             },
 420             'add_ie': ['Youtube'],
 421             'params': {
 422                 'skip_download': True,
 423             }
 424         },
 425         # MTVServices embed
 426         {
 427             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 428             'md5': '35727f82f58c76d996fc188f9755b0d5',
 429             'info_dict': {
 430                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 431                 'ext': 'mp4',
 432                 'title': 'Review',
 433                 'description': 'Mario\'s life in the fast lane has never looked so good.',
 434             },
 435         },
 436         # YouTube embed via <data-embed-url="">
 437         {
 438             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 439             'info_dict': {
 440                 'id': '4vAffPZIT44',
 441                 'ext': 'mp4',
 442                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
 443                 'uploader': 'Gameloft',
 444                 'uploader_id': 'gameloft',
 445                 'upload_date': '20140828',
 446                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
 447             },
 448             'params': {
 449                 'skip_download': True,
 450             }
 451         },
 452         # Camtasia studio
 453         {
 454             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 455             'playlist': [{
 456                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 457                 'info_dict': {
 458                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 459                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 460                     'ext': 'flv',
 461                     'duration': 2235.90,
 462                 }
 463             }, {
 464                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 465                 'info_dict': {
 466                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 467                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 468                     'ext': 'flv',
 469                     'duration': 2235.93,
 470                 }
 471             }],
 472             'info_dict': {
 473                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 474             }
 475         },
 476         # Flowplayer
 477         {
 478             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 479             'md5': '9d65602bf31c6e20014319c7d07fba27',
 480             'info_dict': {
 481                 'id': '5123ea6d5e5a7',
 482                 'ext': 'mp4',
 483                 'age_limit': 18,
 484                 'uploader': 'www.handjobhub.com',
 485                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
 486             }
 487         },
 488         # Multiple brightcove videos
 489         # https://github.com/rg3/youtube-dl/issues/2283
 490         {
 491             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 492             'info_dict': {
 493                 'id': 'always-never',
 494                 'title': 'Always / Never - The New Yorker',
 495             },
 496             'playlist_count': 3,
 497             'params': {
 498                 'extract_flat': False,
 499                 'skip_download': True,
 500             }
 501         },
 502         # MLB embed
 503         {
 504             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 505             'md5': '96f09a37e44da40dd083e12d9a683327',
 506             'info_dict': {
 507                 'id': '33322633',
 508                 'ext': 'mp4',
 509                 'title': 'Ump changes call to ball',
 510                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 511                 'duration': 48,
 512                 'timestamp': 1401537900,
 513                 'upload_date': '20140531',
 514                 'thumbnail': 're:^https?://.*\.jpg$',
 515             },
 516         },
 517         # Wistia embed
 518         {
 519             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 520             'md5': '8788b683c777a5cf25621eaf286d0c23',
 521             'info_dict': {
 522                 'id': '1cfaf6b7ea',
 523                 'ext': 'mov',
 524                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 525                 'duration': 643.0,
 526                 'filesize': 182808282,
 527                 'uploader': 'education-portal.com',
 528             },
 529         },
 530         {
 531             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 532             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 533             'info_dict': {
 534                 'id': 'uxjb0lwrcz',
 535                 'ext': 'mp4',
 536                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
 537                 'duration': 1715.0,
 538                 'uploader': 'thoughtworks.wistia.com',
 539             },
 540         },
 541         # Soundcloud embed
 542         {
 543             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 544             'info_dict': {
 545                 'id': '174391317',
 546                 'ext': 'mp3',
 547                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 548                 'uploader': 'Sophos Security',
 549                 'title': 'Chet Chat 171 - Oct 29, 2014',
 550                 'upload_date': '20141029',
 551             }
 552         },
 553         # Livestream embed
 554         {
 555             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 556             'info_dict': {
 557                 'id': '67864563',
 558                 'ext': 'flv',
 559                 'upload_date': '20141112',
 560                 'title': 'Rosetta #CometLanding webcast HL 10',
 561             }
 562         },
 563         # LazyYT
 564         {
 565             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 566             'info_dict': {
 567                 'id': '1986',
 568                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 569             },
 570             'playlist_mincount': 2,
 571         },
 572         # Cinchcast embed
 573         {
 574             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 575             'info_dict': {
 576                 'id': '7141703',
 577                 'ext': 'mp3',
 578                 'upload_date': '20141126',
 579                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 580             }
 581         },
 582         # Cinerama player
 583         {
 584             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 585             'info_dict': {
 586                 'id': '730m_DandD_1901_512k',
 587                 'ext': 'mp4',
 588                 'uploader': 'www.abc.net.au',
 589                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 590             }
 591         },
 592         # embedded viddler video
 593         {
 594             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 595             'info_dict': {
 596                 'id': '4d03aad9',
 597                 'ext': 'mp4',
 598                 'uploader': 'deadspin',
 599                 'title': 'WALL-TO-GORTAT',
 600                 'timestamp': 1422285291,
 601                 'upload_date': '20150126',
 602             },
 603             'add_ie': ['Viddler'],
 604         },
 605         # Libsyn embed
 606         {
 607             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 608             'info_dict': {
 609                 'id': '3377616',
 610                 'ext': 'mp3',
 611                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 612                 'description': 'md5:601cb790edd05908957dae8aaa866465',
 613                 'upload_date': '20150220',
 614             },
 615         },
 616         # jwplayer YouTube
 617         {
 618             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 619             'info_dict': {
 620                 'id': 'Mrj4DVp2zeA',
 621                 'ext': 'mp4',
 622                 'upload_date': '20150212',
 623                 'uploader': 'The National Archives UK',
 624                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 625                 'uploader_id': 'NationalArchives08',
 626                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 627             },
 628         },
 629         # rtl.nl embed
 630         {
 631             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 632             'playlist_mincount': 5,
 633             'info_dict': {
 634                 'id': 'aanslagen-kopenhagen',
 635                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 636             }
 637         },
 638         # Zapiks embed
 639         {
 640             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 641             'info_dict': {
 642                 'id': '118046',
 643                 'ext': 'mp4',
 644                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 645             }
 646         },
 647         # Kaltura embed
 648         {
 649             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 650             'info_dict': {
 651                 'id': '1_eergr3h1',
 652                 'ext': 'mp4',
 653                 'upload_date': '20150226',
 654                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 655                 'timestamp': int,
 656                 'title': 'John Carlson Postgame 2/25/15',
 657             },
 658         },
 659         # Eagle.Platform embed (generic URL)
 660         {
 661             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 662             'info_dict': {
 663                 'id': '227304',
 664                 'ext': 'mp4',
 665                 'title': 'Навальный вышел на свободу',
 666                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 667                 'thumbnail': 're:^https?://.*\.jpg$',
 668                 'duration': 87,
 669                 'view_count': int,
 670                 'age_limit': 0,
 671             },
 672         },
 673         # ClipYou (Eagle.Platform) embed (custom URL)
 674         {
 675             'url': 'http://muz-tv.ru/play/7129/',
 676             'info_dict': {
 677                 'id': '12820',
 678                 'ext': 'mp4',
 679                 'title': "'O Sole Mio",
 680                 'thumbnail': 're:^https?://.*\.jpg$',
 681                 'duration': 216,
 682                 'view_count': int,
 683             },
 684         },
 685         # Pladform embed
 686         {
 687             'url': 'http://muz-tv.ru/kinozal/view/7400/',
 688             'info_dict': {
 689                 'id': '100183293',
 690                 'ext': 'mp4',
 691                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
 692                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 693                 'thumbnail': 're:^https?://.*\.jpg$',
 694                 'duration': 694,
 695                 'age_limit': 0,
 696             },
 697         },
 698         # Playwire embed
 699         {
 700             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 701             'info_dict': {
 702                 'id': '3519514',
 703                 'ext': 'mp4',
 704                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 705                 'thumbnail': 're:^https?://.*\.png$',
 706                 'duration': 45.115,
 707             },
 708         },
 709         # 5min embed
 710         {
 711             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 712             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 713             'info_dict': {
 714                 'id': '518726732',
 715                 'ext': 'mp4',
 716                 'title': 'Facebook Creates "On This Day" | Crunch Report',
 717             },
 718         },
 719         # SVT embed
 720         {
 721             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 722             'info_dict': {
 723                 'id': '2900353',
 724                 'ext': 'flv',
 725                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 726                 'duration': 27,
 727                 'age_limit': 0,
 728             },
 729         },
 730         # Crooks and Liars embed
 731         {
 732             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 733             'info_dict': {
 734                 'id': '8RUoRhRi',
 735                 'ext': 'mp4',
 736                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 737                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 738                 'timestamp': 1428207000,
 739                 'upload_date': '20150405',
 740                 'uploader': 'Heather',
 741             },
 742         },
 743         # Crooks and Liars external embed
 744         {
 745             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 746             'info_dict': {
 747                 'id': 'MTE3MjUtMzQ2MzA',
 748                 'ext': 'mp4',
 749                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 750                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 751                 'timestamp': 1265032391,
 752                 'upload_date': '20100201',
 753                 'uploader': 'Heather',
 754             },
 755         },
 756         # NBC Sports vplayer embed
 757         {
 758             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
 759             'info_dict': {
 760                 'id': 'ln7x1qSThw4k',
 761                 'ext': 'flv',
 762                 'title': "PFT Live: New leader in the 'new-look' defense",
 763                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
 764             },
 765         },
 766         # UDN embed
 767         {
 768             'url': 'http://www.udn.com/news/story/7314/822787',
 769             'md5': 'fd2060e988c326991037b9aff9df21a6',
 770             'info_dict': {
 771                 'id': '300346',
 772                 'ext': 'mp4',
 773                 'title': '中一中男師變性 全校師生力挺',
 774                 'thumbnail': 're:^https?://.*\.jpg$',
 775             }
 776         },
 777         # Ooyala embed
 778         {
 779             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 780             'info_dict': {
 781                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 782                 'ext': 'mp4',
 783                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
 784                 'title': 'This is what separates the Excel masters from the wannabes',
 785             },
 786             'params': {
 787                 # m3u8 downloads
 788                 'skip_download': True,
 789             }
 790         },
 791         # Contains a SMIL manifest
 792         {
 793             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 794             'info_dict': {
 795                 'id': 'file',
 796                 'ext': 'flv',
 797                 'title': '+ Football: Lottery Champions League Europe',
 798                 'uploader': 'www.telewebion.com',
 799             },
 800             'params': {
 801                 # rtmpe downloads
 802                 'skip_download': True,
 803             }
 804         },
 805         # Brightcove URL in single quotes
 806         {
 807             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 808             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 809             'info_dict': {
 810                 'id': '4255764656001',
 811                 'ext': 'mp4',
 812                 'title': 'SN Presents: Russell Martin, World Citizen',
 813                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 814                 'uploader': 'Rogers Sportsnet',
 815             },
 816         }
 817     ]
 818
 819     def report_following_redirect(self, new_url):
 820         """Report information extraction."""
 821         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 822
 823     def _extract_rss(self, url, video_id, doc):
 824         playlist_title = doc.find('./channel/title').text
 825         playlist_desc_el = doc.find('./channel/description')
 826         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 827
 828         entries = []
 829         for it in doc.findall('./channel/item'):
 830             next_url = xpath_text(it, 'link', fatal=False)
 831             if not next_url:
 832                 enclosure_nodes = it.findall('./enclosure')
 833                 for e in enclosure_nodes:
 834                     next_url = e.attrib.get('url')
 835                     if next_url:
 836                         break
 837
 838             if not next_url:
 839                 continue
 840
 841             entries.append({
 842                 '_type': 'url',
 843                 'url': next_url,
 844                 'title': it.find('title').text,
 845             })
 846
 847         return {
 848             '_type': 'playlist',
 849             'id': url,
 850             'title': playlist_title,
 851             'description': playlist_desc,
 852             'entries': entries,
 853         }
 854
 855     def _extract_camtasia(self, url, video_id, webpage):
 856         """ Returns None if no camtasia video can be found. """
 857
 858         camtasia_cfg = self._search_regex(
 859             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 860             webpage, 'camtasia configuration file', default=None)
 861         if camtasia_cfg is None:
 862             return None
 863
 864         title = self._html_search_meta('DC.title', webpage, fatal=True)
 865
 866         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 867         camtasia_cfg = self._download_xml(
 868             camtasia_url, video_id,
 869             note='Downloading camtasia configuration',
 870             errnote='Failed to download camtasia configuration')
 871         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 872
 873         entries = []
 874         for n in fileset_node.getchildren():
 875             url_n = n.find('./uri')
 876             if url_n is None:
 877                 continue
 878
 879             entries.append({
 880                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 881                 'title': '%s - %s' % (title, n.tag),
 882                 'url': compat_urlparse.urljoin(url, url_n.text),
 883                 'duration': float_or_none(n.find('./duration').text),
 884             })
 885
 886         return {
 887             '_type': 'playlist',
 888             'entries': entries,
 889             'title': title,
 890         }
 891
 892     def _real_extract(self, url):
 893         if url.startswith('//'):
 894             return {
 895                 '_type': 'url',
 896                 'url': self.http_scheme() + url,
 897             }
 898
 899         parsed_url = compat_urlparse.urlparse(url)
 900         if not parsed_url.scheme:
 901             default_search = self._downloader.params.get('default_search')
 902             if default_search is None:
 903                 default_search = 'fixup_error'
 904
 905             if default_search in ('auto', 'auto_warning', 'fixup_error'):
 906                 if '/' in url:
 907                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 908                     return self.url_result('http://' + url)
 909                 elif default_search != 'fixup_error':
 910                     if default_search == 'auto_warning':
 911                         if re.match(r'^(?:url|URL)$', url):
 912                             raise ExtractorError(
 913                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 914                                 expected=True)
 915                         else:
 916                             self._downloader.report_warning(
 917                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 918                     return self.url_result('ytsearch:' + url)
 919
 920             if default_search in ('error', 'fixup_error'):
 921                 raise ExtractorError(
 922                     '%r is not a valid URL. '
 923                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 924                     % (url, url), expected=True)
 925             else:
 926                 if ':' not in default_search:
 927                     default_search += ':'
 928                 return self.url_result(default_search + url)
 929
 930         url, smuggled_data = unsmuggle_url(url)
 931         force_videoid = None
 932         is_intentional = smuggled_data and smuggled_data.get('to_generic')
 933         if smuggled_data and 'force_videoid' in smuggled_data:
 934             force_videoid = smuggled_data['force_videoid']
 935             video_id = force_videoid
 936         else:
 937             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
 938
 939         self.to_screen('%s: Requesting header' % video_id)
 940
 941         head_req = HEADRequest(url)
 942         head_response = self._request_webpage(
 943             head_req, video_id,
 944             note=False, errnote='Could not send HEAD request to %s' % url,
 945             fatal=False)
 946
 947         if head_response is not False:
 948             # Check for redirect
 949             new_url = head_response.geturl()
 950             if url != new_url:
 951                 self.report_following_redirect(new_url)
 952                 if force_videoid:
 953                     new_url = smuggle_url(
 954                         new_url, {'force_videoid': force_videoid})
 955                 return self.url_result(new_url)
 956
 957         full_response = None
 958         if head_response is False:
 959             request = compat_urllib_request.Request(url)
 960             request.add_header('Accept-Encoding', '*')
 961             full_response = self._request_webpage(request, video_id)
 962             head_response = full_response
 963
 964         # Check for direct link to a video
 965         content_type = head_response.headers.get('Content-Type', '')
 966         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 967         if m:
 968             upload_date = unified_strdate(
 969                 head_response.headers.get('Last-Modified'))
 970             return {
 971                 'id': video_id,
 972                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
 973                 'direct': True,
 974                 'formats': [{
 975                     'format_id': m.group('format_id'),
 976                     'url': url,
 977                     'vcodec': 'none' if m.group('type') == 'audio' else None
 978                 }],
 979                 'upload_date': upload_date,
 980             }
 981
 982         if not self._downloader.params.get('test', False) and not is_intentional:
 983             self._downloader.report_warning('Falling back on generic information extractor.')
 984
 985         if not full_response:
 986             request = compat_urllib_request.Request(url)
 987             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
 988             # making it impossible to download only chunk of the file (yet we need only 512kB to
 989             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
 990             # that will always result in downloading the whole file that is not desirable.
 991             # Therefore for extraction pass we have to override Accept-Encoding to any in order
 992             # to accept raw bytes and being able to download only a chunk.
 993             # It may probably better to solve this by checking Content-Type for application/octet-stream
 994             # after HEAD request finishes, but not sure if we can rely on this.
 995             request.add_header('Accept-Encoding', '*')
 996             full_response = self._request_webpage(request, video_id)
 997
 998         # Maybe it's a direct link to a video?
 999         # Be careful not to download the whole thing!
1000         first_bytes = full_response.read(512)
1001         if not is_html(first_bytes):
1002             self._downloader.report_warning(
1003                 'URL could be a direct video link, returning it as such.')
1004             upload_date = unified_strdate(
1005                 head_response.headers.get('Last-Modified'))
1006             return {
1007                 'id': video_id,
1008                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1009                 'direct': True,
1010                 'url': url,
1011                 'upload_date': upload_date,
1012             }
1013
1014         webpage = self._webpage_read_content(
1015             full_response, url, video_id, prefix=first_bytes)
1016
1017         self.report_extraction(video_id)
1018
1019         # Is it an RSS feed?
1020         try:
1021             doc = parse_xml(webpage)
1022             if doc.tag == 'rss':
1023                 return self._extract_rss(url, video_id, doc)
1024         except compat_xml_parse_error:
1025             pass
1026
1027         # Is it a Camtasia project?
1028         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1029         if camtasia_res is not None:
1030             return camtasia_res
1031
1032         # Sometimes embedded video player is hidden behind percent encoding
1033         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1034         # Unescaping the whole page allows to handle those cases in a generic way
1035         webpage = compat_urllib_parse.unquote(webpage)
1036
1037         # it's tempting to parse this further, but you would
1038         # have to take into account all the variations like
1039         #   Video Title - Site Name
1040         #   Site Name | Video Title
1041         #   Video Title - Tagline | Site Name
1042         # and so on and so forth; it's just not practical
1043         video_title = self._html_search_regex(
1044             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1045             default='video')
1046
1047         # Try to detect age limit automatically
1048         age_limit = self._rta_search(webpage)
1049         # And then there are the jokers who advertise that they use RTA,
1050         # but actually don't.
1051         AGE_LIMIT_MARKERS = [
1052             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1053         ]
1054         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1055             age_limit = 18
1056
1057         # video uploader is domain name
1058         video_uploader = self._search_regex(
1059             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1060
1061         # Helper method
1062         def _playlist_from_matches(matches, getter=None, ie=None):
1063             urlrs = orderedSet(
1064                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1065                 for m in matches)
1066             return self.playlist_result(
1067                 urlrs, playlist_id=video_id, playlist_title=video_title)
1068
1069         # Look for BrightCove:
1070         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1071         if bc_urls:
1072             self.to_screen('Brightcove video detected.')
1073             entries = [{
1074                 '_type': 'url',
1075                 'url': smuggle_url(bc_url, {'Referer': url}),
1076                 'ie_key': 'Brightcove'
1077             } for bc_url in bc_urls]
1078
1079             return {
1080                 '_type': 'playlist',
1081                 'title': video_title,
1082                 'id': video_id,
1083                 'entries': entries,
1084             }
1085
1086         # Look for embedded rtl.nl player
1087         matches = re.findall(
1088             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1089             webpage)
1090         if matches:
1091             return _playlist_from_matches(matches, ie='RtlNl')
1092
1093         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1094         if vimeo_url is not None:
1095             return self.url_result(vimeo_url)
1096
1097         # Look for embedded YouTube player
1098         matches = re.findall(r'''(?x)
1099             (?:
1100                 <iframe[^>]+?src=|
1101                 data-video-url=|
1102                 <embed[^>]+?src=|
1103                 embedSWF\(?:\s*|
1104                 new\s+SWFObject\(
1105             )
1106             (["\'])
1107                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1108                 (?:embed|v|p)/.+?)
1109             \1''', webpage)
1110         if matches:
1111             return _playlist_from_matches(
1112                 matches, lambda m: unescapeHTML(m[1]))
1113
1114         # Look for lazyYT YouTube embed
1115         matches = re.findall(
1116             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1117         if matches:
1118             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1119
1120         # Look for embedded Dailymotion player
1121         matches = re.findall(
1122             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1123         if matches:
1124             return _playlist_from_matches(
1125                 matches, lambda m: unescapeHTML(m[1]))
1126
1127         # Look for embedded Dailymotion playlist player (#3822)
1128         m = re.search(
1129             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1130         if m:
1131             playlists = re.findall(
1132                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1133             if playlists:
1134                 return _playlist_from_matches(
1135                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1136
1137         # Look for embedded Wistia player
1138         match = re.search(
1139             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1140         if match:
1141             embed_url = self._proto_relative_url(
1142                 unescapeHTML(match.group('url')))
1143             return {
1144                 '_type': 'url_transparent',
1145                 'url': embed_url,
1146                 'ie_key': 'Wistia',
1147                 'uploader': video_uploader,
1148                 'title': video_title,
1149                 'id': video_id,
1150             }
1151
1152         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1153         if match:
1154             return {
1155                 '_type': 'url_transparent',
1156                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1157                 'ie_key': 'Wistia',
1158                 'uploader': video_uploader,
1159                 'title': video_title,
1160                 'id': match.group('id')
1161             }
1162
1163         # Look for embedded blip.tv player
1164         bliptv_url = BlipTVIE._extract_url(webpage)
1165         if bliptv_url:
1166             return self.url_result(bliptv_url, 'BlipTV')
1167
1168         # Look for SVT player
1169         svt_url = SVTIE._extract_url(webpage)
1170         if svt_url:
1171             return self.url_result(svt_url, 'SVT')
1172
1173         # Look for embedded condenast player
1174         matches = re.findall(
1175             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1176             webpage)
1177         if matches:
1178             return {
1179                 '_type': 'playlist',
1180                 'entries': [{
1181                     '_type': 'url',
1182                     'ie_key': 'CondeNast',
1183                     'url': ma,
1184                 } for ma in matches],
1185                 'title': video_title,
1186                 'id': video_id,
1187             }
1188
1189         # Look for Bandcamp pages with custom domain
1190         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1191         if mobj is not None:
1192             burl = unescapeHTML(mobj.group(1))
1193             # Don't set the extractor because it can be a track url or an album
1194             return self.url_result(burl)
1195
1196         # Look for embedded Vevo player
1197         mobj = re.search(
1198             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1199         if mobj is not None:
1200             return self.url_result(mobj.group('url'))
1201
1202         # Look for embedded Viddler player
1203         mobj = re.search(
1204             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1205             webpage)
1206         if mobj is not None:
1207             return self.url_result(mobj.group('url'))
1208
1209         # Look for NYTimes player
1210         mobj = re.search(
1211             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1212             webpage)
1213         if mobj is not None:
1214             return self.url_result(mobj.group('url'))
1215
1216         # Look for Libsyn player
1217         mobj = re.search(
1218             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1219         if mobj is not None:
1220             return self.url_result(mobj.group('url'))
1221
1222         # Look for Ooyala videos
1223         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1224                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1225                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1226                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1227         if mobj is not None:
1228             return OoyalaIE._build_url_result(mobj.group('ec'))
1229
1230         # Look for multiple Ooyala embeds on SBN network websites
1231         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1232         if mobj is not None:
1233             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1234             if embeds:
1235                 return _playlist_from_matches(
1236                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1237
1238         # Look for Aparat videos
1239         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1240         if mobj is not None:
1241             return self.url_result(mobj.group(1), 'Aparat')
1242
1243         # Look for MPORA videos
1244         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1245         if mobj is not None:
1246             return self.url_result(mobj.group(1), 'Mpora')
1247
1248         # Look for embedded NovaMov-based player
1249         mobj = re.search(
1250             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1251                     (?P<url>http://(?:(?:embed|www)\.)?
1252                         (?:novamov\.com|
1253                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1254                            videoweed\.(?:es|com)|
1255                            movshare\.(?:net|sx|ag)|
1256                            divxstage\.(?:eu|net|ch|co|at|ag))
1257                         /embed\.php.+?)\1''', webpage)
1258         if mobj is not None:
1259             return self.url_result(mobj.group('url'))
1260
1261         # Look for embedded Facebook player
1262         mobj = re.search(
1263             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1264         if mobj is not None:
1265             return self.url_result(mobj.group('url'), 'Facebook')
1266
1267         # Look for embedded VK player
1268         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1269         if mobj is not None:
1270             return self.url_result(mobj.group('url'), 'VK')
1271
1272         # Look for embedded ivi player
1273         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1274         if mobj is not None:
1275             return self.url_result(mobj.group('url'), 'Ivi')
1276
1277         # Look for embedded Huffington Post player
1278         mobj = re.search(
1279             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1280         if mobj is not None:
1281             return self.url_result(mobj.group('url'), 'HuffPost')
1282
1283         # Look for embed.ly
1284         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1285         if mobj is not None:
1286             return self.url_result(mobj.group('url'))
1287         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1288         if mobj is not None:
1289             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1290
1291         # Look for funnyordie embed
1292         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1293         if matches:
1294             return _playlist_from_matches(
1295                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1296
1297         # Look for BBC iPlayer embed
1298         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1299         if matches:
1300             return _playlist_from_matches(matches, ie='BBCCoUk')
1301
1302         # Look for embedded RUTV player
1303         rutv_url = RUTVIE._extract_url(webpage)
1304         if rutv_url:
1305             return self.url_result(rutv_url, 'RUTV')
1306
1307         # Look for embedded TVC player
1308         tvc_url = TVCIE._extract_url(webpage)
1309         if tvc_url:
1310             return self.url_result(tvc_url, 'TVC')
1311
1312         # Look for embedded SportBox player
1313         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1314         if sportbox_urls:
1315             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1316
1317         # Look for embedded PornHub player
1318         pornhub_url = PornHubIE._extract_url(webpage)
1319         if pornhub_url:
1320             return self.url_result(pornhub_url, 'PornHub')
1321
1322         # Look for embedded Tvigle player
1323         mobj = re.search(
1324             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1325         if mobj is not None:
1326             return self.url_result(mobj.group('url'), 'Tvigle')
1327
1328         # Look for embedded TED player
1329         mobj = re.search(
1330             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1331         if mobj is not None:
1332             return self.url_result(mobj.group('url'), 'TED')
1333
1334         # Look for embedded Ustream videos
1335         mobj = re.search(
1336             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1337         if mobj is not None:
1338             return self.url_result(mobj.group('url'), 'Ustream')
1339
1340         # Look for embedded arte.tv player
1341         mobj = re.search(
1342             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1343             webpage)
1344         if mobj is not None:
1345             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1346
1347         # Look for embedded smotri.com player
1348         smotri_url = SmotriIE._extract_url(webpage)
1349         if smotri_url:
1350             return self.url_result(smotri_url, 'Smotri')
1351
1352         # Look for embeded soundcloud player
1353         mobj = re.search(
1354             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1355             webpage)
1356         if mobj is not None:
1357             url = unescapeHTML(mobj.group('url'))
1358             return self.url_result(url)
1359
1360         # Look for embedded vulture.com player
1361         mobj = re.search(
1362             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1363             webpage)
1364         if mobj is not None:
1365             url = unescapeHTML(mobj.group('url'))
1366             return self.url_result(url, ie='Vulture')
1367
1368         # Look for embedded mtvservices player
1369         mobj = re.search(
1370             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1371             webpage)
1372         if mobj is not None:
1373             url = unescapeHTML(mobj.group('url'))
1374             return self.url_result(url, ie='MTVServicesEmbedded')
1375
1376         # Look for embedded yahoo player
1377         mobj = re.search(
1378             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1379             webpage)
1380         if mobj is not None:
1381             return self.url_result(mobj.group('url'), 'Yahoo')
1382
1383         # Look for embedded sbs.com.au player
1384         mobj = re.search(
1385             r'''(?x)
1386             (?:
1387                 <meta\s+property="og:video"\s+content=|
1388                 <iframe[^>]+?src=
1389             )
1390             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1391             webpage)
1392         if mobj is not None:
1393             return self.url_result(mobj.group('url'), 'SBS')
1394
1395         # Look for embedded Cinchcast player
1396         mobj = re.search(
1397             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1398             webpage)
1399         if mobj is not None:
1400             return self.url_result(mobj.group('url'), 'Cinchcast')
1401
1402         mobj = re.search(
1403             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1404             webpage)
1405         if not mobj:
1406             mobj = re.search(
1407                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1408                 webpage)
1409         if mobj is not None:
1410             return self.url_result(mobj.group('url'), 'MLB')
1411
1412         mobj = re.search(
1413             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1414             webpage)
1415         if mobj is not None:
1416             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1417
1418         mobj = re.search(
1419             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1420             webpage)
1421         if mobj is not None:
1422             return self.url_result(mobj.group('url'), 'Livestream')
1423
1424         # Look for Zapiks embed
1425         mobj = re.search(
1426             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1427         if mobj is not None:
1428             return self.url_result(mobj.group('url'), 'Zapiks')
1429
1430         # Look for Kaltura embeds
1431         mobj = re.search(
1432             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1433         if mobj is not None:
1434             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1435
1436         # Look for Eagle.Platform embeds
1437         mobj = re.search(
1438             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1439         if mobj is not None:
1440             return self.url_result(mobj.group('url'), 'EaglePlatform')
1441
1442         # Look for ClipYou (uses Eagle.Platform) embeds
1443         mobj = re.search(
1444             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1445         if mobj is not None:
1446             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1447
1448         # Look for Pladform embeds
1449         mobj = re.search(
1450             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1451         if mobj is not None:
1452             return self.url_result(mobj.group('url'), 'Pladform')
1453
1454         # Look for Playwire embeds
1455         mobj = re.search(
1456             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1457         if mobj is not None:
1458             return self.url_result(mobj.group('url'))
1459
1460         # Look for 5min embeds
1461         mobj = re.search(
1462             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1463         if mobj is not None:
1464             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1465
1466         # Look for Crooks and Liars embeds
1467         mobj = re.search(
1468             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1469         if mobj is not None:
1470             return self.url_result(mobj.group('url'))
1471
1472         # Look for NBC Sports VPlayer embeds
1473         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1474         if nbc_sports_url:
1475             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1476
1477         # Look for UDN embeds
1478         mobj = re.search(
1479             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1480         if mobj is not None:
1481             return self.url_result(
1482                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1483
1484         # Look for Senate ISVP iframe
1485         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1486         if senate_isvp_url:
1487             return self.url_result(senate_isvp_url, 'SenateISVP')
1488
1489         def check_video(vurl):
1490             if YoutubeIE.suitable(vurl):
1491                 return True
1492             vpath = compat_urlparse.urlparse(vurl).path
1493             vext = determine_ext(vpath)
1494             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1495
1496         def filter_video(urls):
1497             return list(filter(check_video, urls))
1498
1499         # Start with something easy: JW Player in SWFObject
1500         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1501         if not found:
1502             # Look for gorilla-vid style embedding
1503             found = filter_video(re.findall(r'''(?sx)
1504                 (?:
1505                     jw_plugins|
1506                     JWPlayerOptions|
1507                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1508                 )
1509                 .*?
1510                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1511         if not found:
1512             # Broaden the search a little bit
1513             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1514         if not found:
1515             # Broaden the search a little bit more: JWPlayer JS loader
1516             found = filter_video(re.findall(
1517                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1518         if not found:
1519             # Flow player
1520             found = filter_video(re.findall(r'''(?xs)
1521                 flowplayer\("[^"]+",\s*
1522                     \{[^}]+?\}\s*,
1523                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1524                         ["']?url["']?\s*:\s*["']([^"']+)["']
1525             ''', webpage))
1526         if not found:
1527             # Cinerama player
1528             found = re.findall(
1529                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1530         if not found:
1531             # Try to find twitter cards info
1532             found = filter_video(re.findall(
1533                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1534         if not found:
1535             # We look for Open Graph info:
1536             # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
1537             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1538             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1539             if m_video_type is not None:
1540                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1541         if not found:
1542             # HTML5 video
1543             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1544         if not found:
1545             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1546             found = re.search(
1547                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1548                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1549                 webpage)
1550             if not found:
1551                 # Look also in Refresh HTTP header
1552                 refresh_header = head_response.headers.get('Refresh')
1553                 if refresh_header:
1554                     found = re.search(REDIRECT_REGEX, refresh_header)
1555             if found:
1556                 new_url = compat_urlparse.urljoin(url, found.group(1))
1557                 self.report_following_redirect(new_url)
1558                 return {
1559                     '_type': 'url',
1560                     'url': new_url,
1561                 }
1562         if not found:
1563             raise UnsupportedError(url)
1564
1565         entries = []
1566         for video_url in found:
1567             video_url = compat_urlparse.urljoin(url, video_url)
1568             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1569
1570             # Sometimes, jwplayer extraction will result in a YouTube URL
1571             if YoutubeIE.suitable(video_url):
1572                 entries.append(self.url_result(video_url, 'Youtube'))
1573                 continue
1574
1575             # here's a fun little line of code for you:
1576             video_id = os.path.splitext(video_id)[0]
1577
1578             if determine_ext(video_url) == 'smil':
1579                 entries.append({
1580                     'id': video_id,
1581                     'formats': self._extract_smil_formats(video_url, video_id),
1582                     'uploader': video_uploader,
1583                     'title': video_title,
1584                     'age_limit': age_limit,
1585                 })
1586             else:
1587                 entries.append({
1588                     'id': video_id,
1589                     'url': video_url,
1590                     'uploader': video_uploader,
1591                     'title': video_title,
1592                     'age_limit': age_limit,
1593                 })
1594
1595         if len(entries) == 1:
1596             return entries[0]
1597         else:
1598             for num, e in enumerate(entries, start=1):
1599                 # 'url' results don't have a title
1600                 if e.get('title') is not None:
1601                     e['title'] = '%s (%d)' % (e['title'], num)
1602             return {
1603                 '_type': 'playlist',
1604                 'entries': entries,
1605             }