[generic] remove sbnation test (handled by VoxMediaIE)
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_urllib_parse_unquote,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     sanitized_Request,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import (
34     BrightcoveLegacyIE,
35     BrightcoveNewIE,
36 )
37 from .nbc import NBCSportsVPlayerIE
38 from .ooyala import OoyalaIE
39 from .rutv import RUTVIE
40 from .tvc import TVCIE
41 from .sportbox import SportBoxEmbedIE
42 from .smotri import SmotriIE
43 from .myvi import MyviIE
44 from .condenast import CondeNastIE
45 from .udn import UDNEmbedIE
46 from .senateisvp import SenateISVPIE
47 from .svt import SVTIE
48 from .pornhub import PornHubIE
49 from .xhamster import XHamsterEmbedIE
50 from .tnaflix import TNAFlixNetworkEmbedIE
51 from .vimeo import VimeoIE
52 from .dailymotion import DailymotionCloudIE
53 from .onionstudios import OnionStudiosIE
54 from .snagfilms import SnagFilmsEmbedIE
55 from .screenwavemedia import ScreenwaveMediaIE
56 from .mtv import MTVServicesEmbeddedIE
57 from .pladform import PladformIE
58 from .videomore import VideomoreIE
59 from .googledrive import GoogleDriveIE
60 from .jwplatform import JWPlatformIE
61 from .digiteka import DigitekaIE
62 from .instagram import InstagramIE
63
64
65 class GenericIE(InfoExtractor):
66     IE_DESC = 'Generic downloader that works on some sites'
67     _VALID_URL = r'.*'
68     IE_NAME = 'generic'
69     _TESTS = [
70         # Direct link to a video
71         {
72             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
73             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
74             'info_dict': {
75                 'id': 'trailer',
76                 'ext': 'mp4',
77                 'title': 'trailer',
78                 'upload_date': '20100513',
79             }
80         },
81         # Direct link to media delivered compressed (until Accept-Encoding is *)
82         {
83             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
84             'md5': '128c42e68b13950268b648275386fc74',
85             'info_dict': {
86                 'id': 'FictionJunction-Parallel_Hearts',
87                 'ext': 'flac',
88                 'title': 'FictionJunction-Parallel_Hearts',
89                 'upload_date': '20140522',
90             },
91             'expected_warnings': [
92                 'URL could be a direct video link, returning it as such.'
93             ]
94         },
95         # Direct download with broken HEAD
96         {
97             'url': 'http://ai-radio.org:8000/radio.opus',
98             'info_dict': {
99                 'id': 'radio',
100                 'ext': 'opus',
101                 'title': 'radio',
102             },
103             'params': {
104                 'skip_download': True,  # infinite live stream
105             },
106             'expected_warnings': [
107                 r'501.*Not Implemented'
108             ],
109         },
110         # Direct link with incorrect MIME type
111         {
112             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
113             'md5': '4ccbebe5f36706d85221f204d7eb5913',
114             'info_dict': {
115                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
116                 'id': '5_Lennart_Poettering_-_Systemd',
117                 'ext': 'webm',
118                 'title': '5_Lennart_Poettering_-_Systemd',
119                 'upload_date': '20141120',
120             },
121             'expected_warnings': [
122                 'URL could be a direct video link, returning it as such.'
123             ]
124         },
125         # RSS feed
126         {
127             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
128             'info_dict': {
129                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
130                 'title': 'Zero Punctuation',
131                 'description': 're:.*groundbreaking video review series.*'
132             },
133             'playlist_mincount': 11,
134         },
135         # RSS feed with enclosure
136         {
137             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
138             'info_dict': {
139                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
140                 'ext': 'm4v',
141                 'upload_date': '20150228',
142                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
143             }
144         },
145         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
146         {
147             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
148             'info_dict': {
149                 'id': 'smil',
150                 'ext': 'mp4',
151                 'title': 'Automatics, robotics and biocybernetics',
152                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
153                 'upload_date': '20130627',
154                 'formats': 'mincount:16',
155                 'subtitles': 'mincount:1',
156             },
157             'params': {
158                 'force_generic_extractor': True,
159                 'skip_download': True,
160             },
161         },
162         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
163         {
164             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
165             'info_dict': {
166                 'id': 'hds',
167                 'ext': 'flv',
168                 'title': 'hds',
169                 'formats': 'mincount:1',
170             },
171             'params': {
172                 'skip_download': True,
173             },
174         },
175         # SMIL from https://www.restudy.dk/video/play/id/1637
176         {
177             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
178             'info_dict': {
179                 'id': 'video_1637',
180                 'ext': 'flv',
181                 'title': 'video_1637',
182                 'formats': 'mincount:3',
183             },
184             'params': {
185                 'skip_download': True,
186             },
187         },
188         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
189         {
190             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
191             'info_dict': {
192                 'id': 'smil-service',
193                 'ext': 'flv',
194                 'title': 'smil-service',
195                 'formats': 'mincount:1',
196             },
197             'params': {
198                 'skip_download': True,
199             },
200         },
201         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
202         {
203             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
204             'info_dict': {
205                 'id': '4719370',
206                 'ext': 'mp4',
207                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
208                 'formats': 'mincount:3',
209             },
210             'params': {
211                 'skip_download': True,
212             },
213         },
214         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
215         {
216             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
217             'info_dict': {
218                 'id': 'mZlp2ctYIUEB',
219                 'ext': 'mp4',
220                 'title': 'Tikibad ontruimd wegens brand',
221                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
222                 'thumbnail': 're:^https?://.*\.jpg$',
223                 'duration': 33,
224             },
225             'params': {
226                 'skip_download': True,
227             },
228         },
229         # MPD from http://dash-mse-test.appspot.com/media.html
230         {
231             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
232             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
233             'info_dict': {
234                 'id': 'car-20120827-manifest',
235                 'ext': 'mp4',
236                 'title': 'car-20120827-manifest',
237                 'formats': 'mincount:9',
238             },
239             'params': {
240                 'format': 'bestvideo',
241             },
242         },
243         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
244         {
245             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
246             'info_dict': {
247                 'id': 'content',
248                 'ext': 'mp4',
249                 'title': 'content',
250                 'formats': 'mincount:8',
251             },
252             'params': {
253                 # m3u8 downloads
254                 'skip_download': True,
255             }
256         },
257         # m3u8 served with Content-Type: text/plain
258         {
259             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
260             'info_dict': {
261                 'id': 'index',
262                 'ext': 'mp4',
263                 'title': 'index',
264                 'upload_date': '20140720',
265                 'formats': 'mincount:11',
266             },
267             'params': {
268                 # m3u8 downloads
269                 'skip_download': True,
270             }
271         },
272         # google redirect
273         {
274             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
275             'info_dict': {
276                 'id': 'cmQHVoWB5FY',
277                 'ext': 'mp4',
278                 'upload_date': '20130224',
279                 'uploader_id': 'TheVerge',
280                 'description': 're:^Chris Ziegler takes a look at the\.*',
281                 'uploader': 'The Verge',
282                 'title': 'First Firefox OS phones side-by-side',
283             },
284             'params': {
285                 'skip_download': False,
286             }
287         },
288         {
289             # redirect in Refresh HTTP header
290             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
291             'info_dict': {
292                 'id': 'pO8h3EaFRdo',
293                 'ext': 'mp4',
294                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
295                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
296                 'upload_date': '20150917',
297                 'uploader_id': 'brtvofficial',
298                 'uploader': 'Boiler Room',
299             },
300             'params': {
301                 'skip_download': False,
302             },
303         },
304         {
305             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
306             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
307             'info_dict': {
308                 'id': '13601338388002',
309                 'ext': 'mp4',
310                 'uploader': 'www.hodiho.fr',
311                 'title': 'R\u00e9gis plante sa Jeep',
312             }
313         },
314         # bandcamp page with custom domain
315         {
316             'add_ie': ['Bandcamp'],
317             'url': 'http://bronyrock.com/track/the-pony-mash',
318             'info_dict': {
319                 'id': '3235767654',
320                 'ext': 'mp3',
321                 'title': 'The Pony Mash',
322                 'uploader': 'M_Pallante',
323             },
324             'skip': 'There is a limit of 200 free downloads / month for the test song',
325         },
326         # embedded brightcove video
327         # it also tests brightcove videos that need to set the 'Referer' in the
328         # http requests
329         {
330             'add_ie': ['BrightcoveLegacy'],
331             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
332             'info_dict': {
333                 'id': '2765128793001',
334                 'ext': 'mp4',
335                 'title': 'Le cours de bourse : l’analyse technique',
336                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
337                 'uploader': 'BFM BUSINESS',
338             },
339             'params': {
340                 'skip_download': True,
341             },
342         },
343         {
344             # https://github.com/rg3/youtube-dl/issues/2253
345             'url': 'http://bcove.me/i6nfkrc3',
346             'md5': '0ba9446db037002366bab3b3eb30c88c',
347             'info_dict': {
348                 'id': '3101154703001',
349                 'ext': 'mp4',
350                 'title': 'Still no power',
351                 'uploader': 'thestar.com',
352                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
353             },
354             'add_ie': ['BrightcoveLegacy'],
355         },
356         {
357             'url': 'http://www.championat.com/video/football/v/87/87499.html',
358             'md5': 'fb973ecf6e4a78a67453647444222983',
359             'info_dict': {
360                 'id': '3414141473001',
361                 'ext': 'mp4',
362                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
363                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
364                 'uploader': 'Championat',
365             },
366         },
367         {
368             # https://github.com/rg3/youtube-dl/issues/3541
369             'add_ie': ['BrightcoveLegacy'],
370             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
371             'info_dict': {
372                 'id': '3866516442001',
373                 'ext': 'mp4',
374                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
375                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
376                 'uploader': 'SBS Broadcasting',
377             },
378             'skip': 'Restricted to Netherlands',
379             'params': {
380                 'skip_download': True,  # m3u8 download
381             },
382         },
383         # ooyala video
384         {
385             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
386             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
387             'info_dict': {
388                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
389                 'ext': 'mp4',
390                 'title': '2cc213299525360.mov',  # that's what we get
391                 'duration': 238.231,
392             },
393             'add_ie': ['Ooyala'],
394         },
395         {
396             # ooyala video embedded with http://player.ooyala.com/iframe.js
397             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
398             'info_dict': {
399                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
400                 'ext': 'mp4',
401                 'title': '"Steve Jobs: Man in the Machine" trailer',
402                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
403                 'duration': 135.427,
404             },
405             'params': {
406                 'skip_download': True,
407             },
408         },
409         # embed.ly video
410         {
411             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
412             'info_dict': {
413                 'id': '9ODmcdjQcHQ',
414                 'ext': 'mp4',
415                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
416                 'upload_date': '20140225',
417                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
418                 'uploader': 'Tested',
419                 'uploader_id': 'testedcom',
420             },
421             # No need to test YoutubeIE here
422             'params': {
423                 'skip_download': True,
424             },
425         },
426         # funnyordie embed
427         {
428             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
429             'info_dict': {
430                 'id': '18e820ec3f',
431                 'ext': 'mp4',
432                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
433                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
434             },
435         },
436         # RUTV embed
437         {
438             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
439             'info_dict': {
440                 'id': '776940',
441                 'ext': 'mp4',
442                 'title': 'Охотское море стало целиком российским',
443                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
444             },
445             'params': {
446                 # m3u8 download
447                 'skip_download': True,
448             },
449         },
450         # TVC embed
451         {
452             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
453             'info_dict': {
454                 'id': '55304',
455                 'ext': 'mp4',
456                 'title': 'Дошкольное воспитание',
457             },
458         },
459         # SportBox embed
460         {
461             'url': 'http://www.vestifinance.ru/articles/25753',
462             'info_dict': {
463                 'id': '25753',
464                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
465             },
466             'playlist': [{
467                 'info_dict': {
468                     'id': '370908',
469                     'title': 'Госзаказ. День 3',
470                     'ext': 'mp4',
471                 }
472             }, {
473                 'info_dict': {
474                     'id': '370905',
475                     'title': 'Госзаказ. День 2',
476                     'ext': 'mp4',
477                 }
478             }, {
479                 'info_dict': {
480                     'id': '370902',
481                     'title': 'Госзаказ. День 1',
482                     'ext': 'mp4',
483                 }
484             }],
485             'params': {
486                 # m3u8 download
487                 'skip_download': True,
488             },
489         },
490         # Myvi.ru embed
491         {
492             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
493             'info_dict': {
494                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
495                 'ext': 'mp4',
496                 'title': 'Ужастики, русский трейлер (2015)',
497                 'thumbnail': 're:^https?://.*\.jpg$',
498                 'duration': 153,
499             }
500         },
501         # XHamster embed
502         {
503             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
504             'info_dict': {
505                 'id': 'showthread',
506                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
507             },
508             'playlist_mincount': 7,
509         },
510         # Embedded TED video
511         {
512             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
513             'md5': '65fdff94098e4a607385a60c5177c638',
514             'info_dict': {
515                 'id': '1969',
516                 'ext': 'mp4',
517                 'title': 'Hidden miracles of the natural world',
518                 'uploader': 'Louie Schwartzberg',
519                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
520             }
521         },
522         # Embedded Ustream video
523         {
524             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
525             'md5': '27b99cdb639c9b12a79bca876a073417',
526             'info_dict': {
527                 'id': '45734260',
528                 'ext': 'flv',
529                 'uploader': 'AU SPA:  The NSA and Privacy',
530                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
531             }
532         },
533         # nowvideo embed hidden behind percent encoding
534         {
535             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
536             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
537             'info_dict': {
538                 'id': '06e53103ca9aa',
539                 'ext': 'flv',
540                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
541                 'description': 'No description',
542             },
543         },
544         # arte embed
545         {
546             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
547             'md5': '7653032cbb25bf6c80d80f217055fa43',
548             'info_dict': {
549                 'id': '048195-004_PLUS7-F',
550                 'ext': 'flv',
551                 'title': 'X:enius',
552                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
553                 'upload_date': '20140320',
554             },
555             'params': {
556                 'skip_download': 'Requires rtmpdump'
557             }
558         },
559         # francetv embed
560         {
561             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
562             'info_dict': {
563                 'id': 'EV_30231',
564                 'ext': 'mp4',
565                 'title': 'Alcaline, le concert avec Calogero',
566                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
567                 'upload_date': '20150226',
568                 'timestamp': 1424989860,
569                 'duration': 5400,
570             },
571             'params': {
572                 # m3u8 downloads
573                 'skip_download': True,
574             },
575             'expected_warnings': [
576                 'Forbidden'
577             ]
578         },
579         # Condé Nast embed
580         {
581             'url': 'http://www.wired.com/2014/04/honda-asimo/',
582             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
583             'info_dict': {
584                 'id': '53501be369702d3275860000',
585                 'ext': 'mp4',
586                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
587             }
588         },
589         # Dailymotion embed
590         {
591             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
592             'md5': '441aeeb82eb72c422c7f14ec533999cd',
593             'info_dict': {
594                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
595                 'ext': 'mp4',
596                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
597                 'uploader': 'Spi0n',
598             },
599             'add_ie': ['Dailymotion'],
600         },
601         # YouTube embed
602         {
603             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
604             'info_dict': {
605                 'id': 'FXRb4ykk4S0',
606                 'ext': 'mp4',
607                 'title': 'The NBL Auction 2014',
608                 'uploader': 'BADMINTON England',
609                 'uploader_id': 'BADMINTONEvents',
610                 'upload_date': '20140603',
611                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
612             },
613             'add_ie': ['Youtube'],
614             'params': {
615                 'skip_download': True,
616             }
617         },
618         # MTVServices embed
619         {
620             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
621             'md5': '35727f82f58c76d996fc188f9755b0d5',
622             'info_dict': {
623                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
624                 'ext': 'mp4',
625                 'title': 'Review',
626                 'description': 'Mario\'s life in the fast lane has never looked so good.',
627             },
628         },
629         # YouTube embed via <data-embed-url="">
630         {
631             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
632             'info_dict': {
633                 'id': '4vAffPZIT44',
634                 'ext': 'mp4',
635                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
636                 'uploader': 'Gameloft',
637                 'uploader_id': 'gameloft',
638                 'upload_date': '20140828',
639                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
640             },
641             'params': {
642                 'skip_download': True,
643             }
644         },
645         # Camtasia studio
646         {
647             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
648             'playlist': [{
649                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
650                 'info_dict': {
651                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
652                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
653                     'ext': 'flv',
654                     'duration': 2235.90,
655                 }
656             }, {
657                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
658                 'info_dict': {
659                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
660                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
661                     'ext': 'flv',
662                     'duration': 2235.93,
663                 }
664             }],
665             'info_dict': {
666                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
667             }
668         },
669         # Flowplayer
670         {
671             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
672             'md5': '9d65602bf31c6e20014319c7d07fba27',
673             'info_dict': {
674                 'id': '5123ea6d5e5a7',
675                 'ext': 'mp4',
676                 'age_limit': 18,
677                 'uploader': 'www.handjobhub.com',
678                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
679             }
680         },
681         # Multiple brightcove videos
682         # https://github.com/rg3/youtube-dl/issues/2283
683         {
684             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
685             'info_dict': {
686                 'id': 'always-never',
687                 'title': 'Always / Never - The New Yorker',
688             },
689             'playlist_count': 3,
690             'params': {
691                 'extract_flat': False,
692                 'skip_download': True,
693             }
694         },
695         # MLB embed
696         {
697             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
698             'md5': '96f09a37e44da40dd083e12d9a683327',
699             'info_dict': {
700                 'id': '33322633',
701                 'ext': 'mp4',
702                 'title': 'Ump changes call to ball',
703                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
704                 'duration': 48,
705                 'timestamp': 1401537900,
706                 'upload_date': '20140531',
707                 'thumbnail': 're:^https?://.*\.jpg$',
708             },
709         },
710         # Wistia embed
711         {
712             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
713             'md5': '8788b683c777a5cf25621eaf286d0c23',
714             'info_dict': {
715                 'id': '1cfaf6b7ea',
716                 'ext': 'mov',
717                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
718                 'duration': 643.0,
719                 'filesize': 182808282,
720                 'uploader': 'education-portal.com',
721             },
722         },
723         {
724             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
725             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
726             'info_dict': {
727                 'id': 'uxjb0lwrcz',
728                 'ext': 'mp4',
729                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
730                 'duration': 1715.0,
731                 'uploader': 'thoughtworks.wistia.com',
732             },
733         },
734         # Soundcloud embed
735         {
736             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
737             'info_dict': {
738                 'id': '174391317',
739                 'ext': 'mp3',
740                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
741                 'uploader': 'Sophos Security',
742                 'title': 'Chet Chat 171 - Oct 29, 2014',
743                 'upload_date': '20141029',
744             }
745         },
746         # Livestream embed
747         {
748             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
749             'info_dict': {
750                 'id': '67864563',
751                 'ext': 'flv',
752                 'upload_date': '20141112',
753                 'title': 'Rosetta #CometLanding webcast HL 10',
754             }
755         },
756         # LazyYT
757         {
758             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
759             'info_dict': {
760                 'id': '1986',
761                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
762             },
763             'playlist_mincount': 2,
764         },
765         # Cinchcast embed
766         {
767             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
768             'info_dict': {
769                 'id': '7141703',
770                 'ext': 'mp3',
771                 'upload_date': '20141126',
772                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
773             }
774         },
775         # Cinerama player
776         {
777             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
778             'info_dict': {
779                 'id': '730m_DandD_1901_512k',
780                 'ext': 'mp4',
781                 'uploader': 'www.abc.net.au',
782                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
783             }
784         },
785         # embedded viddler video
786         {
787             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
788             'info_dict': {
789                 'id': '4d03aad9',
790                 'ext': 'mp4',
791                 'uploader': 'deadspin',
792                 'title': 'WALL-TO-GORTAT',
793                 'timestamp': 1422285291,
794                 'upload_date': '20150126',
795             },
796             'add_ie': ['Viddler'],
797         },
798         # Libsyn embed
799         {
800             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
801             'info_dict': {
802                 'id': '3377616',
803                 'ext': 'mp3',
804                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
805                 'description': 'md5:601cb790edd05908957dae8aaa866465',
806                 'upload_date': '20150220',
807             },
808         },
809         # jwplayer YouTube
810         {
811             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
812             'info_dict': {
813                 'id': 'Mrj4DVp2zeA',
814                 'ext': 'mp4',
815                 'upload_date': '20150212',
816                 'uploader': 'The National Archives UK',
817                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
818                 'uploader_id': 'NationalArchives08',
819                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
820             },
821         },
822         # rtl.nl embed
823         {
824             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
825             'playlist_mincount': 5,
826             'info_dict': {
827                 'id': 'aanslagen-kopenhagen',
828                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
829             }
830         },
831         # Zapiks embed
832         {
833             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
834             'info_dict': {
835                 'id': '118046',
836                 'ext': 'mp4',
837                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
838             }
839         },
840         # Kaltura embed
841         {
842             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
843             'info_dict': {
844                 'id': '1_eergr3h1',
845                 'ext': 'mp4',
846                 'upload_date': '20150226',
847                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
848                 'timestamp': int,
849                 'title': 'John Carlson Postgame 2/25/15',
850             },
851         },
852         # Kaltura embed (different embed code)
853         {
854             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
855             'info_dict': {
856                 'id': '1_a52wc67y',
857                 'ext': 'flv',
858                 'upload_date': '20150127',
859                 'uploader_id': 'PremierMedia',
860                 'timestamp': int,
861                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
862             },
863         },
864         # Kaltura embed protected with referrer
865         {
866             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
867             'info_dict': {
868                 'id': '1_g4fbemnq',
869                 'ext': 'mp4',
870                 'title': 'Violetta - Achter De Schermen - Ruggero',
871                 'description': 'Achter de schermen met Ruggero',
872                 'timestamp': 1435133761,
873                 'upload_date': '20150624',
874                 'uploader_id': 'echojecka',
875             },
876         },
877         # Eagle.Platform embed (generic URL)
878         {
879             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
880             'info_dict': {
881                 'id': '227304',
882                 'ext': 'mp4',
883                 'title': 'Навальный вышел на свободу',
884                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
885                 'thumbnail': 're:^https?://.*\.jpg$',
886                 'duration': 87,
887                 'view_count': int,
888                 'age_limit': 0,
889             },
890         },
891         # ClipYou (Eagle.Platform) embed (custom URL)
892         {
893             'url': 'http://muz-tv.ru/play/7129/',
894             'info_dict': {
895                 'id': '12820',
896                 'ext': 'mp4',
897                 'title': "'O Sole Mio",
898                 'thumbnail': 're:^https?://.*\.jpg$',
899                 'duration': 216,
900                 'view_count': int,
901             },
902         },
903         # Pladform embed
904         {
905             'url': 'http://muz-tv.ru/kinozal/view/7400/',
906             'info_dict': {
907                 'id': '100183293',
908                 'ext': 'mp4',
909                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
910                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
911                 'thumbnail': 're:^https?://.*\.jpg$',
912                 'duration': 694,
913                 'age_limit': 0,
914             },
915         },
916         # Playwire embed
917         {
918             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
919             'info_dict': {
920                 'id': '3519514',
921                 'ext': 'mp4',
922                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
923                 'thumbnail': 're:^https?://.*\.png$',
924                 'duration': 45.115,
925             },
926         },
927         # 5min embed
928         {
929             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
930             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
931             'info_dict': {
932                 'id': '518726732',
933                 'ext': 'mp4',
934                 'title': 'Facebook Creates "On This Day" | Crunch Report',
935             },
936         },
937         # SVT embed
938         {
939             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
940             'info_dict': {
941                 'id': '2900353',
942                 'ext': 'flv',
943                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
944                 'duration': 27,
945                 'age_limit': 0,
946             },
947         },
948         # Crooks and Liars embed
949         {
950             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
951             'info_dict': {
952                 'id': '8RUoRhRi',
953                 'ext': 'mp4',
954                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
955                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
956                 'timestamp': 1428207000,
957                 'upload_date': '20150405',
958                 'uploader': 'Heather',
959             },
960         },
961         # Crooks and Liars external embed
962         {
963             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
964             'info_dict': {
965                 'id': 'MTE3MjUtMzQ2MzA',
966                 'ext': 'mp4',
967                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
968                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
969                 'timestamp': 1265032391,
970                 'upload_date': '20100201',
971                 'uploader': 'Heather',
972             },
973         },
974         # NBC Sports vplayer embed
975         {
976             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
977             'info_dict': {
978                 'id': 'ln7x1qSThw4k',
979                 'ext': 'flv',
980                 'title': "PFT Live: New leader in the 'new-look' defense",
981                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
982             },
983         },
984         # UDN embed
985         {
986             'url': 'http://www.udn.com/news/story/7314/822787',
987             'md5': 'fd2060e988c326991037b9aff9df21a6',
988             'info_dict': {
989                 'id': '300346',
990                 'ext': 'mp4',
991                 'title': '中一中男師變性 全校師生力挺',
992                 'thumbnail': 're:^https?://.*\.jpg$',
993             }
994         },
995         # Ooyala embed
996         {
997             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
998             'info_dict': {
999                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1000                 'ext': 'mp4',
1001                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1002                 'title': 'This is what separates the Excel masters from the wannabes',
1003                 'duration': 191.933,
1004             },
1005             'params': {
1006                 # m3u8 downloads
1007                 'skip_download': True,
1008             }
1009         },
1010         # Contains a SMIL manifest
1011         {
1012             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1013             'info_dict': {
1014                 'id': 'file',
1015                 'ext': 'flv',
1016                 'title': '+ Football: Lottery Champions League Europe',
1017                 'uploader': 'www.telewebion.com',
1018             },
1019             'params': {
1020                 # rtmpe downloads
1021                 'skip_download': True,
1022             }
1023         },
1024         # Brightcove URL in single quotes
1025         {
1026             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1027             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1028             'info_dict': {
1029                 'id': '4255764656001',
1030                 'ext': 'mp4',
1031                 'title': 'SN Presents: Russell Martin, World Citizen',
1032                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1033                 'uploader': 'Rogers Sportsnet',
1034             },
1035         },
1036         # Dailymotion Cloud video
1037         {
1038             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1039             'md5': '49444254273501a64675a7e68c502681',
1040             'info_dict': {
1041                 'id': '5585de919473990de4bee11b',
1042                 'ext': 'mp4',
1043                 'title': 'Le débat',
1044                 'thumbnail': 're:^https?://.*\.jpe?g$',
1045             }
1046         },
1047         # OnionStudios embed
1048         {
1049             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1050             'info_dict': {
1051                 'id': '2855',
1052                 'ext': 'mp4',
1053                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1054                 'thumbnail': 're:^https?://.*\.jpe?g$',
1055                 'uploader': 'ClickHole',
1056                 'uploader_id': 'clickhole',
1057             }
1058         },
1059         # SnagFilms embed
1060         {
1061             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1062             'info_dict': {
1063                 'id': '74849a00-85a9-11e1-9660-123139220831',
1064                 'ext': 'mp4',
1065                 'title': '#whilewewatch',
1066             }
1067         },
1068         # AdobeTVVideo embed
1069         {
1070             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1071             'md5': '43662b577c018ad707a63766462b1e87',
1072             'info_dict': {
1073                 'id': '2456',
1074                 'ext': 'mp4',
1075                 'title': 'New experience with Acrobat DC',
1076                 'description': 'New experience with Acrobat DC',
1077                 'duration': 248.667,
1078             },
1079         },
1080         # ScreenwaveMedia embed
1081         {
1082             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1083             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1084             'info_dict': {
1085                 'id': 'cinemasnob-55d26273809dd',
1086                 'ext': 'mp4',
1087                 'title': 'cinemasnob',
1088             },
1089         },
1090         # BrightcoveInPageEmbed embed
1091         {
1092             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1093             'info_dict': {
1094                 'id': '4238694884001',
1095                 'ext': 'flv',
1096                 'title': 'Tabletop: Dread, Last Thoughts',
1097                 'description': 'Tabletop: Dread, Last Thoughts',
1098                 'duration': 51690,
1099             },
1100         },
1101         # JWPlayer with M3U8
1102         {
1103             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1104             'info_dict': {
1105                 'id': 'playlist',
1106                 'ext': 'mp4',
1107                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1108                 'uploader': 'ren.tv',
1109             },
1110             'params': {
1111                 # m3u8 downloads
1112                 'skip_download': True,
1113             }
1114         },
1115         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1116         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1117         {
1118             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1119             'info_dict': {
1120                 'id': '4785848093001',
1121                 'ext': 'mp4',
1122                 'title': 'The Cardinal Pell Interview',
1123                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1124                 'uploader': 'GlobeCast Australia - GlobeStream',
1125             },
1126             'params': {
1127                 # m3u8 downloads
1128                 'skip_download': True,
1129             },
1130         },
1131     ]
1132
1133     def report_following_redirect(self, new_url):
1134         """Report information extraction."""
1135         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1136
1137     def _extract_rss(self, url, video_id, doc):
1138         playlist_title = doc.find('./channel/title').text
1139         playlist_desc_el = doc.find('./channel/description')
1140         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1141
1142         entries = []
1143         for it in doc.findall('./channel/item'):
1144             next_url = xpath_text(it, 'link', fatal=False)
1145             if not next_url:
1146                 enclosure_nodes = it.findall('./enclosure')
1147                 for e in enclosure_nodes:
1148                     next_url = e.attrib.get('url')
1149                     if next_url:
1150                         break
1151
1152             if not next_url:
1153                 continue
1154
1155             entries.append({
1156                 '_type': 'url',
1157                 'url': next_url,
1158                 'title': it.find('title').text,
1159             })
1160
1161         return {
1162             '_type': 'playlist',
1163             'id': url,
1164             'title': playlist_title,
1165             'description': playlist_desc,
1166             'entries': entries,
1167         }
1168
1169     def _extract_camtasia(self, url, video_id, webpage):
1170         """ Returns None if no camtasia video can be found. """
1171
1172         camtasia_cfg = self._search_regex(
1173             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1174             webpage, 'camtasia configuration file', default=None)
1175         if camtasia_cfg is None:
1176             return None
1177
1178         title = self._html_search_meta('DC.title', webpage, fatal=True)
1179
1180         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1181         camtasia_cfg = self._download_xml(
1182             camtasia_url, video_id,
1183             note='Downloading camtasia configuration',
1184             errnote='Failed to download camtasia configuration')
1185         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1186
1187         entries = []
1188         for n in fileset_node.getchildren():
1189             url_n = n.find('./uri')
1190             if url_n is None:
1191                 continue
1192
1193             entries.append({
1194                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1195                 'title': '%s - %s' % (title, n.tag),
1196                 'url': compat_urlparse.urljoin(url, url_n.text),
1197                 'duration': float_or_none(n.find('./duration').text),
1198             })
1199
1200         return {
1201             '_type': 'playlist',
1202             'entries': entries,
1203             'title': title,
1204         }
1205
1206     def _real_extract(self, url):
1207         if url.startswith('//'):
1208             return {
1209                 '_type': 'url',
1210                 'url': self.http_scheme() + url,
1211             }
1212
1213         parsed_url = compat_urlparse.urlparse(url)
1214         if not parsed_url.scheme:
1215             default_search = self._downloader.params.get('default_search')
1216             if default_search is None:
1217                 default_search = 'fixup_error'
1218
1219             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1220                 if '/' in url:
1221                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1222                     return self.url_result('http://' + url)
1223                 elif default_search != 'fixup_error':
1224                     if default_search == 'auto_warning':
1225                         if re.match(r'^(?:url|URL)$', url):
1226                             raise ExtractorError(
1227                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1228                                 expected=True)
1229                         else:
1230                             self._downloader.report_warning(
1231                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1232                     return self.url_result('ytsearch:' + url)
1233
1234             if default_search in ('error', 'fixup_error'):
1235                 raise ExtractorError(
1236                     '%r is not a valid URL. '
1237                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1238                     % (url, url), expected=True)
1239             else:
1240                 if ':' not in default_search:
1241                     default_search += ':'
1242                 return self.url_result(default_search + url)
1243
1244         url, smuggled_data = unsmuggle_url(url)
1245         force_videoid = None
1246         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1247         if smuggled_data and 'force_videoid' in smuggled_data:
1248             force_videoid = smuggled_data['force_videoid']
1249             video_id = force_videoid
1250         else:
1251             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1252
1253         self.to_screen('%s: Requesting header' % video_id)
1254
1255         head_req = HEADRequest(url)
1256         head_response = self._request_webpage(
1257             head_req, video_id,
1258             note=False, errnote='Could not send HEAD request to %s' % url,
1259             fatal=False)
1260
1261         if head_response is not False:
1262             # Check for redirect
1263             new_url = head_response.geturl()
1264             if url != new_url:
1265                 self.report_following_redirect(new_url)
1266                 if force_videoid:
1267                     new_url = smuggle_url(
1268                         new_url, {'force_videoid': force_videoid})
1269                 return self.url_result(new_url)
1270
1271         full_response = None
1272         if head_response is False:
1273             request = sanitized_Request(url)
1274             request.add_header('Accept-Encoding', '*')
1275             full_response = self._request_webpage(request, video_id)
1276             head_response = full_response
1277
1278         info_dict = {
1279             'id': video_id,
1280             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1281             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1282         }
1283
1284         # Check for direct link to a video
1285         content_type = head_response.headers.get('Content-Type', '').lower()
1286         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1287         if m:
1288             format_id = m.group('format_id')
1289             if format_id.endswith('mpegurl'):
1290                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1291             elif format_id == 'f4m':
1292                 formats = self._extract_f4m_formats(url, video_id)
1293             else:
1294                 formats = [{
1295                     'format_id': m.group('format_id'),
1296                     'url': url,
1297                     'vcodec': 'none' if m.group('type') == 'audio' else None
1298                 }]
1299                 info_dict['direct'] = True
1300             self._sort_formats(formats)
1301             info_dict['formats'] = formats
1302             return info_dict
1303
1304         if not self._downloader.params.get('test', False) and not is_intentional:
1305             force = self._downloader.params.get('force_generic_extractor', False)
1306             self._downloader.report_warning(
1307                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1308
1309         if not full_response:
1310             request = sanitized_Request(url)
1311             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1312             # making it impossible to download only chunk of the file (yet we need only 512kB to
1313             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1314             # that will always result in downloading the whole file that is not desirable.
1315             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1316             # to accept raw bytes and being able to download only a chunk.
1317             # It may probably better to solve this by checking Content-Type for application/octet-stream
1318             # after HEAD request finishes, but not sure if we can rely on this.
1319             request.add_header('Accept-Encoding', '*')
1320             full_response = self._request_webpage(request, video_id)
1321
1322         first_bytes = full_response.read(512)
1323
1324         # Is it an M3U playlist?
1325         if first_bytes.startswith(b'#EXTM3U'):
1326             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1327             self._sort_formats(info_dict['formats'])
1328             return info_dict
1329
1330         # Maybe it's a direct link to a video?
1331         # Be careful not to download the whole thing!
1332         if not is_html(first_bytes):
1333             self._downloader.report_warning(
1334                 'URL could be a direct video link, returning it as such.')
1335             info_dict.update({
1336                 'direct': True,
1337                 'url': url,
1338             })
1339             return info_dict
1340
1341         webpage = self._webpage_read_content(
1342             full_response, url, video_id, prefix=first_bytes)
1343
1344         self.report_extraction(video_id)
1345
1346         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1347         try:
1348             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1349             if doc.tag == 'rss':
1350                 return self._extract_rss(url, video_id, doc)
1351             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1352                 smil = self._parse_smil(doc, url, video_id)
1353                 self._sort_formats(smil['formats'])
1354                 return smil
1355             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1356                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1357             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1358                 info_dict['formats'] = self._parse_mpd_formats(
1359                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1360                 self._sort_formats(info_dict['formats'])
1361                 return info_dict
1362             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1363                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1364                 self._sort_formats(info_dict['formats'])
1365                 return info_dict
1366         except compat_xml_parse_error:
1367             pass
1368
1369         # Is it a Camtasia project?
1370         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1371         if camtasia_res is not None:
1372             return camtasia_res
1373
1374         # Sometimes embedded video player is hidden behind percent encoding
1375         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1376         # Unescaping the whole page allows to handle those cases in a generic way
1377         webpage = compat_urllib_parse_unquote(webpage)
1378
1379         # it's tempting to parse this further, but you would
1380         # have to take into account all the variations like
1381         #   Video Title - Site Name
1382         #   Site Name | Video Title
1383         #   Video Title - Tagline | Site Name
1384         # and so on and so forth; it's just not practical
1385         video_title = self._html_search_regex(
1386             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1387             default='video')
1388
1389         # Try to detect age limit automatically
1390         age_limit = self._rta_search(webpage)
1391         # And then there are the jokers who advertise that they use RTA,
1392         # but actually don't.
1393         AGE_LIMIT_MARKERS = [
1394             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1395         ]
1396         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1397             age_limit = 18
1398
1399         # video uploader is domain name
1400         video_uploader = self._search_regex(
1401             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1402
1403         # Helper method
1404         def _playlist_from_matches(matches, getter=None, ie=None):
1405             urlrs = orderedSet(
1406                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1407                 for m in matches)
1408             return self.playlist_result(
1409                 urlrs, playlist_id=video_id, playlist_title=video_title)
1410
1411         # Look for Brightcove Legacy Studio embeds
1412         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1413         if bc_urls:
1414             self.to_screen('Brightcove video detected.')
1415             entries = [{
1416                 '_type': 'url',
1417                 'url': smuggle_url(bc_url, {'Referer': url}),
1418                 'ie_key': 'BrightcoveLegacy'
1419             } for bc_url in bc_urls]
1420
1421             return {
1422                 '_type': 'playlist',
1423                 'title': video_title,
1424                 'id': video_id,
1425                 'entries': entries,
1426             }
1427
1428         # Look for Brightcove New Studio embeds
1429         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1430         if bc_urls:
1431             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1432
1433         # Look for embedded rtl.nl player
1434         matches = re.findall(
1435             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1436             webpage)
1437         if matches:
1438             return _playlist_from_matches(matches, ie='RtlNl')
1439
1440         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1441         if vimeo_url is not None:
1442             return self.url_result(vimeo_url)
1443
1444         vid_me_embed_url = self._search_regex(
1445             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1446             webpage, 'vid.me embed', default=None)
1447         if vid_me_embed_url is not None:
1448             return self.url_result(vid_me_embed_url, 'Vidme')
1449
1450         # Look for embedded YouTube player
1451         matches = re.findall(r'''(?x)
1452             (?:
1453                 <iframe[^>]+?src=|
1454                 data-video-url=|
1455                 <embed[^>]+?src=|
1456                 embedSWF\(?:\s*|
1457                 new\s+SWFObject\(
1458             )
1459             (["\'])
1460                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1461                 (?:embed|v|p)/.+?)
1462             \1''', webpage)
1463         if matches:
1464             return _playlist_from_matches(
1465                 matches, lambda m: unescapeHTML(m[1]))
1466
1467         # Look for lazyYT YouTube embed
1468         matches = re.findall(
1469             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1470         if matches:
1471             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1472
1473         # Look for embedded Dailymotion player
1474         matches = re.findall(
1475             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1476         if matches:
1477             return _playlist_from_matches(
1478                 matches, lambda m: unescapeHTML(m[1]))
1479
1480         # Look for embedded Dailymotion playlist player (#3822)
1481         m = re.search(
1482             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1483         if m:
1484             playlists = re.findall(
1485                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1486             if playlists:
1487                 return _playlist_from_matches(
1488                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1489
1490         # Look for embedded Wistia player
1491         match = re.search(
1492             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1493         if match:
1494             embed_url = self._proto_relative_url(
1495                 unescapeHTML(match.group('url')))
1496             return {
1497                 '_type': 'url_transparent',
1498                 'url': embed_url,
1499                 'ie_key': 'Wistia',
1500                 'uploader': video_uploader,
1501                 'title': video_title,
1502                 'id': video_id,
1503             }
1504
1505         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1506         if match:
1507             return {
1508                 '_type': 'url_transparent',
1509                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1510                 'ie_key': 'Wistia',
1511                 'uploader': video_uploader,
1512                 'title': video_title,
1513                 'id': match.group('id')
1514             }
1515
1516         # Look for SVT player
1517         svt_url = SVTIE._extract_url(webpage)
1518         if svt_url:
1519             return self.url_result(svt_url, 'SVT')
1520
1521         # Look for embedded condenast player
1522         matches = re.findall(
1523             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1524             webpage)
1525         if matches:
1526             return {
1527                 '_type': 'playlist',
1528                 'entries': [{
1529                     '_type': 'url',
1530                     'ie_key': 'CondeNast',
1531                     'url': ma,
1532                 } for ma in matches],
1533                 'title': video_title,
1534                 'id': video_id,
1535             }
1536
1537         # Look for Bandcamp pages with custom domain
1538         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1539         if mobj is not None:
1540             burl = unescapeHTML(mobj.group(1))
1541             # Don't set the extractor because it can be a track url or an album
1542             return self.url_result(burl)
1543
1544         # Look for embedded Vevo player
1545         mobj = re.search(
1546             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1547         if mobj is not None:
1548             return self.url_result(mobj.group('url'))
1549
1550         # Look for embedded Viddler player
1551         mobj = re.search(
1552             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1553             webpage)
1554         if mobj is not None:
1555             return self.url_result(mobj.group('url'))
1556
1557         # Look for NYTimes player
1558         mobj = re.search(
1559             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1560             webpage)
1561         if mobj is not None:
1562             return self.url_result(mobj.group('url'))
1563
1564         # Look for Libsyn player
1565         mobj = re.search(
1566             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1567         if mobj is not None:
1568             return self.url_result(mobj.group('url'))
1569
1570         # Look for Ooyala videos
1571         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1572                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1573                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1574                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1575         if mobj is not None:
1576             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1577
1578         # Look for multiple Ooyala embeds on SBN network websites
1579         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1580         if mobj is not None:
1581             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1582             if embeds:
1583                 return _playlist_from_matches(
1584                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1585
1586         # Look for Aparat videos
1587         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1588         if mobj is not None:
1589             return self.url_result(mobj.group(1), 'Aparat')
1590
1591         # Look for MPORA videos
1592         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1593         if mobj is not None:
1594             return self.url_result(mobj.group(1), 'Mpora')
1595
1596         # Look for embedded NovaMov-based player
1597         mobj = re.search(
1598             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1599                     (?P<url>http://(?:(?:embed|www)\.)?
1600                         (?:novamov\.com|
1601                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1602                            videoweed\.(?:es|com)|
1603                            movshare\.(?:net|sx|ag)|
1604                            divxstage\.(?:eu|net|ch|co|at|ag))
1605                         /embed\.php.+?)\1''', webpage)
1606         if mobj is not None:
1607             return self.url_result(mobj.group('url'))
1608
1609         # Look for embedded Facebook player
1610         mobj = re.search(
1611             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1612         if mobj is not None:
1613             return self.url_result(mobj.group('url'), 'Facebook')
1614
1615         # Look for embedded VK player
1616         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1617         if mobj is not None:
1618             return self.url_result(mobj.group('url'), 'VK')
1619
1620         # Look for embedded Odnoklassniki player
1621         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1622         if mobj is not None:
1623             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1624
1625         # Look for embedded ivi player
1626         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1627         if mobj is not None:
1628             return self.url_result(mobj.group('url'), 'Ivi')
1629
1630         # Look for embedded Huffington Post player
1631         mobj = re.search(
1632             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1633         if mobj is not None:
1634             return self.url_result(mobj.group('url'), 'HuffPost')
1635
1636         # Look for embed.ly
1637         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1638         if mobj is not None:
1639             return self.url_result(mobj.group('url'))
1640         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1641         if mobj is not None:
1642             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1643
1644         # Look for funnyordie embed
1645         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1646         if matches:
1647             return _playlist_from_matches(
1648                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1649
1650         # Look for BBC iPlayer embed
1651         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1652         if matches:
1653             return _playlist_from_matches(matches, ie='BBCCoUk')
1654
1655         # Look for embedded RUTV player
1656         rutv_url = RUTVIE._extract_url(webpage)
1657         if rutv_url:
1658             return self.url_result(rutv_url, 'RUTV')
1659
1660         # Look for embedded TVC player
1661         tvc_url = TVCIE._extract_url(webpage)
1662         if tvc_url:
1663             return self.url_result(tvc_url, 'TVC')
1664
1665         # Look for embedded SportBox player
1666         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1667         if sportbox_urls:
1668             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1669
1670         # Look for embedded PornHub player
1671         pornhub_url = PornHubIE._extract_url(webpage)
1672         if pornhub_url:
1673             return self.url_result(pornhub_url, 'PornHub')
1674
1675         # Look for embedded XHamster player
1676         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1677         if xhamster_urls:
1678             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1679
1680         # Look for embedded TNAFlixNetwork player
1681         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1682         if tnaflix_urls:
1683             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1684
1685         # Look for embedded Tvigle player
1686         mobj = re.search(
1687             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1688         if mobj is not None:
1689             return self.url_result(mobj.group('url'), 'Tvigle')
1690
1691         # Look for embedded TED player
1692         mobj = re.search(
1693             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1694         if mobj is not None:
1695             return self.url_result(mobj.group('url'), 'TED')
1696
1697         # Look for embedded Ustream videos
1698         mobj = re.search(
1699             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1700         if mobj is not None:
1701             return self.url_result(mobj.group('url'), 'Ustream')
1702
1703         # Look for embedded arte.tv player
1704         mobj = re.search(
1705             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1706             webpage)
1707         if mobj is not None:
1708             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1709
1710         # Look for embedded francetv player
1711         mobj = re.search(
1712             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1713             webpage)
1714         if mobj is not None:
1715             return self.url_result(mobj.group('url'))
1716
1717         # Look for embedded smotri.com player
1718         smotri_url = SmotriIE._extract_url(webpage)
1719         if smotri_url:
1720             return self.url_result(smotri_url, 'Smotri')
1721
1722         # Look for embedded Myvi.ru player
1723         myvi_url = MyviIE._extract_url(webpage)
1724         if myvi_url:
1725             return self.url_result(myvi_url)
1726
1727         # Look for embedded soundcloud player
1728         mobj = re.search(
1729             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1730             webpage)
1731         if mobj is not None:
1732             url = unescapeHTML(mobj.group('url'))
1733             return self.url_result(url)
1734
1735         # Look for embedded vulture.com player
1736         mobj = re.search(
1737             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1738             webpage)
1739         if mobj is not None:
1740             url = unescapeHTML(mobj.group('url'))
1741             return self.url_result(url, ie='Vulture')
1742
1743         # Look for embedded mtvservices player
1744         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1745         if mtvservices_url:
1746             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1747
1748         # Look for embedded yahoo player
1749         mobj = re.search(
1750             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1751             webpage)
1752         if mobj is not None:
1753             return self.url_result(mobj.group('url'), 'Yahoo')
1754
1755         # Look for embedded sbs.com.au player
1756         mobj = re.search(
1757             r'''(?x)
1758             (?:
1759                 <meta\s+property="og:video"\s+content=|
1760                 <iframe[^>]+?src=
1761             )
1762             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1763             webpage)
1764         if mobj is not None:
1765             return self.url_result(mobj.group('url'), 'SBS')
1766
1767         # Look for embedded Cinchcast player
1768         mobj = re.search(
1769             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1770             webpage)
1771         if mobj is not None:
1772             return self.url_result(mobj.group('url'), 'Cinchcast')
1773
1774         mobj = re.search(
1775             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1776             webpage)
1777         if not mobj:
1778             mobj = re.search(
1779                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1780                 webpage)
1781         if mobj is not None:
1782             return self.url_result(mobj.group('url'), 'MLB')
1783
1784         mobj = re.search(
1785             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1786             webpage)
1787         if mobj is not None:
1788             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1789
1790         mobj = re.search(
1791             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1792             webpage)
1793         if mobj is not None:
1794             return self.url_result(mobj.group('url'), 'Livestream')
1795
1796         # Look for Zapiks embed
1797         mobj = re.search(
1798             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1799         if mobj is not None:
1800             return self.url_result(mobj.group('url'), 'Zapiks')
1801
1802         # Look for Kaltura embeds
1803         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1804                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1805         if mobj is not None:
1806             return self.url_result(smuggle_url(
1807                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1808                 {'source_url': url}), 'Kaltura')
1809
1810         # Look for Eagle.Platform embeds
1811         mobj = re.search(
1812             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1813         if mobj is not None:
1814             return self.url_result(mobj.group('url'), 'EaglePlatform')
1815
1816         # Look for ClipYou (uses Eagle.Platform) embeds
1817         mobj = re.search(
1818             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1819         if mobj is not None:
1820             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1821
1822         # Look for Pladform embeds
1823         pladform_url = PladformIE._extract_url(webpage)
1824         if pladform_url:
1825             return self.url_result(pladform_url)
1826
1827         # Look for Videomore embeds
1828         videomore_url = VideomoreIE._extract_url(webpage)
1829         if videomore_url:
1830             return self.url_result(videomore_url)
1831
1832         # Look for Playwire embeds
1833         mobj = re.search(
1834             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1835         if mobj is not None:
1836             return self.url_result(mobj.group('url'))
1837
1838         # Look for 5min embeds
1839         mobj = re.search(
1840             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1841         if mobj is not None:
1842             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1843
1844         # Look for Crooks and Liars embeds
1845         mobj = re.search(
1846             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1847         if mobj is not None:
1848             return self.url_result(mobj.group('url'))
1849
1850         # Look for NBC Sports VPlayer embeds
1851         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1852         if nbc_sports_url:
1853             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1854
1855         # Look for Google Drive embeds
1856         google_drive_url = GoogleDriveIE._extract_url(webpage)
1857         if google_drive_url:
1858             return self.url_result(google_drive_url, 'GoogleDrive')
1859
1860         # Look for UDN embeds
1861         mobj = re.search(
1862             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1863         if mobj is not None:
1864             return self.url_result(
1865                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1866
1867         # Look for Senate ISVP iframe
1868         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1869         if senate_isvp_url:
1870             return self.url_result(senate_isvp_url, 'SenateISVP')
1871
1872         # Look for Dailymotion Cloud videos
1873         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1874         if dmcloud_url:
1875             return self.url_result(dmcloud_url, 'DailymotionCloud')
1876
1877         # Look for OnionStudios embeds
1878         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1879         if onionstudios_url:
1880             return self.url_result(onionstudios_url)
1881
1882         # Look for SnagFilms embeds
1883         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1884         if snagfilms_url:
1885             return self.url_result(snagfilms_url)
1886
1887         # Look for JWPlatform embeds
1888         jwplatform_url = JWPlatformIE._extract_url(webpage)
1889         if jwplatform_url:
1890             return self.url_result(jwplatform_url, 'JWPlatform')
1891
1892         # Look for ScreenwaveMedia embeds
1893         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1894         if mobj is not None:
1895             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1896
1897         # Look for Digiteka embeds
1898         digiteka_url = DigitekaIE._extract_url(webpage)
1899         if digiteka_url:
1900             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
1901
1902         # Look for Limelight embeds
1903         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
1904         if mobj:
1905             lm = {
1906                 'Media': 'media',
1907                 'Channel': 'channel',
1908                 'ChannelList': 'channel_list',
1909             }
1910             return self.url_result('limelight:%s:%s' % (
1911                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
1912
1913         # Look for AdobeTVVideo embeds
1914         mobj = re.search(
1915             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1916             webpage)
1917         if mobj is not None:
1918             return self.url_result(
1919                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1920                 'AdobeTVVideo')
1921
1922         # Look for Vine embeds
1923         mobj = re.search(
1924             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
1925             webpage)
1926         if mobj is not None:
1927             return self.url_result(
1928                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
1929
1930         # Look for Instagram embeds
1931         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
1932         if instagram_embed_url is not None:
1933             return self.url_result(instagram_embed_url, InstagramIE.ie_key())
1934
1935         def check_video(vurl):
1936             if YoutubeIE.suitable(vurl):
1937                 return True
1938             vpath = compat_urlparse.urlparse(vurl).path
1939             vext = determine_ext(vpath)
1940             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1941
1942         def filter_video(urls):
1943             return list(filter(check_video, urls))
1944
1945         # Start with something easy: JW Player in SWFObject
1946         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1947         if not found:
1948             # Look for gorilla-vid style embedding
1949             found = filter_video(re.findall(r'''(?sx)
1950                 (?:
1951                     jw_plugins|
1952                     JWPlayerOptions|
1953                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1954                 )
1955                 .*?
1956                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1957         if not found:
1958             # Broaden the search a little bit
1959             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1960         if not found:
            # Broaden the search a little bit: JWPlayer JS loader
1962             found = filter_video(re.findall(
1963                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1964         if not found:
1965             # Flow player
1966             found = filter_video(re.findall(r'''(?xs)
1967                 flowplayer\("[^"]+",\s*
1968                     \{[^}]+?\}\s*,
1969                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1970                         ["']?url["']?\s*:\s*["']([^"']+)["']
1971             ''', webpage))
1972         if not found:
1973             # Cinerama player
1974             found = re.findall(
1975                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1976         if not found:
1977             # Try to find twitter cards info
1978             found = filter_video(re.findall(
1979                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1980         if not found:
1981             # We look for Open Graph info:
            # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
1983             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1984             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1985             if m_video_type is not None:
1986                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1987         if not found:
1988             # HTML5 video
1989             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1990         if not found:
1991             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1992             found = re.search(
1993                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1994                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1995                 webpage)
1996             if not found:
1997                 # Look also in Refresh HTTP header
1998                 refresh_header = head_response.headers.get('Refresh')
1999                 if refresh_header:
2000                     # In python 2 response HTTP headers are bytestrings
2001                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2002                         refresh_header = refresh_header.decode('iso-8859-1')
2003                     found = re.search(REDIRECT_REGEX, refresh_header)
2004             if found:
2005                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2006                 self.report_following_redirect(new_url)
2007                 return {
2008                     '_type': 'url',
2009                     'url': new_url,
2010                 }
2011         if not found:
2012             raise UnsupportedError(url)
2013
2014         entries = []
2015         for video_url in found:
2016             video_url = video_url.replace('\\/', '/')
2017             video_url = compat_urlparse.urljoin(url, video_url)
2018             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2019
2020             # Sometimes, jwplayer extraction will result in a YouTube URL
2021             if YoutubeIE.suitable(video_url):
2022                 entries.append(self.url_result(video_url, 'Youtube'))
2023                 continue
2024
2025             # here's a fun little line of code for you:
2026             video_id = os.path.splitext(video_id)[0]
2027
2028             entry_info_dict = {
2029                 'id': video_id,
2030                 'uploader': video_uploader,
2031                 'title': video_title,
2032                 'age_limit': age_limit,
2033             }
2034
2035             ext = determine_ext(video_url)
2036             if ext == 'smil':
2037                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2038             elif ext == 'xspf':
2039                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2040             elif ext == 'm3u8':
2041                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2042             elif ext == 'mpd':
2043                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2044             elif ext == 'f4m':
2045                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2046             else:
2047                 entry_info_dict['url'] = video_url
2048
2049             if entry_info_dict.get('formats'):
2050                 self._sort_formats(entry_info_dict['formats'])
2051
2052             entries.append(entry_info_dict)
2053
2054         if len(entries) == 1:
2055             return entries[0]
2056         else:
2057             for num, e in enumerate(entries, start=1):
2058                 # 'url' results don't have a title
2059                 if e.get('title') is not None:
2060                     e['title'] = '%s (%d)' % (e['title'], num)
2061             return {
2062                 '_type': 'playlist',
2063                 'entries': entries,
2064             }