[arte.tv:embed] Extended support (#2620)
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_urllib_parse_unquote,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     sanitized_Request,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import (
34     BrightcoveLegacyIE,
35     BrightcoveNewIE,
36 )
37 from .nbc import NBCSportsVPlayerIE
38 from .ooyala import OoyalaIE
39 from .rutv import RUTVIE
40 from .tvc import TVCIE
41 from .sportbox import SportBoxEmbedIE
42 from .smotri import SmotriIE
43 from .myvi import MyviIE
44 from .condenast import CondeNastIE
45 from .udn import UDNEmbedIE
46 from .senateisvp import SenateISVPIE
47 from .svt import SVTIE
48 from .pornhub import PornHubIE
49 from .xhamster import XHamsterEmbedIE
50 from .tnaflix import TNAFlixNetworkEmbedIE
51 from .vimeo import VimeoIE
52 from .dailymotion import DailymotionCloudIE
53 from .onionstudios import OnionStudiosIE
54 from .snagfilms import SnagFilmsEmbedIE
55 from .screenwavemedia import ScreenwaveMediaIE
56 from .mtv import MTVServicesEmbeddedIE
57 from .pladform import PladformIE
58 from .videomore import VideomoreIE
59 from .googledrive import GoogleDriveIE
60 from .jwplatform import JWPlatformIE
61 from .digiteka import DigitekaIE
62 from .instagram import InstagramIE
63
64
65 class GenericIE(InfoExtractor):
66     IE_DESC = 'Generic downloader that works on some sites'
67     _VALID_URL = r'.*'
68     IE_NAME = 'generic'
69     _TESTS = [
70         # Direct link to a video
71         {
72             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
73             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
74             'info_dict': {
75                 'id': 'trailer',
76                 'ext': 'mp4',
77                 'title': 'trailer',
78                 'upload_date': '20100513',
79             }
80         },
81         # Direct link to media delivered compressed (until Accept-Encoding is *)
82         {
83             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
84             'md5': '128c42e68b13950268b648275386fc74',
85             'info_dict': {
86                 'id': 'FictionJunction-Parallel_Hearts',
87                 'ext': 'flac',
88                 'title': 'FictionJunction-Parallel_Hearts',
89                 'upload_date': '20140522',
90             },
91             'expected_warnings': [
92                 'URL could be a direct video link, returning it as such.'
93             ]
94         },
95         # Direct download with broken HEAD
96         {
97             'url': 'http://ai-radio.org:8000/radio.opus',
98             'info_dict': {
99                 'id': 'radio',
100                 'ext': 'opus',
101                 'title': 'radio',
102             },
103             'params': {
104                 'skip_download': True,  # infinite live stream
105             },
106             'expected_warnings': [
107                 r'501.*Not Implemented'
108             ],
109         },
110         # Direct link with incorrect MIME type
111         {
112             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
113             'md5': '4ccbebe5f36706d85221f204d7eb5913',
114             'info_dict': {
115                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
116                 'id': '5_Lennart_Poettering_-_Systemd',
117                 'ext': 'webm',
118                 'title': '5_Lennart_Poettering_-_Systemd',
119                 'upload_date': '20141120',
120             },
121             'expected_warnings': [
122                 'URL could be a direct video link, returning it as such.'
123             ]
124         },
125         # RSS feed
126         {
127             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
128             'info_dict': {
129                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
130                 'title': 'Zero Punctuation',
131                 'description': 're:.*groundbreaking video review series.*'
132             },
133             'playlist_mincount': 11,
134         },
135         # RSS feed with enclosure
136         {
137             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
138             'info_dict': {
139                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
140                 'ext': 'm4v',
141                 'upload_date': '20150228',
142                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
143             }
144         },
145         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
146         {
147             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
148             'info_dict': {
149                 'id': 'smil',
150                 'ext': 'mp4',
151                 'title': 'Automatics, robotics and biocybernetics',
152                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
153                 'upload_date': '20130627',
154                 'formats': 'mincount:16',
155                 'subtitles': 'mincount:1',
156             },
157             'params': {
158                 'force_generic_extractor': True,
159                 'skip_download': True,
160             },
161         },
162         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
163         {
164             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
165             'info_dict': {
166                 'id': 'hds',
167                 'ext': 'flv',
168                 'title': 'hds',
169                 'formats': 'mincount:1',
170             },
171             'params': {
172                 'skip_download': True,
173             },
174         },
175         # SMIL from https://www.restudy.dk/video/play/id/1637
176         {
177             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
178             'info_dict': {
179                 'id': 'video_1637',
180                 'ext': 'flv',
181                 'title': 'video_1637',
182                 'formats': 'mincount:3',
183             },
184             'params': {
185                 'skip_download': True,
186             },
187         },
188         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
189         {
190             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
191             'info_dict': {
192                 'id': 'smil-service',
193                 'ext': 'flv',
194                 'title': 'smil-service',
195                 'formats': 'mincount:1',
196             },
197             'params': {
198                 'skip_download': True,
199             },
200         },
201         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
202         {
203             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
204             'info_dict': {
205                 'id': '4719370',
206                 'ext': 'mp4',
207                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
208                 'formats': 'mincount:3',
209             },
210             'params': {
211                 'skip_download': True,
212             },
213         },
214         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
215         {
216             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
217             'info_dict': {
218                 'id': 'mZlp2ctYIUEB',
219                 'ext': 'mp4',
220                 'title': 'Tikibad ontruimd wegens brand',
221                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
222                 'thumbnail': 're:^https?://.*\.jpg$',
223                 'duration': 33,
224             },
225             'params': {
226                 'skip_download': True,
227             },
228         },
229         # MPD from http://dash-mse-test.appspot.com/media.html
230         {
231             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
232             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
233             'info_dict': {
234                 'id': 'car-20120827-manifest',
235                 'ext': 'mp4',
236                 'title': 'car-20120827-manifest',
237                 'formats': 'mincount:9',
238             },
239             'params': {
240                 'format': 'bestvideo',
241             },
242         },
243         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
244         {
245             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
246             'info_dict': {
247                 'id': 'content',
248                 'ext': 'mp4',
249                 'title': 'content',
250                 'formats': 'mincount:8',
251             },
252             'params': {
253                 # m3u8 downloads
254                 'skip_download': True,
255             }
256         },
257         # m3u8 served with Content-Type: text/plain
258         {
259             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
260             'info_dict': {
261                 'id': 'index',
262                 'ext': 'mp4',
263                 'title': 'index',
264                 'upload_date': '20140720',
265                 'formats': 'mincount:11',
266             },
267             'params': {
268                 # m3u8 downloads
269                 'skip_download': True,
270             }
271         },
272         # google redirect
273         {
274             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
275             'info_dict': {
276                 'id': 'cmQHVoWB5FY',
277                 'ext': 'mp4',
278                 'upload_date': '20130224',
279                 'uploader_id': 'TheVerge',
280                 'description': 're:^Chris Ziegler takes a look at the\.*',
281                 'uploader': 'The Verge',
282                 'title': 'First Firefox OS phones side-by-side',
283             },
284             'params': {
285                 'skip_download': False,
286             }
287         },
288         {
289             # redirect in Refresh HTTP header
290             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
291             'info_dict': {
292                 'id': 'pO8h3EaFRdo',
293                 'ext': 'mp4',
294                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
295                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
296                 'upload_date': '20150917',
297                 'uploader_id': 'brtvofficial',
298                 'uploader': 'Boiler Room',
299             },
300             'params': {
301                 'skip_download': False,
302             },
303         },
304         {
305             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
306             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
307             'info_dict': {
308                 'id': '13601338388002',
309                 'ext': 'mp4',
310                 'uploader': 'www.hodiho.fr',
311                 'title': 'R\u00e9gis plante sa Jeep',
312             }
313         },
314         # bandcamp page with custom domain
315         {
316             'add_ie': ['Bandcamp'],
317             'url': 'http://bronyrock.com/track/the-pony-mash',
318             'info_dict': {
319                 'id': '3235767654',
320                 'ext': 'mp3',
321                 'title': 'The Pony Mash',
322                 'uploader': 'M_Pallante',
323             },
324             'skip': 'There is a limit of 200 free downloads / month for the test song',
325         },
326         # embedded brightcove video
327         # it also tests brightcove videos that need to set the 'Referer' in the
328         # http requests
329         {
330             'add_ie': ['BrightcoveLegacy'],
331             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
332             'info_dict': {
333                 'id': '2765128793001',
334                 'ext': 'mp4',
335                 'title': 'Le cours de bourse : l’analyse technique',
336                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
337                 'uploader': 'BFM BUSINESS',
338             },
339             'params': {
340                 'skip_download': True,
341             },
342         },
343         {
344             # https://github.com/rg3/youtube-dl/issues/2253
345             'url': 'http://bcove.me/i6nfkrc3',
346             'md5': '0ba9446db037002366bab3b3eb30c88c',
347             'info_dict': {
348                 'id': '3101154703001',
349                 'ext': 'mp4',
350                 'title': 'Still no power',
351                 'uploader': 'thestar.com',
352                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
353             },
354             'add_ie': ['BrightcoveLegacy'],
355         },
356         {
357             'url': 'http://www.championat.com/video/football/v/87/87499.html',
358             'md5': 'fb973ecf6e4a78a67453647444222983',
359             'info_dict': {
360                 'id': '3414141473001',
361                 'ext': 'mp4',
362                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
363                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
364                 'uploader': 'Championat',
365             },
366         },
367         {
368             # https://github.com/rg3/youtube-dl/issues/3541
369             'add_ie': ['BrightcoveLegacy'],
370             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
371             'info_dict': {
372                 'id': '3866516442001',
373                 'ext': 'mp4',
374                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
375                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
376                 'uploader': 'SBS Broadcasting',
377             },
378             'skip': 'Restricted to Netherlands',
379             'params': {
380                 'skip_download': True,  # m3u8 download
381             },
382         },
383         # ooyala video
384         {
385             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
386             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
387             'info_dict': {
388                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
389                 'ext': 'mp4',
390                 'title': '2cc213299525360.mov',  # that's what we get
391                 'duration': 238.231,
392             },
393             'add_ie': ['Ooyala'],
394         },
395         {
396             # ooyala video embedded with http://player.ooyala.com/iframe.js
397             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
398             'info_dict': {
399                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
400                 'ext': 'mp4',
401                 'title': '"Steve Jobs: Man in the Machine" trailer',
402                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
403                 'duration': 135.427,
404             },
405             'params': {
406                 'skip_download': True,
407             },
408         },
409         # embed.ly video
410         {
411             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
412             'info_dict': {
413                 'id': '9ODmcdjQcHQ',
414                 'ext': 'mp4',
415                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
416                 'upload_date': '20140225',
417                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
418                 'uploader': 'Tested',
419                 'uploader_id': 'testedcom',
420             },
421             # No need to test YoutubeIE here
422             'params': {
423                 'skip_download': True,
424             },
425         },
426         # funnyordie embed
427         {
428             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
429             'info_dict': {
430                 'id': '18e820ec3f',
431                 'ext': 'mp4',
432                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
433                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
434             },
435         },
436         # RUTV embed
437         {
438             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
439             'info_dict': {
440                 'id': '776940',
441                 'ext': 'mp4',
442                 'title': 'Охотское море стало целиком российским',
443                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
444             },
445             'params': {
446                 # m3u8 download
447                 'skip_download': True,
448             },
449         },
450         # TVC embed
451         {
452             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
453             'info_dict': {
454                 'id': '55304',
455                 'ext': 'mp4',
456                 'title': 'Дошкольное воспитание',
457             },
458         },
459         # SportBox embed
460         {
461             'url': 'http://www.vestifinance.ru/articles/25753',
462             'info_dict': {
463                 'id': '25753',
464                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
465             },
466             'playlist': [{
467                 'info_dict': {
468                     'id': '370908',
469                     'title': 'Госзаказ. День 3',
470                     'ext': 'mp4',
471                 }
472             }, {
473                 'info_dict': {
474                     'id': '370905',
475                     'title': 'Госзаказ. День 2',
476                     'ext': 'mp4',
477                 }
478             }, {
479                 'info_dict': {
480                     'id': '370902',
481                     'title': 'Госзаказ. День 1',
482                     'ext': 'mp4',
483                 }
484             }],
485             'params': {
486                 # m3u8 download
487                 'skip_download': True,
488             },
489         },
490         # Myvi.ru embed
491         {
492             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
493             'info_dict': {
494                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
495                 'ext': 'mp4',
496                 'title': 'Ужастики, русский трейлер (2015)',
497                 'thumbnail': 're:^https?://.*\.jpg$',
498                 'duration': 153,
499             }
500         },
501         # XHamster embed
502         {
503             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
504             'info_dict': {
505                 'id': 'showthread',
506                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
507             },
508             'playlist_mincount': 7,
509         },
510         # Embedded TED video
511         {
512             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
513             'md5': '65fdff94098e4a607385a60c5177c638',
514             'info_dict': {
515                 'id': '1969',
516                 'ext': 'mp4',
517                 'title': 'Hidden miracles of the natural world',
518                 'uploader': 'Louie Schwartzberg',
519                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
520             }
521         },
522         # Embedded Ustream video
523         {
524             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
525             'md5': '27b99cdb639c9b12a79bca876a073417',
526             'info_dict': {
527                 'id': '45734260',
528                 'ext': 'flv',
529                 'uploader': 'AU SPA:  The NSA and Privacy',
530                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
531             }
532         },
533         # nowvideo embed hidden behind percent encoding
534         {
535             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
536             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
537             'info_dict': {
538                 'id': '06e53103ca9aa',
539                 'ext': 'flv',
540                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
541                 'description': 'No description',
542             },
543         },
544         # arte embed
545         {
546             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
547             'md5': '7653032cbb25bf6c80d80f217055fa43',
548             'info_dict': {
549                 'id': '048195-004_PLUS7-F',
550                 'ext': 'flv',
551                 'title': 'X:enius',
552                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
553                 'upload_date': '20140320',
554             },
555             'params': {
556                 'skip_download': 'Requires rtmpdump'
557             }
558         },
559         # francetv embed
560         {
561             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
562             'info_dict': {
563                 'id': 'EV_30231',
564                 'ext': 'mp4',
565                 'title': 'Alcaline, le concert avec Calogero',
566                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
567                 'upload_date': '20150226',
568                 'timestamp': 1424989860,
569                 'duration': 5400,
570             },
571             'params': {
572                 # m3u8 downloads
573                 'skip_download': True,
574             },
575             'expected_warnings': [
576                 'Forbidden'
577             ]
578         },
579         # Condé Nast embed
580         {
581             'url': 'http://www.wired.com/2014/04/honda-asimo/',
582             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
583             'info_dict': {
584                 'id': '53501be369702d3275860000',
585                 'ext': 'mp4',
586                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
587             }
588         },
589         # Dailymotion embed
590         {
591             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
592             'md5': '441aeeb82eb72c422c7f14ec533999cd',
593             'info_dict': {
594                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
595                 'ext': 'mp4',
596                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
597                 'uploader': 'Spi0n',
598             },
599             'add_ie': ['Dailymotion'],
600         },
601         # YouTube embed
602         {
603             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
604             'info_dict': {
605                 'id': 'FXRb4ykk4S0',
606                 'ext': 'mp4',
607                 'title': 'The NBL Auction 2014',
608                 'uploader': 'BADMINTON England',
609                 'uploader_id': 'BADMINTONEvents',
610                 'upload_date': '20140603',
611                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
612             },
613             'add_ie': ['Youtube'],
614             'params': {
615                 'skip_download': True,
616             }
617         },
618         # MTVSercices embed
619         {
620             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
621             'md5': '35727f82f58c76d996fc188f9755b0d5',
622             'info_dict': {
623                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
624                 'ext': 'mp4',
625                 'title': 'Review',
626                 'description': 'Mario\'s life in the fast lane has never looked so good.',
627             },
628         },
629         # YouTube embed via <data-embed-url="">
630         {
631             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
632             'info_dict': {
633                 'id': '4vAffPZIT44',
634                 'ext': 'mp4',
635                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
636                 'uploader': 'Gameloft',
637                 'uploader_id': 'gameloft',
638                 'upload_date': '20140828',
639                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
640             },
641             'params': {
642                 'skip_download': True,
643             }
644         },
645         # Camtasia studio
646         {
647             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
648             'playlist': [{
649                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
650                 'info_dict': {
651                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
652                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
653                     'ext': 'flv',
654                     'duration': 2235.90,
655                 }
656             }, {
657                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
658                 'info_dict': {
659                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
660                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
661                     'ext': 'flv',
662                     'duration': 2235.93,
663                 }
664             }],
665             'info_dict': {
666                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
667             }
668         },
669         # Flowplayer
670         {
671             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
672             'md5': '9d65602bf31c6e20014319c7d07fba27',
673             'info_dict': {
674                 'id': '5123ea6d5e5a7',
675                 'ext': 'mp4',
676                 'age_limit': 18,
677                 'uploader': 'www.handjobhub.com',
678                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
679             }
680         },
681         # Multiple brightcove videos
682         # https://github.com/rg3/youtube-dl/issues/2283
683         {
684             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
685             'info_dict': {
686                 'id': 'always-never',
687                 'title': 'Always / Never - The New Yorker',
688             },
689             'playlist_count': 3,
690             'params': {
691                 'extract_flat': False,
692                 'skip_download': True,
693             }
694         },
695         # MLB embed
696         {
697             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
698             'md5': '96f09a37e44da40dd083e12d9a683327',
699             'info_dict': {
700                 'id': '33322633',
701                 'ext': 'mp4',
702                 'title': 'Ump changes call to ball',
703                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
704                 'duration': 48,
705                 'timestamp': 1401537900,
706                 'upload_date': '20140531',
707                 'thumbnail': 're:^https?://.*\.jpg$',
708             },
709         },
710         # Wistia embed
711         {
712             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
713             'md5': '8788b683c777a5cf25621eaf286d0c23',
714             'info_dict': {
715                 'id': '1cfaf6b7ea',
716                 'ext': 'mov',
717                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
718                 'duration': 643.0,
719                 'filesize': 182808282,
720                 'uploader': 'education-portal.com',
721             },
722         },
723         {
724             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
725             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
726             'info_dict': {
727                 'id': 'uxjb0lwrcz',
728                 'ext': 'mp4',
729                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
730                 'duration': 1715.0,
731                 'uploader': 'thoughtworks.wistia.com',
732             },
733         },
734         # Soundcloud embed
735         {
736             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
737             'info_dict': {
738                 'id': '174391317',
739                 'ext': 'mp3',
740                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
741                 'uploader': 'Sophos Security',
742                 'title': 'Chet Chat 171 - Oct 29, 2014',
743                 'upload_date': '20141029',
744             }
745         },
746         # Livestream embed
747         {
748             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
749             'info_dict': {
750                 'id': '67864563',
751                 'ext': 'flv',
752                 'upload_date': '20141112',
753                 'title': 'Rosetta #CometLanding webcast HL 10',
754             }
755         },
756         # LazyYT
757         {
758             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
759             'info_dict': {
760                 'id': '1986',
761                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
762             },
763             'playlist_mincount': 2,
764         },
765         # Cinchcast embed
766         {
767             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
768             'info_dict': {
769                 'id': '7141703',
770                 'ext': 'mp3',
771                 'upload_date': '20141126',
772                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
773             }
774         },
775         # Cinerama player
776         {
777             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
778             'info_dict': {
779                 'id': '730m_DandD_1901_512k',
780                 'ext': 'mp4',
781                 'uploader': 'www.abc.net.au',
782                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
783             }
784         },
785         # embedded viddler video
786         {
787             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
788             'info_dict': {
789                 'id': '4d03aad9',
790                 'ext': 'mp4',
791                 'uploader': 'deadspin',
792                 'title': 'WALL-TO-GORTAT',
793                 'timestamp': 1422285291,
794                 'upload_date': '20150126',
795             },
796             'add_ie': ['Viddler'],
797         },
798         # Libsyn embed
799         {
800             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
801             'info_dict': {
802                 'id': '3377616',
803                 'ext': 'mp3',
804                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
805                 'description': 'md5:601cb790edd05908957dae8aaa866465',
806                 'upload_date': '20150220',
807             },
808         },
809         # jwplayer YouTube
810         {
811             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
812             'info_dict': {
813                 'id': 'Mrj4DVp2zeA',
814                 'ext': 'mp4',
815                 'upload_date': '20150212',
816                 'uploader': 'The National Archives UK',
817                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
818                 'uploader_id': 'NationalArchives08',
819                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
820             },
821         },
822         # rtl.nl embed
823         {
824             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
825             'playlist_mincount': 5,
826             'info_dict': {
827                 'id': 'aanslagen-kopenhagen',
828                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
829             }
830         },
831         # Zapiks embed
832         {
833             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
834             'info_dict': {
835                 'id': '118046',
836                 'ext': 'mp4',
837                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
838             }
839         },
840         # Kaltura embed
841         {
842             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
843             'info_dict': {
844                 'id': '1_eergr3h1',
845                 'ext': 'mp4',
846                 'upload_date': '20150226',
847                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
848                 'timestamp': int,
849                 'title': 'John Carlson Postgame 2/25/15',
850             },
851         },
852         # Kaltura embed (different embed code)
853         {
854             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
855             'info_dict': {
856                 'id': '1_a52wc67y',
857                 'ext': 'flv',
858                 'upload_date': '20150127',
859                 'uploader_id': 'PremierMedia',
860                 'timestamp': int,
861                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
862             },
863         },
864         # Kaltura embed protected with referrer
865         {
866             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
867             'info_dict': {
868                 'id': '1_g4fbemnq',
869                 'ext': 'mp4',
870                 'title': 'Violetta - Achter De Schermen - Ruggero',
871                 'description': 'Achter de schermen met Ruggero',
872                 'timestamp': 1435133761,
873                 'upload_date': '20150624',
874                 'uploader_id': 'echojecka',
875             },
876         },
877         # Eagle.Platform embed (generic URL)
878         {
879             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
880             'info_dict': {
881                 'id': '227304',
882                 'ext': 'mp4',
883                 'title': 'Навальный вышел на свободу',
884                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
885                 'thumbnail': 're:^https?://.*\.jpg$',
886                 'duration': 87,
887                 'view_count': int,
888                 'age_limit': 0,
889             },
890         },
891         # ClipYou (Eagle.Platform) embed (custom URL)
892         {
893             'url': 'http://muz-tv.ru/play/7129/',
894             'info_dict': {
895                 'id': '12820',
896                 'ext': 'mp4',
897                 'title': "'O Sole Mio",
898                 'thumbnail': 're:^https?://.*\.jpg$',
899                 'duration': 216,
900                 'view_count': int,
901             },
902         },
903         # Pladform embed
904         {
905             'url': 'http://muz-tv.ru/kinozal/view/7400/',
906             'info_dict': {
907                 'id': '100183293',
908                 'ext': 'mp4',
909                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
910                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
911                 'thumbnail': 're:^https?://.*\.jpg$',
912                 'duration': 694,
913                 'age_limit': 0,
914             },
915         },
916         # Playwire embed
917         {
918             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
919             'info_dict': {
920                 'id': '3519514',
921                 'ext': 'mp4',
922                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
923                 'thumbnail': 're:^https?://.*\.png$',
924                 'duration': 45.115,
925             },
926         },
927         # 5min embed
928         {
929             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
930             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
931             'info_dict': {
932                 'id': '518726732',
933                 'ext': 'mp4',
934                 'title': 'Facebook Creates "On This Day" | Crunch Report',
935             },
936         },
937         # SVT embed
938         {
939             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
940             'info_dict': {
941                 'id': '2900353',
942                 'ext': 'flv',
943                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
944                 'duration': 27,
945                 'age_limit': 0,
946             },
947         },
948         # Crooks and Liars embed
949         {
950             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
951             'info_dict': {
952                 'id': '8RUoRhRi',
953                 'ext': 'mp4',
954                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
955                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
956                 'timestamp': 1428207000,
957                 'upload_date': '20150405',
958                 'uploader': 'Heather',
959             },
960         },
961         # Crooks and Liars external embed
962         {
963             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
964             'info_dict': {
965                 'id': 'MTE3MjUtMzQ2MzA',
966                 'ext': 'mp4',
967                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
968                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
969                 'timestamp': 1265032391,
970                 'upload_date': '20100201',
971                 'uploader': 'Heather',
972             },
973         },
974         # NBC Sports vplayer embed
975         {
976             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
977             'info_dict': {
978                 'id': 'ln7x1qSThw4k',
979                 'ext': 'flv',
980                 'title': "PFT Live: New leader in the 'new-look' defense",
981                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
982             },
983         },
984         # UDN embed
985         {
986             'url': 'http://www.udn.com/news/story/7314/822787',
987             'md5': 'fd2060e988c326991037b9aff9df21a6',
988             'info_dict': {
989                 'id': '300346',
990                 'ext': 'mp4',
991                 'title': '中一中男師變性 全校師生力挺',
992                 'thumbnail': 're:^https?://.*\.jpg$',
993             }
994         },
995         # Ooyala embed
996         {
997             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
998             'info_dict': {
999                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1000                 'ext': 'mp4',
1001                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1002                 'title': 'This is what separates the Excel masters from the wannabes',
1003                 'duration': 191.933,
1004             },
1005             'params': {
1006                 # m3u8 downloads
1007                 'skip_download': True,
1008             }
1009         },
1010         # Contains a SMIL manifest
1011         {
1012             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1013             'info_dict': {
1014                 'id': 'file',
1015                 'ext': 'flv',
1016                 'title': '+ Football: Lottery Champions League Europe',
1017                 'uploader': 'www.telewebion.com',
1018             },
1019             'params': {
1020                 # rtmpe downloads
1021                 'skip_download': True,
1022             }
1023         },
1024         # Brightcove URL in single quotes
1025         {
1026             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1027             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1028             'info_dict': {
1029                 'id': '4255764656001',
1030                 'ext': 'mp4',
1031                 'title': 'SN Presents: Russell Martin, World Citizen',
1032                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1033                 'uploader': 'Rogers Sportsnet',
1034             },
1035         },
1036         # Dailymotion Cloud video
1037         {
1038             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1039             'md5': '49444254273501a64675a7e68c502681',
1040             'info_dict': {
1041                 'id': '5585de919473990de4bee11b',
1042                 'ext': 'mp4',
1043                 'title': 'Le débat',
1044                 'thumbnail': 're:^https?://.*\.jpe?g$',
1045             }
1046         },
1047         # OnionStudios embed
1048         {
1049             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1050             'info_dict': {
1051                 'id': '2855',
1052                 'ext': 'mp4',
1053                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1054                 'thumbnail': 're:^https?://.*\.jpe?g$',
1055                 'uploader': 'ClickHole',
1056                 'uploader_id': 'clickhole',
1057             }
1058         },
1059         # SnagFilms embed
1060         {
1061             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1062             'info_dict': {
1063                 'id': '74849a00-85a9-11e1-9660-123139220831',
1064                 'ext': 'mp4',
1065                 'title': '#whilewewatch',
1066             }
1067         },
1068         # AdobeTVVideo embed
1069         {
1070             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1071             'md5': '43662b577c018ad707a63766462b1e87',
1072             'info_dict': {
1073                 'id': '2456',
1074                 'ext': 'mp4',
1075                 'title': 'New experience with Acrobat DC',
1076                 'description': 'New experience with Acrobat DC',
1077                 'duration': 248.667,
1078             },
1079         },
1080         # ScreenwaveMedia embed
1081         {
1082             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1083             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1084             'info_dict': {
1085                 'id': 'cinemasnob-55d26273809dd',
1086                 'ext': 'mp4',
1087                 'title': 'cinemasnob',
1088             },
1089         },
1090         # BrightcoveInPageEmbed embed
1091         {
1092             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1093             'info_dict': {
1094                 'id': '4238694884001',
1095                 'ext': 'flv',
1096                 'title': 'Tabletop: Dread, Last Thoughts',
1097                 'description': 'Tabletop: Dread, Last Thoughts',
1098                 'duration': 51690,
1099             },
1100         },
1101         # JWPlayer with M3U8
1102         {
1103             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1104             'info_dict': {
1105                 'id': 'playlist',
1106                 'ext': 'mp4',
1107                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1108                 'uploader': 'ren.tv',
1109             },
1110             'params': {
1111                 # m3u8 downloads
1112                 'skip_download': True,
1113             }
1114         },
1115         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1116         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1117         {
1118             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1119             'info_dict': {
1120                 'id': '4785848093001',
1121                 'ext': 'mp4',
1122                 'title': 'The Cardinal Pell Interview',
1123                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1124                 'uploader': 'GlobeCast Australia - GlobeStream',
1125             },
1126             'params': {
1127                 # m3u8 downloads
1128                 'skip_download': True,
1129             },
1130         },
1131         # Another form of arte.tv embed
1132         {
1133             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1134             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1135             'info_dict': {
1136                 'id': '030273-562_PLUS7-F',
1137                 'ext': 'mp4',
1138                 'title': 'ARTE Reportage - Nulle part, en France',
1139                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1140                 'upload_date': '20160409',
1141             },
1142         },
1143     ]
1144
1145     def report_following_redirect(self, new_url):
1146         """Report information extraction."""
1147         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1148
1149     def _extract_rss(self, url, video_id, doc):
1150         playlist_title = doc.find('./channel/title').text
1151         playlist_desc_el = doc.find('./channel/description')
1152         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1153
1154         entries = []
1155         for it in doc.findall('./channel/item'):
1156             next_url = xpath_text(it, 'link', fatal=False)
1157             if not next_url:
1158                 enclosure_nodes = it.findall('./enclosure')
1159                 for e in enclosure_nodes:
1160                     next_url = e.attrib.get('url')
1161                     if next_url:
1162                         break
1163
1164             if not next_url:
1165                 continue
1166
1167             entries.append({
1168                 '_type': 'url',
1169                 'url': next_url,
1170                 'title': it.find('title').text,
1171             })
1172
1173         return {
1174             '_type': 'playlist',
1175             'id': url,
1176             'title': playlist_title,
1177             'description': playlist_desc,
1178             'entries': entries,
1179         }
1180
1181     def _extract_camtasia(self, url, video_id, webpage):
1182         """ Returns None if no camtasia video can be found. """
1183
1184         camtasia_cfg = self._search_regex(
1185             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1186             webpage, 'camtasia configuration file', default=None)
1187         if camtasia_cfg is None:
1188             return None
1189
1190         title = self._html_search_meta('DC.title', webpage, fatal=True)
1191
1192         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1193         camtasia_cfg = self._download_xml(
1194             camtasia_url, video_id,
1195             note='Downloading camtasia configuration',
1196             errnote='Failed to download camtasia configuration')
1197         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1198
1199         entries = []
1200         for n in fileset_node.getchildren():
1201             url_n = n.find('./uri')
1202             if url_n is None:
1203                 continue
1204
1205             entries.append({
1206                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1207                 'title': '%s - %s' % (title, n.tag),
1208                 'url': compat_urlparse.urljoin(url, url_n.text),
1209                 'duration': float_or_none(n.find('./duration').text),
1210             })
1211
1212         return {
1213             '_type': 'playlist',
1214             'entries': entries,
1215             'title': title,
1216         }
1217
1218     def _real_extract(self, url):
1219         if url.startswith('//'):
1220             return {
1221                 '_type': 'url',
1222                 'url': self.http_scheme() + url,
1223             }
1224
1225         parsed_url = compat_urlparse.urlparse(url)
1226         if not parsed_url.scheme:
1227             default_search = self._downloader.params.get('default_search')
1228             if default_search is None:
1229                 default_search = 'fixup_error'
1230
1231             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1232                 if '/' in url:
1233                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1234                     return self.url_result('http://' + url)
1235                 elif default_search != 'fixup_error':
1236                     if default_search == 'auto_warning':
1237                         if re.match(r'^(?:url|URL)$', url):
1238                             raise ExtractorError(
1239                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1240                                 expected=True)
1241                         else:
1242                             self._downloader.report_warning(
1243                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1244                     return self.url_result('ytsearch:' + url)
1245
1246             if default_search in ('error', 'fixup_error'):
1247                 raise ExtractorError(
1248                     '%r is not a valid URL. '
1249                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1250                     % (url, url), expected=True)
1251             else:
1252                 if ':' not in default_search:
1253                     default_search += ':'
1254                 return self.url_result(default_search + url)
1255
1256         url, smuggled_data = unsmuggle_url(url)
1257         force_videoid = None
1258         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1259         if smuggled_data and 'force_videoid' in smuggled_data:
1260             force_videoid = smuggled_data['force_videoid']
1261             video_id = force_videoid
1262         else:
1263             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1264
1265         self.to_screen('%s: Requesting header' % video_id)
1266
1267         head_req = HEADRequest(url)
1268         head_response = self._request_webpage(
1269             head_req, video_id,
1270             note=False, errnote='Could not send HEAD request to %s' % url,
1271             fatal=False)
1272
1273         if head_response is not False:
1274             # Check for redirect
1275             new_url = head_response.geturl()
1276             if url != new_url:
1277                 self.report_following_redirect(new_url)
1278                 if force_videoid:
1279                     new_url = smuggle_url(
1280                         new_url, {'force_videoid': force_videoid})
1281                 return self.url_result(new_url)
1282
1283         full_response = None
1284         if head_response is False:
1285             request = sanitized_Request(url)
1286             request.add_header('Accept-Encoding', '*')
1287             full_response = self._request_webpage(request, video_id)
1288             head_response = full_response
1289
1290         info_dict = {
1291             'id': video_id,
1292             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1293             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1294         }
1295
1296         # Check for direct link to a video
1297         content_type = head_response.headers.get('Content-Type', '').lower()
1298         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1299         if m:
1300             format_id = m.group('format_id')
1301             if format_id.endswith('mpegurl'):
1302                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1303             elif format_id == 'f4m':
1304                 formats = self._extract_f4m_formats(url, video_id)
1305             else:
1306                 formats = [{
1307                     'format_id': m.group('format_id'),
1308                     'url': url,
1309                     'vcodec': 'none' if m.group('type') == 'audio' else None
1310                 }]
1311                 info_dict['direct'] = True
1312             self._sort_formats(formats)
1313             info_dict['formats'] = formats
1314             return info_dict
1315
1316         if not self._downloader.params.get('test', False) and not is_intentional:
1317             force = self._downloader.params.get('force_generic_extractor', False)
1318             self._downloader.report_warning(
1319                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1320
1321         if not full_response:
1322             request = sanitized_Request(url)
1323             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1324             # making it impossible to download only chunk of the file (yet we need only 512kB to
1325             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1326             # that will always result in downloading the whole file that is not desirable.
1327             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1328             # to accept raw bytes and being able to download only a chunk.
1329             # It may probably better to solve this by checking Content-Type for application/octet-stream
1330             # after HEAD request finishes, but not sure if we can rely on this.
1331             request.add_header('Accept-Encoding', '*')
1332             full_response = self._request_webpage(request, video_id)
1333
1334         first_bytes = full_response.read(512)
1335
1336         # Is it an M3U playlist?
1337         if first_bytes.startswith(b'#EXTM3U'):
1338             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1339             self._sort_formats(info_dict['formats'])
1340             return info_dict
1341
1342         # Maybe it's a direct link to a video?
1343         # Be careful not to download the whole thing!
1344         if not is_html(first_bytes):
1345             self._downloader.report_warning(
1346                 'URL could be a direct video link, returning it as such.')
1347             info_dict.update({
1348                 'direct': True,
1349                 'url': url,
1350             })
1351             return info_dict
1352
1353         webpage = self._webpage_read_content(
1354             full_response, url, video_id, prefix=first_bytes)
1355
1356         self.report_extraction(video_id)
1357
1358         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1359         try:
1360             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1361             if doc.tag == 'rss':
1362                 return self._extract_rss(url, video_id, doc)
1363             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1364                 smil = self._parse_smil(doc, url, video_id)
1365                 self._sort_formats(smil['formats'])
1366                 return smil
1367             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1368                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1369             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1370                 info_dict['formats'] = self._parse_mpd_formats(
1371                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1372                 self._sort_formats(info_dict['formats'])
1373                 return info_dict
1374             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1375                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1376                 self._sort_formats(info_dict['formats'])
1377                 return info_dict
1378         except compat_xml_parse_error:
1379             pass
1380
1381         # Is it a Camtasia project?
1382         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1383         if camtasia_res is not None:
1384             return camtasia_res
1385
1386         # Sometimes embedded video player is hidden behind percent encoding
1387         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1388         # Unescaping the whole page allows to handle those cases in a generic way
1389         webpage = compat_urllib_parse_unquote(webpage)
1390
1391         # it's tempting to parse this further, but you would
1392         # have to take into account all the variations like
1393         #   Video Title - Site Name
1394         #   Site Name | Video Title
1395         #   Video Title - Tagline | Site Name
1396         # and so on and so forth; it's just not practical
1397         video_title = self._html_search_regex(
1398             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1399             default='video')
1400
1401         # Try to detect age limit automatically
1402         age_limit = self._rta_search(webpage)
1403         # And then there are the jokers who advertise that they use RTA,
1404         # but actually don't.
1405         AGE_LIMIT_MARKERS = [
1406             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1407         ]
1408         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1409             age_limit = 18
1410
1411         # video uploader is domain name
1412         video_uploader = self._search_regex(
1413             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1414
1415         # Helper method
1416         def _playlist_from_matches(matches, getter=None, ie=None):
1417             urlrs = orderedSet(
1418                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1419                 for m in matches)
1420             return self.playlist_result(
1421                 urlrs, playlist_id=video_id, playlist_title=video_title)
1422
1423         # Look for Brightcove Legacy Studio embeds
1424         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1425         if bc_urls:
1426             self.to_screen('Brightcove video detected.')
1427             entries = [{
1428                 '_type': 'url',
1429                 'url': smuggle_url(bc_url, {'Referer': url}),
1430                 'ie_key': 'BrightcoveLegacy'
1431             } for bc_url in bc_urls]
1432
1433             return {
1434                 '_type': 'playlist',
1435                 'title': video_title,
1436                 'id': video_id,
1437                 'entries': entries,
1438             }
1439
1440         # Look for Brightcove New Studio embeds
1441         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1442         if bc_urls:
1443             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1444
1445         # Look for embedded rtl.nl player
1446         matches = re.findall(
1447             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1448             webpage)
1449         if matches:
1450             return _playlist_from_matches(matches, ie='RtlNl')
1451
1452         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1453         if vimeo_url is not None:
1454             return self.url_result(vimeo_url)
1455
1456         vid_me_embed_url = self._search_regex(
1457             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1458             webpage, 'vid.me embed', default=None)
1459         if vid_me_embed_url is not None:
1460             return self.url_result(vid_me_embed_url, 'Vidme')
1461
1462         # Look for embedded YouTube player
1463         matches = re.findall(r'''(?x)
1464             (?:
1465                 <iframe[^>]+?src=|
1466                 data-video-url=|
1467                 <embed[^>]+?src=|
1468                 embedSWF\(?:\s*|
1469                 new\s+SWFObject\(
1470             )
1471             (["\'])
1472                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1473                 (?:embed|v|p)/.+?)
1474             \1''', webpage)
1475         if matches:
1476             return _playlist_from_matches(
1477                 matches, lambda m: unescapeHTML(m[1]))
1478
1479         # Look for lazyYT YouTube embed
1480         matches = re.findall(
1481             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1482         if matches:
1483             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1484
1485         # Look for embedded Dailymotion player
1486         matches = re.findall(
1487             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1488         if matches:
1489             return _playlist_from_matches(
1490                 matches, lambda m: unescapeHTML(m[1]))
1491
1492         # Look for embedded Dailymotion playlist player (#3822)
1493         m = re.search(
1494             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1495         if m:
1496             playlists = re.findall(
1497                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1498             if playlists:
1499                 return _playlist_from_matches(
1500                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1501
1502         # Look for embedded Wistia player
1503         match = re.search(
1504             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1505         if match:
1506             embed_url = self._proto_relative_url(
1507                 unescapeHTML(match.group('url')))
1508             return {
1509                 '_type': 'url_transparent',
1510                 'url': embed_url,
1511                 'ie_key': 'Wistia',
1512                 'uploader': video_uploader,
1513                 'title': video_title,
1514                 'id': video_id,
1515             }
1516
1517         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1518         if match:
1519             return {
1520                 '_type': 'url_transparent',
1521                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1522                 'ie_key': 'Wistia',
1523                 'uploader': video_uploader,
1524                 'title': video_title,
1525                 'id': match.group('id')
1526             }
1527
1528         # Look for SVT player
1529         svt_url = SVTIE._extract_url(webpage)
1530         if svt_url:
1531             return self.url_result(svt_url, 'SVT')
1532
1533         # Look for embedded condenast player
1534         matches = re.findall(
1535             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1536             webpage)
1537         if matches:
1538             return {
1539                 '_type': 'playlist',
1540                 'entries': [{
1541                     '_type': 'url',
1542                     'ie_key': 'CondeNast',
1543                     'url': ma,
1544                 } for ma in matches],
1545                 'title': video_title,
1546                 'id': video_id,
1547             }
1548
1549         # Look for Bandcamp pages with custom domain
1550         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1551         if mobj is not None:
1552             burl = unescapeHTML(mobj.group(1))
1553             # Don't set the extractor because it can be a track url or an album
1554             return self.url_result(burl)
1555
1556         # Look for embedded Vevo player
1557         mobj = re.search(
1558             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1559         if mobj is not None:
1560             return self.url_result(mobj.group('url'))
1561
1562         # Look for embedded Viddler player
1563         mobj = re.search(
1564             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1565             webpage)
1566         if mobj is not None:
1567             return self.url_result(mobj.group('url'))
1568
1569         # Look for NYTimes player
1570         mobj = re.search(
1571             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1572             webpage)
1573         if mobj is not None:
1574             return self.url_result(mobj.group('url'))
1575
1576         # Look for Libsyn player
1577         mobj = re.search(
1578             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1579         if mobj is not None:
1580             return self.url_result(mobj.group('url'))
1581
1582         # Look for Ooyala videos
1583         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1584                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1585                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1586                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1587         if mobj is not None:
1588             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1589
1590         # Look for multiple Ooyala embeds on SBN network websites
1591         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1592         if mobj is not None:
1593             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1594             if embeds:
1595                 return _playlist_from_matches(
1596                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1597
1598         # Look for Aparat videos
1599         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1600         if mobj is not None:
1601             return self.url_result(mobj.group(1), 'Aparat')
1602
1603         # Look for MPORA videos
1604         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1605         if mobj is not None:
1606             return self.url_result(mobj.group(1), 'Mpora')
1607
1608         # Look for embedded NovaMov-based player
1609         mobj = re.search(
1610             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1611                     (?P<url>http://(?:(?:embed|www)\.)?
1612                         (?:novamov\.com|
1613                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1614                            videoweed\.(?:es|com)|
1615                            movshare\.(?:net|sx|ag)|
1616                            divxstage\.(?:eu|net|ch|co|at|ag))
1617                         /embed\.php.+?)\1''', webpage)
1618         if mobj is not None:
1619             return self.url_result(mobj.group('url'))
1620
1621         # Look for embedded Facebook player
1622         mobj = re.search(
1623             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1624         if mobj is not None:
1625             return self.url_result(mobj.group('url'), 'Facebook')
1626
1627         # Look for embedded VK player
1628         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1629         if mobj is not None:
1630             return self.url_result(mobj.group('url'), 'VK')
1631
1632         # Look for embedded Odnoklassniki player
1633         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1634         if mobj is not None:
1635             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1636
1637         # Look for embedded ivi player
1638         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1639         if mobj is not None:
1640             return self.url_result(mobj.group('url'), 'Ivi')
1641
1642         # Look for embedded Huffington Post player
1643         mobj = re.search(
1644             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1645         if mobj is not None:
1646             return self.url_result(mobj.group('url'), 'HuffPost')
1647
1648         # Look for embed.ly
1649         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1650         if mobj is not None:
1651             return self.url_result(mobj.group('url'))
1652         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1653         if mobj is not None:
1654             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1655
1656         # Look for funnyordie embed
1657         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1658         if matches:
1659             return _playlist_from_matches(
1660                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1661
1662         # Look for BBC iPlayer embed
1663         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1664         if matches:
1665             return _playlist_from_matches(matches, ie='BBCCoUk')
1666
1667         # Look for embedded RUTV player
1668         rutv_url = RUTVIE._extract_url(webpage)
1669         if rutv_url:
1670             return self.url_result(rutv_url, 'RUTV')
1671
1672         # Look for embedded TVC player
1673         tvc_url = TVCIE._extract_url(webpage)
1674         if tvc_url:
1675             return self.url_result(tvc_url, 'TVC')
1676
1677         # Look for embedded SportBox player
1678         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1679         if sportbox_urls:
1680             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1681
1682         # Look for embedded PornHub player
1683         pornhub_url = PornHubIE._extract_url(webpage)
1684         if pornhub_url:
1685             return self.url_result(pornhub_url, 'PornHub')
1686
1687         # Look for embedded XHamster player
1688         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1689         if xhamster_urls:
1690             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1691
1692         # Look for embedded TNAFlixNetwork player
1693         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1694         if tnaflix_urls:
1695             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1696
1697         # Look for embedded Tvigle player
1698         mobj = re.search(
1699             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1700         if mobj is not None:
1701             return self.url_result(mobj.group('url'), 'Tvigle')
1702
1703         # Look for embedded TED player
1704         mobj = re.search(
1705             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1706         if mobj is not None:
1707             return self.url_result(mobj.group('url'), 'TED')
1708
1709         # Look for embedded Ustream videos
1710         mobj = re.search(
1711             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1712         if mobj is not None:
1713             return self.url_result(mobj.group('url'), 'Ustream')
1714
1715         # Look for embedded arte.tv player
1716         mobj = re.search(
1717             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
1718             webpage)
1719         if mobj is not None:
1720             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1721
1722         # Look for embedded francetv player
1723         mobj = re.search(
1724             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1725             webpage)
1726         if mobj is not None:
1727             return self.url_result(mobj.group('url'))
1728
1729         # Look for embedded smotri.com player
1730         smotri_url = SmotriIE._extract_url(webpage)
1731         if smotri_url:
1732             return self.url_result(smotri_url, 'Smotri')
1733
1734         # Look for embedded Myvi.ru player
1735         myvi_url = MyviIE._extract_url(webpage)
1736         if myvi_url:
1737             return self.url_result(myvi_url)
1738
1739         # Look for embedded soundcloud player
1740         mobj = re.search(
1741             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1742             webpage)
1743         if mobj is not None:
1744             url = unescapeHTML(mobj.group('url'))
1745             return self.url_result(url)
1746
1747         # Look for embedded vulture.com player
1748         mobj = re.search(
1749             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1750             webpage)
1751         if mobj is not None:
1752             url = unescapeHTML(mobj.group('url'))
1753             return self.url_result(url, ie='Vulture')
1754
1755         # Look for embedded mtvservices player
1756         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1757         if mtvservices_url:
1758             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1759
1760         # Look for embedded yahoo player
1761         mobj = re.search(
1762             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1763             webpage)
1764         if mobj is not None:
1765             return self.url_result(mobj.group('url'), 'Yahoo')
1766
1767         # Look for embedded sbs.com.au player
1768         mobj = re.search(
1769             r'''(?x)
1770             (?:
1771                 <meta\s+property="og:video"\s+content=|
1772                 <iframe[^>]+?src=
1773             )
1774             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1775             webpage)
1776         if mobj is not None:
1777             return self.url_result(mobj.group('url'), 'SBS')
1778
1779         # Look for embedded Cinchcast player
1780         mobj = re.search(
1781             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1782             webpage)
1783         if mobj is not None:
1784             return self.url_result(mobj.group('url'), 'Cinchcast')
1785
1786         mobj = re.search(
1787             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1788             webpage)
1789         if not mobj:
1790             mobj = re.search(
1791                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1792                 webpage)
1793         if mobj is not None:
1794             return self.url_result(mobj.group('url'), 'MLB')
1795
1796         mobj = re.search(
1797             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1798             webpage)
1799         if mobj is not None:
1800             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1801
1802         mobj = re.search(
1803             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1804             webpage)
1805         if mobj is not None:
1806             return self.url_result(mobj.group('url'), 'Livestream')
1807
1808         # Look for Zapiks embed
1809         mobj = re.search(
1810             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1811         if mobj is not None:
1812             return self.url_result(mobj.group('url'), 'Zapiks')
1813
1814         # Look for Kaltura embeds
1815         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1816                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1817         if mobj is not None:
1818             return self.url_result(smuggle_url(
1819                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1820                 {'source_url': url}), 'Kaltura')
1821
1822         # Look for Eagle.Platform embeds
1823         mobj = re.search(
1824             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1825         if mobj is not None:
1826             return self.url_result(mobj.group('url'), 'EaglePlatform')
1827
1828         # Look for ClipYou (uses Eagle.Platform) embeds
1829         mobj = re.search(
1830             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1831         if mobj is not None:
1832             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1833
1834         # Look for Pladform embeds
1835         pladform_url = PladformIE._extract_url(webpage)
1836         if pladform_url:
1837             return self.url_result(pladform_url)
1838
1839         # Look for Videomore embeds
1840         videomore_url = VideomoreIE._extract_url(webpage)
1841         if videomore_url:
1842             return self.url_result(videomore_url)
1843
1844         # Look for Playwire embeds
1845         mobj = re.search(
1846             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1847         if mobj is not None:
1848             return self.url_result(mobj.group('url'))
1849
1850         # Look for 5min embeds
1851         mobj = re.search(
1852             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1853         if mobj is not None:
1854             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1855
1856         # Look for Crooks and Liars embeds
1857         mobj = re.search(
1858             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1859         if mobj is not None:
1860             return self.url_result(mobj.group('url'))
1861
1862         # Look for NBC Sports VPlayer embeds
1863         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1864         if nbc_sports_url:
1865             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1866
1867         # Look for Google Drive embeds
1868         google_drive_url = GoogleDriveIE._extract_url(webpage)
1869         if google_drive_url:
1870             return self.url_result(google_drive_url, 'GoogleDrive')
1871
1872         # Look for UDN embeds
1873         mobj = re.search(
1874             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1875         if mobj is not None:
1876             return self.url_result(
1877                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1878
1879         # Look for Senate ISVP iframe
1880         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1881         if senate_isvp_url:
1882             return self.url_result(senate_isvp_url, 'SenateISVP')
1883
1884         # Look for Dailymotion Cloud videos
1885         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1886         if dmcloud_url:
1887             return self.url_result(dmcloud_url, 'DailymotionCloud')
1888
1889         # Look for OnionStudios embeds
1890         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1891         if onionstudios_url:
1892             return self.url_result(onionstudios_url)
1893
1894         # Look for SnagFilms embeds
1895         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1896         if snagfilms_url:
1897             return self.url_result(snagfilms_url)
1898
1899         # Look for JWPlatform embeds
1900         jwplatform_url = JWPlatformIE._extract_url(webpage)
1901         if jwplatform_url:
1902             return self.url_result(jwplatform_url, 'JWPlatform')
1903
1904         # Look for ScreenwaveMedia embeds
1905         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1906         if mobj is not None:
1907             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1908
1909         # Look for Digiteka embeds
1910         digiteka_url = DigitekaIE._extract_url(webpage)
1911         if digiteka_url:
1912             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
1913
1914         # Look for Limelight embeds
1915         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
1916         if mobj:
1917             lm = {
1918                 'Media': 'media',
1919                 'Channel': 'channel',
1920                 'ChannelList': 'channel_list',
1921             }
1922             return self.url_result('limelight:%s:%s' % (
1923                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
1924
1925         # Look for AdobeTVVideo embeds
1926         mobj = re.search(
1927             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1928             webpage)
1929         if mobj is not None:
1930             return self.url_result(
1931                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1932                 'AdobeTVVideo')
1933
1934         # Look for Vine embeds
1935         mobj = re.search(
1936             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
1937             webpage)
1938         if mobj is not None:
1939             return self.url_result(
1940                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
1941
1942         # Look for Instagram embeds
1943         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
1944         if instagram_embed_url is not None:
1945             return self.url_result(instagram_embed_url, InstagramIE.ie_key())
1946
1947         def check_video(vurl):
1948             if YoutubeIE.suitable(vurl):
1949                 return True
1950             vpath = compat_urlparse.urlparse(vurl).path
1951             vext = determine_ext(vpath)
1952             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1953
1954         def filter_video(urls):
1955             return list(filter(check_video, urls))
1956
1957         # Start with something easy: JW Player in SWFObject
1958         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1959         if not found:
1960             # Look for gorilla-vid style embedding
1961             found = filter_video(re.findall(r'''(?sx)
1962                 (?:
1963                     jw_plugins|
1964                     JWPlayerOptions|
1965                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1966                 )
1967                 .*?
1968                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1969         if not found:
1970             # Broaden the search a little bit
1971             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1972         if not found:
1973             # Broaden the findall a little bit: JWPlayer JS loader
1974             found = filter_video(re.findall(
1975                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1976         if not found:
1977             # Flow player
1978             found = filter_video(re.findall(r'''(?xs)
1979                 flowplayer\("[^"]+",\s*
1980                     \{[^}]+?\}\s*,
1981                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1982                         ["']?url["']?\s*:\s*["']([^"']+)["']
1983             ''', webpage))
1984         if not found:
1985             # Cinerama player
1986             found = re.findall(
1987                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1988         if not found:
1989             # Try to find twitter cards info
1990             found = filter_video(re.findall(
1991                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1992         if not found:
1993             # We look for Open Graph info:
1994             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1995             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1996             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1997             if m_video_type is not None:
1998                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1999         if not found:
2000             # HTML5 video
2001             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
2002         if not found:
2003             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2004             found = re.search(
2005                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2006                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2007                 webpage)
2008             if not found:
2009                 # Look also in Refresh HTTP header
2010                 refresh_header = head_response.headers.get('Refresh')
2011                 if refresh_header:
2012                     # In python 2 response HTTP headers are bytestrings
2013                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2014                         refresh_header = refresh_header.decode('iso-8859-1')
2015                     found = re.search(REDIRECT_REGEX, refresh_header)
2016             if found:
2017                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2018                 self.report_following_redirect(new_url)
2019                 return {
2020                     '_type': 'url',
2021                     'url': new_url,
2022                 }
2023         if not found:
2024             raise UnsupportedError(url)
2025
2026         entries = []
2027         for video_url in found:
2028             video_url = video_url.replace('\\/', '/')
2029             video_url = compat_urlparse.urljoin(url, video_url)
2030             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2031
2032             # Sometimes, jwplayer extraction will result in a YouTube URL
2033             if YoutubeIE.suitable(video_url):
2034                 entries.append(self.url_result(video_url, 'Youtube'))
2035                 continue
2036
2037             # here's a fun little line of code for you:
2038             video_id = os.path.splitext(video_id)[0]
2039
2040             entry_info_dict = {
2041                 'id': video_id,
2042                 'uploader': video_uploader,
2043                 'title': video_title,
2044                 'age_limit': age_limit,
2045             }
2046
2047             ext = determine_ext(video_url)
2048             if ext == 'smil':
2049                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2050             elif ext == 'xspf':
2051                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2052             elif ext == 'm3u8':
2053                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2054             elif ext == 'mpd':
2055                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2056             elif ext == 'f4m':
2057                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2058             else:
2059                 entry_info_dict['url'] = video_url
2060
2061             if entry_info_dict.get('formats'):
2062                 self._sort_formats(entry_info_dict['formats'])
2063
2064             entries.append(entry_info_dict)
2065
2066         if len(entries) == 1:
2067             return entries[0]
2068         else:
2069             for num, e in enumerate(entries, start=1):
2070                 # 'url' results don't have a title
2071                 if e.get('title') is not None:
2072                     e['title'] = '%s (%d)' % (e['title'], num)
2073             return {
2074                 '_type': 'playlist',
2075                 'entries': entries,
2076             }