[generic] Add a test case for brightcove embed
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_urllib_parse_unquote,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     sanitized_Request,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import (
34     BrightcoveLegacyIE,
35     BrightcoveNewIE,
36 )
37 from .nbc import NBCSportsVPlayerIE
38 from .ooyala import OoyalaIE
39 from .rutv import RUTVIE
40 from .tvc import TVCIE
41 from .sportbox import SportBoxEmbedIE
42 from .smotri import SmotriIE
43 from .myvi import MyviIE
44 from .condenast import CondeNastIE
45 from .udn import UDNEmbedIE
46 from .senateisvp import SenateISVPIE
47 from .svt import SVTIE
48 from .pornhub import PornHubIE
49 from .xhamster import XHamsterEmbedIE
50 from .tnaflix import TNAFlixNetworkEmbedIE
51 from .vimeo import VimeoIE
52 from .dailymotion import DailymotionCloudIE
53 from .onionstudios import OnionStudiosIE
54 from .snagfilms import SnagFilmsEmbedIE
55 from .screenwavemedia import ScreenwaveMediaIE
56 from .mtv import MTVServicesEmbeddedIE
57 from .pladform import PladformIE
58 from .videomore import VideomoreIE
59 from .googledrive import GoogleDriveIE
60 from .jwplatform import JWPlatformIE
61 from .digiteka import DigitekaIE
62 from .instagram import InstagramIE
63
64
65 class GenericIE(InfoExtractor):
66     IE_DESC = 'Generic downloader that works on some sites'
67     _VALID_URL = r'.*'
68     IE_NAME = 'generic'
69     _TESTS = [
70         # Direct link to a video
71         {
72             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
73             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
74             'info_dict': {
75                 'id': 'trailer',
76                 'ext': 'mp4',
77                 'title': 'trailer',
78                 'upload_date': '20100513',
79             }
80         },
81         # Direct link to media delivered compressed (until Accept-Encoding is *)
82         {
83             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
84             'md5': '128c42e68b13950268b648275386fc74',
85             'info_dict': {
86                 'id': 'FictionJunction-Parallel_Hearts',
87                 'ext': 'flac',
88                 'title': 'FictionJunction-Parallel_Hearts',
89                 'upload_date': '20140522',
90             },
91             'expected_warnings': [
92                 'URL could be a direct video link, returning it as such.'
93             ]
94         },
95         # Direct download with broken HEAD
96         {
97             'url': 'http://ai-radio.org:8000/radio.opus',
98             'info_dict': {
99                 'id': 'radio',
100                 'ext': 'opus',
101                 'title': 'radio',
102             },
103             'params': {
104                 'skip_download': True,  # infinite live stream
105             },
106             'expected_warnings': [
107                 r'501.*Not Implemented'
108             ],
109         },
110         # Direct link with incorrect MIME type
111         {
112             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
113             'md5': '4ccbebe5f36706d85221f204d7eb5913',
114             'info_dict': {
115                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
116                 'id': '5_Lennart_Poettering_-_Systemd',
117                 'ext': 'webm',
118                 'title': '5_Lennart_Poettering_-_Systemd',
119                 'upload_date': '20141120',
120             },
121             'expected_warnings': [
122                 'URL could be a direct video link, returning it as such.'
123             ]
124         },
125         # RSS feed
126         {
127             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
128             'info_dict': {
129                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
130                 'title': 'Zero Punctuation',
131                 'description': 're:.*groundbreaking video review series.*'
132             },
133             'playlist_mincount': 11,
134         },
135         # RSS feed with enclosure
136         {
137             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
138             'info_dict': {
139                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
140                 'ext': 'm4v',
141                 'upload_date': '20150228',
142                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
143             }
144         },
145         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
146         {
147             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
148             'info_dict': {
149                 'id': 'smil',
150                 'ext': 'mp4',
151                 'title': 'Automatics, robotics and biocybernetics',
152                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
153                 'upload_date': '20130627',
154                 'formats': 'mincount:16',
155                 'subtitles': 'mincount:1',
156             },
157             'params': {
158                 'force_generic_extractor': True,
159                 'skip_download': True,
160             },
161         },
162         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
163         {
164             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
165             'info_dict': {
166                 'id': 'hds',
167                 'ext': 'flv',
168                 'title': 'hds',
169                 'formats': 'mincount:1',
170             },
171             'params': {
172                 'skip_download': True,
173             },
174         },
175         # SMIL from https://www.restudy.dk/video/play/id/1637
176         {
177             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
178             'info_dict': {
179                 'id': 'video_1637',
180                 'ext': 'flv',
181                 'title': 'video_1637',
182                 'formats': 'mincount:3',
183             },
184             'params': {
185                 'skip_download': True,
186             },
187         },
188         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
189         {
190             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
191             'info_dict': {
192                 'id': 'smil-service',
193                 'ext': 'flv',
194                 'title': 'smil-service',
195                 'formats': 'mincount:1',
196             },
197             'params': {
198                 'skip_download': True,
199             },
200         },
201         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
202         {
203             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
204             'info_dict': {
205                 'id': '4719370',
206                 'ext': 'mp4',
207                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
208                 'formats': 'mincount:3',
209             },
210             'params': {
211                 'skip_download': True,
212             },
213         },
214         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
215         {
216             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
217             'info_dict': {
218                 'id': 'mZlp2ctYIUEB',
219                 'ext': 'mp4',
220                 'title': 'Tikibad ontruimd wegens brand',
221                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
222                 'thumbnail': 're:^https?://.*\.jpg$',
223                 'duration': 33,
224             },
225             'params': {
226                 'skip_download': True,
227             },
228         },
229         # MPD from http://dash-mse-test.appspot.com/media.html
230         {
231             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
232             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
233             'info_dict': {
234                 'id': 'car-20120827-manifest',
235                 'ext': 'mp4',
236                 'title': 'car-20120827-manifest',
237                 'formats': 'mincount:9',
238             },
239             'params': {
240                 'format': 'bestvideo',
241             },
242         },
243         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
244         {
245             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
246             'info_dict': {
247                 'id': 'content',
248                 'ext': 'mp4',
249                 'title': 'content',
250                 'formats': 'mincount:8',
251             },
252             'params': {
253                 # m3u8 downloads
254                 'skip_download': True,
255             }
256         },
257         # m3u8 served with Content-Type: text/plain
258         {
259             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
260             'info_dict': {
261                 'id': 'index',
262                 'ext': 'mp4',
263                 'title': 'index',
264                 'upload_date': '20140720',
265                 'formats': 'mincount:11',
266             },
267             'params': {
268                 # m3u8 downloads
269                 'skip_download': True,
270             }
271         },
272         # google redirect
273         {
274             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
275             'info_dict': {
276                 'id': 'cmQHVoWB5FY',
277                 'ext': 'mp4',
278                 'upload_date': '20130224',
279                 'uploader_id': 'TheVerge',
280                 'description': 're:^Chris Ziegler takes a look at the\.*',
281                 'uploader': 'The Verge',
282                 'title': 'First Firefox OS phones side-by-side',
283             },
284             'params': {
285                 'skip_download': False,
286             }
287         },
288         {
289             # redirect in Refresh HTTP header
290             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
291             'info_dict': {
292                 'id': 'pO8h3EaFRdo',
293                 'ext': 'mp4',
294                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
295                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
296                 'upload_date': '20150917',
297                 'uploader_id': 'brtvofficial',
298                 'uploader': 'Boiler Room',
299             },
300             'params': {
301                 'skip_download': False,
302             },
303         },
304         {
305             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
306             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
307             'info_dict': {
308                 'id': '13601338388002',
309                 'ext': 'mp4',
310                 'uploader': 'www.hodiho.fr',
311                 'title': 'R\u00e9gis plante sa Jeep',
312             }
313         },
314         # bandcamp page with custom domain
315         {
316             'add_ie': ['Bandcamp'],
317             'url': 'http://bronyrock.com/track/the-pony-mash',
318             'info_dict': {
319                 'id': '3235767654',
320                 'ext': 'mp3',
321                 'title': 'The Pony Mash',
322                 'uploader': 'M_Pallante',
323             },
324             'skip': 'There is a limit of 200 free downloads / month for the test song',
325         },
326         # embedded brightcove video
327         # it also tests brightcove videos that need to set the 'Referer' in the
328         # http requests
329         {
330             'add_ie': ['BrightcoveLegacy'],
331             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
332             'info_dict': {
333                 'id': '2765128793001',
334                 'ext': 'mp4',
335                 'title': 'Le cours de bourse : l’analyse technique',
336                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
337                 'uploader': 'BFM BUSINESS',
338             },
339             'params': {
340                 'skip_download': True,
341             },
342         },
343         {
344             # https://github.com/rg3/youtube-dl/issues/2253
345             'url': 'http://bcove.me/i6nfkrc3',
346             'md5': '0ba9446db037002366bab3b3eb30c88c',
347             'info_dict': {
348                 'id': '3101154703001',
349                 'ext': 'mp4',
350                 'title': 'Still no power',
351                 'uploader': 'thestar.com',
352                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
353             },
354             'add_ie': ['BrightcoveLegacy'],
355         },
356         {
357             'url': 'http://www.championat.com/video/football/v/87/87499.html',
358             'md5': 'fb973ecf6e4a78a67453647444222983',
359             'info_dict': {
360                 'id': '3414141473001',
361                 'ext': 'mp4',
362                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
363                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
364                 'uploader': 'Championat',
365             },
366         },
367         {
368             # https://github.com/rg3/youtube-dl/issues/3541
369             'add_ie': ['BrightcoveLegacy'],
370             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
371             'info_dict': {
372                 'id': '3866516442001',
373                 'ext': 'mp4',
374                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
375                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
376                 'uploader': 'SBS Broadcasting',
377             },
378             'skip': 'Restricted to Netherlands',
379             'params': {
380                 'skip_download': True,  # m3u8 download
381             },
382         },
383         # ooyala video
384         {
385             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
386             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
387             'info_dict': {
388                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
389                 'ext': 'mp4',
390                 'title': '2cc213299525360.mov',  # that's what we get
391                 'duration': 238.231,
392             },
393             'add_ie': ['Ooyala'],
394         },
395         {
396             # ooyala video embedded with http://player.ooyala.com/iframe.js
397             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
398             'info_dict': {
399                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
400                 'ext': 'mp4',
401                 'title': '"Steve Jobs: Man in the Machine" trailer',
402                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
403                 'duration': 135.427,
404             },
405             'params': {
406                 'skip_download': True,
407             },
408         },
409         # multiple ooyala embeds on SBN network websites
410         {
411             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
412             'info_dict': {
413                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
414                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
415             },
416             'playlist_mincount': 3,
417             'params': {
418                 'skip_download': True,
419             },
420             'add_ie': ['Ooyala'],
421         },
422         # embed.ly video
423         {
424             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
425             'info_dict': {
426                 'id': '9ODmcdjQcHQ',
427                 'ext': 'mp4',
428                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
429                 'upload_date': '20140225',
430                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
431                 'uploader': 'Tested',
432                 'uploader_id': 'testedcom',
433             },
434             # No need to test YoutubeIE here
435             'params': {
436                 'skip_download': True,
437             },
438         },
439         # funnyordie embed
440         {
441             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
442             'info_dict': {
443                 'id': '18e820ec3f',
444                 'ext': 'mp4',
445                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
446                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
447             },
448         },
449         # RUTV embed
450         {
451             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
452             'info_dict': {
453                 'id': '776940',
454                 'ext': 'mp4',
455                 'title': 'Охотское море стало целиком российским',
456                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
457             },
458             'params': {
459                 # m3u8 download
460                 'skip_download': True,
461             },
462         },
463         # TVC embed
464         {
465             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
466             'info_dict': {
467                 'id': '55304',
468                 'ext': 'mp4',
469                 'title': 'Дошкольное воспитание',
470             },
471         },
472         # SportBox embed
473         {
474             'url': 'http://www.vestifinance.ru/articles/25753',
475             'info_dict': {
476                 'id': '25753',
477                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
478             },
479             'playlist': [{
480                 'info_dict': {
481                     'id': '370908',
482                     'title': 'Госзаказ. День 3',
483                     'ext': 'mp4',
484                 }
485             }, {
486                 'info_dict': {
487                     'id': '370905',
488                     'title': 'Госзаказ. День 2',
489                     'ext': 'mp4',
490                 }
491             }, {
492                 'info_dict': {
493                     'id': '370902',
494                     'title': 'Госзаказ. День 1',
495                     'ext': 'mp4',
496                 }
497             }],
498             'params': {
499                 # m3u8 download
500                 'skip_download': True,
501             },
502         },
503         # Myvi.ru embed
504         {
505             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
506             'info_dict': {
507                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
508                 'ext': 'mp4',
509                 'title': 'Ужастики, русский трейлер (2015)',
510                 'thumbnail': 're:^https?://.*\.jpg$',
511                 'duration': 153,
512             }
513         },
514         # XHamster embed
515         {
516             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
517             'info_dict': {
518                 'id': 'showthread',
519                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
520             },
521             'playlist_mincount': 7,
522         },
523         # Embedded TED video
524         {
525             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
526             'md5': '65fdff94098e4a607385a60c5177c638',
527             'info_dict': {
528                 'id': '1969',
529                 'ext': 'mp4',
530                 'title': 'Hidden miracles of the natural world',
531                 'uploader': 'Louie Schwartzberg',
532                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
533             }
534         },
535         # Embedded Ustream video
536         {
537             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
538             'md5': '27b99cdb639c9b12a79bca876a073417',
539             'info_dict': {
540                 'id': '45734260',
541                 'ext': 'flv',
542                 'uploader': 'AU SPA:  The NSA and Privacy',
543                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
544             }
545         },
546         # nowvideo embed hidden behind percent encoding
547         {
548             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
549             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
550             'info_dict': {
551                 'id': '06e53103ca9aa',
552                 'ext': 'flv',
553                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
554                 'description': 'No description',
555             },
556         },
557         # arte embed
558         {
559             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
560             'md5': '7653032cbb25bf6c80d80f217055fa43',
561             'info_dict': {
562                 'id': '048195-004_PLUS7-F',
563                 'ext': 'flv',
564                 'title': 'X:enius',
565                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
566                 'upload_date': '20140320',
567             },
568             'params': {
569                 'skip_download': 'Requires rtmpdump'
570             }
571         },
572         # francetv embed
573         {
574             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
575             'info_dict': {
576                 'id': 'EV_30231',
577                 'ext': 'mp4',
578                 'title': 'Alcaline, le concert avec Calogero',
579                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
580                 'upload_date': '20150226',
581                 'timestamp': 1424989860,
582                 'duration': 5400,
583             },
584             'params': {
585                 # m3u8 downloads
586                 'skip_download': True,
587             },
588             'expected_warnings': [
589                 'Forbidden'
590             ]
591         },
592         # Condé Nast embed
593         {
594             'url': 'http://www.wired.com/2014/04/honda-asimo/',
595             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
596             'info_dict': {
597                 'id': '53501be369702d3275860000',
598                 'ext': 'mp4',
599                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
600             }
601         },
602         # Dailymotion embed
603         {
604             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
605             'md5': '441aeeb82eb72c422c7f14ec533999cd',
606             'info_dict': {
607                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
608                 'ext': 'mp4',
609                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
610                 'uploader': 'Spi0n',
611             },
612             'add_ie': ['Dailymotion'],
613         },
614         # YouTube embed
615         {
616             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
617             'info_dict': {
618                 'id': 'FXRb4ykk4S0',
619                 'ext': 'mp4',
620                 'title': 'The NBL Auction 2014',
621                 'uploader': 'BADMINTON England',
622                 'uploader_id': 'BADMINTONEvents',
623                 'upload_date': '20140603',
624                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
625             },
626             'add_ie': ['Youtube'],
627             'params': {
628                 'skip_download': True,
629             }
630         },
631         # MTVServices embed
632         {
633             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
634             'md5': '35727f82f58c76d996fc188f9755b0d5',
635             'info_dict': {
636                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
637                 'ext': 'mp4',
638                 'title': 'Review',
639                 'description': 'Mario\'s life in the fast lane has never looked so good.',
640             },
641         },
642         # YouTube embed via <data-embed-url="">
643         {
644             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
645             'info_dict': {
646                 'id': '4vAffPZIT44',
647                 'ext': 'mp4',
648                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
649                 'uploader': 'Gameloft',
650                 'uploader_id': 'gameloft',
651                 'upload_date': '20140828',
652                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
653             },
654             'params': {
655                 'skip_download': True,
656             }
657         },
658         # Camtasia studio
659         {
660             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
661             'playlist': [{
662                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
663                 'info_dict': {
664                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
665                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
666                     'ext': 'flv',
667                     'duration': 2235.90,
668                 }
669             }, {
670                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
671                 'info_dict': {
672                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
673                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
674                     'ext': 'flv',
675                     'duration': 2235.93,
676                 }
677             }],
678             'info_dict': {
679                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
680             }
681         },
682         # Flowplayer
683         {
684             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
685             'md5': '9d65602bf31c6e20014319c7d07fba27',
686             'info_dict': {
687                 'id': '5123ea6d5e5a7',
688                 'ext': 'mp4',
689                 'age_limit': 18,
690                 'uploader': 'www.handjobhub.com',
691                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
692             }
693         },
694         # Multiple brightcove videos
695         # https://github.com/rg3/youtube-dl/issues/2283
696         {
697             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
698             'info_dict': {
699                 'id': 'always-never',
700                 'title': 'Always / Never - The New Yorker',
701             },
702             'playlist_count': 3,
703             'params': {
704                 'extract_flat': False,
705                 'skip_download': True,
706             }
707         },
708         # MLB embed
709         {
710             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
711             'md5': '96f09a37e44da40dd083e12d9a683327',
712             'info_dict': {
713                 'id': '33322633',
714                 'ext': 'mp4',
715                 'title': 'Ump changes call to ball',
716                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
717                 'duration': 48,
718                 'timestamp': 1401537900,
719                 'upload_date': '20140531',
720                 'thumbnail': 're:^https?://.*\.jpg$',
721             },
722         },
723         # Wistia embed
724         {
725             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
726             'md5': '8788b683c777a5cf25621eaf286d0c23',
727             'info_dict': {
728                 'id': '1cfaf6b7ea',
729                 'ext': 'mov',
730                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
731                 'duration': 643.0,
732                 'filesize': 182808282,
733                 'uploader': 'education-portal.com',
734             },
735         },
736         {
737             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
738             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
739             'info_dict': {
740                 'id': 'uxjb0lwrcz',
741                 'ext': 'mp4',
742                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
743                 'duration': 1715.0,
744                 'uploader': 'thoughtworks.wistia.com',
745             },
746         },
747         # Soundcloud embed
748         {
749             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
750             'info_dict': {
751                 'id': '174391317',
752                 'ext': 'mp3',
753                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
754                 'uploader': 'Sophos Security',
755                 'title': 'Chet Chat 171 - Oct 29, 2014',
756                 'upload_date': '20141029',
757             }
758         },
759         # Livestream embed
760         {
761             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
762             'info_dict': {
763                 'id': '67864563',
764                 'ext': 'flv',
765                 'upload_date': '20141112',
766                 'title': 'Rosetta #CometLanding webcast HL 10',
767             }
768         },
769         # LazyYT
770         {
771             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
772             'info_dict': {
773                 'id': '1986',
774                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
775             },
776             'playlist_mincount': 2,
777         },
778         # Cinchcast embed
779         {
780             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
781             'info_dict': {
782                 'id': '7141703',
783                 'ext': 'mp3',
784                 'upload_date': '20141126',
785                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
786             }
787         },
788         # Cinerama player
789         {
790             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
791             'info_dict': {
792                 'id': '730m_DandD_1901_512k',
793                 'ext': 'mp4',
794                 'uploader': 'www.abc.net.au',
795                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
796             }
797         },
798         # embedded viddler video
799         {
800             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
801             'info_dict': {
802                 'id': '4d03aad9',
803                 'ext': 'mp4',
804                 'uploader': 'deadspin',
805                 'title': 'WALL-TO-GORTAT',
806                 'timestamp': 1422285291,
807                 'upload_date': '20150126',
808             },
809             'add_ie': ['Viddler'],
810         },
811         # Libsyn embed
812         {
813             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
814             'info_dict': {
815                 'id': '3377616',
816                 'ext': 'mp3',
817                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
818                 'description': 'md5:601cb790edd05908957dae8aaa866465',
819                 'upload_date': '20150220',
820             },
821         },
822         # jwplayer YouTube
823         {
824             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
825             'info_dict': {
826                 'id': 'Mrj4DVp2zeA',
827                 'ext': 'mp4',
828                 'upload_date': '20150212',
829                 'uploader': 'The National Archives UK',
830                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
831                 'uploader_id': 'NationalArchives08',
832                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
833             },
834         },
835         # rtl.nl embed
836         {
837             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
838             'playlist_mincount': 5,
839             'info_dict': {
840                 'id': 'aanslagen-kopenhagen',
841                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
842             }
843         },
844         # Zapiks embed
845         {
846             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
847             'info_dict': {
848                 'id': '118046',
849                 'ext': 'mp4',
850                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
851             }
852         },
853         # Kaltura embed
854         {
855             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
856             'info_dict': {
857                 'id': '1_eergr3h1',
858                 'ext': 'mp4',
859                 'upload_date': '20150226',
860                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
861                 'timestamp': int,
862                 'title': 'John Carlson Postgame 2/25/15',
863             },
864         },
865         # Kaltura embed (different embed code)
866         {
867             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
868             'info_dict': {
869                 'id': '1_a52wc67y',
870                 'ext': 'flv',
871                 'upload_date': '20150127',
872                 'uploader_id': 'PremierMedia',
873                 'timestamp': int,
874                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
875             },
876         },
877         # Kaltura embed protected with referrer
878         {
879             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
880             'info_dict': {
881                 'id': '1_g4fbemnq',
882                 'ext': 'mp4',
883                 'title': 'Violetta - Achter De Schermen - Ruggero',
884                 'description': 'Achter de schermen met Ruggero',
885                 'timestamp': 1435133761,
886                 'upload_date': '20150624',
887                 'uploader_id': 'echojecka',
888             },
889         },
890         # Eagle.Platform embed (generic URL)
891         {
892             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
893             'info_dict': {
894                 'id': '227304',
895                 'ext': 'mp4',
896                 'title': 'Навальный вышел на свободу',
897                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
898                 'thumbnail': 're:^https?://.*\.jpg$',
899                 'duration': 87,
900                 'view_count': int,
901                 'age_limit': 0,
902             },
903         },
904         # ClipYou (Eagle.Platform) embed (custom URL)
905         {
906             'url': 'http://muz-tv.ru/play/7129/',
907             'info_dict': {
908                 'id': '12820',
909                 'ext': 'mp4',
910                 'title': "'O Sole Mio",
911                 'thumbnail': 're:^https?://.*\.jpg$',
912                 'duration': 216,
913                 'view_count': int,
914             },
915         },
916         # Pladform embed
917         {
918             'url': 'http://muz-tv.ru/kinozal/view/7400/',
919             'info_dict': {
920                 'id': '100183293',
921                 'ext': 'mp4',
922                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
923                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
924                 'thumbnail': 're:^https?://.*\.jpg$',
925                 'duration': 694,
926                 'age_limit': 0,
927             },
928         },
929         # Playwire embed
930         {
931             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
932             'info_dict': {
933                 'id': '3519514',
934                 'ext': 'mp4',
935                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
936                 'thumbnail': 're:^https?://.*\.png$',
937                 'duration': 45.115,
938             },
939         },
940         # 5min embed
941         {
942             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
943             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
944             'info_dict': {
945                 'id': '518726732',
946                 'ext': 'mp4',
947                 'title': 'Facebook Creates "On This Day" | Crunch Report',
948             },
949         },
950         # SVT embed
951         {
952             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
953             'info_dict': {
954                 'id': '2900353',
955                 'ext': 'flv',
956                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
957                 'duration': 27,
958                 'age_limit': 0,
959             },
960         },
961         # Crooks and Liars embed
962         {
963             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
964             'info_dict': {
965                 'id': '8RUoRhRi',
966                 'ext': 'mp4',
967                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
968                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
969                 'timestamp': 1428207000,
970                 'upload_date': '20150405',
971                 'uploader': 'Heather',
972             },
973         },
974         # Crooks and Liars external embed
975         {
976             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
977             'info_dict': {
978                 'id': 'MTE3MjUtMzQ2MzA',
979                 'ext': 'mp4',
980                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
981                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
982                 'timestamp': 1265032391,
983                 'upload_date': '20100201',
984                 'uploader': 'Heather',
985             },
986         },
987         # NBC Sports vplayer embed
988         {
989             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
990             'info_dict': {
991                 'id': 'ln7x1qSThw4k',
992                 'ext': 'flv',
993                 'title': "PFT Live: New leader in the 'new-look' defense",
994                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
995             },
996         },
997         # UDN embed
998         {
999             'url': 'http://www.udn.com/news/story/7314/822787',
1000             'md5': 'fd2060e988c326991037b9aff9df21a6',
1001             'info_dict': {
1002                 'id': '300346',
1003                 'ext': 'mp4',
1004                 'title': '中一中男師變性 全校師生力挺',
1005                 'thumbnail': 're:^https?://.*\.jpg$',
1006             }
1007         },
1008         # Ooyala embed
1009         {
1010             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1011             'info_dict': {
1012                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1013                 'ext': 'mp4',
1014                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1015                 'title': 'This is what separates the Excel masters from the wannabes',
1016                 'duration': 191.933,
1017             },
1018             'params': {
1019                 # m3u8 downloads
1020                 'skip_download': True,
1021             }
1022         },
1023         # Contains a SMIL manifest
1024         {
1025             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1026             'info_dict': {
1027                 'id': 'file',
1028                 'ext': 'flv',
1029                 'title': '+ Football: Lottery Champions League Europe',
1030                 'uploader': 'www.telewebion.com',
1031             },
1032             'params': {
1033                 # rtmpe downloads
1034                 'skip_download': True,
1035             }
1036         },
1037         # Brightcove URL in single quotes
1038         {
1039             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1040             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1041             'info_dict': {
1042                 'id': '4255764656001',
1043                 'ext': 'mp4',
1044                 'title': 'SN Presents: Russell Martin, World Citizen',
1045                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1046                 'uploader': 'Rogers Sportsnet',
1047             },
1048         },
1049         # Dailymotion Cloud video
1050         {
1051             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1052             'md5': '49444254273501a64675a7e68c502681',
1053             'info_dict': {
1054                 'id': '5585de919473990de4bee11b',
1055                 'ext': 'mp4',
1056                 'title': 'Le débat',
1057                 'thumbnail': 're:^https?://.*\.jpe?g$',
1058             }
1059         },
1060         # OnionStudios embed
1061         {
1062             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1063             'info_dict': {
1064                 'id': '2855',
1065                 'ext': 'mp4',
1066                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1067                 'thumbnail': 're:^https?://.*\.jpe?g$',
1068                 'uploader': 'ClickHole',
1069                 'uploader_id': 'clickhole',
1070             }
1071         },
1072         # SnagFilms embed
1073         {
1074             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1075             'info_dict': {
1076                 'id': '74849a00-85a9-11e1-9660-123139220831',
1077                 'ext': 'mp4',
1078                 'title': '#whilewewatch',
1079             }
1080         },
1081         # AdobeTVVideo embed
1082         {
1083             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1084             'md5': '43662b577c018ad707a63766462b1e87',
1085             'info_dict': {
1086                 'id': '2456',
1087                 'ext': 'mp4',
1088                 'title': 'New experience with Acrobat DC',
1089                 'description': 'New experience with Acrobat DC',
1090                 'duration': 248.667,
1091             },
1092         },
1093         # ScreenwaveMedia embed
1094         {
1095             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1096             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1097             'info_dict': {
1098                 'id': 'cinemasnob-55d26273809dd',
1099                 'ext': 'mp4',
1100                 'title': 'cinemasnob',
1101             },
1102         },
1103         # BrightcoveInPageEmbed embed
1104         {
1105             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1106             'info_dict': {
1107                 'id': '4238694884001',
1108                 'ext': 'flv',
1109                 'title': 'Tabletop: Dread, Last Thoughts',
1110                 'description': 'Tabletop: Dread, Last Thoughts',
1111                 'duration': 51690,
1112             },
1113         },
1114         # JWPlayer with M3U8
1115         {
1116             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1117             'info_dict': {
1118                 'id': 'playlist',
1119                 'ext': 'mp4',
1120                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1121                 'uploader': 'ren.tv',
1122             },
1123             'params': {
1124                 # m3u8 downloads
1125                 'skip_download': True,
1126             }
1127         },
1128         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1129         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1130         {
1131             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1132             'info_dict': {
1133                 'id': '4785848093001',
1134                 'ext': 'mp4',
1135                 'title': 'The Cardinal Pell Interview',
1136                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1137                 'uploader': 'GlobeCast Australia - GlobeStream',
1138             },
1139             'params': {
1140                 # m3u8 downloads
1141                 'skip_download': True,
1142             },
1143         },
1144     ]
1145
1146     def report_following_redirect(self, new_url):
1147         """Report information extraction."""
1148         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1149
1150     def _extract_rss(self, url, video_id, doc):
1151         playlist_title = doc.find('./channel/title').text
1152         playlist_desc_el = doc.find('./channel/description')
1153         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1154
1155         entries = []
1156         for it in doc.findall('./channel/item'):
1157             next_url = xpath_text(it, 'link', fatal=False)
1158             if not next_url:
1159                 enclosure_nodes = it.findall('./enclosure')
1160                 for e in enclosure_nodes:
1161                     next_url = e.attrib.get('url')
1162                     if next_url:
1163                         break
1164
1165             if not next_url:
1166                 continue
1167
1168             entries.append({
1169                 '_type': 'url',
1170                 'url': next_url,
1171                 'title': it.find('title').text,
1172             })
1173
1174         return {
1175             '_type': 'playlist',
1176             'id': url,
1177             'title': playlist_title,
1178             'description': playlist_desc,
1179             'entries': entries,
1180         }
1181
1182     def _extract_camtasia(self, url, video_id, webpage):
1183         """ Returns None if no camtasia video can be found. """
1184
1185         camtasia_cfg = self._search_regex(
1186             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1187             webpage, 'camtasia configuration file', default=None)
1188         if camtasia_cfg is None:
1189             return None
1190
1191         title = self._html_search_meta('DC.title', webpage, fatal=True)
1192
1193         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1194         camtasia_cfg = self._download_xml(
1195             camtasia_url, video_id,
1196             note='Downloading camtasia configuration',
1197             errnote='Failed to download camtasia configuration')
1198         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1199
1200         entries = []
1201         for n in fileset_node.getchildren():
1202             url_n = n.find('./uri')
1203             if url_n is None:
1204                 continue
1205
1206             entries.append({
1207                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1208                 'title': '%s - %s' % (title, n.tag),
1209                 'url': compat_urlparse.urljoin(url, url_n.text),
1210                 'duration': float_or_none(n.find('./duration').text),
1211             })
1212
1213         return {
1214             '_type': 'playlist',
1215             'entries': entries,
1216             'title': title,
1217         }
1218
1219     def _real_extract(self, url):
1220         if url.startswith('//'):
1221             return {
1222                 '_type': 'url',
1223                 'url': self.http_scheme() + url,
1224             }
1225
1226         parsed_url = compat_urlparse.urlparse(url)
1227         if not parsed_url.scheme:
1228             default_search = self._downloader.params.get('default_search')
1229             if default_search is None:
1230                 default_search = 'fixup_error'
1231
1232             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1233                 if '/' in url:
1234                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1235                     return self.url_result('http://' + url)
1236                 elif default_search != 'fixup_error':
1237                     if default_search == 'auto_warning':
1238                         if re.match(r'^(?:url|URL)$', url):
1239                             raise ExtractorError(
1240                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1241                                 expected=True)
1242                         else:
1243                             self._downloader.report_warning(
1244                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1245                     return self.url_result('ytsearch:' + url)
1246
1247             if default_search in ('error', 'fixup_error'):
1248                 raise ExtractorError(
1249                     '%r is not a valid URL. '
1250                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1251                     % (url, url), expected=True)
1252             else:
1253                 if ':' not in default_search:
1254                     default_search += ':'
1255                 return self.url_result(default_search + url)
1256
1257         url, smuggled_data = unsmuggle_url(url)
1258         force_videoid = None
1259         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1260         if smuggled_data and 'force_videoid' in smuggled_data:
1261             force_videoid = smuggled_data['force_videoid']
1262             video_id = force_videoid
1263         else:
1264             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1265
1266         self.to_screen('%s: Requesting header' % video_id)
1267
1268         head_req = HEADRequest(url)
1269         head_response = self._request_webpage(
1270             head_req, video_id,
1271             note=False, errnote='Could not send HEAD request to %s' % url,
1272             fatal=False)
1273
1274         if head_response is not False:
1275             # Check for redirect
1276             new_url = head_response.geturl()
1277             if url != new_url:
1278                 self.report_following_redirect(new_url)
1279                 if force_videoid:
1280                     new_url = smuggle_url(
1281                         new_url, {'force_videoid': force_videoid})
1282                 return self.url_result(new_url)
1283
1284         full_response = None
1285         if head_response is False:
1286             request = sanitized_Request(url)
1287             request.add_header('Accept-Encoding', '*')
1288             full_response = self._request_webpage(request, video_id)
1289             head_response = full_response
1290
1291         info_dict = {
1292             'id': video_id,
1293             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1294             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1295         }
1296
1297         # Check for direct link to a video
1298         content_type = head_response.headers.get('Content-Type', '').lower()
1299         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1300         if m:
1301             format_id = m.group('format_id')
1302             if format_id.endswith('mpegurl'):
1303                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1304             elif format_id == 'f4m':
1305                 formats = self._extract_f4m_formats(url, video_id)
1306             else:
1307                 formats = [{
1308                     'format_id': m.group('format_id'),
1309                     'url': url,
1310                     'vcodec': 'none' if m.group('type') == 'audio' else None
1311                 }]
1312                 info_dict['direct'] = True
1313             info_dict['formats'] = formats
1314             return info_dict
1315
1316         if not self._downloader.params.get('test', False) and not is_intentional:
1317             force = self._downloader.params.get('force_generic_extractor', False)
1318             self._downloader.report_warning(
1319                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1320
1321         if not full_response:
1322             request = sanitized_Request(url)
1323             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1324             # making it impossible to download only chunk of the file (yet we need only 512kB to
1325             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1326             # that will always result in downloading the whole file that is not desirable.
1327             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1328             # to accept raw bytes and being able to download only a chunk.
1329             # It may probably better to solve this by checking Content-Type for application/octet-stream
1330             # after HEAD request finishes, but not sure if we can rely on this.
1331             request.add_header('Accept-Encoding', '*')
1332             full_response = self._request_webpage(request, video_id)
1333
1334         first_bytes = full_response.read(512)
1335
1336         # Is it an M3U playlist?
1337         if first_bytes.startswith(b'#EXTM3U'):
1338             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1339             return info_dict
1340
1341         # Maybe it's a direct link to a video?
1342         # Be careful not to download the whole thing!
1343         if not is_html(first_bytes):
1344             self._downloader.report_warning(
1345                 'URL could be a direct video link, returning it as such.')
1346             info_dict.update({
1347                 'direct': True,
1348                 'url': url,
1349             })
1350             return info_dict
1351
1352         webpage = self._webpage_read_content(
1353             full_response, url, video_id, prefix=first_bytes)
1354
1355         self.report_extraction(video_id)
1356
1357         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1358         try:
1359             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1360             if doc.tag == 'rss':
1361                 return self._extract_rss(url, video_id, doc)
1362             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1363                 return self._parse_smil(doc, url, video_id)
1364             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1365                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1366             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1367                 info_dict['formats'] = self._parse_mpd_formats(
1368                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1369                 return info_dict
1370             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1371                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1372                 return info_dict
1373         except compat_xml_parse_error:
1374             pass
1375
1376         # Is it a Camtasia project?
1377         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1378         if camtasia_res is not None:
1379             return camtasia_res
1380
1381         # Sometimes embedded video player is hidden behind percent encoding
1382         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1383         # Unescaping the whole page allows to handle those cases in a generic way
1384         webpage = compat_urllib_parse_unquote(webpage)
1385
1386         # it's tempting to parse this further, but you would
1387         # have to take into account all the variations like
1388         #   Video Title - Site Name
1389         #   Site Name | Video Title
1390         #   Video Title - Tagline | Site Name
1391         # and so on and so forth; it's just not practical
1392         video_title = self._html_search_regex(
1393             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1394             default='video')
1395
1396         # Try to detect age limit automatically
1397         age_limit = self._rta_search(webpage)
1398         # And then there are the jokers who advertise that they use RTA,
1399         # but actually don't.
1400         AGE_LIMIT_MARKERS = [
1401             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1402         ]
1403         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1404             age_limit = 18
1405
1406         # video uploader is domain name
1407         video_uploader = self._search_regex(
1408             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1409
1410         # Helper method
1411         def _playlist_from_matches(matches, getter=None, ie=None):
1412             urlrs = orderedSet(
1413                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1414                 for m in matches)
1415             return self.playlist_result(
1416                 urlrs, playlist_id=video_id, playlist_title=video_title)
1417
1418         # Look for Brightcove Legacy Studio embeds
1419         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1420         if bc_urls:
1421             self.to_screen('Brightcove video detected.')
1422             entries = [{
1423                 '_type': 'url',
1424                 'url': smuggle_url(bc_url, {'Referer': url}),
1425                 'ie_key': 'BrightcoveLegacy'
1426             } for bc_url in bc_urls]
1427
1428             return {
1429                 '_type': 'playlist',
1430                 'title': video_title,
1431                 'id': video_id,
1432                 'entries': entries,
1433             }
1434
1435         # Look for Brightcove New Studio embeds
1436         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1437         if bc_urls:
1438             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1439
1440         # Look for embedded rtl.nl player
1441         matches = re.findall(
1442             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1443             webpage)
1444         if matches:
1445             return _playlist_from_matches(matches, ie='RtlNl')
1446
1447         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1448         if vimeo_url is not None:
1449             return self.url_result(vimeo_url)
1450
1451         vid_me_embed_url = self._search_regex(
1452             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1453             webpage, 'vid.me embed', default=None)
1454         if vid_me_embed_url is not None:
1455             return self.url_result(vid_me_embed_url, 'Vidme')
1456
1457         # Look for embedded YouTube player
1458         matches = re.findall(r'''(?x)
1459             (?:
1460                 <iframe[^>]+?src=|
1461                 data-video-url=|
1462                 <embed[^>]+?src=|
1463                 embedSWF\(?:\s*|
1464                 new\s+SWFObject\(
1465             )
1466             (["\'])
1467                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1468                 (?:embed|v|p)/.+?)
1469             \1''', webpage)
1470         if matches:
1471             return _playlist_from_matches(
1472                 matches, lambda m: unescapeHTML(m[1]))
1473
1474         # Look for lazyYT YouTube embed
1475         matches = re.findall(
1476             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1477         if matches:
1478             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1479
1480         # Look for embedded Dailymotion player
1481         matches = re.findall(
1482             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1483         if matches:
1484             return _playlist_from_matches(
1485                 matches, lambda m: unescapeHTML(m[1]))
1486
1487         # Look for embedded Dailymotion playlist player (#3822)
1488         m = re.search(
1489             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1490         if m:
1491             playlists = re.findall(
1492                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1493             if playlists:
1494                 return _playlist_from_matches(
1495                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1496
1497         # Look for embedded Wistia player
1498         match = re.search(
1499             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1500         if match:
1501             embed_url = self._proto_relative_url(
1502                 unescapeHTML(match.group('url')))
1503             return {
1504                 '_type': 'url_transparent',
1505                 'url': embed_url,
1506                 'ie_key': 'Wistia',
1507                 'uploader': video_uploader,
1508                 'title': video_title,
1509                 'id': video_id,
1510             }
1511
1512         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1513         if match:
1514             return {
1515                 '_type': 'url_transparent',
1516                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1517                 'ie_key': 'Wistia',
1518                 'uploader': video_uploader,
1519                 'title': video_title,
1520                 'id': match.group('id')
1521             }
1522
1523         # Look for SVT player
1524         svt_url = SVTIE._extract_url(webpage)
1525         if svt_url:
1526             return self.url_result(svt_url, 'SVT')
1527
1528         # Look for embedded condenast player
1529         matches = re.findall(
1530             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1531             webpage)
1532         if matches:
1533             return {
1534                 '_type': 'playlist',
1535                 'entries': [{
1536                     '_type': 'url',
1537                     'ie_key': 'CondeNast',
1538                     'url': ma,
1539                 } for ma in matches],
1540                 'title': video_title,
1541                 'id': video_id,
1542             }
1543
1544         # Look for Bandcamp pages with custom domain
1545         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1546         if mobj is not None:
1547             burl = unescapeHTML(mobj.group(1))
1548             # Don't set the extractor because it can be a track url or an album
1549             return self.url_result(burl)
1550
1551         # Look for embedded Vevo player
1552         mobj = re.search(
1553             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1554         if mobj is not None:
1555             return self.url_result(mobj.group('url'))
1556
1557         # Look for embedded Viddler player
1558         mobj = re.search(
1559             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1560             webpage)
1561         if mobj is not None:
1562             return self.url_result(mobj.group('url'))
1563
1564         # Look for NYTimes player
1565         mobj = re.search(
1566             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1567             webpage)
1568         if mobj is not None:
1569             return self.url_result(mobj.group('url'))
1570
1571         # Look for Libsyn player
1572         mobj = re.search(
1573             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1574         if mobj is not None:
1575             return self.url_result(mobj.group('url'))
1576
1577         # Look for Ooyala videos
1578         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1579                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1580                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1581                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1582         if mobj is not None:
1583             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1584
1585         # Look for multiple Ooyala embeds on SBN network websites
1586         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1587         if mobj is not None:
1588             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1589             if embeds:
1590                 return _playlist_from_matches(
1591                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1592
1593         # Look for Aparat videos
1594         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1595         if mobj is not None:
1596             return self.url_result(mobj.group(1), 'Aparat')
1597
1598         # Look for MPORA videos
1599         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1600         if mobj is not None:
1601             return self.url_result(mobj.group(1), 'Mpora')
1602
1603         # Look for embedded NovaMov-based player
1604         mobj = re.search(
1605             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1606                     (?P<url>http://(?:(?:embed|www)\.)?
1607                         (?:novamov\.com|
1608                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1609                            videoweed\.(?:es|com)|
1610                            movshare\.(?:net|sx|ag)|
1611                            divxstage\.(?:eu|net|ch|co|at|ag))
1612                         /embed\.php.+?)\1''', webpage)
1613         if mobj is not None:
1614             return self.url_result(mobj.group('url'))
1615
1616         # Look for embedded Facebook player
1617         mobj = re.search(
1618             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1619         if mobj is not None:
1620             return self.url_result(mobj.group('url'), 'Facebook')
1621
1622         # Look for embedded VK player
1623         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1624         if mobj is not None:
1625             return self.url_result(mobj.group('url'), 'VK')
1626
1627         # Look for embedded Odnoklassniki player
1628         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1629         if mobj is not None:
1630             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1631
1632         # Look for embedded ivi player
1633         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1634         if mobj is not None:
1635             return self.url_result(mobj.group('url'), 'Ivi')
1636
1637         # Look for embedded Huffington Post player
1638         mobj = re.search(
1639             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1640         if mobj is not None:
1641             return self.url_result(mobj.group('url'), 'HuffPost')
1642
1643         # Look for embed.ly
1644         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1645         if mobj is not None:
1646             return self.url_result(mobj.group('url'))
1647         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1648         if mobj is not None:
1649             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1650
1651         # Look for funnyordie embed
1652         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1653         if matches:
1654             return _playlist_from_matches(
1655                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1656
1657         # Look for BBC iPlayer embed
1658         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1659         if matches:
1660             return _playlist_from_matches(matches, ie='BBCCoUk')
1661
1662         # Look for embedded RUTV player
1663         rutv_url = RUTVIE._extract_url(webpage)
1664         if rutv_url:
1665             return self.url_result(rutv_url, 'RUTV')
1666
1667         # Look for embedded TVC player
1668         tvc_url = TVCIE._extract_url(webpage)
1669         if tvc_url:
1670             return self.url_result(tvc_url, 'TVC')
1671
1672         # Look for embedded SportBox player
1673         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1674         if sportbox_urls:
1675             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1676
1677         # Look for embedded PornHub player
1678         pornhub_url = PornHubIE._extract_url(webpage)
1679         if pornhub_url:
1680             return self.url_result(pornhub_url, 'PornHub')
1681
1682         # Look for embedded XHamster player
1683         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1684         if xhamster_urls:
1685             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1686
1687         # Look for embedded TNAFlixNetwork player
1688         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1689         if tnaflix_urls:
1690             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1691
1692         # Look for embedded Tvigle player
1693         mobj = re.search(
1694             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1695         if mobj is not None:
1696             return self.url_result(mobj.group('url'), 'Tvigle')
1697
1698         # Look for embedded TED player
1699         mobj = re.search(
1700             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1701         if mobj is not None:
1702             return self.url_result(mobj.group('url'), 'TED')
1703
1704         # Look for embedded Ustream videos
1705         mobj = re.search(
1706             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1707         if mobj is not None:
1708             return self.url_result(mobj.group('url'), 'Ustream')
1709
1710         # Look for embedded arte.tv player
1711         mobj = re.search(
1712             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1713             webpage)
1714         if mobj is not None:
1715             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1716
1717         # Look for embedded francetv player
1718         mobj = re.search(
1719             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1720             webpage)
1721         if mobj is not None:
1722             return self.url_result(mobj.group('url'))
1723
1724         # Look for embedded smotri.com player
1725         smotri_url = SmotriIE._extract_url(webpage)
1726         if smotri_url:
1727             return self.url_result(smotri_url, 'Smotri')
1728
1729         # Look for embedded Myvi.ru player
1730         myvi_url = MyviIE._extract_url(webpage)
1731         if myvi_url:
1732             return self.url_result(myvi_url)
1733
1734         # Look for embedded soundcloud player
1735         mobj = re.search(
1736             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1737             webpage)
1738         if mobj is not None:
1739             url = unescapeHTML(mobj.group('url'))
1740             return self.url_result(url)
1741
1742         # Look for embedded vulture.com player
1743         mobj = re.search(
1744             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1745             webpage)
1746         if mobj is not None:
1747             url = unescapeHTML(mobj.group('url'))
1748             return self.url_result(url, ie='Vulture')
1749
1750         # Look for embedded mtvservices player
1751         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1752         if mtvservices_url:
1753             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1754
1755         # Look for embedded yahoo player
1756         mobj = re.search(
1757             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1758             webpage)
1759         if mobj is not None:
1760             return self.url_result(mobj.group('url'), 'Yahoo')
1761
1762         # Look for embedded sbs.com.au player
1763         mobj = re.search(
1764             r'''(?x)
1765             (?:
1766                 <meta\s+property="og:video"\s+content=|
1767                 <iframe[^>]+?src=
1768             )
1769             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1770             webpage)
1771         if mobj is not None:
1772             return self.url_result(mobj.group('url'), 'SBS')
1773
1774         # Look for embedded Cinchcast player
1775         mobj = re.search(
1776             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1777             webpage)
1778         if mobj is not None:
1779             return self.url_result(mobj.group('url'), 'Cinchcast')
1780
1781         mobj = re.search(
1782             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1783             webpage)
1784         if not mobj:
1785             mobj = re.search(
1786                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1787                 webpage)
1788         if mobj is not None:
1789             return self.url_result(mobj.group('url'), 'MLB')
1790
1791         mobj = re.search(
1792             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1793             webpage)
1794         if mobj is not None:
1795             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1796
1797         mobj = re.search(
1798             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1799             webpage)
1800         if mobj is not None:
1801             return self.url_result(mobj.group('url'), 'Livestream')
1802
1803         # Look for Zapiks embed
1804         mobj = re.search(
1805             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1806         if mobj is not None:
1807             return self.url_result(mobj.group('url'), 'Zapiks')
1808
1809         # Look for Kaltura embeds
1810         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1811                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1812         if mobj is not None:
1813             return self.url_result(smuggle_url(
1814                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1815                 {'source_url': url}), 'Kaltura')
1816
1817         # Look for Eagle.Platform embeds
1818         mobj = re.search(
1819             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1820         if mobj is not None:
1821             return self.url_result(mobj.group('url'), 'EaglePlatform')
1822
1823         # Look for ClipYou (uses Eagle.Platform) embeds
1824         mobj = re.search(
1825             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1826         if mobj is not None:
1827             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1828
1829         # Look for Pladform embeds
1830         pladform_url = PladformIE._extract_url(webpage)
1831         if pladform_url:
1832             return self.url_result(pladform_url)
1833
1834         # Look for Videomore embeds
1835         videomore_url = VideomoreIE._extract_url(webpage)
1836         if videomore_url:
1837             return self.url_result(videomore_url)
1838
1839         # Look for Playwire embeds
1840         mobj = re.search(
1841             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1842         if mobj is not None:
1843             return self.url_result(mobj.group('url'))
1844
1845         # Look for 5min embeds
1846         mobj = re.search(
1847             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1848         if mobj is not None:
1849             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1850
1851         # Look for Crooks and Liars embeds
1852         mobj = re.search(
1853             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1854         if mobj is not None:
1855             return self.url_result(mobj.group('url'))
1856
1857         # Look for NBC Sports VPlayer embeds
1858         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1859         if nbc_sports_url:
1860             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1861
1862         # Look for Google Drive embeds
1863         google_drive_url = GoogleDriveIE._extract_url(webpage)
1864         if google_drive_url:
1865             return self.url_result(google_drive_url, 'GoogleDrive')
1866
1867         # Look for UDN embeds
1868         mobj = re.search(
1869             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1870         if mobj is not None:
1871             return self.url_result(
1872                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1873
1874         # Look for Senate ISVP iframe
1875         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1876         if senate_isvp_url:
1877             return self.url_result(senate_isvp_url, 'SenateISVP')
1878
1879         # Look for Dailymotion Cloud videos
1880         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1881         if dmcloud_url:
1882             return self.url_result(dmcloud_url, 'DailymotionCloud')
1883
1884         # Look for OnionStudios embeds
1885         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1886         if onionstudios_url:
1887             return self.url_result(onionstudios_url)
1888
1889         # Look for SnagFilms embeds
1890         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1891         if snagfilms_url:
1892             return self.url_result(snagfilms_url)
1893
1894         # Look for JWPlatform embeds
1895         jwplatform_url = JWPlatformIE._extract_url(webpage)
1896         if jwplatform_url:
1897             return self.url_result(jwplatform_url, 'JWPlatform')
1898
1899         # Look for ScreenwaveMedia embeds
1900         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1901         if mobj is not None:
1902             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1903
1904         # Look for Digiteka embeds
1905         digiteka_url = DigitekaIE._extract_url(webpage)
1906         if digiteka_url:
1907             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
1908
1909         # Look for Limelight embeds
1910         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
1911         if mobj:
1912             lm = {
1913                 'Media': 'media',
1914                 'Channel': 'channel',
1915                 'ChannelList': 'channel_list',
1916             }
1917             return self.url_result('limelight:%s:%s' % (
1918                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
1919
1920         # Look for AdobeTVVideo embeds
1921         mobj = re.search(
1922             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1923             webpage)
1924         if mobj is not None:
1925             return self.url_result(
1926                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1927                 'AdobeTVVideo')
1928
1929         # Look for Vine embeds
1930         mobj = re.search(
1931             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
1932             webpage)
1933         if mobj is not None:
1934             return self.url_result(
1935                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
1936
1937         # Look for Instagram embeds
1938         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
1939         if instagram_embed_url is not None:
1940             return self.url_result(instagram_embed_url, InstagramIE.ie_key())
1941
def check_video(vurl):
    """Return True when *vurl* looks like a candidate media URL.

    A URL that YoutubeIE reports as suitable is always accepted. Any
    other URL is accepted only if the path component of the URL contains
    a dot and the extension reported by determine_ext() for that path is
    not one of the excluded Flash, image or subtitle extensions below.
    """
    if YoutubeIE.suitable(vurl):
        return True

    # Extensions that show up in player markup but are not media streams.
    excluded_exts = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')

    path = compat_urlparse.urlparse(vurl).path
    if '.' not in path:
        # No extension at all in the path: not treated as a video.
        return False
    return determine_ext(path) not in excluded_exts
1948
def filter_video(urls):
    """Return a list of the entries of *urls* that check_video() accepts.

    The relative order of the accepted entries is unchanged.
    """
    return [candidate for candidate in urls if check_video(candidate)]
1951
1952         # Start with something easy: JW Player in SWFObject
1953         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1954         if not found:
1955             # Look for gorilla-vid style embedding
1956             found = filter_video(re.findall(r'''(?sx)
1957                 (?:
1958                     jw_plugins|
1959                     JWPlayerOptions|
1960                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1961                 )
1962                 .*?
1963                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1964         if not found:
1965             # Broaden the search a little bit
1966             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1967         if not found:
1968             # Broaden the search a little bit more: JWPlayer JS loader
1969             found = filter_video(re.findall(
1970                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1971         if not found:
1972             # Flow player
1973             found = filter_video(re.findall(r'''(?xs)
1974                 flowplayer\("[^"]+",\s*
1975                     \{[^}]+?\}\s*,
1976                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1977                         ["']?url["']?\s*:\s*["']([^"']+)["']
1978             ''', webpage))
1979         if not found:
1980             # Cinerama player
1981             found = re.findall(
1982                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1983         if not found:
1984             # Try to find twitter cards info
1985             found = filter_video(re.findall(
1986                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1987         if not found:
1988             # We look for Open Graph info:
1989             # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1990             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1991             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1992             if m_video_type is not None:
1993                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1994         if not found:
1995             # HTML5 video
1996             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1997         if not found:
1998             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1999             found = re.search(
2000                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2001                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2002                 webpage)
2003             if not found:
2004                 # Look also in Refresh HTTP header
2005                 refresh_header = head_response.headers.get('Refresh')
2006                 if refresh_header:
2007                     # In python 2 response HTTP headers are bytestrings
2008                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2009                         refresh_header = refresh_header.decode('iso-8859-1')
2010                     found = re.search(REDIRECT_REGEX, refresh_header)
2011             if found:
2012                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2013                 self.report_following_redirect(new_url)
2014                 return {
2015                     '_type': 'url',
2016                     'url': new_url,
2017                 }
2018         if not found:
2019             raise UnsupportedError(url)
2020
2021         entries = []
2022         for video_url in found:
2023             video_url = video_url.replace('\\/', '/')
2024             video_url = compat_urlparse.urljoin(url, video_url)
2025             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2026
2027             # Sometimes, jwplayer extraction will result in a YouTube URL
2028             if YoutubeIE.suitable(video_url):
2029                 entries.append(self.url_result(video_url, 'Youtube'))
2030                 continue
2031
2032             # here's a fun little line of code for you:
2033             video_id = os.path.splitext(video_id)[0]
2034
2035             entry_info_dict = {
2036                 'id': video_id,
2037                 'uploader': video_uploader,
2038                 'title': video_title,
2039                 'age_limit': age_limit,
2040             }
2041
2042             ext = determine_ext(video_url)
2043             if ext == 'smil':
2044                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2045             elif ext == 'xspf':
2046                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2047             elif ext == 'm3u8':
2048                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2049             elif ext == 'mpd':
2050                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2051             elif ext == 'f4m':
2052                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2053             else:
2054                 entry_info_dict['url'] = video_url
2055
2056             entries.append(entry_info_dict)
2057
2058         if len(entries) == 1:
2059             return entries[0]
2060         else:
2061             for num, e in enumerate(entries, start=1):
2062                 # 'url' results don't have a title
2063                 if e.get('title') is not None:
2064                     e['title'] = '%s (%d)' % (e['title'], num)
2065             return {
2066                 '_type': 'playlist',
2067                 'entries': entries,
2068             }