[generic] Use referrer from source kaltura embed URLs (#7409)
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_urllib_parse_unquote,
14     compat_urllib_request,
15     compat_urlparse,
16     compat_xml_parse_error,
17 )
18 from ..utils import (
19     determine_ext,
20     ExtractorError,
21     float_or_none,
22     HEADRequest,
23     is_html,
24     orderedSet,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import (
34     BrightcoveLegacyIE,
35     BrightcoveNewIE,
36 )
37 from .nbc import NBCSportsVPlayerIE
38 from .ooyala import OoyalaIE
39 from .rutv import RUTVIE
40 from .tvc import TVCIE
41 from .sportbox import SportBoxEmbedIE
42 from .smotri import SmotriIE
43 from .myvi import MyviIE
44 from .condenast import CondeNastIE
45 from .udn import UDNEmbedIE
46 from .senateisvp import SenateISVPIE
47 from .bliptv import BlipTVIE
48 from .svt import SVTIE
49 from .pornhub import PornHubIE
50 from .xhamster import XHamsterEmbedIE
51 from .vimeo import VimeoIE
52 from .dailymotion import DailymotionCloudIE
53 from .onionstudios import OnionStudiosIE
54 from .snagfilms import SnagFilmsEmbedIE
55 from .screenwavemedia import ScreenwaveMediaIE
56 from .mtv import MTVServicesEmbeddedIE
57
58
59 class GenericIE(InfoExtractor):
60     IE_DESC = 'Generic downloader that works on some sites'
61     _VALID_URL = r'.*'
62     IE_NAME = 'generic'
63     _TESTS = [
64         # Direct link to a video
65         {
66             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
67             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
68             'info_dict': {
69                 'id': 'trailer',
70                 'ext': 'mp4',
71                 'title': 'trailer',
72                 'upload_date': '20100513',
73             }
74         },
75         # Direct link to media delivered compressed (until Accept-Encoding is *)
76         {
77             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
78             'md5': '128c42e68b13950268b648275386fc74',
79             'info_dict': {
80                 'id': 'FictionJunction-Parallel_Hearts',
81                 'ext': 'flac',
82                 'title': 'FictionJunction-Parallel_Hearts',
83                 'upload_date': '20140522',
84             },
85             'expected_warnings': [
86                 'URL could be a direct video link, returning it as such.'
87             ]
88         },
89         # Direct download with broken HEAD
90         {
91             'url': 'http://ai-radio.org:8000/radio.opus',
92             'info_dict': {
93                 'id': 'radio',
94                 'ext': 'opus',
95                 'title': 'radio',
96             },
97             'params': {
98                 'skip_download': True,  # infinite live stream
99             },
100             'expected_warnings': [
101                 r'501.*Not Implemented'
102             ],
103         },
104         # Direct link with incorrect MIME type
105         {
106             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
107             'md5': '4ccbebe5f36706d85221f204d7eb5913',
108             'info_dict': {
109                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
110                 'id': '5_Lennart_Poettering_-_Systemd',
111                 'ext': 'webm',
112                 'title': '5_Lennart_Poettering_-_Systemd',
113                 'upload_date': '20141120',
114             },
115             'expected_warnings': [
116                 'URL could be a direct video link, returning it as such.'
117             ]
118         },
119         # RSS feed
120         {
121             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
122             'info_dict': {
123                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
124                 'title': 'Zero Punctuation',
125                 'description': 're:.*groundbreaking video review series.*'
126             },
127             'playlist_mincount': 11,
128         },
129         # RSS feed with enclosure
130         {
131             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
132             'info_dict': {
133                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
134                 'ext': 'm4v',
135                 'upload_date': '20150228',
136                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
137             }
138         },
139         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
140         {
141             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
142             'info_dict': {
143                 'id': 'smil',
144                 'ext': 'mp4',
145                 'title': 'Automatics, robotics and biocybernetics',
146                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
147                 'upload_date': '20130627',
148                 'formats': 'mincount:16',
149                 'subtitles': 'mincount:1',
150             },
151             'params': {
152                 'force_generic_extractor': True,
153                 'skip_download': True,
154             },
155         },
156         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
157         {
158             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
159             'info_dict': {
160                 'id': 'hds',
161                 'ext': 'flv',
162                 'title': 'hds',
163                 'formats': 'mincount:1',
164             },
165             'params': {
166                 'skip_download': True,
167             },
168         },
169         # SMIL from https://www.restudy.dk/video/play/id/1637
170         {
171             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
172             'info_dict': {
173                 'id': 'video_1637',
174                 'ext': 'flv',
175                 'title': 'video_1637',
176                 'formats': 'mincount:3',
177             },
178             'params': {
179                 'skip_download': True,
180             },
181         },
182         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
183         {
184             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
185             'info_dict': {
186                 'id': 'smil-service',
187                 'ext': 'flv',
188                 'title': 'smil-service',
189                 'formats': 'mincount:1',
190             },
191             'params': {
192                 'skip_download': True,
193             },
194         },
195         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
196         {
197             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
198             'info_dict': {
199                 'id': '4719370',
200                 'ext': 'mp4',
201                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
202                 'formats': 'mincount:3',
203             },
204             'params': {
205                 'skip_download': True,
206             },
207         },
208         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
209         {
210             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
211             'info_dict': {
212                 'id': 'mZlp2ctYIUEB',
213                 'ext': 'mp4',
214                 'title': 'Tikibad ontruimd wegens brand',
215                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
216                 'thumbnail': 're:^https?://.*\.jpg$',
217                 'duration': 33,
218             },
219             'params': {
220                 'skip_download': True,
221             },
222         },
223         # google redirect
224         {
225             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
226             'info_dict': {
227                 'id': 'cmQHVoWB5FY',
228                 'ext': 'mp4',
229                 'upload_date': '20130224',
230                 'uploader_id': 'TheVerge',
231                 'description': 're:^Chris Ziegler takes a look at the\.*',
232                 'uploader': 'The Verge',
233                 'title': 'First Firefox OS phones side-by-side',
234             },
235             'params': {
236                 'skip_download': False,
237             }
238         },
239         {
240             # redirect in Refresh HTTP header
241             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
242             'info_dict': {
243                 'id': 'pO8h3EaFRdo',
244                 'ext': 'mp4',
245                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
246                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
247                 'upload_date': '20150917',
248                 'uploader_id': 'brtvofficial',
249                 'uploader': 'Boiler Room',
250             },
251             'params': {
252                 'skip_download': False,
253             },
254         },
255         {
256             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
257             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
258             'info_dict': {
259                 'id': '13601338388002',
260                 'ext': 'mp4',
261                 'uploader': 'www.hodiho.fr',
262                 'title': 'R\u00e9gis plante sa Jeep',
263             }
264         },
265         # bandcamp page with custom domain
266         {
267             'add_ie': ['Bandcamp'],
268             'url': 'http://bronyrock.com/track/the-pony-mash',
269             'info_dict': {
270                 'id': '3235767654',
271                 'ext': 'mp3',
272                 'title': 'The Pony Mash',
273                 'uploader': 'M_Pallante',
274             },
275             'skip': 'There is a limit of 200 free downloads / month for the test song',
276         },
277         # embedded brightcove video
278         # it also tests brightcove videos that need to set the 'Referer' in the
279         # http requests
280         {
281             'add_ie': ['BrightcoveLegacy'],
282             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
283             'info_dict': {
284                 'id': '2765128793001',
285                 'ext': 'mp4',
286                 'title': 'Le cours de bourse : l’analyse technique',
287                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
288                 'uploader': 'BFM BUSINESS',
289             },
290             'params': {
291                 'skip_download': True,
292             },
293         },
294         {
295             # https://github.com/rg3/youtube-dl/issues/2253
296             'url': 'http://bcove.me/i6nfkrc3',
297             'md5': '0ba9446db037002366bab3b3eb30c88c',
298             'info_dict': {
299                 'id': '3101154703001',
300                 'ext': 'mp4',
301                 'title': 'Still no power',
302                 'uploader': 'thestar.com',
303                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
304             },
305             'add_ie': ['BrightcoveLegacy'],
306         },
307         {
308             'url': 'http://www.championat.com/video/football/v/87/87499.html',
309             'md5': 'fb973ecf6e4a78a67453647444222983',
310             'info_dict': {
311                 'id': '3414141473001',
312                 'ext': 'mp4',
313                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
314                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
315                 'uploader': 'Championat',
316             },
317         },
318         {
319             # https://github.com/rg3/youtube-dl/issues/3541
320             'add_ie': ['BrightcoveLegacy'],
321             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
322             'info_dict': {
323                 'id': '3866516442001',
324                 'ext': 'mp4',
325                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
326                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
327                 'uploader': 'SBS Broadcasting',
328             },
329             'skip': 'Restricted to Netherlands',
330             'params': {
331                 'skip_download': True,  # m3u8 download
332             },
333         },
334         # ooyala video
335         {
336             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
337             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
338             'info_dict': {
339                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
340                 'ext': 'mp4',
341                 'title': '2cc213299525360.mov',  # that's what we get
342             },
343             'add_ie': ['Ooyala'],
344         },
345         {
346             # ooyala video embedded with http://player.ooyala.com/iframe.js
347             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
348             'info_dict': {
349                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
350                 'ext': 'mp4',
351                 'title': '"Steve Jobs: Man in the Machine" trailer',
352                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
353             },
354             'params': {
355                 'skip_download': True,
356             },
357         },
358         # multiple ooyala embeds on SBN network websites
359         {
360             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
361             'info_dict': {
362                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
363                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
364             },
365             'playlist_mincount': 3,
366             'params': {
367                 'skip_download': True,
368             },
369             'add_ie': ['Ooyala'],
370         },
371         # embed.ly video
372         {
373             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
374             'info_dict': {
375                 'id': '9ODmcdjQcHQ',
376                 'ext': 'mp4',
377                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
378                 'upload_date': '20140225',
379                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
380                 'uploader': 'Tested',
381                 'uploader_id': 'testedcom',
382             },
383             # No need to test YoutubeIE here
384             'params': {
385                 'skip_download': True,
386             },
387         },
388         # funnyordie embed
389         {
390             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
391             'info_dict': {
392                 'id': '18e820ec3f',
393                 'ext': 'mp4',
394                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
395                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
396             },
397         },
398         # RUTV embed
399         {
400             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
401             'info_dict': {
402                 'id': '776940',
403                 'ext': 'mp4',
404                 'title': 'Охотское море стало целиком российским',
405                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
406             },
407             'params': {
408                 # m3u8 download
409                 'skip_download': True,
410             },
411         },
412         # TVC embed
413         {
414             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
415             'info_dict': {
416                 'id': '55304',
417                 'ext': 'mp4',
418                 'title': 'Дошкольное воспитание',
419             },
420         },
421         # SportBox embed
422         {
423             'url': 'http://www.vestifinance.ru/articles/25753',
424             'info_dict': {
425                 'id': '25753',
426                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
427             },
428             'playlist': [{
429                 'info_dict': {
430                     'id': '370908',
431                     'title': 'Госзаказ. День 3',
432                     'ext': 'mp4',
433                 }
434             }, {
435                 'info_dict': {
436                     'id': '370905',
437                     'title': 'Госзаказ. День 2',
438                     'ext': 'mp4',
439                 }
440             }, {
441                 'info_dict': {
442                     'id': '370902',
443                     'title': 'Госзаказ. День 1',
444                     'ext': 'mp4',
445                 }
446             }],
447             'params': {
448                 # m3u8 download
449                 'skip_download': True,
450             },
451         },
452         # Myvi.ru embed
453         {
454             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
455             'info_dict': {
456                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
457                 'ext': 'mp4',
458                 'title': 'Ужастики, русский трейлер (2015)',
459                 'thumbnail': 're:^https?://.*\.jpg$',
460                 'duration': 153,
461             }
462         },
463         # XHamster embed
464         {
465             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
466             'info_dict': {
467                 'id': 'showthread',
468                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
469             },
470             'playlist_mincount': 7,
471         },
472         # Embedded TED video
473         {
474             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
475             'md5': '65fdff94098e4a607385a60c5177c638',
476             'info_dict': {
477                 'id': '1969',
478                 'ext': 'mp4',
479                 'title': 'Hidden miracles of the natural world',
480                 'uploader': 'Louie Schwartzberg',
481                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
482             }
483         },
484         # Embedded Ustream video
485         {
486             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
487             'md5': '27b99cdb639c9b12a79bca876a073417',
488             'info_dict': {
489                 'id': '45734260',
490                 'ext': 'flv',
491                 'uploader': 'AU SPA:  The NSA and Privacy',
492                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
493             }
494         },
495         # nowvideo embed hidden behind percent encoding
496         {
497             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
498             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
499             'info_dict': {
500                 'id': '06e53103ca9aa',
501                 'ext': 'flv',
502                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
503                 'description': 'No description',
504             },
505         },
506         # arte embed
507         {
508             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
509             'md5': '7653032cbb25bf6c80d80f217055fa43',
510             'info_dict': {
511                 'id': '048195-004_PLUS7-F',
512                 'ext': 'flv',
513                 'title': 'X:enius',
514                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
515                 'upload_date': '20140320',
516             },
517             'params': {
518                 'skip_download': 'Requires rtmpdump'
519             }
520         },
521         # francetv embed
522         {
523             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
524             'info_dict': {
525                 'id': 'EV_30231',
526                 'ext': 'mp4',
527                 'title': 'Alcaline, le concert avec Calogero',
528                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
529                 'upload_date': '20150226',
530                 'timestamp': 1424989860,
531                 'duration': 5400,
532             },
533             'params': {
534                 # m3u8 downloads
535                 'skip_download': True,
536             },
537             'expected_warnings': [
538                 'Forbidden'
539             ]
540         },
541         # Condé Nast embed
542         {
543             'url': 'http://www.wired.com/2014/04/honda-asimo/',
544             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
545             'info_dict': {
546                 'id': '53501be369702d3275860000',
547                 'ext': 'mp4',
548                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
549             }
550         },
551         # Dailymotion embed
552         {
553             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
554             'md5': '441aeeb82eb72c422c7f14ec533999cd',
555             'info_dict': {
556                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
557                 'ext': 'mp4',
558                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
559                 'uploader': 'Spi0n',
560             },
561             'add_ie': ['Dailymotion'],
562         },
563         # YouTube embed
564         {
565             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
566             'info_dict': {
567                 'id': 'FXRb4ykk4S0',
568                 'ext': 'mp4',
569                 'title': 'The NBL Auction 2014',
570                 'uploader': 'BADMINTON England',
571                 'uploader_id': 'BADMINTONEvents',
572                 'upload_date': '20140603',
573                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
574             },
575             'add_ie': ['Youtube'],
576             'params': {
577                 'skip_download': True,
578             }
579         },
580         # MTVServices embed
581         {
582             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
583             'md5': '35727f82f58c76d996fc188f9755b0d5',
584             'info_dict': {
585                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
586                 'ext': 'mp4',
587                 'title': 'Review',
588                 'description': 'Mario\'s life in the fast lane has never looked so good.',
589             },
590         },
591         # YouTube embed via <data-embed-url="">
592         {
593             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
594             'info_dict': {
595                 'id': '4vAffPZIT44',
596                 'ext': 'mp4',
597                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
598                 'uploader': 'Gameloft',
599                 'uploader_id': 'gameloft',
600                 'upload_date': '20140828',
601                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
602             },
603             'params': {
604                 'skip_download': True,
605             }
606         },
607         # Camtasia studio
608         {
609             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
610             'playlist': [{
611                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
612                 'info_dict': {
613                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
614                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
615                     'ext': 'flv',
616                     'duration': 2235.90,
617                 }
618             }, {
619                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
620                 'info_dict': {
621                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
622                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
623                     'ext': 'flv',
624                     'duration': 2235.93,
625                 }
626             }],
627             'info_dict': {
628                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
629             }
630         },
631         # Flowplayer
632         {
633             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
634             'md5': '9d65602bf31c6e20014319c7d07fba27',
635             'info_dict': {
636                 'id': '5123ea6d5e5a7',
637                 'ext': 'mp4',
638                 'age_limit': 18,
639                 'uploader': 'www.handjobhub.com',
640                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
641             }
642         },
643         # Multiple brightcove videos
644         # https://github.com/rg3/youtube-dl/issues/2283
645         {
646             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
647             'info_dict': {
648                 'id': 'always-never',
649                 'title': 'Always / Never - The New Yorker',
650             },
651             'playlist_count': 3,
652             'params': {
653                 'extract_flat': False,
654                 'skip_download': True,
655             }
656         },
657         # MLB embed
658         {
659             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
660             'md5': '96f09a37e44da40dd083e12d9a683327',
661             'info_dict': {
662                 'id': '33322633',
663                 'ext': 'mp4',
664                 'title': 'Ump changes call to ball',
665                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
666                 'duration': 48,
667                 'timestamp': 1401537900,
668                 'upload_date': '20140531',
669                 'thumbnail': 're:^https?://.*\.jpg$',
670             },
671         },
672         # Wistia embed
673         {
674             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
675             'md5': '8788b683c777a5cf25621eaf286d0c23',
676             'info_dict': {
677                 'id': '1cfaf6b7ea',
678                 'ext': 'mov',
679                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
680                 'duration': 643.0,
681                 'filesize': 182808282,
682                 'uploader': 'education-portal.com',
683             },
684         },
685         {
686             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
687             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
688             'info_dict': {
689                 'id': 'uxjb0lwrcz',
690                 'ext': 'mp4',
691                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
692                 'duration': 1715.0,
693                 'uploader': 'thoughtworks.wistia.com',
694             },
695         },
696         # Soundcloud embed
697         {
698             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
699             'info_dict': {
700                 'id': '174391317',
701                 'ext': 'mp3',
702                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
703                 'uploader': 'Sophos Security',
704                 'title': 'Chet Chat 171 - Oct 29, 2014',
705                 'upload_date': '20141029',
706             }
707         },
708         # Livestream embed
709         {
710             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
711             'info_dict': {
712                 'id': '67864563',
713                 'ext': 'flv',
714                 'upload_date': '20141112',
715                 'title': 'Rosetta #CometLanding webcast HL 10',
716             }
717         },
718         # LazyYT
719         {
720             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
721             'info_dict': {
722                 'id': '1986',
723                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
724             },
725             'playlist_mincount': 2,
726         },
727         # Cinchcast embed
728         {
729             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
730             'info_dict': {
731                 'id': '7141703',
732                 'ext': 'mp3',
733                 'upload_date': '20141126',
734                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
735             }
736         },
737         # Cinerama player
738         {
739             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
740             'info_dict': {
741                 'id': '730m_DandD_1901_512k',
742                 'ext': 'mp4',
743                 'uploader': 'www.abc.net.au',
744                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
745             }
746         },
747         # embedded viddler video
748         {
749             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
750             'info_dict': {
751                 'id': '4d03aad9',
752                 'ext': 'mp4',
753                 'uploader': 'deadspin',
754                 'title': 'WALL-TO-GORTAT',
755                 'timestamp': 1422285291,
756                 'upload_date': '20150126',
757             },
758             'add_ie': ['Viddler'],
759         },
760         # Libsyn embed
761         {
762             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
763             'info_dict': {
764                 'id': '3377616',
765                 'ext': 'mp3',
766                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
767                 'description': 'md5:601cb790edd05908957dae8aaa866465',
768                 'upload_date': '20150220',
769             },
770         },
771         # jwplayer YouTube
772         {
773             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
774             'info_dict': {
775                 'id': 'Mrj4DVp2zeA',
776                 'ext': 'mp4',
777                 'upload_date': '20150212',
778                 'uploader': 'The National Archives UK',
779                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
780                 'uploader_id': 'NationalArchives08',
781                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
782             },
783         },
784         # rtl.nl embed
785         {
786             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
787             'playlist_mincount': 5,
788             'info_dict': {
789                 'id': 'aanslagen-kopenhagen',
790                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
791             }
792         },
793         # Zapiks embed
794         {
795             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
796             'info_dict': {
797                 'id': '118046',
798                 'ext': 'mp4',
799                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
800             }
801         },
802         # Kaltura embed
803         {
804             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
805             'info_dict': {
806                 'id': '1_eergr3h1',
807                 'ext': 'mp4',
808                 'upload_date': '20150226',
809                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
810                 'timestamp': int,
811                 'title': 'John Carlson Postgame 2/25/15',
812             },
813         },
814         # Kaltura embed (different embed code)
815         {
816             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
817             'info_dict': {
818                 'id': '1_a52wc67y',
819                 'ext': 'flv',
820                 'upload_date': '20150127',
821                 'uploader_id': 'PremierMedia',
822                 'timestamp': int,
823                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
824             },
825         },
826         # Eagle.Platform embed (generic URL)
827         {
828             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
829             'info_dict': {
830                 'id': '227304',
831                 'ext': 'mp4',
832                 'title': 'Навальный вышел на свободу',
833                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
834                 'thumbnail': 're:^https?://.*\.jpg$',
835                 'duration': 87,
836                 'view_count': int,
837                 'age_limit': 0,
838             },
839         },
840         # ClipYou (Eagle.Platform) embed (custom URL)
841         {
842             'url': 'http://muz-tv.ru/play/7129/',
843             'info_dict': {
844                 'id': '12820',
845                 'ext': 'mp4',
846                 'title': "'O Sole Mio",
847                 'thumbnail': 're:^https?://.*\.jpg$',
848                 'duration': 216,
849                 'view_count': int,
850             },
851         },
852         # Pladform embed
853         {
854             'url': 'http://muz-tv.ru/kinozal/view/7400/',
855             'info_dict': {
856                 'id': '100183293',
857                 'ext': 'mp4',
858                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
859                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
860                 'thumbnail': 're:^https?://.*\.jpg$',
861                 'duration': 694,
862                 'age_limit': 0,
863             },
864         },
865         # Playwire embed
866         {
867             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
868             'info_dict': {
869                 'id': '3519514',
870                 'ext': 'mp4',
871                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
872                 'thumbnail': 're:^https?://.*\.png$',
873                 'duration': 45.115,
874             },
875         },
876         # 5min embed
877         {
878             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
879             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
880             'info_dict': {
881                 'id': '518726732',
882                 'ext': 'mp4',
883                 'title': 'Facebook Creates "On This Day" | Crunch Report',
884             },
885         },
886         # SVT embed
887         {
888             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
889             'info_dict': {
890                 'id': '2900353',
891                 'ext': 'flv',
892                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
893                 'duration': 27,
894                 'age_limit': 0,
895             },
896         },
897         # Crooks and Liars embed
898         {
899             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
900             'info_dict': {
901                 'id': '8RUoRhRi',
902                 'ext': 'mp4',
903                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
904                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
905                 'timestamp': 1428207000,
906                 'upload_date': '20150405',
907                 'uploader': 'Heather',
908             },
909         },
910         # Crooks and Liars external embed
911         {
912             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
913             'info_dict': {
914                 'id': 'MTE3MjUtMzQ2MzA',
915                 'ext': 'mp4',
916                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
917                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
918                 'timestamp': 1265032391,
919                 'upload_date': '20100201',
920                 'uploader': 'Heather',
921             },
922         },
923         # NBC Sports vplayer embed
924         {
925             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
926             'info_dict': {
927                 'id': 'ln7x1qSThw4k',
928                 'ext': 'flv',
929                 'title': "PFT Live: New leader in the 'new-look' defense",
930                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
931             },
932         },
933         # UDN embed
934         {
935             'url': 'http://www.udn.com/news/story/7314/822787',
936             'md5': 'fd2060e988c326991037b9aff9df21a6',
937             'info_dict': {
938                 'id': '300346',
939                 'ext': 'mp4',
940                 'title': '中一中男師變性 全校師生力挺',
941                 'thumbnail': 're:^https?://.*\.jpg$',
942             }
943         },
944         # Ooyala embed
945         {
946             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
947             'info_dict': {
948                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
949                 'ext': 'mp4',
950                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
951                 'title': 'This is what separates the Excel masters from the wannabes',
952             },
953             'params': {
954                 # m3u8 downloads
955                 'skip_download': True,
956             }
957         },
958         # Contains a SMIL manifest
959         {
960             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
961             'info_dict': {
962                 'id': 'file',
963                 'ext': 'flv',
964                 'title': '+ Football: Lottery Champions League Europe',
965                 'uploader': 'www.telewebion.com',
966             },
967             'params': {
968                 # rtmpe downloads
969                 'skip_download': True,
970             }
971         },
972         # Brightcove URL in single quotes
973         {
974             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
975             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
976             'info_dict': {
977                 'id': '4255764656001',
978                 'ext': 'mp4',
979                 'title': 'SN Presents: Russell Martin, World Citizen',
980                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
981                 'uploader': 'Rogers Sportsnet',
982             },
983         },
984         # Dailymotion Cloud video
985         {
986             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
987             'md5': '49444254273501a64675a7e68c502681',
988             'info_dict': {
989                 'id': '5585de919473990de4bee11b',
990                 'ext': 'mp4',
991                 'title': 'Le débat',
992                 'thumbnail': 're:^https?://.*\.jpe?g$',
993             }
994         },
995         # OnionStudios embed
996         {
997             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
998             'info_dict': {
999                 'id': '2855',
1000                 'ext': 'mp4',
1001                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1002                 'thumbnail': 're:^https?://.*\.jpe?g$',
1003                 'uploader': 'ClickHole',
1004                 'uploader_id': 'clickhole',
1005             }
1006         },
1007         # SnagFilms embed
1008         {
1009             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1010             'info_dict': {
1011                 'id': '74849a00-85a9-11e1-9660-123139220831',
1012                 'ext': 'mp4',
1013                 'title': '#whilewewatch',
1014             }
1015         },
1016         # AdobeTVVideo embed
1017         {
1018             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1019             'md5': '43662b577c018ad707a63766462b1e87',
1020             'info_dict': {
1021                 'id': '2456',
1022                 'ext': 'mp4',
1023                 'title': 'New experience with Acrobat DC',
1024                 'description': 'New experience with Acrobat DC',
1025                 'duration': 248.667,
1026             },
1027         },
1028         # ScreenwaveMedia embed
1029         {
1030             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1031             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1032             'info_dict': {
1033                 'id': 'cinemasnob-55d26273809dd',
1034                 'ext': 'mp4',
1035                 'title': 'cinemasnob',
1036             },
1037         },
1038         # BrightcoveInPageEmbed embed
1039         {
1040             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1041             'info_dict': {
1042                 'id': '4238694884001',
1043                 'ext': 'flv',
1044                 'title': 'Tabletop: Dread, Last Thoughts',
1045                 'description': 'Tabletop: Dread, Last Thoughts',
1046                 'duration': 51690,
1047             },
1048         }
1049     ]
1050
1051     def report_following_redirect(self, new_url):
1052         """Report information extraction."""
1053         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1054
1055     def _extract_rss(self, url, video_id, doc):
1056         playlist_title = doc.find('./channel/title').text
1057         playlist_desc_el = doc.find('./channel/description')
1058         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1059
1060         entries = []
1061         for it in doc.findall('./channel/item'):
1062             next_url = xpath_text(it, 'link', fatal=False)
1063             if not next_url:
1064                 enclosure_nodes = it.findall('./enclosure')
1065                 for e in enclosure_nodes:
1066                     next_url = e.attrib.get('url')
1067                     if next_url:
1068                         break
1069
1070             if not next_url:
1071                 continue
1072
1073             entries.append({
1074                 '_type': 'url',
1075                 'url': next_url,
1076                 'title': it.find('title').text,
1077             })
1078
1079         return {
1080             '_type': 'playlist',
1081             'id': url,
1082             'title': playlist_title,
1083             'description': playlist_desc,
1084             'entries': entries,
1085         }
1086
1087     def _extract_camtasia(self, url, video_id, webpage):
1088         """ Returns None if no camtasia video can be found. """
1089
1090         camtasia_cfg = self._search_regex(
1091             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1092             webpage, 'camtasia configuration file', default=None)
1093         if camtasia_cfg is None:
1094             return None
1095
1096         title = self._html_search_meta('DC.title', webpage, fatal=True)
1097
1098         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1099         camtasia_cfg = self._download_xml(
1100             camtasia_url, video_id,
1101             note='Downloading camtasia configuration',
1102             errnote='Failed to download camtasia configuration')
1103         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1104
1105         entries = []
1106         for n in fileset_node.getchildren():
1107             url_n = n.find('./uri')
1108             if url_n is None:
1109                 continue
1110
1111             entries.append({
1112                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1113                 'title': '%s - %s' % (title, n.tag),
1114                 'url': compat_urlparse.urljoin(url, url_n.text),
1115                 'duration': float_or_none(n.find('./duration').text),
1116             })
1117
1118         return {
1119             '_type': 'playlist',
1120             'entries': entries,
1121             'title': title,
1122         }
1123
1124     def _real_extract(self, url):
1125         if url.startswith('//'):
1126             return {
1127                 '_type': 'url',
1128                 'url': self.http_scheme() + url,
1129             }
1130
1131         parsed_url = compat_urlparse.urlparse(url)
1132         if not parsed_url.scheme:
1133             default_search = self._downloader.params.get('default_search')
1134             if default_search is None:
1135                 default_search = 'fixup_error'
1136
1137             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1138                 if '/' in url:
1139                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1140                     return self.url_result('http://' + url)
1141                 elif default_search != 'fixup_error':
1142                     if default_search == 'auto_warning':
1143                         if re.match(r'^(?:url|URL)$', url):
1144                             raise ExtractorError(
1145                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1146                                 expected=True)
1147                         else:
1148                             self._downloader.report_warning(
1149                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1150                     return self.url_result('ytsearch:' + url)
1151
1152             if default_search in ('error', 'fixup_error'):
1153                 raise ExtractorError(
1154                     '%r is not a valid URL. '
1155                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1156                     % (url, url), expected=True)
1157             else:
1158                 if ':' not in default_search:
1159                     default_search += ':'
1160                 return self.url_result(default_search + url)
1161
1162         url, smuggled_data = unsmuggle_url(url)
1163         force_videoid = None
1164         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1165         if smuggled_data and 'force_videoid' in smuggled_data:
1166             force_videoid = smuggled_data['force_videoid']
1167             video_id = force_videoid
1168         else:
1169             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1170
1171         self.to_screen('%s: Requesting header' % video_id)
1172
1173         head_req = HEADRequest(url)
1174         head_response = self._request_webpage(
1175             head_req, video_id,
1176             note=False, errnote='Could not send HEAD request to %s' % url,
1177             fatal=False)
1178
1179         if head_response is not False:
1180             # Check for redirect
1181             new_url = head_response.geturl()
1182             if url != new_url:
1183                 self.report_following_redirect(new_url)
1184                 if force_videoid:
1185                     new_url = smuggle_url(
1186                         new_url, {'force_videoid': force_videoid})
1187                 return self.url_result(new_url)
1188
1189         full_response = None
1190         if head_response is False:
1191             request = compat_urllib_request.Request(url)
1192             request.add_header('Accept-Encoding', '*')
1193             full_response = self._request_webpage(request, video_id)
1194             head_response = full_response
1195
1196         # Check for direct link to a video
1197         content_type = head_response.headers.get('Content-Type', '')
1198         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1199         if m:
1200             upload_date = unified_strdate(
1201                 head_response.headers.get('Last-Modified'))
1202             return {
1203                 'id': video_id,
1204                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1205                 'direct': True,
1206                 'formats': [{
1207                     'format_id': m.group('format_id'),
1208                     'url': url,
1209                     'vcodec': 'none' if m.group('type') == 'audio' else None
1210                 }],
1211                 'upload_date': upload_date,
1212             }
1213
1214         if not self._downloader.params.get('test', False) and not is_intentional:
1215             force = self._downloader.params.get('force_generic_extractor', False)
1216             self._downloader.report_warning(
1217                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1218
1219         if not full_response:
1220             request = compat_urllib_request.Request(url)
1221             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1222             # making it impossible to download only chunk of the file (yet we need only 512kB to
1223             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1224             # that will always result in downloading the whole file that is not desirable.
1225             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1226             # to accept raw bytes and being able to download only a chunk.
1227             # It may probably better to solve this by checking Content-Type for application/octet-stream
1228             # after HEAD request finishes, but not sure if we can rely on this.
1229             request.add_header('Accept-Encoding', '*')
1230             full_response = self._request_webpage(request, video_id)
1231
1232         # Maybe it's a direct link to a video?
1233         # Be careful not to download the whole thing!
1234         first_bytes = full_response.read(512)
1235         if not is_html(first_bytes):
1236             self._downloader.report_warning(
1237                 'URL could be a direct video link, returning it as such.')
1238             upload_date = unified_strdate(
1239                 head_response.headers.get('Last-Modified'))
1240             return {
1241                 'id': video_id,
1242                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1243                 'direct': True,
1244                 'url': url,
1245                 'upload_date': upload_date,
1246             }
1247
1248         webpage = self._webpage_read_content(
1249             full_response, url, video_id, prefix=first_bytes)
1250
1251         self.report_extraction(video_id)
1252
1253         # Is it an RSS feed, a SMIL file or a XSPF playlist?
1254         try:
1255             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1256             if doc.tag == 'rss':
1257                 return self._extract_rss(url, video_id, doc)
1258             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1259                 return self._parse_smil(doc, url, video_id)
1260             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1261                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1262         except compat_xml_parse_error:
1263             pass
1264
1265         # Is it a Camtasia project?
1266         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1267         if camtasia_res is not None:
1268             return camtasia_res
1269
1270         # Sometimes embedded video player is hidden behind percent encoding
1271         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1272         # Unescaping the whole page allows to handle those cases in a generic way
1273         webpage = compat_urllib_parse_unquote(webpage)
1274
1275         # it's tempting to parse this further, but you would
1276         # have to take into account all the variations like
1277         #   Video Title - Site Name
1278         #   Site Name | Video Title
1279         #   Video Title - Tagline | Site Name
1280         # and so on and so forth; it's just not practical
1281         video_title = self._html_search_regex(
1282             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1283             default='video')
1284
1285         # Try to detect age limit automatically
1286         age_limit = self._rta_search(webpage)
1287         # And then there are the jokers who advertise that they use RTA,
1288         # but actually don't.
1289         AGE_LIMIT_MARKERS = [
1290             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1291         ]
1292         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1293             age_limit = 18
1294
1295         # video uploader is domain name
1296         video_uploader = self._search_regex(
1297             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1298
1299         # Helper method
1300         def _playlist_from_matches(matches, getter=None, ie=None):
1301             urlrs = orderedSet(
1302                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1303                 for m in matches)
1304             return self.playlist_result(
1305                 urlrs, playlist_id=video_id, playlist_title=video_title)
1306
1307         # Look for Brightcove Legacy Studio embeds
1308         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1309         if bc_urls:
1310             self.to_screen('Brightcove video detected.')
1311             entries = [{
1312                 '_type': 'url',
1313                 'url': smuggle_url(bc_url, {'Referer': url}),
1314                 'ie_key': 'BrightcoveLegacy'
1315             } for bc_url in bc_urls]
1316
1317             return {
1318                 '_type': 'playlist',
1319                 'title': video_title,
1320                 'id': video_id,
1321                 'entries': entries,
1322             }
1323
1324         # Look for Brightcove New Studio embeds
1325         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1326         if bc_urls:
1327             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1328
1329         # Look for embedded rtl.nl player
1330         matches = re.findall(
1331             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1332             webpage)
1333         if matches:
1334             return _playlist_from_matches(matches, ie='RtlNl')
1335
1336         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1337         if vimeo_url is not None:
1338             return self.url_result(vimeo_url)
1339
1340         vid_me_embed_url = self._search_regex(
1341             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1342             webpage, 'vid.me embed', default=None)
1343         if vid_me_embed_url is not None:
1344             return self.url_result(vid_me_embed_url, 'Vidme')
1345
1346         # Look for embedded YouTube player
1347         matches = re.findall(r'''(?x)
1348             (?:
1349                 <iframe[^>]+?src=|
1350                 data-video-url=|
1351                 <embed[^>]+?src=|
1352                 embedSWF\(?:\s*|
1353                 new\s+SWFObject\(
1354             )
1355             (["\'])
1356                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1357                 (?:embed|v|p)/.+?)
1358             \1''', webpage)
1359         if matches:
1360             return _playlist_from_matches(
1361                 matches, lambda m: unescapeHTML(m[1]))
1362
1363         # Look for lazyYT YouTube embed
1364         matches = re.findall(
1365             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1366         if matches:
1367             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1368
1369         # Look for embedded Dailymotion player
1370         matches = re.findall(
1371             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1372         if matches:
1373             return _playlist_from_matches(
1374                 matches, lambda m: unescapeHTML(m[1]))
1375
1376         # Look for embedded Dailymotion playlist player (#3822)
1377         m = re.search(
1378             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1379         if m:
1380             playlists = re.findall(
1381                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1382             if playlists:
1383                 return _playlist_from_matches(
1384                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1385
1386         # Look for embedded Wistia player
1387         match = re.search(
1388             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1389         if match:
1390             embed_url = self._proto_relative_url(
1391                 unescapeHTML(match.group('url')))
1392             return {
1393                 '_type': 'url_transparent',
1394                 'url': embed_url,
1395                 'ie_key': 'Wistia',
1396                 'uploader': video_uploader,
1397                 'title': video_title,
1398                 'id': video_id,
1399             }
1400
1401         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1402         if match:
1403             return {
1404                 '_type': 'url_transparent',
1405                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1406                 'ie_key': 'Wistia',
1407                 'uploader': video_uploader,
1408                 'title': video_title,
1409                 'id': match.group('id')
1410             }
1411
1412         # Look for embedded blip.tv player
1413         bliptv_url = BlipTVIE._extract_url(webpage)
1414         if bliptv_url:
1415             return self.url_result(bliptv_url, 'BlipTV')
1416
1417         # Look for SVT player
1418         svt_url = SVTIE._extract_url(webpage)
1419         if svt_url:
1420             return self.url_result(svt_url, 'SVT')
1421
1422         # Look for embedded condenast player
1423         matches = re.findall(
1424             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1425             webpage)
1426         if matches:
1427             return {
1428                 '_type': 'playlist',
1429                 'entries': [{
1430                     '_type': 'url',
1431                     'ie_key': 'CondeNast',
1432                     'url': ma,
1433                 } for ma in matches],
1434                 'title': video_title,
1435                 'id': video_id,
1436             }
1437
1438         # Look for Bandcamp pages with custom domain
1439         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1440         if mobj is not None:
1441             burl = unescapeHTML(mobj.group(1))
1442             # Don't set the extractor because it can be a track url or an album
1443             return self.url_result(burl)
1444
1445         # Look for embedded Vevo player
1446         mobj = re.search(
1447             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1448         if mobj is not None:
1449             return self.url_result(mobj.group('url'))
1450
1451         # Look for embedded Viddler player
1452         mobj = re.search(
1453             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1454             webpage)
1455         if mobj is not None:
1456             return self.url_result(mobj.group('url'))
1457
1458         # Look for NYTimes player
1459         mobj = re.search(
1460             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1461             webpage)
1462         if mobj is not None:
1463             return self.url_result(mobj.group('url'))
1464
1465         # Look for Libsyn player
1466         mobj = re.search(
1467             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1468         if mobj is not None:
1469             return self.url_result(mobj.group('url'))
1470
1471         # Look for Ooyala videos
1472         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1473                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1474                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1475                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1476         if mobj is not None:
1477             return OoyalaIE._build_url_result(mobj.group('ec'))
1478
1479         # Look for multiple Ooyala embeds on SBN network websites
1480         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1481         if mobj is not None:
1482             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1483             if embeds:
1484                 return _playlist_from_matches(
1485                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1486
1487         # Look for Aparat videos
1488         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1489         if mobj is not None:
1490             return self.url_result(mobj.group(1), 'Aparat')
1491
1492         # Look for MPORA videos
1493         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1494         if mobj is not None:
1495             return self.url_result(mobj.group(1), 'Mpora')
1496
1497         # Look for embedded NovaMov-based player
1498         mobj = re.search(
1499             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1500                     (?P<url>http://(?:(?:embed|www)\.)?
1501                         (?:novamov\.com|
1502                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1503                            videoweed\.(?:es|com)|
1504                            movshare\.(?:net|sx|ag)|
1505                            divxstage\.(?:eu|net|ch|co|at|ag))
1506                         /embed\.php.+?)\1''', webpage)
1507         if mobj is not None:
1508             return self.url_result(mobj.group('url'))
1509
1510         # Look for embedded Facebook player
1511         mobj = re.search(
1512             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1513         if mobj is not None:
1514             return self.url_result(mobj.group('url'), 'Facebook')
1515
1516         # Look for embedded VK player
1517         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1518         if mobj is not None:
1519             return self.url_result(mobj.group('url'), 'VK')
1520
1521         # Look for embedded ivi player
1522         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1523         if mobj is not None:
1524             return self.url_result(mobj.group('url'), 'Ivi')
1525
1526         # Look for embedded Huffington Post player
1527         mobj = re.search(
1528             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1529         if mobj is not None:
1530             return self.url_result(mobj.group('url'), 'HuffPost')
1531
1532         # Look for embed.ly
1533         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1534         if mobj is not None:
1535             return self.url_result(mobj.group('url'))
1536         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1537         if mobj is not None:
1538             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1539
1540         # Look for funnyordie embed
1541         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1542         if matches:
1543             return _playlist_from_matches(
1544                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1545
1546         # Look for BBC iPlayer embed
1547         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1548         if matches:
1549             return _playlist_from_matches(matches, ie='BBCCoUk')
1550
1551         # Look for embedded RUTV player
1552         rutv_url = RUTVIE._extract_url(webpage)
1553         if rutv_url:
1554             return self.url_result(rutv_url, 'RUTV')
1555
1556         # Look for embedded TVC player
1557         tvc_url = TVCIE._extract_url(webpage)
1558         if tvc_url:
1559             return self.url_result(tvc_url, 'TVC')
1560
1561         # Look for embedded SportBox player
1562         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1563         if sportbox_urls:
1564             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1565
1566         # Look for embedded PornHub player
1567         pornhub_url = PornHubIE._extract_url(webpage)
1568         if pornhub_url:
1569             return self.url_result(pornhub_url, 'PornHub')
1570
1571         # Look for embedded XHamster player
1572         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1573         if xhamster_urls:
1574             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1575
1576         # Look for embedded Tvigle player
1577         mobj = re.search(
1578             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1579         if mobj is not None:
1580             return self.url_result(mobj.group('url'), 'Tvigle')
1581
1582         # Look for embedded TED player
1583         mobj = re.search(
1584             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1585         if mobj is not None:
1586             return self.url_result(mobj.group('url'), 'TED')
1587
1588         # Look for embedded Ustream videos
1589         mobj = re.search(
1590             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1591         if mobj is not None:
1592             return self.url_result(mobj.group('url'), 'Ustream')
1593
1594         # Look for embedded arte.tv player
1595         mobj = re.search(
1596             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1597             webpage)
1598         if mobj is not None:
1599             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1600
1601         # Look for embedded francetv player
1602         mobj = re.search(
1603             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1604             webpage)
1605         if mobj is not None:
1606             return self.url_result(mobj.group('url'))
1607
1608         # Look for embedded smotri.com player
1609         smotri_url = SmotriIE._extract_url(webpage)
1610         if smotri_url:
1611             return self.url_result(smotri_url, 'Smotri')
1612
1613         # Look for embedded Myvi.ru player
1614         myvi_url = MyviIE._extract_url(webpage)
1615         if myvi_url:
1616             return self.url_result(myvi_url)
1617
1618         # Look for embedded soundcloud player
1619         mobj = re.search(
1620             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1621             webpage)
1622         if mobj is not None:
1623             url = unescapeHTML(mobj.group('url'))
1624             return self.url_result(url)
1625
1626         # Look for embedded vulture.com player
1627         mobj = re.search(
1628             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1629             webpage)
1630         if mobj is not None:
1631             url = unescapeHTML(mobj.group('url'))
1632             return self.url_result(url, ie='Vulture')
1633
1634         # Look for embedded mtvservices player
1635         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1636         if mtvservices_url:
1637             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1638
1639         # Look for embedded yahoo player
1640         mobj = re.search(
1641             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1642             webpage)
1643         if mobj is not None:
1644             return self.url_result(mobj.group('url'), 'Yahoo')
1645
1646         # Look for embedded sbs.com.au player
1647         mobj = re.search(
1648             r'''(?x)
1649             (?:
1650                 <meta\s+property="og:video"\s+content=|
1651                 <iframe[^>]+?src=
1652             )
1653             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1654             webpage)
1655         if mobj is not None:
1656             return self.url_result(mobj.group('url'), 'SBS')
1657
1658         # Look for embedded Cinchcast player
1659         mobj = re.search(
1660             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1661             webpage)
1662         if mobj is not None:
1663             return self.url_result(mobj.group('url'), 'Cinchcast')
1664
1665         mobj = re.search(
1666             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1667             webpage)
1668         if not mobj:
1669             mobj = re.search(
1670                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1671                 webpage)
1672         if mobj is not None:
1673             return self.url_result(mobj.group('url'), 'MLB')
1674
1675         mobj = re.search(
1676             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1677             webpage)
1678         if mobj is not None:
1679             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1680
1681         mobj = re.search(
1682             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1683             webpage)
1684         if mobj is not None:
1685             return self.url_result(mobj.group('url'), 'Livestream')
1686
1687         # Look for Zapiks embed
1688         mobj = re.search(
1689             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1690         if mobj is not None:
1691             return self.url_result(mobj.group('url'), 'Zapiks')
1692
1693         # Look for Kaltura embeds
1694         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1695                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1696         if mobj is not None:
1697             return self.url_result(smuggle_url(
1698                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1699                 {'source_url': url}), 'Kaltura')
1700
1701         # Look for Eagle.Platform embeds
1702         mobj = re.search(
1703             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1704         if mobj is not None:
1705             return self.url_result(mobj.group('url'), 'EaglePlatform')
1706
1707         # Look for ClipYou (uses Eagle.Platform) embeds
1708         mobj = re.search(
1709             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1710         if mobj is not None:
1711             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1712
1713         # Look for Pladform embeds
1714         mobj = re.search(
1715             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1716         if mobj is not None:
1717             return self.url_result(mobj.group('url'), 'Pladform')
1718
1719         # Look for Playwire embeds
1720         mobj = re.search(
1721             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1722         if mobj is not None:
1723             return self.url_result(mobj.group('url'))
1724
1725         # Look for 5min embeds
1726         mobj = re.search(
1727             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1728         if mobj is not None:
1729             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1730
1731         # Look for Crooks and Liars embeds
1732         mobj = re.search(
1733             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1734         if mobj is not None:
1735             return self.url_result(mobj.group('url'))
1736
1737         # Look for NBC Sports VPlayer embeds
1738         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1739         if nbc_sports_url:
1740             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1741
1742         # Look for UDN embeds
1743         mobj = re.search(
1744             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1745         if mobj is not None:
1746             return self.url_result(
1747                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1748
1749         # Look for Senate ISVP iframe
1750         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1751         if senate_isvp_url:
1752             return self.url_result(senate_isvp_url, 'SenateISVP')
1753
1754         # Look for Dailymotion Cloud videos
1755         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1756         if dmcloud_url:
1757             return self.url_result(dmcloud_url, 'DailymotionCloud')
1758
1759         # Look for OnionStudios embeds
1760         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1761         if onionstudios_url:
1762             return self.url_result(onionstudios_url)
1763
1764         # Look for SnagFilms embeds
1765         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1766         if snagfilms_url:
1767             return self.url_result(snagfilms_url)
1768
1769         # Look for ScreenwaveMedia embeds
1770         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1771         if mobj is not None:
1772             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1773
1774         # Look for AdobeTVVideo embeds
1775         mobj = re.search(
1776             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1777             webpage)
1778         if mobj is not None:
1779             return self.url_result(
1780                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1781                 'AdobeTVVideo')
1782
1783         def check_video(vurl):
1784             if YoutubeIE.suitable(vurl):
1785                 return True
1786             vpath = compat_urlparse.urlparse(vurl).path
1787             vext = determine_ext(vpath)
1788             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1789
1790         def filter_video(urls):
1791             return list(filter(check_video, urls))
1792
1793         # Start with something easy: JW Player in SWFObject
1794         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1795         if not found:
1796             # Look for gorilla-vid style embedding
1797             found = filter_video(re.findall(r'''(?sx)
1798                 (?:
1799                     jw_plugins|
1800                     JWPlayerOptions|
1801                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1802                 )
1803                 .*?
1804                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1805         if not found:
1806             # Broaden the search a little bit
1807             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1808         if not found:
1809             # Broaden the findall a little bit: JWPlayer JS loader
1810             found = filter_video(re.findall(
1811                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1812         if not found:
1813             # Flow player
1814             found = filter_video(re.findall(r'''(?xs)
1815                 flowplayer\("[^"]+",\s*
1816                     \{[^}]+?\}\s*,
1817                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1818                         ["']?url["']?\s*:\s*["']([^"']+)["']
1819             ''', webpage))
1820         if not found:
1821             # Cinerama player
1822             found = re.findall(
1823                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1824         if not found:
1825             # Try to find twitter cards info
1826             found = filter_video(re.findall(
1827                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1828         if not found:
1829             # We look for Open Graph info:
1830             # We have to match any number of spaces between elements, some sites try to align them (e.g.: statigr.am)
1831             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1832             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1833             if m_video_type is not None:
1834                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1835         if not found:
1836             # HTML5 video
1837             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1838         if not found:
1839             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1840             found = re.search(
1841                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1842                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1843                 webpage)
1844             if not found:
1845                 # Look also in Refresh HTTP header
1846                 refresh_header = head_response.headers.get('Refresh')
1847                 if refresh_header:
1848                     # In python 2 response HTTP headers are bytestrings
1849                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1850                         refresh_header = refresh_header.decode('iso-8859-1')
1851                     found = re.search(REDIRECT_REGEX, refresh_header)
1852             if found:
1853                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1854                 self.report_following_redirect(new_url)
1855                 return {
1856                     '_type': 'url',
1857                     'url': new_url,
1858                 }
1859         if not found:
1860             raise UnsupportedError(url)
1861
1862         entries = []
1863         for video_url in found:
1864             video_url = compat_urlparse.urljoin(url, video_url)
1865             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1866
1867             # Sometimes, jwplayer extraction will result in a YouTube URL
1868             if YoutubeIE.suitable(video_url):
1869                 entries.append(self.url_result(video_url, 'Youtube'))
1870                 continue
1871
1872             # here's a fun little line of code for you:
1873             video_id = os.path.splitext(video_id)[0]
1874
1875             ext = determine_ext(video_url)
1876             if ext == 'smil':
1877                 entries.append({
1878                     'id': video_id,
1879                     'formats': self._extract_smil_formats(video_url, video_id),
1880                     'uploader': video_uploader,
1881                     'title': video_title,
1882                     'age_limit': age_limit,
1883                 })
1884             elif ext == 'xspf':
1885                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
1886             else:
1887                 entries.append({
1888                     'id': video_id,
1889                     'url': video_url,
1890                     'uploader': video_uploader,
1891                     'title': video_title,
1892                     'age_limit': age_limit,
1893                 })
1894
1895         if len(entries) == 1:
1896             return entries[0]
1897         else:
1898             for num, e in enumerate(entries, start=1):
1899                 # 'url' results don't have a title
1900                 if e.get('title') is not None:
1901                     e['title'] = '%s (%d)' % (e['title'], num)
1902             return {
1903                 '_type': 'playlist',
1904                 'entries': entries,
1905             }